/usr/share/gocode/src/golang.org/x/exp/utf8string/string.go is in golang-golang-x-exp-dev 0.0~git20150826.1.eb7c1fa-1.
This file is owned by root:root, with mode 0o644.
The actual contents of the file can be viewed below.
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 | // Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Package utf8string provides an efficient way to index strings by rune rather than by byte.
package utf8string // import "golang.org/x/exp/utf8string"
import (
"errors"
"unicode/utf8"
)
// String wraps a regular string with a small structure that provides more
// efficient indexing by code point index, as opposed to byte index.
// Scanning incrementally forwards or backwards is O(1) per index operation
// (although not as fast a range clause going forwards). Random access is
// O(N) in the length of the string, but the overhead is less than always
// scanning from the beginning.
// If the string is ASCII, random access is O(1).
// Unlike the built-in string type, String has internal mutable state and
// is not thread-safe.
type String struct {
str string
numRunes int
// If width > 0, the rune at runePos starts at bytePos and has the specified width.
width int
bytePos int
runePos int
nonASCII int // byte index of the first non-ASCII rune.
}
// NewString returns a new UTF-8 string with the provided contents.
func NewString(contents string) *String {
return new(String).Init(contents)
}
// Init initializes an existing String to hold the provided contents.
// It returns a pointer to the initialized String.
func (s *String) Init(contents string) *String {
s.str = contents
s.bytePos = 0
s.runePos = 0
for i := 0; i < len(contents); i++ {
if contents[i] >= utf8.RuneSelf {
// Not ASCII.
s.numRunes = utf8.RuneCountInString(contents)
_, s.width = utf8.DecodeRuneInString(contents)
s.nonASCII = i
return s
}
}
// ASCII is simple. Also, the empty string is ASCII.
s.numRunes = len(contents)
s.width = 0
s.nonASCII = len(contents)
return s
}
// String returns the contents of the String. This method also means the
// String is directly printable by fmt.Print.
func (s *String) String() string {
return s.str
}
// RuneCount returns the number of runes (Unicode code points) in the String.
func (s *String) RuneCount() int {
return s.numRunes
}
// IsASCII returns a boolean indicating whether the String contains only ASCII bytes.
func (s *String) IsASCII() bool {
return s.width == 0
}
// Slice returns the string sliced at rune positions [i:j].
func (s *String) Slice(i, j int) string {
// ASCII is easy. Let the compiler catch the indexing error if there is one.
if j < s.nonASCII {
return s.str[i:j]
}
if i < 0 || j > s.numRunes || i > j {
panic(sliceOutOfRange)
}
if i == j {
return ""
}
// For non-ASCII, after At(i), bytePos is always the position of the indexed character.
var low, high int
switch {
case i < s.nonASCII:
low = i
case i == s.numRunes:
low = len(s.str)
default:
s.At(i)
low = s.bytePos
}
switch {
case j == s.numRunes:
high = len(s.str)
default:
s.At(j)
high = s.bytePos
}
return s.str[low:high]
}
// At returns the rune with index i in the String. The sequence of runes is the same
// as iterating over the contents with a "for range" clause.
func (s *String) At(i int) rune {
// ASCII is easy. Let the compiler catch the indexing error if there is one.
if i < s.nonASCII {
return rune(s.str[i])
}
// Now we do need to know the index is valid.
if i < 0 || i >= s.numRunes {
panic(outOfRange)
}
var r rune
// Five easy common cases: within 1 spot of bytePos/runePos, or the beginning, or the end.
// With these cases, all scans from beginning or end work in O(1) time per rune.
switch {
case i == s.runePos-1: // backing up one rune
r, s.width = utf8.DecodeLastRuneInString(s.str[0:s.bytePos])
s.runePos = i
s.bytePos -= s.width
return r
case i == s.runePos+1: // moving ahead one rune
s.runePos = i
s.bytePos += s.width
fallthrough
case i == s.runePos:
r, s.width = utf8.DecodeRuneInString(s.str[s.bytePos:])
return r
case i == 0: // start of string
r, s.width = utf8.DecodeRuneInString(s.str)
s.runePos = 0
s.bytePos = 0
return r
case i == s.numRunes-1: // last rune in string
r, s.width = utf8.DecodeLastRuneInString(s.str)
s.runePos = i
s.bytePos = len(s.str) - s.width
return r
}
// We need to do a linear scan. There are three places to start from:
// 1) The beginning
// 2) bytePos/runePos.
// 3) The end
// Choose the closest in rune count, scanning backwards if necessary.
forward := true
if i < s.runePos {
// Between beginning and pos. Which is closer?
// Since both i and runePos are guaranteed >= nonASCII, that's the
// lowest location we need to start from.
if i < (s.runePos-s.nonASCII)/2 {
// Scan forward from beginning
s.bytePos, s.runePos = s.nonASCII, s.nonASCII
} else {
// Scan backwards from where we are
forward = false
}
} else {
// Between pos and end. Which is closer?
if i-s.runePos < (s.numRunes-s.runePos)/2 {
// Scan forward from pos
} else {
// Scan backwards from end
s.bytePos, s.runePos = len(s.str), s.numRunes
forward = false
}
}
if forward {
// TODO: Is it much faster to use a range loop for this scan?
for {
r, s.width = utf8.DecodeRuneInString(s.str[s.bytePos:])
if s.runePos == i {
break
}
s.runePos++
s.bytePos += s.width
}
} else {
for {
r, s.width = utf8.DecodeLastRuneInString(s.str[0:s.bytePos])
s.runePos--
s.bytePos -= s.width
if s.runePos == i {
break
}
}
}
return r
}
var outOfRange = errors.New("utf8string: index out of range")
var sliceOutOfRange = errors.New("utf8string: slice index out of range")
|