/usr/share/gocode/src/golang.org/x/text/cases/icu_test.go is in golang-golang-x-text-dev 0.0~git20170627.0.6353ef0-1ubuntu2.
This file is owned by root:root, with mode 0o644.
The actual contents of the file can be viewed below.
// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build icu
package cases
import (
"path"
"strings"
"testing"
"golang.org/x/text/internal/testtext"
"golang.org/x/text/language"
"golang.org/x/text/unicode/norm"
)
// TestICUConformance runs every non-excluded combination of casing mode,
// language tag and input string through both ICU (doICU) and this package
// (doGo) and reports an error when the NFC-normalized results differ.
func TestICUConformance(t *testing.T) {
	// Fixed inputs; the rest of the corpus is collected from the other test
	// tables of this package below.
	input := []string{
		"a.a a_a",
		"a\u05d0a",
		"\u05d0'a",
		"a\u03084a",
		"a\u0308a",
		"a3\u30a3a",
		"a\u303aa",
		"a_\u303a_a",
		"1_a..a",
		"1_a.a",
		"a..a.",
		"a--a-",
		"a-a-",
		"a\u200ba",
		"a\u200b\u200ba",
		"a\u00ad\u00ada", // Format
		"a\u00ada",
		"a''a", // SingleQuote
		"a'a",
		"a::a", // MidLetter
		"a:a",
		"a..a", // MidNumLet
		"a.a",
		"a;;a", // MidNum
		"a;a",
		"a__a", // ExtendNumlet
		"a_a",
		"ΟΣ''a",
	}

	// add appends a string, or every element of a []string, to input. Values
	// of any other dynamic type are silently ignored.
	add := func(x interface{}) {
		switch v := x.(type) {
		case string:
			input = append(input, v)
		case []string:
			input = append(input, v...)
		}
	}

	for _, tc := range testCases {
		for _, v := range []interface{}{tc.src, tc.lower, tc.upper, tc.title} {
			add(v)
		}
	}
	for _, tc := range bufferTests {
		add(tc.src)
	}
	for _, tc := range breakTest {
		// The break tests mark break positions with "|"; strip the markers
		// to recover the plain input text.
		input = append(input, strings.Replace(tc, "|", "", -1))
	}
	for _, tc := range foldTestCases {
		add(tc)
	}

	// Compare ICU to Go, one subtest per (caser, tag, input) triple.
	casers := []string{"lower", "upper", "title", "fold"}
	tags := []string{"und", "af", "az", "el", "lt", "nl", "tr"}
	for _, c := range casers {
		for _, tag := range tags {
			for _, s := range input {
				if !exclude(c, tag, s) {
					name := path.Join(c, tag, s)
					testtext.Run(t, name, func(t *testing.T) {
						want := doICU(tag, c, s)
						got := doGo(tag, c, s)
						// Compare in NFC so that differences in
						// normalization form alone are not reported.
						if norm.NFC.String(got) != norm.NFC.String(want) {
							t.Errorf("\n in %[3]q (%+[3]q)\n got %[1]q (%+[1]q)\n want %[2]q (%+[2]q)", got, want, s)
						}
					})
				}
			}
		}
	}
}
// exclude reports whether the input s should be skipped when testing casing
// mode cm for language tag.
//
// An input is skipped when it contains the substring of any exclusion entry
// whose mode list and tag list both apply. A list applies when it is empty or
// when it contains cm (respectively tag) as a substring.
func exclude(cm, tag, s string) bool {
	// Each entry holds the casing modes and the language tags it applies to
	// (both as space-separated lists, empty meaning "all") and the substring
	// that marks an input as excluded.
	exclusions := []struct{ modes, langs, substr string }{
		// TODO: Go does not handle certain esoteric breaks correctly. This
		// will be fixed once we have a real word break iterator.
		// Alternatively, it seems like we're not too far off from making it
		// work, so we could fix these last steps. But first verify that using
		// a separate word breaker does not hurt performance.
		{"title", "af nl", "a''a"},
		{"", "", "א'a"},

		// All the exclusions below seem to be issues with the ICU
		// implementation (at version 57) and thus are not marked as TODO.

		// ICU does not handle a leading apostrophe for Dutch and Afrikaans
		// correctly. See http://unicode.org/cldr/trac/ticket/7078.
		{"title", "af nl", "'n"},
		{"title", "af nl", "'N"},

		// Go terminates the final sigma check after a fixed number of
		// ignorables have been found. This ensures that the algorithm can
		// make progress in a streaming scenario.
		{"lower title", "", "\u039f\u03a3...............................a"},
		// This also applies to upper in Greek.
		// NOTE: we could fix the following two cases by adding state to
		// elUpper and aztrLower. However, considering a modifier to not
		// belong to the preceding letter after the maximum modifiers count is
		// reached is consistent with the behavior of unicode/norm.
		{"upper", "el", "\u03bf" + strings.Repeat("\u0321", 29) + "\u0313"},
		{"lower", "az tr lt", "I" + strings.Repeat("\u0321", 30) + "\u0307\u0300"},
		{"upper", "lt", "i" + strings.Repeat("\u0321", 30) + "\u0307\u0300"},
		{"lower", "lt", "I" + strings.Repeat("\u0321", 30) + "\u0300"},

		// ICU title case seems to erroneously remove \u0307 from an upper
		// case I unconditionally, instead of only when lowercasing. The ICU
		// transform algorithm transforms these cases consistently with our
		// implementation.
		{"title", "az tr", "\u0307"},

		// The spec says to remove \u0307 after Soft-Dotted characters. ICU
		// transforms conform but ucasemap_utf8ToUpper does not.
		{"upper title", "lt", "i\u0307"},
		{"upper title", "lt", "i" + strings.Repeat("\u0321", 29) + "\u0307\u0300"},

		// Both Unicode and CLDR prescribe an extra explicit dot above after a
		// Soft_Dotted character if there are other modifiers.
		// ucasemap_utf8ToUpper does not do this; ICU transforms do.
		// The issue with ucasemap_utf8ToUpper seems to be that it does not
		// consider the modifiers that are part of composition in the
		// evaluation of More_Above. For instance, according to the More_Above
		// rule for lt, a dotted capital I (U+0130) becomes i\u0307\u0307 (a
		// small i with two additional dots). This seems odd, but is correct.
		// ICU is definitely not correct as it produces different results for
		// different normal forms. For instance, for an İ:
		//    \u0130  (NFC) -> i\u0307         (incorrect)
		//    I\u0307 (NFD) -> i\u0307\u0307   (correct)
		// We could argue that we should not add a \u0307 if there already is
		// one, but this may be hard to get correct and does not conform to
		// the standard.
		{"lower title", "lt", "\u0130"},
		{"lower title", "lt", "\u00cf"},

		// We would conform to ICU's ucasemap_utf8ToUpper if we removed
		// support for elUpper. However, this clearly does not conform to the
		// spec. Moreover, the ICU transforms _do_ implement this transform
		// and produce results consistent with our implementation. Note that
		// we still prefer to use ucasemap_utf8ToUpper instead of transforms
		// as the latter have inconsistencies in the word breaking algorithm.
		{"upper", "el", "\u0386"}, // GREEK CAPITAL LETTER ALPHA WITH TONOS
		{"upper", "el", "\u0389"}, // GREEK CAPITAL LETTER ETA WITH TONOS
		{"upper", "el", "\u038A"}, // GREEK CAPITAL LETTER IOTA WITH TONOS

		{"upper", "el", "\u0391"}, // GREEK CAPITAL LETTER ALPHA
		{"upper", "el", "\u0397"}, // GREEK CAPITAL LETTER ETA
		{"upper", "el", "\u0399"}, // GREEK CAPITAL LETTER IOTA

		{"upper", "el", "\u03AC"}, // GREEK SMALL LETTER ALPHA WITH TONOS
		{"upper", "el", "\u03AE"}, // GREEK SMALL LETTER ETA WITH TONOS
		{"upper", "el", "\u03AF"}, // GREEK SMALL LETTER IOTA WITH TONOS

		{"upper", "el", "\u03B1"}, // GREEK SMALL LETTER ALPHA
		{"upper", "el", "\u03B7"}, // GREEK SMALL LETTER ETA
		{"upper", "el", "\u03B9"}, // GREEK SMALL LETTER IOTA
	}

	for _, e := range exclusions {
		modeApplies := e.modes == "" || strings.Contains(e.modes, cm)
		langApplies := e.langs == "" || strings.Contains(e.langs, tag)
		if modeApplies && langApplies && strings.Contains(s, e.substr) {
			return true
		}
	}
	return false
}
func doGo(tag, caser, input string) string {
var c Caser
t := language.MustParse(tag)
switch caser {
case "lower":
c = Lower(t)
case "upper":
c = Upper(t)
case "title":
c = Title(t)
case "fold":
c = Fold()
}
return c.String(input)
}
|