/usr/share/gocode/src/gopkg.in/neurosnap/sentences.v1/english/main_test.go is in golang-gopkg-neurosnap-sentences.v1-dev 1.0.6-1.
This file is owned by root:root, with mode 0o644.
The actual contents of the file can be viewed below.
package english
import (
"testing"
)
// tokenizer is the sentence tokenizer shared by every test in this file. It is
// built once, at package initialization, by calling NewSentenceTokenizer with
// nil. tokenizerErr keeps the second result of that call instead of discarding
// it (assumed to be an error value — confirm against NewSentenceTokenizer).
var tokenizer, tokenizerErr = NewSentenceTokenizer(nil)

// init aborts the test binary when the shared tokenizer could not be built, so
// the construction failure is reported with its cause instead of being
// silently ignored.
func init() {
	if tokenizerErr != nil {
		panic("english: building test tokenizer: " + tokenizerErr.Error())
	}
}
// TestEnglishSmartQuotes verifies that text whose first sentence closes with a
// smart quote is tokenized into two sentences, the second one keeping its
// leading space.
func TestEnglishSmartQuotes(t *testing.T) {
	t.Log("Tokenizer should break sentences that end in smart quotes ...")

	const input = "Here is a quote, ”a smart one.” Will this break properly?"
	want := []string{
		"Here is a quote, ”a smart one.”",
		" Will this break properly?",
	}

	got := tokenizer.Tokenize(input)

	// The sentence count must match before the texts are compared one by one.
	if len(got) != len(want) {
		t.Fatalf("Actual: %d, Expected: %d", len(got), len(want))
	}

	for i := range got {
		if text := got[i].Text; text != want[i] {
			t.Fatalf("Actual: %s\nExpected: %s", text, want[i])
		}
	}
}
// TestEnglishCustomAbbrev verifies sentence splitting around dotted
// abbreviations such as "F.B.I.", "G.D." and "J.G.": the abbreviation may end a
// sentence, sit in the middle of one, or start one, and each input must still
// yield exactly the expected sentences.
func TestEnglishCustomAbbrev(t *testing.T) {
	t.Log("Tokenizer should detect custom abbreviations and not always sentence break on them.")

	// Scenarios are checked in order; the first failure stops the test.
	scenarios := []struct {
		input string
		want  []string
	}{
		{
			input: "One custom abbreviation is F.B.I. The abbreviation, F.B.I. should properly break.",
			want: []string{
				"One custom abbreviation is F.B.I.",
				" The abbreviation, F.B.I. should properly break.",
			},
		},
		{
			input: "An abbreviation near the end of a G.D. sentence. J.G. Wentworth was cool.",
			want: []string{
				"An abbreviation near the end of a G.D. sentence.",
				" J.G. Wentworth was cool.",
			},
		},
	}

	for _, sc := range scenarios {
		got := tokenizer.Tokenize(sc.input)

		if len(got) != len(sc.want) {
			t.Fatalf("Actual: %d, Expected: %d", len(got), len(sc.want))
		}

		for i, s := range got {
			if s.Text != sc.want[i] {
				t.Fatalf("Actual: %s\nExpected: %s", s.Text, sc.want[i])
			}
		}
	}
}
// TestEnglishSupervisedAbbrev verifies that "Sgt.", "No." and "Gov." inside a
// sentence do not produce a sentence break: the three-sentence input must come
// back as exactly three sentences.
func TestEnglishSupervisedAbbrev(t *testing.T) {
	t.Log("Tokenizer should detect list of supervised abbreviations.")

	wantTexts := []string{
		"I am a Sgt. in the army.",
		" I am a No. 1 student.",
		" The Gov. of Michigan is a dick.",
	}

	got := tokenizer.Tokenize("I am a Sgt. in the army. I am a No. 1 student. The Gov. of Michigan is a dick.")

	if gotN, wantN := len(got), len(wantTexts); gotN != wantN {
		t.Fatalf("Actual: %d, Expected: %d", gotN, wantN)
	}

	// Both slices have the same length here, so walking the expected texts
	// visits every tokenized sentence in order.
	for i, want := range wantTexts {
		if got[i].Text != want {
			t.Fatalf("Actual: %s\nExpected: %s", got[i].Text, want)
		}
	}
}
// TestEnglishSemicolon verifies that a semicolon inside a sentence does not
// split it: the input must come back as two sentences, the first of which
// still contains the semicolon.
func TestEnglishSemicolon(t *testing.T) {
	t.Log("Tokenizer should parse sentences with semicolons")

	text := "I am here; you are over there. Will the tokenizer output two complete sentences?"
	wanted := []string{
		"I am here; you are over there.",
		" Will the tokenizer output two complete sentences?",
	}

	sents := tokenizer.Tokenize(text)
	if len(sents) != len(wanted) {
		t.Fatalf("Actual: %d, Expected: %d", len(sents), len(wanted))
	}

	for pos, s := range sents {
		if s.Text == wanted[pos] {
			continue
		}
		t.Fatalf("Actual: %s\nExpected: %s", s.Text, wanted[pos])
	}
}
|