This file is indexed.

/usr/share/gocode/src/gopkg.in/neurosnap/sentences.v1/english/main_test.go is in golang-gopkg-neurosnap-sentences.v1-dev 1.0.6-1.

This file is owned by root:root, with mode 0o644.

The actual contents of the file can be viewed below.

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
package english

import (
	"testing"
)

// tokenizer is the English sentence tokenizer used by the tests below. It is
// built once, at package initialization, from NewSentenceTokenizer(nil).
// tokenizerErr keeps the construction error instead of discarding it.
var tokenizer, tokenizerErr = NewSentenceTokenizer(nil)

// init stops the test binary with a descriptive message when the shared
// tokenizer could not be built, rather than letting every test fail later on
// an unusable tokenizer. Package-level variables are initialized before init
// runs, so tokenizerErr is already populated here.
func init() {
	if tokenizerErr != nil {
		panic("english: building test tokenizer: " + tokenizerErr.Error())
	}
}

// TestEnglishSmartQuotes feeds the tokenizer a sentence that closes with a
// smart quote followed by a second sentence, and requires exactly the two
// expected pieces (the second keeps its leading space).
func TestEnglishSmartQuotes(t *testing.T) {
	t.Log("Tokenizer should break sentences that end in smart quotes ...")

	const input = "Here is a quote, ”a smart one.” Will this break properly?"
	want := []string{
		"Here is a quote, ”a smart one.”",
		" Will this break properly?",
	}

	got := tokenizer.Tokenize(input)

	// Compare counts first so the indexed comparison below cannot go out of range.
	if gotN, wantN := len(got), len(want); gotN != wantN {
		t.Fatalf("Actual: %d, Expected: %d", gotN, wantN)
	}

	for i := range want {
		if text := got[i].Text; text != want[i] {
			t.Fatalf("Actual: %s\nExpected: %s", text, want[i])
		}
	}
}

// TestEnglishCustomAbbrev runs two inputs containing dotted abbreviations
// (F.B.I., G.D., J.G.) through the tokenizer and requires each to come back
// as exactly the two expected sentences. The scenarios run in order and the
// test stops at the first mismatch.
func TestEnglishCustomAbbrev(t *testing.T) {
	t.Log("Tokenizer should detect custom abbreviations and not always sentence break on them.")

	scenarios := []struct {
		input string
		want  []string
	}{
		{
			input: "One custom abbreviation is F.B.I.  The abbreviation, F.B.I. should properly break.",
			want: []string{
				"One custom abbreviation is F.B.I.",
				"  The abbreviation, F.B.I. should properly break.",
			},
		},
		{
			input: "An abbreviation near the end of a G.D. sentence.  J.G. Wentworth was cool.",
			want: []string{
				"An abbreviation near the end of a G.D. sentence.",
				"  J.G. Wentworth was cool.",
			},
		},
	}

	for _, sc := range scenarios {
		got := tokenizer.Tokenize(sc.input)

		// Counts must match before the per-sentence comparison indexes sc.want.
		if len(got) != len(sc.want) {
			t.Fatalf("Actual: %d, Expected: %d", len(got), len(sc.want))
		}

		for i, sentence := range got {
			if want := sc.want[i]; sentence.Text != want {
				t.Fatalf("Actual: %s\nExpected: %s", sentence.Text, want)
			}
		}
	}
}

// TestEnglishSupervisedAbbrev tokenizes three sentences that each contain an
// abbreviation followed by a period (Sgt., No., Gov.) and requires that only
// the three real sentence boundaries produce breaks.
func TestEnglishSupervisedAbbrev(t *testing.T) {
	t.Log("Tokenizer should detect list of supervised abbreviations.")

	want := []string{
		"I am a Sgt. in the army.",
		"  I am a No. 1 student.",
		"  The Gov. of Michigan is a dick.",
	}
	input := "I am a Sgt. in the army.  I am a No. 1 student.  The Gov. of Michigan is a dick."

	sentences := tokenizer.Tokenize(input)

	// Bail out on a count mismatch; the loop below indexes want by position.
	gotCount, wantCount := len(sentences), len(want)
	if gotCount != wantCount {
		t.Fatalf("Actual: %d, Expected: %d", gotCount, wantCount)
	}

	for pos := 0; pos < gotCount; pos++ {
		if sentences[pos].Text == want[pos] {
			continue
		}
		t.Fatalf("Actual: %s\nExpected: %s", sentences[pos].Text, want[pos])
	}
}

// TestEnglishSemicolon tokenizes text whose first sentence contains a
// semicolon and requires two sentences back, i.e. no break at the semicolon.
func TestEnglishSemicolon(t *testing.T) {
	t.Log("Tokenizer should parse sentences with semicolons")

	var (
		input = "I am here; you are over there.  Will the tokenizer output two complete sentences?"
		want  = []string{
			"I am here; you are over there.",
			"  Will the tokenizer output two complete sentences?",
		}
	)

	got := tokenizer.Tokenize(input)
	if len(got) != len(want) {
		t.Fatalf("Actual: %d, Expected: %d", len(got), len(want))
	}

	// Lengths are equal here, so want[i] is always in range.
	for i, s := range got {
		text, expect := s.Text, want[i]
		if text != expect {
			t.Fatalf("Actual: %s\nExpected: %s", text, expect)
		}
	}
}