/usr/share/gocode/src/gopkg.in/neurosnap/sentences.v1/token.go is in golang-gopkg-neurosnap-sentences.v1-dev 1.0.6-1.
This file is owned by root:root, with mode 0o644.
The actual contents of the file can be viewed below.
package sentences
import (
"fmt"
"regexp"
)
// TokenGrouper groups adjacent tokens together into pairs.
type TokenGrouper interface {
// Group takes a slice of token pointers and returns a slice of
// two-element token arrays.
Group([]*Token) [][2]*Token
}
// DefaultTokenGrouper is the default implementation of TokenGrouper.
// It is an empty struct and therefore carries no state of its own.
type DefaultTokenGrouper struct{}
// Group is the primary logic for implementing TokenGrouper.
//
// It pairs every token with the token that follows it in the slice and
// returns those pairs in order. The last token is paired with nil, so a
// non-empty input of n tokens always yields exactly n pairs. An empty (or
// nil) input yields nil.
//
// Pairing is purely positional: the same *Token appearing in two consecutive
// positions still produces a pair for those positions.
func (p *DefaultTokenGrouper) Group(tokens []*Token) [][2]*Token {
	if len(tokens) == 0 {
		return nil
	}

	last := len(tokens) - 1
	pairs := make([][2]*Token, 0, len(tokens))

	// Walk every token except the last one and pair it with its successor.
	// Indexing (rather than comparing pointers to detect the first element)
	// keeps the result correct even when a pointer is repeated.
	for i, tok := range tokens[:last] {
		pairs = append(pairs, [2]*Token{tok, tokens[i+1]})
	}

	// The final token has no successor; mark that with a nil second element.
	return append(pairs, [2]*Token{tokens[last], nil})
}
// Token stores a token of text with annotations produced during sentence boundary detection.
//
// NewToken fills in Tok and the four regexp fields; every other field starts
// at its zero value.
type Token struct {
// Tok is the token text handed to NewToken.
Tok string
// Position is an integer location for the token.
// NOTE(review): presumably an offset into the source text; confirm against the tokenizer.
Position int
// SentBreak is a boolean annotation.
// NOTE(review): presumably marks a sentence break at this token; confirm against the annotators.
SentBreak bool
// ParaStart is a boolean annotation.
// NOTE(review): presumably marks the first token of a paragraph; confirm.
ParaStart bool
// LineStart is a boolean annotation.
// NOTE(review): presumably marks the first token of a line; confirm.
LineStart bool
// Abbr is a boolean annotation.
// NOTE(review): presumably marks the token as an abbreviation; confirm.
Abbr bool
// periodFinal is an unexported boolean annotation.
// NOTE(review): presumably records whether the token ends with a period; confirm.
periodFinal bool
// The fields below are set by NewToken to the package-level compiled
// patterns of the same names.
reEllipsis *regexp.Regexp
reNumeric *regexp.Regexp
reInitial *regexp.Regexp
reAlpha *regexp.Regexp
}
// Patterns compiled once at package initialisation. NewToken stores these
// same compiled values in every Token it creates.
var (
	// reEllipsis matches two or more consecutive periods at the end of the input.
	reEllipsis = regexp.MustCompile(`\.\.+$`)

	// reNumeric matches a number-like run at the end of the input: an optional
	// minus sign, an optional leading period or comma, a digit, any mix of
	// digits, commas, periods and hyphens, and an optional trailing period.
	// The pattern is not anchored at the start, so it also matches when only
	// a suffix of the input is numeric.
	reNumeric = regexp.MustCompile(`-?[\.,]?\d[\d,\.-]*\.?$`)

	// reInitial matches exactly one ASCII letter followed by a period.
	reInitial = regexp.MustCompile(`^[A-Za-z]\.$`)

	// reAlpha matches input made up solely of one or more ASCII letters.
	reAlpha = regexp.MustCompile(`^[A-Za-z]+$`)
)
// NewToken allocates a Token whose Tok is the given text and whose regexp
// fields point at the package-level compiled patterns. All remaining fields
// are left at their zero values.
func NewToken(token string) *Token {
	t := new(Token)
	t.Tok = token

	// Share the patterns compiled at package scope instead of recompiling.
	t.reEllipsis = reEllipsis
	t.reNumeric = reNumeric
	t.reInitial = reInitial
	t.reAlpha = reAlpha

	return t
}
// String renders the token for display, showing its quoted text followed by
// the SentBreak, Abbr and Position fields.
func (p *Token) String() string {
	const layout = "<Token Tok: %q, SentBreak: %t, Abbr: %t, Position: %d>"

	return fmt.Sprintf(
		layout,
		p.Tok,
		p.SentBreak,
		p.Abbr,
		p.Position,
	)
}
|