/usr/share/gocode/src/gopkg.in/neurosnap/sentences.v1/storage.go is in golang-gopkg-neurosnap-sentences.v1-dev 1.0.6-1.
This file is owned by root:root, with mode 0o644.
The actual contents of the file can be viewed below.
package sentences
import "encoding/json"
// SetString is a set of strings backed by a map from string to int.
// Add stores 1 under a key, and Has treats a key whose value is 0 (or that is
// missing) as absent. Storage.addOrthoContext also ORs integer flags into the
// stored value, so the value is not always a plain membership marker.
type SetString map[string]int
// Add inserts str into the set by storing a non-zero marker under it.
// Any value previously stored for str is overwritten with 1. The set must be
// non-nil, because assigning into a nil map panics.
func (ss SetString) Add(str string) {
	const member = 1 // non-zero marker; Has reads a zero value as "absent"
	ss[str] = member
}
// Remove deletes str from the set. It relies on the built-in delete, which is
// a no-op when str is not present or when the set is nil.
func (ss SetString) Remove(str string) {
delete(ss, str)
}
// Has reports whether str is a member of the set.
//
// Membership is defined by a non-zero stored value: a key that is missing, or
// that is present with the value 0, is reported as absent. Looking up a key in
// a nil set is safe and returns false.
func (ss SetString) Has(str string) bool {
	return ss[str] != 0
}
// Array returns the keys of the set as a slice. Every key in the map is
// included, even one whose stored value is 0. The order follows Go's map
// iteration order, which is unspecified, so callers that need a stable order
// must sort the result. The returned slice is never nil.
func (ss SetString) Array() []string {
	keys := make([]string, len(ss))
	next := 0
	for k := range ss {
		keys[next] = k
		next++
	}
	return keys
}
// Storage stores data used to perform sentence boundary detection with punkt.
// This is where all the training data gets stored for future use. The JSON
// tags are the object keys LoadTraining decodes each set from.
type Storage struct {
// AbbrevTypes is the set IsAbbr consults to decide whether a token is an abbreviation.
AbbrevTypes SetString `json:"AbbrevTypes"`
// Collocations is not used by the code visible in this file;
// NOTE(review): presumably holds learned collocations — confirm against callers.
Collocations SetString `json:"Collocations"`
// SentStarters is not used by the code visible in this file;
// NOTE(review): presumably holds learned sentence-starter words — confirm against callers.
SentStarters SetString `json:"SentStarters"`
// OrthoContext maps a type to an integer built up by OR-ing flags in addOrthoContext.
OrthoContext SetString `json:"OrthoContext"`
}
// LoadTraining is the primary function to load JSON training data. By default,
// the sentence tokenizer loads in english automatically, but other languages
// could be loaded into a binary file using the `make <lang>` command.
//
// data is decoded with encoding/json into a new Storage using the keys named
// by the Storage field tags. Any set that the document omits, or sets to
// null, is replaced with an empty set so that later writes to it do not panic
// on a nil map.
//
// On a decode failure it returns a nil Storage together with the error from
// encoding/json, unchanged.
func LoadTraining(data []byte) (*Storage, error) {
	storage := new(Storage)
	if err := json.Unmarshal(data, storage); err != nil {
		return nil, err
	}

	// encoding/json leaves a map field nil when its key is absent or null.
	// Reads from a nil map are fine, but assigning into one panics, so make
	// every set usable before handing the Storage out.
	for _, set := range []*SetString{
		&storage.AbbrevTypes,
		&storage.Collocations,
		&storage.SentStarters,
		&storage.OrthoContext,
	} {
		if *set == nil {
			*set = SetString{}
		}
	}
	return storage, nil
}
// NewStorage creates the default storage container: a Storage whose four
// sets are each allocated and empty, so they can be written to immediately.
func NewStorage() *Storage {
	return &Storage{
		AbbrevTypes:  make(SetString),
		Collocations: make(SetString),
		SentStarters: make(SetString),
		OrthoContext: make(SetString),
	}
}
// addOrthoContext is used in the training to add a type to the ortho context.
// It ORs flag into the integer stored for typ in p.OrthoContext, so repeated
// calls for the same typ accumulate flags rather than replace them.
//
// OrthoContext is allocated on first use, which keeps a zero-value Storage
// (or one decoded from JSON without an OrthoContext key) from panicking on a
// nil-map write.
func (p *Storage) addOrthoContext(typ string, flag int) {
	if p.OrthoContext == nil {
		p.OrthoContext = SetString{}
	}
	p.OrthoContext[typ] |= flag
}
// IsAbbr determines whether any of the tokens is an abbreviation, i.e. is
// reported as present by p.AbbrevTypes.Has. It returns true at the first
// match and false when no token matches or when no tokens are given.
func (p *Storage) IsAbbr(tokens ...string) bool {
	for i := range tokens {
		if !p.AbbrevTypes.Has(tokens[i]) {
			continue
		}
		return true
	}
	return false
}
|