/usr/share/gocode/src/gopkg.in/neurosnap/sentences.v1/utils/frequency_dist.go is in golang-gopkg-neurosnap-sentences.v1-dev 1.0.6-1.
This file is owned by root:root, with mode 0o644.
The actual contents of the file can be viewed below.
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 | package utils
import "fmt"
/*
A frequency distribution for the outcomes of an experiment. A
frequency distribution records the number of times each outcome of
an experiment has occurred. For example, a frequency distribution
could be used to record the frequency of each word type in a
document. Formally, a frequency distribution can be defined as a
function mapping from each sample to the number of times that
sample occurred as an outcome.
Frequency distributions are generally constructed by running a
number of experiments, and incrementing the count for a sample
every time it is an outcome of an experiment.
*/
type FreqDist struct {
Samples map[string]int
}
func NewFreqDist(samples map[string]int) *FreqDist {
return &FreqDist{samples}
}
// Return the total number of sample outcomes that have been recorded by this FreqDist.
func (f *FreqDist) N() float64 {
sum := 0.0
for _, val := range f.Samples {
sum += float64(val)
}
return sum
}
// Return the total number of sample values (or "bins") that have counts greater than zero.
func (f *FreqDist) B() int {
return len(f.Samples)
}
// Return a list of all Samples that occur once (hapax legomena)
func (f *FreqDist) hapaxes() []string {
hap := make([]string, 0, f.B())
for key, val := range f.Samples {
if val != 1 {
continue
}
hap = append(hap, key)
}
return hap
}
// Return the dictionary mapping r to Nr, the number of Samples with frequency r, where Nr > 0
func (f *FreqDist) rToNr(bins int) map[int]int {
tmpRToNr := map[int]int{}
for _, value := range f.Samples {
tmpRToNr[value] += 1
}
if bins == 0 {
tmpRToNr[0] = 0
} else {
tmpRToNr[0] = bins - f.B()
}
return tmpRToNr
}
// Return the cumulative frequencies of the specified Samples.
// If no Samples are specified, all counts are returned, starting with the largest.
func (f *FreqDist) cumulativeFrequencies(Samples []string) []int {
cf := make([]int, 0, len(f.Samples))
for _, val := range Samples {
cf = append(cf, f.Samples[val])
}
return cf
}
/*
Return the frequency of a given sample. The frequency of a
sample is defined as the count of that sample divided by the
total number of sample outcomes that have been recorded by
this FreqDist. The count of a sample is defined as the
number of times that sample outcome was recorded by this
FreqDist. Frequencies are always real numbers in the range
[0, 1].
*/
func (f *FreqDist) freq(sample string) float64 {
if f.N() == 0 {
return 0
}
return float64(f.Samples[sample]) / f.N()
}
type maxFreq struct {
Key string
Val int
}
/*
Return the sample with the greatest number of outcomes in this
frequency distribution. If two or more Samples have the same
number of outcomes, return one of them; which sample is
returned is undefined.
*/
func (f *FreqDist) max() (string, error) {
if len(f.Samples) == 0 {
return "", fmt.Errorf("No Samples loaded, please add samples before getting max")
}
max := maxFreq{}
for key, val := range f.Samples {
if val > max.Val {
max.Key = key
max.Val = val
}
}
return max.Key, nil
}
|