This file is indexed.

/usr/include/tesseract/statistc.h is in libtesseract-dev 3.02.01-6.

This file is owned by root:root, with mode 0o644.

The actual contents of the file can be viewed below.

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
/**********************************************************************
 * File:        statistc.h  (Formerly stats.h)
 * Description: Class description for STATS class.
 * Author:					Ray Smith
 * Created:					Mon Feb 04 16:19:07 GMT 1991
 *
 * (C) Copyright 1991, Hewlett-Packard Ltd.
 ** Licensed under the Apache License, Version 2.0 (the "License");
 ** you may not use this file except in compliance with the License.
 ** You may obtain a copy of the License at
 ** http://www.apache.org/licenses/LICENSE-2.0
 ** Unless required by applicable law or agreed to in writing, software
 ** distributed under the License is distributed on an "AS IS" BASIS,
 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 ** See the License for the specific language governing permissions and
 ** limitations under the License.
 *
 **********************************************************************/

#ifndef TESSERACT_CCSTRUCT_STATISTC_H_
#define TESSERACT_CCSTRUCT_STATISTC_H_

#include <stdio.h>
#include "host.h"
#include "scrollview.h"

// Simple histogram-based statistics for integer values in a known
// range, such that the range is small compared to the number of samples.
//
// NOTE(review): the class declares a destructor and holds a raw pointer
// (buckets_) but declares no copy constructor or assignment operator. If the
// destructor releases buckets_ (its definition is not visible in this
// header), copying a STATS would be unsafe - confirm before copying by value.
class STATS {
 public:
  // The histogram buckets are in the range
  // [min_bucket_value, max_bucket_value_plus_1 - 1] i.e.
  // [min_bucket_value, max_bucket_value].
  // Any data under min_bucket_value is silently mapped to min_bucket_value,
  // and likewise, any data over max_bucket_value is silently mapped to
  // max_bucket_value.
  // In the internal array, min_bucket_value maps to 0 and
  // max_bucket_value_plus_1 - min_bucket_value to the array size.
  // TODO(rays) This is ugly. Convert the second argument to
  // max_bucket_value and all the code that uses it.
  STATS(inT32 min_bucket_value, inT32 max_bucket_value_plus_1);
  // Default constructor, intended for declaring arrays of STATS.
  // Presumably leaves the object without bucket storage until set_range()
  // is called - confirm against the definition.
  STATS();

  ~STATS();

  // (Re)Sets the range and clears the counts.
  // See the constructor for info on max and min values.
  bool set_range(inT32 min_bucket_value, inT32 max_bucket_value_plus_1);

  // Empties all the buckets.
  void clear();

  // Adds count samples of the given value to the histogram.
  void add(inT32 value, inT32 count);

  // "Accessors" return various statistics on the data.
  inT32 mode() const;   // get mode of samples
  double mean() const;  // get mean of samples
  double sd() const;    // standard deviation
  // Returns the fractile value such that frac fraction (in [0,1]) of samples
  // has a value less than the return value.
  double ile(double frac) const;
  // Returns the minimum used entry in the histogram (ie the minimum of the
  // data, NOT the minimum of the supplied range, nor is it an index.)
  // Would normally be called min(), but that is a reserved word in VC++.
  inT32 min_bucket() const;  // Find min
  // Returns the maximum used entry in the histogram (ie the maximum of the
  // data, NOT the maximum of the supplied range, nor is it an index.)
  inT32 max_bucket() const;  // Find max
  // Finds a more useful estimate of median than ile(0.5).
  // Overcomes a problem with ile() - if the samples are, for example,
  // 6,6,13,14 ile(0.5) returns 7.0 - when a more useful value would be midway
  // between 6 and 13 = 9.5
  double median() const;  // get median of samples
  // Returns the count of the given value.
  // Values at or below the bottom of the range read the first bucket and
  // values at or above the top of the range read the last bucket, so the
  // lookup never indexes outside the bucket array.
  // Returns 0 if no bucket array is present, instead of dereferencing a
  // null pointer.
  inT32 pile_count(inT32 value) const {
    if (buckets_ == NULL)
      return 0;  // No storage, so nothing has been counted.
    if (value <= rangemin_)
      return buckets_[0];
    // rangemax_ is one past the largest value, hence the - 1.
    if (value >= rangemax_ - 1)
      return buckets_[rangemax_ - rangemin_ - 1];
    return buckets_[value - rangemin_];
  }
  // Returns the total count of all buckets.
  inT32 get_total() const {
    return total_count_;        // total of all piles
  }
  // Returns true if x is a local min.
  bool local_min(inT32 x) const;

  // Apply a triangular smoothing filter to the stats.
  // This makes the modes a bit more useful.
  // The factor gives the height of the triangle, i.e. the weight of the
  // centre.
  void smooth(inT32 factor);

  // Cluster the samples into max_clusters clusters.
  // Each call runs one iteration. The array of clusters must be
  // max_clusters+1 in size as cluster 0 is used to indicate which samples
  // have been used.
  // The return value is the current number of clusters.
  inT32 cluster(float lower,         // thresholds
                float upper,
                float multiple,      // distance threshold
                inT32 max_clusters,  // max no to make
                STATS *clusters);    // array of clusters


  // Prints a summary and table of the histogram.
  void print() const;
  // Prints summary stats only of the histogram.
  void print_summary() const;

  // Draws the histogram as a series of rectangles.
  void plot(ScrollView* window,   // window to draw in
            float xorigin,   // x origin of the histogram
            float yorigin,   // y origin of the histogram
            float xscale,    // size of one unit in x
            float yscale,    // size of one unit in y
            ScrollView::Color colour) const;  // colour to draw in

  // Draws a line graph of the histogram.
  void plotline(ScrollView* window,   // window to draw in
                float xorigin,   // x origin of the histogram
                float yorigin,   // y origin of the histogram
                float xscale,    // size of one unit in x
                float yscale,    // size of one unit in y
                ScrollView::Color colour) const;  // colour to draw in
 private:
  inT32 rangemin_;                // min of range
  // rangemax_ is not well named as it is really one past the max.
  inT32 rangemax_;                // one past the max of range
  inT32 total_count_;             // no of samples
  inT32* buckets_;                // array of cells
};

// Selects the item that would be at position index if the array were
// ordered, but without fully ordering the items, in linear time.
// The array does get shuffled!
// NOTE(review): the return type is inT32 although the items are floats, so
// the result is presumably a position in the shuffled array rather than the
// item's value - confirm against the definition.
inT32 choose_nth_item(inT32 index,   // index to choose
                      float *array,  // array of items
                      inT32 count);  // no of items
// Generic version of choose_nth_item for arrays of any element type.
// Ordering is decided by the supplied comparator, which has qsort
// semantics: it receives pointers to two elements and returns a negative,
// zero or positive value for less-than, equal or greater-than.
// size is the size of one element (presumably in bytes, as with qsort -
// confirm against the definition).
inT32 choose_nth_item(inT32 index,   // index to choose
                      void *array,   // array of items
                      inT32 count,   // no of items
                      size_t size,   // element size
                      int (*compar)(const void*, const void*));  // comparator
// Swaps the 2 entries at index1 and index2 of the array in-place.
// size is the size of a single entry (presumably in bytes - confirm against
// the definition).
void swap_entries(void *array,   // array of entries
                  size_t size,   // size of entry
                  inT32 index1,  // entries to swap
                  inT32 index2);

#endif  // TESSERACT_CCSTRUCT_STATISTC_H_