This file is indexed.

/usr/include/tesseract/associate.h is in libtesseract-dev 3.02.01-6.

This file is owned by root:root, with mode 0o644.

The actual contents of the file can be viewed below.

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
///////////////////////////////////////////////////////////////////////
// File:        associate.h
// Description: Structs, classes, typedefs useful for the segmentation
//              search. Functions for scoring segmentation paths according
//              to their character widths, gap widths and seam cuts.
// Author:      Daria Antonova
// Created:     Mon Mar 8 11:26:43 PDT 2010
//
// (C) Copyright 2010, Google Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
///////////////////////////////////////////////////////////////////////

#ifndef ASSOCIATE_H
#define ASSOCIATE_H

#include "blobs.h"
#include "elst.h"
#include "matrix.h"
#include "seam.h"
#include "split.h"
#include "states.h"

class WERD_RES;

typedef inT16 BLOB_WEIGHTS[MAX_NUM_CHUNKS];

// Each unichar evaluated.
struct EVALUATION_RECORD {
  float match;
  float certainty;
  char character;
  int width;
  int gap;
};

typedef EVALUATION_RECORD EVALUATION_ARRAY[MAX_NUM_CHUNKS];

// Classification info for chunks.
//
// TODO(daria): move to tesseract namespace when obsolete code using
// this struct that is not in tesseract namespace is deprecated.
struct CHUNKS_RECORD {
  MATRIX *ratings;
  TBLOB *chunks;
  WERD_RES* word_res;  // Borrowed pointer - do not delete!
  SEAMS splits;
  int x_height;
  WIDTH_RECORD *chunk_widths;
  WIDTH_RECORD *char_widths;
  inT16 *weights;
};

namespace tesseract {

// Statisitcs about character widths, gaps and seams.
struct AssociateStats {
  AssociateStats() { Clear(); }

  void Clear() {
    shape_cost = 0.0f;
    bad_shape = false;
    full_wh_ratio = 0.0f;
    full_wh_ratio_total = 0.0f;
    full_wh_ratio_var = 0.0f;
    bad_fixed_pitch_right_gap = false;
    bad_fixed_pitch_wh_ratio = false;
  }

  void Print() {
    tprintf("AssociateStats: w(%g %d) s(%g %d)\n", shape_cost, bad_shape);
  }

  float shape_cost;           // cost of blob shape
  bool bad_shape;             // true if the shape of the blob is unacceptable
  float full_wh_ratio;        // width-to-hight ratio + gap on the right
  float full_wh_ratio_total;  // sum of width-to-hight ratios
                              // on the path terminating at this blob
  float full_wh_ratio_var;    // variance of full_wh_ratios on the path
  bool bad_fixed_pitch_right_gap;  // true if there is no gap before
                                   // the blob on the right
  bool bad_fixed_pitch_wh_ratio;   // true if the blobs has width-to-hight
                                   // ratio > kMaxFixedPitchCharAspectRatio
};

// Utility functions for scoring segmentation paths according to their
// character widths, gap widths, seam characteristics.
class AssociateUtils {
 public:
  static const float kMaxFixedPitchCharAspectRatio;
  static const float kMinGap;

  // Computes character widths, gaps and seams stats given the
  // AssociateStats of the path so far, col, row of the blob that
  // is being added to the path, and CHUNKS_RECORD containing information
  // about character widths, gaps and seams.
  // Fills associate_cost with the combined shape, gap and seam cost
  // of adding a unichar from (col, row) to the path (note that since
  // this function could be used to compute the prioritization for
  // pain points, (col, row) entry might not be classified yet; thus
  // information in the (col, row) entry of the ratings matrix is not used).
  //
  // Note: the function assumes that chunks_record, stats and
  // associate_cost pointers are not NULL.
  static void ComputeStats(int col, int row,
                           const AssociateStats *parent_stats,
                           int parent_path_length,
                           bool fixed_pitch,
                           float max_char_wh_ratio,
                           const DENORM *denorm,
                           CHUNKS_RECORD *chunks_record,
                           int debug_level,
                           AssociateStats *stats);

  // Returns the width of a chunk which is a composed of several blobs
  // blobs[start_blob..last_blob] inclusively.
  // Widths/gaps records are in the form:
  // width_record->num_char = n
  // width_record->widths[2*n-1] = w0,g0,w1,g1..w(n-1),g(n-1)
  static int GetChunksWidth(WIDTH_RECORD *width_record,
                            int start_blob, int last_blob);

  // Returns the width of a gap between the specified chunk and the next one.
  static inline int GetChunksGap(WIDTH_RECORD *width_record, int last_chunk) {
    return (last_chunk >= 0 && last_chunk < width_record->num_chars - 1) ?
      width_record->widths[last_chunk * 2 + 1] : 0;
  }

  // Returns the width cost for fixed-pitch text.
  static float FixedPitchWidthCost(float norm_width, float right_gap,
                                   bool end_pos, float max_char_wh_ratio);

  // Returns the gap cost for fixed-pitch text (penalizes vertically
  // overlapping components).
  static inline float FixedPitchGapCost(float norm_gap, bool end_pos) {
    return (norm_gap < 0.05 && !end_pos) ? 5.0f : 0.0f;
  }
};

}  // namespace tesseract

#endif