This file is indexed.

/usr/include/tesseract/cube_line_segmenter.h is in libtesseract-dev 3.02.01-6.

This file is owned by root:root, with mode 0o644.

The actual contents of the file can be viewed below.

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
/**********************************************************************
 * File:        cube_line_segmenter.h
 * Description: Declaration of the Cube Line Segmenter Class
 * Author:    Ahmad Abdulkader
 * Created:   2007
 *
 * (C) Copyright 2008, Google Inc.
 ** Licensed under the Apache License, Version 2.0 (the "License");
 ** you may not use this file except in compliance with the License.
 ** You may obtain a copy of the License at
 ** http://www.apache.org/licenses/LICENSE-2.0
 ** Unless required by applicable law or agreed to in writing, software
 ** distributed under the License is distributed on an "AS IS" BASIS,
 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 ** See the License for the specific language governing permissions and
 ** limitations under the License.
 *
 **********************************************************************/

// TODO(ahmadab)
// This is really a makeshift line segmenter that works well for Arabic.
// It should eventually be replaced by Ray Smith's page segmenter.
// There are lots of magic numbers below that were determined empirically
// but have not been thoroughly tested.

#ifndef CUBE_LINE_SEGMENTER_H
#define CUBE_LINE_SEGMENTER_H

#include "cube_reco_context.h"
#include "allheaders.h"

namespace tesseract {

// Line segmenter used by Cube. The public accessors initialize the object
// lazily: each one runs Init() on first use (while init_ is still false) and
// reports failure through its return value if Init() fails.
class CubeLineSegmenter {
 public:
  CubeLineSegmenter(CubeRecoContext *cntxt, Pix *img);
  ~CubeLineSegmenter();

  // Accessor functions

  // Returns the post-processed image (img_), or NULL if Init() fails.
  Pix *PostProcessedImage() {
    if (!EnsureInit()) {
      return NULL;
    }
    return img_;
  }
  // Returns the column count (columns_->n), or 0 if Init() fails.
  int ColumnCnt() {
    if (!EnsureInit()) {
      return 0;
    }
    return columns_->n;
  }
  // Returns the box stored at index col of the columns' box array.
  // Returns NULL if Init() fails, if there is no box array, or if col is
  // outside [0, number of boxes).
  Box *Column(int col) {
    if (!EnsureInit()) {
      return NULL;
    }

    Boxa *col_boxes = columns_->boxa;
    if (col_boxes == NULL || col < 0 || col >= col_boxes->n) {
      return NULL;
    }
    return col_boxes->box[col];
  }
  // Returns the line count (line_cnt_), or 0 if Init() fails.
  int LineCnt() {
    if (!EnsureInit()) {
      return 0;
    }

    return line_cnt_;
  }
  // Returns the connected components (con_comps_), or NULL if Init() fails.
  Pixa *ConComps() {
    if (!EnsureInit()) {
      return NULL;
    }

    return con_comps_;
  }
  // Returns the columns (columns_), or NULL if Init() fails.
  Pixaa *Columns() {
    if (!EnsureInit()) {
      return NULL;
    }

    return columns_;
  }
  // Height estimates. Unlike the accessors above, these do NOT trigger
  // Init(); they return the stored values as-is.
  inline double AlefHgtEst() { return est_alef_hgt_; }
  inline double DotHgtEst() { return est_dot_hgt_; }
  // Defined out of line. NOTE(review): presumably returns the image of the
  // requested line and its bounding box through line_box -- confirm against
  // the implementation file.
  Pix *Line(int line, Box **line_box);

 private:
  // Tuning constants; their values are defined out of line and are not
  // visible in this header.
  static const float kMinValidLineHgtRatio;
  static const int kLineSepMorphMinHgt;
  static const int kHgtBins;
  static const int kMaxConnCompHgt;
  static const int kMaxConnCompWid;
  static const int kMaxHorzAspectRatio;
  static const int kMaxVertAspectRatio;
  static const int kMinWid;
  static const int kMinHgt;
  static const double kMaxValidLineRatio;

  // Cube Reco context
  CubeRecoContext *cntxt_;
  // Original image
  Pix *orig_img_;
  // Post processed image
  Pix *img_;
  // Init flag
  bool init_;
  // Output Line and column info
  int line_cnt_;
  Pixaa *columns_;
  Pixa *con_comps_;
  Pixa *lines_pixa_;
  // Estimates for sizes of ALEF and DOT needed for Arabic analysis
  double est_alef_hgt_;
  double est_dot_hgt_;

  // Returns true if the object is already initialized (init_ is set) or if
  // a call to Init() succeeds now. Init() is only invoked while init_ is
  // false.
  bool EnsureInit() { return init_ || Init(); }

  // Init the page analysis
  bool Init();
  // Performs line segmentation
  bool LineSegment();
  // Cleanup function
  Pix *CleanUp(Pix *pix);
  // compute validity ratio for a line
  double ValidityRatio(Pix *line_mask_pix, Box *line_box);
  // validate line
  bool ValidLine(Pix *line_mask_pix, Box *line_box);
  // split a line continuously until valid or fail
  Pixa *SplitLine(Pix *line_mask_pix, Box *line_box);
  // do a desperate attempt at cracking lines
  Pixa *CrackLine(Pix *line_mask_pix, Box *line_box);
  Pixa *CrackLine(Pix *line_mask_pix, Box *line_box, int line_cnt);
  // Checks if a line is too small
  bool SmallLine(Box *line_box);
  // Compute the connected components in a line
  Boxa * ComputeLineConComps(Pix *line_mask_pix, Box *line_box,
                             Pixa **con_comps_pixa);
  // create a union of two arbitrary pix
  Pix *PixUnion(Pix *dest_pix, Box *dest_box, Pix *src_pix, Box *src_box);
  // create a union of a pixa subset
  Pix *Pixa2Pix(Pixa *pixa, Box **dest_box, int start_pix, int pix_cnt);
  // create a union of a pixa
  Pix *Pixa2Pix(Pixa *pixa, Box **dest_box);
  // merges a number of lines into one line given a bounding box and a mask
  bool MergeLine(Pix *line_mask_pix, Box *line_box,
                 Pixa *lines, Boxaa *lines_con_comps);
  // Creates new set of lines from the computed columns
  bool AddLines(Pixa *lines);
  // Estimate the parameters of the font(s) used in the page
  bool EstimateFontParams();
  // perform a vertical Closing with the specified threshold
  // returning the resulting conn comps as a pixa
  Pixa *VerticalClosing(Pix *pix, int thresold, Boxa **boxa);
  // Index the specific pixa using RTL reading order
  int *IndexRTL(Pixa *pixa);
  // Implements a rudimentary page & line segmenter
  bool FindLines();
};
}

#endif  // CUBE_LINE_SEGMENTER_H