This file is indexed.

/usr/include/tesseract/resultiterator.h is in libtesseract-dev 4.00~git2288-10f4998a-2.

This file is owned by root:root, with mode 0o644.

The actual contents of the file can be viewed below.

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
///////////////////////////////////////////////////////////////////////
// File:        resultiterator.h
// Description: Iterator for tesseract results that is capable of
//              iterating in proper reading order over Bi Directional
//              (e.g. mixed Hebrew and English) text.
// Author:      David Eger
// Created:     Fri May 27 13:58:06 PST 2011
//
// (C) Copyright 2011, Google Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
///////////////////////////////////////////////////////////////////////

#ifndef TESSERACT_CCMAIN_RESULT_ITERATOR_H_
#define TESSERACT_CCMAIN_RESULT_ITERATOR_H_

#include "platform.h"
#include "ltrresultiterator.h"

template <typename T> class GenericVector;
template <typename T> class GenericVectorEqEq;
class BLOB_CHOICE_IT;
class WERD_RES;
class STRING;

namespace tesseract {

class Tesseract;

class TESS_API ResultIterator : public LTRResultIterator {
 public:
  static ResultIterator *StartOfParagraph(const LTRResultIterator &resit);

  /**
   * ResultIterator is copy constructible!
   * The default copy constructor works just fine for us.
   */
  virtual ~ResultIterator() {}

  // ============= Moving around within the page ============.
  /**
   * Moves the iterator to point to the start of the page to begin
   * an iteration.
   */
  virtual void Begin();

  /**
   * Moves to the start of the next object at the given level in the
   * page hierarchy in the appropriate reading order and returns false if
   * the end of the page was reached.
   * NOTE that RIL_SYMBOL will skip non-text blocks, but all other
   * PageIteratorLevel level values will visit each non-text block once.
   * Think of non text blocks as containing a single para, with a single line,
   * with a single imaginary word.
   * Calls to Next with different levels may be freely intermixed.
   * This function iterates words in right-to-left scripts correctly, if
   * the appropriate language has been loaded into Tesseract.
   */
  virtual bool Next(PageIteratorLevel level);

  /**
   * IsAtBeginningOf() returns whether we're at the logical beginning of the
   * given level.  (as opposed to ResultIterator's left-to-right top-to-bottom
   * order).  Otherwise, this acts the same as PageIterator::IsAtBeginningOf().
   * For a full description, see pageiterator.h
   */
  virtual bool IsAtBeginningOf(PageIteratorLevel level) const;

  /**
   * Implement PageIterator's IsAtFinalElement correctly in a BiDi context.
   * For instance, IsAtFinalElement(RIL_PARA, RIL_WORD) returns whether we
   * point at the last word in a paragraph.  See PageIterator for full comment.
  */
  virtual bool IsAtFinalElement(PageIteratorLevel level,
                                PageIteratorLevel element) const;

  // ============= Functions that refer to words only ============.
  // Returns the number of blanks before the current word.
  int BlanksBeforeWord() const;

  // ============= Accessing data ==============.

  /**
   * Returns the null terminated UTF-8 encoded text string for the current
   * object at the given level. Use delete [] to free after use.
  */
  virtual char* GetUTF8Text(PageIteratorLevel level) const;

  /**
   * Return whether the current paragraph's dominant reading direction
   * is left-to-right (as opposed to right-to-left).
  */
  bool ParagraphIsLtr() const;

  // ============= Exposed only for testing =============.

  /**
   * Yields the reading order as a sequence of indices and (optional)
   * meta-marks for a set of words (given left-to-right).
   * The meta marks are passed as negative values:
   *   kMinorRunStart  Start of minor direction text.
   *   kMinorRunEnd    End of minor direction text.
   *   kComplexWord    The next indexed word contains both left-to-right and
   *                    right-to-left characters and was treated as neutral.
   *
   * For example, suppose we have five words in a text line,
   * indexed [0,1,2,3,4] from the leftmost side of the text line.
   * The following are all believable reading_orders:
   *
   * Left-to-Right (in ltr paragraph):
   *     { 0, 1, 2, 3, 4 }
   * Left-to-Right (in rtl paragraph):
   *     { kMinorRunStart, 0, 1, 2, 3, 4, kMinorRunEnd }
   * Right-to-Left (in rtl paragraph):
   *     { 4, 3, 2, 1, 0 }
   * Left-to-Right except for an RTL phrase in words 2, 3 in an ltr paragraph:
   *     { 0, 1, kMinorRunStart, 3, 2, kMinorRunEnd, 4 }
   */
  static void CalculateTextlineOrder(
      bool paragraph_is_ltr,
      const GenericVector<StrongScriptDirection> &word_dirs,
      GenericVectorEqEq<int> *reading_order);

  static const int kMinorRunStart;
  static const int kMinorRunEnd;
  static const int kComplexWord;

 protected:
  /**
   * We presume the data associated with the given iterator will outlive us.
   * NB: This is private because it does something that is non-obvious:
   *   it resets to the beginning of the paragraph instead of staying wherever
   *   resit might have pointed.
   */
  TESS_LOCAL explicit ResultIterator(const LTRResultIterator &resit);

 private:
  /**
   * Calculates the current paragraph's dominant writing direction.
   * Typically, members should use current_paragraph_ltr_ instead.
   */
  bool CurrentParagraphIsLtr() const;

  /**
   * Returns word indices as measured from resit->RestartRow() = index 0
   * for the reading order of words within a textline given an iterator
   * into the middle of the text line.
   * In addition to non-negative word indices, the following negative values
   * may be inserted:
   *   kMinorRunStart  Start of minor direction text.
   *   kMinorRunEnd    End of minor direction text.
   *   kComplexWord    The previous word contains both left-to-right and
   *                   right-to-left characters and was treated as neutral.
   */
  void CalculateTextlineOrder(bool paragraph_is_ltr,
                              const LTRResultIterator &resit,
                              GenericVectorEqEq<int> *indices) const;
  /** Same as above, but the caller's ssd gets filled in if ssd != NULL. */
  void CalculateTextlineOrder(bool paragraph_is_ltr,
                              const LTRResultIterator &resit,
                              GenericVector<StrongScriptDirection> *ssd,
                              GenericVectorEqEq<int> *indices) const;

  /**
   * What is the index of the current word in a strict left-to-right reading
   * of the row?
   */
  int LTRWordIndex() const;

  /**
   * Given an iterator pointing at a word, returns the logical reading order
   * of blob indices for the word.
   */
  void CalculateBlobOrder(GenericVector<int> *blob_indices) const;

  /** Precondition: current_paragraph_is_ltr_ is set. */
  void MoveToLogicalStartOfTextline();

  /**
   * Precondition: current_paragraph_is_ltr_ and in_minor_direction_
   * are set.
   */
  void MoveToLogicalStartOfWord();

  /** Are we pointing at the final (reading order) symbol of the word? */
  bool IsAtFinalSymbolOfWord() const;

  /** Are we pointing at the first (reading order) symbol of the word? */
  bool IsAtFirstSymbolOfWord() const;

  /**
   * Append any extra marks that should be appended to this word when printed.
   * Mostly, these are Unicode BiDi control characters.
   */
  void AppendSuffixMarks(STRING *text) const;

  /** Appends the current word in reading order to the given buffer.*/
  void AppendUTF8WordText(STRING *text) const;

  /**
   * Appends the text of the current text line, *assuming this iterator is
   * positioned at the beginning of the text line*  This function
   * updates the iterator to point to the first position past the text line.
   * Each textline is terminated in a single newline character.
   * If the textline ends a paragraph, it gets a second terminal newline.
   */
  void IterateAndAppendUTF8TextlineText(STRING *text);

  /**
   * Appends the text of the current paragraph in reading order
   * to the given buffer.
   * Each textline is terminated in a single newline character, and the
   * paragraph gets an extra newline at the end.
   */
  void AppendUTF8ParagraphText(STRING *text) const;

  /** Returns whether the bidi_debug flag is set to at least min_level. */
  bool BidiDebug(int min_level) const;

  bool current_paragraph_is_ltr_;

  /**
   * Is the currently pointed-at character at the beginning of
   * a minor-direction run?
   */
  bool at_beginning_of_minor_run_;

  /** Is the currently pointed-at character in a minor-direction sequence? */
  bool in_minor_direction_;

  /**
   * Should detected inter-word spaces be preserved, or "compressed" to a single
   * space character (default behavior).
   */
  bool preserve_interword_spaces_;
};

}  // namespace tesseract.

#endif  // TESSERACT_CCMAIN_RESULT_ITERATOR_H_