This file is indexed.

/usr/include/tesseract/lang_model.h is in libtesseract-dev 3.02.01-6.

This file is owned by root:root, with mode 0o644.

The actual contents of the file can be viewed below.

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
/**********************************************************************
 * File:        lang_model.h
 * Description: Declaration of the Language Model Base Class
 * Author:    Ahmad Abdulkader
 * Created:   2007
 *
 * (C) Copyright 2008, Google Inc.
 ** Licensed under the Apache License, Version 2.0 (the "License");
 ** you may not use this file except in compliance with the License.
 ** You may obtain a copy of the License at
 ** http://www.apache.org/licenses/LICENSE-2.0
 ** Unless required by applicable law or agreed to in writing, software
 ** distributed under the License is distributed on an "AS IS" BASIS,
 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 ** See the License for the specific language governing permissions and
 ** limitations under the License.
 *
 **********************************************************************/

// The LangModel class abstracts a state machine that is modeled as a trie
// structure. The state machine models the language being recognized by the
// OCR engine.
// This is an abstract class that is to be inherited by any language model.

#ifndef LANG_MODEL_H
#define LANG_MODEL_H

#include "lang_mod_edge.h"
#include "char_altlist.h"
#include "char_set.h"
#include "tuning_params.h"

namespace tesseract {
// Abstract interface to a language model. Concrete models derive from this
// class and implement the pure virtual methods below. The base class itself
// only stores four on/off flags and exposes them through accessors.
class LangModel {
 public:
  // Every flag starts out enabled.
  LangModel()
      : ood_enabled_(true),
        numeric_enabled_(true),
        word_list_enabled_(true),
        punc_enabled_(true) {}
  virtual ~LangModel() {}

  // Returns an edge pointer to the Root
  virtual LangModEdge *Root() = 0;
  // Returns the edges that fan out of the specified edge; the number of
  // returned edges is written to *edge_cnt.
  // NOTE(review): ownership of the returned array is not visible from this
  // header -- confirm against the implementations before freeing it.
  virtual LangModEdge **GetEdges(CharAltList *alt_list,
                                 LangModEdge *parent_edge,
                                 int *edge_cnt) = 0;
  // Returns whether a sequence of 32-bit characters is valid within this
  // language model or net. An end-of-word flag is also specified: if it is
  // true, the sequence has to end on a valid word. The function optionally
  // returns, through edge_array, the list of language model edges traversed
  // to parse the string; edge_array defaults to NULL.
  virtual bool IsValidSequence(const char_32 *str, bool eow_flag,
                               LangModEdge **edge_array = NULL) = 0;
  // Per-character classification queries. Their exact definitions are left
  // to the concrete language model.
  virtual bool IsLeadingPunc(char_32 ch) = 0;
  virtual bool IsTrailingPunc(char_32 ch) = 0;
  virtual bool IsDigit(char_32 ch) = 0;

  // Flag getters. They are const-qualified so that they can be called
  // through a const LangModel reference or pointer.
  bool OOD() const { return ood_enabled_; }
  bool Numeric() const { return numeric_enabled_; }
  bool WordList() const { return word_list_enabled_; }
  bool Punc() const { return punc_enabled_; }

  // Flag setters. Each one simply overwrites the corresponding member.
  void SetOOD(bool ood) { ood_enabled_ = ood; }
  void SetNumeric(bool numeric) { numeric_enabled_ = numeric; }
  void SetWordList(bool word_list) { word_list_enabled_ = word_list; }
  void SetPunc(bool punc_enabled) { punc_enabled_ = punc_enabled; }

 protected:
  // Enable flags. This class only stores them; how they affect recognition
  // is decided by the code that reads them.
  // NOTE(review): "ood" presumably stands for out-of-dictionary -- confirm.
  bool ood_enabled_;
  bool numeric_enabled_;
  bool word_list_enabled_;
  bool punc_enabled_;
};
}

#endif  // LANG_MODEL_H