This file is indexed.

/usr/include/tesseract/permute.h is in libtesseract-dev 3.02.01-6.

This file is owned by root:root, with mode 0o644.

The actual contents of the file can be viewed below.

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
/* -*-C-*-
 ********************************************************************************
 *
 * File:        permute.h  (Formerly permute.h)
 * Description:  Permute choices together
 * Author:       Mark Seaman, OCR Technology
 * Created:      Fri Sep 22 14:05:51 1989
 * Modified:     Mon May 20 16:32:04 1991 (Mark Seaman) marks@hpgrlt
 * Language:     C
 * Package:      N/A
 * Status:       Experimental (Do Not Distribute)
 *
 * (c) Copyright 1989, Hewlett-Packard Company.
 ** Licensed under the Apache License, Version 2.0 (the "License");
 ** you may not use this file except in compliance with the License.
 ** You may obtain a copy of the License at
 ** http://www.apache.org/licenses/LICENSE-2.0
 ** Unless required by applicable law or agreed to in writing, software
 ** distributed under the License is distributed on an "AS IS" BASIS,
 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 ** See the License for the specific language governing permissions and
 ** limitations under the License.
 *
 ********************************************************************************/
#ifndef PERMUTE_H
#define PERMUTE_H

/*----------------------------------------------------------------------
              I n c l u d e s
----------------------------------------------------------------------*/

#include "ratngs.h"
#include "params.h"
#include "unicharset.h"

#define MAX_PERM_LENGTH 128

/*----------------------------------------------------------------------
              V a r i a b l e s
----------------------------------------------------------------------*/
// Declarations of the tunable parameters consulted by the permuters.
// The *_VAR_H macros are not defined in this header (presumably they come
// from params.h, included above -- TODO confirm).  Each use names the
// parameter, gives what looks like its default value, and a help string.
// Only declarations live here; the matching definitions are elsewhere.

// Debug switches.
extern INT_VAR_H(fragments_debug, 0, "Debug character fragments");
extern INT_VAR_H(segment_debug, 0, "Debug the whole segmentation process");
extern BOOL_VAR_H(permute_debug, 0, "char permutation debug");

// Switches enabling individual permuters.
extern BOOL_VAR_H(permute_script_word, 0,
                  "Turn on word script consistency permuter");

extern BOOL_VAR_H(permute_fixed_length_dawg, 0,
                  "Turn on fixed-length phrasebook search permuter");

extern BOOL_VAR_H(segment_segcost_rating, 0,
                  "incorporate segmentation cost in word rating?");

// Reward multipliers: per the help text a value <= 1, where a smaller value
// means a bigger reward.
extern double_VAR_H(segment_reward_script, 0.95,
                    "Score multipler for script consistency within a word. "
                    "Being a 'reward' factor, it should be <= 1. "
                    "Smaller value implies bigger reward.");

extern BOOL_VAR_H(permute_chartype_word, 0,
         "Turn on character type (property) consistency permuter");
extern double_VAR_H(segment_reward_chartype, 0.97,
           "Score multipler for char type consistency within a word. ");

extern double_VAR_H(segment_reward_ngram_best_choice, 0.99,
                    "Score multipler for ngram permuter's best choice"
                    " (only used in the Han script path).");

// Adjacent string literals are concatenated verbatim, so every continuation
// piece must carry its own leading space; the final piece previously lacked
// one and the text read "...overly large numberof options.".
extern INT_VAR_H(max_permuter_attempts, 100000,
                 "Maximum number of different character choices to consider"
                 " during permutation. This limit is especially useful when"
                 " user patterns are specified, since overly generic patterns"
                 " can result in dawg search exploring an overly large number"
                 " of options.");

// Plain int (not declared through a *_VAR_H macro); defined elsewhere.
extern int permute_only_top;

/*----------------------------------------------------------------------
              F u n c t i o n s
----------------------------------------------------------------------*/
// Declaration only; the definition is not part of this header.
// Inputs are the word string, a companion word_lengths string (presumably
// the byte length of each character in word -- TODO confirm) and the
// current rating.  new_rating and adjust_factor are non-const pointers,
// so they are presumably outputs receiving the adjusted rating and the
// factor that was applied -- verify against the definition.
void adjust_non_word(const char *word, const char *word_lengths,
                     float rating, float *new_rating, float *adjust_factor);

// Declaration only; the definition is not part of this header.
// Receives three candidate choices for one character together with its
// neighbouring characters in the word, and returns a const char*.
// NOTE(review): the name suggests this disambiguates 'i' / 'l' / '1' using
// the surrounding context and returns the selected choice -- confirm against
// the definition.
const char* choose_il1(const char *first_char,   //first choice
                       const char *second_char,  //second choice
                       const char *third_char,   //third choice
                       const char *prev_char,    //prev in word
                       const char *next_char,    //next in word
                       const char *next_next_char);  //presumably the char after next_char

namespace tesseract {

// PermuterState -- an admittedly awkward way of letting permuter effects
// "compound".
//
// Background: at present every permuter produces a WERD_CHOICE carrying a
// modified rating; that choice is compared against the current best and
// only the winner survives.  As a consequence, independent improvements
// (e.g. script consistency, dictionary check, punctuation promoting)
// override one another and cannot be combined.
//
// What is really needed is a trellis plus some way to modify the path cost.
// As an approximation, this class keeps a permutation string recording the
// preferred char choice [0-9] at each position [0..#chunks], along with a
// cumulative reward factor.  Changes that do not conflict are accumulated
// and the combined result is returned.
//
// Default_bias is the starting value of the base multiplier, i.e. the
// multiplier applied to the raw choice rating when nothing is modified.
// It would be 1.0 with the reward-based permuters of the CJK path, but may
// be > 1 (eg. segment_penalty_garbage) for compatibility with the
// penalty-based permuters of the Latin path.
//
// Fragmented characters are not handled by this class.  At Init their
// preferred position is set to '1', which effectively skips the fragment
// choice.  That setting can still be overridden when collision is allowed,
// so permuters are responsible for not permuting fragmented characters.
class PermuterState {
 public:
  PermuterState();

  // Prepares the state for the given char choices; default_bias is the
  // initial base multiplier described in the class comment.
  void Init(const BLOB_CHOICE_LIST_VECTOR& char_choices,
            const UNICHARSET &unicharset,
            float default_bias,
            bool debug);

  // Records a preference given as a position string beginning at start_pos.
  // NOTE(review): pos_str presumably uses the same '0'..'9' / '.' encoding
  // as perm_state_ -- confirm against the definition.
  void AddPreference(int start_pos, char* pos_str, float weight);

  // Records a preference for one blob choice at a single char position.
  void AddPreference(int char_pos, BLOB_CHOICE* blob_choice, float weight);

  // Produces the word that results from the accumulated preferences.
  // NOTE(review): ownership of the returned pointer is not visible from
  // this header -- check the definition.
  WERD_CHOICE* GetPermutedWord(float *certainties, float *adjust_factor);

  // Chooses whether an already-set preference may be overwritten.
  void set_allow_collision(bool flag) {
    allow_collision_ = flag;
  }

  // Replaces the multiplying factor used for rating adjustment.
  void set_adjust_factor(float factor) {
    adjust_factor_ = factor;
  }

  // Turns printing of debug statements on or off.
  void set_debug(bool debug) {
    debug_ = debug;
  }

  // True when position pos holds something other than the "free" marker.
  // No range check is made on pos.
  bool position_marked(int pos) {
    const bool is_free = (perm_state_[pos] == kPosFree);
    return !is_free;
  }

 private:
  // Marker stored in perm_state_ for a position without a preference.
  static const char kPosFree = '.';

  const UNICHARSET *unicharset_;

  // Reference pointer only: it needs neither allocating nor freeing.
  const BLOB_CHOICE_LIST_VECTOR *char_choices_;

  // Preferred char choice per position, '0'..'9', or '.'.
  // Handles up to MAX_PERM_LENGTH-1 states.
  char perm_state_[MAX_PERM_LENGTH];

  // Number of char positions in the word.
  int word_length_;

  // May a previously set preference be overwritten?
  bool allow_collision_;

  // Multiplying factor for rating adjustment.
  float adjust_factor_;

  // Whether debug statements should be printed.
  bool debug_;
};

}  // namespace tesseract

#endif