This file is indexed.

/usr/include/tesseract/classify.h is in libtesseract-dev 3.02.01-6.

This file is owned by root:root, with mode 0o644.

The actual contents of the file can be viewed below.

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
///////////////////////////////////////////////////////////////////////
// File:        classify.h
// Description: classify class.
// Author:      Samuel Charron
//
// (C) Copyright 2006, Google Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
///////////////////////////////////////////////////////////////////////

#ifndef TESSERACT_CLASSIFY_CLASSIFY_H__
#define TESSERACT_CLASSIFY_CLASSIFY_H__

#include "adaptive.h"
#include "ccstruct.h"
// NOTE(review): this appears to be the header including itself (the file
// banner names this file classify.h). The include guard above turns it into
// a no-op, so the line looks redundant -- confirm before removing.
#include "classify.h"
#include "dict.h"
#include "featdefs.h"
#include "fontinfo.h"
#include "intfx.h"
#include "intmatcher.h"
#include "normalis.h"
#include "ratngs.h"
#include "ocrfeatures.h"
#include "unicity_table.h"

// Forward declarations. Within this header these types are only ever used
// through pointers or references, so a complete definition is not required.
class ScrollView;
class WERD_CHOICE;
class WERD_RES;
struct ADAPT_RESULTS;
struct NORM_PROTOS;

// Special font-info id values; both are negative.
// NOTE(review): presumably negative so that they can never be mistaken for a
// real index into a fontinfo table -- confirm against the users of these ids.
// A const object at namespace scope has internal linkage in C++, so each
// translation unit including this header gets its own private copy.
const int kUnknownFontinfoId = -1;
const int kBlankFontinfoId = -2;

namespace tesseract {

// Forward declarations of tesseract types that this header only refers to
// via pointers (ShapeTable*, GenericVector<ShapeRating>*).
struct ShapeRating;
class ShapeTable;

// Describes how a blob relates to character boundaries. "Character" is used
// below as shorthand for "classifiable unit", which is more precise but too
// unwieldy to repeat.
enum CharSegmentationType {
  // Only part of a character.
  CST_FRAGMENT = 0,
  // Exactly one correctly segmented character.
  CST_WHOLE = 1,
  // More than one but fewer than two characters.
  CST_IMPROPER = 2,
  // Several characters together.
  CST_NGRAM = 3
};

// Classify gathers the character-classifier interface in one class: the
// declarations below are grouped by the .cpp file named in each banner
// comment (adaptive, normmatch, protos, adaptmatch, float2int, intproto,
// mfoutline, outfeat, picofeat), followed by the tunable parameters and the
// classifier's working state. It derives from CCStruct and holds a Dict.
// NOTE(review): the class stores several raw pointers and declares no copy
// constructor or assignment operator here; whether copying is disabled by
// CCStruct is not visible from this header.
class Classify : public CCStruct {
 public:
  Classify();
  virtual ~Classify();
  // Returns a mutable reference to the dictionary member dict_.
  Dict& getDict() {
    return dict_;
  }

  // Returns the shape_table_ pointer for read-only use. A shape table is
  // optional (see the comment on the shape_table_ member).
  const ShapeTable* shape_table() const {
    return shape_table_;
  }

  /* adaptive.cpp ************************************************************/
  ADAPT_TEMPLATES NewAdaptedTemplates(bool InitFromUnicharset);
  int GetFontinfoId(ADAPT_CLASS Class, uinT8 ConfigId);
  // Runs the class pruner from int_templates on the given features, returning
  // the number of classes output in results.
  //    int_templates          Class pruner tables
  //    num_features           Number of features in blob
  //    features               Array of features
  //    normalization_factors  (input) Array of int_templates->NumClasses fudge
  //                           factors from blob normalization process.
  //                           (Indexed by CLASS_INDEX)
  //    expected_num_features  (input) Array of int_templates->NumClasses
  //                           expected number of features for each class.
  //                           (Indexed by CLASS_INDEX)
  //    results                (output) Sorted Array of pruned classes.
  //                           Array must be sized to take the maximum possible
  //                           number of outputs : int_templates->NumClasses.
  int PruneClasses(const INT_TEMPLATES_STRUCT* int_templates,
                   int num_features,
                   const INT_FEATURE_STRUCT* features,
                   const uinT8* normalization_factors,
                   const uinT16* expected_num_features,
                   CP_RESULT_STRUCT* results);
  void ReadNewCutoffs(FILE *CutoffFile, bool swap, inT64 end_offset,
                      CLASS_CUTOFF_ARRAY Cutoffs);
  void PrintAdaptedTemplates(FILE *File, ADAPT_TEMPLATES Templates);
  void WriteAdaptedTemplates(FILE *File, ADAPT_TEMPLATES Templates);
  ADAPT_TEMPLATES ReadAdaptedTemplates(FILE *File);
  /* normmatch.cpp ************************************************************/
  FLOAT32 ComputeNormMatch(CLASS_ID ClassId,
                           const FEATURE_STRUCT& feature, BOOL8 DebugMatch);
  void FreeNormProtos();
  NORM_PROTOS *ReadNormProtos(FILE *File, inT64 end_offset);
  /* protos.cpp ***************************************************************/
  void ReadClassFile();
  void ConvertProto(PROTO Proto, int ProtoId, INT_CLASS Class);
  INT_TEMPLATES CreateIntTemplates(CLASSES FloatProtos,
                                   const UNICHARSET& target_unicharset);
  /* adaptmatch.cpp ***********************************************************/

  // Learn the given word using its chopped_word, seam_array, denorm,
  // box_word, best_state, and correct_text to learn both correctly and
  // incorrectly segmented blobs. If filename is not NULL, then LearnBlob
  // is called and the data will be written to a file for static training.
  // Otherwise AdaptToBlob is called for adaption within a document.
  // If rejmap is not NULL, then only chars with a rejmap entry of '1' will
  // be learned, otherwise all chars with good correct_text are learned.
  void LearnWord(const char* filename, const char *rejmap, WERD_RES *word);

  // Builds a blob of length fragments, from the word, starting at start,
  // and then learn it, as having the given correct_text.
  // If filename is not NULL, then LearnBlob
  // is called and the data will be written to a file for static training.
  // Otherwise AdaptToBlob is called for adaption within a document.
  // threshold is a magic number required by AdaptToChar and generated by
  // GetAdaptThresholds.
  // Although it can be partly inferred from the string, segmentation is
  // provided to explicitly clarify the character segmentation.
  void LearnPieces(const char* filename, int start, int length,
                   float threshold, CharSegmentationType segmentation,
                   const char* correct_text, WERD_RES *word);
  void InitAdaptiveClassifier(bool load_pre_trained_templates);
  void InitAdaptedClass(TBLOB *Blob,
                        const DENORM& denorm,
                        CLASS_ID ClassId,
                        int FontinfoId,
                        ADAPT_CLASS Class,
                        ADAPT_TEMPLATES Templates);
  void AdaptToPunc(TBLOB *Blob,
                   const DENORM& denorm,
                   CLASS_ID ClassId,
                   int FontinfoId,
                   FLOAT32 Threshold);
  void AmbigClassifier(TBLOB *Blob,
                       const DENORM& denorm,
                       INT_TEMPLATES Templates,
                       ADAPT_CLASS *Classes,
                       UNICHAR_ID *Ambiguities,
                       ADAPT_RESULTS *Results);
  void MasterMatcher(INT_TEMPLATES templates,
                     inT16 num_features,
                     const INT_FEATURE_STRUCT* features,
                     const uinT8* norm_factors,
                     ADAPT_CLASS* classes,
                     int debug,
                     int num_classes,
                     const TBOX& blob_box,
                     CLASS_PRUNER_RESULTS results,
                     ADAPT_RESULTS* final_results);
  // Converts configs to fonts, and if the result is not adapted, and a
  // shape_table_ is present, the shape is expanded to include all
  // unichar_ids represented, before applying a set of corrections to the
  // distance rating in int_result, (see ComputeCorrectedRating.)
  // The results are added to the final_results output.
  void ExpandShapesAndApplyCorrections(ADAPT_CLASS* classes,
                                       bool debug,
                                       int class_id,
                                       int bottom, int top,
                                       float cp_rating,
                                       int blob_length,
                                       const uinT8* cn_factors,
                                       INT_RESULT_STRUCT& int_result,
                                       ADAPT_RESULTS* final_results);
  // Applies a set of corrections to the distance im_rating,
  // including the cn_correction, miss penalty and additional penalty
  // for non-alnums being vertical misfits. Returns the corrected distance.
  double ComputeCorrectedRating(bool debug, int unichar_id, double cp_rating,
                                double im_rating, int feature_misses,
                                int bottom, int top,
                                int blob_length, const uinT8* cn_factors);
  void ConvertMatchesToChoices(const DENORM& denorm, const TBOX& box,
                               ADAPT_RESULTS *Results,
                               BLOB_CHOICE_LIST *Choices);
  void AddNewResult(ADAPT_RESULTS *results,
                    CLASS_ID class_id,
                    int shape_id,
                    FLOAT32 rating,
                    bool adapted,
                    int config,
                    int fontinfo_id,
                    int fontinfo_id2);
  int GetAdaptiveFeatures(TBLOB *Blob,
                          INT_FEATURE_ARRAY IntFeatures,
                          FEATURE_SET *FloatFeatures);

  // Only declared when graphics support is compiled in.
#ifndef GRAPHICS_DISABLED
  void DebugAdaptiveClassifier(TBLOB *Blob,
                               const DENORM& denorm,
                               ADAPT_RESULTS *Results);
#endif
  void GetAdaptThresholds (TWERD * Word,
                           const DENORM& denorm,
                           const WERD_CHOICE& BestChoice,
                           const WERD_CHOICE& BestRawChoice,
                           FLOAT32 Thresholds[]);

  PROTO_ID MakeNewTempProtos(FEATURE_SET Features,
                             int NumBadFeat,
                             FEATURE_ID BadFeat[],
                             INT_CLASS IClass,
                             ADAPT_CLASS Class,
                             BIT_VECTOR TempProtoMask);
  int MakeNewTemporaryConfig(ADAPT_TEMPLATES Templates,
                             CLASS_ID ClassId,
                             int FontinfoId,
                             int NumFeatures,
                             INT_FEATURE_ARRAY Features,
                             FEATURE_SET FloatFeatures);
  void MakePermanent(ADAPT_TEMPLATES Templates,
                     CLASS_ID ClassId,
                     int ConfigId,
                     const DENORM& denorm,
                     TBLOB *Blob);
  void PrintAdaptiveMatchResults(FILE *File, ADAPT_RESULTS *Results);
  void RemoveExtraPuncs(ADAPT_RESULTS *Results);
  void RemoveBadMatches(ADAPT_RESULTS *Results);
  void SetAdaptiveThreshold(FLOAT32 Threshold);
  void ShowBestMatchFor(TBLOB *Blob,
                        const DENORM& denorm,
                        CLASS_ID ClassId,
                        int shape_id,
                        BOOL8 AdaptiveOn,
                        BOOL8 PreTrainedOn,
                        ADAPT_RESULTS *Results);
  // Returns a string for the classifier class_id: either the corresponding
  // unicharset debug_str or the shape_table_ debug str.
  STRING ClassIDToDebugStr(const INT_TEMPLATES_STRUCT* templates,
                           int class_id, int config_id) const;
  // Converts a classifier class_id index with a config ID to:
  // shape_table_ present: a shape_table_ index OR
  // No shape_table_: a font ID.
  // Without shape training, each class_id, config pair represents a single
  // unichar id/font combination, so this function looks up the corresponding
  // font id.
  // With shape training, each class_id, config pair represents a single
  // shape table index, so the fontset_table stores the shape table index,
  // and the shape_table_ must be consulted to obtain the actual unichar_id/
  // font combinations that the shape represents.
  int ClassAndConfigIDToFontOrShapeID(int class_id,
                                      int int_result_config) const;
  // Converts a shape_table_ index to a classifier class_id index (not a
  // unichar-id!). Uses a search, so not fast.
  int ShapeIDToClassID(int shape_id) const;
  UNICHAR_ID *BaselineClassifier(TBLOB *Blob,
                                 const DENORM& denorm,
                                 ADAPT_TEMPLATES Templates,
                                 ADAPT_RESULTS *Results);
  int CharNormClassifier(TBLOB *Blob,
                         const DENORM& denorm,
                         INT_TEMPLATES Templates,
                         ADAPT_RESULTS *Results);

  // As CharNormClassifier, but operates on a TrainingSample and outputs to
  // a GenericVector of ShapeRating without conversion to classes.
  int CharNormTrainingSample(bool pruner_only, const TrainingSample& sample,
                             GenericVector<ShapeRating>* results);
  UNICHAR_ID *GetAmbiguities(TBLOB *Blob,
                             const DENORM& denorm,
                             CLASS_ID CorrectClass);
  void DoAdaptiveMatch(TBLOB *Blob,
                       const DENORM& denorm,
                       ADAPT_RESULTS *Results);
  void AdaptToChar(TBLOB *Blob,
                   const DENORM& denorm,
                   CLASS_ID ClassId,
                   int FontinfoId,
                   FLOAT32 Threshold);
  void DisplayAdaptedChar(TBLOB* blob, const DENORM& denorm,
                          INT_CLASS_STRUCT* int_class);
  int AdaptableWord(TWERD *Word,
                  const WERD_CHOICE &BestChoiceWord,
                  const WERD_CHOICE &RawChoiceWord);
  void EndAdaptiveClassifier();
  void PrintAdaptiveStatistics(FILE *File);
  // NOTE(review): "Settup" (double t) is the spelling of the public method
  // names; renaming would break callers.
  void SettupPass1();
  void SettupPass2();
  void AdaptiveClassifier(TBLOB *Blob,
                          const DENORM& denorm,
                          BLOB_CHOICE_LIST *Choices,
                          CLASS_PRUNER_RESULTS cp_results);
  void ClassifyAsNoise(ADAPT_RESULTS *Results);
  void ResetAdaptiveClassifierInternal();

  int GetBaselineFeatures(TBLOB *Blob,
                          const DENORM& denorm,
                          INT_TEMPLATES Templates,
                          INT_FEATURE_ARRAY IntFeatures,
                          uinT8* CharNormArray,
                          inT32 *BlobLength);
  int GetCharNormFeatures(TBLOB *Blob,
                          const DENORM& denorm,
                          INT_TEMPLATES Templates,
                          INT_FEATURE_ARRAY IntFeatures,
                          uinT8* PrunerNormArray,
                          uinT8* CharNormArray,
                          inT32 *BlobLength,
                          inT32 *FeatureOutlineIndex);
  // Computes the char_norm_array for the unicharset and, if not NULL, the
  // pruner_array as appropriate according to the existence of the shape_table.
  // The norm_feature is deleted as it is almost certainly no longer needed.
  void ComputeCharNormArrays(FEATURE_STRUCT* norm_feature,
                             INT_TEMPLATES_STRUCT* templates,
                             uinT8* char_norm_array,
                             uinT8* pruner_array);

  bool TempConfigReliable(CLASS_ID class_id, const TEMP_CONFIG &config);
  void UpdateAmbigsGroup(CLASS_ID class_id, const DENORM& denorm, TBLOB *Blob);

  void ResetFeaturesHaveBeenExtracted();
  // Reports true as soon as the NumAdaptationsFailed counter is non-zero.
  bool AdaptiveClassifierIsFull() { return NumAdaptationsFailed > 0; }
  bool LooksLikeGarbage(const DENORM& denorm, TBLOB *blob);
  void RefreshDebugWindow(ScrollView **win, const char *msg,
                          int y_offset, const TBOX &wbox);
  /* float2int.cpp ************************************************************/
  void ClearCharNormArray(uinT8* char_norm_array);
  void ComputeIntCharNormArray(const FEATURE_STRUCT& norm_feature,
                               uinT8* char_norm_array);
  void ComputeIntFeatures(FEATURE_SET Features, INT_FEATURE_ARRAY IntFeatures);
  /* intproto.cpp *************************************************************/
  INT_TEMPLATES ReadIntTemplates(FILE *File);
  void WriteIntTemplates(FILE *File, INT_TEMPLATES Templates,
                         const UNICHARSET& target_unicharset);
  CLASS_ID GetClassToDebug(const char *Prompt, bool* adaptive_on,
                           bool* pretrained_on, int* shape_id);
  void ShowMatchDisplay();
  /* font detection ***********************************************************/
  // Returns a mutable reference to the fontinfo_table_ member.
  UnicityTable<FontInfo>& get_fontinfo_table() {
    return fontinfo_table_;
  }
  // Returns a mutable reference to the fontset_table_ member.
  UnicityTable<FontSet>& get_fontset_table() {
    return fontset_table_;
  }
  /* mfoutline.cpp ***********************************************************/
  void NormalizeOutlines(LIST Outlines, FLOAT32 *XScale, FLOAT32 *YScale);
  /* outfeat.cpp ***********************************************************/
  FEATURE_SET ExtractOutlineFeatures(TBLOB *Blob);
  /* picofeat.cpp ***********************************************************/
  FEATURE_SET ExtractPicoFeatures(TBLOB *Blob);


  // Member variables.

  // Parameters.
  // The *_VAR_H macros are defined outside this header.
  // NOTE(review): presumably each one declares a parameter member and the
  // default value / help text written here mirror the real initialisation
  // elsewhere -- confirm against the macro definitions.
  BOOL_VAR_H(prioritize_division, FALSE,
             "Prioritize blob division over chopping");
  // NOTE(review): declared with INT_VAR_H but given the boolean default FALSE.
  INT_VAR_H(tessedit_single_match, FALSE, "Top choice only from CP");
  BOOL_VAR_H(classify_enable_learning, true, "Enable adaptive classifier");
  INT_VAR_H(classify_debug_level, 0, "Classify debug level");

  /* mfoutline.cpp ***********************************************************/
  /* control knobs used to control normalization of outlines */
  INT_VAR_H(classify_norm_method, character, "Normalization Method   ...");
  double_VAR_H(classify_char_norm_range, 0.2,
             "Character Normalization Range ...");
  double_VAR_H(classify_min_norm_scale_x, 0.0, "Min char x-norm scale ...");
  double_VAR_H(classify_max_norm_scale_x, 0.325, "Max char x-norm scale ...");
  double_VAR_H(classify_min_norm_scale_y, 0.0, "Min char y-norm scale ...");
  double_VAR_H(classify_max_norm_scale_y, 0.325, "Max char y-norm scale ...");

  /* adaptmatch.cpp ***********************************************************/
  BOOL_VAR_H(tess_cn_matching, 0, "Character Normalized Matching");
  BOOL_VAR_H(tess_bn_matching, 0, "Baseline Normalized Matching");
  // NOTE(review): carries the same help text as classify_enable_learning
  // above, so the two parameters cannot be told apart by description.
  BOOL_VAR_H(classify_enable_adaptive_matcher, 1, "Enable adaptive classifier");
  BOOL_VAR_H(classify_use_pre_adapted_templates, 0,
             "Use pre-adapted classifier templates");
  BOOL_VAR_H(classify_save_adapted_templates, 0,
             "Save adapted templates to a file");
  BOOL_VAR_H(classify_enable_adaptive_debugger, 0, "Enable match debugger");
  INT_VAR_H(matcher_debug_level, 0, "Matcher Debug Level");
  INT_VAR_H(matcher_debug_flags, 0, "Matcher Debug Flags");
  INT_VAR_H(classify_learning_debug_level, 0, "Learning Debug Level: ");
  double_VAR_H(matcher_good_threshold, 0.125, "Good Match (0-1)");
  double_VAR_H(matcher_great_threshold, 0.0, "Great Match (0-1)");
  double_VAR_H(matcher_perfect_threshold, 0.02, "Perfect Match (0-1)");
  double_VAR_H(matcher_bad_match_pad, 0.15, "Bad Match Pad (0-1)");
  double_VAR_H(matcher_rating_margin, 0.1, "New template margin (0-1)");
  double_VAR_H(matcher_avg_noise_size, 12.0, "Avg. noise blob length: ");
  INT_VAR_H(matcher_permanent_classes_min, 1, "Min # of permanent classes");
  INT_VAR_H(matcher_min_examples_for_prototyping, 3,
            "Reliable Config Threshold");
  INT_VAR_H(matcher_sufficient_examples_for_prototyping, 5,
            "Enable adaption even if the ambiguities have not been seen");
  double_VAR_H(matcher_clustering_max_angle_delta, 0.015,
               "Maximum angle delta for prototype clustering");
  double_VAR_H(classify_misfit_junk_penalty, 0.0,
               "Penalty to apply when a non-alnum is vertically out of "
               "its expected textline position");
  double_VAR_H(rating_scale, 1.5, "Rating scaling factor");
  double_VAR_H(certainty_scale, 20.0, "Certainty scaling factor");
  double_VAR_H(tessedit_class_miss_scale, 0.00390625,
               "Scale factor for features not used");
  INT_VAR_H(classify_adapt_proto_threshold, 230,
            "Threshold for good protos during adaptive 0-255");
  INT_VAR_H(classify_adapt_feature_threshold, 230,
            "Threshold for good features during adaptive 0-255");
  BOOL_VAR_H(disable_character_fragments, TRUE,
             "Do not include character fragments in the"
             " results of the classifier");
  double_VAR_H(classify_character_fragments_garbage_certainty_threshold, -3.0,
               "Exclude fragments that do not match any whole character"
               " with at least this certainty");
  BOOL_VAR_H(classify_debug_character_fragments, FALSE,
             "Bring up graphical debugging windows for fragments training");
  BOOL_VAR_H(matcher_debug_separate_windows, FALSE,
             "Use two different windows for debugging the matching: "
             "One for the protos and one for the features.");
  STRING_VAR_H(classify_learn_debug_str, "", "Class str to debug learning");

  /* intmatcher.cpp **********************************************************/
  INT_VAR_H(classify_class_pruner_threshold, 229,
            "Class Pruner Threshold 0-255");
  INT_VAR_H(classify_class_pruner_multiplier, 30,
            "Class Pruner Multiplier 0-255:       ");
  INT_VAR_H(classify_cp_cutoff_strength, 7,
            "Class Pruner CutoffStrength:         ");
  INT_VAR_H(classify_integer_matcher_multiplier, 14,
            "Integer Matcher Multiplier  0-255:   ");

  // Use class variables to hold onto built-in templates and adapted templates.
  INT_TEMPLATES PreTrainedTemplates;
  ADAPT_TEMPLATES AdaptedTemplates;

  // Create dummy proto and config masks for use with the built-in templates.
  BIT_VECTOR AllProtosOn;
  BIT_VECTOR PrunedProtos;
  BIT_VECTOR AllConfigsOn;
  BIT_VECTOR AllProtosOff;
  BIT_VECTOR AllConfigsOff;
  BIT_VECTOR TempProtoMask;
  bool EnableLearning;
  /* normmatch.cpp */
  NORM_PROTOS *NormProtos;
  /* font detection ***********************************************************/
  // Backing store for get_fontinfo_table().
  UnicityTable<FontInfo> fontinfo_table_;
  // Without shape training, each class_id, config pair represents a single
  // unichar id/font combination, so each fontset_table_ entry holds font ids
  // for each config in the class.
  // With shape training, each class_id, config pair represents a single
  // shape_table_ index, so the fontset_table_ stores the shape_table_ index,
  // and the shape_table_ must be consulted to obtain the actual unichar_id/
  // font combinations that the shape represents.
  UnicityTable<FontSet> fontset_table_;

  INT_VAR_H(il1_adaption_test, 0, "Dont adapt to i/I at beginning of word");
  BOOL_VAR_H(classify_bln_numeric_mode, 0,
             "Assume the input is numbers [0-9].");

 protected:
  IntegerMatcher im_;
  FEATURE_DEFS_STRUCT feature_defs_;
  // If a shape_table_ is present, it is used to remap classifier output in
  // ExpandShapesAndApplyCorrections. font_ids referenced by configs actually
  // mean an index to the shape_table_ and the choices returned are *all* the
  // shape_table_ entries at that index.
  ShapeTable* shape_table_;

 private:

  // Dictionary exposed through getDict().
  Dict dict_;

  /* variables used to hold performance statistics */
  int AdaptiveMatcherCalls;
  int BaselineClassifierCalls;
  int CharNormClassifierCalls;
  int AmbigClassifierCalls;
  int NumWordsAdaptedTo;
  int NumCharsAdaptedTo;
  int NumBaselineClassesTried;
  int NumCharNormClassesTried;
  int NumAmbigClassesTried;
  int NumClassesOutput;
  // Read by AdaptiveClassifierIsFull(): any value above zero makes it
  // report true.
  int NumAdaptationsFailed;

  /* variables used to hold onto extracted features.  This is used
  to map from the old scheme in which baseline features and char norm
  features are extracted separately, to the new scheme in which they
  are extracted at the same time. */
  bool FeaturesHaveBeenExtracted;
  bool FeaturesOK;
  INT_FEATURE_ARRAY BaselineFeatures;
  INT_FEATURE_ARRAY CharNormFeatures;
  INT_FX_RESULT_STRUCT FXInfo;

  // Expected number of features in the class pruner, used to penalize
  // unknowns that have too few features (like a c being classified as e) so
  // it doesn't recognize everything as '@' or '#'.
  // CharNormCutoffs is for the static classifier (with no shapetable).
  // BaselineCutoffs gets a copy of CharNormCutoffs as an estimate of the real
  // value in the adaptive classifier. Both are indexed by unichar_id.
  // shapetable_cutoffs_ provides a similar value for each shape in the
  // shape_table_
  uinT16* CharNormCutoffs;
  uinT16* BaselineCutoffs;
  GenericVector<uinT16> shapetable_cutoffs_;
  // Debug display windows used while learning.
  // NOTE(review): who creates and destroys these ScrollView objects is not
  // visible from this header.
  ScrollView* learn_debug_win_;
  ScrollView* learn_fragmented_word_debug_win_;
  ScrollView* learn_fragments_debug_win_;
};
}  // namespace tesseract

#endif  // TESSERACT_CLASSIFY_CLASSIFY_H__