/usr/include/tesseract/char_samp.h is in libtesseract-dev 3.02.01-6.
This file is owned by root:root, with mode 0o644.
The actual contents of the file can be viewed below.
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 | /**********************************************************************
* File: char_samp.h
* Description: Declaration of a Character Bitmap Sample Class
* Author: Ahmad Abdulkader
* Created: 2007
*
* (C) Copyright 2008, Google Inc.
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*
**********************************************************************/
// The CharSamp inherits the Bmp8 class that represents images of
// words, characters and segments throughout Cube
// CharSamp adds more data members to hold the physical location of the image
// in a page, page number in a book if available.
// It also holds the label (GT) of the image that might correspond to a single
// character or a word
// It also provides methods for segmenting, scaling and cropping of the sample
#ifndef CHAR_SAMP_H
#define CHAR_SAMP_H
#include <stdlib.h>
#include <stdio.h>
#include <string>
#include "bmp_8.h"
#include "string_32.h"
namespace tesseract {
class CharSamp : public Bmp8 {
public:
CharSamp();
CharSamp(int wid, int hgt);
CharSamp(int left, int top, int wid, int hgt);
~CharSamp();
// accessor methods
unsigned short Left() const { return left_; }
unsigned short Right() const { return left_ + wid_; }
unsigned short Top() const { return top_; }
unsigned short Bottom() const { return top_ + hgt_; }
unsigned short Page() const { return page_; }
unsigned short NormTop() const { return norm_top_; }
unsigned short NormBottom() const { return norm_bottom_; }
unsigned short NormAspectRatio() const { return norm_aspect_ratio_; }
unsigned short FirstChar() const { return first_char_; }
unsigned short LastChar() const { return last_char_; }
char_32 Label() const {
if (label32_ == NULL || LabelLen() != 1) {
return 0;
}
return label32_[0];
}
char_32 * StrLabel() const { return label32_; }
string stringLabel() const;
void SetLeft(unsigned short left) { left_ = left; }
void SetTop(unsigned short top) { top_ = top; }
void SetPage(unsigned short page) { page_ = page; }
void SetLabel(char_32 label) {
if (label32_ != NULL) {
delete []label32_;
}
label32_ = new char_32[2];
if (label32_ != NULL) {
label32_[0] = label;
label32_[1] = 0;
}
}
void SetLabel(const char_32 *label32) {
if (label32_ != NULL) {
delete []label32_;
label32_ = NULL;
}
if (label32 != NULL) {
// remove any byte order markes if any
if (label32[0] == 0xfeff) {
label32++;
}
int len = LabelLen(label32);
label32_ = new char_32[len + 1];
if (label32_ != NULL) {
memcpy(label32_, label32, len * sizeof(*label32));
label32_[len] = 0;
}
}
}
void SetLabel(string str);
void SetNormTop(unsigned short norm_top) { norm_top_ = norm_top; }
void SetNormBottom(unsigned short norm_bottom) {
norm_bottom_ = norm_bottom;
}
void SetNormAspectRatio(unsigned short norm_aspect_ratio) {
norm_aspect_ratio_ = norm_aspect_ratio;
}
void SetFirstChar(unsigned short first_char) {
first_char_ = first_char;
}
void SetLastChar(unsigned short last_char) {
last_char_ = last_char;
}
// Saves the charsamp to a dump file
bool Save2CharDumpFile(FILE *fp) const;
// Crops the underlying image and returns a new CharSamp with the
// same character information but new dimensions. Warning: does not
// necessarily set the normalized top and bottom correctly since
// those depend on its location within the word (or CubeSearchObject).
CharSamp *Crop();
// Computes the connected components of the char sample
ConComp **Segment(int *seg_cnt, bool right_2_left, int max_hist_wnd,
int min_con_comp_size) const;
// returns a copy of the charsamp that is scaled to the
// specified width and height
CharSamp *Scale(int wid, int hgt, bool isotropic = true);
// returns a Clone of the charsample
CharSamp *Clone() const;
// computes the features corresponding to the char sample
bool ComputeFeatures(int conv_grid_size, float *features);
// Load a Char Samp from a dump file
static CharSamp *FromCharDumpFile(CachedFile *fp);
static CharSamp *FromCharDumpFile(FILE *fp);
static CharSamp *FromCharDumpFile(unsigned char **raw_data);
static CharSamp *FromRawData(int left, int top, int wid, int hgt,
unsigned char *data);
static CharSamp *FromConComps(ConComp **concomp_array,
int strt_concomp, int seg_flags_size,
int *seg_flags, bool *left_most,
bool *right_most, int word_hgt);
static int AuxFeatureCnt() { return (5); }
// Return the length of the label string
int LabelLen() const { return LabelLen(label32_); }
static int LabelLen(const char_32 *label32) {
if (label32 == NULL) {
return 0;
}
int len = 0;
while (label32[++len] != 0);
return len;
}
private:
char_32 * label32_;
unsigned short page_;
unsigned short left_;
unsigned short top_;
// top of sample normalized to a word height of 255
unsigned short norm_top_;
// bottom of sample normalized to a word height of 255
unsigned short norm_bottom_;
// 255 * ratio of character width to (width + height)
unsigned short norm_aspect_ratio_;
unsigned short first_char_;
unsigned short last_char_;
};
}
#endif // CHAR_SAMP_H
|