/usr/include/colib/ocrinterfaces.h is in libiulib-dev 0.4+is+0.3-3.
This file is owned by root:root, with mode 0o644.
The actual contents of the file can be viewed below.
#ifndef h_ocrinterfaces__
#define h_ocrinterfaces__
// Copyright 2006 Deutsches Forschungszentrum fuer Kuenstliche Intelligenz
// or its licensors, as applicable.
//
// You may not use this file except under the terms of the accompanying license.
//
// Licensed under the Apache License, Version 2.0 (the "License"); you
// may not use this file except in compliance with the License. You may
// obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Project: iulib -- image understanding library
// File: ocrinterfaces.h
// Purpose: interfaces to OCR system components
// Responsible: tmb
// Reviewer:
// Primary Repository:
// Web Sites: www.iupr.org, www.dfki.de
/// \file ocrinterfaces.h
/// \brief Interfaces to OCR system components
#include <stdlib.h>
#include "colib/narray.h"
#include "colib/narray-util.h"
#include "smartptr.h"
#include "misc.h"
#include "coords.h"
#include "nustring.h"
namespace colib {
/// \brief Common base class of the OCR component interfaces.
///
/// Carries a minimal amount of information about a component
/// (its description) and a generic, key-based way of getting and
/// setting parameters. Every accessor except description() has a
/// default body that throws a C string (`const char *`), so a
/// subclass only overrides the accessors it actually supports.
struct IComponent {
    /// Return a description of this component; subclasses must implement this.
    virtual const char *description() = 0;

    // ---- virtual methods for getting and setting parameters ----

    /// Set a string property.
    /// The default implementation throws a `const char *` message.
    virtual void set(const char *key,const char *value)
    {
        throw "IComponent::set(char*,char*) unimplemented by subclass";
    }

    /// Set a number property.
    /// The default implementation throws a `const char *` message.
    virtual void set(const char *key,double value)
    {
        throw "IComponent::set(char*,double) unimplemented by subclass";
    }

    /// Get a string property.
    /// The default implementation throws a `const char *` message.
    virtual const char *gets(const char *key)
    {
        throw "IComponent::gets(char*) unimplemented by subclass";
    }

    /// Get a number property.
    /// The default implementation throws a `const char *` message.
    virtual double getd(const char *key)
    {
        throw "IComponent::getd(char*) unimplemented by subclass";
    }

    /// Virtual destructor so components can be destroyed through a base pointer.
    virtual ~IComponent() {}
};
/// \brief Interface for cleanup of gray scale document images.
///
/// Implementations should work for both gray scale and binary images.
struct ICleanupGray : public IComponent {
    /// Clean up a gray image.
    /// (Going by the parameter names, `in` is the source image and
    /// `out` receives the cleaned-up result.)
    virtual void cleanup(bytearray &out,bytearray &in) = 0;
};
/// \brief Interface for cleanup of binary document images.
///
/// Implementations should throw an error when applied to a
/// grayscale image.
struct ICleanupBinary : public IComponent {
    /// Clean up a binary image.
    /// (Going by the parameter names, `in` is the source image and
    /// `out` receives the cleaned-up result.)
    virtual void cleanup(bytearray &out,bytearray &in) = 0;
};
/// \brief Interface for computing text/image probabilities.
///
/// The output is in the standard RGB format for text/image
/// segmentation (see ocropus.org).
struct ITextImageClassification : public IComponent {
    /// Compute text/image probabilities for an image.
    virtual void textImageProbabilities(intarray &out,bytearray &in) = 0;
};
/// Perform binarization of grayscale images.
struct IBinarize : IComponent {
/// Binarize an image stored in a floatarray. Override this.
virtual void binarize(bytearray &out,floatarray &in) = 0;
/// \brief Binarize an image stored in a bytearray.
/// Override this if you want to provide a more efficient
/// implementation.
virtual void binarize(bytearray &out,bytearray &in) {
floatarray temp;
copy(temp,in);
binarize(out,temp);
}
};
/// \brief Interface for page segmentation into columns, lines, etc.
///
/// The output is in the standard RGB format for page segmentation
/// (see ocropus.org).
struct ISegmentPage : public IComponent {
    /// Segment the page.
    virtual void segment(intarray &out,bytearray &in) = 0;

    /// Segment the page, additionally given a list of obstacle rectangles.
    /// Optional: the default implementation throws a `const char *` message.
    virtual void segment(intarray &out,bytearray &in,rectarray &obstacles)
    {
        throw "unimplemented";
    }
};
/// \brief Interface for line segmentation into character hypotheses.
///
/// The output is in the standard RGB format for page segmentation
/// (see ocropus.org).
struct ISegmentLine : public IComponent {
    /// Segment a line.
    virtual void charseg(intarray &out,bytearray &in) = 0;
};
/// \brief A generic interface for language models.
/// An IGenericFst is a directed graph
/// with output/cost/id written on arcs,
/// accept cost written on vertices and
/// a fixed start vertice.
struct IGenericFst : virtual IComponent {
/// Clear the language model
virtual void clear() = 0;
/// Get a single new state
virtual int newState() = 0;
/// Add a transition between the given states
virtual void addTransition(int from,int to,int output,float cost,int input) = 0;
/// A variant of addTransition() with equal input and output.
virtual void addTransition(int from,int to,int symbol,float cost) {
addTransition(from, to, symbol, cost, symbol);
}
/// Set the start state
virtual void setStart(int node) = 0;
/// Set a state as an accept state
virtual void setAccept(int node,float cost=0.0) = 0;
/// Obtain codes for "specials" (language model dependent)
virtual int special(const char *s) = 0;
/// \brief Compute the best path through the language model.
/// Useful for simple OCR tasks and for debugging.
virtual void bestpath(nustring &result) = 0;
/// destroy the language model
virtual ~IGenericFst() {}
/// simple interface for line recognizers
virtual void setString(nustring &text,floatarray &costs,intarray &ids) {
int n = text.length();
intarray states;
states.clear();
for(int i=0;i<n+1;i++)
states.push(newState());
for(int i=0;i<n;i++)
addTransition(states[i],states[i+1],text[i].ord(),costs[i],ids[i]);
setStart(states[0]);
setAccept(states[n]);
}
// reading methods
/// Get the number of states.
virtual int nStates() { throw "unimplemented"; }
/// Get the starting state.
virtual int getStart() { throw "unimplemented"; }
/// Get the accept cost of a given vertex (a cost to finish the line and quit).
virtual float getAcceptCost(int node) { throw "unimplemented"; }
/// Return an array of arcs leading from the given node.
virtual void arcs(colib::intarray &ids,
colib::intarray &targets,
colib::intarray &outputs,
colib::floatarray &costs,
int from) { throw "unimplemented"; }
/// Change a transition score between the given states
virtual void rescore(int from,int to,int output,float new_cost,int input) { throw "unimplemented"; }
/// A variant of rescore() with equal input and output.
virtual void rescore(int from, int to, int symbol, float new_cost) {
rescore(from, to, symbol, new_cost, symbol);
}
/// These methods should load and save in OpenFST format.
/// (A simple way of doing that is to convert internally to OpenFST,
/// then call its load/save methods.)
virtual void load(const char *file) = 0;
virtual void save(const char *file) = 0;
};
/// \brief A generic interface for isolated character recognizers.
///
/// Note that this is not the preferred interface for character
/// recognition, since feature extraction is quite inefficient if
/// it is done one character at a time.
struct ICharacterClassifier : public IComponent {
    /// \brief Classify a character without any information about position on the line.
    ///
    /// Implementations may throw an exception if this is not supported.
    virtual void setImage(bytearray &input_image) = 0;

    /// \brief Classify a character with information about position on the line.
    ///
    /// Implementations may throw an exception if this is not supported.
    virtual void setImage(bytearray &image,int base_y, int xheight_y, int descender_y, int ascender_y) = 0;

    /// Get the number of classes returned.
    /// Corresponds to the valid indices for cls() and cost().
    virtual int length() = 0;

    /// \brief Unicode character or character string of class `i`.
    ///
    /// Note that some classifiers may return multiple characters per class.
    virtual void cls(nustring &result, int i) = 0;

    /// Cost value for classification `i`; lower costs = better.
    /// Implementations should aim to return negative log likelihoods.
    virtual float cost(int i) = 0;

    /// \brief Start training of the given type.
    ///
    /// "adaptation" means temporary adaptation of the classifier to
    /// all the characters between startTraining and finishTraining;
    /// other types of training are recognizer-dependent.
    /// Optional: the default implementation throws "unimplemented".
    virtual void startTraining(const char *type="adaptation")
    {
        throw "unimplemented";
    }

    /// \brief Train a character.
    ///
    /// (Commonly, this only stores data in the model; training is
    /// via an external program.) This may also train on ligatures
    /// (if supported), which is why `characters` is a nustring.
    /// Optional: the default implementation throws "unimplemented".
    virtual void addTrainingChar(bytearray &input_image,nustring &characters)
    {
        throw "unimplemented";
    }

    /// Train a character, given information about its position on the line.
    /// Optional: the default implementation throws "unimplemented".
    virtual void addTrainingChar(bytearray &image,int base_y, int xheight_y, int descender_y,
                                 int ascender_y,nustring &characters)
    {
        throw "unimplemented";
    }

    /// Train a character in context (think about this some more).
    /// Optional: the default implementation throws "unimplemented".
    virtual void addTrainingChar(bytearray &image,bytearray &mask,nustring &characters)
    {
        throw "unimplemented";
    }

    /// Finish training and switch back to recognition; this method
    /// may take a long time to complete.
    /// Optional: the default implementation throws "unimplemented".
    virtual void finishTraining()
    {
        throw "unimplemented";
    }

    /// Save a trained model to the stream.
    /// Optional: the default implementation throws "unimplemented".
    virtual void save(FILE *stream)
    {
        throw "unimplemented";
    }

    /// Save a trained model to the file at `path`: opens it with
    /// stdio(path, "wb") and forwards to save(FILE*).
    void save(const char *path)
    {
        save(stdio(path, "wb"));
    }

    /// Load a trained model from the stream.
    /// Optional: the default implementation throws "unimplemented".
    virtual void load(FILE *stream)
    {
        throw "unimplemented";
    }

    /// Load a trained model from the file at `path`: opens it with
    /// stdio(path, "rb") and forwards to load(FILE*).
    void load(const char *path)
    {
        load(stdio(path, "rb"));
    }

    /// \brief Convenience function for getting the best output
    /// (useful for debugging).
    ///
    /// Scans all classes and reports, via cls(), the first one with
    /// the strictly lowest cost below 1e30. If no class qualifies
    /// (including when length() is 0), `result` is cleared instead.
    virtual void best(nustring &result)
    {
        int winner = -1;
        float lowest = 1e30;
        for(int k=0;k<length();k++) {
            // Skip anything that does not strictly improve on the best so far.
            if(!(cost(k)<lowest))
                continue;
            winner = k;
            lowest = cost(k);
        }
        if(winner<0) {
            result.clear();
            return;
        }
        cls(result, winner);
    }

    /// Destructor.
    virtual ~ICharacterClassifier() {}
};
/// \brief A generic interface for text line recognition.
struct IRecognizeLine : public IComponent {
    /// \brief Recognize a text line and return a lattice representing
    /// the recognition alternatives.
    virtual void recognizeLine(IGenericFst &result,bytearray &image) = 0;

    /// \brief Start training of the given type.
    ///
    /// "adaptation" means temporary adaptation of the classifier to
    /// all the lines between startTraining and finishTraining;
    /// other types of training are recognizer-dependent.
    /// Optional: the default implementation throws "unimplemented".
    virtual void startTraining(const char *type="adaptation")
    {
        throw "unimplemented";
    }

    /// \brief Train on a text line.
    ///
    /// Usage is: call addTrainingLine with training data, then call
    /// finishTraining. The state of the object is undefined between
    /// calling addTrainingLine and finishTraining, and it is an
    /// error to call recognizeLine before finishTraining completes.
    /// This allows both batch and incremental training.
    ///
    /// NB: you might train on length 1 strings for single character
    /// training and might train on words if line alignment is not
    /// working (well, for some training data).
    /// Optional: the default implementation throws "unimplemented".
    virtual void addTrainingLine(bytearray &image,nustring &transcription)
    {
        throw "unimplemented";
    }

    /// \brief Train on a text line, given a segmentation.
    ///
    /// This is analogous to addTrainingLine(bytearray,nustring)
    /// except that it takes the "ground truth" line segmentation.
    /// Optional: the default implementation throws "unimplemented".
    virtual void addTrainingLine(intarray &segmentation, bytearray &image_grayscale, nustring &transcription)
    {
        throw "unimplemented";
    }

    /// \brief Align a lattice with a transcription.
    ///
    /// Optional: the default implementation throws "unimplemented".
    /// \param[out] chars         Non-space characters along the best path.
    /// \param[out] result        Aligned segmentation, colors correspond to chars
    /// \param[out] costs         Costs corresponding to chars
    /// \param[in]  image         Input grayscale image
    /// \param[in]  transcription The "ground truth" lattice to align
    virtual void align(nustring &chars,intarray &result,floatarray &costs,bytearray &image,IGenericFst &transcription)
    {
        throw "unimplemented";
    }

    // eventually?
    // virtual void addTrainingLine(bytearray &image,IGenericFst &transcription) { throw "unimplemented"; }

    /// \brief Finish training, possibly making complex calculations.
    ///
    /// Call this when training is done and the system should switch
    /// back to recognition; this method may take a long time to
    /// complete.
    /// Optional: the default implementation throws "unimplemented".
    virtual void finishTraining()
    {
        throw "unimplemented";
    }

    /// Save a trained model to the stream.
    /// Optional: the default implementation throws "unimplemented".
    virtual void save(FILE *stream)
    {
        throw "unimplemented";
    }

    /// Save a trained model to the file at `path`: opens it with
    /// stdio(path, "wb") and forwards to save(FILE*).
    void save(const char *path)
    {
        save(stdio(path, "wb"));
    }

    /// Load a trained model from the stream.
    /// Optional: the default implementation throws "unimplemented".
    virtual void load(FILE *stream)
    {
        throw "unimplemented";
    }

    /// Load a trained model from the file at `path`: opens it with
    /// stdio(path, "rb") and forwards to load(FILE*).
    void load(const char *path)
    {
        load(stdio(path, "rb"));
    }

    /// Destructor.
    virtual ~IRecognizeLine() {}

    /// \brief Recognize a line and also expose the character segmentation.
    ///
    /// This is a weird, optional method that exposes character
    /// segmentation for those line recognizers that have it.
    /// `segmentation` contains colored pixels, and a transition in
    /// the transducer of the form  * --- 1/eps --> * --- 2/a --> *
    /// means that pixels with color 1 and 2 together form the
    /// letter "a".
    /// Optional: the default implementation throws "unimplemented".
    virtual void recognizeLine(intarray &segmentation,IGenericFst &result,bytearray &image)
    {
        throw "unimplemented";
    }

    /// \brief Recognize a line with or without a given segmentation.
    ///
    /// As originally documented: if `useit` is set to true, the given
    /// segmentation is just displayed in loggers, but not used; the
    /// segmenter computes the segmentation and the recognition uses
    /// its output. If `useit` is set to false, the segmenter is still
    /// launched for the loggers, but the given segmentation is really
    /// used for the recognition.
    /// NOTE(review): this reads as inverted relative to the parameter
    /// name `useit` -- confirm against the implementations.
    /// Optional: the default implementation throws "unimplemented".
    virtual void recognizeLineSeg(intarray &segmentation,IGenericFst &result,bytearray &image, bool useit)
    {
        throw "unimplemented";
    }
};
}
#endif