/usr/include/colib/ocrinterfaces.h is in libiulib-dev 0.4+is+0.3-3.
This file is owned by root:root, with mode 0o644.
The actual contents of the file can be viewed below.
| #ifndef h_ocrinterfaces__
#define h_ocrinterfaces__
// Copyright 2006 Deutsches Forschungszentrum fuer Kuenstliche Intelligenz
// or its licensors, as applicable.
//
// You may not use this file except under the terms of the accompanying license.
//
// Licensed under the Apache License, Version 2.0 (the "License"); you
// may not use this file except in compliance with the License. You may
// obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Project: iulib -- image understanding library
// File: ocrinterfaces.h
// Purpose: interfaces to OCR system components
// Responsible: tmb
// Reviewer:
// Primary Repository:
// Web Sites: www.iupr.org, www.dfki.de
/// \file ocrinterfaces.h
/// \brief Interfaces to OCR system components
#include <stdlib.h>
#include "colib/narray.h"
#include "colib/narray-util.h"
#include "smartptr.h"
#include "misc.h"
#include "coords.h"
#include "nustring.h"
namespace colib {
/// Base class for OCR interfaces.
/// Contains some minimal information
/// and ways of interacting with an OCR component.
struct IComponent {
    /// Describe this component. Pure virtual: every concrete component
    /// has to supply its own text.
    virtual const char *description() = 0;

    // Generic parameter access.  None of the accessors below is pure
    // virtual; a subclass overrides only the ones it supports, and the
    // defaults report the missing support by throwing a C string
    // (catch it as `const char *`).

    /// Set the string-valued property `key` to `value`.
    /// The default implementation always throws.
    virtual void set(const char *key, const char *value) {
        throw "IComponent::set(char*,char*) unimplemented by subclass";
    }

    /// Set the numeric property `key` to `value`.
    /// The default implementation always throws.
    virtual void set(const char *key, double value) {
        throw "IComponent::set(char*,double) unimplemented by subclass";
    }

    /// Read the string-valued property `key`.
    /// The default implementation always throws and never returns.
    virtual const char *gets(const char *key) {
        throw "IComponent::gets(char*) unimplemented by subclass";
    }

    /// Read the numeric property `key`.
    /// The default implementation always throws and never returns.
    virtual double getd(const char *key) {
        throw "IComponent::getd(char*) unimplemented by subclass";
    }

    /// Virtual so that components can be destroyed through a base pointer.
    virtual ~IComponent() {
    }
};
/// Cleanup for gray scale document images.
/// Should work for both gray scale and binary images.
///
struct ICleanupGray : public IComponent {
    /// Clean up the gray image `in` and store the result in `out`.
    /// Pure virtual: implementations provide the actual cleanup.
    virtual void cleanup(bytearray &out, bytearray &in) = 0;
};
/// Cleanup for binary document images.
/// Should throw an error when applied to grayscale.
///
struct ICleanupBinary : public IComponent {
    /// Clean up the binary image `in` and store the result in `out`.
    /// Pure virtual: implementations provide the actual cleanup.
    virtual void cleanup(bytearray &out, bytearray &in) = 0;
};
/// Compute text/image probabilities
/// The output is in the standard RGB format
/// for text/image segmentation (see ocropus.org)
struct ITextImageClassification : public IComponent {
    /// Compute text/image probabilities for the image `in` and store
    /// them in `out`.  Pure virtual.
    virtual void textImageProbabilities(intarray &out, bytearray &in) = 0;
};
/// Perform binarization of grayscale images.
struct IBinarize : IComponent {
/// Binarize an image stored in a floatarray. Override this.
virtual void binarize(bytearray &out,floatarray &in) = 0;
/// \brief Binarize an image stored in a bytearray.
/// Override this if you want to provide a more efficient
/// implementation.
virtual void binarize(bytearray &out,bytearray &in) {
floatarray temp;
copy(temp,in);
binarize(out,temp);
}
};
/// Compute page segmentation into columns, lines, etc.
/// The output is in the standard RGB format
/// for page segmentation (see ocropus.org)
struct ISegmentPage : public IComponent {
    /// Segment the page image `in`, writing the segmentation to `out`.
    /// Pure virtual.
    virtual void segment(intarray &out, bytearray &in) = 0;

    /// Variant of segment() that additionally receives `obstacles`.
    /// Optional: the default implementation throws a C string.
    virtual void segment(intarray &out, bytearray &in, rectarray &obstacles) {
        throw "unimplemented";
    }
};
/// Compute line segmentation into character hypotheses.
//
/// The output is in the standard RGB format
/// for page segmentation (see ocropus.org)
struct ISegmentLine : public IComponent {
    /// Segment the line image `in`, writing the character segmentation
    /// to `out`.  Pure virtual.
    virtual void charseg(intarray &out, bytearray &in) = 0;
};
/// \brief A generic interface for language models.
/// An IGenericFst is a directed graph
/// with output/cost/id written on arcs,
/// accept cost written on vertices and
/// a fixed start vertex.
struct IGenericFst : virtual IComponent {
/// Clear the language model
virtual void clear() = 0;
/// Get a single new state
virtual int newState() = 0;
/// Add a transition between the given states
virtual void addTransition(int from,int to,int output,float cost,int input) = 0;
/// A variant of addTransition() with equal input and output.
virtual void addTransition(int from,int to,int symbol,float cost) {
addTransition(from, to, symbol, cost, symbol);
}
/// Set the start state
virtual void setStart(int node) = 0;
/// Set a state as an accept state
virtual void setAccept(int node,float cost=0.0) = 0;
/// Obtain codes for "specials" (language model dependent)
virtual int special(const char *s) = 0;
/// \brief Compute the best path through the language model.
/// Useful for simple OCR tasks and for debugging.
virtual void bestpath(nustring &result) = 0;
/// destroy the language model
virtual ~IGenericFst() {}
/// simple interface for line recognizers
virtual void setString(nustring &text,floatarray &costs,intarray &ids) {
int n = text.length();
intarray states;
states.clear();
for(int i=0;i<n+1;i++)
states.push(newState());
for(int i=0;i<n;i++)
addTransition(states[i],states[i+1],text[i].ord(),costs[i],ids[i]);
setStart(states[0]);
setAccept(states[n]);
}
// reading methods
/// Get the number of states.
virtual int nStates() { throw "unimplemented"; }
/// Get the starting state.
virtual int getStart() { throw "unimplemented"; }
/// Get the accept cost of a given vertex (a cost to finish the line and quit).
virtual float getAcceptCost(int node) { throw "unimplemented"; }
/// Return an array of arcs leading from the given node.
virtual void arcs(colib::intarray &ids,
colib::intarray &targets,
colib::intarray &outputs,
colib::floatarray &costs,
int from) { throw "unimplemented"; }
/// Change a transition score between the given states
virtual void rescore(int from,int to,int output,float new_cost,int input) { throw "unimplemented"; }
/// A variant of rescore() with equal input and output.
virtual void rescore(int from, int to, int symbol, float new_cost) {
rescore(from, to, symbol, new_cost, symbol);
}
/// These methods should load and save in OpenFST format.
/// (A simple way of doing that is to convert internally to OpenFST,
/// then call its load/save methods.)
virtual void load(const char *file) = 0;
virtual void save(const char *file) = 0;
};
/// A generic interface for isolated character recognizers.
/// Note that this is not the preferred interface for character recognition,
/// since feature extraction is quite inefficient if it's done a character at a time.
struct ICharacterClassifier : public IComponent {
    // ---- recognition ----------------------------------------------------

    /// \brief Classify a character without any information about its
    /// position on the line.
    ///
    /// May throw an exception if it's not implemented.
    virtual void setImage(bytearray &input_image) = 0;

    /// \brief Classify a character with information about its position
    /// on the line (baseline, x-height, descender and ascender rows).
    ///
    /// May throw an exception if it's not implemented.
    virtual void setImage(bytearray &image, int base_y, int xheight_y,
                          int descender_y, int ascender_y) = 0;

    /// Number of classes returned; valid indices for cls() and cost()
    /// are 0 .. length()-1.
    virtual int length() = 0;

    /// Unicode character or character string for class `i`.
    ///
    /// Note that some classifiers may return multiple characters per class.
    virtual void cls(nustring &result, int i) = 0;

    /// Cost of class `i`; lower cost means a better match.
    /// Implementations should aim to return negative log likelihoods.
    virtual float cost(int i) = 0;

    // ---- training (optional; the defaults throw a C string) -------------

    /// Start training of the given type.
    /// "adaptation" means temporary adaptation of the classifier to all
    /// the characters between startTraining and finishTraining; other
    /// types of training are recognizer-dependent.
    virtual void startTraining(const char *type = "adaptation") {
        throw "unimplemented";
    }

    /// \brief Train a character.
    ///
    /// (Commonly, this only stores data in the model; training is via an
    /// external program.)  This may also train on ligatures (if
    /// supported), which is why `characters` is a nustring.
    virtual void addTrainingChar(bytearray &input_image, nustring &characters) {
        throw "unimplemented";
    }

    /// Train a character, given its position on the line.
    virtual void addTrainingChar(bytearray &image, int base_y, int xheight_y,
                                 int descender_y, int ascender_y,
                                 nustring &characters) {
        throw "unimplemented";
    }

    /// Train a character in context (think about this some more).
    virtual void addTrainingChar(bytearray &image, bytearray &mask,
                                 nustring &characters) {
        throw "unimplemented";
    }

    /// Finish training and switch back to recognition; this method may
    /// take a long time to complete.
    virtual void finishTraining() {
        throw "unimplemented";
    }

    // ---- persistence ----------------------------------------------------

    /// Save a trained model to the stream.
    virtual void save(FILE *stream) {
        throw "unimplemented";
    }

    /// Open `path` in mode "wb" and forward to save(FILE *).
    void save(const char *path) {
        save(stdio(path, "wb"));
    }

    /// Load a trained model from the stream.
    virtual void load(FILE *stream) {
        throw "unimplemented";
    }

    /// Open `path` in mode "rb" and forward to load(FILE *).
    void load(const char *path) {
        load(stdio(path, "rb"));
    }

    // ---- helpers --------------------------------------------------------

    /// \brief Convenience function for getting the best output
    ///
    /// (useful for debugging).  Picks the first class whose cost is the
    /// lowest one below 1e30 and stores its string in `result`; when no
    /// class qualifies (including length()==0), `result` is cleared.
    virtual void best(nustring &result) {
        int best_index = -1;
        float best_cost = 1e30;
        for(int k = 0; k < length(); k++) {
            // Strict comparison: on ties the earliest class wins.
            if(!(cost(k) < best_cost))
                continue;
            best_index = k;
            best_cost = cost(k);
        }
        if(best_index < 0) {
            result.clear();
            return;
        }
        cls(result, best_index);
    }

    /// Destructor.
    virtual ~ICharacterClassifier() {
    }
};
/// A generic interface for text line recognition.
struct IRecognizeLine : public IComponent {
    /// \brief Recognize a text line and return a lattice representing
    /// the recognition alternatives.
    virtual void recognizeLine(IGenericFst &result, bytearray &image) = 0;

    // ---- training (optional; the defaults throw a C string) -------------

    /// \brief Start training of the given type.
    /// "adaptation" means temporary adaptation of the classifier to all
    /// the lines between startTraining and finishTraining; other types of
    /// training are recognizer-dependent.
    virtual void startTraining(const char *type = "adaptation") {
        throw "unimplemented";
    }

    /// \brief Train on a text line.
    /// Usage is: call addTrainingLine with training data, then call
    /// finishTraining.  The state of the object is undefined between
    /// calling addTrainingLine and finishTraining, and it is an error to
    /// call recognizeLine before finishTraining completes.  This allows
    /// both batch and incremental training.
    /// NB: you might train on length 1 strings for single character
    /// training and might train on words if line alignment is not working
    /// (well, for some training data).
    virtual void addTrainingLine(bytearray &image, nustring &transcription) {
        throw "unimplemented";
    }

    /// \brief Train on a text line, given a segmentation.
    /// This is analogous to addTrainingLine(bytearray,nustring) except
    /// that it takes the "ground truth" line segmentation.
    virtual void addTrainingLine(intarray &segmentation,
                                 bytearray &image_grayscale,
                                 nustring &transcription) {
        throw "unimplemented";
    }

    /// Align a lattice with a transcription.
    /// \param[out] chars Non-space characters along the best path.
    /// \param[out] result Aligned segmentation, colors correspond to chars
    /// \param[out] costs Costs corresponding to chars
    /// \param[in] image Input grayscale image
    /// \param[in] transcription The "ground truth" lattice to align
    virtual void align(nustring &chars, intarray &result, floatarray &costs,
                       bytearray &image, IGenericFst &transcription) {
        throw "unimplemented";
    }

    // eventually?
    // virtual void addTrainingLine(bytearray &image,IGenericFst &transcription) { throw "unimplemented"; }

    /// \brief Finish training, possibly making complex calculations.
    /// Call this when training is done and the system should switch back
    /// to recognition; this method may take a long time to complete.
    virtual void finishTraining() {
        throw "unimplemented";
    }

    // ---- persistence ----------------------------------------------------

    /// Save a trained model to the stream.
    virtual void save(FILE *stream) {
        throw "unimplemented";
    }

    /// Open `path` in mode "wb" and forward to save(FILE *).
    void save(const char *path) {
        save(stdio(path, "wb"));
    }

    /// Load a trained model from the stream.
    virtual void load(FILE *stream) {
        throw "unimplemented";
    }

    /// Open `path` in mode "rb" and forward to load(FILE *).
    void load(const char *path) {
        load(stdio(path, "rb"));
    }

    /// Destructor
    virtual ~IRecognizeLine() {
    }

    // ---- optional segmentation-aware recognition ------------------------

    /// This is a weird, optional method that exposes character
    /// segmentation for those line recognizers that have it.
    /// `segmentation` contains colored pixels, and a transition in
    /// the transducer of the form * --- 1/eps --> * --- 2/a --> *
    /// means that pixels with color 1 and 2 together form the
    /// letter "a".
    virtual void recognizeLine(intarray &segmentation, IGenericFst &result,
                               bytearray &image) {
        throw "unimplemented";
    }

    // Recognize a line with or without a given segmentation.
    // If useit is set to true, the given segmentation is just displayed in
    // loggers, but not used; the segmenter computes the segmentation and
    // the recognition uses its output.
    // If useit is set to false, the segmenter is still launched for the
    // loggers, but the given segmentation is really used for the
    // recognition.
    // NOTE(review): the two cases above read as inverted relative to the
    // parameter name "useit" -- confirm against an implementation.
    virtual void recognizeLineSeg(intarray &segmentation, IGenericFst &result,
                                  bytearray &image, bool useit) {
        throw "unimplemented";
    }
};
}
#endif
|