/usr/bin/ocrfeeder-cli is in ocrfeeder 0.8.1-2.
This file is owned by root:root, with mode 0o755.
The actual contents of the file can be viewed below.
#! /usr/bin/python
# -*- coding: utf-8 -*-
###########################################################################
# OCRFeeder - The complete OCR suite
# Copyright (C) 2009 Joaquim Rocha
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
###########################################################################
import sys
import os
# Make the ocrfeeder package importable: an in-tree "src" directory next to
# this script's parent is put first on the search path, and the system-wide
# Python 2.7 site-packages directory is added right behind the first entry.
script_dir = os.path.dirname(os.path.abspath(__file__))
local_src = script_dir + '/../src'
if os.path.exists(local_src):
    sys.path = [local_src] + sys.path

installed_src = os.path.join('/usr', 'lib', 'python2.7', 'site-packages')
if os.path.exists(installed_src):
    sys.path.insert(1, installed_src)
from PIL import Image
from ocrfeeder.util.constants import OCRFEEDER_STUDIO_VERSION
from ocrfeeder.util.graphics import getImageResolution, convertMultiImagesInList
from ocrfeeder.util.configuration import ConfigurationManager
from ocrfeeder.feeder.ocrEngines import OcrEnginesManager
from ocrfeeder.studio.dataHolder import PageData
from ocrfeeder.feeder.layoutAnalysis import LayoutAnalysis
from ocrfeeder.feeder.documentGeneration import DocumentGeneratorManager
from optparse import OptionParser
# The managers are created before the option parser because the parser's
# --format choices and --engine help text are built from what they report.
document_generator_manager = DocumentGeneratorManager()
formats = document_generator_manager.getFormats()

configuration_manager = ConfigurationManager()
ocr_engines_manager = OcrEnginesManager(configuration_manager)
ocr_engines_manager.makeEnginesFromFolder(
    configuration_manager.user_engines_folder)
ocr_engines = ocr_engines_manager.ocr_engines

# Help text for --engine: lists the configured engine names as
# "A, B or C". Each entry of ocr_engines is indexed with [0] to reach the
# engine object carrying the name.
ocr_engines_help_text = 'the OCR engine to be used.'
if not ocr_engines:
    ocr_engines_help_text += ' No engines configured!'
else:
    engine_names = [engine[0].name for engine in ocr_engines]
    if len(engine_names) == 1:
        # A lone engine must still be named; joining "all but the last"
        # would otherwise leave the list empty.
        ocr_engines_help_text += ' Options are: %s' % engine_names[0]
    else:
        ocr_engines_help_text += ' Options are: %s or %s' % (
            ', '.join(engine_names[:-1]), engine_names[-1])
# Command-line interface definition. Options are registered in the order
# they should appear in the generated --help output.
parser = OptionParser(
    usage='Usage: %prog -i IMAGE1 [-i IMAGE2, ...] -o FILE',
    version='%prog ' + OCRFEEDER_STUDIO_VERSION)

# -i may be repeated; every occurrence is appended to options.images.
parser.add_option(
    '-i', '--image',
    dest='images',
    action='append',
    type='string',
    default=[],
    metavar='IMAGE1 [--image=IMAGE2, ...]',
    help='images to be recognized')

# The accepted formats are those reported by the document generator
# manager plus the extra "SPDF" choice.
format_help_text = ', '.join(formats) + ' or SPDF (for a searchable PDF)'
parser.add_option(
    '-f', '--format',
    dest='format',
    action='store',
    type='choice',
    choices=formats + ['SPDF'],
    default='ODT',
    metavar=format_help_text,
    help='format of the generated document')

parser.add_option(
    '-o', '--output',
    dest='output',
    action='store',
    type='string',
    help='the document to be generated')

parser.add_option(
    '-e', '--engine',
    dest='engine',
    action='store',
    type='string',
    help=ocr_engines_help_text)

parser.add_option(
    '-l', '--language',
    dest='language',
    action='store',
    type='string',
    help='the language according to the ISO-639-1. For example "pt" for Portuguese or "en" for English')

# Kept as a string here; it is converted to an integer (or None for
# "auto") after parsing.
parser.add_option(
    '--window-size',
    dest='window_size',
    action='store',
    type='string',
    default='auto',
    metavar='auto or an integer value',
    help='the segmentation algorithm window size')
# Parse and validate the command line.
#
# parser.error() prints the usage line plus the message to stderr and
# terminates the process with exit status 2, so nothing placed after such a
# call would ever run.
options, args = parser.parse_args()

if not options.images:
    parser.error('Please specify the images to be recognized.')
if options.output is None:
    parser.error('Please specify the output file.')

images = options.images

# "auto" is mapped to None; any other value must parse as an integer.
window_size = options.window_size
if window_size == 'auto':
    window_size = None
else:
    try:
        window_size = int(window_size)
    except ValueError:
        parser.error('Please use either "auto" or an integer value '
                     'for the window size.')

export_format = options.format

# An explicitly empty output name (e.g. -o "") passes the None check above,
# so it is rejected here.
file_name = options.output
if not file_name:
    parser.error('Please choose the output name.')

# Recognition cannot proceed without at least one configured engine.
if not ocr_engines:
    parser.error('No OCR engines configured.')
# Choose the OCR engine to recognize with.
#
# When --engine is given, the configured engines are searched for a
# case-insensitive name match; an unknown name is reported as a usage error
# instead of leaving ocr_engine unbound. Without --engine, the first
# configured engine is used.
engine_name = options.engine
if engine_name:
    engine_name = engine_name.lower()
    ocr_engine = None
    for engine in ocr_engines:
        if engine[0].name.lower() == engine_name:
            ocr_engine = engine[0]
            break
    if ocr_engine is None:
        # parser.error() exits the process, so ocr_engine is always bound
        # to a real engine past this point.
        parser.error('The OCR engine "%s" is not configured.' %
                     options.engine)
else:
    ocr_engine = ocr_engines[0][0]

if options.language:
    ocr_engine.setLanguage(options.language)
# Run layout analysis and recognition on every input image, collecting one
# PageData per image in input order.
pages = []

# The input list is passed through convertMultiImagesInList together with
# the temporary folder; the returned paths are what gets recognized.
image_list = convertMultiImagesInList(images,
                                      configuration_manager.TEMPORARY_FOLDER)
for image in image_list:
    if not os.path.isfile(image):
        # parser.error() exits the process with status 2.
        parser.error('The image "%s" is not a file or does not exist.' %
                     image)
    page_data = PageData(image)
    image_obj = Image.open(image)
    layout_analysis = LayoutAnalysis(ocr_engine, window_size)
    # Only the second component of the reported resolution is used.
    resolution = getImageResolution(image_obj)[1]
    page_data.data_boxes = layout_analysis.recognize(image, resolution)
    pages.append(page_data)
# Build the output document from the recognized pages and clean up.
#
# Both "PDF" and "SPDF" are produced by the generator registered as 'PDF';
# the second constructor argument tells it which of the two was requested.
# Exact comparison is required here: a substring test would also match
# "SPDF" and make the two formats indistinguishable.
# NOTE(review): the flag presumably selects a plain PDF (True) versus a
# searchable PDF (False) -- confirm against the PDF generator's constructor.
isPDF = export_format == 'PDF'
if isPDF or export_format == 'SPDF':
    generator_class = document_generator_manager.get('PDF')
    document_generator = generator_class(file_name, isPDF)
else:
    generator_class = document_generator_manager.get(export_format)
    document_generator = generator_class(file_name)

for page in pages:
    document_generator.addPage(page)
document_generator.save()

configuration_manager.removeTemporaryFolder()
|