This file is indexed.

/usr/bin/ocrfeeder-cli is in ocrfeeder 0.8.1-2.

This file is owned by root:root, with mode 0o755.

The actual contents of the file can be viewed below.

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
#! /usr/bin/python
# -*- coding: utf-8 -*-

###########################################################################
#    OCRFeeder - The complete OCR suite
#    Copyright (C) 2009 Joaquim Rocha
#
#    This program is free software: you can redistribute it and/or modify
#    it under the terms of the GNU General Public License as published by
#    the Free Software Foundation, either version 3 of the License, or
#    (at your option) any later version.
#
#    This program is distributed in the hope that it will be useful,
#    but WITHOUT ANY WARRANTY; without even the implied warranty of
#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#    GNU General Public License for more details.
#
#    You should have received a copy of the GNU General Public License
#    along with this program.  If not, see <http://www.gnu.org/licenses/>.
###########################################################################

import sys
import os
# Make the ocrfeeder package importable: a source checkout living next to
# this script takes priority, followed by the system-wide installation.
script_dir = os.path.dirname(os.path.abspath(__file__))
local_src = script_dir + '/../src'
if os.path.exists(local_src):
    sys.path.insert(0, local_src)

installed_src = os.path.join('/usr', 'lib', 'python2.7', 'site-packages')
if os.path.exists(installed_src):
    # Position 1 keeps the first sys.path entry ahead of the installed copy.
    sys.path.insert(1, installed_src)

from PIL import Image
from ocrfeeder.util.constants import OCRFEEDER_STUDIO_VERSION
from ocrfeeder.util.graphics import getImageResolution, convertMultiImagesInList
from ocrfeeder.util.configuration import ConfigurationManager
from ocrfeeder.feeder.ocrEngines import OcrEnginesManager
from ocrfeeder.studio.dataHolder import PageData
from ocrfeeder.feeder.layoutAnalysis import LayoutAnalysis
from ocrfeeder.feeder.documentGeneration import DocumentGeneratorManager
from optparse import OptionParser

# Manager used further down to look up the generator class for the chosen
# export format; `formats` feeds the choices of the --format option.
document_generator_manager = DocumentGeneratorManager()
formats = document_generator_manager.getFormats()

# Ask the engines manager to make its engines from the configured user
# engines folder. `ocr_engines` is used below as a sequence whose items
# carry the engine object at index 0 (see `engine[0].name`).
configuration_manager = ConfigurationManager()
ocr_engines_manager = OcrEnginesManager(configuration_manager)
ocr_engines_manager.makeEnginesFromFolder(configuration_manager.user_engines_folder)
ocr_engines = ocr_engines_manager.ocr_engines

# Help text of the --engine option, listing the names of the configured
# engines as "a, b or c".
ocr_engines_help_text = 'the OCR engine to be used.'
if not ocr_engines:
    ocr_engines_help_text += ' No engines configured!'
else:
    engine_names = [engine[0].name for engine in ocr_engines]
    if len(engine_names) > 1:
        ocr_engines_help_text += ' Options are: %s or %s' % \
                                 (', '.join(engine_names[:-1]),
                                  engine_names[-1])
    else:
        # A single engine has no "or" part; name it directly instead of
        # leaving the list of options empty.
        ocr_engines_help_text += ' Options are: %s' % engine_names[0]

# Command line interface definition. The options are declared in a table
# (flags, settings) and registered in order, then the arguments are parsed.
format_help_text = ', '.join(formats) + ' or SPDF (for a searchable PDF)'

parser = OptionParser(
    usage='Usage: %prog -i IMAGE1 [-i IMAGE2, ...] -o FILE',
    version='%prog ' + OCRFEEDER_STUDIO_VERSION)

option_table = (
    # Input images; may be given several times.
    (('-i', '--image'),
     dict(dest='images', action='append', type='string',
          metavar='IMAGE1 [--image=IMAGE2, ...]',
          help='images to be recognized', default=[])),
    # Output format; the metavar doubles as the list of accepted values.
    (('-f', '--format'),
     dict(dest='format', action='store', type='choice', default='ODT',
          help='format of the generated document',
          metavar=format_help_text,
          choices=formats + ['SPDF'])),
    # Output document path.
    (('-o', '--output'),
     dict(dest='output', action='store', type='string',
          help='the document to be generated')),
    # OCR engine name.
    (('-e', '--engine'),
     dict(dest='engine', action='store', type='string',
          help=ocr_engines_help_text)),
    # Recognition language.
    (('-l', '--language'),
     dict(dest='language', action='store', type='string',
          help='the language according to the ISO-639-1. For example "pt" for Portuguese or "en" for English')),
    # Segmentation window size, validated after parsing.
    (('--window-size',),
     dict(dest='window_size', default='auto', action='store',
          type='string', metavar='auto or an integer value',
          help='the segmentation algorithm window size')),
)
for option_flags, option_settings in option_table:
    parser.add_option(*option_flags, **option_settings)

options, args = parser.parse_args()

# At least one input image and the output file are mandatory.
# parser.error() prints the usage and the message to stderr and then exits
# with status 2, so no statement placed after it would ever run.
if not options.images:
    parser.error('Please specify the images to be recognized.')
if options.output is None:
    parser.error('Please specify the output file.')

images = options.images

# The window size is either the literal "auto", handed on as None, or an
# integer; anything else is reported as a usage error.
window_size = options.window_size
if window_size == 'auto':
    window_size = None
else:
    try:
        window_size = int(window_size)
    except ValueError:
        # parser.error() terminates the program (exit status 2).
        parser.error('Please use either "auto" or an integer value '
                     'for the window size.')

export_format = options.format
file_name = options.output
# Catches an output name that was given but is empty (e.g. -o "").
if not file_name:
    parser.error('Please choose the output name.')

# Nothing can be recognized without at least one configured engine.
if not ocr_engines:
    parser.error('No OCR engines configured.')

# Pick the OCR engine: the one whose name matches --engine (compared
# case-insensitively), or the first configured engine when none was asked for.
engine_name = options.engine
if engine_name:
    engine_name = engine_name.lower()
    for engine in ocr_engines:
        if engine[0].name.lower() == engine_name:
            ocr_engine = engine[0]
            break
    else:
        # No engine matched: report it as a usage error instead of leaving
        # `ocr_engine` unbound, which would fail later with a NameError.
        parser.error('The OCR engine "%s" is not configured.' %
                     options.engine)
else:
    ocr_engine = ocr_engines[0][0]

if options.language:
    ocr_engine.setLanguage(options.language)

# Recognize every input image and collect one PageData per image.
pages = []

# The given images are first passed through convertMultiImagesInList
# together with the temporary folder; the loop below works on its result.
image_list = convertMultiImagesInList(images,
                                      configuration_manager.TEMPORARY_FOLDER)

for image in image_list:
    if not os.path.isfile(image):
        # parser.error() terminates the program (exit status 2).
        parser.error('The image "%s" is not a file or does not exist.' %
                     image)
    page_data = PageData(image)
    image_obj = Image.open(image)
    layout_analysis = LayoutAnalysis(ocr_engine, window_size)
    # Only the second element of the resolution result is handed to the
    # recognition step.
    resolution = getImageResolution(image_obj)[1]
    page_data.data_boxes = layout_analysis.recognize(image, resolution)
    pages.append(page_data)

# Both 'PDF' and 'SPDF' are produced by the 'PDF' generator; the flag
# passed as its second argument tells the two apart. The comparison must
# be an equality test: a substring test ('PDF' in export_format) is also
# true for 'SPDF', which made the flag always True.
# NOTE(review): presumably the second argument selects a regular PDF
# (True) versus a searchable PDF (False) -- confirm against the generator.
isPDF = export_format == 'PDF'
if export_format in ('PDF', 'SPDF'):
    generator_class = document_generator_manager.get('PDF')
    document_generator = generator_class(file_name, isPDF)
else:
    generator_class = document_generator_manager.get(export_format)
    document_generator = generator_class(file_name)

# Feed the recognized pages to the generator and write the document.
# The temporary folder is removed even when adding a page or saving fails,
# so a failed export does not leave it behind.
try:
    for page in pages:
        document_generator.addPage(page)

    document_generator.save()
finally:
    configuration_manager.removeTemporaryFolder()