/usr/share/pyshared/gamera/plugins/pagesegmentation.py is in python-gamera 3.3.2-2.
This file is owned by root:root, with mode 0o644.
The actual contents of the file can be viewed below.
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 | #
# Copyright (C) 2007-2009 Christoph Dalitz, Stefan Ruloff, Robert Butz,
# Maria Elhachimi, Ilya Stoyanov, Rene Baston
# 2010 Christoph Dalitz
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
#
from gamera.plugin import *
import _pagesegmentation
class projection_cutting(PluginFunction):
"""
Segments a page with the *Iterative Projection Profile Cuttings*
method.
The image is split recursively in the horizontal and vertical
direction by looking for 'gaps' in the projection profile. A
'gap' is a contiguous sequence of projection values smaller than
*noise* pixels. The splitting is done for each gap wider or higher
than given thresholds *Tx* or *Ty*. When no further split points
are found, the recursion stops.
Whether the resulting segments represent lines, columns or
paragraphs depends on the values for *Tx* and *Ty*. The return
value is a list of 'CCs' where each 'CC' represents a found
segment. Note that the input image is changed such that each pixel
is set to its CC label.
*Tx*:
minimum 'gap' width in the horizontal direction. When set to
zero, *Tx* is set to the median height of all connected
component * 7, which might be a reasonable assumption for the
gap width between adjacent text columns.
*Ty*:
minimum 'gap' width in the vertical direction. When set to
zero, *Ty* is set to half the median height of all connected
component, which might be a reasonable assumption for the gap
width between adjacent text lines.
*noise*:
maximum projection value still consideread as belonging to a
'gap'.
*gap_treatment*:
decides how to treat gaps when *noise* is non zero.
When 0 ('cut'), gaps are cut in the middle and the noise pixels
in the gap are assigned to the segments.
When 1 ('ignore'), noise pixels within the gap are not assigned
to a segment, in other words, they are ignored.
"""
self_type = ImageType([ONEBIT])
args = Args([Int('Tx', default = 0), Int('Ty', default = 0), \
Int('noise', default = 0), Choice('gap_treatment', ["cut", "ignore"], default=0)])
return_type = ImageList("ccs")
author = "Maria Elhachimi and Robert Butz"
def __call__(image, Tx = 0, Ty = 0, noise = 0, gap_treatment = 0):
return _pagesegmentation.projection_cutting(image, Tx, Ty, noise, gap_treatment)
__call__ = staticmethod(__call__)
class runlength_smearing(PluginFunction):
"""
Segments a page with the *Run Length Smearing* algorithm.
The algorithm converts white horizontal and vertical runs shorter
than given thresholds *Cx* and *Cy* to black pixels (this is the
so-called 'run length smearing').
The intersection of both smeared images yields the page segments
as black regions. As this typically still consists small white
horizontal gaps, these gaps narrower than *Csm* are in a final
step also filled out.
The return value is a list of 'CCs' where each 'CC' represents a
found segment. Note that the input image is changed such that each
pixel is set to its CC label.
Arguments:
*Cx*:
Minimal length of white runs in the rows. When set to *-1*, it
is set to 20 times the median height of all connected
components.
*Cy*:
Minimal length of white runs in the columns. When set to *-1*,
it is set to 20 times the median height of all connected
components.
*Csm*:
Minimal length of white runs row-wise in the almost final
image. When set to *-1*, it is set to 3 times the median height
of all connected components.
"""
self_type = ImageType([ONEBIT])
return_type = ImageList("ccs")
args = Args([Int('Cx', default = -1), Int('Cy', default = -1), \
Int('Csm', default = -1)])
author = "Christoph Dalitz and Iliya Stoyanov"
def __call__(image, Cx = -1, Cy = -1, Csm = -1):
return _pagesegmentation.runlength_smearing(image, Cx, Cy, Csm)
__call__ = staticmethod(__call__)
class bbox_merging(PluginFunction):
"""
Segments a page by extending and merging the bounding boxes of the
connected components on the page.
How much the segments are extended is controlled by the arguments
*Ex* and *Ey*. Depending on their value, the returned segments
can be lines or paragraphs or something else.
The return value is a list of 'CCs' where each 'CC' represents a
found segment. Note that the input image is changed such that each
pixel is set to its segment label.
Arguments:
*Ex*:
How much each CC is extended to the left and right before merging.
When *-1*, it is set to twice the average size of all CCs.
*Ey*:
How much each CC is extended to the top and bottom before merging.
When *-1*, it is set to twice the average size of all CCs.
This will typically segemtn into paragraphs.
If you want to segment into lines, set *Ey* to something small like
one sixth of the median symbol height.
"""
self_type = ImageType([ONEBIT])
return_type = ImageList("ccs")
args = Args([Int('Ex', default = -1), Int('Ey', default = -1)])
pure_python = True
author = "Rene Baston, Karl MacMillan, and Christoph Dalitz"
def __call__(self, Ex=-1, Ey=-1):
# two helper functions for merging rectangles
def find_intersecting_rects(glyphs, index):
g = glyphs[index]
inter = []
for i in range(len(glyphs)):
if i == index:
continue
if g.intersects(glyphs[i]):
inter.append(i)
return inter
def list_union_rects(big_rects):
current = 0
rects = big_rects
while(1):
inter = find_intersecting_rects(rects, current)
if len(inter):
g = rects[current]
new_rects = [g]
for i in range(len(rects)):
if i == current:
continue
if i in inter:
g.union(rects[i])
else:
new_rects.append(rects[i])
rects = new_rects
current = 0
else:
current += 1
if(current >= len(rects)):
break
return rects
# the actual plugin
from gamera.core import Dim, Rect, Point, Cc
from gamera.plugins.image_utilities import union_images
page = self.image_copy()
ccs = page.cc_analysis()
# compute average CC size
avg_size = 0.0
for c in ccs:
avg_size += c.nrows
avg_size += c.ncols
avg_size /= (2 * len(ccs))
avg_size = int(avg_size)
if Ex == -1:
Ex = avg_size*2
if Ey == -1:
Ey = avg_size*2
# extend CC bounding boxes
big_rects = []
for c in ccs:
ul_y = max(0, c.ul_y - Ey)
ul_x = max(0, c.ul_x - Ex)
lr_y = min(page.lr_y, c.lr_y + Ey)
lr_x = min(page.lr_x, c.lr_x + Ex)
nrows = lr_y - ul_y + 1
ncols = lr_x - ul_x + 1
big_rects.append(Rect(Point(ul_x, ul_y), Dim(ncols, nrows)))
extended_segs = list_union_rects(big_rects)
# build new merged CCs
tmplist = ccs[:]
dellist = []
seg_ccs = []
seg_cc = []
if(len(extended_segs) > 0):
label = 1
for seg in extended_segs:
label += 1
for cc in tmplist:
if(seg.intersects(cc)):
# mark original image with segment label
self.highlight(cc, label)
seg_cc.append(cc)
dellist.append(cc)
if len(seg_cc) == 0:
continue
seg_rect = seg_cc[0].union_rects(seg_cc)
new_seg = Cc(self, label, seg_rect.ul, seg_rect.lr)
seg_cc = []
for item in dellist:
tmplist.remove(item)
dellist = []
seg_ccs.append(new_seg)
return seg_ccs
__call__ = staticmethod(__call__)
class sub_cc_analysis(PluginFunction):
"""
Further subsegments the result of a page segmentation algorithm into
groups of actual connected components.
The result of a page segmenattion plugin is a list of 'CCs' where
each 'CC' does not represent a 'connected component', but a page
segment (typically a line of text). In a practical OCR application
you will however need the actual connected components (which
should roughly corresond to the glyphs) in groups of lines. That
is what this plugin is meant for.
The input image must be an image that has been processed with a
page segmentation plugin, i.e. all pixels in the image must be
labeled with a segment label. The input parameter *cclist* is the
list of segments returned by the page segmentation algorithm.
The return value is a tuple with two entries:
- a new image with all pixels labeled according to the new CCs
- a list of ImageLists, each list containing all connected components
belonging to the same page segments
.. note:: The groups will be returned in the same order as given
in *cclist*. This means that you can sort the page segments
by reading order before passing them to *sub_cc_analysis*.
Note that the order of the returned CCs within each group is
not well defined. Hence you will generally need to sort each
subgroup by reading order.
"""
self_type = ImageType([ONEBIT])
return_type = Class("img_ccs", tuple)
args = Args([ImageList('cclist')])
author = "Stephan Ruloff and Christoph Dalitz"
class textline_reading_order(PluginFunction):
"""
Sorts a list of Images (CCs) representing textlines by reading order and
returns the sorted list. Incidentally, this will not only work on
textlines, but also on paragraphs, but *not* on actual Connected
Components.
The algorithm sorts all lines in topological order, based on
the following criteria for the pairwise order of two lines:
- line *a* comes before line *b* when *a* is totally to the left
of *b* (order \"column before row\")
- line *a* comes before *b* when both overlap horizontally and
*a* is above *b* (order within a column)
In the reference `\"High Performance Document Analysis\"`__
by T.M. Breuel (Symposium on Document Image Understanding,
USA, pp. 209-218, 2003),
an additional constraint is made for the first criterion by demanding
that no other segment may be between *a* and *b* that opverlaps
horizontally with both. This constraint for taking multi column
headings that interrupt columns into account is replaced in this
implementation with an a priori sort of all textlines by *y*-position.
This results in a preference of rows over columns (in case of ambiguity)
in the depth-first-search utilized in the topological sorting.
.. __: http://pubs.iupr.org/DATA/2003-breuel-sdiut.pdf
As this function is not an image method, but a free function, it
is not automatically imported with all plugins and you must import
it explicitly with
.. code:: Python
from gamera.plugins.pagesegmentation import textline_reading_order
"""
self_type = None
return_type = ImageList("orderedccs")
args = Args([ImageList("lineccs")])
pure_python = True
author = "Christoph Dalitz"
def __call__(lineccs):
# utilities for Gamera's graph API
from gamera import graph
from gamera import graph_util
class SegForGraph:
def __init__(self,seg):
self.segment = seg
self.label = 0
#
# build directed graph of all lines
#
G = graph.Graph(graph.FLAG_DAG)
seg_data = [SegForGraph(s) for s in lineccs]
# sort by y-position for row over column preference in ambiguities
seg_data.sort(lambda s,t: s.segment.offset_y - t.segment.offset_y)
G.add_nodes(seg_data)
for s in seg_data:
for t in seg_data:
if s.segment.offset_x <= t.segment.offset_x + t.segment.ncols and \
s.segment.offset_x + s.segment.ncols >= t.segment.offset_x:
if s.segment.offset_y < t.segment.offset_y:
G.add_edge(s,t)
elif s.segment.offset_x < t.segment.offset_x:
G.add_edge(s,t)
#
# compute topoligical sorting by depth-first-search
#
segs_sorted = [] # topologically sorted list
def dfs_visit(node):
node.data.label = 1
for nextnode in node.nodes:
if nextnode.data.label == 0:
dfs_visit(nextnode)
segs_sorted.append(node.data.segment)
for node in G.get_nodes():
if node.data.label == 0:
dfs_visit(node)
segs_sorted.reverse() # correct that we always appended to the back
return segs_sorted
__call__ = staticmethod(__call__)
class segmentation_error(PluginFunction):
"""Compares a ground truth segmentation *Gseg* with a segmentation *Sseg*
and returns error count numbers.
The input images must be given in such a way that each segment is
uniquely labeled, similar to the output of a page segmentation
algorithm like `runlength_smearing`_. For ground truth data, such a labeled
image can be obtained from an external color image with `colors_to_labels`_.
.. _`runlength_smearing`: #runlength-smearing
.. _`colors_to_labels`: color.html#colors-to-labels
The two segmentations are compared by building equivalence classes of
overlapping segments as described in
M. Thulke, V. Margner, A. Dengel:
*A general approach to quality evaluation of document
segmentation results.*
Lecture Notes in Computer Science 1655, pp. 43-57 (1999)
Each class is assigned an error type depending on how many ground truth
and test segments it contains. The return value is a tuple
(*n1,n2,n3,n4,n5,n6)* where each value is the total number of classes
with the corresponding error type:
+------+-----------------------+---------------+----------------------+
| Nr | Ground truth segments | Test segments | Error type |
+======+=======================+===============+======================+
| *n1* | 1 | 1 | correct |
+------+-----------------------+---------------+----------------------+
| *n2* | 1 | 0 | missed segment |
+------+-----------------------+---------------+----------------------+
| *n3* | 0 | 1 | false positive |
+------+-----------------------+---------------+----------------------+
| *n4* | 1 | > 1 | split |
+------+-----------------------+---------------+----------------------+
| *n5* | > 1 | 1 | merge |
+------+-----------------------+---------------+----------------------+
| *n6* | > 1 | > 1 | splits and merges |
+------+-----------------------+---------------+----------------------+
The total segmentation error can be computed from these numbers as
*1 - n1 / (n1 + n2 + n3 + n4 + n5 + n6)*. The individual numbers can
be of use to determine what exactly is wrong with the segmentation.
As this function is not an image method, but a free function, it
is not automatically imported with all plugins and you must import
it explicitly with
.. code:: Python
from gamera.plugins.pagesegmentation import segmentation_error
"""
self_type = None
args = Args([ImageType([ONEBIT], 'Gseg'), \
ImageType([ONEBIT], 'Sseg')])
return_type = IntVector("errors", length=6)
author = "Christoph Dalitz"
# module declaration
class PageSegmentationModule(PluginModule):
cpp_headers = ["pagesegmentation.hpp"]
cpp_namespace = ["Gamera"]
category = "PageSegmentation"
functions = [projection_cutting, runlength_smearing, bbox_merging, \
sub_cc_analysis, textline_reading_order, \
segmentation_error]
module = PageSegmentationModule() # create an instance of the module
# free function instances
textline_reading_order = textline_reading_order()
segmentation_error = segmentation_error()
|