/usr/lib/ruby/vendor_ruby/pdf/reader/page_layout.rb is in ruby-pdf-reader 1.3.3-1.
This file is owned by root:root, with mode 0o644.
The actual contents of the file can be viewed below.
# coding: utf-8
class PDF::Reader
# Takes a collection of TextRun objects and renders them into a single
# string that best approximates the way they'd appear on a rendered PDF page.
#
# media box should be a 4 number array that describes the dimensions of the
# page to be rendered as described by the page's MediaBox attribute
class PageLayout
# Prepares a layout for the given runs on a page described by mediabox.
#
# runs     - a collection of run objects. This class calls #x, #y, #text,
#            #font_size and #mean_character_width on them.
# mediabox - a 4 number array. Elements 0 and 2 give the horizontal extent
#            of the page, elements 1 and 3 the vertical extent.
#
# Raises ArgumentError when mediabox is nil.
def initialize(runs, mediabox)
  raise ArgumentError, "a mediabox must be provided" if mediabox.nil?

  @runs = merge_runs(runs)
  @mean_font_size = mean(@runs.map(&:font_size))
  # row_count divides the page height by the mean font size. A mean of zero
  # would produce Infinity there and make #floor raise FloatDomainError, so
  # fall back to a nominal size of 12 instead of failing in to_s.
  @mean_font_size = 12.0 if @mean_font_size.zero?
  @mean_glyph_width = mean(@runs.map(&:mean_character_width))
  @page_width = mediabox[2] - mediabox[0]
  @page_height = mediabox[3] - mediabox[1]
  # the left-most run becomes column zero of the rendered text
  @x_offset = @runs.map(&:x).min
  @current_platform_is_rbx_19 = RUBY_DESCRIPTION =~ /\Arubinius 2.0.0/ &&
                                RUBY_VERSION >= "1.9.0"
end
# Renders the runs onto a grid of space-filled rows and returns the grid as
# a single string, one row per line. Returns "" when there are no runs.
def to_s
  return "" if @runs.empty?

  grid = (0...row_count).map { " " * col_count }
  @runs.each do |run|
    column = ((run.x - @x_offset) / col_multiplier).round
    # the row index is counted down from row_count as run.y grows
    row = row_count - (run.y / row_multiplier).round
    # runs that fall outside the grid are skipped
    next unless (0...row_count).cover?(row) && (0...col_count).cover?(column)

    local_string_insert(grid[row], run.text, column)
  end
  interesting_rows(grid).map(&:rstrip).join("\n")
end
private
# given an array of strings, return a new array with blank rows removed
# from the beginning and end. Blank rows between rows that contain text
# are kept. A row is blank when nothing is left after String#strip.
#
# interesting_rows([ "", "one", "two", "" ])
# => [ "one", "two" ]
#
# Returns an empty array when rows is empty or every row is blank.
def interesting_rows(rows)
  first_with_text = rows.index { |row| !row.strip.empty? }
  # no row has any text, so there is nothing interesting to return
  return [] if first_with_text.nil?

  last_with_text = rows.rindex { |row| !row.strip.empty? }
  rows[first_with_text..last_with_text]
end
# Number of text rows in the rendered grid: the page height divided by the
# mean font size, rounded down. The value is computed once and cached.
def row_count
  return @row_count if @row_count

  @row_count = (@page_height / @mean_font_size).floor
end
# Number of character columns in the rendered grid: the page width divided
# by the mean glyph width, scaled by 1.05 and rounded down. The value is
# computed once and cached.
def col_count
  return @col_count if @col_count

  glyphs_per_line = @page_width / @mean_glyph_width
  @col_count = (glyphs_per_line * 1.05).floor
end
# Page height covered by a single grid row, as a float. Cached after the
# first call.
def row_multiplier
  return @row_multiplier if @row_multiplier

  height = @page_height.to_f
  @row_multiplier = height / row_count.to_f
end
# Page width covered by a single grid column, as a float. Cached after the
# first call.
def col_multiplier
  return @col_multiplier if @col_multiplier

  width = @page_width.to_f
  @col_multiplier = width / col_count.to_f
end
# Arithmetic mean of the numbers in collection.
#
# Returns the integer 0 for an empty collection, otherwise a float.
def mean(collection)
  count = collection.size
  return 0 if count == 0

  total = collection.inject(0) { |sum, value| sum + value }
  total / count.to_f
end
# Sorts the runs, groups them by the integer part of their y value and
# yields each y together with the runs in that group.
#
# Returns an array holding the block's result for every group.
def each_line(&block)
  lines = @runs.sort.group_by { |run| run.y.to_i }
  lines.map { |y, collection| yield y, collection }
end
# take a collection of TextRun objects and merge any that are in close
# proximity. Runs are bucketed by the integer part of their y value, each
# bucket is sorted and merged on its own, and the combined result is
# returned sorted.
def merge_runs(runs)
  rows = runs.group_by { |char| char.y.to_i }
  merged = rows.values.flat_map { |chars| group_chars_into_runs(chars.sort) }
  merged.sort
end
# Walks chars in order and folds each one into the previously collected run
# when that run reports it as mergable (via #mergable?), combining the two
# with #+. Anything that cannot be merged starts a new run.
#
# chars - an ordered array of run objects. It is only read, never modified.
#
# Returns a new array of runs.
def group_chars_into_runs(chars)
  chars.each_with_object([]) do |char, runs|
    previous = runs.last
    if previous && previous.mergable?(char)
      runs[-1] = previous + char
    else
      runs << char
    end
  end
end
# This is a simple alternative to String#[]=. We can't use the string
# method as it's buggy on rubinius 2.0rc1 (in 1.9 mode)
#
# See my bug report at https://github.com/rubinius/rubinius/issues/1985
#
# Overwrites the characters of haystack starting at index with needle,
# modifying haystack in place. An empty needle leaves haystack unchanged.
#
# haystack - the string to modify
# needle   - the text to write into haystack
# index    - the character offset in haystack where needle starts
def local_string_insert(haystack, needle, index)
  char_count = needle.length
  if @current_platform_is_rbx_19
    # keep the whole remainder of the row, however long it is
    haystack.replace(
      (haystack[0, index] || "") +
      needle +
      (haystack[(index + char_count)..-1] || "")
    )
  else
    # The start/length form is used on purpose: a range built as
    # index..(index + length - 1) becomes 0..-1 for an empty needle at
    # index 0, which would replace the entire row.
    haystack[index, char_count] = String.new(needle)
  end
end
end
end
|