/usr/lib/ruby/vendor_ruby/pdf/reader/page_text_receiver.rb is in ruby-pdf-reader 1.3.3-1.
This file is owned by root:root, with mode 0o644.
The actual contents of the file can be viewed below.
# coding: utf-8
require 'forwardable'
require 'pdf/reader/page_layout'
module PDF
  class Reader
    # Builds a UTF-8 string of all the text on a single page by processing all
    # the operators in a content stream.
    #
    # Most operators are forwarded untouched to the PageState created in
    # #page=. The text showing operators are handled here so that every
    # painted glyph can be recorded as a TextRun; #content then hands those
    # runs, together with the page's :MediaBox attribute, to PageLayout.
    #
    class PageTextReceiver
      extend Forwardable

      # Glyphs whose UTF-8 form equals this string are not recorded as
      # TextRuns; they only advance the text position.
      SPACE = " ".freeze

      # :content is intentionally not listed here: it is defined explicitly
      # below, and declaring it twice triggers a "method redefined" warning.
      # @options is never assigned in this class, so #options returns nil
      # unless something else sets it.
      attr_reader :state, :options

      ########## BEGIN FORWARDERS ##########
      # Graphics State Operators
      def_delegators :@state, :save_graphics_state, :restore_graphics_state

      # Matrix Operators
      def_delegators :@state, :concatenate_matrix

      # Text Object Operators
      def_delegators :@state, :begin_text_object, :end_text_object

      # Text State Operators
      def_delegators :@state, :set_character_spacing, :set_horizontal_text_scaling
      def_delegators :@state, :set_text_font_and_size, :font_size
      def_delegators :@state, :set_text_leading, :set_text_rendering_mode
      def_delegators :@state, :set_text_rise, :set_word_spacing

      # Text Positioning Operators
      def_delegators :@state, :move_text_position, :move_text_position_and_set_leading
      def_delegators :@state, :set_text_matrix_and_text_line_matrix, :move_to_start_of_next_line
      ########## END FORWARDERS ##########

      # Starts a new page: builds a fresh PageState for +page+, discards any
      # previously recorded characters and remembers the page's :MediaBox
      # attribute for later layout.
      #
      # page - an object responding to #attributes (a Hash-like with a
      #        :MediaBox entry); it is also passed to PageState.new.
      def page=(page)
        @state = PageState.new(page)
        # NOTE(review): @content is not read anywhere in this class (#content
        # is computed from @characters); kept only so existing instance state
        # stays the same.
        @content = []
        @characters = []
        @mediabox = page.attributes[:MediaBox]
      end

      # Returns the page text as produced by PageLayout#to_s from the
      # TextRuns recorded so far and the stored MediaBox.
      def content
        PageLayout.new(@characters, @mediabox).to_s
      end

      #####################################################
      # Text Showing Operators
      #####################################################

      # Records text that is drawn on the page.
      #
      # string - the raw string operand of the operator.
      def show_text(string) # Tj (AWAY)
        internal_show_text(string)
      end

      # Records text from a mixed array of strings and numbers.
      #
      # params - an Array; String elements are shown as text, every other
      #          element is passed to the state as a positioning adjustment
      #          (second argument of process_glyph_displacement).
      def show_text_with_positioning(params) # TJ [(A) 120 (WA) 20 (Y)]
        params.each do |arg|
          if arg.is_a?(String)
            internal_show_text(arg)
          else
            @state.process_glyph_displacement(0, arg, false)
          end
        end
      end

      # Moves to the start of the next line, then shows +str+.
      def move_to_next_line_and_show_text(str) # '
        @state.move_to_start_of_next_line
        show_text(str)
      end

      # Sets word spacing to +aw+ and character spacing to +ac+, then moves
      # to the next line and shows +string+.
      def set_spacing_next_line_show_text(aw, ac, string) # "
        @state.set_word_spacing(aw)
        @state.set_character_spacing(ac)
        move_to_next_line_and_show_text(string)
      end

      #####################################################
      # XObjects
      #####################################################

      # Asks the state to invoke the XObject named +label+. When the yielded
      # object is a FormXObject its content is walked with this receiver;
      # any other kind of object is ignored.
      def invoke_xobject(label)
        @state.invoke_xobject(label) do |xobj|
          xobj.walk(self) if xobj.is_a?(PDF::Reader::FormXObject)
        end
      end

      private

      # Decodes +string+ with the current font and, glyph by glyph, records a
      # TextRun at the current text position and then advances that position.
      #
      # Raises PDF::Reader::MalformedPDFError when the state has no current
      # font.
      def internal_show_text(string)
        # The font is looked up once; nothing in the loop below selects a
        # different font.
        font = @state.current_font
        raise PDF::Reader::MalformedPDFError, "current font is invalid" if font.nil?

        font.unpack(string).each do |glyph_code|
          # paint the current glyph
          newx, newy = @state.trm_transform(0, 0)
          utf8_chars = font.to_utf8(glyph_code)
          is_space = utf8_chars == SPACE

          # glyph_width is divided by 1000.0 before use. No horizontal
          # scaling factor is applied to the recorded width (the previous
          # code multiplied by a constant 1).
          glyph_width = font.glyph_width(glyph_code) / 1000.0
          font_size = @state.font_size
          scaled_glyph_width = glyph_width * font_size

          unless is_space
            @characters << TextRun.new(newx, newy, scaled_glyph_width, font_size, utf8_chars)
          end

          # apply the glyph displacement for the current glyph so the next
          # glyph will appear in the correct position
          @state.process_glyph_displacement(glyph_width, 0, is_space)
        end
      end
    end
  end
end
|