This file is indexed.

/usr/lib/ruby/vendor_ruby/pdf/reader/buffer.rb is in ruby-pdf-reader 1.3.3-1.

This file is owned by root:root, with mode 0o644.

The actual contents of the file can be viewed below.

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
# coding: ASCII-8BIT

################################################################################
#
# Copyright (C) 2010 James Healy (jimmy@deefa.com)
#
# Permission is hereby granted, free of charge, to any person obtaining
# a copy of this software and associated documentation files (the
# "Software"), to deal in the Software without restriction, including
# without limitation the rights to use, copy, modify, merge, publish,
# distribute, sublicense, and/or sell copies of the Software, and to
# permit persons to whom the Software is furnished to do so, subject to
# the following conditions:
#
# The above copyright notice and this permission notice shall be
# included in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#
################################################################################

class PDF::Reader

  # A string tokeniser that recognises PDF grammar. When passed an IO stream or a
  # string, repeated calls to token() will return the next token from the source.
  #
  # This is very low level, and getting the raw tokens is not very useful in itself.
  #
  # This will usually be used in conjunction with PDF::Reader::Parser, which converts
  # the raw tokens into objects we can work with (strings, ints, arrays, etc)
  #
  class Buffer
    # Byte values treated as white space by the tokeniser. Frozen so the
    # shared array cannot be modified by accident.
    TOKEN_WHITESPACE = [0x00, 0x09, 0x0A, 0x0C, 0x0D, 0x20].freeze

    # Some strings for comparisons. Declaring them here avoids creating new
    # strings that need GC over and over. They are only ever compared against,
    # so they are frozen to guard against accidental in-place mutation.
    LEFT_PAREN = "(".freeze
    LESS_THAN = "<".freeze
    STREAM = "stream".freeze
    ID = "ID".freeze
    FWD_SLASH = "/".freeze

    attr_reader :pos

    # Creates a new buffer.
    #
    # Params:
    #
    #   io - an IO stream or string with the raw data to tokenise
    #
    # options:
    #
    #   :seek - a byte offset to seek to before starting to tokenise
    #   :content_stream - set to true if buffer will be tokenising a
    #                     content stream. Defaults to false
    #
    def initialize(io, opts = {})
      # io must respond to seek and pos here (and to read/getbyte once
      # tokenising starts). opts[:content_stream] is stored as given and
      # opts[:seek], when truthy, is the offset the stream is moved to first.
      @io, @tokens = io, []
      @in_content_stream = opts[:content_stream]

      start_offset = opts[:seek]
      @io.seek(start_offset) if start_offset

      # Remember where we are so the cursor can be restored later.
      @pos = @io.pos
    end

    # return true if there are no more tokens left
    #
    def empty?
      # Top up the token queue first when it holds fewer than three entries,
      # so an empty queue is only reported after a refill attempt.
      prepare_tokens unless @tokens.size >= 3

      @tokens.empty?
    end

    # return raw bytes from the underlying IO stream.
    #
    #   bytes - the number of bytes to read
    #
    # options:
    #
    #   :skip_eol - if true, the IO stream is advanced past a CRLF or LF that
    #               is sitting under the io cursor.
    #
    def read(bytes, opts = {})
      # Reads `bytes` raw bytes from the underlying stream, starting at the
      # buffer's saved position, and returns whatever @io.read returns.
      #
      # With opts[:skip_eol] the two bytes starting one byte before the cursor
      # are inspected: a CRLF pair is skipped entirely, a leading LF is skipped
      # on its own, and anything else is left in place. Returns nil without
      # reading if that inspection hits the end of the stream.
      reset_pos

      if opts[:skip_eol]
        @io.seek(-1, IO::SEEK_CUR)
        window_start = @io.pos
        str = @io.read(2)
        return nil if str.nil?

        eol_size = if str == "\r\n"
                     2
                   elsif str[0, 1] == "\n"
                     1
                   else
                     0
                   end

        # Seek to an absolute offset: near the end of the stream fewer than
        # two bytes may have been read, so a relative rewind would overshoot.
        @io.seek(window_start + eol_size)
      end

      data = @io.read(bytes)
      save_pos
      data
    end

    # return the next token from the source. Returns a string if a token
    # is found, nil if there are no tokens left.
    #
    def token
      # Restore our cursor, make sure at least three tokens are queued so an
      # "<int> <int> R" triple can be collapsed, refill again if the merge
      # shortened the queue, then hand out the head (nil when nothing is left).
      reset_pos

      prepare_tokens unless @tokens.size >= 3
      merge_indirect_reference
      prepare_tokens unless @tokens.size >= 3

      @tokens.shift
    end

    # return the byte offset where the first XRef table in the source can be found.
    #
    def find_first_xref_offset
      # Scans (at most) the final 1024 bytes of the stream for the last line
      # starting with %%EOF and returns the line before it converted with to_i.
      # Raises MalformedPDFError when no such marker is found or when nothing
      # precedes it in the scanned region.
      check_size_is_non_zero

      # Streams too short to seek back 1024 bytes are read from the start.
      begin
        @io.seek(-1024, IO::SEEK_END)
      rescue StandardError
        @io.seek(0)
      end
      tail = @io.read(1024)

      # the PDF 1.7 spec (section #3.4) says that EOL markers can be either \r, \n, or both.
      # Walk the lines last-to-first so the final marker is found first.
      reversed = tail.split(/[\n\r]+/).reverse
      marker_at = reversed.index { |line| line.strip =~ /^%%EOF/ }

      if marker_at.nil?
        raise MalformedPDFError, "PDF does not contain EOF marker"
      elsif marker_at >= reversed.size - 1
        raise MalformedPDFError, "PDF EOF marker does not follow offset"
      end

      reversed[marker_at + 1].to_i
    end

    private

    def check_size_is_non_zero
      # Seeking one byte back from the end fails with Errno::EINVAL when there
      # is no byte to land on; that failure is reported as an empty PDF.
      # On success the stream is left rewound to offset 0.
      begin
        @io.seek(-1, IO::SEEK_END)
        @io.seek(0)
      rescue Errno::EINVAL
        raise MalformedPDFError, "PDF file is empty"
      end
    end

    # Returns true if this buffer is parsing a content stream
    #
    def in_content_stream?
      # Coerce whatever was passed as :content_stream into a strict boolean.
      !!@in_content_stream
    end

    # Something else may have moved our IO stream cursor. Restore it.
    #
    def reset_pos
      # Nothing to do when the stream is still where we left it; otherwise
      # seek back to the offset recorded in @pos.
      return if @io.pos == @pos

      @io.seek(@pos)
    end

    # save the current position of the source IO stream. If someone else (like another buffer)
    # moves the cursor, we can then restore it.
    #
    def save_pos
      # Snapshot the stream's current offset into @pos (the value exposed by
      # the `pos` reader) so that reset_pos can later seek back to it.
      @pos = @io.pos
    end

    # attempt to prime the buffer with the next few tokens.
    #
    def prepare_tokens
      # Run up to ten tokenising steps, choosing the reader for each step from
      # the current state. A state with no matching reader (state can also
      # return :stream) makes that step a no-op. The cursor is saved afterwards.
      10.times do
        current = state

        if current == :literal_string
          prepare_literal_token
        elsif current == :hex_string
          prepare_hex_token
        elsif current == :regular
          prepare_regular_token
        elsif current == :inline
          prepare_inline_token
        end
      end

      save_pos
    end

    # tokenising behaves slightly differently based on the current context.
    # Determine the current context/state by examining the last token we found
    #
    def state
      # The tokenising mode is decided purely by the most recently queued token.
      previous = @tokens.last

      return :literal_string if LEFT_PAREN == previous
      return :hex_string     if LESS_THAN == previous
      return :stream         if STREAM == previous

      # "ID" switches to inline mode only inside a content stream, and only
      # when the token before it is not a "/" (i.e. it is not the name /ID).
      if ID == previous && in_content_stream? && @tokens[-2] != FWD_SLASH
        :inline
      else
        :regular
      end
    end

    # detect a series of 3 tokens that make up an indirect object. If we find
    # them, replace the tokens with a PDF::Reader::Reference instance.
    #
    # Merging them into a single string was another option, but that would mean
    # code further up the stack would need to check every token  to see if it looks
    # like an indirect object. For optimisation reasons, I'd rather avoid
    # that extra check.
    #
    # It's incredibly likely that the next 3 tokens in the buffer are NOT an
    # indirect reference, so test for that case first and avoid the relatively
    # expensive regexp checks if possible.
    #
    def merge_indirect_reference
      # Collapses a leading "<digits> <digits> R" triple in the token queue
      # into a single PDF::Reader::Reference. Leaves the queue untouched when
      # fewer than three tokens are queued or the pattern does not match.
      return if @tokens.size < 3
      return if @tokens[2] != "R"

      first, second = @tokens[0], @tokens[1]
      return unless first.is_a?(String) && second.is_a?(String)

      # Both numbers must consist solely of digits. An unanchored /\d+/ would
      # also accept tokens such as "1a" or "1.5" and build a bogus reference.
      digits_only = /\A\d+\z/
      return unless first =~ digits_only && second =~ digits_only

      @tokens[0, 3] = [PDF::Reader::Reference.new(first.to_i, second.to_i)]
    end

    def prepare_inline_token
      # Reads inline data one byte at a time until a whitespace character
      # followed by "EI" is seen, then queues everything before that marker
      # (stripped) as one string token and rewinds so the marker itself is
      # tokenised next.
      #
      # If the stream ends before the marker is found, whatever was read is
      # queued and the cursor is left at the end of the stream.
      str = ""
      chr = nil

      # Sliding window over the last three bytes read.
      buffer = []

      until buffer[0] =~ /\s/ && buffer[1, 2] == ["E", "I"]
        chr = @io.read(1)

        if chr.nil?
          # EOF: flush the window rather than feeding nil into it, which
          # would eventually end up in `str << nil` and raise a TypeError.
          str << buffer.join
          break
        end

        buffer << chr
        str << buffer.shift if buffer.length > 3
      end

      @tokens << string_token(str.strip)
      @io.seek(-3, IO::SEEK_CUR) unless chr.nil?
    end

    # if we're currently inside a hex string, read hex nibbles until
    # we find a closing >
    #
    def prepare_hex_token
      # Collects ASCII digits and letters (whitespace and control bytes are
      # skipped) until some other byte turns up. The collected characters are
      # then queued if there are any, followed by a ">" token; if the
      # terminating byte was not itself ">", that byte is queued after the
      # inserted ">". Hitting the end of the stream queues nothing.
      digits = ""

      loop do
        byte = @io.getbyte
        break if byte.nil? # unbalanced params

        case byte
        when 48..57, 65..90, 97..122
          digits << byte
        else
          next if byte <= 32 # ignore it

          @tokens << digits if digits.size > 0
          @tokens << ">" if byte != 0x3E # '>'
          @tokens << byte.chr
          break
        end
      end
    end

    # if we're currently inside a literal string we more or less just read bytes until
    # we find the closing ) delimiter. Lots of bytes that would otherwise indicate the
    # start of a new token in regular mode are left untouched when inside a literal
    # string.
    #
    # The entire literal string will be returned as a single token. It will need further
    # processing to fix things like escaped new lines, but that's someone else's
    # problem.
    #
    def prepare_literal_token
      # Reads the body of a literal string up to its closing ")" and queues it
      # as one token (omitted when empty) followed by a ")" token. Nested,
      # balanced parentheses stay part of the body, and a backslash keeps the
      # byte after it verbatim, so an escaped parenthesis is not counted.
      # Reaching the end of the stream closes the string as if ")" was found.
      str = ""
      count = 1 # number of currently open parentheses

      while count > 0
        byte = @io.getbyte
        if byte.nil?
          count = 0 # unbalanced params
        elsif byte == 0x5C # "\"
          str << byte
          # The stream may end straight after the backslash; appending nil
          # would raise a TypeError, so only copy an escaped byte that exists.
          escaped = @io.getbyte
          str << escaped unless escaped.nil?
        elsif byte == 0x28 # "("
          str << "("
          count += 1
        elsif byte == 0x29 # ")"
          count -= 1
          str << ")" unless count == 0
        else
          str << byte
        end
      end

      @tokens << str if str.size > 0
      @tokens << ")"
    end

    # Extract the next regular token and stock it in our buffer, ready to be returned.
    #
    # What each byte means is complex, check out section "3.1.1 Character Set" of the 1.7 spec
    # to read up on it.
    #
    def prepare_regular_token
      # Reads bytes until one token boundary has been handled and queues what
      # was found: the characters collected so far (if any) and, when the
      # boundary was a delimiter, the delimiter token itself. A call that only
      # consumes whitespace queues nothing. At end of stream any collected
      # characters are queued.
      tok = ""

      while byte = @io.getbyte
        case byte
        when 0x25
          # "%" starts a comment that runs to the next EOL char. A comment
          # acts like whitespace, so it also ends a token in progress;
          # otherwise "abc%x\ndef" would be glued together as "abcdef".
          interrupted = tok.size > 0
          @tokens << tok if interrupted
          tok = ""

          done = false
          until done
            byte = @io.getbyte
            done = byte.nil? || byte == 0x0A || byte == 0x0D
          end

          # Stop once a token was produced; a comment with nothing before it
          # does not use up this call, so keep scanning for a real token.
          break if interrupted
        when *TOKEN_WHITESPACE
          if tok.size > 0
            # white space, token finished
            @tokens << tok
          else
            # nothing collected yet: chomp the rest of the whitespace run too
            @io.getbyte while TOKEN_WHITESPACE.include?(peek_byte)
          end
          tok = ""
          break
        when 0x3C
          # opening delimiter '<', or '<<' when doubled
          @tokens << tok if tok.size > 0
          if peek_byte == 0x3C
            @io.getbyte
            @tokens << "<<"
          else
            @tokens << "<"
          end
          tok = ""
          break
        when 0x3E
          # closing delimiter '>', or '>>' when doubled
          @tokens << tok if tok.size > 0
          if peek_byte == 0x3E
            @io.getbyte
            @tokens << ">>"
          else
            @tokens << ">"
          end
          tok = ""
          break
        when 0x28, 0x5B, 0x7B, 0x29, 0x5D, 0x7D
          # single-byte delimiters ( [ { ) ] } are tokens in their own right
          @tokens << tok if tok.size > 0
          @tokens << byte.chr
          tok = ""
          break
        when 0x2F
          # "/" introduces a PDF name; the name itself is read as the next token
          @tokens << tok if tok.size > 0
          @tokens << byte.chr
          # a "/" followed by EOF, a space or LF is an empty name: queue ""
          @tokens << "" if [nil, 0x20, 0x0A].include?(peek_byte)
          tok = ""
          break
        else
          tok << byte
        end
      end

      # Only reachable with content when the stream ended mid-token.
      @tokens << tok if tok.size > 0
    end

    # peek at the next character in the io stream, leaving the stream position
    # untouched
    #
    def peek_byte
      # Read one byte and, if there was one, step straight back over it.
      # Returns the byte as an integer, or nil at the end of the stream.
      upcoming = @io.getbyte
      return nil if upcoming.nil?

      @io.seek(-1, IO::SEEK_CUR)
      upcoming
    end

    # for a handful of tokens we want to tell the parser how to convert them
    # into higher level tokens. This method adds a to_token() method
    # to tokens that should remain as strings.
    #
    def string_token(token)
      # Give this one object a to_token method (returning its to_s) by opening
      # its singleton class, then hand the same object back.
      class << token
        def to_token
          to_s
        end
      end

      token
    end
  end
end