This file is indexed.

/usr/lib/ruby/vendor_ruby/rugments/lexer.rb is in ruby-rugments 1.0.0~beta8-1.

This file is owned by root:root, with mode 0o644.

The actual contents of the file can be viewed below.

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
require 'strscan'
require 'cgi'
require 'set'


module Rugments
  # @abstract
  # A lexer transforms text into a stream of `[token, chunk]` pairs.
  class Lexer
    include Token::Tokens

    class << self
      # Lexes `stream` with the given options.  The lex is delegated to a
      # new instance.
      #
      # @see #lex
      def lex(stream, opts = {}, &b)
        # Build an instance configured with `opts` and hand the work to it.
        lexer = new(opts)
        lexer.lex(stream, &b)
      end

      # Merge `o` into this class's default options and return the
      # accumulated hash.  Called without arguments it acts as a getter.
      def default_options(o = {})
        # merge! returns its receiver, i.e. the stored defaults hash itself.
        (@default_options ||= {}).merge!(o)
      end

      # Given a string, return the correct lexer class.
      def find(name)
        # Names are looked up by their string form, so symbols work too.
        key = name.to_s
        registry[key]
      end

      # Find a lexer, with fancy shiny features.
      #
      # * The string you pass can include CGI-style options
      #
      #     Lexer.find_fancy('erb?parent=tex')
      #
      # * You can pass the special name 'guess' so we guess for you,
      #   and you can pass a second argument of the code to guess by
      #
      #     Lexer.find_fancy('guess', "#!/bin/bash\necho Hello, world")
      #
      # This is used in the Redcarpet plugin as well as Rouge's own
      # markdown lexer for highlighting internal code blocks.
      #
      def find_fancy(str, code = nil)
        name, opts = str ? str.split('?', 2) : [nil, '']

        # parse the options hash from a cgi-style string
        opts = CGI.parse(opts || '').map do |k, vals|
          [k.to_sym, vals.empty? ? true : vals[0]]
        end

        opts = Hash[opts]

        lexer_class = case name
        when 'guess', nil
          guess(source: code, mimetype: opts[:mimetype])
        when String
          find(name)
        end

        lexer_class && lexer_class.new(opts)
      end

      # Specify or get this lexer's title. Meant to be human-readable.
      def title(t = nil)
        # Setter form: an explicitly supplied title always takes effect,
        # even when a default was already memoized by an earlier getter
        # call (previously `@title ||= t` silently ignored it).
        return @title = t unless t.nil?

        # Getter form: fall back to the capitalized tag when no title has
        # been specified, and remember the result.
        @title ||= tag.capitalize
      end

      # Specify or get this lexer's description.
      def desc(arg = :absent)
        # The :absent sentinel distinguishes "no argument" from an
        # explicit nil, so the description can be reset to nil.
        return @desc if arg == :absent

        @desc = arg
      end

      # Specify or get the path name containing a small demo for
      # this lexer (can be overridden by {demo}).
      def demo_file(arg = :absent)
        # Setter form: remember the explicitly configured path.
        return @demo_file = Pathname.new(arg) unless arg == :absent

        # Getter form: keep a previously configured path and only fall
        # back to `demos/<tag>` next to this file when none was given.
        # (A plain assignment here used to discard the configured path.)
        @demo_file ||= Pathname.new(__FILE__).dirname.join('demos', tag)
      end

      # Specify or get a small demo string for this lexer
      def demo(arg = :absent)
        # Setter form: store the demo string as given.
        return @demo = arg unless arg == :absent

        # Getter form: return a previously specified (or already loaded)
        # demo; only read the demo file when nothing is stored yet.
        # (A plain assignment here used to discard the specified demo.)
        @demo ||= File.read(demo_file, encoding: 'utf-8')
      end

      # @return a list of all lexers.
      def all
        # The same lexer may be registered under several names, so
        # duplicates are collapsed.
        registered = registry.values
        registered.uniq
      end

      # Guess which lexer to use based on a hash of info.
      #
      # This accepts the same arguments as Lexer.guess, but will never throw
      # an error.  It will return a (possibly empty) list of potential lexers
      # to use.
      def guesses(info = {})
        mimetype = info[:mimetype]
        filename = info[:filename]
        source = info[:source]

        candidates = registry.values.uniq
        total_size = candidates.size

        candidates = filter_by_mimetype(candidates, mimetype) if mimetype
        return candidates if candidates.size == 1

        candidates = filter_by_filename(candidates, filename) if filename
        return candidates if candidates.size == 1

        # Without source text there is nothing left to disambiguate with.
        return [] unless source

        # When still looking at *all* lexers, only confident results from
        # analyze_text are accepted; once the set has been narrowed, any
        # positive score is trusted.
        source_threshold = candidates.size < total_size ? 0 : 0.5
        best = best_by_source(candidates, source, source_threshold)
        best.nil? ? [] : [best]
      end

      # Error carrying the list of lexers that could not be told apart.
      class AmbiguousGuess < StandardError
        # The candidate lexers, as passed to the constructor.
        attr_reader :alternatives

        def initialize(alternatives)
          @alternatives = alternatives
        end

        # Lists the tags of all alternatives.
        def message
          tags = alternatives.map { |lexer| lexer.tag }
          "Ambiguous guess: can't decide between #{tags.inspect}"
        end
      end

      # Guess which lexer to use based on a hash of info.
      #
      # @option info :mimetype
      #   A mimetype to guess by
      # @option info :filename
      #   A filename to guess by
      # @option info :source
      #   The source itself, which, if guessing by mimetype or filename
      #   fails, will be searched for shebangs, <!DOCTYPE ...> tags, and
      #   other hints.
      #
      # @see Lexer.analyze_text
      # @see Lexer.multi_guess
      def guess(info = {})
        candidates = guesses(info)

        case candidates.size
        when 0 then Lexers::PlainText # nothing matched: fall back to plain text
        when 1 then candidates[0]
        else raise AmbiguousGuess.new(candidates)
        end
      end

      # Shorthand for `guess` with only a :mimetype hint.
      def guess_by_mimetype(mt)
        guess(mimetype: mt)
      end

      # Shorthand for `guess` with only a :filename hint.
      def guess_by_filename(fname)
        guess(filename: fname)
      end

      # Shorthand for `guess` with only a :source hint.
      def guess_by_source(source)
        guess(source: source)
      end

      private

      # Narrow `lexers` to those declaring mimetype `mt`.  When none of
      # them does, the input list is returned unchanged.
      def filter_by_mimetype(lexers, mt)
        matching = lexers.select { |candidate| candidate.mimetypes.include?(mt) }
        matching.empty? ? lexers : matching
      end

      # returns a list of lexers that match the given filename with
      # equal specificity (i.e. number of wildcards in the pattern).
      # This helps disambiguate between, e.g. the Nginx lexer, which
      # matches `nginx.conf`, and the Conf lexer, which matches `*.conf`.
      # In this case, nginx will win because the pattern has no wildcards,
      # while `*.conf` has one.
      def filter_by_filename(lexers, fname)
        basename = File.basename(fname)

        # Pair every lexer with the wildcard count of its most specific
        # matching pattern (nil when none of its patterns match).
        scored = lexers.map do |lexer|
          wildcard_counts = lexer.filenames
                                 .select { |glob| File.fnmatch?(glob, basename, File::FNM_DOTMATCH) }
                                 .map { |glob| glob.scan(/[*?\[]/).size }
          [lexer, wildcard_counts.min]
        end
        scored.reject! { |_lexer, score| score.nil? }

        # No pattern matched anywhere: leave the candidate list untouched.
        return lexers if scored.empty?

        # Fewer wildcards means a more specific match; keep all ties.
        best = scored.map { |_lexer, score| score }.min
        scored.select { |_lexer, score| score == best }.map { |lexer, _score| lexer }
      end

      # Return the lexer whose `analyze_text` score for `source` is highest
      # and strictly above `threshold`, or nil when none qualifies.
      # `source` may be a String or any object responding to `read`.
      def best_by_source(lexers, source, threshold = 0)
        text =
          if source.is_a?(String)
            source
          elsif source.respond_to?(:read)
            source.read
          else
            raise 'invalid source'
          end

        assert_utf8!(text)

        analyzer = TextAnalyzer.new(text)

        winner = nil
        high_score = threshold
        lexers.each do |candidate|
          score = candidate.analyze_text(analyzer) || 0

          # A score of exactly 1 wins immediately; later lexers are not asked.
          return candidate if score == 1
          next unless score > high_score

          winner = candidate
          high_score = score
        end

        winner
      end

      protected

      # @private
      def register(name, lexer)
        # Keys are stored as strings.
        key = name.to_s
        registry[key] = lexer
      end

      public

      # Used to specify or get the canonical name of this lexer class.
      #
      # @example
      #   class MyLexer < Lexer
      #     tag 'foo'
      #   end
      #
      #   MyLexer.tag # => 'foo'
      #
      #   Lexer.find('foo') # => MyLexer
      def tag(t = nil)
        if t.nil?
          # Getter form.
          @tag
        else
          # Setter form: store the string form and register this class
          # under it in the Lexer registry.
          @tag = t.to_s
          Lexer.register(@tag, self)
        end
      end

      # Used to specify alternate names this lexer class may be found by.
      #
      # @example
      #   class Erb < Lexer
      #     tag 'erb'
      #     aliases 'eruby', 'rhtml'
      #   end
      #
      #   Lexer.find('eruby') # => Erb
      def aliases(*args)
        names = args.map(&:to_s)

        # Each alias becomes an additional registry key for this class.
        names.each { |name| Lexer.register(name, self) }

        @aliases ||= []
        @aliases.concat(names)
      end

      # Specify a list of filename globs associated with this lexer.
      #
      # @example
      #   class Ruby < Lexer
      #     filenames '*.rb', '*.ruby', 'Gemfile', 'Rakefile'
      #   end
      def filenames(*fnames)
        # Doubles as a getter: with no arguments nothing is appended.
        @filenames ||= []
        @filenames.concat(fnames)
      end

      # Specify a list of mimetypes associated with this lexer.
      #
      # @example
      #   class Html < Lexer
      #     mimetypes 'text/html', 'application/xhtml+xml'
      #   end
      def mimetypes(*mts)
        # Doubles as a getter: with no arguments nothing is appended.
        @mimetypes ||= []
        @mimetypes.concat(mts)
      end

      # @private
      def assert_utf8!(str)
        encoding = str.encoding

        # Besides UTF-8, plain ASCII and binary-tagged strings are let through.
        return if %w(US-ASCII UTF-8 ASCII-8BIT).include?(encoding.name)

        raise EncodingError,
              "Bad encoding: #{encoding.names.join(',')}. " \
              'Please convert your string to UTF-8.'
      end

      private

      # Lazily created name => lexer hash stored on the receiving class.
      def registry
        @registry || (@registry = {})
      end
    end

    # -*- instance methods -*- #

    # Create a new lexer with the given options.  Individual lexers may
    # specify extra options.  The only current globally accepted option
    # is `:debug`.
    #
    # @option opts :debug
    #   Prints debug information to stdout.  The particular info depends
    #   on the lexer in question.  In regex lexers, this will log the
    #   state stack at the beginning of each step, along with each regex
    #   tried and each stream consumed.  Try it, it's pretty useful.
    def initialize(opts = {})
      # Merge the caller's options into this instance's option hash.
      options(opts)

      # Cache the :debug option; `option` resolves it against the class
      # defaults merged with the instance options.
      @debug = option(:debug)
    end

    # get and/or specify the options for this lexer.
    def options(o = {})
      @options ||= {}
      @options.merge!(o)

      # Instance options take precedence over the class-level defaults;
      # the result is a new hash, so the defaults themselves are untouched.
      defaults = self.class.default_options
      defaults.merge(@options)
    end

    # get or specify one option for this lexer
    def option(k, v = :absent)
      # The :absent sentinel lets callers set an option to nil explicitly.
      return options[k] if v == :absent

      options(k => v)
    end

    # @abstract
    #
    # Called at the beginning of each lex, unless the `:continue` option
    # is given.  The default implementation is a noop.
    def reset!
      # Intentionally empty in the base class.
    end

    # Given a string, yield [token, chunk] pairs.  If no block is given,
    # an enumerator is returned.
    #
    # @option opts :continue
    #   Continue the lex from the previous state (i.e. don't call #reset!)
    def lex(string, opts = {}, &b)
      return enum_for(:lex, string, opts) unless block_given?

      Lexer.assert_utf8!(string)

      reset! unless opts[:continue]

      # Consolidate consecutive tokens of the same type into one chunk.
      last_token = nil
      last_val = nil
      stream_tokens(string) do |tok, val|
        # Empty chunks carry no text and never start or extend a run.
        next if val.empty?

        if tok == last_token
          last_val << val
          next
        end

        b.call(last_token, last_val) if last_token
        last_token = tok
        # Accumulate into a private copy.  Appending to the yielded chunk
        # itself would modify a string owned by the lexer (or the caller's
        # input, if a lexer yields it directly) and raises FrozenError
        # when that chunk is frozen.
        last_val = val.dup
      end

      # Flush the final run, if any token was produced at all.
      b.call(last_token, last_val) if last_token
    end

    # delegated to {Lexer.tag}
    def tag
      lexer_class = self.class
      lexer_class.tag
    end

    # @abstract
    #
    # Yield `[token, chunk]` pairs, given a prepared input stream.  This
    # must be implemented.
    #
    # @param [StringScanner] stream
    #   the stream
    def stream_tokens(_stream, &_b)
      # Placeholder for the concrete implementation in subclasses.
      raise 'abstract'
    end

    # @abstract
    #
    # Return a number between 0 and 1 indicating the likelihood that
    # the text given should be lexed with this lexer.  The default
    # implementation returns 0.  Values under 0.5 will only be used
    # to disambiguate filename or mimetype matches.
    #
    # @param [TextAnalyzer] text
    #   the text to be analyzed, with a couple of handy methods on it,
    #   like {TextAnalyzer#shebang?} and {TextAnalyzer#doctype?}
    def self.analyze_text(_text)
      # Base implementation: report no confidence, whatever the input.
      0
    end
  end

  module Lexers
    # Load the file at `relpath`, resolved against the `lexers` directory
    # next to this file, unless `const_name` is already defined.
    def self.load_const(const_name, relpath)
      unless const_defined?(const_name)
        lexers_dir = Pathname.new(__FILE__).dirname + 'lexers'
        load lexers_dir.join(relpath)
      end
    end
  end
end