/usr/lib/ruby/vendor_ruby/rugments/regex_lexer.rb is in ruby-rugments 1.0.0~beta8-1.
This file is owned by root:root, with mode 0o644.
The actual contents of the file can be viewed below.
module Rugments
# @abstract
# A stateful lexer that uses sets of regular expressions to
# tokenize a string. Most lexers are instances of RegexLexer.
class RegexLexer < Lexer
# A rule is a tuple of a regular expression to test, and a callback
# to perform if the test succeeds.
#
# @see StateDSL#rule
class Rule
  attr_reader :callback, :re, :beginning_of_line

  # @param re [Regexp] the pattern this rule tests against the stream
  # @param callback [Proc] evaluated in the lexer instance when +re+ matches
  def initialize(re, callback)
    @re = re
    @callback = callback
    # Anchored rules are flagged so the lexer can skip them when the
    # scanner is not at the start of a line (StringScanner's `^` handling
    # is unreliable; see RegexLexer#step).
    @beginning_of_line = re.source.start_with?('^')
  end

  def inspect
    "#<Rule #{@re.inspect}>"
  end
end
# a State is a named set of rules that can be tested for or
# mixed in.
#
# @see RegexLexer.state
class State
  attr_reader :name
  attr_reader :rules

  # @param name [String] the state's name
  # @param rules [Array<Rule, State>] rules, possibly with mixed-in States
  def initialize(name, rules)
    @name, @rules = name, rules
  end

  def inspect
    format('#<%s %s>', self.class.name, @name.inspect)
  end
end
# Builder used by {RegexLexer.state} to collect a state's rules.
# Definitions are lazy: the definition block is only evaluated the
# first time the DSL is compiled into a {State} via {#to_state}.
class StateDSL
  # [Array<Rule, String>] rules collected so far; mixins are recorded
  # as bare state-name Strings until resolved in #to_state.
  attr_reader :rules

  # @param name [String] the state's name
  # @param defn [Proc] the rule definitions, stored for lazy evaluation
  def initialize(name, &defn)
    @name = name
    @defn = defn
    @rules = []
  end

  # Compile this DSL into an immutable State bound to +lexer_class+.
  # String entries (mixins, see #mixin) are resolved here through
  # lexer_class.get_state.
  def to_state(lexer_class)
    load!
    rules = @rules.map do |rule|
      rule.is_a?(String) ? lexer_class.get_state(rule) : rule
    end
    State.new(@name, rules)
  end

  # A new StateDSL whose rules are +defn+'s followed by this state's
  # own (used by RegexLexer.prepend). The parent definition is captured
  # in a local so the new block can re-evaluate it lazily.
  def prepended(&defn)
    parent_defn = @defn
    StateDSL.new(@name) do
      instance_eval(&defn)
      instance_eval(&parent_defn)
    end
  end

  # A new StateDSL whose rules are this state's own followed by
  # +defn+'s (used by RegexLexer.append).
  def appended(&defn)
    parent_defn = @defn
    StateDSL.new(@name) do
      instance_eval(&parent_defn)
      instance_eval(&defn)
    end
  end

  protected

  # Define a new rule for this state.
  #
  # @overload rule(re, token, next_state=nil)
  # @overload rule(re, &callback)
  #
  # @param [Regexp] re
  #   a regular expression for this rule to test.
  # @param [String] tok
  #   the token type to yield if `re` matches.
  # @param [#to_s] next_state
  #   (optional) a state to push onto the stack if `re` matches.
  #   If `next_state` is `:pop!`, the state stack will be popped
  #   instead.
  # @param [Proc] callback
  #   a block that will be evaluated in the context of the lexer
  #   if `re` matches. This block has access to a number of lexer
  #   methods, including {RegexLexer#push}, {RegexLexer#pop!},
  #   {RegexLexer#token}, and {RegexLexer#delegate}. The first
  #   argument can be used to access the match groups.
  def rule(re, tok = nil, next_state = nil, &callback)
    if tok.nil? && callback.nil?
      fail 'please pass `rule` a token to yield or a callback'
    end

    # When no explicit callback is given, synthesize one from the
    # (tok, next_state) pair. These procs run via instance_exec in the
    # lexer, so @debug, @output_stream, @stack and @states below are
    # the *lexer's* instance variables, not the DSL's.
    callback ||= case next_state
                 when :pop!
                   proc do |stream|
                     puts " yielding #{tok.qualname}, #{stream[0].inspect}" if @debug
                     @output_stream.call(tok, stream[0])
                     # NB: the interpolated literal 1 keeps the message in the
                     # same shape as RegexLexer#pop!'s debug output.
                     puts " popping stack: #{1}" if @debug
                     @stack.pop || fail('empty stack!')
                   end
                 when :push
                   # :push duplicates the current top of the stack.
                   proc do |stream|
                     puts " yielding #{tok.qualname}, #{stream[0].inspect}" if @debug
                     @output_stream.call(tok, stream[0])
                     puts " pushing #{@stack.last.name}" if @debug
                     @stack.push(@stack.last)
                   end
                 when Symbol
                   # Any other symbol names a state to push; try the lexer's
                   # cached states first, fall back to compiling on demand.
                   proc do |stream|
                     puts " yielding #{tok.qualname}, #{stream[0].inspect}" if @debug
                     @output_stream.call(tok, stream[0])
                     state = @states[next_state] || self.class.get_state(next_state)
                     puts " pushing #{state.name}" if @debug
                     @stack.push(state)
                   end
                 when nil
                   # Plain token rule: yield and stay in the current state.
                   proc do |stream|
                     puts " yielding #{tok.qualname}, #{stream[0].inspect}" if @debug
                     @output_stream.call(tok, stream[0])
                   end
                 else
                   fail "invalid next state: #{next_state.inspect}"
                 end

    rules << Rule.new(re, callback)
  end

  # Mix in the rules from another state into this state. The rules
  # from the mixed-in state will be tried in order before moving on
  # to the rest of the rules in this state.
  def mixin(state)
    # Stored as a String and resolved in #to_state, so mixins may refer
    # to states that are not defined yet.
    rules << state.to_s
  end

  private

  # Evaluate the stored definition block exactly once.
  def load!
    return if @loaded
    @loaded = true
    instance_eval(&@defn)
  end
end
# The states hash for this lexer: compiled State objects keyed by
# Symbol state name, memoized per class.
# @see state
def self.states
  @states = {} if @states.nil?
  @states
end
# The hash of {StateDSL} definitions for this lexer, chained to the
# superclass's definitions so subclasses inherit (and may shadow) states.
def self.state_definitions
  @state_definitions = InheritableHash.new(superclass.state_definitions) if @state_definitions.nil?
  @state_definitions
end
@state_definitions = {}
# Swap out a state's definition and drop its compiled State so that
# get_state recompiles from the new definition.
#
# Bug fix: get_state caches compiled states under Symbol keys
# (`states[name.to_sym] ||= ...`), but this method previously wrote
# `states[name] = nil` with the String name it receives from
# prepend/append — leaving the stale Symbol-keyed entry in place, so
# prepend/append had no effect once a state had been compiled.
def self.replace_state(name, new_defn)
  states.delete(name.to_sym)
  state_definitions[name] = new_defn
end
# The routines to run at the beginning of a fresh lex, chained to the
# superclass's list so subclasses inherit start hooks.
# @see start
def self.start_procs
  @start_procs = InheritableList.new(superclass.start_procs) if @start_procs.nil?
  @start_procs
end
@start_procs = []
# Specify an action to be run every fresh lex.
#
# @example
#   start { puts "I'm lexing a new string!" }
def self.start(&b)
  start_procs.push(b)
end
# Define a new state for this lexer with the given name.
# The block will be evaluated in the context of a {StateDSL}.
def self.state(name, &b)
  key = name.to_s
  state_definitions[key] = StateDSL.new(key, &b)
end
# Prepend rules to an already-defined state: the block's rules are
# tried before the existing ones.
# NOTE(review): this shadows Module#prepend on this class hierarchy.
def self.prepend(name, &b)
  name = name.to_s
  dsl = state_definitions[name]
  fail "no such state #{name.inspect}" unless dsl
  replace_state(name, dsl.prepended(&b))
end
# Append rules to an already-defined state: the block's rules are
# tried after the existing ones.
#
# Bug fix: the parameter was previously named `_state` and never used;
# `name = name.to_s` then resolved `name` to Module#name (this class's
# own name), so append always failed with
# `no such state "Rugments::RegexLexer"`.
def self.append(state, &b)
  name = state.to_s
  dsl = state_definitions[name]
  fail "no such state #{name.inspect}" unless dsl
  replace_state(name, dsl.appended(&b))
end
# Look up (compiling and caching on first use) the State for +name+.
# Already-compiled State objects pass straight through.
# @private
def self.get_state(name)
  return name if name.is_a?(State)

  key = name.to_sym
  cached = states[key]
  return cached if cached

  dsl = state_definitions[name.to_s]
  fail "unknown state: #{name.inspect}" unless dsl
  states[key] = dsl.to_state(self)
end
# Instance-level convenience wrapper for the class-level state lookup.
# @private
def get_state(state_name)
  self.class.get_state(state_name)
end
# The state stack. This is initially the single state `[:root]`.
# It is an error for this stack to be empty.
# @see #state
def stack
  @stack = [get_state(:root)] if @stack.nil?
  @stack
end
# The current state - i.e. one on top of the state stack.
#
# NB: if the state stack is empty, this will throw an error rather
# than returning nil.
def state
  top = stack.last
  fail('empty stack!') unless top
  top
end
# Reset this lexer to its initial state: discard the stack and the
# current stream, then run all of the class's start_procs in this
# instance's context.
def reset!
  @stack = nil
  @current_stream = nil
  self.class.start_procs.each { |pr| instance_eval(&pr) }
end
# This implements the lexer protocol, by yielding [token, value] pairs.
#
# The process for lexing works as follows, until the stream is empty:
#
# 1. We look at the state on top of the stack (which by default is
#    `[:root]`).
# 2. Each rule in that state is tried until one is successful. If one
#    is found, that rule's callback is evaluated - which may yield
#    tokens and manipulate the state stack. Otherwise, one character
#    is consumed with an `'Error'` token, and we continue at (1.)
#
# @see #step #step (where (2.) is implemented)
def stream_tokens(str, &b)
  scanner = StringScanner.new(str)
  @current_stream = scanner
  @output_stream = b
  @states = self.class.states
  @null_steps = 0

  loop do
    break if scanner.eos?

    if @debug
      puts "lexer: #{self.class.tag}"
      puts "stack: #{stack.map(&:name).inspect}"
      puts "stream: #{scanner.peek(20).inspect}"
    end

    # If no rule in the current state matched, consume one character
    # as an Error token and try again.
    next if step(state, scanner)

    puts ' no match, yielding Error' if @debug
    b.call(Token::Tokens::Error, scanner.getch)
  end
end
# The number of successive scans permitted without consuming
# the input stream. If this is exceeded, the match fails.
MAX_NULL_SCANS = 5

# Runs one step of the lex. Rules in the current state are tried
# until one matches, at which point its callback is called.
#
# @param state [State] the state whose rules to try; recursed into
#   for mixed-in states.
# @param stream [StringScanner] the scanner at the current position.
# @return true if a rule was tried successfully
# @return false otherwise.
def step(state, stream)
  state.rules.each do |rule|
    if rule.is_a?(State)
      # Mixins are stored inline as State objects; a match inside the
      # mixin counts as a match for this state.
      puts " entering mixin #{rule.name}" if @debug
      return true if step(rule, stream)
      puts " exiting mixin #{rule.name}" if @debug
    else
      puts " trying #{rule.inspect}" if @debug

      # XXX HACK XXX
      # StringScanner's implementation of ^ is b0rken.
      # see http://bugs.ruby-lang.org/issues/7092
      # TODO: this doesn't cover cases like /(a|^b)/, but it's
      # the most common, for now...
      next if rule.beginning_of_line && !stream.beginning_of_line?

      # skip returns the matched length (possibly 0) or nil on no match;
      # the assignment-in-condition is intentional.
      if size = stream.skip(rule.re)
        puts " got #{stream[0].inspect}" if @debug
        # Callbacks run in the lexer instance so they can use #token,
        # #push, #pop!, #delegate, etc., and read the match via stream.
        instance_exec(stream, &rule.callback)
        if size.zero?
          # Zero-width matches still succeed, but cap how many may occur
          # in a row so the lexer is guaranteed to make forward progress.
          @null_steps += 1
          if @null_steps > MAX_NULL_SCANS
            puts ' too many scans without consuming the string!' if @debug
            return false
          end
        else
          @null_steps = 0
        end
        return true
      end
    end
  end
  false
end
# Yield a token.
#
# @param tok
#   the token type
# @param val
#   (optional) the string value to yield. If absent, this defaults
#   to the entire last match (group 0 of the current StringScanner).
def token(tok, val = @current_stream[0])
  yield_token(tok, val)
end
# Yield tokens corresponding to the matched groups of the current
# match: the first token gets capture group 1, and so on.
def groups(*tokens)
  tokens.each.with_index(1) do |tok, group_idx|
    yield_token(tok, @current_stream[group_idx])
  end
end
# Delegate the lex to another lexer. The #lex method will be called
# with `:continue` set to true, so that #reset! will not be called.
# In this way, a single lexer can be repeatedly delegated to while
# maintaining its own internal state stack.
#
# @param [#lex] lexer
#   The lexer or lexer class to delegate to
# @param [String] text
#   The text to delegate. This defaults to the last matched string.
def delegate(lexer, text = nil)
  puts " delegating to #{lexer.inspect}" if @debug
  text = @current_stream[0] if text.nil?
  lexer.lex(text, continue: true) do |t, v|
    puts " delegated token: #{t.inspect}, #{v.inspect}" if @debug
    yield_token(t, v)
  end
end
# Delegate +text+ (defaulting to the last match) back to this lexer's
# own class. @see #delegate
def recurse(text = nil)
  delegate(self.class, text)
end
# Push a state onto the stack. If no state name is given and you've
# passed a block, a state will be dynamically created using the
# {StateDSL}.
def push(state_name = nil, &b)
  new_top =
    if state_name
      get_state(state_name)
    elsif b
      # anonymous state built on the fly from the block
      StateDSL.new(b.inspect, &b).to_state(self.class)
    else
      # use the top of the stack by default
      state
    end

  puts " pushing #{new_top.name}" if @debug
  stack.push(new_top)
end
# Pop the state stack. If a number is passed in, it will be popped
# that number of times.
#
# @return [nil]
def pop!(times = 1)
  fail('empty stack!') if stack.empty?

  puts " popping stack: #{times}" if @debug
  stack.pop(times)

  nil
end
# replace the head of the stack with the given state
def goto(state_name)
  fail('empty stack!') if stack.empty?

  puts " going to state #{state_name} " if @debug
  new_top = get_state(state_name)
  stack[-1] = new_top
end
# reset the stack back to `[:root]`.
def reset_stack
  puts ' resetting stack' if @debug
  # mutate the existing array in place, matching the original
  # clear-then-push behavior
  stack.replace([get_state(:root)])
end
# Check if `state_name` is anywhere in the state stack.
#
# Fixes: the block parameter previously shadowed the #state method,
# and `.to_s` was redundantly re-applied on every iteration even
# though the target had already been converted once.
def in_state?(state_name)
  target = state_name.to_s
  stack.any? { |st| st.name == target }
end
# Check if `state_name` is the state on top of the state stack.
def state?(state_name)
  state.name == state_name.to_s
end
private

# Emit a single (token, value) pair on the output stream, silently
# dropping nil or empty values.
def yield_token(tok, val)
  return unless val && !val.empty?

  puts " yielding #{tok.qualname}, #{val.inspect}" if @debug
  @output_stream.call(tok, val)
end
end
end