/usr/share/pyshared/Pyrex/Plex/Scanners.py is in python-pyrex 0.9.8.5-2ubuntu2.
This file is owned by root:root, with mode 0o644.
The actual contents of the file can be viewed below.
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 | #=======================================================================
#
# Python Lexical Analyser
#
#
# Scanning an input stream
#
#=======================================================================
import Errors
from Regexps import BOL, EOL, EOF
class Scanner:
"""
A Scanner is used to read tokens from a stream of characters
using the token set specified by a Plex.Lexicon.
Constructor:
Scanner(lexicon, stream, name = '')
See the docstring of the __init__ method for details.
Methods:
See the docstrings of the individual methods for more
information.
read() --> (value, text)
Reads the next lexical token from the stream.
position() --> (name, line, col)
Returns the position of the last token read using the
read() method.
begin(state_name)
Causes scanner to change state.
produce(value [, text])
Causes return of a token value to the caller of the
Scanner.
"""
lexicon = None # Lexicon
stream = None # file-like object
name = ''
buffer = ''
buf_start_pos = 0 # position in input of start of buffer
next_pos = 0 # position in input of next char to read
cur_pos = 0 # position in input of current char
cur_line = 1 # line number of current char
cur_line_start = 0 # position in input of start of current line
start_pos = 0 # position in input of start of token
start_line = 0 # line number of start of token
start_col = 0 # position in line of start of token
text = None # text of last token read
initial_state = None # Node
state_name = '' # Name of initial state
queue = None # list of tokens to be returned
trace = 0
def __init__(self, lexicon, stream, name = ''):
"""
Scanner(lexicon, stream, name = '')
|lexicon| is a Plex.Lexicon instance specifying the lexical tokens
to be recognised.
|stream| can be a file object or anything which implements a
compatible read() method.
|name| is optional, and may be the name of the file being
scanned or any other identifying string.
"""
self.lexicon = lexicon
self.stream = stream
self.name = name
self.queue = []
self.initial_state = None
self.begin('')
self.next_pos = 0
self.cur_pos = 0
self.cur_line_start = 0
self.cur_char = BOL
self.input_state = 1
def read(self):
"""
Read the next lexical token from the stream and return a
tuple (value, text), where |value| is the value associated with
the token as specified by the Lexicon, and |text| is the actual
string read from the stream. Returns (None, '') on end of file.
"""
queue = self.queue
while not queue:
self.text, action = self.scan_a_token()
if action is None:
self.produce(None)
self.eof()
else:
value = action.perform(self, self.text)
if value is not None:
self.produce(value)
result = queue[0]
del queue[0]
return result
def scan_a_token(self):
"""
Read the next input sequence recognised by the machine
and return (text, action). Returns ('', None) on end of
file.
"""
self.start_pos = self.cur_pos
self.start_line = self.cur_line
self.start_col = self.cur_pos - self.cur_line_start
# if self.trace:
# action = self.run_machine()
# else:
# action = self.run_machine_inlined()
action = self.run_machine_inlined()
if action:
if self.trace:
print "Scanner: read: Performing", action, "%d:%d" % (
self.start_pos, self.cur_pos)
base = self.buf_start_pos
text = self.buffer[self.start_pos - base : self.cur_pos - base]
return (text, action)
else:
if self.cur_pos == self.start_pos:
if self.cur_char == EOL:
self.next_char()
if not self.cur_char or self.cur_char == EOF:
return ('', None)
raise Errors.UnrecognizedInput(self, self.state_name)
def run_machine(self):
"""
Run the machine until no more transitions are possible.
"""
self.state = self.initial_state
self.backup_state = None
while self.transition():
pass
return self.back_up()
def run_machine_inlined(self):
"""
Inlined version of run_machine for speed.
"""
state = self.initial_state
cur_pos = self.cur_pos
cur_line = self.cur_line
cur_line_start = self.cur_line_start
cur_char = self.cur_char
input_state = self.input_state
next_pos = self.next_pos
buffer = self.buffer
buf_start_pos = self.buf_start_pos
buf_len = len(buffer)
backup_state = None
trace = self.trace
while 1:
if trace: #TRACE#
print "State %d, %d/%d:%s -->" % ( #TRACE#
state['number'], input_state, cur_pos, repr(cur_char)), #TRACE#
# Begin inlined self.save_for_backup()
#action = state.action #@slow
action = state['action'] #@fast
if action:
backup_state = (
action, cur_pos, cur_line, cur_line_start, cur_char, input_state, next_pos)
# End inlined self.save_for_backup()
c = cur_char
#new_state = state.new_state(c) #@slow
new_state = state.get(c, -1) #@fast
if new_state == -1: #@fast
new_state = c and state.get('else') #@fast
if new_state:
if trace: #TRACE#
print "State %d" % new_state['number'] #TRACE#
state = new_state
# Begin inlined: self.next_char()
if input_state == 1:
cur_pos = next_pos
# Begin inlined: c = self.read_char()
buf_index = next_pos - buf_start_pos
if buf_index < buf_len:
c = buffer[buf_index]
next_pos = next_pos + 1
else:
discard = self.start_pos - buf_start_pos
data = self.stream.read(0x1000)
buffer = self.buffer[discard:] + data
self.buffer = buffer
buf_start_pos = buf_start_pos + discard
self.buf_start_pos = buf_start_pos
buf_len = len(buffer)
buf_index = buf_index - discard
if data:
c = buffer[buf_index]
next_pos = next_pos + 1
else:
c = ''
# End inlined: c = self.read_char()
if c == '\n':
cur_char = EOL
input_state = 2
elif not c:
cur_char = EOL
input_state = 4
else:
cur_char = c
elif input_state == 2:
cur_char = '\n'
input_state = 3
elif input_state == 3:
cur_line = cur_line + 1
cur_line_start = cur_pos = next_pos
cur_char = BOL
input_state = 1
elif input_state == 4:
cur_char = EOF
input_state = 5
else: # input_state = 5
cur_char = ''
# End inlined self.next_char()
else: # not new_state
if trace: #TRACE#
print "blocked" #TRACE#
# Begin inlined: action = self.back_up()
if backup_state:
(action, cur_pos, cur_line, cur_line_start,
cur_char, input_state, next_pos) = backup_state
else:
action = None
break # while 1
# End inlined: action = self.back_up()
self.cur_pos = cur_pos
self.cur_line = cur_line
self.cur_line_start = cur_line_start
self.cur_char = cur_char
self.input_state = input_state
self.next_pos = next_pos
if trace: #TRACE#
if action: #TRACE#
print "Doing", action #TRACE#
return action
# def transition(self):
# self.save_for_backup()
# c = self.cur_char
# new_state = self.state.new_state(c)
# if new_state:
# if self.trace:
# print "Scanner: read: State %d: %s --> State %d" % (
# self.state.number, repr(c), new_state.number)
# self.state = new_state
# self.next_char()
# return 1
# else:
# if self.trace:
# print "Scanner: read: State %d: %s --> blocked" % (
# self.state.number, repr(c))
# return 0
# def save_for_backup(self):
# action = self.state.get_action()
# if action:
# if self.trace:
# print "Scanner: read: Saving backup point at", self.cur_pos
# self.backup_state = (
# action, self.cur_pos, self.cur_line, self.cur_line_start,
# self.cur_char, self.input_state, self.next_pos)
# def back_up(self):
# backup_state = self.backup_state
# if backup_state:
# (action, self.cur_pos, self.cur_line, self.cur_line_start,
# self.cur_char, self.input_state, self.next_pos) = backup_state
# if self.trace:
# print "Scanner: read: Backing up to", self.cur_pos
# return action
# else:
# return None
def next_char(self):
input_state = self.input_state
if self.trace:
print "Scanner: next:", " "*20, "[%d] %d" % (input_state, self.cur_pos),
if input_state == 1:
self.cur_pos = self.next_pos
c = self.read_char()
if c == '\n':
self.cur_char = EOL
self.input_state = 2
elif not c:
self.cur_char = EOL
self.input_state = 4
else:
self.cur_char = c
elif input_state == 2:
self.cur_char = '\n'
self.input_state = 3
elif input_state == 3:
self.cur_line = self.cur_line + 1
self.cur_line_start = self.cur_pos = self.next_pos
self.cur_char = BOL
self.input_state = 1
elif input_state == 4:
self.cur_char = EOF
self.input_state = 5
else: # input_state = 5
self.cur_char = ''
if self.trace:
print "--> [%d] %d %s" % (input_state, self.cur_pos, repr(self.cur_char))
# def read_char(self):
# """
# Get the next input character, filling the buffer if necessary.
# Returns '' at end of file.
# """
# next_pos = self.next_pos
# buf_index = next_pos - self.buf_start_pos
# if buf_index == len(self.buffer):
# discard = self.start_pos - self.buf_start_pos
# data = self.stream.read(0x1000)
# self.buffer = self.buffer[discard:] + data
# self.buf_start_pos = self.buf_start_pos + discard
# buf_index = buf_index - discard
# if not data:
# return ''
# c = self.buffer[buf_index]
# self.next_pos = next_pos + 1
# return c
def position(self):
"""
Return a tuple (name, line, col) representing the location of
the last token read using the read() method. |name| is the
name that was provided to the Scanner constructor; |line|
is the line number in the stream (1-based); |col| is the
position within the line of the first character of the token
(0-based).
"""
return (self.name, self.start_line, self.start_col)
def begin(self, state_name):
"""Set the current state of the scanner to the named state."""
self.initial_state = (
self.lexicon.get_initial_state(state_name))
self.state_name = state_name
def produce(self, value, text = None):
"""
Called from an action procedure, causes |value| to be returned
as the token value from read(). If |text| is supplied, it is
returned in place of the scanned text.
produce() can be called more than once during a single call to an action
procedure, in which case the tokens are queued up and returned one
at a time by subsequent calls to read(), until the queue is empty,
whereupon scanning resumes.
"""
if text is None:
text = self.text
self.queue.append((value, text))
def eof(self):
"""
Override this method if you want something to be done at
end of file.
"""
# For backward compatibility:
setattr(Scanner, "yield", Scanner.produce)
|