/usr/lib/python3/dist-packages/ptk/parser.py is in python3-ptk 1.3.1-1.
This file is owned by root:root, with mode 0o644.
The actual contents of the file can be viewed below.
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 | # -*- coding: UTF-8 -*-
# (c) Jérôme Laheurte 2015
# See LICENSE.txt
import six
import functools
import collections
import logging
import re
from ptk.lexer import ProgressiveLexer, EOF, token
from ptk.grammar import Grammar, Production, GrammarError
# production is only imported so that client code doesn't have to import it from grammar
from ptk.grammar import production # pylint: disable=W0611
from ptk.utils import Singleton, callbackByName
class ParseError(Exception):
"""
Syntax error when parsing.
:ivar token: The unexpected token.
"""
def __init__(self, grammar, tok, state):
self.token = tok
super(ParseError, self).__init__(six.u('Unexpected token "%s" in state "%s"') % (tok.value, sorted(state)))
self._expecting = set()
for terminal in grammar.tokenTypes():
if grammar.__actions__.get((state, terminal), None) is not None:
self._expecting.add(terminal)
def expecting(self):
"""
Returns a set of tokens types that would have been valid in input.
"""
return self._expecting
def leftAssoc(*operators):
"""
Class decorator for left associative operators. Use this to
decorate your :py:class:`Parser` class. Operators passed as
argument are assumed to have the same priority. The later you
declare associativity, the higher the priority; so the following
code
.. code-block:: python
@leftAssoc('+', '-')
@leftAssoc('*', '/')
class MyParser(LRParser):
# ...
declares '+' and '-' to be left associative, with the same
priority. '*' and '/' are also left associative, with a higher
priority than '+' and '-'.
See also the *priority* argument to :py:func:`production`.
"""
def _wrapper(cls):
cls.__precedence__.insert(0, ('left', set(operators)))
return cls
return _wrapper
def rightAssoc(*operators):
"""
Class decorator for right associative operators. Same remarks as :py:func:`leftAssoc`.
"""
def _wrapper(cls):
cls.__precedence__.insert(0, ('right', set(operators)))
return cls
return _wrapper
def nonAssoc(*operators):
"""
Class decorator for non associative operators. Same remarks as :py:func:`leftAssoc`.
"""
def _wrapper(cls):
cls.__precedence__.insert(0, ('non', set(operators)))
return cls
return _wrapper
class _StartState(six.with_metaclass(Singleton, object)):
__reprval__ = six.u('\u03A3') if six.PY3 else six.u('(START)')
class _ResolveError(Exception):
pass
@functools.total_ordering
class _Item(object):
def __init__(self, prod, dot, terminal):
self.production = prod
self.dot = dot
self.terminal = terminal
def shouldReduce(self):
"""
Returns True if the dot is in last position
"""
return self.dot == len(self.production.right)
def next(self):
"""
Returns an item with the dot advanced one position
"""
return _Item(self.production, self.dot + 1, self.terminal)
def __repr__(self):
symbols = list(self.production.right)
symbols.insert(self.dot, six.u('\u2022') if six.PY3 else six.u('.'))
return six.u('%s -> %s (%s)') % (self.production.name, six.u(' ').join([repr(sym) for sym in symbols]), self.terminal)
def __eq__(self, other):
return (self.production, self.dot, self.terminal) == (other.production, other.dot, other.terminal)
def __lt__(self, other):
return (self.production, self.dot, self.terminal) < (other.production, other.dot, other.terminal)
def __hash__(self):
return hash((self.production, self.dot, self.terminal))
class _Accept(BaseException):
def __init__(self, result):
self.result = result
super(_Accept, self).__init__()
_StackItem = collections.namedtuple('_StackItem', ['state', 'value'])
class _Shift(object):
def __init__(self, newState):
self.newState = newState
def doAction(self, grammar, stack, tok): # pylint: disable=W0613
stack.append(_StackItem(self.newState, tok.value))
return True
class _Reduce(object):
def __init__(self, item):
self.item = item
self.nargs = len(item.production.right)
def doAction(self, grammar, stack, tok): # pylint: disable=W0613
callback, kwargs = self._getCallback(stack)
self._applied(grammar, stack, callback(grammar, **kwargs))
return False
def _applied(self, grammar, stack, prodVal):
stack.append(_StackItem(grammar.goto(stack[-1].state, self.item.production.name), prodVal))
def _getCallback(self, stack):
if self.nargs:
args = [stackItem.value for stackItem in stack[-self.nargs:]]
stack[-self.nargs:] = []
else:
args = []
return self.item.production.apply(args)
class LRParser(Grammar):
"""
LR(1) parser. This class is intended to be used with a lexer class
derived from :py:class:`LexerBase`, using inheritance; it
overrides :py:func:`LexerBase.newToken` so you must inherit from
the parser first, then the lexer:
.. code-block:: python
class MyParser(LRParser, ReLexer):
# ...
"""
def __init__(self): # pylint: disable=R0914,R0912
super(LRParser, self).__init__()
self._restartParser()
def newToken(self, tok):
try:
for action, stack in self._processToken(tok):
if action.doAction(self, stack, tok):
break
except _Accept as exc:
self._restartParser()
self.newSentence(exc.result)
def _processToken(self, tok):
while True:
action = self.__actions__.get((self.__stack[-1].state, tok.type), None)
if action is None:
raise ParseError(self, tok, self.__stack[-1].state)
yield action, self.__stack
def newSentence(self, sentence): # pragma: no cover
"""
This is called when the start symbol has been reduced.
:param sentence: The value associated with the start symbol.
"""
raise NotImplementedError
@classmethod
def _createProductionParser(cls, name, priority):
return ProductionParser(callbackByName(name), priority, cls)
@classmethod
def _createReduceAction(cls, item):
return _Reduce(item)
@classmethod
def _createShiftAction(cls, state):
return _Shift(state)
@classmethod
def prepare(cls):
for prod in cls.productions():
if prod.name is _StartState:
break
else:
def acceptor(_, result):
raise _Accept(result)
prod = Production(_StartState, acceptor)
prod.addSymbol(cls._defaultStartSymbol() if cls.startSymbol is None else cls.startSymbol, name='result')
cls.__productions__.insert(0, prod)
cls.startSymbol = _StartState
super(LRParser, cls).prepare()
states, goto = cls.__computeStates(prod)
reachable = cls.__computeActions(states, goto)
logger = logging.getLogger('LRParser')
cls.__resolveConflicts(logger)
usedTokens = set([symbol for state, symbol in cls.__actions__.keys() if symbol is not EOF])
if usedTokens != cls.tokenTypes(): # pragma: no cover
logger.warning('The following tokens are not used: %s', ','.join([repr(sym) for sym in sorted(cls.tokenTypes() - usedTokens)]))
if reachable != cls.nonterminals(): # pragma: no cover
logger.warning('The following nonterminals are not reachable: %s', ','.join([repr(sym) for sym in sorted(cls.nonterminals() - reachable)]))
# Reductions only need goto entries for nonterminals
cls._goto = dict([((state, symbol), newState) for (state, symbol), newState in goto.items() if symbol not in cls.tokenTypes()])
parts = list()
if cls.nSR:
parts.append('%d shift/reduce conflicts' % cls.nSR)
if cls.nRR:
parts.append('%d reduce/reduce conflicts' % cls.nRR)
if parts:
logger.warning(', '.join(parts))
# Cast to tuple because sets are not totally ordered
for index, state in enumerate([tuple(cls._startState)] + sorted([tuple(state) for state in states if state != cls._startState])):
logger.debug('State %d', index)
for item in sorted(state):
logger.debug(' %s', item)
logger.info('%d states.', len(states))
@classmethod
def __computeStates(cls, start):
allSyms = cls.tokenTypes() | cls.nonterminals()
goto = dict()
cls._startState = frozenset([_Item(start, 0, EOF)])
states = set([cls._startState])
stack = [cls._startState]
while stack:
state = stack.pop()
stateClosure = cls.__itemSetClosure(state)
for symbol in allSyms:
# Compute goto(symbol, state)
nextState = set()
for item in stateClosure:
if not item.shouldReduce() and item.production.right[item.dot] == symbol:
nextState.add(item.next())
if nextState:
nextState = frozenset(nextState)
goto[(state, symbol)] = nextState
if nextState not in states:
states.add(nextState)
stack.append(nextState)
return states, goto
@classmethod
def __computeActions(cls, states, goto):
cls.__actions__ = dict()
reachable = set()
for state in states:
for item in cls.__itemSetClosure(state):
if item.shouldReduce():
action = cls._createReduceAction(item)
reachable.add(item.production.name)
cls.__addReduceAction(state, item.terminal, action)
else:
symbol = item.production.right[item.dot]
if symbol in cls.tokenTypes():
cls.__addShiftAction(state, symbol, cls._createShiftAction(goto[(state, symbol)]))
return reachable
@classmethod
def __shouldPreferShift(cls, logger, reduceAction, symbol):
logger.info('Shift/reduce conflict for "%s" on "%s"', reduceAction.item, symbol)
prodPrecedence = reduceAction.item.production.precedence(cls)
tokenPrecedence = cls.terminalPrecedence(symbol)
# If both precedences are specified, use priority/associativity
if prodPrecedence is not None and tokenPrecedence is not None:
prodAssoc, prodPrio = prodPrecedence
tokenAssoc, tokenPrio = tokenPrecedence
if prodPrio > tokenPrio:
logger.info('Resolving in favor of reduction because of priority')
return False
if prodPrio < tokenPrio:
logger.info('Resolving in favor of shift because of priority')
return True
if prodAssoc == tokenAssoc:
if prodAssoc == 'non':
logger.info('Resolving in favor of error because of associativity')
raise _ResolveError()
if prodAssoc == 'left':
logger.info('Resolving in favor of reduction because of associativity')
return False
logger.info('Resolving in favor of shift because of associativity')
return True
# At least one of those is not specified; use shift
logger.warning('Could not disambiguate shift/reduce conflict for "%s" on "%s"; using shift' % (reduceAction.item, symbol))
cls.nSR += 1
return True
@classmethod
def __resolveConflicts(cls, logger):
cls.nSR = 0
cls.nRR = 0
for (state, symbol), actions in sorted(cls.__actions__.items()):
action = actions.pop()
while actions:
conflicting = actions.pop()
try:
action = cls.__resolveConflict(logger, action, conflicting, symbol)
except _ResolveError:
del cls.__actions__[(state, symbol)]
break
else:
cls.__actions__[(state, symbol)] = action
@classmethod
def __resolveConflict(cls, logger, action1, action2, symbol):
if isinstance(action2, _Shift):
action1, action2 = action2, action1
if isinstance(action1, _Shift):
# Shift/reduce
return action1 if cls.__shouldPreferShift(logger, action2, symbol) else action2
# Reduce/reduce
logger.warning('Reduce/reduce conflict between "%s" and "%s"', action1.item, action2.item)
cls.nRR += 1
# Use the first one to be declared
for prod in cls.productions():
if prod == action1.item.production:
logger.warning('Using "%s', prod)
return action1
if prod == action2.item.production:
logger.warning('Using "%s', prod)
return action2
@classmethod
def __addReduceAction(cls, state, symbol, action):
cls.__actions__.setdefault((state, symbol), list()).append(action)
@classmethod
def __addShiftAction(cls, state, symbol, action):
for existing in cls.__actions__.get((state, symbol), list()):
if isinstance(existing, _Shift):
return
cls.__actions__.setdefault((state, symbol), list()).append(action)
@classmethod
def goto(cls, state, symbol):
return cls._goto[(state, symbol)]
def _restartParser(self):
self.__stack = [_StackItem(self._startState, None)]
@classmethod
def __itemSetClosure(cls, items):
result = set(items)
while True:
prev = set(result)
for item in [item for item in result if not item.shouldReduce()]:
symbol = item.production.right[item.dot]
if symbol not in cls.tokenTypes():
terminals = cls.first(*tuple(item.production.right[item.dot + 1:] + [item.terminal]))
for prod in (prod for prod in cls.productions() if prod.name == symbol):
for terminal in terminals:
result.add(_Item(prod, 0, terminal))
if prev == result:
break
return result
class ProductionParser(LRParser, ProgressiveLexer): # pylint: disable=R0904
# pylint: disable=C0111,C0103,R0201
def __init__(self, callback, priority, grammarClass): # pylint: disable=R0915
self.callback = callback
self.priority = priority
self.grammarClass = grammarClass
super(ProductionParser, self).__init__()
@classmethod
def prepare(cls, **kwargs): # pylint: disable=R0915
# Obviously cannot use @production here
# DECL -> identifier "->" PRODS
prod = Production('DECL', cls.DECL)
prod.addSymbol('identifier', 'name')
prod.addSymbol('arrow')
prod.addSymbol('PRODS', 'prods')
cls.__productions__.append(prod)
# PRODS -> P
prod = Production('PRODS', cls.PRODS1)
prod.addSymbol('P', 'prodlist')
cls.__productions__.append(prod)
# PRODS -> PRODS "|" P
prod = Production('PRODS', cls.PRODS2)
prod.addSymbol('PRODS', 'prods')
prod.addSymbol('union')
prod.addSymbol('P', 'prodlist')
cls.__productions__.append(prod)
# P -> P SYM
prod = Production('P', cls.P1)
prod.addSymbol('P', 'prodlist')
prod.addSymbol('SYM', 'sym')
cls.__productions__.append(prod)
# P -> ɛ
prod = Production('P', cls.P2)
cls.__productions__.append(prod)
# SYM -> SYMNAME PROPERTIES
prod = Production('SYM', cls.SYM)
prod.addSymbol('SYMNAME', 'symname')
prod.addSymbol('PROPERTIES', 'properties')
cls.__productions__.append(prod)
# SYM -> SYMNAME repeat PROPERTIES
prod = Production('SYM', cls.SYMREP)
prod.addSymbol('SYMNAME', 'symname')
prod.addSymbol('repeat', 'repeat')
prod.addSymbol('PROPERTIES', 'properties')
cls.__productions__.append(prod)
# SYM -> SYMNAME repeat lparen identifier rparen PROPERTIES
prod = Production('SYM', cls.SYMREP)
prod.addSymbol('SYMNAME', 'symname')
prod.addSymbol('repeat', 'repeat')
prod.addSymbol('lparen')
prod.addSymbol('identifier', 'separator')
prod.addSymbol('rparen')
prod.addSymbol('PROPERTIES', 'properties')
cls.__productions__.append(prod)
# SYM -> SYMNAME repeat lparen litteral rparen PROPERTIES
prod = Production('SYM', cls.SYMREP_LIT)
prod.addSymbol('SYMNAME', 'symname')
prod.addSymbol('repeat', 'repeat')
prod.addSymbol('lparen')
prod.addSymbol('litteral', 'separator')
prod.addSymbol('rparen')
prod.addSymbol('PROPERTIES', 'properties')
cls.__productions__.append(prod)
# SYMNAME -> identifier
prod = Production('SYMNAME', cls.SYMNAME1)
prod.addSymbol('identifier', 'identifier')
cls.__productions__.append(prod)
# SYMNAME -> litteral
prod = Production('SYMNAME', cls.SYMNAME2)
prod.addSymbol('litteral', 'litteral')
cls.__productions__.append(prod)
# PROPERTIES -> ɛ
prod = Production('PROPERTIES', cls.PROPERTIES1)
cls.__productions__.append(prod)
# PROPERTIES -> lchev identifier rchev
prod = Production('PROPERTIES', cls.PROPERTIES2)
prod.addSymbol('lchev')
prod.addSymbol('identifier', 'name')
prod.addSymbol('rchev')
cls.__productions__.append(prod)
super(ProductionParser, cls).prepare(**kwargs)
def newSentence(self, startSymbol):
name, prods = startSymbol
for prod in prods:
if prod.name is None:
prod.name = name
self.grammarClass.__productions__.extend(prods)
# Lexer
@staticmethod
def ignore(char):
return char in ' \t\n'
@token('->')
def arrow(self, tok):
pass
@token('<')
def lchev(self, tok):
pass
@token('>')
def rchev(self, tok):
pass
@token(r'\|')
def union(self, tok):
pass
@token(r'\*|\+|\?')
def repeat(self, tok):
pass
@token(r'\(')
def lparen(self, tok):
pass
@token(r'\)')
def rparen(self, tok):
pass
@token('[a-zA-Z_][a-zA-Z0-9_]*')
def identifier(self, tok):
pass
@token(r'"|\'')
def litteral(self, tok):
class StringBuilder(object):
def __init__(self, quotetype):
self.quotetype = quotetype
self.chars = list()
self.state = 0
def feed(self, char):
if self.state == 0:
if char == '\\':
self.state = 1
elif char == self.quotetype:
return 'litteral', ''.join(self.chars)
else:
self.chars.append(char)
elif self.state == 1:
self.chars.append(char)
self.state = 0
self.setConsumer(StringBuilder(tok.value))
# Parser
def DECL(self, name, prods):
if name in self.grammarClass.tokenTypes():
raise GrammarError('"%s" is a token name and cannot be used as non-terminal' % name)
return (name, prods)
def PRODS1(self, prodlist):
return prodlist
def PRODS2(self, prods, prodlist):
prods.extend(prodlist)
return prods
def P1(self, sym, prodlist):
result = list()
symbol, properties, repeat, sep = sym
for prod in prodlist:
if prod.name is None:
if repeat is None:
prod.addSymbol(symbol, name=properties.get('name', None))
result.append(prod)
elif repeat == '?':
if sep is not None:
raise GrammarError('A separator makes no sense for "?"')
self.__addAtMostOne(result, prod, symbol, properties.get('name', None))
elif repeat in ['*', '+']:
self.__addList(result, prod, symbol, properties.get('name', None), repeat == '*', sep)
else:
result.append(prod)
return result
def __addAtMostOne(self, productions, prod, symbol, name):
clone = prod.cloned()
if name is not None:
self._wrapCallbackNone(name, clone)
productions.append(clone)
prod.addSymbol(symbol, name=name)
productions.append(prod)
def _wrapCallbackNone(self, name, prod):
previous = prod.callback
def callback(*args, **kwargs):
kwargs[name] = None
return previous(*args, **kwargs)
prod.callback = callback
def __addList(self, productions, prod, symbol, name, allowEmpty, sep):
class ListSymbol(six.with_metaclass(Singleton, object)):
__reprval__ = six.u('List(%s, "%s")') % (symbol, six.u('*') if allowEmpty else six.u('+'))
if allowEmpty:
clone = prod.cloned()
self._wrapCallbackEmpty(name, clone)
productions.append(clone)
prod.addSymbol(ListSymbol, name=name)
productions.append(prod)
listProd = Production(ListSymbol, self._wrapCallbackOne())
listProd.addSymbol(symbol, name='item')
productions.append(listProd)
listProd = Production(ListSymbol, self._wrapCallbackNext())
listProd.addSymbol(ListSymbol, name='items')
if sep is not None:
listProd.addSymbol(sep)
listProd.addSymbol(symbol, name='item')
productions.append(listProd)
def _wrapCallbackEmpty(self, name, prod):
previous = prod.callback
def cbEmpty(*args, **kwargs):
if name is not None:
kwargs[name] = []
return previous(*args, **kwargs)
prod.callback = cbEmpty
def _wrapCallbackOne(self):
def cbOne(_, item):
return [item]
return cbOne
def _wrapCallbackNext(self):
def cbNext(_, items, item):
items.append(item)
return items
return cbNext
def P2(self):
# 'name' is replaced in newSentence()
return [Production(None, self.callback, priority=self.priority)]
def SYMNAME1(self, identifier):
return identifier
def SYMNAME2(self, litteral):
name = litteral
if name not in self.grammarClass.tokenTypes():
self.grammarClass.addTokenType(name, lambda s, tok: None, re.escape(name), None)
return name
def SYM(self, symname, properties):
return (symname, properties, None, None)
def SYMREP(self, symname, repeat, properties, separator=None):
return (symname, properties, repeat, separator)
def SYMREP_LIT(self, symname, repeat, properties, separator):
if separator not in self.grammarClass.tokenTypes():
self.grammarClass.addTokenType(separator, lambda s, tok: None, re.escape(separator), None)
return self.SYMREP(symname, repeat, properties, separator)
def PROPERTIES1(self):
return dict()
def PROPERTIES2(self, name):
return dict(name=name)
|