/usr/share/pyshared/jsonpath_rw/lexer.py is in python-jsonpath-rw 1.2.0-1.
This file is owned by root:root, with mode 0o644.
The actual contents of the file can be viewed below.
from __future__ import unicode_literals, print_function, absolute_import, division, generators, nested_scopes
import sys
import logging

import ply.lex

logger = logging.getLogger(__name__)
class JsonPathLexer(object):
    '''
    A Lexical analyzer for JsonPath.
    '''

    def __init__(self, debug=False):
        self.debug = debug
        if self.__doc__ == None:
            raise Exception('Docstrings have been removed! By design of PLY, jsonpath-rw requires docstrings. You must not use PYTHONOPTIMIZE=2 or python -OO.')

    def tokenize(self, string):
        '''
        Maps a string to an iterator over tokens. In other words: [char] -> [token]
        '''

        new_lexer = ply.lex.lex(module=self, debug=self.debug, errorlog=logger)
        new_lexer.latest_newline = 0
        new_lexer.input(string)

        while True:
            t = new_lexer.token()
            if t is None: break
            t.col = t.lexpos - new_lexer.latest_newline
            yield t
    # ============== PLY Lexer specification ==================
    #
    # This probably should be private but:
    #   - the parser requires access to `tokens` (perhaps they should be defined in a third, shared dependency)
    #   - things like `literals` might be a legitimate part of the public interface.
    #
    # Anyhow, it is pythonic to give some rope to hang oneself with :-)

    literals = ['*', '.', '[', ']', '(', ')', '$', ',', ':', '|', '&']

    reserved_words = { 'where': 'WHERE' }

    tokens = ['DOUBLEDOT', 'NUMBER', 'ID', 'NAMED_OPERATOR'] + list(reserved_words.values())

    states = [ ('singlequote', 'exclusive'),
               ('doublequote', 'exclusive'),
               ('backquote', 'exclusive') ]

    # Normal lexing, rather easy
    t_DOUBLEDOT = r'\.\.'
    t_ignore = ' \t'
    def t_ID(self, t):
        r'[a-zA-Z_@][a-zA-Z0-9_@]*'
        t.type = self.reserved_words.get(t.value, 'ID')
        return t

    def t_NUMBER(self, t):
        r'\d+'
        t.value = int(t.value)
        return t
    # Single-quoted strings

    t_singlequote_ignore = ''

    def t_SINGLEQUOTE(self, t):
        r'\''
        t.lexer.string_start = t.lexer.lexpos
        t.lexer.push_state('singlequote')

    def t_singlequote_SINGLEQUOTE(self, t):
        r"([^']|\\')*'"
        t.value = t.value[:-1]
        t.type = 'ID'
        t.lexer.pop_state()
        return t

    def t_singlequote_error(self, t):
        raise Exception('Error on line %s, col %s while lexing singlequoted field: Unexpected character: %s ' % (t.lexer.lineno, t.lexpos - t.latest_newline, t.value[0]))
    # Double-quoted strings

    t_doublequote_ignore = ''

    def t_DOUBLEQUOTE(self, t):
        r'"'
        t.lexer.string_start = t.lexer.lexpos
        t.lexer.push_state('doublequote')

    def t_doublequote_DOUBLEQUOTE(self, t):
        r'([^"]|\\")*"'
        t.value = t.value[:-1]
        t.type = 'ID'
        t.lexer.pop_state()
        return t

    def t_doublequote_error(self, t):
        raise Exception('Error on line %s, col %s while lexing doublequoted field: Unexpected character: %s ' % (t.lexer.lineno, t.lexpos - t.latest_newline, t.value[0]))
    # Back-quoted "magic" operators

    t_backquote_ignore = ''

    def t_BACKQUOTE(self, t):
        r'`'
        t.lexer.string_start = t.lexer.lexpos
        t.lexer.push_state('backquote')

    def t_backquote_BACKQUOTE(self, t):
        r'([^`]|\\`)*`'
        t.value = t.value[:-1]
        t.type = 'NAMED_OPERATOR'
        t.lexer.pop_state()
        return t

    def t_backquote_error(self, t):
        raise Exception('Error on line %s, col %s while lexing backquoted operator: Unexpected character: %s ' % (t.lexer.lineno, t.lexpos - t.latest_newline, t.value[0]))
    # Counting lines, handling errors

    def t_newline(self, t):
        r'\n'
        t.lexer.lineno += 1
        t.lexer.latest_newline = t.lexpos

    def t_error(self, t):
        raise Exception('Error on line %s, col %s: Unexpected character: %s ' % (t.lexer.lineno, t.lexpos - t.latest_newline, t.value[0]))
if __name__ == '__main__':
    logging.basicConfig()
    lexer = JsonPathLexer(debug=True)
    for token in lexer.tokenize(sys.stdin.read()):
        print('%-20s%s' % (token.value, token.type))
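For reference, a minimal sketch of driving this lexer from application code (the module path follows the package layout above; the sample path expression is an arbitrary choice for illustration):

from jsonpath_rw.lexer import JsonPathLexer

lexer = JsonPathLexer()
for token in lexer.tokenize('$.store.book[0]..author'):
    # Single characters from `literals` ('$', '.', '[', ']', ...) come back
    # with the character itself as the token type; bare words are 'ID',
    # integers are 'NUMBER', and '..' is 'DOUBLEDOT'.
    print('%-20s%s' % (token.value, token.type))

Each yielded token also carries a col attribute, which tokenize() computes as the offset from the most recent newline.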