/usr/lib/python2.7/dist-packages/patsy/tokens.py is in python-patsy 0.3.0-3.
This file is owned by root:root, with mode 0o644.
The actual contents of the file can be viewed below.
# This file is part of Patsy
# Copyright (C) 2011 Nathaniel Smith <njs@pobox.com>
# See file LICENSE.txt for license information.

# Utilities for dealing with Python code at the token level.
#
# Includes:
#   a "pretty printer" that converts a sequence of tokens back into a
#       readable, white-space normalized string.
#   a utility function to replace calls to global functions with calls to
#       other functions

import tokenize

from six.moves import cStringIO as StringIO

from patsy import PatsyError
from patsy.origin import Origin

__all__ = ["python_tokenize", "pretty_untokenize",
           "normalize_token_spacing"]

# A convenience wrapper around tokenize.generate_tokens. Yields tuples
#   (tokenize type, token string, origin object)
def python_tokenize(code):
    # Since formulas can only contain Python expressions, and Python
    # expressions cannot meaningfully contain newlines, we'll just remove all
    # the newlines up front to avoid any complications:
    code = code.replace("\n", " ").strip()
    it = tokenize.generate_tokens(StringIO(code).readline)
    try:
        for (pytype, string, (_, start), (_, end), code) in it:
            if pytype == tokenize.ENDMARKER:
                break
            origin = Origin(code, start, end)
            assert pytype not in (tokenize.NL, tokenize.NEWLINE)
            if pytype == tokenize.ERRORTOKEN:
                raise PatsyError("error tokenizing input "
                                 "(maybe an unclosed string?)",
                                 origin)
            if pytype == tokenize.COMMENT:
                raise PatsyError("comments are not allowed", origin)
            yield (pytype, string, origin)
        else: # pragma: no cover
            raise ValueError("stream ended without ENDMARKER?!?")
    except tokenize.TokenError as e:
        # TokenError is raised iff the tokenizer thinks that there is
        # some sort of multi-line construct in progress (e.g., an
        # unclosed parenthesis, which in Python lets a virtual line
        # continue past the end of the physical line), and it hits the
        # end of the source text. We have our own error handling for
        # such cases, so just treat this as an end-of-stream.
        #
        # Just in case someone adds some other error case:
        assert e.args[0].startswith("EOF in multi-line")
        return
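
# --- Editor's note: the helper below is an illustrative usage sketch, not
# part of the original patsy file, and its name _example_python_tokenize is
# hypothetical. It shows how python_tokenize is typically consumed.
def _example_python_tokenize(code="x + 1"):
    # python_tokenize is a generator, so materialize it with a list
    # comprehension; tokenize.tok_name maps the numeric token type back to a
    # readable name like "NAME" or "OP".
    return [(tokenize.tok_name[pytype], string, origin)
            for (pytype, string, origin) in python_tokenize(code)]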

def test_python_tokenize():
    code = "a + (foo * -1)"
    tokens = list(python_tokenize(code))
    expected = [(tokenize.NAME, "a", Origin(code, 0, 1)),
                (tokenize.OP, "+", Origin(code, 2, 3)),
                (tokenize.OP, "(", Origin(code, 4, 5)),
                (tokenize.NAME, "foo", Origin(code, 5, 8)),
                (tokenize.OP, "*", Origin(code, 9, 10)),
                (tokenize.OP, "-", Origin(code, 11, 12)),
                (tokenize.NUMBER, "1", Origin(code, 12, 13)),
                (tokenize.OP, ")", Origin(code, 13, 14))]
    assert tokens == expected

    code2 = "a + (b"
    tokens2 = list(python_tokenize(code2))
    expected2 = [(tokenize.NAME, "a", Origin(code2, 0, 1)),
                 (tokenize.OP, "+", Origin(code2, 2, 3)),
                 (tokenize.OP, "(", Origin(code2, 4, 5)),
                 (tokenize.NAME, "b", Origin(code2, 5, 6))]
    assert tokens2 == expected2

    from nose.tools import assert_raises
    assert_raises(PatsyError, list, python_tokenize("a b # c"))

    from nose.tools import assert_raises
    assert_raises(PatsyError, list, python_tokenize("a b \"c"))

_python_space_both = (list("+-*/%&^|<>")
                      + ["==", "<>", "!=", "<=", ">=",
                         "<<", ">>", "**", "//"])
_python_space_before = (_python_space_both
                        + ["!", "~"])
_python_space_after = (_python_space_both
                       + [",", ":"])

def pretty_untokenize(typed_tokens):
    text = []
    prev_was_space_delim = False
    prev_wants_space = False
    prev_was_open_paren_or_comma = False
    prev_was_object_like = False
    brackets = []
    for token_type, token in typed_tokens:
        assert token_type not in (tokenize.INDENT, tokenize.DEDENT,
                                  tokenize.NEWLINE, tokenize.NL)
        if token_type == tokenize.ENDMARKER:
            continue
        if token_type in (tokenize.NAME, tokenize.NUMBER, tokenize.STRING):
            if prev_wants_space or prev_was_space_delim:
                text.append(" ")
            text.append(token)
            prev_wants_space = False
            prev_was_space_delim = True
        else:
            if token in ("(", "[", "{"):
                brackets.append(token)
            elif brackets and token in (")", "]", "}"):
                brackets.pop()
            this_wants_space_before = (token in _python_space_before)
            this_wants_space_after = (token in _python_space_after)
            # Special case for slice syntax: foo[:10]
            # Otherwise ":" is spaced after, like: "{1: ...}", "if a: ..."
            if token == ":" and brackets and brackets[-1] == "[":
                this_wants_space_after = False
            # Special case for foo(*args), foo(a, *args):
            if token in ("*", "**") and prev_was_open_paren_or_comma:
                this_wants_space_before = False
                this_wants_space_after = False
            # Special case for "a = foo(b=1)":
            if token == "=" and not brackets:
                this_wants_space_before = True
                this_wants_space_after = True
            # Special case for unary -, +. Our heuristic is that if we see the
            # + or - after something that looks like an object (a NAME,
            # NUMBER, STRING, or close paren) then it is probably binary,
            # otherwise it is probably unary.
            if token in ("+", "-") and not prev_was_object_like:
                this_wants_space_before = False
                this_wants_space_after = False
            if prev_wants_space or this_wants_space_before:
                text.append(" ")
            text.append(token)
            prev_wants_space = this_wants_space_after
            prev_was_space_delim = False
        if (token_type in (tokenize.NAME, tokenize.NUMBER, tokenize.STRING)
            or token == ")"):
            prev_was_object_like = True
        else:
            prev_was_object_like = False
        prev_was_open_paren_or_comma = token in ("(", ",")
    return "".join(text)

def normalize_token_spacing(code):
    tokens = [(t[0], t[1])
              for t in tokenize.generate_tokens(StringIO(code).readline)]
    return pretty_untokenize(tokens)

def test_pretty_untokenize_and_normalize_token_spacing():
    assert normalize_token_spacing("1 + 1") == "1 + 1"
    assert normalize_token_spacing("1+1") == "1 + 1"
    assert normalize_token_spacing("1*(2+3**2)") == "1 * (2 + 3 ** 2)"
    assert normalize_token_spacing("a and b") == "a and b"
    assert normalize_token_spacing("foo(a=bar.baz[1:])") == "foo(a=bar.baz[1:])"
    assert normalize_token_spacing("""{"hi":foo[:]}""") == """{"hi": foo[:]}"""
    assert normalize_token_spacing("""'a' "b" 'c'""") == """'a' "b" 'c'"""
    assert normalize_token_spacing('"""a""" is 1 or 2==3') == '"""a""" is 1 or 2 == 3'
    assert normalize_token_spacing("foo ( * args )") == "foo(*args)"
    assert normalize_token_spacing("foo ( a * args )") == "foo(a * args)"
    assert normalize_token_spacing("foo ( ** args )") == "foo(**args)"
    assert normalize_token_spacing("foo ( a ** args )") == "foo(a ** args)"
    assert normalize_token_spacing("foo (1, * args )") == "foo(1, *args)"
    assert normalize_token_spacing("foo (1, a * args )") == "foo(1, a * args)"
    assert normalize_token_spacing("foo (1, ** args )") == "foo(1, **args)"
    assert normalize_token_spacing("foo (1, a ** args )") == "foo(1, a ** args)"

    assert normalize_token_spacing("a=foo(b = 1)") == "a = foo(b=1)"

    assert normalize_token_spacing("foo(+ 10, bar = - 1)") == "foo(+10, bar=-1)"
    assert normalize_token_spacing("1 + +10 + -1 - 5") == "1 + +10 + -1 - 5"