/usr/share/doc/diveintopython/examples/pyfontify.py is in diveintopython 5.4-2.
This file is owned by root:root, with mode 0o644.
The actual contents of the file can be viewed below.
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 | """Module to analyze Python source code; for syntax coloring tools.
Interface:
tags = fontify(pytext, searchfrom, searchto)
The 'pytext' argument is a string containing Python source code.
The (optional) arguments 'searchfrom' and 'searchto' may contain a slice in pytext.
The returned value is a list of tuples, formatted like this:
[('keyword', 0, 6, None), ('keyword', 11, 17, None), ('comment', 23, 53, None), etc. ]
The tuple contents are always like this:
(tag, startindex, endindex, sublist)
tag is one of ('comment', 'string', 'keyword', 'function', 'class')
sublist is not used, hence always None.
"""
# Based on FontText.py by Mitchell S. Chapman,
# which was modified by Zachary Roadhouse,
# then un-Tk'd by Just van Rossum.
# Many thanks for regular expression debugging & authoring are due to:
# Tim (the-incredib-ly y'rs) Peters and Cristian Tismer
# So, who owns the copyright? ;-) How about this:
# Copyright 1996-1997:
# Mitchell S. Chapman,
# Zachary Roadhouse,
# Tim Peters,
# Just van Rossum
#
# Version 0.4 - changes copyright (C) 2001 Mark Pilgrim (mark@diveintopython.org)
# 2001/02/05 - MAP - distinguish between class and function identifiers
# 2001/03/21 - MAP - get keywords from keyword module (instead of hard-coded list)
# 2001/03/22 - MAP - use re module instead of deprecated regex module
__version__ = "0.4"
import string, re, keyword
# Build up a regular expression which will match anything
# interesting, including multi-line triple-quoted strings.
commentPat = "#.*"
pat = "q[^\q\n]*(\\\\[\000-\377][^\q\n]*)*q"
quotePat = string.replace(pat, "q", "'") + "|" + string.replace(pat, 'q', '"')
# Way to go, Tim!
pat = """
qqq
[^\\q]*
(
( \\\\[\000-\377]
| q
( \\\\[\000-\377]
| [^\\q]
| q
( \\\\[\000-\377]
| [^\\q]
)
)
)
[^\\q]*
)*
qqq
"""
pat = string.join(string.split(pat), '') # get rid of whitespace
tripleQuotePat = string.replace(pat, "q", "'") + "|" + string.replace(pat, 'q', '"')
# Build up a regular expression which matches all and only
# Python keywords. This will let us skip the uninteresting
# identifier references.
# nonKeyPat identifies characters which may legally precede
# a keyword pattern.
nonKeyPat = "(^|[^a-zA-Z0-9_.\"'])"
keywordsPat = string.join(keyword.kwlist, "|")
keyPat = nonKeyPat + "(" + keywordsPat + ")" + nonKeyPat
matchPat = keyPat + "|" + commentPat + "|" + tripleQuotePat + "|" + quotePat
matchRE = re.compile(matchPat)
idKeyPat = "[ \t]*[A-Za-z_][A-Za-z_0-9.]*" # Ident w. leading whitespace.
idRE = re.compile(idKeyPat)
def fontify(pytext, searchfrom=0, searchto=None):
if searchto is None:
searchto = len(pytext)
tags = []
commentTag = 'comment'
stringTag = 'string'
keywordTag = 'keyword'
functionTag = 'function'
classTag = 'class'
start = 0
end = searchfrom
while 1:
matchObject = matchRE.search(pytext, end)
if not matchObject: break
(start, end) = matchObject.span()
match = matchObject.group(0)
c = match[0]
if c not in "#'\"":
# Must have matched a keyword.
if start <> searchfrom:
# there's still a redundant char before and after it, strip!
match = match[1:-1]
start = start + 1
else:
# this is the first keyword in the text.
# Only a space at the end.
match = match[:-1]
end = end - 1
tags.append((keywordTag, start, end, None))
# If this was a defining keyword, look ahead to the
# following identifier.
if match in ["def", "class"]:
idMatchObject = idRE.search(pytext, end)
if idMatchObject:
(start, end) = idMatchObject.span()
match = idMatchObject.group(0)
tags.append(((match=='def') and functionTag or classTag, start, end, None))
elif c == "#":
tags.append((commentTag, start, end, None))
else:
tags.append((stringTag, start, end, None))
return tags
def test(path):
f = open(path)
text = f.read()
f.close()
tags = fontify(text)
for tag, start, end, sublist in tags:
print tag, `text[start:end]`, start, end
|