/usr/share/pyshared/phenny/modules/codepoints.py is in phenny 2~hg28-2.
This file is owned by root:root, with mode 0o644.
The actual contents of the file can be viewed below.
"""
codepoints.py - Phenny Codepoints Module
Copyright 2008, Sean B. Palmer, inamidst.com
Licensed under the Eiffel Forum License 2.
http://inamidst.com/phenny/
"""
import re, unicodedata
from itertools import islice
def about(u, cp=None, name=None):
    """Describe a single character as ``U+XXXX NAME (char)``.

    Arguments:
      u    -- the character to describe, as a one-character unicode string.
      cp   -- its code point; computed with ``ord(u)`` when omitted.
      name -- its Unicode name; looked up with ``unicodedata.name(u)`` when
              omitted.

    Returns the description as a native ``str``: a UTF-8 encoded byte string
    on Python 2 (unchanged from the previous behaviour) and text on Python 3.
    If the name has to be looked up and the character has none, the result
    is ``'U+XXXX (No name found)'``.

    Combining characters are printed after U+25CC DOTTED CIRCLE so that the
    mark has a base character to attach to.
    """
    if cp is None:
        cp = ord(u)
    if name is None:
        try:
            name = unicodedata.name(u)
        except ValueError:
            return 'U+%04X (No name found)' % cp

    if unicodedata.combining(u):
        template = u'U+%04X %s (\u25cc%s)'
    else:
        template = u'U+%04X %s (%s)'

    # Format as text and encode once at the end.  Interpolating pre-encoded
    # bytes into a byte-string template only works on Python 2; on Python 3
    # it would produce "(b'...')" and a mis-decoded dotted circle.
    text = template % (cp, name, u)
    if str is bytes:
        # Python 2: callers expect a UTF-8 encoded byte string.
        return text.encode('utf-8')
    return text
def codepoint_simple(arg):
    """Find the shortest-named BMP character matching a plain-words query.

    ``arg`` is a space-separated list of words.  It is upper-cased and each
    word must start at a word boundary of the character name, in order, with
    anything in between.  Names in which the last word is also a complete
    word are preferred; if there are none, names in which it is merely a
    prefix are used instead.

    Returns the ``about()`` description of the match with the shortest name
    (ties go to the lowest code point), or None when nothing matches.
    Only U+0000..U+FFFF is searched.
    """
    try:
        to_char, code_range = unichr, xrange
    except NameError:
        # Python 3 spells these chr and range.
        to_char, code_range = chr, range

    # Escape every word: the query is plain text, and an unescaped
    # metacharacter such as "(" would otherwise raise re.error.
    words = arg.upper().split(' ')
    stem = '\\b' + '.*\\b'.join(re.escape(word) for word in words)
    whole_words = re.compile(stem + '\\b')
    word_prefix = re.compile(stem)

    # Every whole-word match is also a prefix match, so a single pass can
    # collect both result sets instead of scanning the range twice.
    exact = []
    partial = []
    for cp in code_range(0x10000):
        char = to_char(cp)
        try:
            name = unicodedata.name(char)
        except ValueError:
            continue  # code point without a name
        if not word_prefix.search(name):
            continue
        entry = (len(name), char, cp, name)
        partial.append(entry)
        if whole_words.search(name):
            exact.append(entry)

    candidates = exact or partial
    if not candidates:
        return None
    length, char, cp, name = min(candidates)
    return about(char, cp, name)
def codepoint_extended(arg):
    """Yield descriptions of all characters whose name matches a regexp.

    ``arg`` is upper-cased and compiled as a regular expression, then
    searched for in the name of every code point from U+0001 upwards, in
    code point order.  Code points without a name are given the name '-'.
    Each hit is yielded as an ``about()`` description.

    Raises ValueError('Broken regexp: ...') if the pattern cannot be
    compiled.  As this is a generator, that happens on the first iteration
    rather than at call time.

    NOTE: because the whole pattern is upper-cased, escapes change meaning
    as well (for example ``\\d`` becomes ``\\D``).
    """
    import sys

    arg = arg.upper()
    try:
        r_search = re.compile(arg)
    except Exception:
        # Any compile failure is reported as ValueError, as before, but
        # KeyboardInterrupt and SystemExit are no longer swallowed.
        raise ValueError('Broken regexp: %r' % arg)

    try:
        to_char, code_range = unichr, xrange
    except NameError:
        # Python 3 spells these chr and range.
        to_char, code_range = chr, range

    # Narrow Python 2 builds cannot represent code points above
    # sys.maxunicode (unichr raises ValueError), so stop there.
    stop = min(0x10FFFF, sys.maxunicode + 1)
    for cp in code_range(1, stop):
        char = to_char(cp)
        name = unicodedata.name(char, '-')
        if r_search.search(name):
            yield about(char, cp, name)
def u(phenny, input):
    """Look up unicode information."""
    # The docstring above is deliberately left as-is: it is presumably
    # surfaced by the bot's help output (TODO confirm against the framework).
    #
    # Behaviour, given the text that follows the three-character ".u "
    # prefix of input.bytes:
    #   * empty, or only spaces          -> a fixed reply
    #   * 2+ characters drawn only from letters, digits, '-', space and
    #     regexp punctuation             -> treated as a query:
    #       - exactly four hex digits    -> describe that code point
    #       - contains regexp characters -> regexp search over names
    #       - otherwise                  -> plain-words search over names
    #   * anything else                  -> decoded as UTF-8 and each
    #                                       character is described/listed
    arg = input.bytes[3:]
    # phenny.msg('#inamidst', '%r' % arg)
    if not arg:
        return phenny.reply('You gave me zero length input.')
    if not arg.strip(' '):
        if len(arg) > 1:
            return phenny.reply('%s SPACEs (U+0020)' % len(arg))
        return phenny.reply('1 SPACE (U+0020)')

    # @@ space
    regexp_chars = '.?+*{}[]\\/^$'
    query_chars = set(
        'ABCDEFGHIJKLMNOPQRSTUVWYXYZ0123456789- ' + regexp_chars)
    is_query = len(arg) > 1 and not (set(arg.upper()) - query_chars)

    if not is_query:
        # The argument is the character(s) themselves.  Input arrives from
        # the network, so it may not be valid UTF-8; report that instead of
        # letting UnicodeDecodeError escape.
        try:
            text = arg.decode('utf-8')
        except UnicodeDecodeError:
            return phenny.reply('Sorry, your input is not valid UTF-8.')
        if len(text) <= 3:
            # up to three code points: describe each one in full
            for char in text:
                phenny.say(about(char))
        elif len(text) <= 10:
            # four to ten code points: list the code points only
            phenny.reply(' '.join('U+%04X' % ord(c) for c in text))
        else:
            phenny.reply('Sorry, your input is too long!')
        return

    if len(arg) == 4:
        # Four characters that parse as hexadecimal name a code point.
        try:
            char = unichr(int(arg, 16))
        except ValueError:
            pass
        else:
            return phenny.say(about(char))

    if any(c in arg for c in regexp_chars):
        # look up a codepoint with regexp; fetch a fourth result only to
        # learn whether the third one should be marked as truncated
        results = list(islice(codepoint_extended(arg), 4))
        for line in results[:2]:
            phenny.say(line)
        if len(results) == 3:
            phenny.say(results[2])
        elif len(results) > 3:
            phenny.say(results[2] + ' [...]')
        if not results:
            phenny.reply('Sorry, no results')
    else:
        # look up a codepoint freely
        result = codepoint_simple(arg)
        if result is not None:
            phenny.say(result)
        else:
            phenny.reply("Sorry, no results for %r." % arg)
u.commands = ['u']
u.example = '.u 203D'
def bytes(phenny, input):
    """Show the input as pretty printed bytes."""
    # Everything after the first space of the raw message is echoed back as
    # its repr; a message without any space is echoed back whole.
    raw = input.bytes
    payload = raw.split(' ', 1)[-1]
    phenny.reply('%r' % payload)
bytes.commands = ['bytes']
bytes.example = '.bytes \xe3\x8b\xa1'
if __name__ == '__main__':
    # Parenthesised so that this line is valid on both Python 2 (print
    # statement with a parenthesised expression) and Python 3 (function
    # call); the output is the same.  The bare print statement was a
    # SyntaxError on Python 3, which prevented the module from loading.
    print(__doc__.strip())
|