This file is indexed.

/usr/share/pyshared/epydoc/markup/pyval_repr.py is in python-epydoc 3.0.1+dfsg-4.

This file is owned by root:root, with mode 0o644.

The actual contents of the file can be viewed below.

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
# epydoc -- Marked-up Representations for Python Values
#
# Copyright (C) 2005 Edward Loper
# Author: Edward Loper <edloper@loper.org>
# URL: <http://epydoc.sf.net>
#
# $Id: apidoc.py 1448 2007-02-11 00:05:34Z dvarrazzo $

"""
Syntax highlighter for Python values.  Currently provides special
colorization support for:

  - lists, tuples, sets, frozensets, dicts
  - numbers
  - strings
  - compiled regexps

The highlighter also takes care of line-wrapping, and automatically
stops generating repr output as soon as it has exceeded the specified
number of lines (which should make it faster than pprint for large
values).  It does I{not} bother to do automatic cycle detection,
because maxlines is typically around 5, so it's really not worth it.

The syntax-highlighted output is encoded using a
L{ParsedEpytextDocstring}, which can then be used to generate output in
a variety of formats.
"""
__docformat__ = 'epytext en'

# Implementation note: we use exact tests for classes (list, etc)
# rather than using isinstance, because subclasses might override
# __repr__.

import types, re
import epydoc.apidoc
from epydoc.util import decode_with_backslashreplace
from epydoc.util import plaintext_to_html, plaintext_to_latex
from epydoc.compat import *
import sre_parse, sre_constants

from epydoc.markup.epytext import Element, ParsedEpytextDocstring

def is_re_pattern(pyval):
    """
    Return true if C{pyval} is a compiled regular expression object.

    The value's exact type is compared against the type that
    C{re.compile} really returns, because the name of that type is
    not stable across interpreter versions (it has been spelled
    C{SRE_Pattern} as well as C{Pattern}).  The historical name check
    is kept as a fallback so that anything accepted before is still
    accepted.

    @param pyval: The value to test.
    @rtype: C{bool}
    """
    pyval_type = type(pyval)
    # re.compile() caches compiled patterns, so this lookup is cheap.
    if pyval_type is type(re.compile('')):
        return True
    return pyval_type.__name__ == 'SRE_Pattern'

class _ColorizerState:
    """
    An object uesd to keep track of the current state of the pyval
    colorizer.  The L{mark()}/L{restore()} methods can be used to set
    a backup point, and restore back to that backup point.  This is
    used by several colorization methods that first try colorizing
    their object on a single line (setting linebreakok=False); and
    then fall back on a multi-line output if that fails.  The L{score}
    variable is used to keep track of a 'score', reflecting how good
    we think this repr is.  E.g., unhelpful values like '<Foo instance
    at 0x12345>' get low scores.  If the score is too low, we'll use
    the parse-derived repr instead.
    """
    def __init__(self):
        self.result = []
        self.charpos = 0
        self.lineno = 1
        self.linebreakok = True
        
        #: How good this represention is?
        self.score = 0

    def mark(self):
        return (len(self.result), self.charpos,
                self.lineno, self.linebreakok, self.score)

    def restore(self, mark):
        n, self.charpos, self.lineno, self.linebreakok, self.score = mark
        del self.result[n:]

class _Maxlines(Exception):
    """A control-flow exception that is raised when PyvalColorizer
    exceeds the maximum number of allowed lines."""
    
class _Linebreak(Exception):
    """A control-flow exception that is raised when PyvalColorizer
    generates a string containing a newline, but the state object's
    linebreakok variable is False.  A line that has to be wrapped
    because it is too long counts as containing a newline."""

class ColorizedPyvalRepr(ParsedEpytextDocstring):
    """
    A L{ParsedEpytextDocstring} that holds the colorized repr of a
    Python value, together with two pieces of information about it.

    @ivar score: A score, evaluating how good this repr is.
    @ivar is_complete: True if this colorized repr completely describes
       the object.
    """
    def __init__(self, tree, score, is_complete):
        # Let the base class take care of the markup tree itself.
        ParsedEpytextDocstring.__init__(self, tree)
        self.is_complete = is_complete
        self.score = score

def colorize_pyval(pyval, parse_repr=None, min_score=None,
                   linelen=75, maxlines=5, linebreakok=True, sort=True):
    """
    Convenience wrapper: build a L{PyvalColorizer} configured with
    C{linelen}, C{maxlines}, C{linebreakok} and C{sort}, and return
    the result of its C{colorize} method for C{pyval}, C{parse_repr}
    and C{min_score}.
    """
    colorizer = PyvalColorizer(linelen=linelen, maxlines=maxlines,
                               linebreakok=linebreakok, sort=sort)
    return colorizer.colorize(pyval, parse_repr=parse_repr,
                              min_score=min_score)

class PyvalColorizer:
    """
    Syntax highlighter for Python values.
    """

    def __init__(self, linelen=75, maxlines=5, linebreakok=True, sort=True):
        """
        Store the colorizer settings.

        @param linelen: Number of characters that fit on one line
            before the output is line-wrapped.
        @param maxlines: Number of lines after which output is cut off.
        @param linebreakok: Whether the output may span several lines.
        @param sort: Whether sets, frozensets and dict items should be
            sorted before they are displayed.
        """
        self.sort = sort
        self.linebreakok = linebreakok
        self.maxlines = maxlines
        self.linelen = linelen

    #////////////////////////////////////////////////////////////
    # Colorization Tags & other constants
    #////////////////////////////////////////////////////////////

    # Style names attached to the pieces of a colorized value.  A tag
    # of None means the text is emitted without any markup element.
    GROUP_TAG = 'variable-group'     # e.g., "[" and "]"
    COMMA_TAG = 'variable-op'        # The "," that separates elements
    COLON_TAG = 'variable-op'        # The ":" in dictionaries
    CONST_TAG = None                 # None, True, False
    NUMBER_TAG = None                # ints, floats, etc
    QUOTE_TAG = 'variable-quote'     # Quotes around strings.
    STRING_TAG = 'variable-string'   # Body of string literals

    # Style names used when colorizing compiled regular expressions:
    # literal characters, categories and anchors; grouping delimiters
    # such as parentheses and brackets; group names and backreferences;
    # operators such as "|", "-" and the repetition suffixes; and the
    # leading "(?...)" flags marker.
    RE_CHAR_TAG = None
    RE_GROUP_TAG = 're-group'
    RE_REF_TAG = 're-ref'
    RE_OP_TAG = 're-op'
    RE_FLAGS_TAG = 're-flags'

    # Marker elements shared by every colorizer: ELLIPSIS is appended
    # when the output is cut short, LINEWRAP is inserted where a line
    # is wrapped, and UNKNOWN_REPR stands in for a value that has no
    # usable repr.
    ELLIPSIS = Element('code', u'...', style='variable-ellipsis')
    LINEWRAP = Element('symbol', u'crarr')
    UNKNOWN_REPR = Element('code', u'??', style='variable-unknown')
    
    # Matches uninformative reprs of the form '<... at 0x...>'; values
    # whose repr matches have their score lowered.
    GENERIC_OBJECT_RE = re.compile(r'^<.* at 0x[0-9a-f]+>$', re.IGNORECASE)

    ESCAPE_UNICODE = False # should we escape non-ascii unicode chars?

    #////////////////////////////////////////////////////////////
    # Entry Point
    #////////////////////////////////////////////////////////////

    def colorize(self, pyval, parse_repr=None, min_score=None):
        """
        Build the colorized representation of a value.

        If C{pyval} is known it is colorized directly; otherwise
        C{parse_repr} is emitted verbatim, and if that is unavailable
        too the "unknown" marker is used.  Output that runs past the
        line limits is cut off and terminated with an ellipsis.  When
        the result for a known C{pyval} scores below C{min_score} and
        a C{parse_repr} is available, the C{parse_repr} is colorized
        instead.

        @return: A L{ColorizedPyvalRepr} describing the given pyval.
        """
        UNKNOWN = epydoc.apidoc.UNKNOWN

        # The state object tracks everything produced so far.
        state = _ColorizerState()
        state.linebreakok = self.linebreakok

        is_complete = False
        try:
            if pyval is UNKNOWN:
                if parse_repr in (None, UNKNOWN):
                    state.result.append(PyvalColorizer.UNKNOWN_REPR)
                else:
                    self._output(parse_repr, None, state)
            else:
                self._colorize(pyval, state)
            is_complete = True
        except (_Maxlines, _Linebreak):
            # We ran out of room: finish with an ellipsis marker.
            output = state.result
            if not self.linebreakok:
                # Single-line mode: drop a trailing wrap marker and
                # free up three characters for the ellipsis.
                if output[-1] is self.LINEWRAP:
                    output.pop()
                self._trim_result(output, 3)
            else:
                output.append('\n')
            output.append(self.ELLIPSIS)

        # A low-scoring repr is replaced by the parse-derived one.
        retry_with_parse_repr = (
            pyval is not UNKNOWN and parse_repr not in (None, UNKNOWN)
            and min_score is not None and state.score < min_score)
        if retry_with_parse_repr:
            return self.colorize(UNKNOWN, parse_repr)

        # Wrap the collected pieces in a single tree.
        return ColorizedPyvalRepr(Element('epytext', *state.result),
                                  state.score, is_complete)

    def _colorize(self, pyval, state):
        """
        Colorize a single value, dispatching on its exact type, and
        add the result to C{state}.  Every value colorized raises the
        score by one; a value whose repr cannot be obtained, or whose
        repr looks like a generic '<... at 0x...>' string, lowers it.
        """
        state.score += 1
        kind = type(pyval)

        # Constants and numbers.
        if pyval is None or pyval is True or pyval is False:
            self._output(unicode(pyval), self.CONST_TAG, state)
            return
        if kind in (int, float, long, types.ComplexType):
            self._output(unicode(pyval), self.NUMBER_TAG, state)
            return

        # Strings.
        if kind is str:
            self._colorize_str(pyval, state, '', 'string-escape')
            return
        if kind is unicode:
            encoding = None
            if self.ESCAPE_UNICODE:
                encoding = 'unicode-escape'
            self._colorize_str(pyval, state, 'u', encoding)
            return

        # Containers.
        if kind is list:
            self._multiline(self._colorize_iter, pyval, state, '[', ']')
            return
        if kind is tuple:
            self._multiline(self._colorize_iter, pyval, state, '(', ')')
            return
        if kind is set:
            self._multiline(self._colorize_iter, self._sort(pyval),
                            state, 'set([', '])')
            return
        if kind is frozenset:
            self._multiline(self._colorize_iter, self._sort(pyval),
                            state, 'frozenset([', '])')
            return
        if kind is dict:
            self._multiline(self._colorize_dict, self._sort(pyval.items()),
                            state, '{', '}')
            return

        # Compiled regular expressions.
        if is_re_pattern(pyval):
            self._colorize_re(pyval, state)
            return

        # Anything else: fall back on the value's own repr.
        try:
            pyval_repr = repr(pyval)
            if not isinstance(pyval_repr, (str, unicode)):
                pyval_repr = unicode(pyval_repr)
        except KeyboardInterrupt:
            raise
        except:
            # The repr is unusable; penalize heavily and show '??'.
            state.score -= 100
            state.result.append(self.UNKNOWN_REPR)
            return

        if self.GENERIC_OBJECT_RE.match(pyval_repr):
            state.score -= 5
        self._output(pyval_repr, None, state)

    def _sort(self, items):
        """
        Return C{items} sorted, if sorting is enabled and the items
        can be sorted; otherwise return C{items} unchanged.
        """
        if self.sort:
            try:
                items = sorted(items)
            except KeyboardInterrupt:
                raise
            except:
                # Best effort only: unsortable items keep their order.
                pass
        return items
        
    def _trim_result(self, result, num_chars):
        """
        Remove text from the end of C{result}, in place, until
        C{num_chars} characters have been removed or C{result} is
        empty.  Entries that become empty are dropped from the list.

        The marker elements stored on the class (C{ELLIPSIS},
        C{LINEWRAP} and C{UNKNOWN_REPR}) are shared by every result
        list, so they are never edited: when one of them has to be
        shortened it is removed from C{result} as a whole instead.
        Editing them in place would change those markers for every
        value colorized afterwards.

        @param result: The list of strings and single-child
            C{Element}s to trim.
        @param num_chars: The number of characters to remove.
        """
        while num_chars > 0 and result:
            last = result[-1]
            is_element = isinstance(last, Element)
            if is_element:
                assert len(last.children) == 1
                text = last.children[0]
            else:
                text = last

            trim = min(num_chars, len(text))
            remaining = text[:len(text)-trim]
            num_chars -= trim

            is_shared = (last is self.ELLIPSIS or last is self.LINEWRAP or
                         last is self.UNKNOWN_REPR)
            if is_shared or not remaining:
                result.pop()
            elif is_element:
                last.children[0] = remaining
            else:
                result[-1] = remaining

    #////////////////////////////////////////////////////////////
    # Object Colorization Functions
    #////////////////////////////////////////////////////////////

    def _multiline(self, func, pyval, state, *args):
        """
        Helper for container-type colorizers.  C{func(pyval, state,
        *args)} is first called with line breaks disallowed.  If that
        attempt raises L{_Linebreak} and line breaks were allowed on
        entry, the state is rolled back and C{func} is called a second
        time with line breaks allowed; if they were not allowed on
        entry, the exception is passed on to the caller.
        """
        saved_linebreakok = state.linebreakok
        checkpoint = state.mark()

        state.linebreakok = False
        try:
            func(pyval, state, *args)
        except _Linebreak:
            if saved_linebreakok:
                # Rolling back also reinstates the saved linebreakok.
                state.restore(checkpoint)
                func(pyval, state, *args)
            else:
                raise
        else:
            state.linebreakok = saved_linebreakok
            
    def _colorize_iter(self, pyval, state, prefix, suffix):
        """
        Colorize the elements of the iterable C{pyval}, enclosed in
        C{prefix} and C{suffix}.  When line breaks are allowed each
        element goes on its own line, aligned with the first element;
        otherwise the elements are separated by C{', '}.
        """
        self._output(prefix, self.GROUP_TAG, state)
        # Later lines are indented up to the column after the prefix.
        indent = state.charpos
        separator_needed = False
        for elt in pyval:
            if not separator_needed:
                separator_needed = True
            elif state.linebreakok:
                self._output(',', self.COMMA_TAG, state)
                self._output('\n'+' '*indent, None, state)
            else:
                self._output(', ', self.COMMA_TAG, state)
            self._colorize(elt, state)
        self._output(suffix, self.GROUP_TAG, state)

    def _colorize_dict(self, items, state, prefix, suffix):
        """
        Colorize a sequence of C{(key, value)} pairs as a dictionary
        display, enclosed in C{prefix} and C{suffix}.  When line
        breaks are allowed each pair goes on its own line, aligned
        with the first pair; otherwise pairs are separated by C{', '}.
        """
        self._output(prefix, self.GROUP_TAG, state)
        # Later lines are indented up to the column after the prefix.
        indent = state.charpos
        for index, (key, val) in enumerate(items):
            if index > 0:
                if not state.linebreakok:
                    self._output(', ', self.COMMA_TAG, state)
                else:
                    self._output(',', self.COMMA_TAG, state)
                    self._output('\n'+' '*indent, None, state)
            self._colorize(key, state)
            self._output(': ', self.COLON_TAG, state)
            self._colorize(val, state)
        self._output(suffix, self.GROUP_TAG, state)

    def _colorize_str(self, pyval, state, prefix, encoding):
        """
        Colorize the string C{pyval} as a quoted literal.

        @param prefix: Text placed in front of the opening quote
            (e.g. C{'u'}).
        @param encoding: Name of a codec used to escape each line
            before it is output, or a false value to output the text
            as it is.
        """
        # Triple quotes are only used when the string really spans
        # several lines and we are allowed to show them as such.
        quote = "'"
        if '\n' in pyval and state.linebreakok:
            quote = "'''"

        # With line breaks disallowed the string stays in one piece.
        lines = [pyval]
        if state.linebreakok:
            lines = pyval.split('\n')

        # Opening quote.
        self._output(prefix+quote, self.QUOTE_TAG, state)

        # Body, one line at a time, with a newline between lines.
        newline_pending = False
        for line in lines:
            if newline_pending:
                self._output('\n', None, state)
            newline_pending = True
            if encoding:
                line = line.encode(encoding)
            self._output(line, self.STRING_TAG, state)

        # Closing quote.
        self._output(quote, self.QUOTE_TAG, state)

    def _colorize_re(self, pyval, state):
        """
        Colorize the compiled regular expression C{pyval}, rendering
        it as a C{re.compile(r'...')} call whose pattern text is
        rebuilt from the parsed form of the pattern.
        """
        pat = pyval.pattern
        flags = pyval.flags
        # Byte-string patterns are decoded to unicode first.
        if isinstance(pat, str):
            pat = decode_with_backslashreplace(pat)

        # Parse the pattern, and invert the name->number table of its
        # groups so that names can be looked up by group number.
        tree = sre_parse.parse(pat, flags)
        groups = {}
        for name, num in tree.pattern.groupdict.items():
            groups[num] = name

        # Emit the pieces.
        self._output("re.compile(r'", None, state)
        self._colorize_re_flags(tree.pattern.flags, state)
        self._colorize_re_tree(tree, state, True, groups)
        self._output("')", None, state)

    def _colorize_re_flags(self, flags, state):
        """
        Output the regexp flags in C{flags} as an inline C{(?...)}
        group, with the flag letters in sorted order.  Nothing is
        output when no flags are set.
        """
        if not flags:
            return
        letters = []
        for letter, bit in sorted(sre_parse.FLAGS.items()):
            if bit & flags:
                letters.append(letter)
        self._output('(?%s)' % ''.join(letters), self.RE_FLAGS_TAG, state)

    def _colorize_re_tree(self, tree, state, noparen, groups):
        """
        Colorize a parsed regular expression (or a part of one) by
        turning each of its elements back into pattern text.

        @param tree: A sequence of C{(op, args)} pairs, as found in
            the result of C{sre_parse.parse}.
        @param state: The colorizer state that receives the output.
        @param noparen: If false, and C{tree} has more than one
            element, the output is wrapped in parentheses.
        @param groups: A dictionary mapping group numbers to group
            names, used to render named groups.
        @raise ValueError: If a branch, category or position element
            has an argument that is not recognized.
        """
        assert noparen in (True, False)
        if len(tree) > 1 and not noparen:
            self._output('(', self.RE_GROUP_TAG, state)
        for elt in tree:
            op = elt[0]
            args = elt[1]

            if op == sre_constants.LITERAL:
                c = unichr(args)
                # Add any appropriate escaping.
                if c in '.^$\\*+?{}[]|()\'': c = '\\'+c
                elif c == '\t': c = '\\t'
                elif c == '\r': c = '\\r'
                elif c == '\n': c = '\\n'
                elif c == '\f': c = '\\f'
                elif c == '\v': c = '\\v'
                elif ord(c) > 0xffff: c = r'\U%08x' % ord(c)
                elif ord(c) > 0xff: c = r'\u%04x' % ord(c)
                elif ord(c)<32 or ord(c)>=127: c = r'\x%02x' % ord(c)
                self._output(c, self.RE_CHAR_TAG, state)

            elif op == sre_constants.ANY:
                self._output('.', self.RE_CHAR_TAG, state)

            elif op == sre_constants.BRANCH:
                if args[0] is not None:
                    raise ValueError('Branch expected None arg but got %s'
                                     % args[0])
                # The alternatives are separated by "|".
                for i, item in enumerate(args[1]):
                    if i > 0:
                        self._output('|', self.RE_OP_TAG, state)
                    self._colorize_re_tree(item, state, True, groups)

            elif op == sre_constants.IN:
                # A set holding just one category (e.g. \d) is shown
                # without the surrounding brackets.
                if (len(args) == 1 and args[0][0] == sre_constants.CATEGORY):
                    self._colorize_re_tree(args, state, False, groups)
                else:
                    self._output('[', self.RE_GROUP_TAG, state)
                    self._colorize_re_tree(args, state, True, groups)
                    self._output(']', self.RE_GROUP_TAG, state)

            elif op == sre_constants.CATEGORY:
                if args == sre_constants.CATEGORY_DIGIT: val = r'\d'
                elif args == sre_constants.CATEGORY_NOT_DIGIT: val = r'\D'
                elif args == sre_constants.CATEGORY_SPACE: val = r'\s'
                elif args == sre_constants.CATEGORY_NOT_SPACE: val = r'\S'
                elif args == sre_constants.CATEGORY_WORD: val = r'\w'
                elif args == sre_constants.CATEGORY_NOT_WORD: val = r'\W'
                else: raise ValueError('Unknown category %s' % args)
                self._output(val, self.RE_CHAR_TAG, state)

            elif op == sre_constants.AT:
                if args == sre_constants.AT_BEGINNING_STRING: val = r'\A'
                elif args == sre_constants.AT_BEGINNING: val = r'^'
                elif args == sre_constants.AT_END: val = r'$'
                elif args == sre_constants.AT_BOUNDARY: val = r'\b'
                elif args == sre_constants.AT_NON_BOUNDARY: val = r'\B'
                elif args == sre_constants.AT_END_STRING: val = r'\Z'
                else: raise ValueError('Unknown position %s' % args)
                self._output(val, self.RE_CHAR_TAG, state)

            elif op in (sre_constants.MAX_REPEAT, sre_constants.MIN_REPEAT):
                # Pick the shortest spelling of the repetition count.
                minrpt = args[0]
                maxrpt = args[1]
                if maxrpt == sre_constants.MAXREPEAT:
                    if minrpt == 0:   val = '*'
                    elif minrpt == 1: val = '+'
                    else: val = '{%d,}' % (minrpt)
                elif minrpt == 0:
                    if maxrpt == 1: val = '?'
                    else: val = '{,%d}' % (maxrpt)
                elif minrpt == maxrpt:
                    val = '{%d}' % (maxrpt)
                else:
                    val = '{%d,%d}' % (minrpt, maxrpt)
                # A trailing "?" marks the non-greedy form.
                if op == sre_constants.MIN_REPEAT:
                    val += '?'

                self._colorize_re_tree(args[2], state, False, groups)
                self._output(val, self.RE_OP_TAG, state)

            elif op == sre_constants.SUBPATTERN:
                if args[0] is None:
                    self._output('(?:', self.RE_GROUP_TAG, state)
                elif args[0] in groups:
                    self._output('(?P<', self.RE_GROUP_TAG, state)
                    self._output(groups[args[0]], self.RE_REF_TAG, state)
                    self._output('>', self.RE_GROUP_TAG, state)
                elif isinstance(args[0], (int, long)):
                    # This is cheating:
                    self._output('(', self.RE_GROUP_TAG, state)
                else:
                    self._output('(?P<', self.RE_GROUP_TAG, state)
                    self._output(args[0], self.RE_REF_TAG, state)
                    self._output('>', self.RE_GROUP_TAG, state)
                self._colorize_re_tree(args[1], state, True, groups)
                self._output(')', self.RE_GROUP_TAG, state)

            elif op == sre_constants.GROUPREF:
                self._output('\\%d' % args, self.RE_REF_TAG, state)

            elif op == sre_constants.RANGE:
                # Both ends of the range are rendered as literals.
                self._colorize_re_tree( ((sre_constants.LITERAL, args[0]),),
                                        state, False, groups )
                self._output('-', self.RE_OP_TAG, state)
                self._colorize_re_tree( ((sre_constants.LITERAL, args[1]),),
                                        state, False, groups )

            elif op == sre_constants.NEGATE:
                self._output('^', self.RE_OP_TAG, state)

            elif op == sre_constants.ASSERT:
                # A positive direction is a lookahead; otherwise it
                # is a lookbehind.
                if args[0] > 0:
                    self._output('(?=', self.RE_GROUP_TAG, state)
                else:
                    self._output('(?<=', self.RE_GROUP_TAG, state)
                self._colorize_re_tree(args[1], state, True, groups)
                self._output(')', self.RE_GROUP_TAG, state)

            elif op == sre_constants.ASSERT_NOT:
                if args[0] > 0:
                    self._output('(?!', self.RE_GROUP_TAG, state)
                else:
                    self._output('(?<!', self.RE_GROUP_TAG, state)
                self._colorize_re_tree(args[1], state, True, groups)
                self._output(')', self.RE_GROUP_TAG, state)

            elif op == sre_constants.NOT_LITERAL:
                self._output('[^', self.RE_GROUP_TAG, state)
                self._colorize_re_tree( ((sre_constants.LITERAL, args),),
                                        state, False, groups )
                self._output(']', self.RE_GROUP_TAG, state)
            else:
                # The logger is imported here because this module has
                # no explicit file-level import of it.  The element is
                # wrapped in a 1-tuple so that "%" formats it as one
                # value rather than unpacking the (op, args) pair.
                from epydoc import log
                log.error("Error colorizing regexp: unknown elt %r" % (elt,))
        if len(tree) > 1 and not noparen:
            self._output(')', self.RE_GROUP_TAG, state)
                           
    #////////////////////////////////////////////////////////////
    # Output function
    #////////////////////////////////////////////////////////////

    def _output(self, s, tag, state):
        """
        Add the string C{s} to the result list, tagging its contents
        with tag C{tag}.  Any lines that go beyond C{self.linelen}
        will be line-wrapped.

        @param s: The text to add.  A byte string is decoded to
            unicode first.
        @param tag: The style name to mark the text with, or a false
            value to add the text without any markup element.
        @param state: The colorizer state to update.
        @raise _Maxlines: If starting a new line would exceed
            C{self.maxlines}.
        @raise _Linebreak: If a new line has to be started while
            C{state.linebreakok} is false.
        """
        # Make sure the string is unicode.
        if isinstance(s, str):
            s = decode_with_backslashreplace(s)
        
        # Split the string into segments.  The first segment is the
        # content to add to the current line, and the remaining
        # segments are new lines.
        segments = s.split('\n')

        for i, segment in enumerate(segments):
            # If this isn't the first segment, then add a newline to
            # split it from the previous segment.
            if i > 0:
                if (state.lineno+1) > self.maxlines:
                    raise _Maxlines()
                if not state.linebreakok:
                    raise _Linebreak()
                state.result.append(u'\n')
                state.lineno += 1
                state.charpos = 0

            # If the segment fits on the current line, then just call
            # markup to tag it, and store the result.
            if state.charpos + len(segment) <= self.linelen:
                state.charpos += len(segment)
                if tag:
                    segment = Element('code', segment, style=tag)
                state.result.append(segment)

            # If the segment doesn't fit on the current line, then
            # line-wrap it, and insert the remainder of the line into
            # the segments list that we're iterating over.  (We'll go
            # to the beginning of the next line at the start of the
            # next iteration through the loop.)
            else:
                split = self.linelen-state.charpos
                # Growing the list mid-iteration is deliberate: the
                # leftover text is picked up as the next segment.
                segments.insert(i+1, segment[split:])
                segment = segment[:split]
                if tag:
                    segment = Element('code', segment, style=tag)
                state.result += [segment, self.LINEWRAP]