This file is indexed.

/usr/lib/python2.7/dist-packages/html5_parser/soup.py is in python-html5-parser 0.4.4-1.

This file is owned by root:root, with mode 0o644.

The actual contents of the file can be viewed below.

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
#!/usr/bin/env python
# vim:fileencoding=utf-8
# License: Apache 2.0 Copyright: 2017, Kovid Goyal <kovid at kovidgoyal.net>

from __future__ import absolute_import, division, print_function, unicode_literals

# Alias for the text string type. Because unicode_literals is imported from
# __future__ in this module, '' is a text literal, so type('') is the text
# type of whichever interpreter is running.
unicode = type('')


def soup_module():
    """Return the BeautifulSoup module to use, importing it on first call.

    ``bs4`` is tried first; if it cannot be imported, the ``BeautifulSoup``
    package is imported instead. The chosen module is stored on
    ``soup_module.ans`` and returned directly by later calls.

    :raises ImportError: if neither module can be imported.
    """
    cached = soup_module.ans
    if cached is not None:
        return cached
    try:
        import bs4 as module
    except ImportError:
        import BeautifulSoup as module
    soup_module.ans = module
    return soup_module.ans


# No module selected yet; filled in lazily by soup_module().
soup_module.ans = None


def set_soup_module(val):
    """Set the module that soup_module() returns.

    :param val: stored as ``soup_module.ans``. Passing ``None`` makes the
        next soup_module() call perform its import again.
    """
    setattr(soup_module, 'ans', val)


def bs4_fast_append(self, new_child):
    """Append ``new_child`` as the last entry of ``self.contents``.

    Sets the parent, sibling and element links of ``new_child`` and of the
    nodes that precede it, using the bs4 attribute names.

    :param self: the node receiving the child.
    :param new_child: the node to append; its ``next_sibling`` and
        ``next_element`` are reset to ``None``.
    """
    siblings = self.contents
    new_child.parent = self
    if not siblings:
        # First child: nothing to its left, and the parent precedes it.
        new_child.previous_sibling = None
        new_child.previous_element = self
    else:
        last = siblings[-1]
        new_child.previous_sibling = last
        last.next_sibling = new_child
        # The element before the new child is whatever the previous
        # sibling reports as its last descendant.
        new_child.previous_element = last._last_descendant(False)
    new_child.previous_element.next_element = new_child
    new_child.next_sibling = None
    new_child.next_element = None
    siblings.append(new_child)


def bs4_new_tag(Tag, soup):
    """Build a ``new_tag(name, attrs)`` factory for bs4-style tags.

    :param Tag: the tag class to instantiate.
    :param soup: passed as the first positional argument to ``Tag``.
    :return: a function that takes ``(name, attrs)`` and returns
        ``Tag(soup, name=name, attrs=attrs)``.
    """

    def new_tag(name, attrs):
        # name and attrs are passed by keyword; soup is positional.
        tag = Tag(soup, name=name, attrs=attrs)
        return tag

    return new_tag


def bs3_fast_append(self, newChild):
    """Append ``newChild`` as the last entry of ``self.contents``.

    Sets the parent, sibling and document-order links of ``newChild`` and
    of the nodes that precede it, using the BeautifulSoup 3 attribute names
    (``previousSibling``/``nextSibling`` and ``previous``/``next``).

    :param self: the node receiving the child.
    :param newChild: the node to append; its ``nextSibling`` and ``next``
        are reset to ``None``.
    """
    newChild.parent = self
    if self.contents:
        previousChild = self.contents[-1]
        newChild.previousSibling = previousChild
        previousChild.nextSibling = newChild
        # The node before the new child is whatever the previous sibling
        # reports as its last recursive child.
        newChild.previous = previousChild._lastRecursiveChild()
    else:
        newChild.previousSibling = None
        newChild.previous = self
    newChild.previous.next = newChild

    # The forward link in this function is ``next`` (see the assignment just
    # above), so that is the attribute that must be cleared on the new tail
    # node. Previously only ``next_element`` was cleared here.
    newChild.nextSibling = newChild.next = None
    # Still set, so code that read the attribute written by earlier versions
    # of this function keeps working.
    newChild.next_element = None
    self.contents.append(newChild)


def bs3_new_tag(Tag, soup):
    """Build a ``new_tag(name, attrs)`` factory for BeautifulSoup 3 tags.

    :param Tag: the tag class to instantiate.
    :param soup: passed as the first argument to ``Tag``.
    :return: a function that takes ``(name, attrs)``, creates
        ``Tag(soup, name)`` and then sets ``attrs`` to ``attrs.items()``
        and ``attrMap`` to the mapping itself.
    """

    def new_tag(name, attrs):
        tag = Tag(soup, name)
        # Attributes are attached after construction: the item pairs go on
        # ``attrs`` and the original mapping on ``attrMap``.
        tag.attrs, tag.attrMap = attrs.items(), attrs
        return tag

    return new_tag


# Tag names treated as void elements; init_soup() uses this set for the
# self-closing check it installs on a BeautifulSoup 3 soup.
VOID_ELEMENTS = frozenset((
    'area', 'base', 'br', 'col', 'embed', 'hr', 'img', 'input',
    'keygen', 'link', 'menuitem', 'meta', 'param', 'source', 'track', 'wbr',
))


def init_soup():
    """Create an empty soup and the helpers needed to populate it.

    The module returned by soup_module() is inspected: a ``__version__``
    starting with ``'3.'`` selects the BeautifulSoup 3 helpers, anything
    else selects the bs4 helpers.

    :return: the tuple ``(bs, soup, new_tag, Comment, append,
        NavigableString)`` where ``bs`` is the module, ``soup`` a new empty
        ``BeautifulSoup`` object, ``new_tag`` a ``(name, attrs)`` tag
        factory bound to ``soup`` and ``append`` the matching fast-append
        function.
    """
    bs = soup_module()
    is_bs3 = bs.__version__.startswith('3.')
    if not is_bs3:
        soup = bs.BeautifulSoup('', 'lxml')
        new_tag, append = bs4_new_tag(bs.Tag, soup), bs4_fast_append
    else:
        soup = bs.BeautifulSoup()
        new_tag, append = bs3_new_tag(bs.Tag, soup), bs3_fast_append
        # NOTE(review): this lambda is stored on the instance, so it is not
        # bound as a method and callers would have to pass both arguments
        # explicitly -- confirm this is what the consumer expects.
        soup.isSelfClosing = lambda self, name: name in VOID_ELEMENTS
    return bs, soup, new_tag, bs.Comment, append, bs.NavigableString


def parse(utf8_data, stack_size=16 * 1024, keep_doctype=False, return_root=True):
    """Parse HTML into a BeautifulSoup tree using the html_parser module.

    :param utf8_data: the HTML to parse. Anything that is not ``bytes`` is
        encoded to UTF-8 first.
    :param stack_size: forwarded unchanged to
        ``html_parser.parse_and_build``.
    :param keep_doctype: when true, and the soup module provides a
        ``Doctype`` class, a doctype node is appended to the soup object
        for the document's doctype.
    :param return_root: when true return the root node produced by the
        parser, otherwise return the soup object that contains it.
    :return: the root node or the soup object, per ``return_root``.
    """
    from . import html_parser
    bs, soup, new_tag, Comment, append, NavigableString = init_soup()
    if not isinstance(utf8_data, bytes):
        utf8_data = utf8_data.encode('utf-8')

    def add_doctype(name, public_id, system_id):
        # bs4's Doctype adds the '<!DOCTYPE ' prefix and '>' suffix itself
        # when serialised, so the node must hold only the declaration body.
        # for_name_and_ids builds that body, with quoted identifiers and the
        # PUBLIC/SYSTEM keywords. Empty identifiers are mapped to None so
        # they are treated as absent.
        soup.append(bs.Doctype.for_name_and_ids(name, public_id or None, system_id or None))

    # Only hand the parser a doctype callback when asked to and supported.
    dt = add_doctype if keep_doctype and hasattr(bs, 'Doctype') else None
    root = html_parser.parse_and_build(
        utf8_data, new_tag, Comment, NavigableString, append, dt, stack_size)
    soup.append(root)
    return root if return_root else soup