/usr/lib/python2.7/dist-packages/html5_parser/soup.py is in python-html5-parser 0.4.4-1.
This file is owned by root:root, with mode 0o644.
The actual contents of the file can be viewed below.
#!/usr/bin/env python
# vim:fileencoding=utf-8
# License: Apache 2.0 Copyright: 2017, Kovid Goyal <kovid at kovidgoyal.net>
from __future__ import absolute_import, division, print_function, unicode_literals
# Name for the text string type that works on both Python 2 and 3: because
# unicode_literals is imported above, '' is a text string, so type('') is the
# text type of whichever interpreter is running.
unicode = type('')
def soup_module():
    """Return the BeautifulSoup module used to build trees.

    The module is looked up once and cached on the ``ans`` attribute of this
    function. ``bs4`` is tried first; if importing it raises ImportError the
    ``BeautifulSoup`` module is imported instead (an ImportError from that
    second import propagates to the caller).
    """
    cached = soup_module.ans
    if cached is not None:
        return cached
    try:
        import bs4 as found
    except ImportError:
        import BeautifulSoup as found
    soup_module.ans = found
    return found


# Nothing selected yet; soup_module() fills this in on first call.
soup_module.ans = None
def set_soup_module(val):
    """Override the module that soup_module() returns.

    *val* is stored in the same ``soup_module.ans`` slot that soup_module()
    uses as its cache, so a non-None value is returned as-is by later calls,
    while None makes the next call perform the import lookup again.
    """
    setattr(soup_module, 'ans', val)
def bs4_fast_append(self, new_child):
    """Attach *new_child* as the last child of *self* (bs4 attribute names).

    Sets the parent, sibling and document-order (``previous_element`` /
    ``next_element``) links directly and then appends to ``self.contents``.
    The new node is treated as the end of the chain: its ``next_sibling``
    and ``next_element`` are both set to None.
    """
    siblings = self.contents
    new_child.parent = self
    if not siblings:
        # First child: nothing beside it, and the parent itself is the node
        # that precedes it in document order.
        new_child.previous_sibling = None
        before = self
    else:
        last = siblings[-1]
        new_child.previous_sibling = last
        last.next_sibling = new_child
        # The node preceding the new child in document order is the deepest
        # last descendant of the previous sibling.
        before = last._last_descendant(False)
    new_child.previous_element = before
    before.next_element = new_child
    new_child.next_sibling = None
    new_child.next_element = None
    siblings.append(new_child)
def bs4_new_tag(Tag, soup):
    """Return a ``new_tag(name, attrs)`` factory bound to *Tag* and *soup*.

    The factory calls ``Tag(soup, name=name, attrs=attrs)`` and returns the
    result.
    """
    def new_tag(name, attrs):
        element = Tag(soup, name=name, attrs=attrs)
        return element

    return new_tag
def bs3_fast_append(self, newChild):
    """Attach *newChild* as the last child of *self* (BeautifulSoup 3 names).

    Counterpart of bs4_fast_append using the BS3 attribute names: sibling
    links are ``previousSibling`` / ``nextSibling`` and the document-order
    links are ``previous`` / ``next``. The new node is treated as the end of
    the chain, so its ``nextSibling`` and ``next`` are both set to None.
    """
    newChild.parent = self
    if self.contents:
        previousChild = self.contents[-1]
        newChild.previousSibling = previousChild
        previousChild.nextSibling = newChild
        # The node preceding the new child in document order is the deepest
        # last descendant of the previous sibling.
        newChild.previous = previousChild._lastRecursiveChild()
    else:
        # First child: the parent itself precedes it in document order.
        newChild.previousSibling = None
        newChild.previous = self
    newChild.previous.next = newChild
    # Clear the forward link under its BS3 name, ``next``. The previous code
    # assigned ``next_element`` (the bs4 name), which left ``next`` untouched
    # and merely created an unrelated attribute on the node.
    newChild.nextSibling = newChild.next = None
    self.contents.append(newChild)
def bs3_new_tag(Tag, soup):
    """Return a ``new_tag(name, attrs)`` factory bound to *Tag* and *soup*.

    The factory builds ``Tag(soup, name)`` and then stores the attributes on
    it in two forms: ``attrs`` receives ``attrs.items()`` and ``attrMap``
    receives the mapping object itself.
    """
    def new_tag(name, attrs):
        tag = Tag(soup, name)
        pairs = attrs.items()
        tag.attrs = pairs
        tag.attrMap = attrs
        return tag

    return new_tag
# Element names that init_soup() treats as self-closing when it configures a
# BeautifulSoup 3 soup.
VOID_ELEMENTS = frozenset((
    'area', 'base', 'br', 'col', 'embed', 'hr', 'img', 'input',
    'keygen', 'link', 'menuitem', 'meta', 'param', 'source', 'track', 'wbr',
))
def init_soup():
    """Create an empty soup and the callables needed to populate it.

    The BeautifulSoup flavour is chosen from ``soup_module().__version__``:
    versions starting with ``'3.'`` get the bs3 helpers, everything else the
    bs4 helpers (with an empty document created through the ``'lxml'``
    builder).

    Returns a 6-tuple ``(bs, soup, new_tag, Comment, append,
    NavigableString)`` where ``bs`` is the module, ``soup`` the empty
    document, ``new_tag(name, attrs)`` a tag factory, ``append(parent,
    child)`` the matching fast-append function, and ``Comment`` /
    ``NavigableString`` the module's classes of those names.
    """
    bs = soup_module()
    if bs.__version__.startswith('3.'):
        soup = bs.BeautifulSoup()
        new_tag = bs3_new_tag(bs.Tag, soup)
        append = bs3_fast_append
        # Make tags created through this soup self-closing exactly when they
        # are in VOID_ELEMENTS. BeautifulSoup 3's Tag constructor asks the
        # parser object via ``parser.isSelfClosingTag(name)``, so that is the
        # name to override. A function stored on an instance is not bound, so
        # it must take only ``name``. The previous code stored a two-argument
        # lambda under ``isSelfClosing``, which nothing calls and which could
        # not have been called with a single argument anyway.
        # NOTE(review): confirm the hook name against the installed BS3.
        soup.isSelfClosingTag = lambda name: name in VOID_ELEMENTS
    else:
        soup = bs.BeautifulSoup('', 'lxml')
        new_tag = bs4_new_tag(bs.Tag, soup)
        append = bs4_fast_append
    return bs, soup, new_tag, bs.Comment, append, bs.NavigableString
def parse(utf8_data, stack_size=16 * 1024, keep_doctype=False, return_root=True):
    """Parse HTML into a BeautifulSoup tree.

    :param utf8_data: The markup. Anything that is not ``bytes`` is encoded
        to UTF-8 first.
    :param stack_size: Passed through unchanged to
        ``html_parser.parse_and_build``.
    :param keep_doctype: When true, and the soup module has a ``Doctype``
        class, a doctype callback is handed to the parser so the doctype is
        added to the soup. Otherwise no callback is passed.
    :param return_root: When true return the root node produced by the
        parser; otherwise return the soup object that contains it.
    """
    from . import html_parser
    bs, soup, new_tag, Comment, append, NavigableString = init_soup()
    if not isinstance(utf8_data, bytes):
        utf8_data = utf8_data.encode('utf-8')

    def add_doctype(name, public_id, system_id):
        # Let bs4 build the declaration body. Doctype adds the
        # ``<!DOCTYPE `` / ``>`` wrapper itself when it is output, so passing
        # a string that already contains the wrapper (as the previous code
        # did) produced a doubled declaration with unquoted identifiers.
        # for_name_and_ids() only skips an identifier that is None, so empty
        # values are normalised to None here.
        soup.append(bs.Doctype.for_name_and_ids(
            name, public_id or None, system_id or None))

    dt = add_doctype if keep_doctype and hasattr(bs, 'Doctype') else None
    root = html_parser.parse_and_build(
        utf8_data, new_tag, Comment, NavigableString, append, dt, stack_size)
    soup.append(root)
    return root if return_root else soup
|