This file is indexed.

/usr/lib/python2.7/dist-packages/bibtexparser/bparser.py is in python-bibtexparser 0.6.2-3.

This file is owned by root:root, with mode 0o644.

The actual contents of the file can be viewed below.

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
#!/usr/bin/env python
# -*- coding: utf-8 -*-

# Original source: github.com/okfn/bibserver
# Authors:
# markmacgillivray
# Etienne Posthumus (epoz)
# Francois Boulogne <fboulogne at april dot org>

import sys
import logging
import io
import re
from .bibdatabase import BibDatabase

# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# Public API of this module: only the parser class is exported.
__all__ = ['BibTexParser']


# Python 2/3 compatibility shim.
# ``ustr`` is the text (unicode) string type of the running interpreter and
# ``StringIO`` is the in-memory text stream class matching it.
if sys.version_info >= (3, 0):
    from io import StringIO
    ustr = str
else:
    from StringIO import StringIO
    ustr = unicode


class BibTexParser(object):
    """
    A parser for reading BibTeX bibliographic data files.

    Example::

        from bibtexparser.bparser import BibTexParser

        bibtex_str = ...

        parser = BibTexParser()
        parser.ignore_nonstandard_types = False
        parser.homogenise_fields = False
        bib_database = bibtexparser.loads(bibtex_str, parser)
    """

    def __new__(cls, data=None,
                customization=None,
                ignore_nonstandard_types=True,
                homogenise_fields=True):
        """
        To catch the old API structure in which creating the parser would immediately parse and return data.
        """

        if data is None:
            return super(BibTexParser, cls).__new__(cls)
        else:
            # For backwards compatibility: if data is given, parse and return the `BibDatabase` object instead of the
            # parser.
            parser = BibTexParser()
            parser.customization = customization
            parser.ignore_nonstandard_types = ignore_nonstandard_types
            parser.homogenise_fields = homogenise_fields
            return parser.parse(data)

    def __init__(self, data=None,
                 customization=None,
                 ignore_nonstandard_types=True,
                 homogenise_fields=True):
        """
        Creates a parser for reading BibTeX files

        The signature mirrors `__new__`: Python passes the constructor arguments to both, so
        `BibTexParser(customization=...)` must be accepted here as well.

        :param data: unused here; when it is not `None`, `__new__` returns the parsed result instead of a parser
            and this method is not called
        :param customization: initial value of `self.customization`
        :param ignore_nonstandard_types: initial value of `self.ignore_nonstandard_types`
        :param homogenise_fields: initial value of `self.homogenise_fields`
        :return: parser
        :rtype: `BibTexParser`
        """
        self.bib_database = BibDatabase()
        #: Callback function to process BibTeX entries after parsing, for example to create a list from a string with
        #: multiple values. By default all BibTeX values are treated as simple strings. Default: `None`.
        self.customization = customization

        #: Ignore non-standard BibTeX types (`book`, `article`, etc). Default: `True`.
        self.ignore_nonstandard_types = ignore_nonstandard_types

        #: Sanitise BibTeX field names, for example change `url` to `link` etc. Field names are always converted to
        #: lowercase names. Default: `True`.
        self.homogenise_fields = homogenise_fields

        # On some sample data files, the character encoding detection simply
        # hangs. We are going to default to utf8, and mandate it.
        self.encoding = 'utf8'

        # pre-defined set of key changes (applied by `_add_key` when `homogenise_fields` is set)
        self.alt_dict = {
            'keyw': 'keyword',
            'keywords': 'keyword',
            'authors': 'author',
            'editors': 'editor',
            'url': 'link',
            'urls': 'link',
            'links': 'link',
            'subjects': 'subject'
        }

        # Matches one identifier next to a `#` concatenation operator, together with the optional double quote
        # on either side; used by `_string_subst_partial`.
        self.replace_all_re = re.compile(r'((?P<pre>"?)\s*(#|^)\s*(?P<id>[^\d\W]\w*)\s*(#|$)\s*(?P<post>"?))', re.UNICODE)

    def _bibtex_file_obj(self, bibtex_str):
        # Some files have Byte-order marks inserted at the start
        byte = '\xef\xbb\xbf'
        if not isinstance(byte, ustr):
            byte = ustr('\xef\xbb\xbf', self.encoding, 'ignore')
        if bibtex_str[:3] == byte:
            bibtex_str = bibtex_str[3:]
        return StringIO(bibtex_str)

    def parse(self, bibtex_str):
        """Parse a BibTeX string into an object

        :param bibtex_str: BibTeX string
        :type: str or unicode
        :return: bibliographic database
        :rtype: BibDatabase
        """
        self.bibtex_file_obj = self._bibtex_file_obj(bibtex_str)
        self._parse_records(customization=self.customization)
        return self.bib_database

    def parse_file(self, file):
        """Parse a BibTeX file into an object

        :param file: BibTeX file or file-like object
        :type: file
        :return: bibliographic database
        :rtype: BibDatabase
        """
        return self.parse(file.read())

    def _parse_records(self, customization=None):
        """Parse the bibtex into a list of records.

        :param customization: a function
        """
        def _add_parsed_record(record, records):
            """
            Atomic function to parse a record
            and append the result in records
            """
            if record != "":
                logger.debug('The record is not empty. Let\'s parse it.')
                parsed = self._parse_record(record, customization=customization)
                if parsed:
                    logger.debug('Store the result of the parsed record')
                    records.append(parsed)
                else:
                    logger.debug('Nothing returned from the parsed record!')
            else:
                logger.debug('The record is empty')

        records = []
        record = ""
        # read each line, bundle them up until they form an object, then send for parsing
        for linenumber, line in enumerate(self.bibtex_file_obj):
            logger.debug('Inspect line %s', linenumber)
            if line.strip().startswith('@'):
                # Remove leading whitespaces
                line = line.lstrip()
                logger.debug('Line starts with @')
                # Parse previous record
                _add_parsed_record(record, records)
                # Start new record
                logger.debug('The record is set to empty')
                record = ""
            # Keep adding lines to the record
            record += line

        # catch any remaining record and send it for parsing
        _add_parsed_record(record, records)
        logger.debug('Set the list of entries')
        self.bib_database.entries = records

    def _parse_record(self, record, customization=None):
        """Parse a record.

        * tidy whitespace and other rubbish
        * parse out the bibtype and citekey
        * find all the key-value pairs it contains

        :param record: a record
        :param customization: a function

        :returns: dict --
        """
        d = {}

        if not record.startswith('@'):
            logger.debug('The record does not start with @. Return empty dict.')
            return {}

        # if a comment record, add to bib_database.comments
        if record.lower().startswith('@comment'):
            logger.debug('The record startswith @comment')
            logger.debug('Store comment in list of comments')

            self.bib_database.comments.append(re.search('\{(.*)\}', record, re.DOTALL).group(1))

            logger.debug('Return an empty dict')
            return {}

        # if a preamble record, add to bib_database.preambles
        if record.lower().startswith('@preamble'):
            logger.debug('The record startswith @preamble')
            logger.debug('Store preamble in list of preambles')

            self.bib_database.preambles.append(re.search('\{(.*)\}', record, re.DOTALL).group(1))

            logger.debug('Return an empty dict')
            return {}

        # prepare record
        record = '\n'.join([i.strip() for i in record.split('\n')])
        if '}\n' in record:
            logger.debug('}\\n detected in the record. Clean up.')
            record = record.replace('\r\n', '\n').replace('\r', '\n').rstrip('\n')
            # treat the case for which the last line of the record
            # does not have a coma
            if record.endswith('}\n}') or record.endswith('}}'):
                logger.debug('Missing coma in the last line of the record. Fix it.')
                record = re.sub('}(\n|)}$', '},\n}', record)

        # if a string record, put it in the replace_dict
        if record.lower().startswith('@string'):
            logger.debug('The record startswith @string')
            key, val = [i.strip().strip('{').strip('}').replace('\n', ' ') for i in record.split('{', 1)[1].strip('}').strip('\n').strip(',').split('=')]
            key = key.lower()  # key is case insensitive
            val = self._string_subst_partial(val)
            if val.startswith('"') or val.lower() not in self.bib_database.strings:
                self.bib_database.strings[key] = val.strip('"')
            else:
                self.bib_database.strings[key] = self.bib_database.strings[val.lower()]
            logger.debug('Return a dict')
            return d

        # for each line in record
        logger.debug('Split the record of its lines and treat them')
        kvs = [i.strip() for i in re.split(',\s*\n|\n\s*,', record)]
        inkey = ""
        inval = ""
        for kv in kvs:
            logger.debug('Inspect: %s', kv)
            # TODO: We may check that the keyword belongs to a known type
            if kv.startswith('@') and not inkey:
                # it is the start of the record - set the bibtype and citekey (id)
                logger.debug('Line starts with @ and the key is not stored yet.')
                bibtype, id = kv.split('{', 1)
                bibtype = self._add_key(bibtype)
                id = id.lstrip().strip('}').strip(',')
                logger.debug('bibtype = %s', bibtype)
                logger.debug('id = %s', id)
                if self.ignore_nonstandard_types and bibtype not in ('article',
                                                                     'book',
                                                                     'booklet',
                                                                     'conference',
                                                                     'inbook',
                                                                     'incollection',
                                                                     'inproceedings',
                                                                     'manual',
                                                                     'mastersthesis',
                                                                     'misc',
                                                                     'phdthesis',
                                                                     'proceedings',
                                                                     'techreport',
                                                                     'unpublished'):
                    logger.warning('Entry type %s not standard. Not considered.', bibtype)
                    break
            elif '=' in kv and not inkey:
                # it is a line with a key value pair on it
                logger.debug('Line contains a key-pair value and the key is not stored yet.')
                key, val = [i.strip() for i in kv.split('=', 1)]
                key = self._add_key(key)
                val = self._string_subst_partial(val)
                # if it looks like the value spans lines, store details for next loop
                if (val.count('{') != val.count('}')) or (val.startswith('"') and not val.replace('}', '').endswith('"')):
                    logger.debug('The line is not ending the record.')
                    inkey = key
                    inval = val
                else:
                    logger.debug('The line is the end of the record.')
                    d[key] = self._add_val(val)
            elif inkey:
                logger.debug('Continues the previous line to complete the key pair value...')
                # if this line continues the value from a previous line, append
                inval += ', ' + kv
                # if it looks like this line finishes the value, store it and clear for next loop
                if (inval.startswith('{') and inval.endswith('}')) or (inval.startswith('"') and inval.endswith('"')):
                    logger.debug('This line represents the end of the current key-pair value')
                    d[inkey] = self._add_val(inval)
                    inkey = ""
                    inval = ""
                else:
                    logger.debug('This line does NOT represent the end of the current key-pair value')

        logger.debug('All lines have been treated')
        if not d:
            logger.debug('The dict is empty, return it.')
            return d

        d['ENTRYTYPE'] = bibtype
        d['ID'] = id

        if customization is None:
            logger.debug('No customization to apply, return dict')
            return d
        else:
            # apply any customizations to the record object then return it
            logger.debug('Apply customizations and return dict')
            return customization(d)

    def _strip_quotes(self, val):
        """Strip double quotes enclosing string

        :param val: a value
        :type val: string
        :returns: string -- value
        """
        logger.debug('Strip quotes')
        val = val.strip()
        if val.startswith('"') and val.endswith('"'):
            return val[1:-1]
        return val

    def _strip_braces(self, val):
        """Strip braces enclosing string

        :param val: a value
        :type val: string
        :returns: string -- value
        """
        logger.debug('Strip braces')
        val = val.strip()
        if val.startswith('{') and val.endswith('}') and self._full_span(val):
            return val[1:-1]
        return val

    def _full_span(self, val):
        cnt = 0
        for i in range(0, len(val)):
                if val[i] == '{':
                        cnt += 1
                elif val[i] == '}':
                        cnt -= 1
                if cnt == 0:
                        break
        if i == len(val) - 1:
                return True
        else:
                return False

    def _string_subst(self, val):
        """ Substitute string definitions

        :param val: a value
        :type val: string
        :returns: string -- value
        """
        logger.debug('Substitute string definitions')
        if not val:
            return ''
        for k in list(self.bib_database.strings.keys()):
            if val.lower() == k:
                val = self.bib_database.strings[k]
        if not isinstance(val, ustr):
            val = ustr(val, self.encoding, 'ignore')

        return val

    def _string_subst_partial(self, val):
        """ Substitute string definitions inside larger expressions

        :param val: a value
        :type val: string
        :returns: string -- value
        """
        def repl(m):
            k = m.group('id')
            replacement = self.bib_database.strings[k.lower()] if k.lower() in self.bib_database.strings else k
            pre = '"' if m.group('pre') != '"' else ''
            post = '"' if m.group('post') != '"' else ''
            return pre + replacement + post

        logger.debug('Substitute string definitions inside larger expressions')
        if '#' not in val:
            return val

        # TODO?: Does not match two subsequent variables or strings, such as  "start" # foo # bar # "end"  or  "start" # "end".
        # TODO:  Does not support braces instead of quotes, e.g.: {start} # foo # {bar}
        # TODO:  Does not support strings like: "te#s#t"
        return self.replace_all_re.sub(repl, val)

    def _add_val(self, val):
        """ Clean instring before adding to dictionary

        :param val: a value
        :type val: string
        :returns: string -- value
        """
        if not val or val == "{}":
            return ''
        val = self._strip_braces(val)
        val = self._strip_quotes(val)
        val = self._strip_braces(val)
        val = self._string_subst(val)
        return val

    def _add_key(self, key):
        """ Add a key and homogeneize alternative forms.

        :param key: a key
        :type key: string
        :returns: string -- value
        """
        key = key.strip().strip('@').lower()
        if self.homogenise_fields:
            if key in list(self.alt_dict.keys()):
                key = self.alt_dict[key]
        if not isinstance(key, ustr):
            return ustr(key, 'utf-8')
        else:
            return key