This file is indexed.

/usr/lib/python3/dist-packages/plainbox/impl/xscanners.py is in python3-plainbox 0.25-1.

This file is owned by root:root, with mode 0o644.

The actual contents of the file can be viewed below.

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
# This file is part of Checkbox.
#
# Copyright 2012-2015 Canonical Ltd.
# Written by:
#   Zygmunt Krynicki <zygmunt.krynicki@canonical.com>
#
# Checkbox is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 3,
# as published by the Free Software Foundation.
#
# Checkbox is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Checkbox.  If not, see <http://www.gnu.org/licenses/>.
import logging

from plainbox.vendor.enum import Enum, unique


# Names exported by a star-import of this module: only the concrete scanner.
__all__ = ['WordScanner']

# Module-level logger; the scanners below trace every step at DEBUG level.
_logger = logging.getLogger("plainbox.xscanners")


class ScannerBase:
    """
    Base class for hand-written, state-machine driven scanners.

    The class owns the input text and the read position and implements the
    generic scanning loop in :meth:`get_token()`: characters are consumed
    until the error state is reached and then the scanner rolls back to the
    most recent accepting state.

    Everything specific to a particular scanner is looked up on ``self`` and
    has to be provided by the subclass:

    ``STATE_START``
        the state each token starts in
    ``STATE_ERROR``
        the state that terminates the scanning loop
    ``STATE_BAD``
        canary state kept at the bottom of the rollback stack
    ``TOKEN_EOF``
        the token that ends iteration
    ``_next_state_for(state, char)``
        the transition function

    State objects need to offer ``is_accepting``, ``token`` and
    ``modify_lexeme(lexeme)``.
    """

    def __init__(self, text):
        """
        Initialize the scanner to read from the beginning of ``text``
        """
        self._text = text
        self._text_len = len(text)
        self._pos = 0

    def __iter__(self):
        """
        The scanner is its own iterator
        """
        return self

    def __next__(self):
        """
        Get the next pair (token, lexeme)

        :raises StopIteration:
            when the token is ``TOKEN_EOF``
        """
        token, lexeme = self.get_token()
        if token is self.TOKEN_EOF:
            raise StopIteration
        return token, lexeme

    def get_token(self):
        """
        Get the next pair (token, lexeme)

        :returns:
            ``(state.token, lexeme)`` when scanning stopped in an accepting
            state, ``(state.token, None)`` otherwise. In the latter case the
            read position is restored to where this call started, so calling
            this method again will scan the very same input again.
        """
        _logger.debug("inner: get_token()")
        state = self.STATE_START
        lexeme = ""
        # The canary at the bottom of the stack marks "no accepting state was
        # ever seen". No character was consumed on its behalf.
        stack = [self.STATE_BAD]
        while state is not self.STATE_ERROR:
            _logger.debug("inner: ------ (next loop)")
            _logger.debug("inner: text:   %r", self._text)
            _logger.debug("                %s^ (pos: %d of %d)",
                          '-' * self._pos, self._pos, self._text_len)
            char = self._next_char()
            _logger.debug("inner: char:   %r", char)
            _logger.debug("inner: state:  %s", state)
            _logger.debug("inner: stack:  %s", stack)
            _logger.debug("inner: lexeme: %r", lexeme)
            lexeme += char
            if state.is_accepting:
                # There is no point in rolling back past an accepting state
                stack[:] = ()
                _logger.debug("inner: rollback stack cleared")
            stack.append(state)
            state = self._next_state_for(state, char)
            _logger.debug("inner: state becomes %s", state)
        # The loop above can only end in the error state. Undo one character
        # for each state popped until an accepting state (or the canary)
        # shows up.
        _logger.debug("inner/rollback: REACHED ERROR STATE, ROLLING BACK")
        while not state.is_accepting and state is not self.STATE_BAD:
            state = stack.pop()
            _logger.debug("inner/rollback: popped new state %s", state)
            if state is self.STATE_BAD:
                # The canary was pushed before anything was read, so there is
                # nothing left to undo. Rolling back here would rewind past
                # the start of this token (or of the whole input).
                break
            lexeme = lexeme[:-1]
            _logger.debug("inner/rollback: lexeme trimmed to: %r", lexeme)
            self._rollback()
        _logger.debug("inner/rollback: DONE")
        # Drop the '\0' end-of-input sentinel(s) that _next_char() produced
        lexeme = lexeme.rstrip("\0")
        lexeme = state.modify_lexeme(lexeme)
        if state.is_accepting:
            _logger.debug(
                "inner: accepting/returning: %r, %r", state.token, lexeme)
            return state.token, lexeme
        else:
            _logger.debug("inner: not accepting: %r", state)
            return state.token, None

    def _rollback(self):
        """
        Move the read position one character back
        """
        if self._pos > 0:
            self._pos -= 1
        else:
            assert False, "rolling back before start of input?"

    def _next_char(self):
        """
        Consume and return the next character of the input

        Once the input is exhausted ``'\\0'`` is returned instead.
        """
        assert self._pos >= 0
        if self._pos < self._text_len:
            char = self._text[self._pos]
            self._pos += 1
            return char
        else:
            # NOTE: this solves a lot of problems
            # The position is parked one past the end no matter how many
            # times the end of input is read, so that a single rollback
            # brings it back to the end of the text.
            self._pos = self._text_len + 1
            return '\0'

    def _next_state_for(self, state, char):
        """
        Compute the state reached from ``state`` after reading ``char``

        This method must be implemented by subclasses.
        """
        raise NotImplementedError


@unique
class WordScannerToken(Enum):
    """
    Kinds of tokens handed out by :class:`WordScanner`
    """
    INVALID = -1
    EOF = 0
    WORD = 1
    SPACE = 2
    COMMENT = 3
    COMMA = 4
    EQUALS = 5

    @property
    def is_irrelevant(self):
        """
        True for the SPACE and COMMENT tokens, False for all the others
        """
        kind = type(self)
        return self is kind.SPACE or self is kind.COMMENT


@unique
class WordScannerState(Enum):
    """ State of the :class:`WordScanner` """
    BAD = -1  # the bad state, used only once as a canary
    START = 0  # the initial state
    EOF = 1  # state for end-of-input
    ERROR = 2  # state for all kinds of bad input
    BARE_WORD = 3  # state when we're seeing bare words
    QUOTED_WORD_INNER = 4  # state when we're seeing "-quoted word
    QUOTED_WORD_END = 5
    SPACE = 6  # state when we're seeing spaces
    COMMENT_INNER = 7  # state when we're seeing comments
    COMMENT_END = 8  # state when we've seen \n or ''
    COMMA = 9  # state where we saw a comma
    EQUALS = 10  # state where we saw the equals sign

    @property
    def is_accepting(self):
        return self in WordScannerState._ACCEPTING

    def modify_lexeme(self, lexeme):
        """ Get the value of a given lexeme """
        if self is WordScannerState.QUOTED_WORD_END:
            return lexeme[1:-1]
        else:
            return lexeme

    @property
    def token(self):
        """ Get the token corresponding to this state """
        return WordScannerState._TOKEN_MAP.get(self, WordScannerToken.INVALID)

# Helper tables used by WordScannerState.is_accepting and
# WordScannerState.token. Both refer to members of the enumeration so they
# are attached once the class itself has been created.

# Mapping from each accepting state to the token it produces
WordScannerState._TOKEN_MAP = dict((
    (WordScannerState.EOF, WordScannerToken.EOF),
    (WordScannerState.BARE_WORD, WordScannerToken.WORD),
    (WordScannerState.QUOTED_WORD_END, WordScannerToken.WORD),
    (WordScannerState.SPACE, WordScannerToken.SPACE),
    (WordScannerState.COMMENT_END, WordScannerToken.COMMENT),
    (WordScannerState.COMMA, WordScannerToken.COMMA),
    (WordScannerState.EQUALS, WordScannerToken.EQUALS),
))
# States in which the scanner may stop and hand out a token
WordScannerState._ACCEPTING = frozenset((
    WordScannerState.EOF,
    WordScannerState.BARE_WORD,
    WordScannerState.QUOTED_WORD_END,
    WordScannerState.SPACE,
    WordScannerState.COMMENT_END,
    WordScannerState.COMMA,
    WordScannerState.EQUALS,
))


class WordScanner(ScannerBase):
    """
    Support class for tokenizing a stream of words with shell comments.

    A bare word is any run of characters that contains no white space (of any
    kind) and none of ``#``, ``,`` and ``=``. Comments are introduced with the
    ``#`` character and run to the end of the line. The one input that cannot
    be tokenized is a double-quoted word that is missing the closing quote: it
    drives the state machine into the error state without ever reaching an
    accepting state.

    Iterating over the scanner will produce subsequent pairs of (token, lexeme)
    where the kind is one of the constants from :class:`WordScannerToken` and
    lexeme is the actual text (value) of the token

        >>> for token, lexeme in WordScanner('ala ma kota'):
        ...     print(lexeme)
        ala
        ma
        kota

    Empty input produces an EOF token:

        >>> WordScanner('').get_token()
        (<WordScannerToken.EOF: 0>, '')

    Words with white space can be quoted using double quotes:

        >>> WordScanner('"quoted word"').get_token()
        (<WordScannerToken.WORD: 1>, 'quoted word')

    White space is ignored and is not returned in any way (normally):

        >>> WordScanner('\\n\\t\\v\\rword').get_token()
        (<WordScannerToken.WORD: 1>, 'word')

    Though if you *really* want to, you can see everything by passing the
    ``ignore_irrelevant=False`` argument to :meth:`get_token()`:

        >>> scanner = WordScanner('\\n\\t\\v\\rword')
        >>> while True:
        ...     token, lexeme = scanner.get_token(ignore_irrelevant=False)
        ...     print('{:6} {!a}'.format(token.name, lexeme))
        ...     if token == scanner.TOKEN_EOF:
        ...         break
        SPACE  '\\n\\t\\x0b\\r'
        WORD   'word'
        EOF    ''

    The scanner has special provisions for recognizing some punctuation, this
    includes the comma and the equals sign as shown below:

        >>> for token, lexeme in WordScanner("k1=v1, k2=v2"):
        ...     print('{:6} {!a}'.format(token.name, lexeme))
        WORD   'k1'
        EQUALS '='
        WORD   'v1'
        COMMA  ','
        WORD   'k2'
        EQUALS '='
        WORD   'v2'

    Since both can appear in regular expressions, they can be quoted to prevent
    being recognized for their special meaning:

        >>> for token, lexeme in WordScanner('k1="v1, k2=v2"'):
        ...     print('{:6} {!a}'.format(token.name, lexeme))
        WORD   'k1'
        EQUALS '='
        WORD   'v1, k2=v2'

    """
    STATE_ERROR = WordScannerState.ERROR
    STATE_START = WordScannerState.START
    STATE_BAD = WordScannerState.BAD
    TOKEN_EOF = WordScannerToken.EOF

    TokenEnum = WordScannerToken

    def get_token(self, ignore_irrelevant=True):
        """
        Get the next pair (token, lexeme)

        :param ignore_irrelevant:
            When True (the default) tokens for which ``is_irrelevant`` is
            true (white space and comments) are silently skipped.
        :returns:
            The pair (token, lexeme) as produced by the base class.
        """
        while True:
            token, lexeme = super().get_token()
            _logger.debug("outer: GOT %r %r", token, lexeme)
            if ignore_irrelevant and token.is_irrelevant:
                _logger.debug("outer: CONTINUING (irrelevant token found)")
                continue
            return token, lexeme

    def _next_state_for(self, state, char):
        """
        Compute the state reached from ``state`` after reading ``char``

        :param state:
            The current :class:`WordScannerState`
        :param char:
            The character that was just read, ``'\\0'`` stands for the end of
            the input.
        :returns:
            The next state, ``WordScannerState.ERROR`` if there is no valid
            transition.
        """
        if state is WordScannerState.START:
            if char.isspace():
                return WordScannerState.SPACE
            elif char == '\0':
                return WordScannerState.EOF
            elif char == '#':
                return WordScannerState.COMMENT_INNER
            elif char == '"':
                return WordScannerState.QUOTED_WORD_INNER
            elif char == ',':
                return WordScannerState.COMMA
            elif char == '=':
                return WordScannerState.EQUALS
            else:
                return WordScannerState.BARE_WORD
        elif state is WordScannerState.SPACE:
            if char.isspace():
                return WordScannerState.SPACE
        elif state is WordScannerState.BARE_WORD:
            if char.isspace() or char in '\0#,=':
                return WordScannerState.ERROR
            else:
                return WordScannerState.BARE_WORD
        elif state is WordScannerState.COMMENT_INNER:
            if char == '\n' or char == '\0':
                return WordScannerState.COMMENT_END
            else:
                return WordScannerState.COMMENT_INNER
        elif state is WordScannerState.QUOTED_WORD_INNER:
            if char == '"':
                return WordScannerState.QUOTED_WORD_END
            elif char == '\0':
                # the input ended before the closing quote was seen
                return WordScannerState.ERROR
            else:
                return WordScannerState.QUOTED_WORD_INNER
        # Every other state (QUOTED_WORD_END, COMMENT_END, COMMA, EQUALS, ...)
        # has no outgoing transitions. The same is true of SPACE followed by
        # something that is not white space.
        return WordScannerState.ERROR