This file is indexed.

/usr/lib/python2.7/dist-packages/praw/helpers.py is in python-praw 3.3.0-1.

This file is owned by root:root, with mode 0o644.

The actual contents of the file can be viewed below.

# This file is part of PRAW.
#
# PRAW is free software: you can redistribute it and/or modify it under the
# terms of the GNU General Public License as published by the Free Software
# Foundation, either version 3 of the License, or (at your option) any later
# version.
#
# PRAW is distributed in the hope that it will be useful, but WITHOUT ANY
# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
# A PARTICULAR PURPOSE.  See the GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License along with
# PRAW.  If not, see <http://www.gnu.org/licenses/>.

"""
Helper functions.

The functions here provide functionality that is often needed by programs using
PRAW, but which isn't part of reddit's API.
"""

from __future__ import unicode_literals

import six
import sys
import time
from collections import deque
from functools import partial
from timeit import default_timer as timer
from praw.errors import HTTPException

BACKOFF_START = 4  # Minimum number of seconds to sleep during errors
KEEP_ITEMS = 128  # Only remember this many of the newest items per iteration


def comment_stream(reddit_session, subreddit, limit=None, verbosity=1):
    """Indefinitely yield new comments from the provided subreddit.

    Comments are yielded from oldest to newest.

    :param reddit_session: The reddit_session to make requests from. In all the
        examples this is assigned to the variable ``r``.
    :param subreddit: Either a subreddit object, or the name of a
        subreddit. Use `all` to get the comment stream for all comments made to
        reddit.
    :param limit: The maximum number of comments to fetch in a single
        iteration. When None, fetch all available comments (reddit limits this
        to 1000, or a multiple of 1000 for multi-subreddits). If this number
        is too small, comments may be missed.
    :param verbosity: A number that controls the amount of output produced to
        stderr. <= 0: no output; >= 1: output the total number of comments
        processed and provide the short-term number of comments processed per
        second; >= 2: output when additional delays are added in order to avoid
        subsequent unexpected http errors; >= 3: output debugging information
        regarding the comment stream. (Default: 1)

    """
    get_function = partial(reddit_session.get_comments,
                           six.text_type(subreddit))
    return _stream_generator(get_function, limit, verbosity)
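
# Usage sketch (illustrative only; assumes ``reddit_session`` is an
# authenticated praw.Reddit instance and that r/redditdev is merely an
# example subreddit):
def _example_print_new_comments(reddit_session):
    """Print the body of each new r/redditdev comment as it arrives."""
    for comment in comment_stream(reddit_session, 'redditdev', verbosity=0):
        print(comment.body)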


def submission_stream(reddit_session, subreddit, limit=None, verbosity=1):
    """Indefinitely yield new submissions from the provided subreddit.

    Submissions are yielded from oldest to newest.

    :param reddit_session: The reddit_session to make requests from. In all the
        examples this is assigned to the variable ``r``.
    :param subreddit: Either a subreddit object, or the name of a
        subreddit. Use `all` to get the submissions stream for all submissions
        made to reddit.
    :param limit: The maximum number of submissions to fetch in a single
        iteration. When None, fetch all available submissions (reddit limits
        this to 1000, or a multiple of 1000 for multi-subreddits). If this
        number is too small, submissions may be missed. Since there is no
        limit to the number of submissions that can be retrieved from r/all,
        the limit is set to 1000 when ``limit`` is None for that stream.
    :param verbosity: A number that controls the amount of output produced to
        stderr. <= 0: no output; >= 1: output the total number of submissions
        processed and provide the short-term number of submissions processed
        per second; >= 2: output when additional delays are added in order to
        avoid subsequent unexpected http errors; >= 3: output debugging
        information regarding the submission stream. (Default: 1)

    """
    if six.text_type(subreddit).lower() == "all":
        if limit is None:
            limit = 1000
    if not hasattr(subreddit, 'reddit_session'):
        subreddit = reddit_session.get_subreddit(subreddit)
    return _stream_generator(subreddit.get_new, limit, verbosity)
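
# Usage sketch (illustrative only; assumes an authenticated praw.Reddit
# instance; streaming 'all' falls back to the default limit of 1000 as
# described in the docstring above):
def _example_watch_all_submissions(reddit_session):
    """Print the title of every new submission made anywhere on reddit."""
    for submission in submission_stream(reddit_session, 'all', verbosity=0):
        print(submission.title)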


def valid_redditors(redditors, sub):
    """Return a verified list of valid Redditor instances.

    :param redditors: A list comprised of Redditor instances and/or strings
        that are to be verified as actual redditor accounts.
    :param sub: A Subreddit instance that the authenticated account has
        flair changing permission on.

    Note: This call modifies the flair of every valid redditor in
    `redditors` on the subreddit `sub`.

    """
    simplified = list(set(six.text_type(x).lower() for x in redditors))
    return [sub.reddit_session.get_redditor(simplified[i], fetch=False)
            for (i, resp) in enumerate(sub.set_flair_csv(
                ({'user': x, 'flair_text': x} for x in simplified)))
            if resp['ok']]


def _stream_generator(get_function, limit=None, verbosity=1):
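    """Yield items from ``get_function`` as they arrive, oldest first.

    Summary of the loop below: each pass requests only items newer than the
    most recent fullname seen (``before``), varies a throwaway ``uniq``
    parameter so responses are not served from the cache, records recently
    seen ids in a BoundedSet to skip duplicates, and sleeps with exponential
    backoff (starting at BACKOFF_START seconds) after an HTTPException.
    """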
    def debug(msg, level):
        if verbosity >= level:
            sys.stderr.write(msg + '\n')

    def b36_id(item):
        return int(item.id, 36)

    seen = BoundedSet(KEEP_ITEMS * 16)
    before = None
    count = 0  # Count is incremented to bypass the cache
    processed = 0
    backoff = BACKOFF_START
    while True:
        items = []
        sleep = None
        start = timer()
        try:
            i = None
            params = {'uniq': count}
            count = (count + 1) % 100
            if before:
                params['before'] = before
            gen = enumerate(get_function(limit=limit, params=params))
            for i, item in gen:
                if b36_id(item) in seen:
                    if i == 0:
                        if before is not None:
                            # reddit sent us out of order data  -- log it
                            debug('(INFO) {0} already seen with before of {1}'
                                  .format(item.fullname, before), 3)
                            before = None
                    break
                if i == 0:  # Always the first item in the generator
                    before = item.fullname
                if b36_id(item) not in seen:
                    items.append(item)
                    processed += 1
                if verbosity >= 1 and processed % 100 == 0:
                    sys.stderr.write(' Items: {0}            \r'
                                     .format(processed))
                    sys.stderr.flush()
                if i < KEEP_ITEMS:
                    seen.add(b36_id(item))
            else:  # Generator exhausted
                if i is None:  # Generator yielded no items
                    assert before is not None
                    # Try again without before as the before item may be too
                    # old or no longer exist.
                    before = None
            backoff = BACKOFF_START
        except HTTPException as exc:
            sleep = (backoff, '{0}. Sleeping for {{0}} seconds.'.format(exc),
                     2)
            backoff *= 2
        # Provide rate limit
        if verbosity >= 1:
            rate = len(items) / (timer() - start)
            sys.stderr.write(' Items: {0} ({1:.2f} ips)    \r'
                             .format(processed, rate))
            sys.stderr.flush()
        # Yield items from oldest to newest
        for item in items[::-1]:
            yield item
        # Sleep if necessary
        if sleep:
            sleep_time, msg, msg_level = sleep  # pylint: disable=W0633
            debug(msg.format(sleep_time), msg_level)
            time.sleep(sleep_time)


def convert_id36_to_numeric_id(id36):
    """Convert strings representing base36 numbers into an integer."""
    if not isinstance(id36, six.string_types) or id36.count("_") > 0:
        raise ValueError("must supply base36 string, not fullname (e.g. use "
                         "xxxxx, not t3_xxxxx)")
    return int(id36, 36)


def convert_numeric_id_to_id36(numeric_id):
    """Convert an integer into its base36 string representation.

    This method has been cleaned up slightly to improve readability. For more
    info see:

    https://github.com/reddit/reddit/blob/master/r2/r2/lib/utils/_utils.pyx

    https://www.reddit.com/r/redditdev/comments/n624n/submission_ids_question/

    https://en.wikipedia.org/wiki/Base36
    """
    # base36 allows negative numbers, but reddit does not
    if not isinstance(numeric_id, six.integer_types) or numeric_id < 0:
        raise ValueError("must supply a positive int/long")

    # Alphabet used for base 36 conversion
    alphabet = '0123456789abcdefghijklmnopqrstuvwxyz'
    alphabet_len = len(alphabet)

    # Temp assign
    current_number = numeric_id
    base36 = []

    # A value smaller than the alphabet length is already a single digit
    if 0 <= current_number < alphabet_len:
        return alphabet[current_number]

    # Break up into chunks
    while current_number != 0:
        current_number, rem = divmod(current_number, alphabet_len)
        base36.append(alphabet[rem])

    # String is built in reverse order
    return ''.join(reversed(base36))
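
# Round-trip sketch for the two id conversion helpers above (illustrative
# only; '15bfi0' is an arbitrary base36 string, not a specific post):
def _example_id36_round_trip():
    """Show that the two conversions are inverses of one another."""
    numeric = convert_id36_to_numeric_id('15bfi0')
    assert convert_numeric_id_to_id36(numeric) == '15bfi0'
    return numeric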


def flatten_tree(tree, nested_attr='replies', depth_first=False):
    """Return a flattened version of the passed in tree.

    :param nested_attr: The attribute name that contains the nested items.
        Defaults to ``replies`` which is suitable for comments.
    :param depth_first: When true, add to the list in a depth-first manner
        rather than the default breadth-first manner.

    """
    stack = deque(tree)
    extend = stack.extend if depth_first else stack.extendleft
    retval = []
    while stack:
        item = stack.popleft()
        nested = getattr(item, nested_attr, None)
        if nested:
            extend(nested)
        retval.append(item)
    return retval
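
# Usage sketch (illustrative only; ``submission`` stands for any fetched
# praw.objects.Submission whose comment forest has been loaded):
def _example_count_comments(submission):
    """Count the comment objects attached to ``submission`` at any depth."""
    return len(flatten_tree(submission.comments))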


def normalize_url(url):
    """Return url after stripping trailing .json and trailing slashes."""
    if url.endswith('.json'):
        url = url[:-5]
    if url.endswith('/'):
        url = url[:-1]
    return url


class BoundedSet(object):

    """A set with a maximum size that evicts the oldest items when necessary.

    This class does not implement the complete set interface.

    """

    def __init__(self, max_items):
        """Construct an instance of the BoundedSet."""
        self.max_items = max_items
        self._fifo = []
        self._set = set()

    def __contains__(self, item):
        """Test if the BoundedSet contains item."""
        return item in self._set

    def add(self, item):
        """Add an item to the set discarding the oldest item if necessary."""
        if item in self._set:
            self._fifo.remove(item)
        elif len(self._set) == self.max_items:
            self._set.remove(self._fifo.pop(0))
        self._fifo.append(item)
        self._set.add(item)
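

if __name__ == '__main__':  # pragma: no cover
    # Minimal demonstration of BoundedSet's FIFO eviction (illustrative
    # only, not part of the public interface).
    demo = BoundedSet(max_items=2)
    demo.add('a')
    demo.add('b')
    demo.add('c')  # 'a' is the oldest item, so it is evicted
    assert 'a' not in demo
    assert 'c' in demo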