/usr/lib/python3/dist-packages/praw/helpers.py is in python3-praw 3.3.0-1.
This file is owned by root:root, with mode 0o644.
The actual contents of the file can be viewed below.
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 | # This file is part of PRAW.
#
# PRAW is free software: you can redistribute it and/or modify it under the
# terms of the GNU General Public License as published by the Free Software
# Foundation, either version 3 of the License, or (at your option) any later
# version.
#
# PRAW is distributed in the hope that it will be useful, but WITHOUT ANY
# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
# A PARTICULAR PURPOSE. See the GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License along with
# PRAW. If not, see <http://www.gnu.org/licenses/>.
"""
Helper functions.
The functions here provide functionality that is often needed by programs using
PRAW, but which isn't part of reddit's API.
"""
from __future__ import unicode_literals
import six
import sys
import time
from collections import deque
from functools import partial
from timeit import default_timer as timer
from praw.errors import HTTPException
BACKOFF_START = 4 # Minimum number of seconds to sleep during errors
KEEP_ITEMS = 128 # On each iteration only remember the first # items
def comment_stream(reddit_session, subreddit, limit=None, verbosity=1):
"""Indefinitely yield new comments from the provided subreddit.
Comments are yielded from oldest to newest.
:param reddit_session: The reddit_session to make requests from. In all the
examples this is assigned to the variable ``r``.
:param subreddit: Either a subreddit object, or the name of a
subreddit. Use `all` to get the comment stream for all comments made to
reddit.
:param limit: The maximum number of comments to fetch in a single
iteration. When None, fetch all available comments (reddit limits this
to 1000 (or multiple of 1000 for multi-subreddits). If this number is
too small, comments may be missed.
:param verbosity: A number that controls the amount of output produced to
stderr. <= 0: no output; >= 1: output the total number of comments
processed and provide the short-term number of comments processed per
second; >= 2: output when additional delays are added in order to avoid
subsequent unexpected http errors. >= 3: output debugging information
regarding the comment stream. (Default: 1)
"""
get_function = partial(reddit_session.get_comments,
six.text_type(subreddit))
return _stream_generator(get_function, limit, verbosity)
def submission_stream(reddit_session, subreddit, limit=None, verbosity=1):
"""Indefinitely yield new submissions from the provided subreddit.
Submissions are yielded from oldest to newest.
:param reddit_session: The reddit_session to make requests from. In all the
examples this is assigned to the variable ``r``.
:param subreddit: Either a subreddit object, or the name of a
subreddit. Use `all` to get the submissions stream for all submissions
made to reddit.
:param limit: The maximum number of submissions to fetch in a single
iteration. When None, fetch all available submissions (reddit limits
this to 1000 (or multiple of 1000 for multi-subreddits). If this number
is too small, submissions may be missed. Since there isn't a limit to
the number of submissions that can be retrieved from r/all, the limit
will be set to 1000 when limit is None.
:param verbosity: A number that controls the amount of output produced to
stderr. <= 0: no output; >= 1: output the total number of submissions
processed and provide the short-term number of submissions processed
per second; >= 2: output when additional delays are added in order to
avoid subsequent unexpected http errors. >= 3: output debugging
information regarding the submission stream. (Default: 1)
"""
if six.text_type(subreddit).lower() == "all":
if limit is None:
limit = 1000
if not hasattr(subreddit, 'reddit_session'):
subreddit = reddit_session.get_subreddit(subreddit)
return _stream_generator(subreddit.get_new, limit, verbosity)
def valid_redditors(redditors, sub):
"""Return a verified list of valid Redditor instances.
:param redditors: A list comprised of Redditor instances and/or strings
that are to be verified as actual redditor accounts.
:param sub: A Subreddit instance that the authenticated account has
flair changing permission on.
Note: Flair will be unset for all valid redditors in `redditors` on the
subreddit `mod_sub`.
"""
simplified = list(set(six.text_type(x).lower() for x in redditors))
return [sub.reddit_session.get_redditor(simplified[i], fetch=False)
for (i, resp) in enumerate(sub.set_flair_csv(
({'user': x, 'flair_text': x} for x in simplified)))
if resp['ok']]
def _stream_generator(get_function, limit=None, verbosity=1):
def debug(msg, level):
if verbosity >= level:
sys.stderr.write(msg + '\n')
def b36_id(item):
return int(item.id, 36)
seen = BoundedSet(KEEP_ITEMS * 16)
before = None
count = 0 # Count is incremented to bypass the cache
processed = 0
backoff = BACKOFF_START
while True:
items = []
sleep = None
start = timer()
try:
i = None
params = {'uniq': count}
count = (count + 1) % 100
if before:
params['before'] = before
gen = enumerate(get_function(limit=limit, params=params))
for i, item in gen:
if b36_id(item) in seen:
if i == 0:
if before is not None:
# reddit sent us out of order data -- log it
debug('(INFO) {0} already seen with before of {1}'
.format(item.fullname, before), 3)
before = None
break
if i == 0: # Always the first item in the generator
before = item.fullname
if b36_id(item) not in seen:
items.append(item)
processed += 1
if verbosity >= 1 and processed % 100 == 0:
sys.stderr.write(' Items: {0} \r'
.format(processed))
sys.stderr.flush()
if i < KEEP_ITEMS:
seen.add(b36_id(item))
else: # Generator exhausted
if i is None: # Generator yielded no items
assert before is not None
# Try again without before as the before item may be too
# old or no longer exist.
before = None
backoff = BACKOFF_START
except HTTPException as exc:
sleep = (backoff, '{0}. Sleeping for {{0}} seconds.'.format(exc),
2)
backoff *= 2
# Provide rate limit
if verbosity >= 1:
rate = len(items) / (timer() - start)
sys.stderr.write(' Items: {0} ({1:.2f} ips) \r'
.format(processed, rate))
sys.stderr.flush()
# Yield items from oldest to newest
for item in items[::-1]:
yield item
# Sleep if necessary
if sleep:
sleep_time, msg, msg_level = sleep # pylint: disable=W0633
debug(msg.format(sleep_time), msg_level)
time.sleep(sleep_time)
def convert_id36_to_numeric_id(id36):
"""Convert strings representing base36 numbers into an integer."""
if not isinstance(id36, six.string_types) or id36.count("_") > 0:
raise ValueError("must supply base36 string, not fullname (e.g. use "
"xxxxx, not t3_xxxxx)")
return int(id36, 36)
def convert_numeric_id_to_id36(numeric_id):
"""Convert an integer into its base36 string representation.
This method has been cleaned up slightly to improve readability. For more
info see:
https://github.com/reddit/reddit/blob/master/r2/r2/lib/utils/_utils.pyx
https://www.reddit.com/r/redditdev/comments/n624n/submission_ids_question/
https://en.wikipedia.org/wiki/Base36
"""
# base36 allows negative numbers, but reddit does not
if not isinstance(numeric_id, six.integer_types) or numeric_id < 0:
raise ValueError("must supply a positive int/long")
# Alphabet used for base 36 conversion
alphabet = '0123456789abcdefghijklmnopqrstuvwxyz'
alphabet_len = len(alphabet)
# Temp assign
current_number = numeric_id
base36 = []
# Current_number must be greater than alphabet length to while/divmod
if 0 <= current_number < alphabet_len:
return alphabet[current_number]
# Break up into chunks
while current_number != 0:
current_number, rem = divmod(current_number, alphabet_len)
base36.append(alphabet[rem])
# String is built in reverse order
return ''.join(reversed(base36))
def flatten_tree(tree, nested_attr='replies', depth_first=False):
"""Return a flattened version of the passed in tree.
:param nested_attr: The attribute name that contains the nested items.
Defaults to ``replies`` which is suitable for comments.
:param depth_first: When true, add to the list in a depth-first manner
rather than the default breadth-first manner.
"""
stack = deque(tree)
extend = stack.extend if depth_first else stack.extendleft
retval = []
while stack:
item = stack.popleft()
nested = getattr(item, nested_attr, None)
if nested:
extend(nested)
retval.append(item)
return retval
def normalize_url(url):
"""Return url after stripping trailing .json and trailing slashes."""
if url.endswith('.json'):
url = url[:-5]
if url.endswith('/'):
url = url[:-1]
return url
class BoundedSet(object):
"""A set with a maximum size that evicts the oldest items when necessary.
This class does not implement the complete set interface.
"""
def __init__(self, max_items):
"""Construct an instance of the BoundedSet."""
self.max_items = max_items
self._fifo = []
self._set = set()
def __contains__(self, item):
"""Test if the BoundedSet contains item."""
return item in self._set
def add(self, item):
"""Add an item to the set discarding the oldest item if necessary."""
if item in self._set:
self._fifo.remove(item)
elif len(self._set) == self.max_items:
self._set.remove(self._fifo.pop(0))
self._fifo.append(item)
self._set.add(item)
|