This file is indexed.

/usr/lib/python2.7/dist-packages/oops_datedir_repo/repository.py is in python-oops-datedir-repo 0.0.17-0ubuntu2.

This file is owned by root:root, with mode 0o644.

The actual contents of the file can be viewed below.

#
# Copyright (c) 2011, Canonical Ltd
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Lesser General Public License as published by
# the Free Software Foundation, version 3 only.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
# GNU Lesser General Public License version 3 (see the file LICENSE).

"""The primary interface to oopses stored on disk - the DateDirRepo."""

__metaclass__ = type

__all__ = [
    'DateDirRepo',
    ]

import datetime
import errno
from functools import partial
from hashlib import md5
import os.path
import stat

from pytz import utc

import anybson as bson
import serializer
import serializer_bson
from uniquefileallocator import UniqueFileAllocator


class DateDirRepo:
    """Publish oopses to a date-dir repository.

    A date-dir repository is a directory containing:

    * Zero or one directory called 'metadata'. If it exists, this directory
      contains any housekeeping material needed (such as a metadata.conf ini
      file).

    * Zero or more directories named like YYYY-MM-DD, which contain zero or
      more OOPS reports. OOPS file names can take various forms, but must not
      end in .tmp - those are considered to be OOPS reports that are currently
      being written.
    """

    def __init__(self, error_dir, instance_id=None, serializer=None,
            inherit_id=False, stash_path=False):
        """Create a DateDirRepo.

        :param error_dir: The base directory to write OOPSes into. OOPSes are
            written into a subdirectory of this directory, named after the
            date (e.g. 2011-12-30).
        :param instance_id: If None, OOPS file names are named after the OOPS
            id which is generated by hashing the serialized OOPS (without the
            id field). Otherwise OOPS file names and ids are created by
            allocating file names through a UniqueFileAllocator.
            UniqueFileAllocator has significant performance and concurrency
            limits and hash based naming is recommended.
        :param serializer: If supplied should be the module (e.g.
            oops_datedir_repo.serializer_rfc822) to use to serialize OOPSes.
            Defaults to using serializer_bson.
        :param inherit_id: If True, use the oops ID (if present) supplied in
            the report, rather than always assigning a new one.
        :param stash_path: If True, the filename that the OOPS was written to
            is stored in the OOPS report under the key 'datedir_repo_filepath'.
            It is not stored in the OOPS written to disk, only the in-memory
            model.
        """
        if instance_id is not None:
            self.log_namer = UniqueFileAllocator(
                output_root=error_dir,
                log_type="OOPS",
                log_subtype=instance_id,
                )
        else:
            self.log_namer = None
        self.root = error_dir
        if serializer is None:
            serializer = serializer_bson
        self.serializer = serializer
        self.inherit_id = inherit_id
        self.stash_path = stash_path
        self.metadatadir = os.path.join(self.root, 'metadata')
        self.config_path = os.path.join(self.metadatadir, 'config.bson')

    def publish(self, report, now=None):
        """Write the report to disk.

        The report is written to a temporary file, and then renamed to its
        final location. Programs concurrently reading from a DateDirRepo
        should ignore files ending in .tmp.

        :param now: The datetime to use as the current time.  Will be
            determined if not supplied.  Useful for testing.
        """
        # We set file permission to: rw-r--r-- (so that reports from
        # umask-restricted services can be gathered by a tool running as
        # another user).
        wanted_file_permission = (
            stat.S_IRUSR | stat.S_IWUSR | stat.S_IRGRP | stat.S_IROTH)
        if now is not None:
            now = now.astimezone(utc)
        else:
            now = datetime.datetime.now(utc)
        # Don't mess with the original report when changing ids etc.
        original_report = report
        report = dict(report)
        if self.log_namer is not None:
            oopsid, filename = self.log_namer.newId(now)
        else:
            md5hash = md5(serializer_bson.dumps(report)).hexdigest()
            oopsid = 'OOPS-%s' % md5hash
            prefix = os.path.join(self.root, now.strftime('%Y-%m-%d'))
            if not os.path.isdir(prefix):
                os.makedirs(prefix)
                # For directories we need to set the x bits too.
                os.chmod(
                    prefix, wanted_file_permission | stat.S_IXUSR | stat.S_IXGRP |
                    stat.S_IXOTH)
            filename = os.path.join(prefix, oopsid)
        if self.inherit_id:
            oopsid = report.get('id') or oopsid
        report['id'] = oopsid
        self.serializer.write(report, open(filename + '.tmp', 'wb'))
        os.rename(filename + '.tmp', filename)
        if self.stash_path:
            original_report['datedir_repo_filepath'] = filename
        os.chmod(filename, wanted_file_permission)
        return report['id']

    def republish(self, publisher):
        """Republish the contents of the DateDirRepo to another publisher.

        This makes it easy to treat a DateDirRepo as a backing store in message
        queue environments: if the message queue is down, flush to the
        DateDirRepo, then later pick the OOPSes up and send them to the message
        queue environment.

        For instance:

          >>> repo = DateDirRepo('.')
          >>> repo.publish({'some':'report'})
          >>> queue = []
          >>> def queue_publisher(report):
          ...     queue.append(report)
          ...     return report['id']
          >>> repo.republish(queue_publisher)

        Will scan the disk and send the single found report to queue_publisher,
        deleting the report afterwards.

        Empty datedir directories are automatically cleaned up, as are stale
        .tmp files.

        If the publisher returns None, signalling that it did not publish the
        report, then the report is not deleted from disk.
        """
        two_days = datetime.timedelta(2)
        now = datetime.date.today()
        old = now - two_days
        for dirname, (y,m,d) in self._datedirs():
            date = datetime.date(y, m, d)
            prune = date < old
            dirpath = os.path.join(self.root, dirname)
            files = os.listdir(dirpath)
            if not files and prune:
                # Cleanup no longer needed directory.
                os.rmdir(dirpath)
            for candidate in map(partial(os.path.join, dirpath), files):
                if candidate.endswith('.tmp'):
                    if prune:
                        os.unlink(candidate)
                    continue
                with file(candidate, 'rb') as report_file:
                    report = serializer.read(report_file)
                oopsid = publisher(report)
                if oopsid:
                    os.unlink(candidate)

    def _datedirs(self):
        """Yield each subdir which looks like a datedir."""
        for dirname in os.listdir(self.root):
            try:
                y, m, d = dirname.split('-')
                y = int(y)
                m = int(m)
                d = int(d)
            except ValueError:
                # Not a datedir
                continue
            yield dirname, (y, m, d)

    def _read_config(self):
        """Return the current config document from disk."""
        try:
            with open(self.config_path, 'rb') as config_file:
                return bson.loads(config_file.read())
        except IOError, e:
            if e.errno != errno.ENOENT:
                raise
            return {}

    def get_config(self, key):
        """Return a key from the repository config.

        :param key: A key to read from the config.
        """
        return self._read_config()[key]

    def set_config(self, key, value):
        """Set config option key to value.

        This is written to the bson document root/metadata/config.bson

        :param key: The key to set - anything that can be a key in a bson
            document.
        :param value: The value to set - anything that can be a value in a
            bson document.
        """
        config = self._read_config()
        config[key] = value
        try:
            with open(self.config_path + '.tmp', 'wb') as config_file:
                config_file.write(bson.dumps(config))
        except IOError, e:
            if e.errno != errno.ENOENT:
                raise
            os.mkdir(self.metadatadir)
            with open(self.config_path + '.tmp', 'wb') as config_file:
                config_file.write(bson.dumps(config))
        os.rename(self.config_path + '.tmp', self.config_path)

    def oldest_date(self):
        """Return the date of the oldest datedir in the repository.

        If pruning / resubmission is working this should also be the date of
        the oldest oops in the repository.
        """
        dirs = list(self._datedirs())
        if not dirs:
            raise ValueError("No OOPSes in repository.")
        return datetime.date(*sorted(dirs)[0][1])

    def prune_unreferenced(self, start_time, stop_time, references):
        """Delete OOPS reports filed between start_time and stop_time.

        A report is deleted if all of the following are true:

        * it is in a datedir covered by [start_time, stop_time] inclusive of
          the end points.

        * It is not in the set references.

        * Its timestamp falls between start_time and stop_time inclusively, or
          its timestamp is outside the datedir it is in, or there is no
          timestamp on the report.

        :param start_time: The lower bound to prune within.
        :param stop_time: The upper bound to prune within.
        :param references: An iterable of OOPS ids to keep.
        """
        start_date = start_time.date()
        stop_date = stop_time.date()
        midnight = datetime.time(tzinfo=utc)
        for dirname, (y,m,d) in self._datedirs():
            dirdate = datetime.date(y, m, d)
            if dirdate < start_date or dirdate > stop_date:
                continue
            dirpath = os.path.join(self.root, dirname)
            files = os.listdir(dirpath)
            deleted = 0
            for candidate in map(partial(os.path.join, dirpath), files):
                if candidate.endswith('.tmp'):
                    # Old half-written oops: just remove.
                    os.unlink(candidate)
                    deleted += 1
                    continue
                with file(candidate, 'rb') as report_file:
                    report = serializer.read(report_file)
                    report_time = report.get('time', None)
                    if (report_time is None or
                        report_time.date() < dirdate or
                        report_time.date() > dirdate):
                        # The report is oddly filed or missing a precise
                        # datestamp. Treat it like midnight on the day of the
                        # directory it was placed in - this is a lower bound on
                        # when it was actually created.
                        report_time = datetime.datetime.combine(
                            dirdate, midnight)
                    if (report_time >= start_time and
                        report_time <= stop_time and
                        report['id'] not in references):
                        # Unreferenced and prunable
                        os.unlink(candidate)
                        deleted += 1
            if deleted == len(files):
                # Everything in the directory was deleted.
                os.rmdir(dirpath)
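
For orientation, the sketch below exercises the public API defined in the file above (publish, republish and the config helpers). It is not part of the packaged file; the repository path, the report contents and the config key are illustrative placeholders.

# Illustrative only: '/tmp/oopses', the report dict and the config key are
# made-up values; the import path matches the installed module location.
from oops_datedir_repo.repository import DateDirRepo

repo = DateDirRepo('/tmp/oopses', stash_path=True)

# With no instance_id, the OOPS id is an md5 hash of the serialized report
# and the file lands in /tmp/oopses/YYYY-MM-DD/OOPS-<hash>.
report = {'type': 'ValueError', 'value': 'example failure'}
oops_id = repo.publish(report)
print(report['datedir_repo_filepath'])  # stash_path=True records the path

# Persist a small piece of repository metadata in metadata/config.bson.
repo.set_config('pruned-until', '2011-12-28')

# Drain the repository into another publisher, e.g. a message queue client.
# A report is deleted from disk only when the publisher returns an id.
queue = []
def enqueue(report):
    queue.append(report)
    return report['id']

repo.republish(enqueue)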