/usr/lib/python2.7/dist-packages/oops_datedir_repo/repository.py is in python-oops-datedir-repo 0.0.17-0ubuntu2.
This file is owned by root:root, with mode 0o644.
The actual contents of the file can be viewed below.
#
# Copyright (c) 2011, Canonical Ltd
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Lesser General Public License as published by
# the Free Software Foundation, version 3 only.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
# GNU Lesser General Public License version 3 (see the file LICENSE).
"""The primary interface to oopses stored on disk - the DateDirRepo."""
__metaclass__ = type
__all__ = [
'DateDirRepo',
]
import datetime
import errno
from functools import partial
from hashlib import md5
import os.path
import stat
from pytz import utc
import anybson as bson
import serializer
import serializer_bson
from uniquefileallocator import UniqueFileAllocator
class DateDirRepo:
    """Publish oopses to a date-dir repository.

    A date-dir repository is a directory containing:

    * Zero or one directories called 'metadata'. If it exists this directory
      contains any housekeeping material needed (such as a metadata.conf ini
      file).

    * Zero or more directories named like YYYY-MM-DD, which contain zero or
      more OOPS reports. OOPS file names can take various forms, but must not
      end in .tmp - those are considered to be OOPS reports that are currently
      being written.
    """

    def __init__(self, error_dir, instance_id=None, serializer=None,
            inherit_id=False, stash_path=False):
        """Create a DateDirRepo.

        :param error_dir: The base directory to write OOPSes into. OOPSes are
            written into a subdirectory this named after the date (e.g.
            2011-12-30).
        :param instance_id: If None, OOPS file names are named after the OOPS
            id which is generated by hashing the serialized OOPS (without the
            id field). Otherwise OOPS file names and ids are created by
            allocating file names through a UniqueFileAllocator.
            UniqueFileAllocator has significant performance and concurrency
            limits and hash based naming is recommended.
        :param serializer: If supplied should be the module (e.g.
            oops_datedir_repo.serializer_rfc822) to use to serialize OOPSes.
            Defaults to using serializer_bson.
        :param inherit_id: If True, use the oops ID (if present) supplied in
            the report, rather than always assigning a new one.
        :param stash_path: If True, the filename that the OOPS was written to
            is stored in the OOPS report under the key 'datedir_repo_filepath'.
            It is not stored in the OOPS written to disk, only the in-memory
            model.
        """
        if instance_id is not None:
            # Legacy allocator-based naming: ids/filenames are handed out
            # serially by UniqueFileAllocator rather than derived from the
            # report contents.
            self.log_namer = UniqueFileAllocator(
                output_root=error_dir,
                log_type="OOPS",
                log_subtype=instance_id,
                )
        else:
            self.log_namer = None
        self.root = error_dir
        if serializer is None:
            serializer = serializer_bson
        # Module used to *write* reports; reading back (republish/prune)
        # goes through the module-level `serializer` import instead.
        self.serializer = serializer
        self.inherit_id = inherit_id
        self.stash_path = stash_path
        # Housekeeping lives under root/metadata; config is a single bson doc.
        self.metadatadir = os.path.join(self.root, 'metadata')
        self.config_path = os.path.join(self.metadatadir, 'config.bson')

    def publish(self, report, now=None):
        """Write the report to disk.

        The report is written to a temporary file, and then renamed to its
        final location. Programs concurrently reading from a DateDirRepo
        should ignore files ending in .tmp.

        :param now: The datetime to use as the current time.  Will be
            determined if not supplied.  Useful for testing.
        :return: The oops id assigned to (and stored in) the report.
        """
        # We set file permission to: rw-r--r-- (so that reports from
        # umask-restricted services can be gathered by a tool running as
        # another user).
        wanted_file_permission = (
            stat.S_IRUSR | stat.S_IWUSR | stat.S_IRGRP | stat.S_IROTH)
        if now is not None:
            now = now.astimezone(utc)
        else:
            now = datetime.datetime.now(utc)
        # Don't mess with the original report when changing ids etc.
        original_report = report
        report = dict(report)
        if self.log_namer is not None:
            # Allocator path: the allocator picks both the id and the path
            # (and is responsible for any directory creation it needs).
            oopsid, filename = self.log_namer.newId(now)
        else:
            # Hash path: id is derived from the serialized report *before*
            # the id field is (re)assigned below, so equal reports hash
            # equally.  NOTE(review): hashing always uses serializer_bson
            # even when self.serializer differs - presumably intentional so
            # ids are stable across serializers; confirm before changing.
            md5hash = md5(serializer_bson.dumps(report)).hexdigest()
            oopsid = 'OOPS-%s' % md5hash
            prefix = os.path.join(self.root, now.strftime('%Y-%m-%d'))
            if not os.path.isdir(prefix):
                os.makedirs(prefix)
                # For directories we need to set the x bits too.
                os.chmod(
                    prefix, wanted_file_permission | stat.S_IXUSR | stat.S_IXGRP |
                    stat.S_IXOTH)
            filename = os.path.join(prefix, oopsid)
        if self.inherit_id:
            # Keep the caller-supplied id if there is one; the freshly
            # generated id is only a fallback.
            oopsid = report.get('id') or oopsid
        report['id'] = oopsid
        # Write to a .tmp sibling then rename so concurrent readers never
        # see a partially written report.
        self.serializer.write(report, open(filename + '.tmp', 'wb'))
        os.rename(filename + '.tmp', filename)
        if self.stash_path:
            # Stash on the *caller's* dict only - the on-disk copy was
            # serialized before this key existed.
            original_report['datedir_repo_filepath'] = filename
        os.chmod(filename, wanted_file_permission)
        return report['id']

    def republish(self, publisher):
        """Republish the contents of the DateDirRepo to another publisher.

        This makes it easy to treat a DateDirRepo as a backing store in
        message queue environments: if the message queue is down, flush to the
        DateDirRepo, then later pick the OOPSes up and send them to the
        message queue environment.

        For instance:

        >>> repo = DateDirRepo('.')
        >>> repo.publish({'some':'report'})
        >>> queue = []
        >>> def queue_publisher(report):
        ...     queue.append(report)
        ...     return report['id']
        >>> repo.republish(queue_publisher)

        Will scan the disk and send the single found report to
        queue_publisher, deleting the report afterwards.

        Empty datedir directories are automatically cleaned up, as are stale
        .tmp files.

        If the publisher returns None, signalling that it did not publish the
        report, then the report is not deleted from disk.
        """
        # Directories (and .tmp files) older than two days are considered
        # stale and eligible for cleanup.
        two_days = datetime.timedelta(2)
        now = datetime.date.today()
        old = now - two_days
        for dirname, (y,m,d) in self._datedirs():
            date = datetime.date(y, m, d)
            prune = date < old
            dirpath = os.path.join(self.root, dirname)
            files = os.listdir(dirpath)
            if not files and prune:
                # Cleanup no longer needed directory.
                os.rmdir(dirpath)
            for candidate in map(partial(os.path.join, dirpath), files):
                if candidate.endswith('.tmp'):
                    # In-progress write: only delete once the whole datedir
                    # is old enough that the writer must have died.
                    if prune:
                        os.unlink(candidate)
                    continue
                # NOTE(review): reads via the module-level `serializer`,
                # not self.serializer - presumably it sniffs the on-disk
                # format; verify against the serializer module.
                with file(candidate, 'rb') as report_file:
                    report = serializer.read(report_file)
                oopsid = publisher(report)
                if oopsid:
                    # Published successfully - safe to delete our copy.
                    os.unlink(candidate)

    def _datedirs(self):
        """Yield each subdir which looks like a datedir.

        Yields (dirname, (year, month, day)) tuples for every entry of
        self.root whose name splits into three '-'-separated integers.
        """
        for dirname in os.listdir(self.root):
            try:
                y, m, d = dirname.split('-')
                y = int(y)
                m = int(m)
                d = int(d)
            except ValueError:
                # Not a datedir
                continue
            yield dirname, (y, m, d)

    def _read_config(self):
        """Return the current config document from disk.

        Returns an empty dict when the config file does not exist yet;
        any other IO failure propagates.
        """
        try:
            with open(self.config_path, 'rb') as config_file:
                return bson.loads(config_file.read())
        except IOError, e:
            if e.errno != errno.ENOENT:
                raise
            return {}

    def get_config(self, key):
        """Return a key from the repository config.

        :param key: A key to read from the config.
        :raises KeyError: If the key is not present in the config document.
        """
        return self._read_config()[key]

    def set_config(self, key, value):
        """Set config option key to value.

        This is written to the bson document root/metadata/config.bson

        :param key: The key to set - anything that can be a key in a bson
            document.
        :param value: The value to set - anything that can be a value in a
            bson document.
        """
        # Read-modify-write of the whole document; concurrent writers can
        # race and lose updates (single-writer assumed).
        config = self._read_config()
        config[key] = value
        try:
            with open(self.config_path + '.tmp', 'wb') as config_file:
                config_file.write(bson.dumps(config))
        except IOError, e:
            if e.errno != errno.ENOENT:
                raise
            # metadata/ did not exist yet: create it and retry the write.
            os.mkdir(self.metadatadir)
            with open(self.config_path + '.tmp', 'wb') as config_file:
                config_file.write(bson.dumps(config))
        # Atomic replace so readers never see a half-written config.
        os.rename(self.config_path + '.tmp', self.config_path)

    def oldest_date(self):
        """Return the date of the oldest datedir in the repository.

        If pruning / resubmission is working this should also be the date of
        the oldest oops in the repository.

        :raises ValueError: If the repository contains no datedirs.
        """
        dirs = list(self._datedirs())
        if not dirs:
            raise ValueError("No OOPSes in repository.")
        # Tuples sort lexicographically, so min((y, m, d)) is the oldest day.
        return datetime.date(*sorted(dirs)[0][1])

    def prune_unreferenced(self, start_time, stop_time, references):
        """Delete OOPS reports filed between start_time and stop_time.

        A report is deleted if all of the following are true:

        * it is in a datedir covered by [start_time, stop_time] inclusive of
          the end points.

        * It is not in the set references.

        * Its timestamp falls between start_time and stop_time inclusively,
          or its timestamp is outside the datedir it is in, or there is no
          timestamp on the report.

        :param start_time: The lower bound to prune within.
        :param stop_time: The upper bound to prune within.
        :param references: An iterable of OOPS ids to keep.
        """
        start_date = start_time.date()
        stop_date = stop_time.date()
        # UTC-aware midnight, used as a stand-in timestamp below; keeps the
        # comparison with the (presumably aware) start/stop times valid.
        midnight = datetime.time(tzinfo=utc)
        for dirname, (y,m,d) in self._datedirs():
            dirdate = datetime.date(y, m, d)
            if dirdate < start_date or dirdate > stop_date:
                # Whole directory outside the pruning window.
                continue
            dirpath = os.path.join(self.root, dirname)
            files = os.listdir(dirpath)
            deleted = 0
            for candidate in map(partial(os.path.join, dirpath), files):
                if candidate.endswith('.tmp'):
                    # Old half-written oops: just remove.
                    os.unlink(candidate)
                    deleted += 1
                    continue
                with file(candidate, 'rb') as report_file:
                    report = serializer.read(report_file)
                report_time = report.get('time', None)
                # The two date comparisons together mean
                # "report_time.date() != dirdate", i.e. the report is filed
                # in the wrong datedir.
                if (report_time is None or
                    report_time.date() < dirdate or
                    report_time.date() > dirdate):
                    # The report is oddly filed or missing a precise
                    # datestamp. Treat it like midnight on the day of the
                    # directory it was placed in - this is a lower bound on
                    # when it was actually created.
                    report_time = datetime.datetime.combine(
                        dirdate, midnight)
                if (report_time >= start_time and
                    report_time <= stop_time and
                    report['id'] not in references):
                    # Unreferenced and prunable
                    os.unlink(candidate)
                    deleted += 1
            if deleted == len(files):
                # Everything in the directory was deleted.
                os.rmdir(dirpath)
|