This file is indexed.

/usr/share/pyshared/ZODB/scripts/stats.py is in python-zodb 1:3.9.7-2.

This file is owned by root:root, with mode 0o644.

The actual contents of the file can be viewed below.

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
##############################################################################
#
# Copyright (c) 2001, 2002 Zope Foundation and Contributors.
# All Rights Reserved.
#
# This software is subject to the provisions of the Zope Public License,
# Version 2.1 (ZPL).  A copy of the ZPL should accompany this distribution.
# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
# FOR A PARTICULAR PURPOSE
#
##############################################################################
"""Trace file statistics analyzer.

Usage: stats.py [-h] [-i interval] [-q] [-s] [-S] [-v] [-X] tracefile
-h: print histogram of object load frequencies
-i: summarizing interval in minutes (default 15; max 60)
-q: quiet; don't print summaries
-s: print histogram of object sizes
-S: don't print statistics
-v: verbose; print each record
-X: enable heuristic checking for misaligned records: oids > 2**32
    will be rejected; this requires the tracefile to be seekable
"""

"""File format:

Each record is 26 bytes, plus a variable number of bytes to store an oid,
with the following layout.  Numbers are big-endian integers.

Offset  Size  Contents

0       4     timestamp (seconds since 1/1/1970)
4       3     data size, in 256-byte increments, rounded up
7       1     code (see below)
8       2     object id length
10      8     start tid
18      8     end tid
26  variable  object id

The code at offset 7 packs three fields:

Mask    bits  Contents

0x80    1     set if there was a non-empty version string
0x7e    6     function and outcome code
0x01    1     current cache file (0 or 1)

The "current cache file" bit is no longer used; it refers to a 2-file
cache scheme used before ZODB 3.3.

The function and outcome codes are documented in detail at the end of
this file in the 'explain' dictionary.  Note that the keys there (and
also the arguments to _trace() in ClientStorage.py) are 'code & 0x7e',
i.e. the low bit is always zero.
"""

import sys
import time
import getopt
import struct
from types import StringType

def usage(msg):
    """Write msg, followed by this module's usage text, to standard error."""
    for text in (msg, __doc__):
        print >> sys.stderr, text

def main():
    """Read the trace file named on the command line and print reports.

    Options and the single file argument come from sys.argv.  A file name
    ending in ".gz" is opened through gzip, "-" means sys.stdin, and
    anything else is opened as a regular binary file.

    Returns 2 for a command-line error, 1 when the file cannot be opened
    or yields no records, and None otherwise.
    """
    # Parse options
    verbose = False
    quiet = False
    dostats = True
    print_size_histogram = False
    print_histogram = False
    interval = 15*60 # Every 15 minutes
    try:
        opts, args = getopt.getopt(sys.argv[1:], "hi:qsSvX")
    except getopt.error, msg:
        usage(msg)
        return 2
    for o, a in opts:
        if o == '-h':
            print_histogram = True
        elif o == "-i":
            # A malformed number is a usage error, not a traceback.
            try:
                interval = int(60 * float(a))
            except (ValueError, OverflowError):
                usage("invalid interval: %r" % (a,))
                return 2
            # Non-positive values fall back to one minute; anything above
            # one hour is capped at one hour.
            if interval <= 0:
                interval = 60
            elif interval > 3600:
                interval = 3600
        elif o == "-q":
            quiet = True
            verbose = False
        elif o == "-s":
            print_size_histogram = True
        elif o == "-S":
            dostats = False
        elif o == "-v":
            verbose = True
        elif o == '-X':
            # Accepted on the command line, but nothing in this function
            # acts on it.
            pass
        else:
            assert False, (o, opts)

    if len(args) != 1:
        usage("exactly one file argument required")
        return 2
    filename = args[0]

    # Open file
    if filename.endswith(".gz"):
        # Open gzipped file
        try:
            import gzip
        except ImportError:
            print >> sys.stderr, "can't read gzipped files (no module gzip)"
            return 1
        try:
            f = gzip.open(filename, "rb")
        except IOError, msg:
            print >> sys.stderr, "can't open %s: %s" % (filename, msg)
            return 1
    elif filename == '-':
        # Read from stdin
        f = sys.stdin
    else:
        # Open regular file
        try:
            f = open(filename, "rb")
        except IOError, msg:
            print >> sys.stderr, "can't open %s: %s" % (filename, msg)
            return 1

    rt0 = time.time()
    bycode = {}     # map code to count of occurrences
    byinterval = {} # map code to count in current interval
    records = 0     # number of trace records read
    versions = 0    # number of trace records with versions
    datarecords = 0 # number of records with dlen set
    datasize = 0L   # sum of dlen across records with dlen set
    oids = {}       # map oid to number of times it was loaded
    bysize = {}     # map data size to {oid: number of loads}
    bysizew = {}    # map data size to {oid: number of writes}
    total_loads = 0
    t0 = None       # first timestamp seen
    te = None       # most recent timestamp seen
    h0 = None       # timestamp at start of current interval
    he = None       # timestamp at end of current interval
    thisinterval = None  # generally te//interval
    f_read = f.read
    unpack = struct.unpack
    FMT = ">iiH8s8s"
    FMT_SIZE = struct.calcsize(FMT)
    assert FMT_SIZE == 26
    # Read file, gathering statistics, and printing each record if verbose.
    try:
        while 1:
            r = f_read(FMT_SIZE)
            if len(r) < FMT_SIZE:
                break
            ts, code, oidlen, start_tid, end_tid = unpack(FMT, r)
            if ts == 0:
                # Must be a misaligned record caused by a crash.
                # Resynchronize 8 bytes past the start of the bad header.
                # This must happen whether or not -q was given; otherwise
                # quiet mode would parse the stream differently.
                pos = f.tell() - FMT_SIZE
                if not quiet:
                    print "Skipping 8 bytes at offset", pos
                f.seek(pos + 8)
                continue
            oid = f_read(oidlen)
            if len(oid) < oidlen:
                break
            records += 1
            if t0 is None:
                t0 = ts
                thisinterval = t0 // interval
                h0 = he = ts
            te = ts
            if ts // interval != thisinterval:
                # Crossed into a new interval: report the finished one.
                if not quiet:
                    dumpbyinterval(byinterval, h0, he)
                byinterval = {}
                thisinterval = ts // interval
                h0 = ts
            he = ts
            # The second header word carries the data size in its upper
            # three bytes and the code byte in its lowest byte.
            dlen, code = code & 0x7fffff00, code & 0xff
            if dlen:
                datarecords += 1
                datasize += dlen
            if code & 0x80:
                version = 'V'
                versions += 1
            else:
                version = '-'
            code &= 0x7e
            bycode[code] = bycode.get(code, 0) + 1
            byinterval[code] = byinterval.get(code, 0) + 1
            if dlen:
                if code & 0x70 == 0x20: # All loads
                    bysize[dlen] = d = bysize.get(dlen) or {}
                    d[oid] = d.get(oid, 0) + 1
                elif code & 0x70 == 0x50: # All stores
                    bysizew[dlen] = d = bysizew.get(dlen) or {}
                    d[oid] = d.get(oid, 0) + 1
            if verbose:
                print "%s %02x %s %016x %016x %c %s" % (
                    time.ctime(ts)[4:-5],
                    code,
                    oid_repr(oid),
                    U64(start_tid),
                    U64(end_tid),
                    version,
                    dlen and str(dlen) or "")
            if code & 0x70 == 0x20:
                oids[oid] = oids.get(oid, 0) + 1
                total_loads += 1
            elif code == 0x00:    # restart
                if not quiet:
                    dumpbyinterval(byinterval, h0, he)
                byinterval = {}
                thisinterval = ts // interval
                h0 = he = ts
                if not quiet:
                    print time.ctime(ts)[4:-5],
                    print '='*20, "Restart", '='*20
    except KeyboardInterrupt:
        print "\nInterrupted.  Stats so far:\n"

    end_pos = f.tell()
    f.close()
    rte = time.time()
    if not quiet:
        dumpbyinterval(byinterval, h0, he)

    # Error if nothing was read
    if not records:
        print >> sys.stderr, "No records processed"
        return 1

    # Print statistics
    if dostats:
        # A trace may contain no record with a data size at all; report
        # an average of zero instead of dividing by zero.
        if datarecords:
            avgsize = datasize / 1024.0 / datarecords
        else:
            avgsize = 0.0
        print
        print "Read %s trace records (%s bytes) in %.1f seconds" % (
            addcommas(records), addcommas(end_pos), rte-rt0)
        print "Versions:   %s records used a version" % addcommas(versions)
        print "First time: %s" % time.ctime(t0)
        print "Last time:  %s" % time.ctime(te)
        print "Duration:   %s seconds" % addcommas(te-t0)
        print "Data recs:  %s (%.1f%%), average size %.1f KB" % (
            addcommas(datarecords),
            100.0 * datarecords / records,
            avgsize)
        print "Hit rate:   %.1f%% (load hits / loads)" % hitrate(bycode)
        print
        codes = bycode.keys()
        codes.sort()
        print "%13s %4s %s" % ("Count", "Code", "Function (action)")
        for code in codes:
            print "%13s  %02x  %s" % (
                addcommas(bycode.get(code, 0)),
                code,
                explain.get(code) or "*** unknown code ***")

    # Print histogram.
    if print_histogram:
        print
        print "Histogram of object load frequency"
        total = len(oids)
        print "Unique oids: %s" % addcommas(total)
        print "Total loads: %s" % addcommas(total_loads)
        s = addcommas(total)
        width = max(len(s), len("objects"))
        fmt = "%5d %" + str(width) + "s %5.1f%% %5.1f%% %5.1f%%"
        hdr = "%5s %" + str(width) + "s %6s %6s %6s"
        print hdr % ("loads", "objects", "%obj", "%load", "%cum")
        cum = 0.0
        # histogram() yields nothing when no loads were seen, so the
        # divisions below never see a zero total.
        for binsize, count in histogram(oids):
            obj_percent = 100.0 * count / total
            load_percent = 100.0 * count * binsize / total_loads
            cum += load_percent
            print fmt % (binsize, addcommas(count),
                         obj_percent, load_percent, cum)

    # Print size histogram.
    if print_size_histogram:
        print
        print "Histograms of object sizes"
        print
        dumpbysize(bysizew, "written", "writes")
        dumpbysize(bysize, "loaded", "loads")

def dumpbysize(bysize, how, how2):
    print
    print "Unique sizes %s: %s" % (how, addcommas(len(bysize)))
    print "%10s %6s %6s" % ("size", "objs", how2)
    sizes = bysize.keys()
    sizes.sort()
    for size in sizes:
        loads = 0
        for n in bysize[size].itervalues():
            loads += n
        print "%10s %6d %6d" % (addcommas(size),
                                len(bysize.get(size, "")),
                                loads)

def dumpbyinterval(byinterval, h0, he):
    loads = hits = 0
    for code in byinterval:
        if code & 0x70 == 0x20:
            n = byinterval[code]
            loads += n
            if code in (0x22, 0x26):
                hits += n
    if not loads:
        return
    if loads:
        hr = 100.0 * hits / loads
    else:
        hr = 0.0
    print "%s-%s %10s loads, %10s hits,%5.1f%% hit rate" % (
        time.ctime(h0)[4:-8], time.ctime(he)[14:-8],
        addcommas(loads), addcommas(hits), hr)

def hitrate(bycode):
    """Return load hits as a percentage of all loads.

    bycode maps function/outcome codes to occurrence counts.  Codes whose
    0x70 bits equal 0x20 count as loads; of those, 0x22 and 0x26 count as
    hits.  Returns 0.0 when there are no loads.
    """
    total = 0
    found = 0
    for code, count in bycode.items():
        if code & 0x70 != 0x20:
            continue
        total += count
        if code == 0x22 or code == 0x26:
            found += count
    if not total:
        return 0.0
    return 100.0 * found / total

def histogram(d):
    """Return a sorted list of (value, count) pairs for the values of d.

    For every distinct value in the mapping d, count is the number of
    keys carrying that value.  The list is sorted by value.

    Uses values() and sorted() rather than itervalues() and an in-place
    sort of items(), so the result is the same on Python 2 and the
    function also runs where dict views are not lists.
    """
    bins = {}
    for v in d.values():
        bins[v] = bins.get(v, 0) + 1
    return sorted(bins.items())

def U64(s):
    """Decode the 8-byte big-endian string s as an unsigned integer."""
    (value,) = struct.unpack(">Q", s)
    return value

def oid_repr(oid):
    """Return a printable form of an object id.

    An 8-byte string is shown as its integer value in hex, right-aligned
    in 16 columns; anything else is shown as its repr().
    """
    if not isinstance(oid, StringType) or len(oid) != 8:
        return repr(oid)
    return '%16x' % U64(oid)

def addcommas(n):
    """Return str(n) with a comma between each group of three digits.

    A leading minus sign is kept in front of the grouped digits.
    """
    digits = str(n)
    sign = ''
    if digits[0] == '-':
        sign = '-'
        digits = digits[1:]
    # Peel three-character groups off the right-hand end.
    groups = []
    while len(digits) > 3:
        groups.insert(0, digits[-3:])
        digits = digits[:-3]
    groups.insert(0, digits)
    return sign + ','.join(groups)

# Human-readable description of every known function/outcome code.
# The high hex digit names the operation (0 = setup, 1 = invalidate,
# 2 = load, 5 = store) and the low hex digit names the outcome.  The text
# in parentheses says whether the outcome was a hit or a miss.
explain = {
    # setup
    0x00: "_setup_trace (initialization)",

    # invalidations (0x1e can occur during startup verification)
    0x10: "invalidate (miss)",
    0x1a: "invalidate (hit, version)",
    0x1c: "invalidate (hit, saving non-current)",
    0x1e: "invalidate (hit, discarding current or non-current)",

    # loads
    0x20: "load (miss)",
    0x22: "load (hit)",
    0x24: "load (non-current, miss)",
    0x26: "load (non-current, hit)",

    # stores
    0x50: "store (version)",
    0x52: "store (current, non-version)",
    0x54: "store (non-current)",
}

# When run as a script, main()'s return value becomes the process exit
# status.
if __name__ == "__main__":
    sys.exit(main())