This file is indexed.

/usr/share/pyshared/ZODB/scripts/migrate.py is in python-zodb 1:3.9.7-2.

This file is owned by root:root, with mode 0o644.

The actual contents of the file can be viewed below.

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
#!/usr/bin/python

##############################################################################
#
# Copyright (c) 2001, 2002, 2003 Zope Foundation and Contributors.
# All Rights Reserved.
#
# This software is subject to the provisions of the Zope Public License,
# Version 2.1 (ZPL).  A copy of the ZPL should accompany this distribution.
# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
# FOR A PARTICULAR PURPOSE
#
##############################################################################

"""A script to gather statistics while doing a storage migration.

This is very similar to a standard storage's copyTransactionsFrom() method,
except that it's geared to run as a script, and it collects useful pieces of
information as it's working.  This script can be used to stress test a storage
since it blasts transactions at it as fast as possible.  You can get a good
sense of the performance of a storage by running this script.

Actually it just counts the size of pickles in the transaction via the
iterator protocol, so storage overheads aren't counted.

Usage: %(PROGRAM)s [options] [source-storage-args] [destination-storage-args]
Options:
    -S sourcetype
    --stype=sourcetype
        This is the name of a recognized type for the source database.  Use -T
        to print out the known types.  Defaults to "FileStorage".

    -D desttype
    --dtype=desttype
        This is the name of a recognized type for the destination database.
        Use -T to print out the known types.  Defaults to "FileStorage".

    -o filename
    --output=filename
        Print results in filename, otherwise stdout.

    -m txncount
    --max=txncount
        Stop after committing txncount transactions.

    -k txncount
    --skip=txncount
        Skip the first txncount transactions.

    -p/--profile
        Turn on specialized profiling.

    -t/--timestamps
        Print tids as timestamps.

    -T/--storage_types
        Print all the recognized storage types and exit.

    -v/--verbose
        Turns on verbose output.  Multiple -v options increase the verbosity.

    -h/--help
        Print this message and exit.

Positional arguments:

    source-storage-args:
        Semicolon separated list of arguments for the source storage, as
        key=val pairs.  E.g. "file_name=Data.fs;read_only=1"

    destination-storage-args:
        Semicolon separated list of arguments for the destination storage,
        as key=val pairs.  E.g. "name=full;frequency=3600"
"""

import re
import sys
import time
import getopt
import marshal
import profile

from ZODB import utils
from ZODB import StorageTypes
from ZODB.TimeStamp import TimeStamp

# Name this script was invoked as; substituted into the usage text through
# the %(PROGRAM)s placeholder in the module docstring.
PROGRAM = sys.argv[0]
# Eight NUL bytes; doit() passes this to store() as the previous revision id
# for any oid it has not stored yet during this run.
ZERO = '\0'*8

# Compatibility shim: define True and False on interpreters whose builtins
# do not provide them.
try:
    True, False
except NameError:
    True = 1
    False = 0



def usage(code, msg=''):
    """Write the usage text to stderr and exit.

    The module docstring, with its %(...)s placeholders filled in from the
    module globals, is printed first; msg follows when it is non-empty.
    code is handed to sys.exit().
    """
    lines = [__doc__ % globals()]
    if msg:
        lines.append(msg)
    for line in lines:
        print >> sys.stderr, line
    sys.exit(code)


def error(code, msg):
    """Report a command-line error on stderr and exit.

    msg may be any object (main() passes getopt exception instances); it is
    converted with %s.  The "use --help" hint is written to stderr as well,
    so that diagnostics never get mixed into the statistics on stdout.
    code is handed to sys.exit().
    """
    sys.stderr.write('%s\n' % (msg,))
    sys.stderr.write('use --help for usage message\n')
    sys.exit(code)



def main():
    try:
        opts, args = getopt.getopt(
            sys.argv[1:],
            'hvo:pm:k:D:S:Tt',
            ['help', 'verbose',
             'output=', 'profile', 'storage_types',
             'max=', 'skip=', 'dtype=', 'stype=', 'timestamps'])
    except getopt.error, msg:
        error(2, msg)

    class Options:
        stype = 'FileStorage'
        dtype = 'FileStorage'
        verbose = 0
        outfile = None
        profilep = False
        maxtxn = -1
        skiptxn = -1
        timestamps = False

    options = Options()

    for opt, arg in opts:
        if opt in ('-h', '--help'):
            usage(0)
        elif opt in ('-v', '--verbose'):
            options.verbose += 1
        elif opt in ('-T', '--storage_types'):
            print_types()
            sys.exit(0)
        elif opt in ('-S', '--stype'):
            options.stype = arg
        elif opt in ('-D', '--dtype'):
            options.dtype = arg
        elif opt in ('-o', '--output'):
            options.outfile = arg
        elif opt in ('-p', '--profile'):
            options.profilep = True
        elif opt in ('-m', '--max'):
            options.maxtxn = int(arg)
        elif opt in ('-k', '--skip'):
            options.skiptxn = int(arg)
        elif opt in ('-t', '--timestamps'):
            options.timestamps = True

    if len(args) > 2:
        error(2, "too many arguments")

    srckws = {}
    if len(args) > 0:
        srcargs = args[0]
        for kv in re.split(r';\s*', srcargs):
            key, val = kv.split('=')
            srckws[key] = val

    destkws = {}
    if len(args) > 1:
        destargs = args[1]
        for kv in re.split(r';\s*', destargs):
            key, val = kv.split('=')
            destkws[key] = val

    if options.stype not in StorageTypes.storage_types.keys():
        usage(2, 'Source database type must be provided')
    if options.dtype not in StorageTypes.storage_types.keys():
        usage(2, 'Destination database type must be provided')

    # Open the output file
    if options.outfile is None:
        options.outfp = sys.stdout
        options.outclosep = False
    else:
        options.outfp = open(options.outfile, 'w')
        options.outclosep = True

    if options.verbose > 0:
        print 'Opening source database...'
    modname, sconv = StorageTypes.storage_types[options.stype]
    kw = sconv(**srckws)
    __import__(modname)
    sclass = getattr(sys.modules[modname], options.stype)
    srcdb = sclass(**kw)

    if options.verbose > 0:
        print 'Opening destination database...'
    modname, dconv = StorageTypes.storage_types[options.dtype]
    kw = dconv(**destkws)
    __import__(modname)
    dclass = getattr(sys.modules[modname], options.dtype)
    dstdb = dclass(**kw)

    try:
        t0 = time.time()
        doit(srcdb, dstdb, options)
        t1 = time.time()
        if options.verbose > 0:
            print 'Migration time:          %8.3f' % (t1-t0)
    finally:
        # Done
        srcdb.close()
        dstdb.close()
        if options.outclosep:
            options.outfp.close()



def doit(srcdb, dstdb, options):
    outfp = options.outfp
    profilep = options.profilep
    verbose = options.verbose
    # some global information
    largest_pickle = 0
    largest_txn_in_size = 0
    largest_txn_in_objects = 0
    total_pickle_size = 0L
    total_object_count = 0
    # Ripped from BaseStorage.copyTransactionsFrom()
    ts = None
    ok = True
    prevrevids = {}
    counter = 0
    skipper = 0
    if options.timestamps:
        print "%4s. %26s %6s %8s %5s %5s %5s %5s %5s" % (
            "NUM", "TID AS TIMESTAMP", "OBJS", "BYTES",
            # Does anybody know what these times mean?
            "t4-t0", "t1-t0", "t2-t1", "t3-t2", "t4-t3")
    else:
        print "%4s. %20s %6s %8s %6s %6s %6s %6s %6s" % (
            "NUM", "TRANSACTION ID", "OBJS", "BYTES",
            # Does anybody know what these times mean?
            "t4-t0", "t1-t0", "t2-t1", "t3-t2", "t4-t3")
    for txn in srcdb.iterator():
        skipper += 1
        if skipper <= options.skiptxn:
            continue
        counter += 1
        if counter > options.maxtxn >= 0:
            break
        tid = txn.tid
        if ts is None:
            ts = TimeStamp(tid)
        else:
            t = TimeStamp(tid)
            if t <= ts:
                if ok:
                    print >> sys.stderr, (
                        'Time stamps are out of order %s, %s' % (ts, t))
                    ok = False
                    ts = t.laterThan(ts)
                    tid = `ts`
                else:
                    ts = t
                    if not ok:
                        print >> sys.stderr, (
                            'Time stamps are back in order %s' % t)
                        ok = True
        if verbose > 1:
            print ts

        prof = None
        if profilep and (counter % 100) == 0:
            prof = profile.Profile()
        objects = 0
        size = 0
        newrevids = RevidAccumulator()
        t0 = time.time()
        dstdb.tpc_begin(txn, tid, txn.status)
        t1 = time.time()
        for r in txn:
            oid = r.oid
            objects += 1
            thissize = len(r.data)
            size += thissize
            if thissize > largest_pickle:
                largest_pickle = thissize
            if verbose > 1:
                if not r.version:
                    vstr = 'norev'
                else:
                    vstr = r.version
                print utils.U64(oid), vstr, len(r.data)
            oldrevid = prevrevids.get(oid, ZERO)
            result = dstdb.store(oid, oldrevid, r.data, r.version, txn)
            newrevids.store(oid, result)
        t2 = time.time()
        result = dstdb.tpc_vote(txn)
        t3 = time.time()
        newrevids.tpc_vote(result)
        prevrevids.update(newrevids.get_dict())
        # Profile every 100 transactions
        if prof:
            prof.runcall(dstdb.tpc_finish, txn)
        else:
            dstdb.tpc_finish(txn)
        t4 = time.time()

        # record the results
        if objects > largest_txn_in_objects:
            largest_txn_in_objects = objects
        if size > largest_txn_in_size:
            largest_txn_in_size = size
        if options.timestamps:
            tidstr = str(TimeStamp(tid))
            format = "%4d. %26s %6d %8d %5.3f %5.3f %5.3f %5.3f %5.3f"
        else:
            tidstr = utils.U64(tid)
            format = "%4d. %20s %6d %8d %6.4f %6.4f %6.4f %6.4f %6.4f"
        print >> outfp, format % (skipper, tidstr, objects, size,
                                  t4-t0, t1-t0, t2-t1, t3-t2, t4-t3)
        total_pickle_size += size
        total_object_count += objects

        if prof:
            prof.create_stats()
            fp = open('profile-%02d.txt' % (counter / 100), 'wb')
            marshal.dump(prof.stats, fp)
            fp.close()
    print >> outfp, "Largest pickle:          %8d" % largest_pickle
    print >> outfp, "Largest transaction:     %8d" % largest_txn_in_size
    print >> outfp, "Largest object count:    %8d" % largest_txn_in_objects
    print >> outfp, "Total pickle size: %14d" % total_pickle_size
    print >> outfp, "Total object count:      %8d" % total_object_count



# helper to deal with differences between old-style store() return and
# new-style store() return that supports ZEO
import types

class RevidAccumulator:
    """Collect the oid -> serial results of one transaction's stores.

    Accepts both result shapes handled by store() and tpc_vote(): a single
    serial string for the oid just stored, or a sequence of (oid, serial)
    pairs.  None means "nothing to record".
    """

    def __init__(self):
        # Maps oid -> serial string.
        self.data = {}

    def _update_from_list(self, pairs):
        """Record every (oid, serial) pair from pairs.

        A serial that is not a string is raised (presumably an exception
        object reported by the storage in place of a serial).
        """
        for entry in pairs:
            oid, serial = entry
            if isinstance(serial, types.StringType):
                self.data[oid] = serial
            else:
                raise serial

    def store(self, oid, result):
        """Record the return value of a storage store() call for oid."""
        if isinstance(result, types.StringType):
            self.data[oid] = result
            return
        if result is not None:
            self._update_from_list(result)

    def tpc_vote(self, result):
        """Record the return value of a storage tpc_vote() call."""
        if result is None:
            return
        self._update_from_list(result)

    def get_dict(self):
        """Return the accumulated oid -> serial dictionary (not a copy)."""
        return self.data



# Run the migration only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == '__main__':
    main()