This file is indexed.

/usr/lib/bup/cmd/bup-midx is in bup 0.29-3.

This file is owned by root:root, with mode 0o755.

The actual contents of the file can be viewed below.

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
#!/usr/bin/python2.7

import glob, math, os, resource, struct, sys, tempfile

from bup import options, git, midx, _helpers, xstat
from bup.helpers import (Sha1, add_error, atomically_replaced_file, debug1, fdatasync,
                         handle_ctrl_c, log, mmap_readwrite, qprogress,
                         saved_errors, unlink)


# Byte size of one "page"; only used below to derive SHA_PER_PAGE.
# NOTE(review): presumably meant to match the OS page size -- confirm.
PAGE_SIZE=4096
# Number of 20-byte entries that fit in one page, kept as a float (note the
# trailing dot).  _do_midx() divides the total object count by this value to
# decide how many bits the midx lookup table gets.
SHA_PER_PAGE=PAGE_SIZE/20.

# Option specification handed to options.Options() at the bottom of this
# script.  The text is consumed at runtime, so it must not be reformatted.
optspec = """
bup midx [options...] <idxnames...>
--
o,output=  output midx filename (default: auto-generated)
a,auto     automatically use all existing .midx/.idx files as input
f,force    merge produce exactly one .midx containing all objects
p,print    print names of generated midx files
check      validate contents of the given midx files (with -a, all midx files)
max-files= maximum number of idx files to open at once [-1]
d,dir=     directory containing idx/midx files
"""

# Module-level alias for the merge routine in bup's _helpers module;
# _do_midx() calls it to fill the memory-mapped output file.
merge_into = _helpers.merge_into


def _group(l, count):
    """Yield consecutive slices of l, each holding at most count items.

    The last slice may be shorter than count.  Nothing is yielded for an
    empty l.
    """
    for begin in xrange(0, len(l), count):
        end = begin + count
        yield l[begin:end]
        
        
def max_files():
    """Return how many idx files this process should open at once.

    Starts from the smaller of the two values reported by
    resource.getrlimit(resource.RLIMIT_NOFILE) and subtracts a safety
    margin: 20 when that value is above 32, otherwise 6.
    """
    limit = min(resource.getrlimit(resource.RLIMIT_NOFILE))
    # Large limits get the normal margin; small ones only the minimum margin.
    margin = 20 if limit > 32 else 6
    return limit - margin


def check_midx(name):
    """Validate the midx file `name`, reporting problems via add_error().

    Two passes are made:
      1. Every entry of every idx file listed in the midx (resolved
         relative to the midx file's directory) must be found both in
         that idx and in the midx.
      2. Iterating the midx must never produce an entry that compares
         lower than the one before it.

    If the midx itself cannot be opened (git.GitError), the error is
    recorded and the function returns early.  Nothing is returned.
    """
    display_name = git.repo_rel(name)
    log('Checking %s.\n' % display_name)
    try:
        merged = git.open_idx(name)
    except git.GitError as err:
        add_error('%s: %s' % (name, err))
        return

    def as_hex(raw):
        # Hex text of an entry, used only in error messages.
        return str(raw).encode('hex')

    # Pass 1: each object in each constituent idx must be findable.
    for idx_no, idx_name in enumerate(merged.idxnames):
        idx_path = os.path.join(os.path.dirname(name), idx_name)
        member = git.open_idx(idx_path)
        for obj_no, obj in enumerate(member):
            if obj_no % 1234 == 0:
                # Only refresh the progress line every 1234 objects.
                qprogress('  %d/%d: %s %d/%d\r'
                          % (idx_no, len(merged.idxnames),
                             git.shorten_hash(idx_name),
                             obj_no, len(member)))
            if not member.exists(obj):
                add_error("%s: %s: %s missing from idx"
                          % (display_name, git.shorten_hash(idx_name),
                             as_hex(obj)))
            if not merged.exists(obj):
                add_error("%s: %s: %s missing from midx"
                          % (display_name, git.shorten_hash(idx_name),
                             as_hex(obj)))

    # Pass 2: entries of the midx must come out in non-decreasing order.
    previous = None
    for obj_no, obj in enumerate(merged):
        if obj_no % 1234 == 0:
            qprogress('  Ordering: %d/%d\r' % (obj_no, len(merged)))
        if not (obj >= previous):
            add_error('%s: ordering error: %s < %s'
                      % (display_name, as_hex(obj), as_hex(previous)))
        previous = obj


# First output directory seen by _do_midx(); later calls that target a
# different directory prefix their debug message with that directory.
_first = None
def _do_midx(outdir, outfilename, infilenames, prefixstr):
    """Merge the given idx/midx files into a single midx file.

    outdir       directory used to build the output name when
                 outfilename is empty (must then be non-empty)
    outfilename  path of the midx to write; when false, the name
                 '<outdir>/midx-<hex>.midx' is generated, where <hex> is
                 the Sha1 hexdigest of the NUL-joined input names
    infilenames  paths of the idx/midx files to merge
    prefixstr    text inserted into the 'creating from' debug message

    Returns (total, outfilename), where total is the sum of len() over
    all inputs, or None when the -a/-f heuristics decide there is
    nothing worth merging.  Reads the module-level `opt` and updates the
    module-level `_first`.
    """
    global _first
    if not outfilename:
        assert(outdir)
        digest = Sha1('\0'.join(infilenames)).hexdigest()
        outfilename = '%s/midx-%s.midx' % (outdir, digest)

    inp = []
    total = 0
    allfilenames = []
    midxs = []
    try:
        for name in infilenames:
            ix = git.open_idx(name)
            midxs.append(ix)
            # One tuple per input, in the layout merge_into() is handed:
            # (map, entry count, sha offset, which offset or 0,
            #  index of this input's first name in allfilenames).
            inp.append((
                ix.map,
                len(ix),
                ix.sha_ofs,
                isinstance(ix, midx.PackMidx) and ix.which_ofs or 0,
                len(allfilenames),
            ))
            for n in ix.idxnames:
                allfilenames.append(os.path.basename(n))
            total += len(ix)
        # Order the inputs by the 20 bytes found at their sha offset,
        # largest first.  key=/reverse=True is stable, so inputs whose
        # keys compare equal keep their original relative order.
        inp.sort(key=lambda t: str(t[0][t[2]:t[2]+20]), reverse=True)

        if not _first: _first = outdir
        dirprefix = (git.repo_rel(outdir)+': ') if _first != outdir else ''
        debug1('midx: %s%screating from %d files (%d objects).\n'
               % (dirprefix, prefixstr, len(infilenames), total))
        if (opt.auto and (total < 1024 and len(infilenames) < 3)) \
           or ((opt.auto or opt.force) and len(infilenames) < 2) \
           or (opt.force and not total):
            debug1('midx: nothing to do.\n')
            return

        # Size the lookup table: one slot per page worth of entries,
        # rounded up to a power of two.
        pages = int(total/SHA_PER_PAGE) or 1
        bits = int(math.ceil(math.log(pages, 2)))
        entries = 2**bits
        debug1('midx: table size: %d (%d bits)\n' % (entries*4, bits))

        unlink(outfilename)
        with atomically_replaced_file(outfilename, 'wb') as f:
            # 12-byte header: magic, format version, table bit count.
            f.write('MIDX')
            f.write(struct.pack('!II', midx.MIDX_VERSION, bits))
            assert(f.tell() == 12)

            # Reserve room for the table (4 bytes per slot) plus 20 and
            # 4 bytes per object, then map the file so merge_into() can
            # fill it in place.
            f.truncate(12 + 4*entries + 20*total + 4*total)
            f.flush()
            fdatasync(f.fileno())

            fmap = mmap_readwrite(f, close=False)

            merge_into(fmap, bits, total, inp)
            del fmap # Assume this calls msync() now.
            # The NUL-joined idx names go after the reserved region.
            f.seek(0, os.SEEK_END)
            f.write('\0'.join(allfilenames))
    finally:
        # Only PackMidx inputs are closed explicitly here.
        for ix in midxs:
            if isinstance(ix, midx.PackMidx):
                ix.close()
        # Drop our references to the opened indexes and their maps.
        midxs = None
        inp = None

    return total, outfilename


def do_midx(outdir, outfilename, infilenames, prefixstr):
    """Run _do_midx() and, if --print was given, print the resulting name.

    Nothing is printed when _do_midx() reports that there was nothing to
    do (a false return value).  Always returns None.
    """
    result = _do_midx(outdir, outfilename, infilenames, prefixstr)
    if not result:
        return
    if opt['print']:
        # result is (total, outfilename); only the filename is shown.
        print(result[1])


def do_midx_dir(path, outfilename):
    """Merge the idx/midx files found in directory `path` as needed.

    Unless -f was given without -a, existing *.midx files are examined
    first: any midx whose idx files are all already covered by a bigger
    or newer midx is deleted.  The remaining midx files plus the *.idx
    files not covered by any kept midx are then merged, smallest first,
    until no more than the desired number of indexes remains.

    outfilename is passed through to do_midx_group().  With --print, the
    names of indexes that did not exist before this call are printed.
    """
    covered = set()   # idx paths already provided by a kept midx,
                      # plus the midx files deleted as redundant
    sizes = {}
    if opt.force and not opt.auto:
        midxs = []   # don't use existing midx files
    else:
        midxs = glob.glob('%s/*.midx' % path)
        contents = {}
        for mname in midxs:
            m = git.open_idx(mname)
            contents[mname] = ['%s/%s' % (path, sub) for sub in m.idxnames]
            sizes[mname] = len(m)

        # sort the biggest+newest midxes first, so that we can eliminate
        # smaller (or older) redundant ones that come later in the list
        midxs.sort(key=lambda ix: (-sizes[ix], -xstat.stat(ix).st_mtime))

        for mname in midxs:
            members = contents[mname]
            adds_something = any(iname not in covered for iname in members)
            covered.update(members)
            if not adds_something:
                debug1('%r is redundant\n' % mname)
                unlink(mname)
                covered.add(mname)

    midxs = [k for k in midxs if k not in covered]
    idxs = [k for k in glob.glob('%s/*.idx' % path) if k not in covered]

    for iname in idxs:
        sizes[iname] = len(git.open_idx(iname))

    candidates = [(sizes[n], n) for n in (midxs + idxs)]

    # FIXME: what are the optimal values?  Does this make sense?
    high_water = 1 if opt.force else 5
    low_water = 1 if opt.force else 2
    preexisting = set(name for sz, name in candidates)
    debug1('midx: %d indexes; want no more than %d.\n'
           % (len(candidates), high_water))
    if len(candidates) <= high_water:
        debug1('midx: nothing to do.\n')
    while len(candidates) > high_water:
        candidates.sort()
        # Merge everything except the (low_water - 1) largest entries.
        cut = len(candidates) - low_water + 1
        to_merge = [name for sz, name in candidates[:cut]]
        untouched = candidates[cut:]
        merged = list(do_midx_group(path, outfilename, to_merge))
        candidates = merged + untouched
        if len(candidates) > high_water:
            debug1('\nStill too many indexes (%d > %d).  Merging again.\n'
                   % (len(candidates), high_water))

    if opt['print']:
        for sz, name in candidates:
            if name not in preexisting:
                print(name)


def do_midx_group(outdir, outfilename, infiles):
    """Merge `infiles` in batches of at most opt.max_files files each.

    Generator: yields the (total, outfilename) result of _do_midx() for
    every batch that actually produced something.  When there is more
    than one batch, each batch's debug output is labelled 'Group N: '.
    """
    batches = list(_group(infiles, opt.max_files))
    several = len(batches) != 1
    label = ''
    for number, batch in enumerate(batches, 1):
        if several:
            label = 'Group %d: ' % number
        outcome = _do_midx(outdir, outfilename, batch, label)
        if outcome:
            yield outcome


handle_ctrl_c()

# Parse the command line according to optspec.
o = options.Options(optspec)
opt, flags, extra = o.parse(sys.argv[1:])

# Reject option combinations that cannot be honoured.
if extra and (opt.auto or opt.force):
    o.fatal("you can't use -f/-a and also provide filenames")
if opt.check and not (extra or opt.auto):
    o.fatal("if using --check, you must provide filenames or -a")

git.check_repo_or_die()

# A negative --max-files (optspec lists -1 for it) means "work it out
# from the process's open-file limit".
if opt.max_files < 0:
    opt.max_files = max_files()
assert(opt.max_files >= 5)

if opt.check:
    # check existing midx files
    if extra:
        midxes = extra
    else:
        midxes = []
        paths = [opt.dir] if opt.dir else git.all_packdirs()
        for path in paths:
            debug1('midx: scanning %s\n' % path)
            midxes.extend(glob.glob(os.path.join(path, '*.midx')))
    for name in midxes:
        check_midx(name)
    if not saved_errors:
        log('All tests passed.\n')
elif extra:
    # Explicit input files: merge exactly those.
    do_midx(git.repo('objects/pack'), opt.output, extra, '')
elif opt.auto or opt.force:
    # -a / -f: process every pack directory (or just the one from -d).
    paths = [opt.dir] if opt.dir else git.all_packdirs()
    for path in paths:
        debug1('midx: scanning %s\n' % path)
        do_midx_dir(path, opt.output)
else:
    o.fatal("you must use -f or -a or provide input filenames")

# Any error recorded along the way turns into a non-zero exit status.
if saved_errors:
    log('WARNING: %d errors encountered.\n' % len(saved_errors))
    sys.exit(1)