/usr/lib/bup/cmd/bup-midx is in bup 0.29-3.
This file is owned by root:root, with mode 0o755.
The actual contents of the file can be viewed below.
#!/usr/bin/python2.7
import glob, math, os, resource, struct, sys, tempfile
from bup import options, git, midx, _helpers, xstat
from bup.helpers import (Sha1, add_error, atomically_replaced_file, debug1, fdatasync,
handle_ctrl_c, log, mmap_readwrite, qprogress,
saved_errors, unlink)
# Option specification consumed by options.Options(): synopsis line(s) first,
# then a line holding only "--", then one option per line.  The string must
# be closed here so that the definitions that follow are real code.
optspec = """
bup midx [options...] <idxnames...>
--
o,output= output midx filename (default: auto-generated)
a,auto automatically use all existing .midx/.idx files as input
f,force merge produce exactly one .midx containing all objects
p,print print names of generated midx files
check validate contents of the given midx files (with -a, all midx files)
max-files= maximum number of idx files to open at once [-1]
d,dir= directory containing idx/midx files
"""
# Module-level alias for _helpers.merge_into; _do_midx() calls it with the
# mapped output file, the table bit count, the object total and the inputs.
merge_into = _helpers.merge_into
def _group(l, count):
    """Yield consecutive slices of l, each holding at most count items.

    The last slice is shorter when len(l) is not a multiple of count, and
    an empty l yields nothing at all.
    """
    # range() instead of xrange(): only len(l)/count integers are produced,
    # and the name is available on every Python version.
    for i in range(0, len(l), count):
        yield l[i:i+count]
def max_files():
    """Return how many index files may be opened at once.

    Starts from the smaller of the soft and hard RLIMIT_NOFILE values and
    keeps some descriptors in reserve: 20 when the limit is above 32,
    otherwise 6.
    """
    mf = min(resource.getrlimit(resource.RLIMIT_NOFILE))
    if mf > 32:
        mf -= 20  # just a safety margin
    else:
        mf -= 6  # minimum safety margin
    return mf
def check_midx(name):
    """Validate one midx file against the idx files it lists.

    name: path of the midx file to check.

    Every entry of every idx named in the midx must be found both in that
    idx and in the midx, and the midx's own entries must iterate in
    non-decreasing order.  Problems are recorded through add_error();
    nothing is returned.
    """
    nicename = git.repo_rel(name)
    log('Checking %s.\n' % nicename)
    try:
        ix = git.open_idx(name)
    except git.GitError as e:
        add_error('%s: %s' % (name, e))
        # The midx could not be opened, so there is nothing left to verify.
        return
    for count,subname in enumerate(ix.idxnames):
        # Sub-index names are resolved relative to the midx's own directory.
        sub = git.open_idx(os.path.join(os.path.dirname(name), subname))
        for ecount,e in enumerate(sub):
            if not (ecount % 1234):
                qprogress(' %d/%d: %s %d/%d\r'
                          % (count, len(ix.idxnames),
                             git.shorten_hash(subname), ecount, len(sub)))
            if not sub.exists(e):
                add_error("%s: %s: %s missing from idx"
                          % (nicename, git.shorten_hash(subname),
                             str(e).encode('hex')))
            if not ix.exists(e):
                add_error("%s: %s: %s missing from midx"
                          % (nicename, git.shorten_hash(subname),
                             str(e).encode('hex')))
    prev = None
    for ecount,e in enumerate(ix):
        if not (ecount % 1234):
            qprogress(' Ordering: %d/%d\r' % (ecount, len(ix)))
        # The first entry has no predecessor to be compared with.
        if prev is not None and not e >= prev:
            add_error('%s: ordering error: %s < %s'
                      % (nicename,
                         str(e).encode('hex'), str(prev).encode('hex')))
        prev = e
# Output directory seen by the first _do_midx() call; later calls compare
# against it to decide whether the debug message gets a directory prefix.
_first = None
# Merge the index files named in infilenames into a single midx file.
#   outdir       directory used when an output name has to be generated
#   outfilename  explicit output path; when empty one is derived below
#   infilenames  paths of the idx/midx files to merge
#   prefixstr    text inserted into the "creating from" debug message
# The last statement returns (total, outfilename).
# NOTE(review): this block has no indentation and several statements look
# incomplete or missing (flagged below) -- compare with a pristine copy
# before relying on it.
def _do_midx(outdir, outfilename, infilenames, prefixstr):
global _first
# No explicit name: build "<outdir>/midx-<hex digest>.midx" from the
# NUL-joined list of input names.
if not outfilename:
sum = Sha1('\0'.join(infilenames)).hexdigest()
outfilename = '%s/midx-%s.midx' % (outdir, sum)
inp = []
total = 0
allfilenames = []
midxs = []
for name in infilenames:
ix = git.open_idx(name)
# NOTE(review): the next line is a lone tuple element; the statement that
# wrapped it (presumably an inp.append((...)) call) is not visible, and
# nothing here adds to inp or midxs.  The sort below reads x[0] as a
# buffer and x[2] as an offset into it -- confirm the tuple layout.
isinstance(ix, midx.PackMidx) and ix.which_ofs or 0,
# NOTE(review): this loop has no visible body; allfilenames is never filled.
for n in ix.idxnames:
total += len(ix)
# Orders the inputs by the 20 bytes found at offset x[2] of x[0]; the
# comparator compares y against x, i.e. descending order.
inp.sort(lambda x,y: cmp(str(y[0][y[2]:y[2]+20]),str(x[0][x[2]:x[2]+20])))
if not _first: _first = outdir
# Only directories other than the first one seen get a prefix in the log.
dirprefix = (_first != outdir) and git.repo_rel(outdir)+': ' or ''
debug1('midx: %s%screating from %d files (%d objects).\n'
% (dirprefix, prefixstr, len(infilenames), total))
# NOTE(review): "nothing to do" is logged but no return is visible after
# it -- presumably the function stops here; confirm.
if (opt.auto and (total < 1024 and len(infilenames) < 3)) \
or ((opt.auto or opt.force) and len(infilenames) < 2) \
or (opt.force and not total):
debug1('midx: nothing to do.\n')
# NOTE(review): SHA_PER_PAGE is not defined anywhere in the visible file.
pages = int(total/SHA_PER_PAGE) or 1
# Table gets 2**bits entries, bits being ceil(log2(pages)).
bits = int(math.ceil(math.log(pages, 2)))
entries = 2**bits
debug1('midx: table size: %d (%d bits)\n' % (entries*4, bits))
with atomically_replaced_file(outfilename, 'wb') as f:
# NOTE(review): '!II' packs 8 bytes, yet the assert expects the position
# to be 12 -- a 4-byte write before this one appears to be missing.
f.write(struct.pack('!II', midx.MIDX_VERSION, bits))
assert(f.tell() == 12)
# Final size: 12 header bytes, 4 bytes per table entry, then 20 + 4 bytes
# per object.
f.truncate(12 + 4*entries + 20*total + 4*total)
fmap = mmap_readwrite(f, close=False)
count = merge_into(fmap, bits, total, inp)
del fmap # Assume this calls msync() now.
f.seek(0, os.SEEK_END)
# NOTE(review): nothing visible is written after seeking to the end, and
# the `if` inside the loop below has no body.
for ix in midxs:
if isinstance(ix, midx.PackMidx):
# Drop the references to the opened inputs.
midxs = None
inp = None
# This is just for testing (if you enable this, don't clear inp above)
# NOTE(review): the disabled code below also uses `pi` and `i`, which are
# not defined anywhere in this block.
if 0:
p = midx.PackMidx(outfilename)
assert(len(p.idxnames) == len(infilenames))
print p.idxnames
assert(len(p) == total)
for pe, e in p, git.idxmerge(inp, final_progress=False):
pin = pi.next()
assert(i == pin)
return total, outfilename
def do_midx(outdir, outfilename, infilenames, prefixstr):
    """Run _do_midx() and, when the print option is set, print the output name.

    All four arguments are passed to _do_midx() unchanged.  Nothing is
    printed when _do_midx() reports no result.
    """
    result = _do_midx(outdir, outfilename, infilenames, prefixstr)
    if not result:
        return
    if opt['print']:
        # Second element of the result is the path of the written midx.
        print(result[1])
def do_midx_dir(path, outfilename):
    """Merge the index files found in path until few enough remain.

    path: directory searched for *.midx and *.idx files.
    outfilename: output name passed on to do_midx_group().

    Existing midx files are ignored when force is set without auto.
    Otherwise each one is examined, largest and newest first, and a midx
    whose idx files are all covered by earlier ones is left out of the
    merge.  While more than DESIRED_HWM indexes remain, all but the last
    DESIRED_LWM-1 of them are merged.  With the print option set, the
    names of indexes that did not exist beforehand are printed.
    """
    already = {}
    sizes = {}
    if opt.force and not opt.auto:
        midxs = []  # don't use existing midx files
    else:
        midxs = glob.glob('%s/*.midx' % path)
        contents = {}
        for mname in midxs:
            m = git.open_idx(mname)
            contents[mname] = [('%s/%s' % (path,i)) for i in m.idxnames]
            sizes[mname] = len(m)

        # sort the biggest+newest midxes first, so that we can eliminate
        # smaller (or older) redundant ones that come later in the list
        midxs.sort(key=lambda ix: (-sizes[ix], -xstat.stat(ix).st_mtime))

        for mname in midxs:
            covers_new = False
            for iname in contents[mname]:
                if not already.get(iname):
                    already[iname] = 1
                    covers_new = True
            if not covers_new:
                debug1('%r is redundant\n' % mname)
                # NOTE(review): the redundant midx is only excluded from
                # the merge here -- confirm whether it should also be
                # removed from disk.
                already[mname] = 1

    midxs = [k for k in midxs if not already.get(k)]
    idxs = [k for k in glob.glob('%s/*.idx' % path) if not already.get(k)]

    for iname in idxs:
        sizes[iname] = len(git.open_idx(iname))

    indexes = [(sizes[n],n) for n in (midxs + idxs)]

    # FIXME: what are the optimal values? Does this make sense?
    DESIRED_HWM = 1 if opt.force else 5
    DESIRED_LWM = 1 if opt.force else 2
    existed = set(name for sz,name in indexes)
    debug1('midx: %d indexes; want no more than %d.\n'
           % (len(indexes), DESIRED_HWM))
    if len(indexes) <= DESIRED_HWM:
        debug1('midx: nothing to do.\n')
    while len(indexes) > DESIRED_HWM:
        # NOTE(review): the head/tail split below presumably expects the
        # list to be ordered by size, but no sort is visible -- confirm.
        keep_from = len(indexes) - DESIRED_LWM + 1
        part1 = [name for sz,name in indexes[:keep_from]]
        part2 = indexes[keep_from:]
        indexes = list(do_midx_group(path, outfilename, part1)) + part2
        if len(indexes) > DESIRED_HWM:
            debug1('\nStill too many indexes (%d > %d). Merging again.\n'
                   % (len(indexes), DESIRED_HWM))

    if opt['print']:
        for sz,name in indexes:
            if name not in existed:
                print(name)
def do_midx_group(outdir, outfilename, infiles):
    """Merge infiles in batches of at most opt.max_files, yielding results.

    Each batch goes through _do_midx(); every truthy result it returns is
    yielded.  When there is more than one batch, the debug prefix names
    the batch as "Group N: ", counting from 1.
    """
    batches = list(_group(infiles, opt.max_files))
    numbered = len(batches) != 1
    label = ''
    for position, batch in enumerate(batches):
        if numbered:
            label = 'Group %d: ' % (position + 1)
        merged = _do_midx(outdir, outfilename, batch, label)
        if merged:
            yield merged
# Command entry point: parse the options, then either validate existing
# midx files (--check) or build new ones.
# NOTE(review): handle_ctrl_c is imported at the top of the file but is not
# called anywhere in the code shown -- confirm whether a call belongs here.
o = options.Options(optspec)
(opt, flags, extra) = o.parse(sys.argv[1:])

if extra and (opt.auto or opt.force):
    o.fatal("you can't use -f/-a and also provide filenames")
if opt.check and (not extra and not opt.auto):
    o.fatal("if using --check, you must provide filenames or -a")

# A negative --max-files (the default is -1) means the limit is derived
# from the process's open-file limit.
if opt.max_files < 0:
    opt.max_files = max_files()
assert(opt.max_files >= 5)

if opt.check:
    # check existing midx files
    if extra:
        midxes = extra
    else:
        midxes = []
        paths = [opt.dir] if opt.dir else git.all_packdirs()
        for path in paths:
            debug1('midx: scanning %s\n' % path)
            midxes += glob.glob(os.path.join(path, '*.midx'))
    for name in midxes:
        check_midx(name)
    if not saved_errors:
        log('All tests passed.\n')
elif extra:
    # Explicit input files: merge exactly those.
    do_midx(git.repo('objects/pack'), opt.output, extra, '')
elif opt.auto or opt.force:
    paths = [opt.dir] if opt.dir else git.all_packdirs()
    for path in paths:
        debug1('midx: scanning %s\n' % path)
        do_midx_dir(path, opt.output)
else:
    o.fatal("you must use -f or -a or provide input filenames")

if saved_errors:
    log('WARNING: %d errors encountered.\n' % len(saved_errors))