def open_idx(filename):
    """Open a pack index file and return the matching index reader.

    A name ending in ``.idx`` is opened in binary mode and classified by its
    first eight bytes: the version-2 magic followed by version number 2
    yields a ``PackIdxV2``; any other complete eight-byte header whose first
    four bytes compare below that magic yields a ``PackIdxV1``.  In both
    cases the open file object is handed to the reader together with the
    name.  A name ending in ``.midx`` is handed to ``midx.PackMidx``.

    *filename* may be a ``str`` or a ``bytes`` path.

    Raises:
        GitError: the name has neither suffix, the header is not recognized
            (including a file too short to hold one), or the file declares
            an idx version other than 2.
    """
    # Suffix literals must have the same type as the path they are tested
    # against (bytes paths come from globbing a bytes directory).
    if isinstance(filename, bytes):
        idx_suffix, midx_suffix = b'.idx', b'.midx'
    else:
        idx_suffix, midx_suffix = '.idx', '.midx'

    if filename.endswith(idx_suffix):
        # The file is read in binary mode, so compare against bytes.
        magic = b'\377tOc'
        f = open(filename, 'rb')
        try:
            header = f.read(8)
            # Require the full header before unpacking so a truncated file
            # is reported as a GitError rather than a struct.error.
            if len(header) == 8 and header[0:4] == magic:
                version = struct.unpack('!I', header[4:8])[0]
                if version == 2:
                    return PackIdxV2(filename, f)
                raise GitError('%s: expected idx file version 2, got %d'
                               % (filename, version))
            if len(header) == 8 and header[0:4] < magic:
                return PackIdxV1(filename, f)
            raise GitError('%s: unrecognized idx file header' % filename)
        except BaseException:
            # No reader took ownership of the file: don't leak the handle.
            f.close()
            raise
    if filename.endswith(midx_suffix):
        return midx.PackMidx(filename)
    raise GitError('idx filenames must end with .idx or .midx')
def refresh(self, skip_midx=False):
    """Refresh the index list.

    Rebuilds self.packs from the .midx and .idx files currently found in
    self.dir and reopens the bloom filter (bup.bloom) if one exists there.

    This method verifies if .midx files were superseded (e.g. all of its
    contents are in another, bigger .midx file) and removes the superseded
    files.  A .midx that refers to a missing .idx is reported and removed
    as well.

    If skip_midx is True, all work on .midx files will be skipped and .midx
    files will be removed from the list.

    The instance variable 'ignore_midx' can force this function to
    always act as if skip_midx was True.
    """
    # Always reopen the bloom as it may have been replaced.
    if self.bloom is not None:
        self.bloom.close()
    self.bloom = None
    self.do_bloom = False
    skip_midx = skip_midx or self.ignore_midx
    # d maps a path to the index object covering it: each kept index under
    # its own name and, further down, every .idx path listed by a midx to
    # that midx.
    d = dict((p.name, p) for p in self.packs
             if not skip_midx or not isinstance(p, midx.PackMidx))
    if os.path.exists(self.dir):
        if not skip_midx:
            midxl = []
            midxes = set(glob.glob(os.path.join(self.dir, b'*.midx')))
            # remove any *.midx files from our list that no longer exist
            for ix in list(d.values()):
                if not isinstance(ix, midx.PackMidx):
                    continue
                if ix.name in midxes:
                    continue
                # remove the midx
                del d[ix.name]
                ix.close()
                self.packs.remove(ix)
            # Record which .idx files the surviving, already-open midxes
            # cover.
            for ix in self.packs:
                if isinstance(ix, midx.PackMidx):
                    for name in ix.idxnames:
                        d[os.path.join(self.dir, name)] = ix
            # Open the midx files we don't know yet; drop the broken ones.
            for full in midxes:
                if not d.get(full):
                    mx = midx.PackMidx(full)
                    (mxd, mxf) = os.path.split(mx.name)
                    broken = False
                    for n in mx.idxnames:
                        if not os.path.exists(os.path.join(mxd, n)):
                            log(('warning: index %s missing\n'
                                 ' used by %s\n')
                                % (path_msg(n), path_msg(mxf)))
                            broken = True
                    if broken:
                        mx.close()
                        del mx
                        unlink(full)
                    else:
                        midxl.append(mx)
            # Consider the largest midx first, newest first among equals.
            midxl.sort(
                key=lambda ix: (-len(ix), -xstat.stat(ix.name).st_mtime))
            for ix in midxl:
                any_needed = False
                for sub in ix.idxnames:
                    found = d.get(os.path.join(self.dir, sub))
                    if not found or isinstance(found, PackIdx):
                        # doesn't exist, or exists but not in a midx
                        any_needed = True
                        break
                if any_needed:
                    d[ix.name] = ix
                    for name in ix.idxnames:
                        d[os.path.join(self.dir, name)] = ix
                elif not ix.force_keep:
                    debug1('midx: removing redundant: %s\n'
                           % path_msg(os.path.basename(ix.name)))
                    ix.close()
                    unlink(ix.name)
        # Open every .idx that is neither open already nor covered by a
        # midx; unreadable ones are recorded as errors and skipped.
        for full in glob.glob(os.path.join(self.dir, b'*.idx')):
            if not d.get(full):
                try:
                    ix = open_idx(full)
                except GitError as e:
                    add_error(e)
                    continue
                d[full] = ix
        bfull = os.path.join(self.dir, b'bup.bloom')
        if self.bloom is None and os.path.exists(bfull):
            self.bloom = bloom.ShaBloom(bfull)
        # A midx appears in d once per covered .idx, hence the set().
        self.packs = list(set(d.values()))
        self.packs.sort(reverse=True, key=len)
        if self.bloom and self.bloom.valid() and len(
                self.bloom) >= len(self):
            self.do_bloom = True
        else:
            # Unusable bloom: close it explicitly instead of only dropping
            # the reference, as is done at the top of this method.
            if self.bloom is not None:
                self.bloom.close()
            self.bloom = None
    debug1('PackIdxList: using %d index%s.\n'
           % (len(self.packs), 'es' if len(self.packs) != 1 else ''))
def _do_midx(outdir, outfilename, infilenames, prefixstr):
    """Merge the indexes named in infilenames into one .midx file.

    When outfilename is empty it is derived from outdir and the hex digest
    of the NUL-joined input names.  Every input is opened with
    git.open_idx(); the merged table is written to outfilename + '.tmp',
    the base names of all covered index files are appended, and the
    temporary file is then renamed to outfilename.

    Args:
        outdir: directory used to build the default output name and the
            log prefix; must be set when outfilename is empty.
        outfilename: path of the .midx to create, or a false value to have
            it derived.
        infilenames: names of the index files to merge.
        prefixstr: text inserted into the progress message.

    Returns:
        (total, outfilename), where total is the summed length of all
        inputs, or None when the opt.auto / opt.force checks decide there
        is nothing to do.
    """
    global _first
    if not outfilename:
        assert outdir
        digest = Sha1('\0'.join(infilenames)).hexdigest()
        outfilename = '%s/midx-%s.midx' % (outdir, digest)

    inp = []
    total = 0
    allfilenames = []
    midxs = []
    f = None
    try:
        try:
            for name in infilenames:
                ix = git.open_idx(name)
                midxs.append(ix)
                inp.append((
                    ix.map,
                    len(ix),
                    ix.sha_ofs,
                    ix.which_ofs if isinstance(ix, midx.PackMidx) else 0,
                    len(allfilenames),
                ))
                for n in ix.idxnames:
                    allfilenames.append(os.path.basename(n))
                total += len(ix)
            # Descending order of the 20 bytes found at each input's
            # sha_ofs.  sort() stays stable with reverse=True, so ties keep
            # their input order just like the former cmp-based sort.
            inp.sort(key=lambda e: bytes(e[0][e[2]:e[2] + 20]),
                     reverse=True)

            if not _first:
                _first = outdir
            dirprefix = git.repo_rel(outdir) + ': ' if _first != outdir else ''
            debug1('midx: %s%screating from %d files (%d objects).\n'
                   % (dirprefix, prefixstr, len(infilenames), total))
            if (opt.auto and (total < 1024 and len(infilenames) < 3)) \
               or ((opt.auto or opt.force) and len(infilenames) < 2) \
               or (opt.force and not total):
                debug1('midx: nothing to do.\n')
                return

            pages = int(total / SHA_PER_PAGE) or 1
            bits = int(math.ceil(math.log(pages, 2)))
            entries = 2**bits
            debug1('midx: table size: %d (%d bits)\n' % (entries * 4, bits))

            unlink(outfilename)
            f = open(outfilename + '.tmp', 'w+b')
            f.write(b'MIDX')
            f.write(struct.pack('!II', midx.MIDX_VERSION, bits))
            assert f.tell() == 12

            # Header, lookup table, then 20 + 4 bytes per object.
            f.truncate(12 + 4 * entries + 20 * total + 4 * total)
            f.flush()
            fdatasync(f.fileno())

            fmap = mmap_readwrite(f, close=False)
            merge_into(fmap, bits, total, inp)
            del fmap  # Assume this calls msync() now.
        finally:
            # Only PackMidx inputs are closed explicitly; every other index
            # object is just dereferenced.
            for ix in midxs:
                if isinstance(ix, midx.PackMidx):
                    ix.close()
            midxs = None
            inp = None

        f.seek(0, os.SEEK_END)
        f.write('\0'.join(allfilenames))
    finally:
        # Close the temporary file on success and on failure alike; f is
        # still None when we bailed out before creating it.
        if f is not None:
            f.close()
    os.rename(outfilename + '.tmp', outfilename)

    return total, outfilename