def setUp(self):
    """Build the DAG fixture and store it on ``self.dag``.

    Every entry is a ``(node, parents)`` pair.  ``A`` only appears as a
    parent; ``F`` is the single node with two parents (``B`` and ``C``).
    """
    node_parents = [
        ('B', ('A',)),
        ('C', ('A',)),
        ('D', ('B',)),
        ('E', ('B',)),
        ('F', ('B', 'C')),
        ('G', ('D',)),
        ('H', ('D',)),
        ('I', ('F',)),
        ('J', ('F',)),
    ]
    self.dag = gitdag(node_parents)
def _ensure_ready(self): assert hasattr(self, '_store') if self._store is None: return store = self._store self._store = None raw_unbundler = unbundler(self._bundle) self._dag = gitdag() branches = set() chunks = [] def iter_and_store(iterator): for item in iterator: chunks.append(item) yield item changeset_chunks = ChunksCollection( progress_iter( 'Analyzing {} changesets from ' + fsdecode(self._file), iter_and_store(next(raw_unbundler, None)))) for chunk in changeset_chunks.iter_initialized(lambda x: x, store.changeset, Changeset.from_chunk): extra = chunk.extra or {} branch = extra.get(b'branch', b'default') branches.add(branch) self._dag.add( chunk.node, tuple(p for p in (chunk.parent1, chunk.parent2) if p != NULL_NODE_ID), branch) self._heads = tuple( reversed( [unhexlify(h) for h in self._dag.all_heads(with_tags=False)])) self._branchmap = defaultdict(list) for tag, node in self._dag.all_heads(): self._branchmap[tag].append(unhexlify(node)) def repo_unbundler(): yield iter(chunks) yield next(raw_unbundler, None) yield next(raw_unbundler, None) if next(raw_unbundler, None) is not None: assert False self._unbundler = repo_unbundler()
def _ensure_ready(self): assert hasattr(self, '_store') if self._store is None: return store = self._store self._store = None raw_unbundler = unbundler(self._bundle) self._dag = gitdag() branches = set() chunks = [] def iter_and_store(iterator): for item in iterator: chunks.append(item) yield item changeset_chunks = ChunksCollection(progress_iter( 'Analyzing {} changesets from ' + self._file, iter_and_store(next(raw_unbundler, None)))) for chunk in changeset_chunks.iter_initialized(lambda x: x, store.changeset, Changeset.from_chunk): extra = chunk.extra or {} branch = extra.get('branch', 'default') branches.add(branch) self._dag.add(chunk.node, tuple(p for p in (chunk.parent1, chunk.parent2) if p != NULL_NODE_ID), branch) self._heads = tuple(reversed( [unhexlify(h) for h in self._dag.all_heads(with_tags=False)])) self._branchmap = defaultdict(list) for tag, node in self._dag.all_heads(): self._branchmap[tag].append(unhexlify(node)) def repo_unbundler(): yield iter(chunks) yield next(raw_unbundler, None) yield next(raw_unbundler, None) if next(raw_unbundler, None) is not None: assert False self._unbundler = repo_unbundler()
def init(self, store):
    """Analyze the bundle's changesets and prepare it for replay.

    Reads the first part produced by ``unbundler(self._bundle)`` (keeping a
    copy of every chunk), records each changeset in ``self._dag`` with its
    non-null parents and branch (``'default'`` when no ``branch`` extra is
    present), then sets ``self._heads``, ``self._branchmap``, ``self._tip``
    (the node of the last changeset read) and ``self._unbundler``, a
    generator replaying the stored chunks followed by the next two parts.

    Args:
        store: object whose ``changeset`` attribute is handed to
            ``ChunksCollection.iter_initialized``.

    Raises:
        AssertionError: when the replay generator is exhausted and the
            bundle yields an unexpected non-None fourth part.
    """
    raw_unbundler = unbundler(self._bundle)
    self._dag = gitdag()
    chunks = []

    def iter_and_store(iterator):
        # Keep a copy of each chunk so repo_unbundler() can replay them.
        for item in iterator:
            chunks.append(item)
            yield item

    changeset_chunks = ChunksCollection(progress_iter(
        'Analyzing %d changesets from ' + self._file,
        iter_and_store(next(raw_unbundler))))

    for chunk in changeset_chunks.iter_initialized(ChangesetInfo,
                                                   store.changeset):
        extra = chunk.extra or {}
        branch = extra.get('branch', 'default')
        parents = tuple(p for p in (chunk.parent1, chunk.parent2)
                        if p != NULL_NODE_ID)
        self._dag.add(chunk.node, parents, branch)

    self._heads = tuple(reversed(
        [unhexlify(h) for h in self._dag.all_heads(with_tags=False)]))
    self._branchmap = defaultdict(list)
    for tag, node in self._dag.all_heads():
        self._branchmap[tag].append(unhexlify(node))
    # NOTE(review): `chunk` is the loop variable above; a bundle without any
    # changeset leaves it unbound and this line raises NameError.  Confirm
    # whether empty bundles can reach this point.
    self._tip = unhexlify(chunk.node)

    def repo_unbundler():
        yield chunks
        yield next(raw_unbundler)
        yield next(raw_unbundler)
        # Use a default so an exhausted iterator simply ends this generator
        # instead of leaking StopIteration (a RuntimeError under PEP 479),
        # and raise explicitly so the check survives `python -O`.
        if next(raw_unbundler, None) is not None:
            raise AssertionError('unexpected extra part in bundle')

    self._unbundler = repo_unbundler()
def init(self, store):
    """Analyze the bundle's changesets and prepare it for replay.

    The changeset part of ``unbundler(self._bundle)`` is read once, with
    every chunk stored for later replay.  Each changeset is added to
    ``self._dag`` together with its non-null parents and its branch
    (``'default'`` when no ``branch`` extra is present).  The method then
    sets ``self._heads``, ``self._branchmap``, ``self._tip`` (the node of the
    last changeset read) and ``self._unbundler``, a generator that yields
    the stored chunks and then the next two parts of the bundle.

    Args:
        store: object whose ``changeset`` attribute is handed to
            ``ChunksCollection.iter_initialized``.

    Raises:
        AssertionError: when the replay generator is exhausted and the
            bundle yields an unexpected non-None fourth part.
    """
    raw_unbundler = unbundler(self._bundle)
    self._dag = gitdag()
    chunks = []

    def iter_and_store(iterator):
        # Keep a copy of each chunk so repo_unbundler() can replay them.
        for item in iterator:
            chunks.append(item)
            yield item

    changeset_chunks = ChunksCollection(progress_iter(
        'Analyzing %d changesets from ' + self._file,
        iter_and_store(next(raw_unbundler))))

    for chunk in changeset_chunks.iter_initialized(ChangesetInfo,
                                                   store.changeset):
        extra = chunk.extra or {}
        branch = extra.get('branch', 'default')
        parents = tuple(p for p in (chunk.parent1, chunk.parent2)
                        if p != NULL_NODE_ID)
        self._dag.add(chunk.node, parents, branch)

    self._heads = tuple(reversed(
        [unhexlify(h) for h in self._dag.all_heads(with_tags=False)]))
    self._branchmap = defaultdict(list)
    for tag, node in self._dag.all_heads():
        self._branchmap[tag].append(unhexlify(node))
    # NOTE(review): `chunk` is the loop variable above; a bundle without any
    # changeset leaves it unbound and this line raises NameError.  Confirm
    # whether empty bundles can reach this point.
    self._tip = unhexlify(chunk.node)

    def repo_unbundler():
        yield chunks
        yield next(raw_unbundler)
        yield next(raw_unbundler)
        # Use a default so an exhausted iterator simply ends this generator
        # instead of leaking StopIteration (a RuntimeError under PEP 479),
        # and raise explicitly so the check survives `python -O`.
        if next(raw_unbundler, None) is not None:
            raise AssertionError('unexpected extra part in bundle')

    self._unbundler = repo_unbundler()
def push(repo, store, what, repo_heads, repo_branches, dry_run=False):
    """Push local commits to a Mercurial repository.

    Args:
        repo: remote repository object; ``known``, ``local`` and ``unbundle``
            are used on it (``known`` through ``findcommon``).
        store: metadata store used to map between git commits and
            changesets.
        what: mapping whose truthy keys are passed as revisions to
            ``GitHgHelper.rev_list`` and whose values carry a force flag at
            index 1 (presumably ``(destination, force)`` pairs - confirm
            against the caller).
        repo_heads: hex heads of the remote repository (may be empty).
        repo_branches: branches passed to ``store.heads``.
        dry_run: when true, compute what would be pushed but send nothing.

    Returns:
        A ``gitdag`` of the pushed ``(commit, parents)`` pairs when
        something was pushed or ``dry_run`` is set, otherwise ``()``.

    Raises:
        Exception: when a new root would be pushed to a non-empty remote
            without every ref being forced.
        error.Abort: when the remote answers with an ``error:abort`` part.
    """
    def heads():
        for sha1 in store.heads(repo_branches):
            yield '^%s' % store.changeset_ref(sha1)

    def local_bases():
        h = chain(heads(), (w for w in what if w))
        for c, t, p in GitHgHelper.rev_list('--topo-order', '--full-history',
                                            '--boundary', *h):
            # Only boundary commits (prefixed with '-') are of interest.
            if c[0] != '-':
                continue
            yield store.hg_changeset(c[1:])

        for w in what:
            rev = store.hg_changeset(w)
            if rev:
                yield rev

    common = findcommon(repo, store, set(local_bases()))
    logging.info('common: %s', common)

    def excluded_common():
        for sha1 in common:
            yield '^%s' % store.changeset_ref(sha1)

    revs = chain(excluded_common(), (w for w in what if w))
    push_commits = list((c, p) for c, t, p in GitHgHelper.rev_list(
        '--topo-order', '--full-history', '--parents', '--reverse', *revs))

    pushed = False
    if push_commits:
        # A root is a commit without parents.  The previous test,
        # `len(p) == 40` applied to each (commit, parents) pair, compared
        # the length of a 2-tuple with 40 and was therefore never true.
        has_root = any(not p for c, p in push_commits)
        force = all(v[1] for v in what.values())
        if has_root and repo_heads:
            if not force:
                raise Exception('Cannot push a new root')
            else:
                logging.warning('Pushing a new root')
        if force:
            repo_heads = ['force']
        else:
            if not repo_heads:
                repo_heads = [NULL_NODE_ID]
            repo_heads = [unhexlify(h) for h in repo_heads]
    if push_commits and not dry_run:
        if repo.local():
            repo.local().ui.setconfig('server', 'validate', True)
        b2caps = bundle2caps(repo) if unbundle20 else {}
        logging.getLogger('bundle2').debug('%r', b2caps)
        if b2caps:
            b2caps['replycaps'] = encodecaps({'error': ['abort']})
        cg = create_bundle(store, push_commits, b2caps)
        if not isinstance(repo, HelperRepo):
            cg = util.chunkbuffer(cg)
            if not b2caps:
                cg = cg1unpacker(cg, 'UN')
        reply = repo.unbundle(cg, repo_heads, '')
        if unbundle20 and isinstance(reply, unbundle20):
            parts = iter(reply.iterparts())
            for part in parts:
                logging.getLogger('bundle2').debug('part: %s', part.type)
                logging.getLogger('bundle2').debug('params: %r', part.params)
                if part.type == 'output':
                    sys.stderr.write(part.read())
                elif part.type == 'reply:changegroup':
                    # TODO: should check params['in-reply-to']
                    reply = int(part.params['return'])
                elif part.type == 'error:abort':
                    raise error.Abort(part.params['message'],
                                      hint=part.params.get('hint'))
                else:
                    logging.getLogger('bundle2').warning(
                        'ignoring bundle2 part: %s', part.type)
        # A reply of 0 is treated as "nothing was pushed".
        pushed = reply != 0
    return gitdag(push_commits) if pushed or dry_run else ()
def findcommon(repo, store, hgheads):
    """Find changesets shared with ``repo`` by sampling ``repo.known``.

    Args:
        repo: object whose ``known`` method is called with binary node ids
            and answers with one truth value per node.
        store: metadata store used to map changesets to git commits
            (``changeset_ref``) and back (``hg_changeset``).
        hgheads: local changesets to start the discovery from.

    Returns:
        An empty set when ``hgheads`` is empty, ``hgheads`` itself when the
        initial sample shows all of them to be known, otherwise a list of
        the changesets that are heads of the part of the DAG tagged
        ``'known'``.
    """
    log = logging.getLogger('findcommon')
    log.debug(hgheads)
    if not hgheads:
        return set()

    sample_size = 100

    sample = _sample(hgheads, sample_size)
    answers = repo.known(unhexlify(h) for h in sample)
    known = {h for h, is_known in izip(sample, answers) if is_known}

    log.info('initial sample size: %d', len(sample))

    if len(known) == len(hgheads):
        log.info('all heads known')
        return hgheads

    git_heads = {store.changeset_ref(h) for h in hgheads}
    git_known = {store.changeset_ref(h) for h in known}

    if log.isEnabledFor(logging.DEBUG):
        log.debug('known (sub)set: (%d) %s', len(known), sorted(git_known))

    # Walk everything reachable from the heads that are not known, excluding
    # what is reachable from the known ones.
    rev_list_args = ['--topo-order', '--full-history', '--parents']
    rev_list_args.extend('^%s' % h for h in git_known)
    rev_list_args.extend(h for h in git_heads if h not in git_known)

    walked = ((c, parents)
              for c, t, parents in GitHgHelper.rev_list(*rev_list_args))
    known_nodes = ((k, ()) for k in git_known)
    dag = gitdag(chain(walked, known_nodes))
    dag.tag_nodes_and_parents(git_known, 'known')

    def log_dag(tag):
        if log.isEnabledFor(logging.DEBUG):
            log.debug('%s dag size: %d', tag,
                      sum(1 for n in dag.iternodes(tag)))
            heads = sorted(dag.heads(tag))
            log.debug('%s dag heads: (%d) %s', tag, len(heads), heads)
            roots = sorted(dag.roots(tag))
            log.debug('%s dag roots: (%d) %s', tag, len(roots), roots)

    for tag in ('unknown', 'known'):
        log_dag(tag)

    while True:
        # Heads and roots of the still untagged part of the DAG.
        undecided = set(chain(dag.heads(), dag.roots()))
        if not undecided:
            break

        picked = set(_sample(undecided, sample_size))
        if len(picked) < sample_size:
            # Top the sample up with other untagged nodes.
            picked |= set(_sample(set(dag.iternodes()),
                                  sample_size - len(picked)))

        sample = list(picked)
        hg_sample = [store.hg_changeset(h) for h in sample]
        answers = repo.known(unhexlify(h) for h in hg_sample)
        unknown = {h for h, is_known in izip(sample, answers)
                   if not is_known}
        known = {h for h, is_known in izip(sample, answers) if is_known}
        log.info('next sample size: %d', len(sample))
        if log.isEnabledFor(logging.DEBUG):
            log.debug('known (sub)set: (%d) %s', len(known), sorted(known))
            log.debug('unknown (sub)set: (%d) %s', len(unknown),
                      sorted(unknown))

        dag.tag_nodes_and_parents(known, 'known')
        dag.tag_nodes_and_children(unknown, 'unknown')
        for tag in ('unknown', 'known'):
            log_dag(tag)

    return [store.hg_changeset(h) for h in dag.heads('known')]
def fsck(args):
    '''check cinnabar metadata consistency'''
    # Returns 1 on invalid arguments, an unknown commit, or when the
    # metadata is found broken; 2 when something was fixed; 0 otherwise
    # (or whatever fsck_quick returns when it is used).

    # Without explicit commits or --full, defer to the quick check.
    if not args.commit and not args.full:
        return fsck_quick(args.force)

    status = FsckStatus()

    store = GitHgStore()

    if args.full and args.commit:
        logging.error('Cannot pass both --full and a commit')
        return 1

    if args.commit:
        # Check only the commits/changesets given on the command line.
        commits = set()
        all_git_commits = {}

        for c in args.commit:
            cs = store.hg_changeset(c)
            if cs:
                commits.add(c)
                c = cs.node
            commit = GitHgHelper.hg2git(c)
            if commit == NULL_NODE_ID and not cs:
                status.info('Unknown commit or changeset: %s' % c)
                return 1
            if not cs:
                cs = store.hg_changeset(commit)
                commits.add(commit)

        all_git_commits = GitHgHelper.rev_list('--no-walk=unsorted', *commits)
    else:
        # Full check: walk everything reachable from the parents of the
        # metadata commit from the second one on (the `^^@` suffix).
        all_refs = dict(
            (ref, sha1) for sha1, ref in Git.for_each_ref('refs/cinnabar'))

        if 'refs/cinnabar/metadata' in all_refs:
            git_heads = '%s^^@' % all_refs['refs/cinnabar/metadata']
        else:
            assert False

        all_git_commits = GitHgHelper.rev_list('--topo-order',
                                               '--full-history', '--reverse',
                                               git_heads)

    # Rebuilt from the changesets seen below; used at the end to compare
    # the computed branch heads with the stored ones.
    dag = gitdag()

    GitHgHelper.reset_heads('manifests')

    # The slower per-file check is only used when FileFindParents logs at
    # DEBUG level.
    full_file_check = FileFindParents.logger.isEnabledFor(logging.DEBUG)

    for node, tree, parents in progress_iter('Checking {} changesets',
                                             all_git_commits):
        node = store._replace.get(node, node)
        hg_node = store.hg_changeset(node)
        if not hg_node:
            status.report('Missing note for git commit: ' + node)
            continue
        GitHgHelper.seen('git2hg', node)

        changeset_data = store.changeset(hg_node)
        changeset = changeset_data.node

        GitHgHelper.seen('hg2git', changeset)
        changeset_ref = store.changeset_ref(changeset)
        if not changeset_ref:
            status.report('Missing changeset in hg2git branch: %s' % changeset)
            continue
        elif str(changeset_ref) != node:
            status.report('Commit mismatch for changeset %s\n'
                          ' hg2git: %s\n commit: %s'
                          % (changeset, changeset_ref, node))

        hg_changeset = store.changeset(changeset, include_parents=True)
        if hg_changeset.node != hg_changeset.sha1:
            status.report('Sha1 mismatch for changeset %s' % changeset)

        dag.add(hg_changeset.node,
                (hg_changeset.parent1, hg_changeset.parent2),
                changeset_data.branch or 'default')

        # Recompute the changeset metadata from the git commit and rewrite
        # it when it differs from what is stored.
        raw_changeset = Changeset.from_git_commit(node)
        patcher = ChangesetPatcher.from_diff(raw_changeset, changeset_data)
        if patcher != store.read_changeset_data(node):
            status.fix('Adjusted changeset metadata for %s' % changeset)
            GitHgHelper.set('changeset', changeset, NULL_NODE_ID)
            GitHgHelper.set('changeset', changeset, node)
            GitHgHelper.put_blob(patcher, want_sha1=False)
            GitHgHelper.set('changeset-metadata', changeset, NULL_NODE_ID)
            GitHgHelper.set('changeset-metadata', changeset, ':1')

        manifest = changeset_data.manifest
        # Skip the null manifest and manifests the helper reports as seen.
        if GitHgHelper.seen('hg2git', manifest) or manifest == NULL_NODE_ID:
            continue
        manifest_ref = store.manifest_ref(manifest)
        if not manifest_ref:
            status.report('Missing manifest in hg2git branch: %s' % manifest)

        parents = tuple(
            store.changeset(p).manifest for p in hg_changeset.parents)
        git_parents = tuple(
            store.manifest_ref(p) for p in parents if p != NULL_NODE_ID)

        # This doesn't change the value but makes the helper track the manifest
        # dag.
        GitHgHelper.set('manifest', manifest, manifest_ref)

        if not GitHgHelper.check_manifest(manifest):
            status.report('Sha1 mismatch for manifest %s' % manifest)

        manifest_commit_parents = GitCommit(manifest_ref).parents
        if sorted(manifest_commit_parents) != sorted(git_parents):
            # TODO: better error
            status.report(
                '%s(%s) %s != %s' % (manifest, manifest_ref,
                                     manifest_commit_parents, git_parents))

        # TODO: check that manifest content matches changeset content

        changes = get_changes(manifest_ref, git_parents)
        for path, hg_file, hg_fileparents in changes:
            if hg_file != NULL_NODE_ID and (hg_file == HG_EMPTY_FILE or
                                            GitHgHelper.seen(
                                                'hg2git', hg_file)):
                if full_file_check:
                    file = store.file(hg_file, hg_fileparents, git_parents,
                                      store.manifest_path(path))
                    valid = file.node == file.sha1
                else:
                    valid = GitHgHelper.check_file(hg_file, *hg_fileparents)
                if not valid:
                    status.report('Sha1 mismatch for file %s in manifest %s'
                                  % (hg_file, manifest_ref))

    # Compare the manifest heads tracked by the helper during the walk with
    # the ones the store had originally.
    if not args.commit and not status('broken'):
        store_manifest_heads = set(store._manifest_heads_orig)
        manifest_heads = set(GitHgHelper.heads('manifests'))
        if store_manifest_heads != manifest_heads:
            def iter_manifests(a, b):
                # Heads only in `a`, excluding everything reachable from `b`.
                for h in a - b:
                    yield h
                for h in b:
                    yield '^%s' % h

            for m, t, p in GitHgHelper.rev_list(
                    '--topo-order', '--full-history', '--reverse',
                    *iter_manifests(manifest_heads, store_manifest_heads)):
                status.fix('Missing manifest commit in manifest branch: %s'
                           % m)

            for m, t, p in GitHgHelper.rev_list(
                    '--topo-order', '--full-history', '--reverse',
                    *iter_manifests(store_manifest_heads, manifest_heads)):
                status.fix('Removing metadata commit %s with no corresponding '
                           'changeset' % (m))

            for h in store_manifest_heads - manifest_heads:
                if GitHgHelper.seen('hg2git', store.hg_manifest(h)):
                    status.fix('Removing non-head reference to %s in manifests'
                               ' metadata.' % h)
    # Dangling entries are only looked up for a full check that found
    # nothing broken; otherwise both loops below iterate over nothing.
    dangling = ()
    if not args.commit and not status('broken'):
        dangling = GitHgHelper.dangling('hg2git')
    for obj in dangling:
        status.fix('Removing dangling metadata for ' + obj)
        # Theoretically, we should figure out if they are files, manifests
        # or changesets and set the right variable accordingly, but in
        # practice, it makes no difference. Reevaluate when GitHgStore.close
        # is modified, though.
        GitHgHelper.set('file', obj, NULL_NODE_ID)
        GitHgHelper.set('file-meta', obj, NULL_NODE_ID)

    if not args.commit and not status('broken'):
        dangling = GitHgHelper.dangling('git2hg')
    for c in dangling:
        status.fix('Removing dangling note for commit ' + c)
        GitHgHelper.set('changeset-metadata', c, NULL_NODE_ID)

    if status('broken'):
        status.info(
            'Your git-cinnabar repository appears to be corrupted. There\n'
            'are known issues in older revisions that have been fixed.\n'
            'Please try running the following command to reset:\n'
            ' git cinnabar reclone\n\n'
            'Please note this command may change the commit sha1s. Your\n'
            'local branches will however stay untouched.\n'
            'Please report any corruption that fsck would detect after a\n'
            'reclone.')

    if not args.commit:
        status.info('Checking head references...')
        # Group the heads computed from the dag by branch and reconcile
        # them with the heads recorded in the store.
        computed_heads = defaultdict(set)
        for branch, head in dag.all_heads():
            computed_heads[branch].add(head)

        for branch in sorted(dag.tags()):
            stored_heads = store.heads({branch})
            for head in computed_heads[branch] - stored_heads:
                status.fix('Adding missing head %s in branch %s'
                           % (head, branch))
                store.add_head(head)
            for head in stored_heads - computed_heads[branch]:
                status.fix('Removing non-head reference to %s in branch %s'
                           % (head, branch))
                del store._hgheads[head]

    metadata_commit = Git.resolve_ref('refs/cinnabar/metadata')
    if status('broken'):
        # Record the broken metadata and leave without closing the store.
        Git.update_ref('refs/cinnabar/broken', metadata_commit)
        return 1

    if args.full:
        Git.update_ref('refs/cinnabar/checked', metadata_commit)
    interval_expired('fsck', 0)
    store.close()

    if status('fixed'):
        return 2
    return 0
def fsck(args):
    """Check git-cinnabar metadata consistency, fixing what can be fixed.

    ``args`` is a list of command-line arguments: ``--manifests`` and
    ``--files`` enable hash validation of manifests and files, and any
    positional argument restricts the check to those commits/changesets.

    Returns 1 when a given commit is unknown or when the metadata is found
    broken, 2 when something was fixed, and 0 otherwise.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--manifests', action='store_true',
        help='Validate manifests hashes')
    parser.add_argument(
        '--files', action='store_true',
        help='Validate files hashes')
    parser.add_argument(
        'commit', nargs='*',
        help='Specific commit or changeset to check')
    args = parser.parse_args(args)

    # Mutable flags updated by fix() and report() below.
    status = {
        'broken': False,
        'fixed': False,
    }

    def info(message):
        # Write a carriage return to stderr before printing the message.
        sys.stderr.write('\r')
        print message

    def fix(message):
        status['fixed'] = True
        info(message)

    def report(message):
        status['broken'] = True
        info(message)

    store = GitHgStore()
    store.init_fast_import(lambda: FastImport())

    if args.commit:
        # Only check the commits/changesets given on the command line.
        all_hg2git = {}
        all_notes = set()
        commits = set()
        all_git_commits = {}

        for c in args.commit:
            data = store.read_changeset_data(c)
            if data:
                all_notes.add(c)
                commits.add(c)
                c = data['changeset']
            commit = GitHgHelper.hg2git(c)
            if commit == NULL_NODE_ID and not data:
                info('Unknown commit or changeset: %s' % c)
                return 1
            if commit != NULL_NODE_ID:
                all_hg2git[c] = commit, 'commit'
            if not data:
                data = store.read_changeset_data(commit)
                commits.add(commit)
                if data:
                    all_notes.add(commit)

        all_git_commits = Git.iter(
            'log', '--no-walk=unsorted', '--stdin', '--format=%T %H',
            stdin=commits)
    else:
        # Full check: load every hg2git mapping, every note and every
        # manifest commit, then walk all commits of the cinnabar branches.
        all_hg2git = {
            path.replace('/', ''): (filesha1, intern(typ))
            for mode, typ, filesha1, path in
            progress_iter('Reading %d mercurial to git mappings',
                          Git.ls_tree('refs/cinnabar/hg2git', recursive=True))
        }

        all_notes = set(path.replace('/', '') for mode, typ, filesha1, path in
                        progress_iter(
                            'Reading %d commit to changeset mappings',
                            Git.ls_tree('refs/notes/cinnabar', recursive=True)))

        manifest_commits = OrderedDict((m, None) for m in progress_iter(
            'Reading %d manifest trees',
            Git.iter('rev-list', '--full-history',
                     '--topo-order', 'refs/cinnabar/manifest'))
        )

        all_git_heads = Git.for_each_ref('refs/cinnabar/branches',
                                         format='%(refname)')

        all_git_commits = Git.iter('log', '--topo-order', '--full-history',
                                   '--reverse', '--stdin', '--format=%T %H',
                                   stdin=all_git_heads)

    store._hg2git_cache = {p: s for p, (s, t) in all_hg2git.iteritems()}

    # Everything encountered during the walk; used afterwards to find
    # dangling metadata.
    seen_changesets = set()
    seen_manifests = set()
    seen_manifest_refs = {}
    seen_files = set()
    seen_notes = set()

    hg_manifest = None

    dag = gitdag()

    for line in progress_iter('Checking %d changesets', all_git_commits):
        # Each line is "<tree sha1> <commit sha1>" (--format=%T %H).
        tree, node = line.split(' ')
        if node not in all_notes:
            report('Missing note for git commit: ' + node)
            continue
        seen_notes.add(node)

        changeset_data = store.read_changeset_data(node)
        changeset = changeset_data['changeset']
        if 'extra' in changeset_data:
            extra = changeset_data['extra']
            header, message = GitHgHelper.cat_file(
                'commit', node).split('\n\n', 1)
            header = dict(l.split(' ', 1) for l in header.splitlines())
            if 'committer' in extra:
                committer_info = store.hg_author_info(header['committer'])
                committer = '%s %d %d' % committer_info
                if (committer != extra['committer'] and
                        header['committer'] != extra['committer'] and
                        committer_info[0] != extra['committer']):
                    report('Committer mismatch between commit and metadata for'
                           ' changeset %s' % changeset)
                if committer == extra['committer']:
                    fix('Fixing useless committer metadata for changeset %s'
                        % changeset)
                    del changeset_data['extra']['committer']
                    store._changesets[changeset] = LazyString(node)
            if header['committer'] != header['author'] and not extra:
                fix('Fixing useless empty extra metadata for changeset %s'
                    % changeset)
                del changeset_data['extra']
                store._changesets[changeset] = LazyString(node)

        seen_changesets.add(changeset)
        changeset_ref = store.changeset_ref(changeset)
        if not changeset_ref:
            report('Missing changeset in hg2git branch: %s' % changeset)
        elif str(changeset_ref) != node:
            report('Commit mismatch for changeset %s\n'
                   ' hg2git: %s\n commit: %s'
                   % (changeset, changeset_ref, node))

        hg_changeset = store.changeset(changeset, include_parents=True)
        sha1 = hg_changeset.sha1
        if hg_changeset.node != sha1:
            try_fixup = False
            # Two specific (changeset, computed sha1) pairs get a
            # hard-coded one-character patch.
            if (changeset, sha1) in (
                ('8c557b7c03a4a753e5c163038f04862e9f65fce1',
                 '249b59139de8e08abeb6c4e261a137c756e7af0e'),
                ('ffdee4a4eb7fc7cae80dfc4cb2fe0c3178773dcf',
                 '415e9d2eac83d508bf58a4df585c5f6b2b0f44ed'),
            ):
                header = hg_changeset.data.split('\n', 4)
                start = sum(len(h) for h in header[:3]) + 1
                changeset_data['patch'] = ((start, start + 1, '1'),)
                try_fixup = True

            # Some known cases of corruption involve a whitespace after the
            # timezone. Adding an empty extra metadata works around those.
            elif 'extra' not in changeset_data:
                changeset_data['extra'] = {}
                try_fixup = True

            if try_fixup:
                # Recompute with the adjusted metadata and keep the fix only
                # if the sha1 now matches.
                hg_changeset = store.changeset(changeset, include_parents=True)
                sha1 = hg_changeset.sha1
                if hg_changeset.node == sha1:
                    fix('Fixing known sha1 mismatch for changeset %s' %
                        changeset)
                    store._changesets[changeset] = LazyString(node)

        if hg_changeset.node != sha1:
            report('Sha1 mismatch for changeset %s' % changeset)

        dag.add(hg_changeset.node,
                (hg_changeset.parent1, hg_changeset.parent2),
                changeset_data.get('extra', {}).get('branch', 'default'))

        manifest = changeset_data['manifest']
        # Each manifest is only checked once.
        if manifest in seen_manifests:
            continue
        seen_manifests.add(manifest)
        manifest_ref = store.manifest_ref(manifest)
        if manifest_ref:
            seen_manifest_refs[manifest_ref] = manifest
        if not manifest_ref:
            report('Missing manifest in hg2git branch: %s' % manifest)
        elif not args.commit and manifest_ref not in manifest_commits:
            report('Missing manifest commit in manifest branch: %s' %
                   manifest_ref)

        # `parents` is only (re)computed when one of the validation flags
        # is set; both users below are guarded by those same flags.
        if args.manifests or args.files:
            parents = tuple(
                store.read_changeset_data(store.changeset_ref(p))['manifest']
                for p in (hg_changeset.parent1, hg_changeset.parent2)
                if p != NULL_NODE_ID
            )

        if args.manifests:
            try:
                with GitHgHelper.query('check-manifest', manifest,
                                       *parents) as stdout:
                    if stdout.readline().strip() != 'ok':
                        report('Sha1 mismatch for manifest %s' % manifest)
            except NoHelperException:
                # Without the helper, recompute the manifest hash here.
                hg_manifest = store.manifest(manifest)
                hg_manifest.set_parents(*parents)
                if hg_manifest.node != hg_manifest.sha1:
                    report('Sha1 mismatch for manifest %s' % manifest)

        # The `git` entry of the manifest commit's tree must match the tree
        # of the commit being checked.
        git_ls = one(Git.ls_tree(manifest_ref, 'git'))
        if git_ls:
            mode, typ, sha1, path = git_ls
        else:
            header, message = GitHgHelper.cat_file(
                'commit', manifest_ref).split('\n\n', 1)
            header = dict(l.split(' ', 1) for l in header.splitlines())
            if header['tree'] == EMPTY_TREE:
                sha1 = EMPTY_TREE
            else:
                report('Missing git tree in manifest commit %s' % manifest_ref)
                sha1 = None
        if sha1 and sha1 != tree:
            report('Tree mismatch between manifest commit %s and commit %s'
                   % (manifest_ref, node))

        if args.files:
            changes = get_changes(
                manifest_ref, tuple(store.manifest_ref(p) for p in parents),
                'hg')
            for path, hg_file, hg_fileparents in changes:
                if hg_file != NULL_NODE_ID and hg_file not in seen_files:
                    file = store.file(hg_file)
                    file.set_parents(*hg_fileparents)
                    if file.node != file.sha1:
                        report('Sha1 mismatch for file %s in manifest %s'
                               % (hg_file, manifest_ref))
                    seen_files.add(hg_file)

    # Without --files, files were not walked, so only entries of type
    # 'commit' are considered when looking for dangling metadata.
    if args.files:
        all_hg2git = set(all_hg2git.iterkeys())
    else:
        all_hg2git = set(k for k, (s, t) in all_hg2git.iteritems()
                         if t == 'commit')

    adjusted = {}
    if not args.commit:
        # Manifest commits that no checked changeset refers to.
        dangling = set(manifest_commits) - set(seen_manifest_refs)
        if dangling:
            def iter_manifests():
                # Yield (commit, previous kept commit) for every kept commit
                # that follows a removed one, walking the manifest commits
                # in reverse of their rev-list order.  (obj, False) is
                # yielded when commits were removed but none followed them.
                removed_one = False
                yielded = False
                previous = None
                for obj in reversed(manifest_commits):
                    if obj in dangling:
                        fix('Removing metadata commit %s with no hg2git entry'
                            % obj)
                        removed_one = True
                    else:
                        if removed_one:
                            yield obj, previous
                            yielded = True
                        previous = obj

                if removed_one and not yielded:
                    yield obj, False

            for obj, parent in progress_iter('Adjusting %d metadata commits',
                                             iter_manifests()):
                mark = store._fast_import.new_mark()
                if parent is False:
                    Git.update_ref('refs/cinnabar/manifest', obj)
                    continue
                elif parent:
                    parents = (adjusted.get(parent, parent),)
                with store._fast_import.commit(
                        ref='refs/cinnabar/manifest',
                        parents=parents, mark=mark) as commit:
                    mode, typ, tree, path = store._fast_import.ls(obj)
                    commit.filemodify('', tree, typ='tree')
                adjusted[obj] = Mark(mark)

    dangling = all_hg2git - seen_changesets - seen_manifests - seen_files
    if dangling or adjusted:
        with store._fast_import.commit(
                ref='refs/cinnabar/hg2git',
                parents=('refs/cinnabar/hg2git^0',)) as commit:
            for obj in dangling:
                fix('Removing dangling metadata for ' + obj)
                commit.filedelete(sha1path(obj))
            for obj, mark in progress_iter(
                    'Updating hg2git for %d metadata commits',
                    adjusted.iteritems()):
                commit.filemodify(sha1path(seen_manifest_refs[obj]), mark,
                                  typ='commit')

    dangling = all_notes - seen_notes
    if dangling:
        with store._fast_import.commit(
                ref='refs/notes/cinnabar',
                parents=('refs/notes/cinnabar^0',)) as commit:
            for c in dangling:
                fix('Removing dangling note for commit ' + c)
                # That's brute force, but meh.
                for l in range(0, 10):
                    commit.filedelete(sha1path(c, l))

    if status['broken']:
        info('Your git-cinnabar repository appears to be corrupted. There\n'
             'are known issues in older revisions that have been fixed.\n'
             'Please try running the following command to reset:\n'
             ' git cinnabar reclone\n\n'
             'Please note this command may change the commit sha1s. Your\n'
             'local branches will however stay untouched.\n'
             'Please report any corruption that fsck would detect after a\n'
             'reclone.')

    if not args.commit:
        info('Checking head references...')
        # Group the heads computed from the dag by branch and reconcile
        # them with the heads recorded in the store.
        computed_heads = defaultdict(set)
        for branch, head in dag.all_heads():
            computed_heads[branch].add(head)

        for branch in sorted(dag.tags()):
            stored_heads = store.heads({branch})
            for head in computed_heads[branch] - stored_heads:
                fix('Adding missing head %s in branch %s' %
                    (head, branch))
                store.add_head(head)
            for head in stored_heads - computed_heads[branch]:
                fix('Removing non-head reference to %s in branch %s' %
                    (head, branch))
                store._hgheads.remove((branch, head))

    store.close()

    if status['broken']:
        return 1
    if status['fixed']:
        return 2
    return 0
def fsck(args):
    """Check git-cinnabar metadata consistency, fixing what can be fixed.

    ``args`` is a list of command-line arguments: ``--manifests`` and
    ``--files`` enable hash validation of manifests and files, and any
    positional argument restricts the check to those commits/changesets.

    Returns 1 when a given commit is unknown or when the metadata is found
    broken, 2 when something was fixed, and 0 otherwise.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--manifests', action='store_true',
                        help='Validate manifests hashes')
    parser.add_argument('--files', action='store_true',
                        help='Validate files hashes')
    parser.add_argument('commit', nargs='*',
                        help='Specific commit or changeset to check')
    args = parser.parse_args(args)

    # Mutable flags updated by fix() and report() below.
    status = {
        'broken': False,
        'fixed': False,
    }

    def info(message):
        # Write a carriage return to stderr before printing the message.
        sys.stderr.write('\r')
        print message

    def fix(message):
        status['fixed'] = True
        info(message)

    def report(message):
        status['broken'] = True
        info(message)

    store = GitHgStore()
    store.init_fast_import(lambda: FastImport())

    if args.commit:
        # Only check the commits/changesets given on the command line.
        all_hg2git = {}
        all_notes = set()
        commits = set()
        all_git_commits = {}

        for c in args.commit:
            data = store.read_changeset_data(c)
            if data:
                all_notes.add(c)
                commits.add(c)
                c = data['changeset']
            commit = GitHgHelper.hg2git(c)
            if commit == NULL_NODE_ID and not data:
                info('Unknown commit or changeset: %s' % c)
                return 1
            if commit != NULL_NODE_ID:
                all_hg2git[c] = commit, 'commit'
            if not data:
                data = store.read_changeset_data(commit)
                commits.add(commit)
                if data:
                    all_notes.add(commit)

        all_git_commits = Git.iter('log', '--no-walk=unsorted', '--stdin',
                                   '--format=%T %H', stdin=commits)
    else:
        # Full check: load every hg2git mapping, every note and every
        # manifest commit, then walk all commits of the cinnabar branches.
        all_hg2git = {
            path.replace('/', ''): (filesha1, intern(typ))
            for mode, typ, filesha1, path in progress_iter(
                'Reading %d mercurial to git mappings',
                Git.ls_tree('refs/cinnabar/hg2git', recursive=True))
        }

        all_notes = set(
            path.replace('/', '')
            for mode, typ, filesha1, path in progress_iter(
                'Reading %d commit to changeset mappings',
                Git.ls_tree('refs/notes/cinnabar', recursive=True)))

        manifest_commits = OrderedDict((m, None) for m in progress_iter(
            'Reading %d manifest trees',
            Git.iter('rev-list', '--full-history', '--topo-order',
                     'refs/cinnabar/manifest')))

        all_git_heads = Git.for_each_ref('refs/cinnabar/branches',
                                         format='%(refname)')

        all_git_commits = Git.iter('log', '--topo-order', '--full-history',
                                   '--reverse', '--stdin', '--format=%T %H',
                                   stdin=all_git_heads)

    store._hg2git_cache = {p: s for p, (s, t) in all_hg2git.iteritems()}

    # Everything encountered during the walk; used afterwards to find
    # dangling metadata.
    seen_changesets = set()
    seen_manifests = set()
    seen_manifest_refs = {}
    seen_files = set()
    seen_notes = set()

    hg_manifest = None

    dag = gitdag()

    for line in progress_iter('Checking %d changesets', all_git_commits):
        # Each line is "<tree sha1> <commit sha1>" (--format=%T %H).
        tree, node = line.split(' ')
        if node not in all_notes:
            report('Missing note for git commit: ' + node)
            continue
        seen_notes.add(node)

        changeset_data = store.read_changeset_data(node)
        changeset = changeset_data['changeset']
        if 'extra' in changeset_data:
            extra = changeset_data['extra']
            header, message = GitHgHelper.cat_file('commit',
                                                   node).split('\n\n', 1)
            header = dict(l.split(' ', 1) for l in header.splitlines())
            if 'committer' in extra:
                committer_info = store.hg_author_info(header['committer'])
                committer = '%s %d %d' % committer_info
                if (committer != extra['committer']
                        and header['committer'] != extra['committer']
                        and committer_info[0] != extra['committer']):
                    report('Committer mismatch between commit and metadata for'
                           ' changeset %s' % changeset)
                if committer == extra['committer']:
                    fix('Fixing useless committer metadata for changeset %s' %
                        changeset)
                    del changeset_data['extra']['committer']
                    store._changesets[changeset] = LazyString(node)
            if header['committer'] != header['author'] and not extra:
                fix('Fixing useless empty extra metadata for changeset %s' %
                    changeset)
                del changeset_data['extra']
                store._changesets[changeset] = LazyString(node)

        seen_changesets.add(changeset)
        changeset_ref = store.changeset_ref(changeset)
        if not changeset_ref:
            report('Missing changeset in hg2git branch: %s' % changeset)
        elif str(changeset_ref) != node:
            report('Commit mismatch for changeset %s\n'
                   ' hg2git: %s\n commit: %s' %
                   (changeset, changeset_ref, node))

        hg_changeset = store.changeset(changeset, include_parents=True)
        sha1 = hg_changeset.sha1
        if hg_changeset.node != sha1:
            try_fixup = False
            # Two specific (changeset, computed sha1) pairs get a
            # hard-coded one-character patch.
            if (changeset, sha1) in (
                ('8c557b7c03a4a753e5c163038f04862e9f65fce1',
                 '249b59139de8e08abeb6c4e261a137c756e7af0e'),
                ('ffdee4a4eb7fc7cae80dfc4cb2fe0c3178773dcf',
                 '415e9d2eac83d508bf58a4df585c5f6b2b0f44ed'),
            ):
                header = hg_changeset.data.split('\n', 4)
                start = sum(len(h) for h in header[:3]) + 1
                changeset_data['patch'] = ((start, start + 1, '1'), )
                try_fixup = True

            # Some known cases of corruption involve a whitespace after the
            # timezone. Adding an empty extra metadata works around those.
            elif 'extra' not in changeset_data:
                changeset_data['extra'] = {}
                try_fixup = True

            if try_fixup:
                # Recompute with the adjusted metadata and keep the fix only
                # if the sha1 now matches.
                hg_changeset = store.changeset(changeset, include_parents=True)
                sha1 = hg_changeset.sha1
                if hg_changeset.node == sha1:
                    fix('Fixing known sha1 mismatch for changeset %s' %
                        changeset)
                    store._changesets[changeset] = LazyString(node)

        if hg_changeset.node != sha1:
            report('Sha1 mismatch for changeset %s' % changeset)

        dag.add(hg_changeset.node,
                (hg_changeset.parent1, hg_changeset.parent2),
                changeset_data.get('extra', {}).get('branch', 'default'))

        manifest = changeset_data['manifest']
        # Each manifest is only checked once.
        if manifest in seen_manifests:
            continue
        seen_manifests.add(manifest)
        manifest_ref = store.manifest_ref(manifest)
        if manifest_ref:
            seen_manifest_refs[manifest_ref] = manifest
        if not manifest_ref:
            report('Missing manifest in hg2git branch: %s' % manifest)
        elif not args.commit and manifest_ref not in manifest_commits:
            report('Missing manifest commit in manifest branch: %s' %
                   manifest_ref)

        # `parents` is only (re)computed when one of the validation flags
        # is set; both users below are guarded by those same flags.
        if args.manifests or args.files:
            parents = tuple(
                store.read_changeset_data(store.changeset_ref(p))['manifest']
                for p in (hg_changeset.parent1, hg_changeset.parent2)
                if p != NULL_NODE_ID)

        if args.manifests:
            try:
                with GitHgHelper.query('check-manifest', manifest,
                                       *parents) as stdout:
                    if stdout.readline().strip() != 'ok':
                        report('Sha1 mismatch for manifest %s' % manifest)
            except NoHelperException:
                # Without the helper, recompute the manifest hash here.
                hg_manifest = store.manifest(manifest)
                hg_manifest.set_parents(*parents)
                if hg_manifest.node != hg_manifest.sha1:
                    report('Sha1 mismatch for manifest %s' % manifest)

        # The `git` entry of the manifest commit's tree must match the tree
        # of the commit being checked.
        git_ls = one(Git.ls_tree(manifest_ref, 'git'))
        if git_ls:
            mode, typ, sha1, path = git_ls
        else:
            header, message = GitHgHelper.cat_file('commit',
                                                   manifest_ref).split(
                                                       '\n\n', 1)
            header = dict(l.split(' ', 1) for l in header.splitlines())
            if header['tree'] == EMPTY_TREE:
                sha1 = EMPTY_TREE
            else:
                report('Missing git tree in manifest commit %s' % manifest_ref)
                sha1 = None
        if sha1 and sha1 != tree:
            report('Tree mismatch between manifest commit %s and commit %s' %
                   (manifest_ref, node))

        if args.files:
            changes = get_changes(
                manifest_ref, tuple(store.manifest_ref(p) for p in parents),
                'hg')
            for path, hg_file, hg_fileparents in changes:
                if hg_file != NULL_NODE_ID and hg_file not in seen_files:
                    file = store.file(hg_file)
                    file.set_parents(*hg_fileparents)
                    if file.node != file.sha1:
                        report('Sha1 mismatch for file %s in manifest %s' %
                               (hg_file, manifest_ref))
                    seen_files.add(hg_file)

    # Without --files, files were not walked, so only entries of type
    # 'commit' are considered when looking for dangling metadata.
    if args.files:
        all_hg2git = set(all_hg2git.iterkeys())
    else:
        all_hg2git = set(k for k, (s, t) in all_hg2git.iteritems()
                         if t == 'commit')

    adjusted = {}
    if not args.commit:
        # Manifest commits that no checked changeset refers to.
        dangling = set(manifest_commits) - set(seen_manifest_refs)
        if dangling:
            def iter_manifests():
                # Yield (commit, previous kept commit) for every kept commit
                # that follows a removed one, walking the manifest commits
                # in reverse of their rev-list order.  (obj, False) is
                # yielded when commits were removed but none followed them.
                removed_one = False
                yielded = False
                previous = None
                for obj in reversed(manifest_commits):
                    if obj in dangling:
                        fix('Removing metadata commit %s with no hg2git entry'
                            % obj)
                        removed_one = True
                    else:
                        if removed_one:
                            yield obj, previous
                            yielded = True
                        previous = obj

                if removed_one and not yielded:
                    yield obj, False

            for obj, parent in progress_iter('Adjusting %d metadata commits',
                                             iter_manifests()):
                mark = store._fast_import.new_mark()
                if parent is False:
                    Git.update_ref('refs/cinnabar/manifest', obj)
                    continue
                elif parent:
                    parents = (adjusted.get(parent, parent), )
                with store._fast_import.commit(ref='refs/cinnabar/manifest',
                                               parents=parents,
                                               mark=mark) as commit:
                    mode, typ, tree, path = store._fast_import.ls(obj)
                    commit.filemodify('', tree, typ='tree')
                adjusted[obj] = Mark(mark)

    dangling = all_hg2git - seen_changesets - seen_manifests - seen_files
    if dangling or adjusted:
        with store._fast_import.commit(
                ref='refs/cinnabar/hg2git',
                parents=('refs/cinnabar/hg2git^0', )) as commit:
            for obj in dangling:
                fix('Removing dangling metadata for ' + obj)
                commit.filedelete(sha1path(obj))
            for obj, mark in progress_iter(
                    'Updating hg2git for %d metadata commits',
                    adjusted.iteritems()):
                commit.filemodify(sha1path(seen_manifest_refs[obj]), mark,
                                  typ='commit')

    dangling = all_notes - seen_notes
    if dangling:
        with store._fast_import.commit(
                ref='refs/notes/cinnabar',
                parents=('refs/notes/cinnabar^0', )) as commit:
            for c in dangling:
                fix('Removing dangling note for commit ' + c)
                # That's brute force, but meh.
                for l in range(0, 10):
                    commit.filedelete(sha1path(c, l))

    if status['broken']:
        info('Your git-cinnabar repository appears to be corrupted. There\n'
             'are known issues in older revisions that have been fixed.\n'
             'Please try running the following command to reset:\n'
             ' git cinnabar reclone\n\n'
             'Please note this command may change the commit sha1s. Your\n'
             'local branches will however stay untouched.\n'
             'Please report any corruption that fsck would detect after a\n'
             'reclone.')

    if not args.commit:
        info('Checking head references...')
        # Group the heads computed from the dag by branch and reconcile
        # them with the heads recorded in the store.
        computed_heads = defaultdict(set)
        for branch, head in dag.all_heads():
            computed_heads[branch].add(head)

        for branch in sorted(dag.tags()):
            stored_heads = store.heads({branch})
            for head in computed_heads[branch] - stored_heads:
                fix('Adding missing head %s in branch %s' % (head, branch))
                store.add_head(head)
            for head in stored_heads - computed_heads[branch]:
                fix('Removing non-head reference to %s in branch %s' %
                    (head, branch))
                store._hgheads.remove((branch, head))

    store.close()

    if status['broken']:
        return 1
    if status['fixed']:
        return 2
    return 0
def push(repo, store, what, repo_heads, repo_branches, dry_run=False):
    """Bundle the commits selected by `what` and send them to `repo`.

    Parameters:
        repo: remote repository object; this function uses its `local()`,
            `capable()` and `unbundle()` methods.
        store: metadata store used to map between git commits and hg
            changesets (`heads`, `changeset_ref`, `hg_changeset`).
        what: re-iterable collection of 3-tuples. The first item is the git
            commit to push (falsy entries are skipped), the second is
            ignored here, the third is the per-entry "force" flag.
        repo_heads: hex heads of the remote as known by the caller; may be
            empty.
        repo_branches: passed to `store.heads()` to select the known heads
            excluded from the boundary computation.
        dry_run: when true, compute what would be pushed but send nothing.

    Returns:
        `gitdag(push_commits)` if the remote reported a non-zero result or
        if `dry_run` is set, otherwise an empty tuple.

    Raises:
        Exception: when a new root would be pushed to a non-empty remote
            without every entry being forced, or when the remote replies
            with an `error:abort` bundle2 part.
    """
    def heads():
        for sha1 in store.heads(repo_branches):
            yield b'^%s' % store.changeset_ref(sha1)

    def local_bases():
        h = chain(heads(), (w for w, _, _ in what if w))
        for c, t, p in GitHgHelper.rev_list(b'--topo-order', b'--full-history',
                                            b'--boundary', *h):
            # Only boundary commits (prefixed with '-') are of interest.
            if c[:1] != b'-':
                continue
            yield store.hg_changeset(c[1:])

        for w, _, _ in what:
            if w:
                rev = store.hg_changeset(w)
                if rev:
                    yield rev

    common = findcommon(repo, store, set(local_bases()))
    logging.info('common: %s', common)

    def revs():
        for sha1 in common:
            yield b'^%s' % store.changeset_ref(sha1)

    # Kept under a separate name rather than rebinding `revs` over the
    # generator function it was just built from.
    rev_args = chain(revs(), (w for w, _, _ in what if w))
    push_commits = [(c, p) for c, t, p in GitHgHelper.rev_list(
        b'--topo-order', b'--full-history', b'--parents', b'--reverse',
        *rev_args)]

    pushed = False
    if push_commits:
        # A commit without parents is a root.
        has_root = any(not p for (c, p) in push_commits)
        force = all(v for _, _, v in what)
        if has_root and repo_heads:
            if not force:
                raise Exception('Cannot push a new root')
            else:
                # logging.warn is a deprecated alias of logging.warning.
                logging.warning('Pushing a new root')
        if force:
            repo_heads = [b'force']
        else:
            if not repo_heads:
                repo_heads = [NULL_NODE_ID]
            repo_heads = [unhexlify(h) for h in repo_heads]
    if push_commits and not dry_run:
        # Logger names must be `str`; a bytes name raises TypeError on
        # Python 3, so the same text logger is used for every message.
        bundle2_logger = logging.getLogger('bundle2')
        if repo.local():
            repo.local().ui.setconfig(b'server', b'validate', True)
        if unbundle20:
            b2caps = repo.capable(b'bundle2') or {}
        else:
            b2caps = {}
        if b2caps:
            b2caps = decodecaps(unquote_to_bytes(b2caps))
        bundle2_logger.debug('%r', b2caps)
        if b2caps:
            b2caps[b'replycaps'] = encodecaps({b'error': [b'abort']})
        cg = create_bundle(store, push_commits, b2caps)
        if not isinstance(repo, HelperRepo):
            cg = chunkbuffer(cg)
            if not b2caps:
                cg = cg1unpacker(cg, b'UN')
        reply = repo.unbundle(cg, repo_heads, b'')
        if unbundle20 and isinstance(reply, unbundle20):
            parts = iter(reply.iterparts())
            for part in parts:
                bundle2_logger.debug('part: %s', part.type)
                bundle2_logger.debug('params: %r', part.params)
                if part.type == b'output':
                    sys.stderr.write(fsdecode(part.read()))
                elif part.type == b'reply:changegroup':
                    # TODO: should check params['in-reply-to']
                    reply = int(part.params[b'return'])
                elif part.type == b'error:abort':
                    message = part.params[b'message'].decode('utf-8')
                    hint = part.params.get(b'hint')
                    if hint:
                        message += '\n\n' + hint.decode('utf-8')
                    raise Exception(message)
                else:
                    bundle2_logger.warning(
                        'ignoring bundle2 part: %s', part.type)
        pushed = reply != 0
    return gitdag(push_commits) if pushed or dry_run else ()
def fsck(args):
    '''check cinnabar metadata consistency'''
    # Overview:
    #   * With neither `args.commit` nor `args.full`, defer to fsck_quick().
    #   * Otherwise walk the selected git commits, cross-checking each one
    #     against its changeset, manifest and file metadata, reporting
    #     problems through `status.report()` and repairs through
    #     `status.fix()`.
    #
    # Return value:
    #   * 1 when --full is combined with explicit commits, when a given
    #     commit/changeset is unknown, or when `status('broken')` is set;
    #   * 2 when `status('fixed')` is set;
    #   * 0 otherwise.

    if not args.commit and not args.full:
        return fsck_quick()

    status = FsckStatus()

    store = GitHgStore()

    if args.full and args.commit:
        logging.error('Cannot pass both --full and a commit')
        return 1

    if args.commit:
        commits = set()
        all_git_commits = {}

        for c in args.commit:
            # Each argument may be a git commit or an hg changeset: first
            # try it as a git commit, then resolve it through hg2git.
            cs = store.hg_changeset(c)
            if cs:
                commits.add(c)
                c = cs.node
            commit = GitHgHelper.hg2git(c)
            if commit == NULL_NODE_ID and not cs:
                status.info('Unknown commit or changeset: %s' % c)
                return 1
            if not cs:
                cs = store.hg_changeset(commit)
                commits.add(commit)

        all_git_commits = GitHgHelper.rev_list('--no-walk=unsorted', *commits)
    else:
        all_refs = dict((ref, sha1)
                        for sha1, ref in Git.for_each_ref('refs/cinnabar'))

        if 'refs/cinnabar/metadata' in all_refs:
            # `<rev>^^@` is git revision syntax for "all parents of the
            # first parent of <rev>".
            # NOTE(review): presumably those are the changeset heads
            # recorded in the metadata commit -- confirm.
            git_heads = '%s^^@' % all_refs['refs/cinnabar/metadata']
        else:
            # NOTE(review): under `python -O` this assert is stripped and
            # `git_heads` is then unbound below.
            assert False

        all_git_commits = GitHgHelper.rev_list(
            '--topo-order', '--full-history', '--reverse', git_heads)

    # Changeset dag rebuilt from the commits being checked; used at the end
    # to recompute branch heads.
    dag = gitdag()

    GitHgHelper.reset_heads('manifests')

    # The file check that goes through store.file() is only used when the
    # FileFindParents logger has DEBUG enabled; otherwise the helper's
    # check_file is used.
    full_file_check = FileFindParents.logger.isEnabledFor(logging.DEBUG)

    for node, tree, parents in progress_iter('Checking {} changesets',
                                             all_git_commits):
        node = store._replace.get(node, node)
        hg_node = store.hg_changeset(node)
        if not hg_node:
            status.report('Missing note for git commit: ' + node)
            continue
        GitHgHelper.seen('git2hg', node)

        changeset_data = store.changeset(hg_node)
        changeset = changeset_data.node

        GitHgHelper.seen('hg2git', changeset)
        changeset_ref = store.changeset_ref(changeset)
        if not changeset_ref:
            status.report('Missing changeset in hg2git branch: %s' % changeset)
            continue
        elif str(changeset_ref) != node:
            status.report('Commit mismatch for changeset %s\n'
                          ' hg2git: %s\n commit: %s'
                          % (changeset, changeset_ref, node))

        hg_changeset = store.changeset(changeset, include_parents=True)
        if hg_changeset.node != hg_changeset.sha1:
            status.report('Sha1 mismatch for changeset %s' % changeset)

        dag.add(hg_changeset.node,
                (hg_changeset.parent1, hg_changeset.parent2),
                changeset_data.branch or 'default')

        # Recompute the changeset metadata from the git commit and compare
        # it with what is stored; rewrite it when it differs.
        raw_changeset = Changeset.from_git_commit(node)
        patcher = ChangesetPatcher.from_diff(raw_changeset, changeset_data)
        if patcher != store.read_changeset_data(node):
            status.fix('Adjusted changeset metadata for %s' % changeset)
            GitHgHelper.set('changeset', changeset, NULL_NODE_ID)
            GitHgHelper.set('changeset', changeset, node)
            GitHgHelper.put_blob(patcher, want_sha1=False)
            GitHgHelper.set('changeset-metadata', changeset, NULL_NODE_ID)
            GitHgHelper.set('changeset-metadata', changeset, ':1')

        manifest = changeset_data.manifest
        # Skip the manifest checks when `seen` returns a true value for this
        # manifest or when the changeset has the null manifest.
        # NOTE(review): presumably `seen` is true when the manifest was
        # already checked for an earlier changeset -- confirm.
        if GitHgHelper.seen('hg2git', manifest) or manifest == NULL_NODE_ID:
            continue
        manifest_ref = store.manifest_ref(manifest)
        if not manifest_ref:
            # NOTE(review): execution continues with a falsy manifest_ref
            # after this report; confirm that this is intended.
            status.report('Missing manifest in hg2git branch: %s' % manifest)

        parents = tuple(
            store.changeset(p).manifest
            for p in hg_changeset.parents
        )
        git_parents = tuple(store.manifest_ref(p) for p in parents
                            if p != NULL_NODE_ID)

        # This doesn't change the value but makes the helper track the manifest
        # dag.
        GitHgHelper.set('manifest', manifest, manifest_ref)

        if not GitHgHelper.check_manifest(manifest):
            status.report('Sha1 mismatch for manifest %s' % manifest)

        manifest_commit_parents = GitCommit(manifest_ref).parents
        if sorted(manifest_commit_parents) != sorted(git_parents):
            # TODO: better error
            status.report('%s(%s) %s != %s' % (manifest, manifest_ref,
                                               manifest_commit_parents,
                                               git_parents))

        # TODO: check that manifest content matches changeset content

        changes = get_changes(manifest_ref, git_parents)
        for path, hg_file, hg_fileparents in changes:
            # Check a file when it is the empty file or when `seen` returns
            # a true value for it; null entries are never checked.
            if hg_file != NULL_NODE_ID and (hg_file == HG_EMPTY_FILE or
                                            GitHgHelper.seen('hg2git', hg_file)):
                if full_file_check:
                    file = store.file(hg_file, hg_fileparents, git_parents,
                                      store.manifest_path(path))
                    valid = file.node == file.sha1
                else:
                    valid = GitHgHelper.check_file(hg_file, *hg_fileparents)
                if not valid:
                    status.report(
                        'Sha1 mismatch for file %s in manifest %s'
                        % (hg_file, manifest_ref))

    # The remaining consistency passes only run for a whole-repository
    # check that found nothing broken.
    if not args.commit and not status('broken'):
        store_manifest_heads = set(store._manifest_heads_orig)
        manifest_heads = set(GitHgHelper.heads('manifests'))
        if store_manifest_heads != manifest_heads:
            def iter_manifests(a, b):
                # rev-list arguments selecting what is reachable from heads
                # in `a` but not from any head in `b`.
                for h in a - b:
                    yield h
                for h in b:
                    yield '^%s' % h

            for m, t, p in GitHgHelper.rev_list(
                    '--topo-order', '--full-history', '--reverse',
                    *iter_manifests(manifest_heads, store_manifest_heads)):
                status.fix('Missing manifest commit in manifest branch: %s' % m)

            for m, t, p in GitHgHelper.rev_list(
                    '--topo-order', '--full-history', '--reverse',
                    *iter_manifests(store_manifest_heads, manifest_heads)):
                status.fix('Removing metadata commit %s with no corresponding '
                           'changeset' % (m))

            for h in store_manifest_heads - manifest_heads:
                if GitHgHelper.seen('hg2git', store.hg_manifest(h)):
                    status.fix('Removing non-head reference to %s in manifests'
                               ' metadata.' % h)

    # `dangling` stays empty unless one of the guarded queries below runs.
    dangling = ()
    if not args.commit and not status('broken'):
        dangling = GitHgHelper.dangling('hg2git')
    for obj in dangling:
        status.fix('Removing dangling metadata for ' + obj)
        # Theoretically, we should figure out if they are files, manifests
        # or changesets and set the right variable accordingly, but in
        # practice, it makes no difference. Reevaluate when GitHgStore.close
        # is modified, though.
        GitHgHelper.set('file', obj, NULL_NODE_ID)
        GitHgHelper.set('file-meta', obj, NULL_NODE_ID)

    if not args.commit and not status('broken'):
        dangling = GitHgHelper.dangling('git2hg')
    for c in dangling:
        status.fix('Removing dangling note for commit ' + c)
        GitHgHelper.set('changeset-metadata', c, NULL_NODE_ID)

    if status('broken'):
        status.info(
            'Your git-cinnabar repository appears to be corrupted. There\n'
            'are known issues in older revisions that have been fixed.\n'
            'Please try running the following command to reset:\n'
            ' git cinnabar reclone\n\n'
            'Please note this command may change the commit sha1s. Your\n'
            'local branches will however stay untouched.\n'
            'Please report any corruption that fsck would detect after a\n'
            'reclone.')

    if not args.commit:
        status.info('Checking head references...')
        # Heads recomputed from the dag built above, grouped by branch.
        computed_heads = defaultdict(set)
        for branch, head in dag.all_heads():
            computed_heads[branch].add(head)

        for branch in sorted(dag.tags()):
            stored_heads = store.heads({branch})
            for head in computed_heads[branch] - stored_heads:
                status.fix('Adding missing head %s in branch %s' %
                           (head, branch))
                store.add_head(head)
            for head in stored_heads - computed_heads[branch]:
                status.fix('Removing non-head reference to %s in branch %s' %
                           (head, branch))
                del store._hgheads[head]

    metadata_commit = Git.resolve_ref('refs/cinnabar/metadata')
    if status('broken'):
        # Record which metadata commit was found broken; the store is not
        # closed on this path.
        Git.update_ref('refs/cinnabar/broken', metadata_commit)
        return 1

    if args.full:
        Git.update_ref('refs/cinnabar/checked', metadata_commit)
    interval_expired('fsck', 0)
    store.close()

    if status('fixed'):
        return 2
    return 0
def create_hg_manifest(self, commit, parents):
    """Build the manifest for git `commit` given its git `parents`.

    Three cases are handled, depending on the number of (effective)
    parents:

    * no parent: every file listed by `Git.ls_tree` is created and added;
    * two parents with different manifests: a merge manifest is built,
      which requires `experiment('merge')` to be enabled;
    * otherwise: the manifest is derived from the first parent's manifest
      and the diff between `parents[0]` and `commit`.

    Returns a `(manifest, changeset_files)` pair, where `changeset_files`
    is the list of paths recorded as changed. When nothing differs from the
    first parent's manifest, that parent manifest and an empty list are
    returned instead.

    Raises `Exception` when a merge is encountered and
    `experiment('merge')` is falsy.
    """
    manifest = GeneratedManifestInfo(NULL_NODE_ID)
    changeset_files = []

    if parents:
        parent_changeset = self.changeset(self.hg_changeset(parents[0]))
        parent_manifest = self.manifest(parent_changeset.manifest)
        parent_node = parent_manifest.node

    if len(parents) == 2:
        parent2_changeset = self.changeset(self.hg_changeset(parents[1]))
        parent2_manifest = self.manifest(parent2_changeset.manifest)
        parent2_node = parent2_manifest.node
        # Both parents share the same manifest: handle it like a
        # single-parent commit.
        if parent_node == parent2_node:
            parents = parents[:1]

    if not parents:
        # Root commit: everything in the tree is new.
        for line in Git.ls_tree(commit, recursive=True):
            mode, typ, sha1, path = line
            node = self.create_file(sha1)
            manifest.add(path, node, self.ATTR[mode], modified=True)
            changeset_files.append(path)

        manifest.parents = []
        manifest.delta_node = NULL_NODE_ID
        return manifest, changeset_files

    elif len(parents) == 2:
        if not experiment('merge'):
            raise Exception('Pushing merges is not supported yet')
        # _merge_warn goes 0 -> 1 once the experimental warning was shown,
        # then 1 -> 2 once the "may take a while" warning was shown.
        if not self._merge_warn:
            logging.warning('Pushing merges is experimental.')
            logging.warning('This may irremediably push bad state to the '
                            'mercurial server!')
            self._merge_warn = 1
        git_manifests = (self.manifest_ref(parent_node),
                         self.manifest_ref(parent2_node))

        # TODO: this would benefit from less git queries
        # Build one dag of file revisions per path, over the manifest
        # commits in the symmetric difference of both parent manifests.
        file_dags = {}
        for m, tree, mparents in GitHgHelper.rev_list(
                b'--parents', b'--topo-order',
                b'--full-history', b'--reverse',
                b'%s...%s' % git_manifests):
            for p in mparents:
                for path, sha1_after, sha1_before in manifest_diff(p, m):
                    path = GitHgStore.manifest_path(path)
                    if path not in file_dags:
                        file_dags[path] = gitdag()
                    dag = file_dags[path]
                    if sha1_before == NULL_NODE_ID:
                        dag.add(sha1_after, ())
                    else:
                        dag.add(sha1_after, (sha1_before,))
        files = [(p, mode, sha1) for mode, _, sha1, p in
                 Git.ls_tree(commit, recursive=True)]
        manifests = sorted_merge(parent_manifest, parent2_manifest,
                                 key=lambda i: i.path, non_key=lambda i: i)
        # Each merged line is (path, file-in-commit, (line-in-p1, line-in-p2)),
        # with a falsy member when the path is absent on that side.
        for line in sorted_merge(files, manifests):
            path, f, m = line
            if not m:
                m = (None, None)
            manifest_line_p1, manifest_line_p2 = m
            if not f:  # File was removed
                if manifest_line_p1:
                    manifest.removed.add(path)
                    changeset_files.append(path)
                continue
            mode, sha1 = f
            attr = self.ATTR[mode]
            if manifest_line_p1 and not manifest_line_p2:
                file_parents = (manifest_line_p1.sha1,)
            elif manifest_line_p2 and not manifest_line_p1:
                file_parents = (manifest_line_p2.sha1,)
            elif not manifest_line_p1 and not manifest_line_p2:
                # File exists in neither parent: it is new.
                file_parents = ()
                changeset_files.append(path)
            elif manifest_line_p1.sha1 == manifest_line_p2.sha1:
                file_parents = (manifest_line_p1.sha1,)
            else:
                # The file differs between both parents: use the per-path
                # dag to find out whether one revision is an ancestor of
                # the other.
                if self._merge_warn == 1:
                    logging.warning('This may take a while...')
                    self._merge_warn = 2
                file_parents = ()
                # NOTE(review): raises KeyError if no dag was recorded for
                # this path -- confirm that cannot happen here.
                dag = file_dags.pop(path)
                if dag:
                    dag.tag_nodes_and_parents(
                        (manifest_line_p1.sha1,), 'a')
                    # p2's revision got tagged while tagging p1 and its
                    # parents: keep p1 only.
                    if dag._tags.get(manifest_line_p2.sha1) == 'a':
                        file_parents = (manifest_line_p1.sha1,)
                    else:
                        dag._tags.clear()
                        dag.tag_nodes_and_parents(
                            (manifest_line_p2.sha1,), 'b')
                        if dag._tags.get(manifest_line_p1.sha1) == 'b':
                            file_parents = (manifest_line_p2.sha1,)
                # Neither side was tagged from the other: keep both parents.
                if not file_parents:
                    file_parents = (manifest_line_p1.sha1,
                                    manifest_line_p2.sha1)

            assert file_parents is not None
            f = self._create_file_internal(sha1, *file_parents)
            # Re-read the parents from the created file, dropping null ones.
            file_parents = tuple(p for p in (f.parent1, f.parent2)
                                 if p != NULL_NODE_ID)
            merged = len(file_parents) == 2
            if not merged and file_parents:
                # Content identical to the single parent: reuse its node
                # instead of storing a new file revision.
                if self.git_file_ref(file_parents[0]) == sha1:
                    node = file_parents[0]
                else:
                    merged = True
            if merged or not file_parents:
                node = self._store_file_internal(f)
            elif file_parents:
                node = file_parents[0]

            attr_change = (manifest_line_p1 and
                           manifest_line_p1.attr != attr)
            manifest.add(path, node, attr, modified=merged or attr_change)
            if merged or attr_change:
                changeset_files.append(path)
        if manifest.raw_data == parent_manifest.raw_data:
            return parent_manifest, []
        manifest.parents = (parent_node, parent2_node)
        return manifest, changeset_files

    def process_diff(diff):
        # Turn diff entries into (path, change) pairs; a rename
        # additionally yields a deletion entry for the path carried in the
        # status after its first character.
        for (mode_before, mode_after, sha1_before, sha1_after, status,
             path) in diff:
            if status[:1] == b'R':
                yield status[1:], (
                    b'000000', sha1_before, NULL_NODE_ID, b'D')
            yield path, (mode_after, sha1_before, sha1_after,
                         status)
    git_diff = sorted(
        l for l in process_diff(GitHgHelper.diff_tree(
            parents[0], commit, detect_copy=True))
    )
    if not git_diff:
        return parent_manifest, []

    parent_lines = OrderedDict((l.path, l) for l in parent_manifest)
    items = manifest.items
    for line in sorted_merge(iteritems(parent_lines), git_diff,
                             non_key=lambda i: i[1]):
        path, manifest_line, change = line
        if not change:
            # Unchanged path: carry the parent's manifest line over as is.
            items.append(manifest_line)
            continue
        mode_after, sha1_before, sha1_after, status = change
        # The first character of the status is the change type; the rest,
        # if any, is the other path involved (used for R and C below).
        path2 = status[1:]
        status = status[:1]
        attr = self.ATTR.get(mode_after)
        if status == b'D':
            manifest.removed.add(path)
            changeset_files.append(path)
            continue
        if status in b'MT':
            if sha1_before == sha1_after:
                node = manifest_line.sha1
            else:
                node = self.create_file(sha1_after, manifest_line.sha1)
        elif status in b'RC':
            if sha1_after != EMPTY_BLOB:
                node = self.create_copy(
                    (path2, parent_lines[path2].sha1), sha1_after,
                    path=path)
            else:
                node = self.create_file(sha1_after)
        else:
            assert status == b'A'
            node = self.create_file(sha1_after)
        manifest.add(path, node, attr, modified=True)
        changeset_files.append(path)
    manifest.parents = (parent_node,)
    manifest.delta_node = parent_node
    return manifest, changeset_files
def push(repo, store, what, repo_heads, repo_branches):
    """Bundle the commits selected by `what` and send them to `repo`.

    Parameters:
        repo: remote repository object; this function uses its `local()`,
            `url()` and `unbundle()` methods.
        store: metadata store used to map between git commits and hg
            changesets; `init_fast_import()` is called on it first.
        what: mapping whose keys are the git commits to push (falsy keys
            are skipped) and whose values are indexable, item 1 being the
            per-entry "force" flag.
        repo_heads: hex heads of the remote as known by the caller; may be
            empty.
        repo_branches: passed to `store.heads()` to select the known heads
            excluded from the boundary computation.

    Returns:
        `gitdag(push_commits)` if the remote reported a non-zero result,
        otherwise an empty tuple.

    Raises:
        Exception: when a new root would be pushed to a non-empty remote
            without every entry being forced.
    """
    store.init_fast_import()

    def heads():
        for sha1 in store.heads(repo_branches):
            yield '^%s' % store.changeset_ref(sha1)

    def local_bases():
        for c in Git.iter('rev-list', '--stdin', '--topo-order',
                          '--full-history', '--boundary',
                          *(w for w in what if w), stdin=heads()):
            # Only boundary commits (prefixed with '-') are of interest.
            if c[0] != '-':
                continue
            yield store.hg_changeset(c[1:])

        for w in what:
            # Falsy entries are already filtered out of the rev-list
            # arguments above; do not try to resolve them either.
            if not w:
                continue
            rev = store.hg_changeset(w)
            if rev:
                yield rev

    common = findcommon(repo, store, set(local_bases()))
    # Let logging do the formatting, only when the message is emitted.
    logging.info('common: %s', common)

    def revs():
        for sha1 in common:
            yield '^%s' % store.changeset_ref(sha1)

    push_commits = list(Git.iter('rev-list', '--stdin', '--topo-order',
                                 '--full-history', '--parents', '--reverse',
                                 *(w for w in what if w), stdin=revs()))

    pushed = False
    if push_commits:
        # Entries are `rev-list --parents` output lines: a line holding
        # nothing but a 40-character sha1 is a commit without parents.
        has_root = any(len(p) == 40 for p in push_commits)
        force = all(v[1] for v in what.values())
        if has_root and repo_heads:
            if not force:
                raise Exception('Cannot push a new root')
            else:
                # logging.warn is a deprecated alias of logging.warning.
                logging.warning('Pushing a new root')
        if force:
            repo_heads = ['force']
        else:
            if not repo_heads:
                repo_heads = [NULL_NODE_ID]
            repo_heads = [unhexlify(h) for h in repo_heads]
        if repo.local():
            repo.local().ui.setconfig('server', 'validate', True)
        b2caps = bundle2caps(repo) if unbundle20 else {}
        if b2caps and (repo.url().startswith(('http://', 'https://')) or
                       not isinstance(repo, HelperRepo)):
            b2caps['replycaps'] = True
        cg = create_bundle(store, push_commits, b2caps)
        if not isinstance(repo, HelperRepo):
            cg = util.chunkbuffer(cg)
            if not b2caps:
                cg = cg1unpacker(cg, 'UN')
        reply = repo.unbundle(cg, repo_heads, '')
        if unbundle20 and isinstance(reply, unbundle20):
            parts = iter(reply.iterparts())
            for part in parts:
                if part.type == 'output':
                    sys.stderr.write(part.read())
                elif part.type == 'reply:changegroup':
                    # TODO: should check params['in-reply-to']
                    reply = int(part.params['return'])
                else:
                    logging.getLogger('bundle2').warning(
                        'ignoring bundle2 part: %s', part.type)
        pushed = reply != 0
    return gitdag(push_commits) if pushed else ()
def findcommon(repo, store, hgheads):
    """Find which local changesets `repo` already knows about.

    `repo.known()` is queried with samples of at most 100 nodes at a time.
    A dag of the corresponding git commits is tagged 'known'/'unknown'
    from the answers until no untagged head or root remains.

    Returns an empty set when `hgheads` is empty, `hgheads` itself when
    every head is known from the first sample, and otherwise the list of
    hg changesets for the heads of the 'known' part of the dag.
    """
    log = logging.getLogger('findcommon')
    log.debug(hgheads)
    if not hgheads:
        return set()

    sample_size = 100

    def ask_remote(hg_nodes):
        # One round trip: which of these hg nodes does the remote have?
        return repo.known(unhexlify(node) for node in hg_nodes)

    first_sample = _sample(hgheads, sample_size)
    answers = ask_remote(first_sample)
    known_hg = {node for node, found in izip(first_sample, answers) if found}

    log.info('initial sample size: %d', len(first_sample))

    if len(known_hg) == len(hgheads):
        log.info('all heads known')
        return hgheads

    git_heads = {store.changeset_ref(node) for node in hgheads}
    git_known = {store.changeset_ref(node) for node in known_hg}

    log.debug('known (sub)set: (%d) %s',
              len(known_hg), LazyCall(sorted, git_known))

    def rev_list_input():
        # Exclude everything already known, then walk from the other heads.
        for ref in git_known:
            yield '^%s' % ref
        for ref in git_heads:
            if ref not in git_known:
                yield ref

    rev_list = Git.iter('rev-list', '--topo-order', '--full-history',
                        '--parents', '--stdin', stdin=rev_list_input())
    dag = gitdag(chain(rev_list, git_known))
    dag.tag_nodes_and_parents(git_known, 'known')

    def log_dag(tag):
        # The summaries below iterate the dag; skip them unless wanted.
        if not log.isEnabledFor(logging.DEBUG):
            return
        node_count = sum(1 for _ in dag.iternodes(tag))
        log.debug('%s dag size: %d', tag, node_count)
        tagged_heads = sorted(dag.heads(tag))
        log.debug('%s dag heads: (%d) %s', tag, len(tagged_heads),
                  tagged_heads)
        tagged_roots = sorted(dag.roots(tag))
        log.debug('%s dag roots: (%d) %s', tag, len(tagged_roots),
                  tagged_roots)

    def untagged_frontier():
        return set(chain(dag.heads(), dag.roots()))

    log_dag('unknown')
    log_dag('known')

    frontier = untagged_frontier()
    while frontier:
        picked = set(_sample(frontier, sample_size))
        shortfall = sample_size - len(picked)
        if shortfall > 0:
            # Top the sample up with other nodes of the dag.
            picked |= set(_sample(set(dag.iternodes()), shortfall))

        picked = list(picked)
        answers = ask_remote([store.hg_changeset(ref) for ref in picked])
        unknown_git = {ref for ref, found in izip(picked, answers)
                       if not found}
        known_git = {ref for ref, found in izip(picked, answers) if found}
        log.info('next sample size: %d', len(picked))
        log.debug('known (sub)set: (%d) %s',
                  len(known_git), LazyCall(sorted, known_git))
        log.debug('unknown (sub)set: (%d) %s',
                  len(unknown_git), LazyCall(sorted, unknown_git))

        dag.tag_nodes_and_parents(known_git, 'known')
        dag.tag_nodes_and_children(unknown_git, 'unknown')
        log_dag('unknown')
        log_dag('known')

        frontier = untagged_frontier()

    return [store.hg_changeset(ref) for ref in dag.heads('known')]
def push(repo, store, what, repo_heads, repo_branches, dry_run=False):
    """Bundle the commits selected by `what` and send them to `repo`.

    Parameters:
        repo: remote repository object; this function uses its `local()`,
            `capable()` and `unbundle()` methods.
        store: metadata store used to map between git commits and hg
            changesets (`heads`, `changeset_ref`, `hg_changeset`).
        what: mapping whose keys are the git commits to push (falsy keys
            are skipped) and whose values are indexable, item 1 being the
            per-entry "force" flag.
        repo_heads: hex heads of the remote as known by the caller; may be
            empty.
        repo_branches: passed to `store.heads()` to select the known heads
            excluded from the boundary computation.
        dry_run: when true, compute what would be pushed but send nothing.

    Returns:
        `gitdag(push_commits)` if the remote reported a non-zero result or
        if `dry_run` is set, otherwise an empty tuple.

    Raises:
        Exception: when a new root would be pushed to a non-empty remote
            without every entry being forced.
        error.Abort: when the remote replies with an `error:abort` bundle2
            part.
    """
    def heads():
        for sha1 in store.heads(repo_branches):
            yield '^%s' % store.changeset_ref(sha1)

    def local_bases():
        h = chain(heads(), (w for w in what if w))
        for c, t, p in GitHgHelper.rev_list('--topo-order', '--full-history',
                                            '--boundary', *h):
            # Only boundary commits (prefixed with '-') are of interest.
            if c[0] != '-':
                continue
            yield store.hg_changeset(c[1:])

        for w in what:
            # Falsy entries are already filtered out of the rev-list
            # arguments above; do not try to resolve them either.
            if not w:
                continue
            rev = store.hg_changeset(w)
            if rev:
                yield rev

    common = findcommon(repo, store, set(local_bases()))
    logging.info('common: %s', common)

    def revs():
        for sha1 in common:
            yield '^%s' % store.changeset_ref(sha1)

    # Kept under a separate name rather than rebinding `revs` over the
    # generator function it was just built from.
    rev_args = chain(revs(), (w for w in what if w))
    push_commits = list((c, p) for c, t, p in GitHgHelper.rev_list(
        '--topo-order', '--full-history', '--parents', '--reverse',
        *rev_args))

    pushed = False
    if push_commits:
        # Each entry is a (commit, parents) pair, so a root is an entry
        # without parents. Comparing the length of the pair itself to 40
        # can never match and would let new roots through unnoticed.
        has_root = any(not p for c, p in push_commits)
        force = all(v[1] for v in what.values())
        if has_root and repo_heads:
            if not force:
                raise Exception('Cannot push a new root')
            else:
                # logging.warn is a deprecated alias of logging.warning.
                logging.warning('Pushing a new root')
        if force:
            repo_heads = ['force']
        else:
            if not repo_heads:
                repo_heads = [NULL_NODE_ID]
            repo_heads = [unhexlify(h) for h in repo_heads]
    if push_commits and not dry_run:
        bundle2_logger = logging.getLogger('bundle2')
        if repo.local():
            repo.local().ui.setconfig('server', 'validate', True)
        if unbundle20:
            b2caps = repo.capable('bundle2') or {}
        else:
            b2caps = {}
        if b2caps:
            b2caps = decodecaps(urllib.unquote(b2caps))
        bundle2_logger.debug('%r', b2caps)
        if b2caps:
            b2caps['replycaps'] = encodecaps({'error': ['abort']})
        cg = create_bundle(store, push_commits, b2caps)
        if not isinstance(repo, HelperRepo):
            cg = chunkbuffer(cg)
            if not b2caps:
                cg = cg1unpacker(cg, 'UN')
        reply = repo.unbundle(cg, repo_heads, '')
        if unbundle20 and isinstance(reply, unbundle20):
            parts = iter(reply.iterparts())
            for part in parts:
                bundle2_logger.debug('part: %s', part.type)
                bundle2_logger.debug('params: %r', part.params)
                if part.type == 'output':
                    sys.stderr.write(part.read())
                elif part.type == 'reply:changegroup':
                    # TODO: should check params['in-reply-to']
                    reply = int(part.params['return'])
                elif part.type == 'error:abort':
                    raise error.Abort(part.params['message'],
                                      hint=part.params.get('hint'))
                else:
                    bundle2_logger.warning(
                        'ignoring bundle2 part: %s', part.type)
        pushed = reply != 0
    return gitdag(push_commits) if pushed or dry_run else ()