示例#1
0
    def __call__(self, store):
        """Walk through the bundle parts in order and store its changesets.

        ``self._bundle`` is advanced with ``next()`` four times: changesets,
        manifests, files, and a final call that must produce ``None``.
        Manifest and file chunks are only iterated (for progress reporting);
        nothing is done with them here. Changesets are stored last, and a
        ``NothingToGraftException`` for one of them is logged at debug level
        and otherwise ignored.
        """
        changeset_chunks = ChunksCollection(
            progress_iter('Reading {} changesets', next(self._bundle, None)))

        # Drain the manifest part; only the progress side effect matters.
        for _manifest in progress_iter('Reading and importing {} manifests',
                                       next(self._bundle, None)):
            pass

        def count_revisions_and_files(named_chunks):
            # Pair each chunk with (revisions seen so far, distinct
            # consecutive file names seen so far).
            names_seen = 0
            previous_name = None
            for revisions_seen, (name, chunk) in enumerate(named_chunks,
                                                           start=1):
                if name != previous_name:
                    names_seen += 1
                previous_name = name
                yield (revisions_seen, names_seen), chunk

        # Drain the file part as well.
        for _file_rev in progress_enum(
                'Reading and importing {} revisions of {} files',
                count_revisions_and_files(next(self._bundle, None))):
            pass

        # The bundle must not contain anything after the file part.
        trailing = next(self._bundle, None)
        assert trailing is None
        del self._bundle

        def identity(x):
            return x

        pending_changesets = changeset_chunks.iter_initialized(
            identity, store.changeset, Changeset.from_chunk)
        for changeset in progress_iter('Importing {} changesets',
                                       pending_changesets):
            try:
                store.store_changeset(changeset)
            except NothingToGraftException:
                logging.debug('Cannot graft %s, not importing.',
                              changeset.node)
示例#2
0
    def __call__(self, store):
        """Import the contents of ``self._bundle`` into ``store``.

        The bundle is advanced with ``next()`` in order: changesets,
        manifests, files, and a final call that must produce ``None``.
        Manifests and files are stored while they are read. Changeset chunks
        are first handed to a ``ChunksCollection`` and only stored once
        manifests and files have been processed.

        A ``NothingToGraftException`` raised while storing a changeset is
        logged as a warning and that changeset is skipped.
        """
        changeset_chunks = ChunksCollection(progress_iter(
            'Reading %d changesets', next(self._bundle, None)))

        for mn in progress_iter(
                'Reading and importing %d manifests',
                iter_initialized(store.manifest,
                                 iter_chunks(next(self._bundle, None),
                                             ManifestInfo))):
            store.store_manifest(mn)

        for rev_chunk in progress_iter(
                'Reading and importing %d files', next(self._bundle, None)):
            GitHgHelper.store('file', rev_chunk)

        # Nothing may follow the three expected parts.
        if next(self._bundle, None) is not None:
            assert False
        del self._bundle

        for cs in progress_iter(
                'Importing %d changesets',
                changeset_chunks.iter_initialized(lambda x: x, store.changeset,
                                                  Changeset.from_chunk)):
            try:
                store.store_changeset(cs)
            except NothingToGraftException:
                # logging.warn is a deprecated alias of logging.warning.
                logging.warning('Cannot graft %s, not importing.', cs.node)
示例#3
0
def getbundle(repo, store, heads, branch_names):
    """Obtain a bundle from ``repo`` and import its contents into ``store``.

    For a ``bundlerepo`` the already prepared ``repo._unbundler`` is used.
    Otherwise the common nodes are computed with ``findcommon`` and a bundle
    is requested through ``repo.getbundle`` (advertising bundle2 capabilities
    when both sides support them), then wrapped with ``unbundler``.

    ``heads`` and the computed common nodes are hex strings; they are passed
    to ``repo.getbundle`` after ``unhexlify``.

    The bundle is consumed in order: changesets, manifests, files. Files are
    stored while they are read; manifests and changesets are collected first
    and stored afterwards, in that order. A ``NothingToGraftException`` for
    a changeset is logged as a warning and that changeset is skipped.
    """
    if isinstance(repo, bundlerepo):
        bundle = repo._unbundler
    else:
        common = findcommon(repo, store, store.heads(branch_names))
        logging.info('common: %s', common)
        kwargs = {}
        if unbundle20 and repo.capable('bundle2'):
            bundle2caps = {
                'HG20': (),
                'changegroup': ('01', '02'),
            }
            kwargs['bundlecaps'] = set((
                'HG20', 'bundle2=%s' % urllib.quote(encodecaps(bundle2caps))))

        bundle = repo.getbundle('bundle', heads=[unhexlify(h) for h in heads],
                                common=[unhexlify(h) for h in common],
                                **kwargs)

        bundle = unbundler(bundle)

    changeset_chunks = ChunksCollection(progress_iter(
        'Reading %d changesets', next(bundle)))

    manifest_chunks = ChunksCollection(progress_iter(
        'Reading %d manifests', next(bundle)))

    for rev_chunk in progress_iter(
            'Reading and importing %d files', iter_initialized(
                store.file, next(bundle))):
        store.store_file(rev_chunk)

    # Nothing may follow the three expected parts. An exhausted generator is
    # treated the same as an explicit None instead of leaking StopIteration.
    if next(bundle, None) is not None:
        assert False
    del bundle

    with store.batch_store_manifest():
        for mn in progress_iter(
                'Importing %d manifests',
                manifest_chunks.iter_initialized(ManifestInfo,
                                                 store.manifest)):
            store.store_manifest(mn)

    del manifest_chunks

    for cs in progress_iter(
            'Importing %d changesets',
            changeset_chunks.iter_initialized(ChangesetInfo, store.changeset)):
        try:
            store.store_changeset(cs)
        except NothingToGraftException:
            # logging.warn is a deprecated alias of logging.warning.
            logging.warning('Cannot graft %s, not importing.', cs.node)
示例#4
0
def getbundle(repo, store, heads, branch_names):
    """Obtain a bundle from ``repo`` and import its contents into ``store``.

    For a ``bundlerepo`` the already prepared ``repo._unbundler`` is used.
    Otherwise the common nodes are computed with ``findcommon`` and a bundle
    is requested through ``repo.getbundle`` (advertising bundle2 capabilities
    when both sides support them), then wrapped with ``unbundler``.

    ``heads`` and the computed common nodes are hex strings; they are passed
    to ``repo.getbundle`` after ``unhexlify``.

    The bundle is consumed in order: changesets, manifests, files. Files are
    stored while they are read; manifests and changesets are collected first
    and stored afterwards, in that order. A ``NothingToGraftException`` for
    a changeset is logged as a warning and that changeset is skipped.
    """
    if isinstance(repo, bundlerepo):
        bundle = repo._unbundler
    else:
        common = findcommon(repo, store, store.heads(branch_names))
        logging.info('common: %s', common)
        kwargs = {}
        if unbundle20 and repo.capable('bundle2'):
            bundle2caps = {
                'HG20': (),
                'changegroup': ('01', '02'),
            }
            kwargs['bundlecaps'] = set(
                ('HG20', 'bundle2=%s' % urllib.quote(encodecaps(bundle2caps))))

        bundle = repo.getbundle('bundle',
                                heads=[unhexlify(h) for h in heads],
                                common=[unhexlify(h) for h in common],
                                **kwargs)

        bundle = unbundler(bundle)

    changeset_chunks = ChunksCollection(
        progress_iter('Reading %d changesets', next(bundle)))

    manifest_chunks = ChunksCollection(
        progress_iter('Reading %d manifests', next(bundle)))

    for rev_chunk in progress_iter('Reading and importing %d files',
                                   iter_initialized(store.file, next(bundle))):
        store.store_file(rev_chunk)

    # Nothing may follow the three expected parts. An exhausted generator is
    # treated the same as an explicit None instead of leaking StopIteration.
    if next(bundle, None) is not None:
        assert False
    del bundle

    for mn in progress_iter(
            'Importing %d manifests',
            manifest_chunks.iter_initialized(ManifestInfo, store.manifest)):
        store.store_manifest(mn)

    del manifest_chunks

    for cs in progress_iter(
            'Importing %d changesets',
            changeset_chunks.iter_initialized(ChangesetInfo, store.changeset)):
        try:
            store.store_changeset(cs)
        except NothingToGraftException:
            # logging.warn is a deprecated alias of logging.warning.
            logging.warning('Cannot graft %s, not importing.', cs.node)
示例#5
0
    def __call__(self, store):
        """Import the contents of ``self._bundle`` into ``store``.

        The bundle is advanced with ``next()`` in order: changesets,
        manifests, files, and a final call that must produce ``None``.

        Manifests are either passed raw to ``GitHgHelper.store`` and checked
        with ``store.check_manifest`` (when the ``store-manifest`` experiment
        is enabled), or parsed as ``ManifestInfo`` and stored through
        ``store.store_manifest``. File chunks are passed to
        ``GitHgHelper.store``. Changesets are stored last; a
        ``NothingToGraftException`` for one of them is logged as a warning
        and that changeset is skipped.
        """
        changeset_chunks = ChunksCollection(
            progress_iter('Reading {} changesets', next(self._bundle, None)))

        if experiment('store-manifest'):
            for rev_chunk in progress_iter(
                    'Reading and importing {} manifests',
                    next(self._bundle, None)):
                GitHgHelper.store('manifest', rev_chunk)
                store.check_manifest(rev_chunk)
        else:
            for mn in progress_iter(
                    'Reading and importing {} manifests',
                    iter_initialized(
                        store.manifest,
                        iter_chunks(next(self._bundle, None), ManifestInfo))):
                store.store_manifest(mn)

        def enumerate_files(chunks):
            # Pair each chunk with (revisions seen, distinct consecutive file
            # names seen). Both counters are 1-based so that the progress
            # message reports how many items were processed, not the index
            # of the last one.
            last_name = None
            count_names = 0
            for count_chunks, (name, chunk) in enumerate(chunks, start=1):
                if name != last_name:
                    count_names += 1
                last_name = name
                yield (count_chunks, count_names), chunk

        for rev_chunk in progress_enum(
                'Reading and importing {} revisions of {} files',
                enumerate_files(next(self._bundle, None))):
            GitHgHelper.store('file', rev_chunk)

        # Nothing may follow the three expected parts.
        if next(self._bundle, None) is not None:
            assert False
        del self._bundle

        for cs in progress_iter(
                'Importing {} changesets',
                changeset_chunks.iter_initialized(lambda x: x, store.changeset,
                                                  Changeset.from_chunk)):
            try:
                store.store_changeset(cs)
            except NothingToGraftException:
                # logging.warn is a deprecated alias of logging.warning.
                logging.warning('Cannot graft %s, not importing.', cs.node)
示例#6
0
    def __call__(self, store):
        """Import the contents of ``self._bundle`` into ``store``.

        The bundle is advanced with ``next()`` in order: changesets,
        manifests, files, and a final call that must produce ``None``.

        Manifest and file chunks are always iterated (which drives the
        progress output), but they are only passed to ``GitHgHelper.store``
        when ``self._use_store_changegroup`` is false. Changesets are stored
        last; a ``NothingToGraftException`` for one of them is logged as a
        warning and that changeset is skipped.
        """
        changeset_chunks = ChunksCollection(progress_iter(
            'Reading {} changesets', next(self._bundle, None)))

        for rev_chunk in progress_iter(
                'Reading and importing {} manifests',
                next(self._bundle, None)):
            if not self._use_store_changegroup:
                GitHgHelper.store('manifest', rev_chunk)

        def enumerate_files(chunks):
            # Pair each chunk with (revisions seen, distinct consecutive file
            # names seen), both 1-based.
            last_name = None
            count_names = 0
            for count_chunks, (name, chunk) in enumerate(chunks, start=1):
                if name != last_name:
                    count_names += 1
                last_name = name
                yield (count_chunks, count_names), chunk

        for rev_chunk in progress_enum(
                'Reading and importing {} revisions of {} files',
                enumerate_files(next(self._bundle, None))):
            if not self._use_store_changegroup:
                GitHgHelper.store('file', rev_chunk)

        # Nothing may follow the three expected parts.
        if next(self._bundle, None) is not None:
            assert False
        del self._bundle

        for cs in progress_iter(
                'Importing {} changesets',
                changeset_chunks.iter_initialized(lambda x: x, store.changeset,
                                                  Changeset.from_chunk)):
            try:
                store.store_changeset(cs)
            except NothingToGraftException:
                # logging.warn is a deprecated alias of logging.warning.
                logging.warning('Cannot graft %s, not importing.', cs.node)
示例#7
0
    def _ensure_ready(self):
        """Analyze the bundle's changesets once and set up derived state.

        Does nothing when ``self._store`` is ``None``. Otherwise the store is
        taken (and ``self._store`` reset to ``None``), the changeset part of
        the bundle is read, and the following attributes are set:

        * ``self._dag``: a ``gitdag`` holding every changeset with its
          non-null parents and its branch,
        * ``self._heads``: the dag heads, unhexlified, in reversed order,
        * ``self._branchmap``: tag -> list of unhexlified head nodes,
        * ``self._unbundler``: a generator replaying the recorded changeset
          chunks, followed by the next two parts of the raw bundle.
        """
        assert hasattr(self, '_store')
        store = self._store
        if store is None:
            # No pending store: nothing to do.
            return
        self._store = None

        raw_unbundler = unbundler(self._bundle)
        self._dag = gitdag()
        branches = set()

        chunks = []

        def iter_and_store(iterator):
            # Pass items through unchanged while recording them in `chunks`.
            for item in iterator:
                chunks.append(item)
                yield item

        title = 'Analyzing {} changesets from ' + fsdecode(self._file)
        changeset_chunks = ChunksCollection(
            progress_iter(title, iter_and_store(next(raw_unbundler, None))))

        changesets = changeset_chunks.iter_initialized(
            lambda x: x, store.changeset, Changeset.from_chunk)
        for changeset in changesets:
            branch = (changeset.extra or {}).get(b'branch', b'default')
            branches.add(branch)
            node = changeset.node
            real_parents = tuple(
                parent
                for parent in (changeset.parent1, changeset.parent2)
                if parent != NULL_NODE_ID)
            self._dag.add(node, real_parents, branch)

        head_nodes = [
            unhexlify(h) for h in self._dag.all_heads(with_tags=False)]
        self._heads = tuple(head_nodes[::-1])

        self._branchmap = branchmap = defaultdict(list)
        for tag, node in self._dag.all_heads():
            branchmap[tag].append(unhexlify(node))

        def repo_unbundler():
            # First part: the changeset chunks recorded above.
            yield iter(chunks)
            # Second and third parts come straight from the raw bundle.
            for _ in range(2):
                yield next(raw_unbundler, None)
            # Nothing may follow them.
            trailing = next(raw_unbundler, None)
            assert trailing is None

        self._unbundler = repo_unbundler()
示例#8
0
    def _ensure_ready(self):
        """Analyze the bundle's changesets once and set up derived state.

        Does nothing when ``self._store`` is ``None``. Otherwise the store is
        taken (and ``self._store`` reset to ``None``), the changeset part of
        the bundle is read, and the following attributes are set:

        * ``self._dag``: a ``gitdag`` holding every changeset with its
          non-null parents and its branch,
        * ``self._heads``: the dag heads, unhexlified, in reversed order,
        * ``self._branchmap``: tag -> list of unhexlified head nodes,
        * ``self._unbundler``: a generator replaying the recorded changeset
          chunks, followed by the next two parts of the raw bundle.
        """
        assert hasattr(self, '_store')
        store = self._store
        if store is None:
            # No pending store: nothing to do.
            return
        self._store = None

        raw_unbundler = unbundler(self._bundle)
        self._dag = gitdag()
        branches = set()

        chunks = []

        def iter_and_store(iterator):
            # Pass items through unchanged while recording them in `chunks`.
            for item in iterator:
                chunks.append(item)
                yield item

        title = 'Analyzing {} changesets from ' + self._file
        changeset_chunks = ChunksCollection(
            progress_iter(title, iter_and_store(next(raw_unbundler, None))))

        changesets = changeset_chunks.iter_initialized(
            lambda x: x, store.changeset, Changeset.from_chunk)
        for changeset in changesets:
            branch = (changeset.extra or {}).get('branch', 'default')
            branches.add(branch)
            node = changeset.node
            real_parents = tuple(
                parent
                for parent in (changeset.parent1, changeset.parent2)
                if parent != NULL_NODE_ID)
            self._dag.add(node, real_parents, branch)

        head_nodes = [
            unhexlify(h) for h in self._dag.all_heads(with_tags=False)]
        self._heads = tuple(head_nodes[::-1])

        self._branchmap = branchmap = defaultdict(list)
        for tag, node in self._dag.all_heads():
            branchmap[tag].append(unhexlify(node))

        def repo_unbundler():
            # First part: the changeset chunks recorded above.
            yield iter(chunks)
            # Second and third parts come straight from the raw bundle.
            for _ in range(2):
                yield next(raw_unbundler, None)
            # Nothing may follow them.
            trailing = next(raw_unbundler, None)
            assert trailing is None

        self._unbundler = repo_unbundler()
示例#9
0
    def init(self, store):
        """Analyze the bundle's changesets and set up derived state.

        Reads the changeset part of ``self._bundle`` and sets:

        * ``self._dag``: a ``gitdag`` holding every changeset with its
          non-null parents and its branch,
        * ``self._heads``: the dag heads, unhexlified, in reversed order,
        * ``self._branchmap``: tag -> list of unhexlified head nodes,
        * ``self._tip``: the unhexlified node of the last changeset read,
        * ``self._unbundler``: a generator replaying the recorded changeset
          chunks, followed by the next two parts of the raw bundle.

        ``store`` provides ``store.changeset``, which is handed to
        ``iter_initialized`` together with ``ChangesetInfo``.
        """
        raw_unbundler = unbundler(self._bundle)
        self._dag = gitdag()

        chunks = []

        def iter_and_store(iterator):
            # Pass items through unchanged while recording them in `chunks`.
            for item in iterator:
                chunks.append(item)
                yield item

        changeset_chunks = ChunksCollection(
            progress_iter('Analyzing %d changesets from ' + self._file,
                          iter_and_store(next(raw_unbundler))))

        for chunk in changeset_chunks.iter_initialized(ChangesetInfo,
                                                       store.changeset):
            extra = chunk.extra or {}
            branch = extra.get('branch', 'default')
            self._dag.add(
                chunk.node,
                tuple(p for p in (chunk.parent1, chunk.parent2)
                      if p != NULL_NODE_ID), branch)
        self._heads = tuple(
            reversed(
                [unhexlify(h) for h in self._dag.all_heads(with_tags=False)]))
        self._branchmap = defaultdict(list)
        for tag, node in self._dag.all_heads():
            self._branchmap[tag].append(unhexlify(node))
        # NOTE(review): `chunk` is only bound if the loop above ran at least
        # once, so a bundle without changesets raises here.
        self._tip = unhexlify(chunk.node)

        def repo_unbundler():
            yield chunks
            # Use a default so an exhausted raw unbundler does not raise
            # StopIteration inside this generator (a RuntimeError under
            # PEP 479).
            yield next(raw_unbundler, None)
            yield next(raw_unbundler, None)
            # Nothing may follow the three expected parts.
            if next(raw_unbundler, None) is not None:
                assert False

        self._unbundler = repo_unbundler()
示例#10
0
    def init(self, store):
        """Analyze the bundle's changesets and set up derived state.

        Reads the changeset part of ``self._bundle`` and sets:

        * ``self._dag``: a ``gitdag`` holding every changeset with its
          non-null parents and its branch,
        * ``self._heads``: the dag heads, unhexlified, in reversed order,
        * ``self._branchmap``: tag -> list of unhexlified head nodes,
        * ``self._tip``: the unhexlified node of the last changeset read,
        * ``self._unbundler``: a generator replaying the recorded changeset
          chunks, followed by the next two parts of the raw bundle.

        ``store`` provides ``store.changeset``, which is handed to
        ``iter_initialized`` together with ``ChangesetInfo``.
        """
        raw_unbundler = unbundler(self._bundle)
        self._dag = gitdag()

        chunks = []

        def iter_and_store(iterator):
            # Pass items through unchanged while recording them in `chunks`.
            for item in iterator:
                chunks.append(item)
                yield item

        changeset_chunks = ChunksCollection(progress_iter(
            'Analyzing %d changesets from ' + self._file,
            iter_and_store(next(raw_unbundler))))

        for chunk in changeset_chunks.iter_initialized(ChangesetInfo,
                                                       store.changeset):
            extra = chunk.extra or {}
            branch = extra.get('branch', 'default')
            self._dag.add(chunk.node,
                          tuple(p for p in (chunk.parent1, chunk.parent2)
                                if p != NULL_NODE_ID), branch)
        self._heads = tuple(reversed(
            [unhexlify(h) for h in self._dag.all_heads(with_tags=False)]))
        self._branchmap = defaultdict(list)
        for tag, node in self._dag.all_heads():
            self._branchmap[tag].append(unhexlify(node))
        # NOTE(review): `chunk` is only bound if the loop above ran at least
        # once, so a bundle without changesets raises here.
        self._tip = unhexlify(chunk.node)

        def repo_unbundler():
            yield chunks
            # Use a default so an exhausted raw unbundler does not raise
            # StopIteration inside this generator (a RuntimeError under
            # PEP 479).
            yield next(raw_unbundler, None)
            yield next(raw_unbundler, None)
            # Nothing may follow the three expected parts.
            if next(raw_unbundler, None) is not None:
                assert False

        self._unbundler = repo_unbundler()
示例#11
0
def fsck_quick():
    """Run a quick consistency check of the git-cinnabar metadata.

    The check proceeds in stages and stops with a non-zero result as soon as
    a stage has reported breakage:

    1. validate the shape of the ``refs/cinnabar/metadata`` commit,
    2. check every recorded changeset head against the hg2git and git2hg
       metadata,
    3. load the manifests dag and check every manifest head, collecting the
       (path, file sha1) pairs that need verification,
    4. walk the manifests from the end of the loaded list and check each
       collected file revision with ``GitHgHelper.check_file``.

    On success ``refs/cinnabar/checked`` is updated to the current metadata
    commit; the changeset heads metadata is refreshed when its order was
    found to be wrong.

    Returns:
        0 when the metadata checked out, 1 otherwise.
    """
    status = FsckStatus()
    store = GitHgStore()

    metadata_commit = Git.resolve_ref('refs/cinnabar/metadata')
    if not metadata_commit:
        status.info(
            'There does not seem to be any git-cinnabar metadata.\n'
            'Is this a git-cinnabar clone?'
        )
        return 1
    commit = GitCommit(metadata_commit)
    if commit.body != 'files-meta unified-manifests-v2':
        status.info(
            'The git-cinnabar metadata is incompatible with this version.\n'
            'Please use the git-cinnabar version it was used with last.\n'
        )
        return 1
    if len(commit.parents) > 6 or len(commit.parents) < 5:
        status.report('The git-cinnabar metadata seems to be corrupted in '
                      'unexpected ways.\n')
        return 1
    changesets, manifests, hg2git, git2hg, files_meta = commit.parents[:5]

    # Each line of the changesets commit body is "<node> <branch>"; there
    # must be exactly one such line per parent of that commit.
    commit = GitCommit(changesets)
    heads = OrderedDict(
        (node, branch)
        for node, _, branch in (d.partition(' ')
                                for d in commit.body.splitlines()))
    if len(heads) != len(commit.parents):
        status.report('The git-cinnabar metadata seems to be corrupted in '
                      'unexpected ways.\n')
        return 1

    manifest_nodes = []

    parents = None
    fix_changeset_heads = False
    # TODO: Check that the recorded heads are actually dag heads.
    for c, changeset_node in progress_iter(
            'Checking {} changeset heads', izip(commit.parents, heads)):
        gitsha1 = GitHgHelper.hg2git(changeset_node)
        if gitsha1 == NULL_NODE_ID:
            status.report('Missing hg2git metadata for changeset %s'
                          % changeset_node)
            continue
        if gitsha1 != c:
            # The head may simply be recorded at a different position; that
            # is only an ordering problem, fixed at the end.
            if parents is None:
                parents = set(commit.parents)
            if gitsha1 not in parents:
                status.report(
                    'Inconsistent metadata:\n'
                    '  Head metadata says changeset %s maps to %s\n'
                    '  but hg2git metadata says it maps to %s'
                    % (changeset_node, c, gitsha1))
                continue
            fix_changeset_heads = True
        changeset = store._changeset(c, include_parents=True)
        if not changeset:
            status.report('Missing git2hg metadata for git commit %s' % c)
            continue
        if changeset.node != changeset_node:
            if changeset.node not in heads:
                status.report(
                    'Inconsistent metadata:\n'
                    '  Head metadata says %s maps to changeset %s\n'
                    '  but git2hg metadata says it maps to changeset %s'
                    % (c, changeset_node, changeset.node))
                continue
            fix_changeset_heads = True
        if changeset.node != changeset.sha1:
            status.report('Sha1 mismatch for changeset %s' % changeset.node)
            continue
        changeset_branch = changeset.branch or 'default'
        if heads[changeset.node] != changeset_branch:
            status.report(
                'Inconsistent metadata:\n'
                '  Head metadata says changeset %s is in branch %s\n'
                '  but git2hg metadata says it is in branch %s'
                % (changeset.node, heads[changeset.node], changeset_branch))
            continue
        manifest_nodes.append(changeset.manifest)

    if status('broken'):
        return 1

    # Rebuilding manifests benefits from limiting the difference with
    # the last rebuilt manifest. Similarly, building the list of unique
    # files in all manifests benefits from that too.
    # Unfortunately, the manifest heads are not ordered in a topologically
    # relevant manner, and the differences between two consecutive manifests
    # can be much larger than they could be. The consequence is spending a
    # large amount of time rebuilding the manifests and gathering the files
    # list. It's actually faster to attempt to reorder them according to
    # some heuristics first, such that the differences are smaller.
    # Here, we use the depth from the root node(s) to reorder the manifests.
    # This doesn't give the most optimal ordering, but it's already much
    # faster. On a clone of multiple mozilla-* repositories with > 1400 heads,
    # it's close to an order of magnitude difference on the "Checking
    # manifests" loop.
    depths = {}
    roots = []
    manifest_queue = []
    for m, _, parents in progress_iter(
            'Loading {} manifests', GitHgHelper.rev_list(
                '--topo-order', '--reverse', '--full-history', '%s^@'
                % manifests)):
        manifest_queue.append((m, parents))
        if parents:
            # Depth of m from each root is one more than the largest depth
            # of any of its parents from that root.
            depth = {}
            for p in parents:
                for root, num in depths[p].iteritems():
                    if root in depth:
                        depth[root] = max(depth[root], num + 1)
                    else:
                        depth[root] = num + 1
            depths[m] = depth
            del depth
        else:
            depths[m] = {m: 0}
            roots.append(m)

    if status('broken'):
        return 1

    # TODO: check that all manifest_nodes gathered above are available in the
    # manifests dag, and that the dag heads are the recorded heads.
    manifests_commit = GitCommit(manifests)
    # One depth vector (one entry per root) per manifest head, used as the
    # sort key below.
    depths = [
        [depths[p].get(r, 0) for r in roots]
        for p in manifests_commit.parents
    ]
    manifests_commit_parents = [
        p for _, p in sorted(zip(depths, manifests_commit.parents))
    ]
    previous = None
    all_interesting = set()
    for m in progress_iter('Checking {} manifest heads',
                           manifests_commit_parents):
        c = GitCommit(m)
        if not SHA1_RE.match(c.body):
            status.report('Invalid manifest metadata in git commit %s' % m)
            continue
        gitsha1 = GitHgHelper.hg2git(c.body)
        if gitsha1 == NULL_NODE_ID:
            status.report('Missing hg2git metadata for manifest %s' % c.body)
            continue
        if not GitHgHelper.check_manifest(c.body):
            status.report('Sha1 mismatch for manifest %s' % c.body)

        # Collect the (path, sha1) pairs not seen yet: from the diff against
        # the previous head when there is one, from the full tree otherwise.
        files = {}
        if previous:
            for _, _, before, after, d, path in GitHgHelper.diff_tree(
                    previous, m):
                if d in 'AM' and before != after and \
                        (path, after) not in all_interesting:
                    files[path] = after
        else:
            for _, t, sha1, path in GitHgHelper.ls_tree(m, recursive=True):
                if (path, sha1) not in all_interesting:
                    files[path] = sha1
        all_interesting.update(files.iteritems())
        previous = m

    if status('broken'):
        return 1

    progress = Progress('Checking {} files')
    # Manifests were loaded with --reverse, so popping from the end visits
    # them in the opposite order of the rev_list output.
    while all_interesting and manifest_queue:
        (m, parents) = manifest_queue.pop()
        changes = get_changes(m, parents, all=True)
        for path, hg_file, hg_fileparents in changes:
            if hg_fileparents[1:] == (hg_file,):
                continue
            elif hg_fileparents[:1] == (hg_file,):
                continue
            # Reaching here means the file received a modification compared
            # to its parents. If it's a file we're going to check below,
            # it means we don't need to check its parents if somehow they were
            # going to be checked. If it's not a file we're going to check
            # below, it's because it's either a file we weren't interested in
            # in the first place, or it's the parent of a file we have checked.
            # Either way, we aren't interested in the parents.
            for p in hg_fileparents:
                all_interesting.discard((path, p))
            if (path, hg_file) not in all_interesting:
                continue
            all_interesting.remove((path, hg_file))
            if not GitHgHelper.check_file(hg_file, *hg_fileparents):
                p = store.manifest_path(path)
                status.report(
                    'Sha1 mismatch for file %s\n'
                    '  revision %s' % (p, hg_file))

                print_parents = ' '.join(p for p in hg_fileparents
                                         if p != NULL_NODE_ID)
                if print_parents:
                    # A single sha1 is 40 characters; anything longer than
                    # 41 means two parents were joined.
                    status.report('  with parent%s %s' % (
                        's' if len(print_parents) > 41 else '',
                        print_parents))
            progress.progress()
    progress.finish()
    if all_interesting:
        status.info('Could not find the following files:')
        for path, sha1 in sorted(all_interesting):
            # Report the converted path, as the sha1 mismatch report above
            # does, rather than the raw manifest tree path.
            p = store.manifest_path(path)
            status.info('  %s %s' % (sha1, p))
        status.info(
            'This might be a bug in `git cinnabar fsck`. Please open '
            'an issue, with the message above, on\n'
            'https://github.com/glandium/git-cinnabar/issues')
        return 1

    if status('broken'):
        status.info(
            'Your git-cinnabar repository appears to be corrupted.\n'
            'Please open an issue, with the information above, on\n'
            'https://github.com/glandium/git-cinnabar/issues')
        Git.update_ref('refs/cinnabar/broken', metadata_commit)
        if Git.resolve_ref('refs/cinnabar/checked'):
            status.info(
                '\nThen please try to run `git cinnabar rollback --fsck` to '
                'restore last known state, and to update from the mercurial '
                'repository.')
        else:
            status.info('\nThen please try to run `git cinnabar reclone`.')
        status.info(
            '\nPlease note this may affect the commit sha1s of mercurial '
            'changesets, and may require to rebase your local branches.')
        status.info(
            '\nAlternatively, you may start afresh with a new clone. In any '
            'case, please keep this corrupted repository around for further '
            'debugging.')
        return 1

    refresh = []
    if fix_changeset_heads:
        status.fix('Fixing changeset heads metadata order.')
        refresh.append('refs/cinnabar/changesets')
    interval_expired('fsck', 0)
    store.close(refresh=refresh)
    GitHgHelper._helper = False
    # Resolve again: closing the store may have written new metadata.
    # NOTE(review): presumed from the refresh above - confirm.
    metadata_commit = Git.resolve_ref('refs/cinnabar/metadata')
    Git.update_ref('refs/cinnabar/checked', metadata_commit)
    return 0
示例#12
0
def fsck_quick(force=False):
    '''Quickly check the consistency of the git-cinnabar metadata.

    Only the parts of the metadata that are not already covered by the
    commit recorded in `refs/cinnabar/checked` are verified: the changeset
    heads, the manifest heads, and the files found in those manifests.

    force -- when true, run the check even if the current metadata commit
             is the one recorded as checked, and ignore the previously
             checked state altogether.

    Returns 0 when the metadata was already checked or when no problem was
    found (in the latter case `refs/cinnabar/checked` is updated to the
    current metadata commit), and 1 when the metadata is missing,
    incompatible or inconsistent. When a problem is only detected while
    checking files, `refs/cinnabar/broken` is updated to point to the
    metadata commit that was checked.
    '''
    status = FsckStatus()
    store = GitHgStore()

    metadata_commit = Git.resolve_ref('refs/cinnabar/metadata')
    if not metadata_commit:
        status.info('There does not seem to be any git-cinnabar metadata.\n'
                    'Is this a git-cinnabar clone?')
        return 1
    broken_metadata = Git.resolve_ref('refs/cinnabar/broken')
    checked_metadata = Git.resolve_ref('refs/cinnabar/checked')
    # Metadata that is recorded both as checked and as broken can't be
    # trusted as a known-good baseline.
    if checked_metadata == broken_metadata:
        checked_metadata = None
    if metadata_commit == checked_metadata and not force:
        status.info('The git-cinnabar metadata was already checked and is '
                    'presumably clean.\n'
                    'Try `--force` if you want to check anyways.')
        return 0
    elif force:
        # Forcing means checking everything, not only what changed since
        # the last check.
        checked_metadata = None

    commit = GitCommit(metadata_commit)
    if commit.body != 'files-meta unified-manifests-v2':
        status.info(
            'The git-cinnabar metadata is incompatible with this version.\n'
            'Please use the git-cinnabar version it was used with last.\n')
        return 1
    # The metadata commit is expected to have 5 or 6 parents, the first five
    # of which are unpacked below.
    if len(commit.parents) > 6 or len(commit.parents) < 5:
        status.report('The git-cinnabar metadata seems to be corrupted in '
                      'unexpected ways.\n')
        return 1
    changesets, manifests, hg2git, git2hg, files_meta = commit.parents[:5]

    # The body of the changesets commit has one "<node> <branch>" line per
    # head, in the same order as the commit parents.
    commit = GitCommit(changesets)
    heads = OrderedDict(
        (node, branch)
        for node, _, branch in (d.partition(' ')
                                for d in commit.body.splitlines()))
    if len(heads) != len(commit.parents):
        status.report('The git-cinnabar metadata seems to be corrupted in '
                      'unexpected ways.\n')
        return 1

    manifest_nodes = []

    parents = None
    fix_changeset_heads = False

    def get_checked_metadata(num):
        # Return the GitCommit for the num-th parent of the last checked
        # metadata commit, or None when there is no usable checked metadata
        # or that parent can't be resolved.
        if not checked_metadata:
            return None
        commit = Git.resolve_ref('{}^{}'.format(checked_metadata, num))
        if commit:
            return GitCommit(commit)

    checked_commit = get_checked_metadata(1)
    # TODO: Check that the recorded heads are actually dag heads.
    # Heads that were already parents of the previously checked changesets
    # commit are skipped.
    for c, changeset_node in progress_iter(
            'Checking {} changeset heads',
            ((c, node) for c, node in izip(commit.parents, heads)
             if not checked_commit or c not in checked_commit.parents)):
        gitsha1 = GitHgHelper.hg2git(changeset_node)
        if gitsha1 == NULL_NODE_ID:
            status.report('Missing hg2git metadata for changeset %s' %
                          changeset_node)
            continue
        if gitsha1 != c:
            # The set of parents is only built the first time it's needed.
            if parents is None:
                parents = set(commit.parents)
            if gitsha1 not in parents:
                status.report('Inconsistent metadata:\n'
                              '  Head metadata says changeset %s maps to %s\n'
                              '  but hg2git metadata says it maps to %s' %
                              (changeset_node, c, gitsha1))
                continue
            # The commit is one of the recorded heads, just not at the
            # expected position: only the ordering needs fixing.
            fix_changeset_heads = True
        changeset = store._changeset(c, include_parents=True)
        if not changeset:
            status.report('Missing git2hg metadata for git commit %s' % c)
            continue
        if changeset.node != changeset_node:
            if changeset.node not in heads:
                status.report(
                    'Inconsistent metadata:\n'
                    '  Head metadata says %s maps to changeset %s\n'
                    '  but git2hg metadata says it maps to changeset %s' %
                    (c, changeset_node, changeset.node))
                continue
            fix_changeset_heads = True
        if changeset.node != changeset.sha1:
            status.report('Sha1 mismatch for changeset %s' % changeset.node)
            continue
        changeset_branch = changeset.branch or 'default'
        if heads[changeset.node] != changeset_branch:
            status.report(
                'Inconsistent metadata:\n'
                '  Head metadata says changeset %s is in branch %s\n'
                '  but git2hg metadata says it is in branch %s' %
                (changeset.node, heads[changeset.node], changeset_branch))
            continue
        manifest_nodes.append(changeset.manifest)

    if status('broken'):
        return 1

    # Rebuilding manifests benefits from limiting the difference with
    # the last rebuilt manifest. Similarly, building the list of unique
    # files in all manifests benefits from that too.
    # Unfortunately, the manifest heads are not ordered in a topological
    # relevant matter, and the differences between two consecutive manifests
    # can be much larger than they could be. The consequence is spending a
    # large amount of time rebuilding the manifests and gathering the files
    # list. It's actually faster to attempt to reorder them according to
    # some heuristics first, such that the differences are smaller.
    # Here, we use the depth from the root node(s) to reorder the manifests.
    # This doesn't give the most optimal ordering, but it's already much
    # faster. On a clone of multiple mozilla-* repositories with > 1400 heads,
    # it's close to an order of magnitude difference on the "Checking
    # manifests" loop.
    #
    # depths maps each manifest commit to a {root: depth from that root}
    # dict; roots maps each root of the walk to its parents, which are
    # outside of the walked range.
    depths = {}
    roots = {}
    manifest_queue = []
    revs = []
    revs.append('{}^@'.format(manifests))
    if checked_metadata:
        # Exclude what is reachable from the previously checked manifests.
        revs.append('^{}^2^@'.format(checked_metadata))
    for m, _, parents in progress_iter(
            'Loading {} manifests',
            GitHgHelper.rev_list('--topo-order', '--reverse', '--full-history',
                                 *revs)):
        manifest_queue.append((m, parents))
        if parents:
            depth = {}
            for p in parents:
                for root, num in depths.get(p, {}).iteritems():
                    if root in depth:
                        depth[root] = max(depth[root], num + 1)
                    else:
                        depth[root] = num + 1
            if depth:
                depths[m] = depth
                del depth
                continue
        # No parent with a known depth: this commit is a root of the walk.
        depths[m] = {m: 0}
        roots[m] = parents

    if status('broken'):
        return 1

    # TODO: check that all manifest_nodes gathered above are available in the
    # manifests dag, and that the dag heads are the recorded heads.
    manifests_commit = GitCommit(manifests)
    checked_commit = get_checked_metadata(2)
    # Sort the manifest heads that still need checking by their depth
    # relative to each root.
    depths = [([depths[p].get(r, 0) for r in roots], p)
              for p in manifests_commit.parents
              if not checked_commit or p not in checked_commit.parents]
    manifests_commit_parents = [p for _, p in sorted(depths)]
    previous = None
    # Set of (path, file sha1) pairs found in the checked manifest heads.
    all_interesting = set()
    for m in progress_iter('Checking {} manifest heads',
                           manifests_commit_parents):
        c = GitCommit(m)
        if not SHA1_RE.match(c.body):
            status.report('Invalid manifest metadata in git commit %s' % m)
            continue
        gitsha1 = GitHgHelper.hg2git(c.body)
        if gitsha1 == NULL_NODE_ID:
            status.report('Missing hg2git metadata for manifest %s' % c.body)
            continue
        if not GitHgHelper.check_manifest(c.body):
            status.report('Sha1 mismatch for manifest %s' % c.body)

        files = {}
        if previous:
            # Only look at what changed since the previous manifest head.
            for _, _, before, after, d, path in GitHgHelper.diff_tree(
                    previous, m):
                if d in 'AM' and before != after and \
                        (path, after) not in all_interesting:
                    files[path] = after
        else:
            for _, t, sha1, path in GitHgHelper.ls_tree(m, recursive=True):
                if (path, sha1) not in all_interesting:
                    files[path] = sha1
        all_interesting.update(files.iteritems())
        previous = m

    if status('broken'):
        return 1

    # Don't check files that were already there in the previously checked
    # manifests.
    previous = None
    for parents in roots.itervalues():
        for p in parents:
            if previous:
                for _, _, before, after, d, path in GitHgHelper.diff_tree(
                        previous, p):
                    if d in 'AM' and before != after:
                        all_interesting.discard((path, after))
            else:
                for _, t, sha1, path in GitHgHelper.ls_tree(p, recursive=True):
                    all_interesting.discard((path, sha1))
            previous = p

    progress = Progress('Checking {} files')
    # Manifests were queued in the order rev_list returned them; popping
    # handles them in the opposite order.
    while all_interesting and manifest_queue:
        (m, parents) = manifest_queue.pop()
        changes = get_changes(m, parents, all=True)
        for path, hg_file, hg_fileparents in changes:
            # Skip files that are identical to one of their parents.
            if hg_fileparents[1:] == (hg_file, ):
                continue
            elif hg_fileparents[:1] == (hg_file, ):
                continue
            # Reaching here means the file received a modification compared
            # to its parents. If it's a file we're going to check below,
            # it means we don't need to check its parents if somehow they were
            # going to be checked. If it's not a file we're going to check
            # below, it's because it's either a file we weren't interested in
            # in the first place, or it's the parent of a file we have checked.
            # Either way, we aren't interested in the parents.
            for p in hg_fileparents:
                all_interesting.discard((path, p))
            if (path, hg_file) not in all_interesting:
                continue
            all_interesting.remove((path, hg_file))
            if not GitHgHelper.check_file(hg_file, *hg_fileparents):
                p = store.manifest_path(path)
                status.report('Sha1 mismatch for file %s\n'
                              '  revision %s' % (p, hg_file))

                print_parents = ' '.join(p for p in hg_fileparents
                                         if p != NULL_NODE_ID)
                if print_parents:
                    # More than 41 characters means there is more than one
                    # 40-characters sha1 in the string.
                    status.report('  with parent%s %s' %
                                  ('s' if len(print_parents) > 41 else '',
                                   print_parents))
            progress.progress()
    progress.finish()
    if all_interesting:
        status.info('Could not find the following files:')
        for path, sha1 in sorted(all_interesting):
            # Print the converted manifest path, like the sha1 mismatch
            # report above does, rather than the raw tree path.
            p = store.manifest_path(path)
            status.info('  %s %s' % (sha1, p))
        status.info('This might be a bug in `git cinnabar fsck`. Please open '
                    'an issue, with the message above, on\n'
                    'https://github.com/glandium/git-cinnabar/issues')
        return 1

    if status('broken'):
        status.info('Your git-cinnabar repository appears to be corrupted.\n'
                    'Please open an issue, with the information above, on\n'
                    'https://github.com/glandium/git-cinnabar/issues')
        Git.update_ref('refs/cinnabar/broken', metadata_commit)
        if checked_metadata:
            status.info(
                '\nThen please try to run `git cinnabar rollback --fsck` to '
                'restore last known state, and to update from the mercurial '
                'repository.')
        else:
            status.info('\nThen please try to run `git cinnabar reclone`.')
        status.info(
            '\nPlease note this may affect the commit sha1s of mercurial '
            'changesets, and may require to rebase your local branches.')
        status.info(
            '\nAlternatively, you may start afresh with a new clone. In any '
            'case, please keep this corrupted repository around for further '
            'debugging.')
        return 1

    refresh = []
    if fix_changeset_heads:
        status.fix('Fixing changeset heads metadata order.')
        refresh.append('refs/cinnabar/changesets')
    interval_expired('fsck', 0)
    store.close(refresh=refresh)
    GitHgHelper._helper = False
    # The metadata ref is resolved again after closing the store, and that
    # is the commit recorded as checked.
    metadata_commit = Git.resolve_ref('refs/cinnabar/metadata')
    Git.update_ref('refs/cinnabar/checked', metadata_commit)
    return 0
示例#13
0
def fsck(args):
    '''check cinnabar metadata consistency'''

    # Entry point of the metadata check.
    #
    # Attributes read from `args`: `commit` (list of commits/changesets to
    # restrict the check to), `full` (check everything) and `force` (passed
    # to fsck_quick).
    #
    # Return values visible in this function: whatever fsck_quick returns
    # when neither `commit` nor `full` is given; otherwise 1 on invalid
    # arguments, unknown commits or broken metadata, 2 when something was
    # fixed, and 0 when nothing had to be reported.

    # Without explicit commits and without --full, only the quick check runs.
    if not args.commit and not args.full:
        return fsck_quick(args.force)

    status = FsckStatus()

    store = GitHgStore()

    # NOTE(review): this validation happens after the status and store
    # objects were created; confirm whether that ordering matters.
    if args.full and args.commit:
        logging.error('Cannot pass both --full and a commit')
        return 1

    if args.commit:
        commits = set()
        all_git_commits = {}

        # Each argument is looked up with both store.hg_changeset() and
        # GitHgHelper.hg2git(); the "Unknown commit or changeset" message
        # suggests it can be either a git commit or a mercurial changeset.
        for c in args.commit:
            cs = store.hg_changeset(c)
            if cs:
                commits.add(c)
                c = cs.node
            commit = GitHgHelper.hg2git(c)
            if commit == NULL_NODE_ID and not cs:
                status.info('Unknown commit or changeset: %s' % c)
                return 1
            if not cs:
                # NOTE(review): the value assigned to `cs` here is not read
                # afterwards in this function.
                cs = store.hg_changeset(commit)
                commits.add(commit)

        # Only the given commits are listed, without walking their history.
        all_git_commits = GitHgHelper.rev_list('--no-walk=unsorted', *commits)
    else:
        all_refs = dict(
            (ref, sha1) for sha1, ref in Git.for_each_ref('refs/cinnabar'))

        if 'refs/cinnabar/metadata' in all_refs:
            # `^^@` selects all the parents of the first parent of the
            # metadata commit.
            git_heads = '%s^^@' % all_refs['refs/cinnabar/metadata']
        else:
            # NOTE(review): asserts are stripped under `python -O`, in which
            # case `git_heads` would be undefined below.
            assert False

        all_git_commits = GitHgHelper.rev_list('--topo-order',
                                               '--full-history', '--reverse',
                                               git_heads)

    # Changesets are added to this dag as they are checked; it is used at
    # the end to compute the branch heads.
    dag = gitdag()

    GitHgHelper.reset_heads('manifests')

    # The file check through store.file() is only used when the
    # FileFindParents logger is enabled for DEBUG.
    full_file_check = FileFindParents.logger.isEnabledFor(logging.DEBUG)

    for node, tree, parents in progress_iter('Checking {} changesets',
                                             all_git_commits):
        node = store._replace.get(node, node)
        hg_node = store.hg_changeset(node)
        if not hg_node:
            status.report('Missing note for git commit: ' + node)
            continue
        # Mark the git2hg entry for this commit as seen, so that it's not
        # returned by GitHgHelper.dangling('git2hg') below.
        # (presumably; verify against the helper)
        GitHgHelper.seen('git2hg', node)

        changeset_data = store.changeset(hg_node)
        changeset = changeset_data.node

        GitHgHelper.seen('hg2git', changeset)
        changeset_ref = store.changeset_ref(changeset)
        if not changeset_ref:
            status.report('Missing changeset in hg2git branch: %s' % changeset)
            continue
        elif str(changeset_ref) != node:
            status.report('Commit mismatch for changeset %s\n'
                          '  hg2git: %s\n  commit: %s' %
                          (changeset, changeset_ref, node))

        hg_changeset = store.changeset(changeset, include_parents=True)
        if hg_changeset.node != hg_changeset.sha1:
            status.report('Sha1 mismatch for changeset %s' % changeset)

        dag.add(hg_changeset.node,
                (hg_changeset.parent1, hg_changeset.parent2),
                changeset_data.branch or 'default')

        # Recompute the changeset metadata from the git commit, and replace
        # the stored metadata when it differs.
        raw_changeset = Changeset.from_git_commit(node)
        patcher = ChangesetPatcher.from_diff(raw_changeset, changeset_data)
        if patcher != store.read_changeset_data(node):
            status.fix('Adjusted changeset metadata for %s' % changeset)
            GitHgHelper.set('changeset', changeset, NULL_NODE_ID)
            GitHgHelper.set('changeset', changeset, node)
            GitHgHelper.put_blob(patcher, want_sha1=False)
            GitHgHelper.set('changeset-metadata', changeset, NULL_NODE_ID)
            GitHgHelper.set('changeset-metadata', changeset, ':1')

        manifest = changeset_data.manifest
        # Skip the manifest when GitHgHelper.seen() returns a true value for
        # it (presumably meaning it was already seen; verify against the
        # helper) or when there is no manifest.
        if GitHgHelper.seen('hg2git', manifest) or manifest == NULL_NODE_ID:
            continue
        manifest_ref = store.manifest_ref(manifest)
        if not manifest_ref:
            # NOTE(review): there is no `continue` here, so the code below
            # still runs with a false `manifest_ref`; confirm this is
            # intended.
            status.report('Missing manifest in hg2git branch: %s' % manifest)

        # Manifests of the parent changesets, and their git counterparts.
        parents = tuple(
            store.changeset(p).manifest for p in hg_changeset.parents)
        git_parents = tuple(
            store.manifest_ref(p) for p in parents if p != NULL_NODE_ID)

        # This doesn't change the value but makes the helper track the manifest
        # dag.
        GitHgHelper.set('manifest', manifest, manifest_ref)

        if not GitHgHelper.check_manifest(manifest):
            status.report('Sha1 mismatch for manifest %s' % manifest)

        # The parents of the manifest commit must match the manifests of the
        # changeset parents, regardless of order.
        manifest_commit_parents = GitCommit(manifest_ref).parents
        if sorted(manifest_commit_parents) != sorted(git_parents):
            # TODO: better error
            status.report(
                '%s(%s) %s != %s' %
                (manifest, manifest_ref, manifest_commit_parents, git_parents))

        # TODO: check that manifest content matches changeset content

        changes = get_changes(manifest_ref, git_parents)
        for path, hg_file, hg_fileparents in changes:
            # A file is checked when it is not the null node and is either
            # HG_EMPTY_FILE or GitHgHelper.seen() returns a true value for it.
            if hg_file != NULL_NODE_ID and (hg_file == HG_EMPTY_FILE
                                            or GitHgHelper.seen(
                                                'hg2git', hg_file)):
                if full_file_check:
                    file = store.file(hg_file, hg_fileparents, git_parents,
                                      store.manifest_path(path))
                    valid = file.node == file.sha1
                else:
                    valid = GitHgHelper.check_file(hg_file, *hg_fileparents)
                if not valid:
                    status.report('Sha1 mismatch for file %s in manifest %s' %
                                  (hg_file, manifest_ref))

    # On a full check without errors, compare the manifest heads the store
    # originally had with the ones gathered by the helper during this run.
    if not args.commit and not status('broken'):
        store_manifest_heads = set(store._manifest_heads_orig)
        manifest_heads = set(GitHgHelper.heads('manifests'))
        if store_manifest_heads != manifest_heads:

            def iter_manifests(a, b):
                # Yield rev_list arguments selecting what is reachable from
                # the heads only in `a`, excluding what is reachable from
                # `b`.
                for h in a - b:
                    yield h
                for h in b:
                    yield '^%s' % h

            for m, t, p in GitHgHelper.rev_list(
                    '--topo-order', '--full-history', '--reverse',
                    *iter_manifests(manifest_heads, store_manifest_heads)):
                status.fix('Missing manifest commit in manifest branch: %s' %
                           m)

            for m, t, p in GitHgHelper.rev_list(
                    '--topo-order', '--full-history', '--reverse',
                    *iter_manifests(store_manifest_heads, manifest_heads)):
                status.fix('Removing metadata commit %s with no corresponding '
                           'changeset' % (m))

            for h in store_manifest_heads - manifest_heads:
                if GitHgHelper.seen('hg2git', store.hg_manifest(h)):
                    status.fix('Removing non-head reference to %s in manifests'
                               ' metadata.' % h)
    # Dangling entries are only looked for on a full check without errors;
    # otherwise `dangling` stays empty and both loops below do nothing.
    dangling = ()
    if not args.commit and not status('broken'):
        dangling = GitHgHelper.dangling('hg2git')
    for obj in dangling:
        status.fix('Removing dangling metadata for ' + obj)
        # Theoretically, we should figure out if they are files, manifests
        # or changesets and set the right variable accordingly, but in
        # practice, it makes no difference. Reevaluate when GitHgStore.close
        # is modified, though.
        GitHgHelper.set('file', obj, NULL_NODE_ID)
        GitHgHelper.set('file-meta', obj, NULL_NODE_ID)

    if not args.commit and not status('broken'):
        dangling = GitHgHelper.dangling('git2hg')
    for c in dangling:
        status.fix('Removing dangling note for commit ' + c)
        GitHgHelper.set('changeset-metadata', c, NULL_NODE_ID)

    if status('broken'):
        status.info(
            'Your git-cinnabar repository appears to be corrupted. There\n'
            'are known issues in older revisions that have been fixed.\n'
            'Please try running the following command to reset:\n'
            '  git cinnabar reclone\n\n'
            'Please note this command may change the commit sha1s. Your\n'
            'local branches will however stay untouched.\n'
            'Please report any corruption that fsck would detect after a\n'
            'reclone.')

    if not args.commit:
        status.info('Checking head references...')
        # Heads per branch, computed from the dag built while checking
        # changesets.
        computed_heads = defaultdict(set)
        for branch, head in dag.all_heads():
            computed_heads[branch].add(head)

        # Make the heads recorded in the store match the computed ones.
        for branch in sorted(dag.tags()):
            stored_heads = store.heads({branch})
            for head in computed_heads[branch] - stored_heads:
                status.fix('Adding missing head %s in branch %s' %
                           (head, branch))
                store.add_head(head)
            for head in stored_heads - computed_heads[branch]:
                status.fix('Removing non-head reference to %s in branch %s' %
                           (head, branch))
                del store._hgheads[head]

    metadata_commit = Git.resolve_ref('refs/cinnabar/metadata')
    if status('broken'):
        # Record the broken metadata; the store is not closed in this case.
        Git.update_ref('refs/cinnabar/broken', metadata_commit)
        return 1

    # Only a full check records the metadata as checked.
    if args.full:
        Git.update_ref('refs/cinnabar/checked', metadata_commit)
    interval_expired('fsck', 0)
    store.close()

    if status('fixed'):
        return 2
    return 0
示例#14
0
def fsck(args):
    # Check the consistency of the git-cinnabar metadata, fixing what can be
    # fixed.
    #
    # `args` is a list of command line arguments, parsed below: `--manifests`
    # and `--files` enable validation of the corresponding hashes, and the
    # positional arguments restrict the check to specific commits or
    # changesets.
    #
    # Returns 1 when a commit is unknown or when the metadata is broken,
    # 2 when something was fixed, and 0 otherwise.
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--manifests', action='store_true',
        help='Validate manifests hashes')
    parser.add_argument(
        '--files', action='store_true',
        help='Validate files hashes')
    parser.add_argument(
        'commit', nargs='*',
        help='Specific commit or changeset to check')
    args = parser.parse_args(args)

    # Flags set by the fix() and report() helpers below; they determine the
    # return value.
    status = {
        'broken': False,
        'fixed': False,
    }

    def info(message):
        # Write a carriage return to stderr before printing the message to
        # stdout (presumably to get back to the beginning of a progress
        # line).
        sys.stderr.write('\r')
        print message

    def fix(message):
        # Print a message about something that was fixed.
        status['fixed'] = True
        info(message)

    def report(message):
        # Print a message about something that is broken.
        status['broken'] = True
        info(message)

    store = GitHgStore()
    store.init_fast_import(lambda: FastImport())

    if args.commit:
        all_hg2git = {}
        all_notes = set()
        commits = set()
        all_git_commits = {}

        # Each argument is looked up with both store.read_changeset_data()
        # and GitHgHelper.hg2git(); the "Unknown commit or changeset" message
        # suggests it can be either a git commit or a mercurial changeset.
        for c in args.commit:
            data = store.read_changeset_data(c)
            if data:
                all_notes.add(c)
                commits.add(c)
                c = data['changeset']
            commit = GitHgHelper.hg2git(c)
            if commit == NULL_NODE_ID and not data:
                info('Unknown commit or changeset: %s' % c)
                return 1
            if commit != NULL_NODE_ID:
                all_hg2git[c] = commit, 'commit'
            if not data:
                data = store.read_changeset_data(commit)
                commits.add(commit)
                if data:
                    all_notes.add(commit)

        # Each output line is "<tree sha1> <commit sha1>"; only the given
        # commits are listed, without walking their history.
        all_git_commits = Git.iter(
            'log', '--no-walk=unsorted', '--stdin', '--format=%T %H',
            stdin=commits)
    else:
        # Maps the path of each entry in the refs/cinnabar/hg2git tree, with
        # slashes removed, to a (sha1, type) pair.
        all_hg2git = {
            path.replace('/', ''): (filesha1, intern(typ))
            for mode, typ, filesha1, path in
            progress_iter('Reading %d mercurial to git mappings',
                          Git.ls_tree('refs/cinnabar/hg2git', recursive=True))
        }

        # Paths of the entries in the refs/notes/cinnabar tree, with slashes
        # removed.
        all_notes = set(path.replace('/', '') for mode, typ, filesha1, path in
                        progress_iter(
                            'Reading %d commit to changeset mappings',
                            Git.ls_tree('refs/notes/cinnabar',
                                        recursive=True)))

        # Ordered as returned by rev-list; only the keys are used.
        manifest_commits = OrderedDict((m, None) for m in progress_iter(
            'Reading %d manifest trees',
            Git.iter('rev-list', '--full-history',
                     '--topo-order', 'refs/cinnabar/manifest'))
        )

        all_git_heads = Git.for_each_ref('refs/cinnabar/branches',
                                         format='%(refname)')

        all_git_commits = Git.iter('log', '--topo-order', '--full-history',
                                   '--reverse', '--stdin', '--format=%T %H',
                                   stdin=all_git_heads)

    # Seed the store's hg2git cache with the mappings gathered above.
    store._hg2git_cache = {p: s for p, (s, t) in all_hg2git.iteritems()}

    # What was encountered while walking the commits; used further below to
    # find dangling metadata.
    seen_changesets = set()
    seen_manifests = set()
    seen_manifest_refs = {}
    seen_files = set()
    seen_notes = set()

    hg_manifest = None

    # Changesets are added to this dag as they are checked; it is used at
    # the end to compute the branch heads.
    dag = gitdag()

    for line in progress_iter('Checking %d changesets', all_git_commits):
        tree, node = line.split(' ')
        if node not in all_notes:
            report('Missing note for git commit: ' + node)
            continue
        seen_notes.add(node)

        changeset_data = store.read_changeset_data(node)
        changeset = changeset_data['changeset']
        if 'extra' in changeset_data:
            extra = changeset_data['extra']
            # Parse the raw git commit header into a dict.
            header, message = GitHgHelper.cat_file(
                'commit', node).split('\n\n', 1)
            header = dict(l.split(' ', 1) for l in header.splitlines())
            if 'committer' in extra:
                committer_info = store.hg_author_info(header['committer'])
                committer = '%s %d %d' % committer_info
                # The committer in the metadata must match the committer of
                # the git commit in one of three forms.
                if (committer != extra['committer'] and
                        header['committer'] != extra['committer'] and
                        committer_info[0] != extra['committer']):
                    report('Committer mismatch between commit and metadata for'
                           ' changeset %s' % changeset)
                if committer == extra['committer']:
                    fix('Fixing useless committer metadata for changeset %s'
                        % changeset)
                    del changeset_data['extra']['committer']
                    store._changesets[changeset] = LazyString(node)
            if header['committer'] != header['author'] and not extra:
                fix('Fixing useless empty extra metadata for changeset %s'
                    % changeset)
                del changeset_data['extra']
                store._changesets[changeset] = LazyString(node)

        seen_changesets.add(changeset)
        changeset_ref = store.changeset_ref(changeset)
        if not changeset_ref:
            report('Missing changeset in hg2git branch: %s' % changeset)
        elif str(changeset_ref) != node:
            report('Commit mismatch for changeset %s\n'
                   '  hg2git: %s\n  commit: %s'
                   % (changeset, changeset_ref, node))

        hg_changeset = store.changeset(changeset, include_parents=True)
        sha1 = hg_changeset.sha1
        if hg_changeset.node != sha1:
            try_fixup = False
            # Hard-coded (changeset, computed sha1) pairs for which a
            # specific patch to the changeset data is attempted.
            if (changeset, sha1) in (
                ('8c557b7c03a4a753e5c163038f04862e9f65fce1',
                 '249b59139de8e08abeb6c4e261a137c756e7af0e'),
                ('ffdee4a4eb7fc7cae80dfc4cb2fe0c3178773dcf',
                 '415e9d2eac83d508bf58a4df585c5f6b2b0f44ed'),
            ):
                header = hg_changeset.data.split('\n', 4)
                start = sum(len(h) for h in header[:3]) + 1
                changeset_data['patch'] = ((start, start + 1, '1'),)
                try_fixup = True

            # Some known cases of corruption involve a whitespace after the
            # timezone. Adding an empty extra metadata works around those.
            elif 'extra' not in changeset_data:
                changeset_data['extra'] = {}
                try_fixup = True

            if try_fixup:
                # Recompute the changeset with the adjusted data and keep
                # the adjustment only if the sha1 now matches.
                hg_changeset = store.changeset(changeset, include_parents=True)
                sha1 = hg_changeset.sha1
                if hg_changeset.node == sha1:
                    fix('Fixing known sha1 mismatch for changeset %s' %
                        changeset)
                    store._changesets[changeset] = LazyString(node)

        if hg_changeset.node != sha1:
            report('Sha1 mismatch for changeset %s' % changeset)

        dag.add(hg_changeset.node,
                (hg_changeset.parent1, hg_changeset.parent2),
                changeset_data.get('extra', {}).get('branch', 'default'))

        # Each manifest is only checked once.
        manifest = changeset_data['manifest']
        if manifest in seen_manifests:
            continue
        seen_manifests.add(manifest)
        manifest_ref = store.manifest_ref(manifest)
        if manifest_ref:
            seen_manifest_refs[manifest_ref] = manifest
        if not manifest_ref:
            report('Missing manifest in hg2git branch: %s' % manifest)
        elif not args.commit and manifest_ref not in manifest_commits:
            report('Missing manifest commit in manifest branch: %s' %
                   manifest_ref)

        # Manifests of the parent changesets, only needed for the optional
        # manifest and file validations.
        if args.manifests or args.files:
            parents = tuple(
                store.read_changeset_data(store.changeset_ref(p))['manifest']
                for p in (hg_changeset.parent1, hg_changeset.parent2)
                if p != NULL_NODE_ID
            )

        if args.manifests:
            # Validate the manifest through the helper, falling back to
            # computing the sha1 from the store when there is no helper.
            try:
                with GitHgHelper.query('check-manifest', manifest,
                                       *parents) as stdout:
                    if stdout.readline().strip() != 'ok':
                        report('Sha1 mismatch for manifest %s' % manifest)
            except NoHelperException:
                hg_manifest = store.manifest(manifest)
                hg_manifest.set_parents(*parents)
                if hg_manifest.node != hg_manifest.sha1:
                    report('Sha1 mismatch for manifest %s' % manifest)

        # The `git` entry of the manifest commit tree is expected to be the
        # tree of the git commit.
        # NOTE(review): `manifest_ref` is used here even when it was reported
        # as missing above; confirm how Git.ls_tree handles that.
        git_ls = one(Git.ls_tree(manifest_ref, 'git'))
        if git_ls:
            mode, typ, sha1, path = git_ls
        else:
            # Without a `git` entry, the manifest commit is only valid if
            # its own tree is the empty tree.
            header, message = GitHgHelper.cat_file(
                'commit', manifest_ref).split('\n\n', 1)
            header = dict(l.split(' ', 1) for l in header.splitlines())
            if header['tree'] == EMPTY_TREE:
                sha1 = EMPTY_TREE
            else:
                report('Missing git tree in manifest commit %s' % manifest_ref)
                sha1 = None
        if sha1 and sha1 != tree:
            report('Tree mismatch between manifest commit %s and commit %s'
                   % (manifest_ref, node))

        if args.files:
            changes = get_changes(
                manifest_ref, tuple(store.manifest_ref(p) for p in parents),
                'hg')
            for path, hg_file, hg_fileparents in changes:
                # Each file revision is only checked once.
                if hg_file != NULL_NODE_ID and hg_file not in seen_files:
                    file = store.file(hg_file)
                    file.set_parents(*hg_fileparents)
                    if file.node != file.sha1:
                        report('Sha1 mismatch for file %s in manifest %s'
                               % (hg_file, manifest_ref))
                    seen_files.add(hg_file)

    # Files are only recorded in seen_files with --files, so without it only
    # the hg2git entries of type 'commit' are considered when looking for
    # dangling metadata below.
    if args.files:
        all_hg2git = set(all_hg2git.iterkeys())
    else:
        all_hg2git = set(k for k, (s, t) in all_hg2git.iteritems()
                         if t == 'commit')

    # Maps manifest commits that had to be recreated to the fast-import mark
    # of their replacement.
    adjusted = {}
    if not args.commit:
        # Manifest commits that no checked changeset referred to.
        dangling = set(manifest_commits) - set(seen_manifest_refs)
        if dangling:
            def iter_manifests():
                # Go through the manifest commits in the reverse of the
                # rev-list order and, once a dangling commit has been
                # dropped, yield every kept commit along with the kept
                # commit that precedes it.
                removed_one = False
                yielded = False
                previous = None
                for obj in reversed(manifest_commits):
                    if obj in dangling:
                        fix('Removing metadata commit %s with no hg2git entry'
                            % obj)
                        removed_one = True
                    else:
                        if removed_one:
                            yield obj, previous
                            yielded = True
                        previous = obj

                # Nothing was yielded after the removal: yield the last
                # commit of the loop with a False parent, which makes the
                # caller update the ref instead of creating a commit.
                if removed_one and not yielded:
                    yield obj, False

            for obj, parent in progress_iter('Adjusting %d metadata commits',
                                             iter_manifests()):
                mark = store._fast_import.new_mark()
                if parent is False:
                    Git.update_ref('refs/cinnabar/manifest', obj)
                    continue
                elif parent:
                    parents = (adjusted.get(parent, parent),)
                # NOTE(review): when `parent` is None (no kept commit
                # precedes `obj`), `parents` is not assigned above and keeps
                # whatever value it previously had in this function; confirm
                # whether an empty tuple was intended.
                with store._fast_import.commit(
                        ref='refs/cinnabar/manifest',
                        parents=parents, mark=mark) as commit:
                    mode, typ, tree, path = store._fast_import.ls(obj)
                    commit.filemodify('', tree, typ='tree')
                adjusted[obj] = Mark(mark)

    # hg2git entries that nothing checked above referred to.
    dangling = all_hg2git - seen_changesets - seen_manifests - seen_files
    if dangling or adjusted:
        with store._fast_import.commit(
                ref='refs/cinnabar/hg2git',
                parents=('refs/cinnabar/hg2git^0',)) as commit:
            for obj in dangling:
                fix('Removing dangling metadata for ' + obj)
                commit.filedelete(sha1path(obj))
            # Point the manifests whose commit was recreated to the new
            # commit.
            for obj, mark in progress_iter(
                    'Updating hg2git for %d metadata commits',
                    adjusted.iteritems()):
                commit.filemodify(sha1path(seen_manifest_refs[obj]), mark,
                                  typ='commit')

    # Notes for commits that were not encountered above.
    dangling = all_notes - seen_notes
    if dangling:
        with store._fast_import.commit(
                ref='refs/notes/cinnabar',
                parents=('refs/notes/cinnabar^0',)) as commit:
            for c in dangling:
                fix('Removing dangling note for commit ' + c)
                # That's brute force, but meh.
                for l in range(0, 10):
                    commit.filedelete(sha1path(c, l))

    if status['broken']:
        info('Your git-cinnabar repository appears to be corrupted. There\n'
             'are known issues in older revisions that have been fixed.\n'
             'Please try running the following command to reset:\n'
             '  git cinnabar reclone\n\n'
             'Please note this command may change the commit sha1s. Your\n'
             'local branches will however stay untouched.\n'
             'Please report any corruption that fsck would detect after a\n'
             'reclone.')

    if not args.commit:
        info('Checking head references...')
        # Heads per branch, computed from the dag built while checking
        # changesets.
        computed_heads = defaultdict(set)
        for branch, head in dag.all_heads():
            computed_heads[branch].add(head)

        # Make the heads recorded in the store match the computed ones.
        for branch in sorted(dag.tags()):
            stored_heads = store.heads({branch})
            for head in computed_heads[branch] - stored_heads:
                fix('Adding missing head %s in branch %s' %
                    (head, branch))
                store.add_head(head)
            for head in stored_heads - computed_heads[branch]:
                fix('Removing non-head reference to %s in branch %s' %
                    (head, branch))
                store._hgheads.remove((branch, head))

    # The store is closed whether or not the metadata is broken.
    store.close()

    if status['broken']:
        return 1
    if status['fixed']:
        return 2
    return 0
示例#15
0
def fsck(args):
    """Check the git-cinnabar metadata for consistency, repairing some of it.

    ``args`` is the list of command line arguments.  It accepts the
    ``--manifests`` and ``--files`` flags (extra sha1 validation of manifests
    and files) and an optional list of git commits or mercurial changesets
    to which the check is then restricted.

    Returns 1 when a problem that could not be repaired was reported (or
    when one of the given commits/changesets is unknown), 2 when only
    repairs were performed, and 0 when nothing was found.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--manifests',
                        action='store_true',
                        help='Validate manifests hashes')
    parser.add_argument('--files',
                        action='store_true',
                        help='Validate files hashes')
    parser.add_argument('commit',
                        nargs='*',
                        help='Specific commit or changeset to check')
    args = parser.parse_args(args)

    # Shared outcome flags, flipped by the fix() and report() closures below
    # and used to pick the return value at the end.
    status = {
        'broken': False,
        'fixed': False,
    }

    def info(message):
        # A carriage return is written to stderr before printing the message
        # on stdout -- presumably to get back to the start of a line used by
        # progress output; confirm against progress_iter.
        sys.stderr.write('\r')
        print message

    def fix(message):
        # Record that something was repaired, and say what.
        status['fixed'] = True
        info(message)

    def report(message):
        # Record that something is broken, and say what.
        status['broken'] = True
        info(message)

    store = GitHgStore()
    store.init_fast_import(lambda: FastImport())

    if args.commit:
        # Restricted mode: only the commits/changesets given on the command
        # line are checked, so the mappings are built from those alone.
        all_hg2git = {}
        all_notes = set()
        commits = set()
        # Placeholder; replaced by the `git log` iterator below.
        all_git_commits = {}

        for c in args.commit:
            # First try the argument as a git commit carrying changeset data.
            data = store.read_changeset_data(c)
            if data:
                all_notes.add(c)
                commits.add(c)
                c = data['changeset']
            # From here on, c is treated as a mercurial changeset id.
            commit = GitHgHelper.hg2git(c)
            if commit == NULL_NODE_ID and not data:
                info('Unknown commit or changeset: %s' % c)
                return 1
            if commit != NULL_NODE_ID:
                all_hg2git[c] = commit, 'commit'
            if not data:
                # The argument was a changeset id: check its git commit.
                data = store.read_changeset_data(commit)
                commits.add(commit)
                if data:
                    all_notes.add(commit)

        # Each output line is "<tree sha1> <commit sha1>".
        all_git_commits = Git.iter('log',
                                   '--no-walk=unsorted',
                                   '--stdin',
                                   '--format=%T %H',
                                   stdin=commits)
    else:
        # Full mode: read every entry of the hg2git tree.  Keys are the tree
        # paths with the '/' separators removed; values are (sha1, type).
        all_hg2git = {
            path.replace('/', ''): (filesha1, intern(typ))
            for mode, typ, filesha1, path in progress_iter(
                'Reading %d mercurial to git mappings',
                Git.ls_tree('refs/cinnabar/hg2git', recursive=True))
        }

        # Same path flattening for the notes tree.
        all_notes = set(
            path.replace('/', '')
            for mode, typ, filesha1, path in progress_iter(
                'Reading %d commit to changeset mappings',
                Git.ls_tree('refs/notes/cinnabar', recursive=True)))

        # Ordered as emitted by `git rev-list --topo-order`; only the keys
        # are used (values are always None).
        manifest_commits = OrderedDict((m, None) for m in progress_iter(
            'Reading %d manifest trees',
            Git.iter('rev-list', '--full-history', '--topo-order',
                     'refs/cinnabar/manifest')))

        all_git_heads = Git.for_each_ref('refs/cinnabar/branches',
                                         format='%(refname)')

        # Each output line is "<tree sha1> <commit sha1>".
        all_git_commits = Git.iter('log',
                                   '--topo-order',
                                   '--full-history',
                                   '--reverse',
                                   '--stdin',
                                   '--format=%T %H',
                                   stdin=all_git_heads)

    # Prime the store's hg2git cache with the sha1s gathered above.
    store._hg2git_cache = {p: s for p, (s, t) in all_hg2git.iteritems()}

    # Everything encountered while walking the commits; compared against the
    # stored mappings afterwards to find dangling entries.
    seen_changesets = set()
    seen_manifests = set()
    seen_manifest_refs = {}
    seen_files = set()
    seen_notes = set()

    hg_manifest = None

    # Collects (node, parents, branch) to recompute branch heads at the end.
    dag = gitdag()

    for line in progress_iter('Checking %d changesets', all_git_commits):
        tree, node = line.split(' ')
        if node not in all_notes:
            report('Missing note for git commit: ' + node)
            continue
        seen_notes.add(node)

        changeset_data = store.read_changeset_data(node)
        changeset = changeset_data['changeset']
        if 'extra' in changeset_data:
            extra = changeset_data['extra']
            # Parse the git commit header into a {field: value} dict.
            header, message = GitHgHelper.cat_file('commit',
                                                   node).split('\n\n', 1)
            header = dict(l.split(' ', 1) for l in header.splitlines())
            if 'committer' in extra:
                committer_info = store.hg_author_info(header['committer'])
                committer = '%s %d %d' % committer_info
                # The stored committer is accepted if it matches any of the
                # three forms: converted info, raw git header, or the first
                # element of the converted info alone.
                if (committer != extra['committer']
                        and header['committer'] != extra['committer']
                        and committer_info[0] != extra['committer']):
                    report('Committer mismatch between commit and metadata for'
                           ' changeset %s' % changeset)
                if committer == extra['committer']:
                    fix('Fixing useless committer metadata for changeset %s' %
                        changeset)
                    del changeset_data['extra']['committer']
                    store._changesets[changeset] = LazyString(node)
            if header['committer'] != header['author'] and not extra:
                fix('Fixing useless empty extra metadata for changeset %s' %
                    changeset)
                del changeset_data['extra']
                store._changesets[changeset] = LazyString(node)

        seen_changesets.add(changeset)
        changeset_ref = store.changeset_ref(changeset)
        if not changeset_ref:
            report('Missing changeset in hg2git branch: %s' % changeset)
        elif str(changeset_ref) != node:
            report('Commit mismatch for changeset %s\n'
                   '  hg2git: %s\n  commit: %s' %
                   (changeset, changeset_ref, node))

        hg_changeset = store.changeset(changeset, include_parents=True)
        sha1 = hg_changeset.sha1
        if hg_changeset.node != sha1:
            try_fixup = False
            # Two specific (changeset, computed sha1) pairs get a one-entry
            # patch located right after the first three header lines.
            # NOTE(review): presumably this replaces a single byte with '1';
            # confirm against the changeset 'patch' format.
            if (changeset, sha1) in (
                ('8c557b7c03a4a753e5c163038f04862e9f65fce1',
                 '249b59139de8e08abeb6c4e261a137c756e7af0e'),
                ('ffdee4a4eb7fc7cae80dfc4cb2fe0c3178773dcf',
                 '415e9d2eac83d508bf58a4df585c5f6b2b0f44ed'),
            ):
                header = hg_changeset.data.split('\n', 4)
                start = sum(len(h) for h in header[:3]) + 1
                changeset_data['patch'] = ((start, start + 1, '1'), )
                try_fixup = True

            # Some known cases of corruption involve a whitespace after the
            # timezone. Adding an empty extra metadata works around those.
            elif 'extra' not in changeset_data:
                changeset_data['extra'] = {}
                try_fixup = True

            if try_fixup:
                # Recompute with the adjusted metadata; only keep the change
                # (and call it a fix) if the sha1 now matches.
                hg_changeset = store.changeset(changeset, include_parents=True)
                sha1 = hg_changeset.sha1
                if hg_changeset.node == sha1:
                    fix('Fixing known sha1 mismatch for changeset %s' %
                        changeset)
                    store._changesets[changeset] = LazyString(node)

        if hg_changeset.node != sha1:
            report('Sha1 mismatch for changeset %s' % changeset)

        # Branch defaults to 'default' when the extra metadata has none.
        dag.add(hg_changeset.node,
                (hg_changeset.parent1, hg_changeset.parent2),
                changeset_data.get('extra', {}).get('branch', 'default'))

        manifest = changeset_data['manifest']
        # Each manifest is only checked once, with the first changeset that
        # references it.
        if manifest in seen_manifests:
            continue
        seen_manifests.add(manifest)
        manifest_ref = store.manifest_ref(manifest)
        if manifest_ref:
            seen_manifest_refs[manifest_ref] = manifest
        if not manifest_ref:
            report('Missing manifest in hg2git branch: %s' % manifest)
        elif not args.commit and manifest_ref not in manifest_commits:
            report('Missing manifest commit in manifest branch: %s' %
                   manifest_ref)

        # `parents` is only (re)computed when one of the optional checks is
        # enabled; it holds the manifests of the changeset's non-null
        # parents.
        if args.manifests or args.files:
            parents = tuple(
                store.read_changeset_data(store.changeset_ref(p))['manifest']
                for p in (hg_changeset.parent1, hg_changeset.parent2)
                if p != NULL_NODE_ID)

        if args.manifests:
            try:
                with GitHgHelper.query('check-manifest', manifest,
                                       *parents) as stdout:
                    if stdout.readline().strip() != 'ok':
                        report('Sha1 mismatch for manifest %s' % manifest)
            except NoHelperException:
                # Without the helper, recompute the manifest sha1 here.
                hg_manifest = store.manifest(manifest)
                hg_manifest.set_parents(*parents)
                if hg_manifest.node != hg_manifest.sha1:
                    report('Sha1 mismatch for manifest %s' % manifest)

        # NOTE(review): manifest_ref can be falsy here (reported above as a
        # missing manifest) and is still passed to Git.ls_tree; confirm that
        # is handled.
        git_ls = one(Git.ls_tree(manifest_ref, 'git'))
        if git_ls:
            mode, typ, sha1, path = git_ls
        else:
            # No 'git' entry in the manifest commit: acceptable only when the
            # manifest commit's own tree is the empty tree.
            header, message = GitHgHelper.cat_file('commit',
                                                   manifest_ref).split(
                                                       '\n\n', 1)
            header = dict(l.split(' ', 1) for l in header.splitlines())
            if header['tree'] == EMPTY_TREE:
                sha1 = EMPTY_TREE
            else:
                report('Missing git tree in manifest commit %s' % manifest_ref)
                sha1 = None
        # The tree recorded for the manifest must be the git commit's tree.
        if sha1 and sha1 != tree:
            report('Tree mismatch between manifest commit %s and commit %s' %
                   (manifest_ref, node))

        if args.files:
            changes = get_changes(
                manifest_ref, tuple(store.manifest_ref(p) for p in parents),
                'hg')
            for path, hg_file, hg_fileparents in changes:
                # Each file revision is validated at most once.
                if hg_file != NULL_NODE_ID and hg_file not in seen_files:
                    file = store.file(hg_file)
                    file.set_parents(*hg_fileparents)
                    if file.node != file.sha1:
                        report('Sha1 mismatch for file %s in manifest %s' %
                               (hg_file, manifest_ref))
                    seen_files.add(hg_file)

    # Reduce all_hg2git to the set of keys eligible for the dangling check.
    # Files are only seen with --files, so without it only entries of type
    # 'commit' are kept.
    if args.files:
        all_hg2git = set(all_hg2git.iterkeys())
    else:
        all_hg2git = set(k for k, (s, t) in all_hg2git.iteritems()
                         if t == 'commit')

    # Maps an original manifest commit to the Mark of its rewritten version.
    adjusted = {}
    if not args.commit:
        # Manifest commits that no checked changeset led to.
        dangling = set(manifest_commits) - set(seen_manifest_refs)
        if dangling:

            def iter_manifests():
                # Walks manifest_commits in reverse of the rev-list order.
                # Once a dangling commit has been dropped, every kept commit
                # is yielded with the previously kept commit (or None).
                removed_one = False
                yielded = False
                previous = None
                for obj in reversed(manifest_commits):
                    if obj in dangling:
                        fix('Removing metadata commit %s with no hg2git entry'
                            % obj)
                        removed_one = True
                    else:
                        if removed_one:
                            yield obj, previous
                            yielded = True
                        previous = obj

                # Something was removed but nothing was yielded: yield the
                # last commit visited by the loop with False, which the
                # caller turns into a plain ref update.
                if removed_one and not yielded:
                    yield obj, False

            for obj, parent in progress_iter('Adjusting %d metadata commits',
                                             iter_manifests()):
                mark = store._fast_import.new_mark()
                if parent is False:
                    Git.update_ref('refs/cinnabar/manifest', obj)
                    continue
                elif parent:
                    # Use the rewritten parent when there is one.
                    parents = (adjusted.get(parent, parent), )
                # NOTE(review): when parent is None, `parents` is not
                # assigned here; it keeps whatever value an earlier statement
                # gave it (or is unbound).  Confirm this is intended.
                with store._fast_import.commit(ref='refs/cinnabar/manifest',
                                               parents=parents,
                                               mark=mark) as commit:
                    # Reuse the tree of the original manifest commit.
                    mode, typ, tree, path = store._fast_import.ls(obj)
                    commit.filemodify('', tree, typ='tree')
                adjusted[obj] = Mark(mark)

    # hg2git entries that were never reached from a checked commit.
    dangling = all_hg2git - seen_changesets - seen_manifests - seen_files
    if dangling or adjusted:
        with store._fast_import.commit(
                ref='refs/cinnabar/hg2git',
                parents=('refs/cinnabar/hg2git^0', )) as commit:
            for obj in dangling:
                fix('Removing dangling metadata for ' + obj)
                commit.filedelete(sha1path(obj))
            # Point the manifests at their rewritten metadata commits.
            for obj, mark in progress_iter(
                    'Updating hg2git for %d metadata commits',
                    adjusted.iteritems()):
                commit.filemodify(sha1path(seen_manifest_refs[obj]),
                                  mark,
                                  typ='commit')

    # Notes attached to commits that were not visited.
    dangling = all_notes - seen_notes
    if dangling:
        with store._fast_import.commit(
                ref='refs/notes/cinnabar',
                parents=('refs/notes/cinnabar^0', )) as commit:
            for c in dangling:
                fix('Removing dangling note for commit ' + c)
                # That's brute force, but meh.
                # Deletes sha1path(c, l) for every l from 0 to 9.
                for l in range(0, 10):
                    commit.filedelete(sha1path(c, l))

    if status['broken']:
        info('Your git-cinnabar repository appears to be corrupted. There\n'
             'are known issues in older revisions that have been fixed.\n'
             'Please try running the following command to reset:\n'
             '  git cinnabar reclone\n\n'
             'Please note this command may change the commit sha1s. Your\n'
             'local branches will however stay untouched.\n'
             'Please report any corruption that fsck would detect after a\n'
             'reclone.')

    if not args.commit:
        info('Checking head references...')
        # Heads per branch, as derived from the changesets walked above.
        computed_heads = defaultdict(set)
        for branch, head in dag.all_heads():
            computed_heads[branch].add(head)

        # Reconcile the stored heads of each branch with the computed ones.
        for branch in sorted(dag.tags()):
            stored_heads = store.heads({branch})
            for head in computed_heads[branch] - stored_heads:
                fix('Adding missing head %s in branch %s' % (head, branch))
                store.add_head(head)
            for head in stored_heads - computed_heads[branch]:
                fix('Removing non-head reference to %s in branch %s' %
                    (head, branch))
                store._hgheads.remove((branch, head))

    store.close()

    # 'broken' takes precedence over 'fixed'.
    if status['broken']:
        return 1
    if status['fixed']:
        return 2
    return 0
示例#16
0
def bundle_data(store, commits):
    """Generate the contents of a bundle for the given git commits.

    ``commits`` is an iterable of ``(node, parents)`` pairs.  The generator
    produces three consecutive sections, each one closed by a ``None``:

    * one changeset object per commit (mercurial metadata is created on
      the fly for commits that have none, or for all of them when the
      'bundle' check is enabled);
    * one manifest object for every manifest that is neither the null
      manifest nor the manifest of one of the changeset's parents;
    * whatever ``progress_enum`` produces from the stream of per-file
      items (the path, its distinct file revisions, then ``None``).

    Raises ``Exception`` for commits with more than two parents.
    """
    # manifest id -> node of the first changeset introducing it
    manifest_origin = OrderedDict()
    # manifest path -> list of (file, file parents, changeset, mn parents)
    revisions_by_path = defaultdict(list)

    changeset_source = progress_iter('Bundling {} changesets', commits)
    for git_node, git_parents in changeset_source:
        if len(git_parents) > 2:
            raise Exception(
                'Pushing octopus merges to mercurial is not supported')

        needs_metadata = (store.read_changeset_data(git_node) is None
                          or check_enabled('bundle'))
        if needs_metadata:
            store.create_hg_metadata(git_node, git_parents)
        cs = store._changeset(git_node, include_parents=True)
        if needs_metadata:
            store.add_head(cs.node, cs.parent1, cs.parent2)
        yield cs

        mn = cs.manifest
        if mn in manifest_origin or mn == NULL_NODE_ID:
            continue
        # A manifest shared with a parent changeset is not bundled again.
        inherited = (store.changeset(p).manifest for p in cs.parents)
        if mn not in inherited:
            manifest_origin[mn] = cs.node

    yield None

    manifest_source = progress_iter('Bundling {} manifests',
                                    iteritems(manifest_origin))
    for mn, cs_node in manifest_source:
        mn_obj = store.manifest(mn, include_parents=True)
        mn_obj.changeset = cs_node
        yield mn_obj

        mn_ref = store.manifest_ref(mn)
        mn_parent_refs = tuple(
            store.manifest_ref(p) for p in mn_obj.parents)
        for path, file_node, file_parents in get_changes(mn_ref,
                                                         mn_parent_refs):
            if file_node == NULL_NODE_ID:
                continue
            entry = (file_node, file_parents, cs_node, mn_parent_refs)
            revisions_by_path[store.manifest_path(path)].append(entry)

    yield None

    def counted_file_items(by_path):
        # Every item is prefixed with (revisions so far, 1-based file index).
        total_revisions = 0
        for file_index, path in enumerate(sorted(by_path), 1):
            yield (total_revisions, file_index), path
            emitted = set()
            for file_node, file_parents, cs_node, mn_refs in by_path[path]:
                if file_node not in emitted:
                    total_revisions += 1
                    emitted.add(file_node)
                    hg_file = store.file(file_node, file_parents, mn_refs,
                                         path)
                    hg_file.changeset = cs_node
                    assert hg_file.node == hg_file.sha1
                    yield (total_revisions, file_index), hg_file

            yield (total_revisions, file_index), None

    file_source = progress_enum('Bundling {} revisions of {} files',
                                counted_file_items(revisions_by_path))
    for item in file_source:
        yield item

    yield None
示例#17
0
def bundle_data(store, commits):
    """Generate the contents of a bundle for the given git commits.

    ``commits`` is an iterable of ``(node, parents)`` pairs.  Three
    sections are produced one after the other, each followed by ``None``:
    the changesets, the manifests not already carried by a parent
    changeset, and the files.  Within the files section, each file is a
    path, followed by its distinct revisions, followed by ``None``.

    Raises ``Exception`` for commits with more than two parents.
    """
    # manifest id -> node of the first changeset introducing it
    manifest_origin = OrderedDict()
    # manifest path -> list of (file, file parents, changeset, mn parents)
    revisions_by_path = defaultdict(list)

    changeset_source = progress_iter('Bundling {} changesets', commits)
    for git_node, git_parents in changeset_source:
        if len(git_parents) > 2:
            raise Exception(
                'Pushing octopus merges to mercurial is not supported')

        needs_metadata = (store.read_changeset_data(git_node) is None
                          or check_enabled('bundle'))
        if needs_metadata:
            store.create_hg_metadata(git_node, git_parents)
        cs = store._changeset(git_node, include_parents=True)
        if needs_metadata:
            store.add_head(cs.node, cs.parent1, cs.parent2)
        yield cs

        mn = cs.manifest
        if mn in manifest_origin or mn == NULL_NODE_ID:
            continue
        # A manifest shared with a parent changeset is not bundled again.
        inherited = (store.changeset(p).manifest for p in cs.parents)
        if mn not in inherited:
            manifest_origin[mn] = cs.node

    yield None

    manifest_source = progress_iter('Bundling {} manifests',
                                    manifest_origin.iteritems())
    for mn, cs_node in manifest_source:
        mn_obj = store.manifest(mn, include_parents=True)
        mn_obj.changeset = cs_node
        yield mn_obj

        mn_ref = store.manifest_ref(mn)
        mn_parent_refs = tuple(
            store.manifest_ref(p) for p in mn_obj.parents)
        for path, file_node, file_parents in get_changes(mn_ref,
                                                         mn_parent_refs):
            if file_node == NULL_NODE_ID:
                continue
            entry = (file_node, file_parents, cs_node, mn_parent_refs)
            revisions_by_path[store.manifest_path(path)].append(entry)

    yield None

    def file_items(by_path):
        # path, then each distinct revision of that path, then None.
        for path in sorted(by_path):
            yield path
            emitted = set()
            for file_node, file_parents, cs_node, mn_refs in by_path[path]:
                if file_node not in emitted:
                    emitted.add(file_node)
                    hg_file = store.file(file_node, file_parents, mn_refs,
                                         path)
                    hg_file.changeset = cs_node
                    assert hg_file.node == hg_file.sha1
                    yield hg_file

            yield None

    class FollowsItem(object):
        # Callable handed to progress_iter.  Its result is truthy only when
        # the previous item was truthy and the current one is not None, so
        # neither a path (it comes first, or right after a None) nor a None
        # terminator qualifies.
        def __init__(self):
            self._last = None

        def __call__(self, item):
            last, self._last = self._last, item
            return last and item is not None

    file_source = progress_iter('Bundling {} files',
                                file_items(revisions_by_path),
                                FollowsItem())
    for item in file_source:
        yield item

    yield None
示例#18
0
def bundle_data(store, commits):
    """Generate the contents of a bundle for the given git commits.

    ``commits`` is an iterable of ``(node, parents)`` pairs.  The output is
    made of three ``None``-terminated sections: changesets first, then the
    manifests that are not inherited from a parent changeset, then the
    items ``progress_enum`` produces from the per-file stream (path,
    distinct file revisions, ``None``).

    Raises ``Exception`` for commits with more than two parents.
    """
    introduced_by = OrderedDict()
    per_path = defaultdict(list)

    for commit_id, commit_parents in progress_iter('Bundling {} changesets',
                                                   commits):
        if len(commit_parents) > 2:
            raise Exception(
                'Pushing octopus merges to mercurial is not supported')

        # Metadata is (re)created when missing or when the 'bundle' check
        # is enabled.
        create = (store.read_changeset_data(commit_id) is None
                  or check_enabled('bundle'))
        if create:
            store.create_hg_metadata(commit_id, commit_parents)
        hg_cs = store._changeset(commit_id, include_parents=True)
        if create:
            store.add_head(hg_cs.node, hg_cs.parent1, hg_cs.parent2)
        yield hg_cs

        manifest_id = hg_cs.manifest
        if manifest_id in introduced_by or manifest_id == NULL_NODE_ID:
            continue
        parent_manifests = (store.changeset(p).manifest
                            for p in hg_cs.parents)
        if manifest_id not in parent_manifests:
            introduced_by[manifest_id] = hg_cs.node

    yield None

    for manifest_id, cs_id in progress_iter('Bundling {} manifests',
                                            introduced_by.iteritems()):
        hg_mn = store.manifest(manifest_id, include_parents=True)
        hg_mn.changeset = cs_id
        yield hg_mn

        own_ref = store.manifest_ref(manifest_id)
        parent_refs = tuple(store.manifest_ref(p) for p in hg_mn.parents)
        for path, file_id, file_parents in get_changes(own_ref, parent_refs):
            if file_id == NULL_NODE_ID:
                continue
            per_path[store.manifest_path(path)].append(
                (file_id, file_parents, cs_id, parent_refs))

    yield None

    def enumerate_file_items(by_path):
        # Items are tagged with (revisions emitted so far, file number).
        revisions = 0
        for number, path in enumerate(sorted(by_path), 1):
            yield (revisions, number), path
            done = set()
            for file_id, file_parents, cs_id, mn_refs in by_path[path]:
                if file_id in done:
                    continue
                revisions += 1
                done.add(file_id)
                hg_file = store.file(file_id, file_parents, mn_refs, path)
                hg_file.changeset = cs_id
                assert hg_file.node == hg_file.sha1
                yield (revisions, number), hg_file

            yield (revisions, number), None

    for item in progress_enum('Bundling {} revisions of {} files',
                              enumerate_file_items(per_path)):
        yield item

    yield None
示例#19
0
def check_replace(store):
    """Remove the entries of store._replace that map a key to itself."""
    # Collect first: the mapping must not change size while iterated.
    redundant = []
    for source, target in store._replace.iteritems():
        if source == target:
            redundant.append(source)
    for source in progress_iter('Removing {} self-referencing grafts',
                                redundant):
        del store._replace[source]
示例#20
0
def fsck(args):
    '''check cinnabar metadata consistency'''

    # args is expected to carry `commit` and `full` attributes.
    #
    # Return values visible in this function: 1 when something broken was
    # reported, when an unknown commit/changeset was given, or when both
    # --full and a commit were passed; 2 when only fixes were made; 0
    # otherwise.  With neither a commit nor --full, the result of
    # fsck_quick() is returned instead.

    if not args.commit and not args.full:
        return fsck_quick()

    status = FsckStatus()

    store = GitHgStore()

    if args.full and args.commit:
        logging.error('Cannot pass both --full and a commit')
        return 1

    if args.commit:
        # Restricted mode: only check the commits/changesets given.
        commits = set()
        # Placeholder; replaced by the rev_list result below.
        all_git_commits = {}

        for c in args.commit:
            # First try the argument as a git commit.
            cs = store.hg_changeset(c)
            if cs:
                commits.add(c)
                c = cs.node
            # From here on, c is treated as a mercurial changeset id.
            commit = GitHgHelper.hg2git(c)
            if commit == NULL_NODE_ID and not cs:
                status.info('Unknown commit or changeset: %s' % c)
                return 1
            if not cs:
                # The argument was a changeset id: check its git commit.
                cs = store.hg_changeset(commit)
                commits.add(commit)

        all_git_commits = GitHgHelper.rev_list('--no-walk=unsorted', *commits)
    else:
        all_refs = dict((ref, sha1)
                        for sha1, ref in Git.for_each_ref('refs/cinnabar'))

        if 'refs/cinnabar/metadata' in all_refs:
            # `<rev>^^@` is git revision syntax for all the parents of the
            # first parent of <rev>.
            # NOTE(review): presumably those are the changeset heads
            # recorded in the metadata; confirm against the metadata layout.
            git_heads = '%s^^@' % all_refs['refs/cinnabar/metadata']
        else:
            assert False

        all_git_commits = GitHgHelper.rev_list(
            '--topo-order', '--full-history', '--reverse', git_heads)

    # Collects (node, parents, branch) to recompute branch heads at the end.
    dag = gitdag()

    GitHgHelper.reset_heads('manifests')

    # Files are rebuilt through the store (instead of asking the helper)
    # only when debug logging is enabled for FileFindParents.
    full_file_check = FileFindParents.logger.isEnabledFor(logging.DEBUG)

    for node, tree, parents in progress_iter('Checking {} changesets',
                                             all_git_commits):
        # Follow the graft replacement for this commit, if there is one.
        node = store._replace.get(node, node)
        hg_node = store.hg_changeset(node)
        if not hg_node:
            status.report('Missing note for git commit: ' + node)
            continue
        GitHgHelper.seen('git2hg', node)

        changeset_data = store.changeset(hg_node)
        changeset = changeset_data.node

        GitHgHelper.seen('hg2git', changeset)
        changeset_ref = store.changeset_ref(changeset)
        if not changeset_ref:
            status.report('Missing changeset in hg2git branch: %s' % changeset)
            continue
        elif str(changeset_ref) != node:
            status.report('Commit mismatch for changeset %s\n'
                          '  hg2git: %s\n  commit: %s'
                          % (changeset, changeset_ref, node))

        hg_changeset = store.changeset(changeset, include_parents=True)
        if hg_changeset.node != hg_changeset.sha1:
            status.report('Sha1 mismatch for changeset %s' % changeset)

        # Branch defaults to 'default' when the changeset has none.
        dag.add(hg_changeset.node,
                (hg_changeset.parent1, hg_changeset.parent2),
                changeset_data.branch or 'default')

        # Recompute the changeset metadata as a diff between what the git
        # commit alone yields and the actual changeset; if it differs from
        # what is stored, the stored metadata is replaced.
        raw_changeset = Changeset.from_git_commit(node)
        patcher = ChangesetPatcher.from_diff(raw_changeset, changeset_data)
        if patcher != store.read_changeset_data(node):
            status.fix('Adjusted changeset metadata for %s' % changeset)
            # Each mapping is set to the null id before the new value.
            # NOTE(review): ':1' presumably refers to the blob just sent
            # with put_blob; confirm against the helper protocol.
            GitHgHelper.set('changeset', changeset, NULL_NODE_ID)
            GitHgHelper.set('changeset', changeset, node)
            GitHgHelper.put_blob(patcher, want_sha1=False)
            GitHgHelper.set('changeset-metadata', changeset, NULL_NODE_ID)
            GitHgHelper.set('changeset-metadata', changeset, ':1')

        manifest = changeset_data.manifest
        # NOTE(review): a truthy seen() result skips the manifest checks
        # here, while in the file loop below a truthy result is what
        # triggers the check; confirm the meaning of seen()'s return value.
        if GitHgHelper.seen('hg2git', manifest) or manifest == NULL_NODE_ID:
            continue
        manifest_ref = store.manifest_ref(manifest)
        if not manifest_ref:
            # NOTE(review): the loop keeps going with a falsy manifest_ref
            # after this report; confirm the calls below cope with it.
            status.report('Missing manifest in hg2git branch: %s' % manifest)

        # Manifests of the changeset's parents, and the refs of the
        # non-null ones.
        parents = tuple(
            store.changeset(p).manifest
            for p in hg_changeset.parents
        )
        git_parents = tuple(store.manifest_ref(p) for p in parents
                            if p != NULL_NODE_ID)

        # This doesn't change the value but makes the helper track the manifest
        # dag.
        GitHgHelper.set('manifest', manifest, manifest_ref)

        if not GitHgHelper.check_manifest(manifest):
            status.report('Sha1 mismatch for manifest %s' % manifest)

        # The manifest commit's parents must be the parents' manifest refs,
        # in any order.
        manifest_commit_parents = GitCommit(manifest_ref).parents
        if sorted(manifest_commit_parents) != sorted(git_parents):
            # TODO: better error
            status.report('%s(%s) %s != %s' % (manifest, manifest_ref,
                                               manifest_commit_parents,
                                               git_parents))

        # TODO: check that manifest content matches changeset content

        changes = get_changes(manifest_ref, git_parents)
        for path, hg_file, hg_fileparents in changes:
            if hg_file != NULL_NODE_ID and (hg_file == HG_EMPTY_FILE or
                                            GitHgHelper.seen('hg2git',
                                                             hg_file)):
                if full_file_check:
                    file = store.file(hg_file, hg_fileparents, git_parents,
                                      store.manifest_path(path))
                    valid = file.node == file.sha1
                else:
                    valid = GitHgHelper.check_file(hg_file,
                                                   *hg_fileparents)
                if not valid:
                    status.report(
                        'Sha1 mismatch for file %s in manifest %s'
                        % (hg_file, manifest_ref))

    # The remaining repairs only run for a full check in which nothing
    # broken was reported.
    if not args.commit and not status('broken'):
        # Compare the manifest heads the store started with against the
        # ones the helper now reports.
        store_manifest_heads = set(store._manifest_heads_orig)
        manifest_heads = set(GitHgHelper.heads('manifests'))
        if store_manifest_heads != manifest_heads:
            def iter_manifests(a, b):
                # rev-list arguments: heads only in a, excluding everything
                # reachable from b.
                for h in a - b:
                    yield h
                for h in b:
                    yield '^%s' % h

            # The loops below only emit fix messages.
            for m, t, p in GitHgHelper.rev_list(
                    '--topo-order', '--full-history', '--reverse',
                    *iter_manifests(manifest_heads, store_manifest_heads)):
                status.fix('Missing manifest commit in manifest branch: %s'
                           % m)

            for m, t, p in GitHgHelper.rev_list(
                    '--topo-order', '--full-history', '--reverse',
                    *iter_manifests(store_manifest_heads, manifest_heads)):
                status.fix('Removing metadata commit %s with no corresponding '
                           'changeset' % (m))

            for h in store_manifest_heads - manifest_heads:
                if GitHgHelper.seen('hg2git', store.hg_manifest(h)):
                    status.fix('Removing non-head reference to %s in manifests'
                               ' metadata.' % h)
    # Stays empty unless the conditions below hold, so both removal loops
    # are no-ops for a restricted or broken check.
    dangling = ()
    if not args.commit and not status('broken'):
        dangling = GitHgHelper.dangling('hg2git')
    for obj in dangling:
        status.fix('Removing dangling metadata for ' + obj)
        # Theoretically, we should figure out if they are files, manifests
        # or changesets and set the right variable accordingly, but in
        # practice, it makes no difference. Reevaluate when GitHgStore.close
        # is modified, though.
        GitHgHelper.set('file', obj, NULL_NODE_ID)
        GitHgHelper.set('file-meta', obj, NULL_NODE_ID)

    if not args.commit and not status('broken'):
        dangling = GitHgHelper.dangling('git2hg')
    for c in dangling:
        status.fix('Removing dangling note for commit ' + c)
        GitHgHelper.set('changeset-metadata', c, NULL_NODE_ID)

    if status('broken'):
        status.info(
            'Your git-cinnabar repository appears to be corrupted. There\n'
            'are known issues in older revisions that have been fixed.\n'
            'Please try running the following command to reset:\n'
            '  git cinnabar reclone\n\n'
            'Please note this command may change the commit sha1s. Your\n'
            'local branches will however stay untouched.\n'
            'Please report any corruption that fsck would detect after a\n'
            'reclone.')

    if not args.commit:
        status.info('Checking head references...')
        # Heads per branch, as derived from the changesets walked above.
        computed_heads = defaultdict(set)
        for branch, head in dag.all_heads():
            computed_heads[branch].add(head)

        # Reconcile the stored heads of each branch with the computed ones.
        for branch in sorted(dag.tags()):
            stored_heads = store.heads({branch})
            for head in computed_heads[branch] - stored_heads:
                status.fix('Adding missing head %s in branch %s' %
                           (head, branch))
                store.add_head(head)
            for head in stored_heads - computed_heads[branch]:
                status.fix('Removing non-head reference to %s in branch %s' %
                           (head, branch))
                del store._hgheads[head]

    metadata_commit = Git.resolve_ref('refs/cinnabar/metadata')
    if status('broken'):
        # Record the broken metadata commit; the function returns without
        # calling store.close() in this case.
        Git.update_ref('refs/cinnabar/broken', metadata_commit)
        return 1

    if args.full:
        Git.update_ref('refs/cinnabar/checked', metadata_commit)
    interval_expired('fsck', 0)
    store.close()

    if status('fixed'):
        return 2
    return 0
示例#21
0
def bundle_data(store, commits):
    """Generate the changesets, manifests and files that make up a bundle.

    Each item of `commits` is a whitespace-separated string: its first
    field is a commit and the remaining fields (at most two) are that
    commit's parents.

    The generator yields, in this order:
      - one changeset object per item of `commits`, followed by None;
      - one manifest object per distinct non-null manifest referenced by
        those changesets (in first-seen order), followed by None;
      - for each collected path, in sorted order: the path itself, then
        the file objects recorded for it (each node once), then None;
      - a final None.

    Commits for which `store.read_changeset_data` returns None get their
    hg metadata created through `store.create_hg_metadata`, and the
    resulting changeset is passed to `store.add_head`.
    """
    # manifest -> first changeset that referenced it, in insertion order.
    manifests = OrderedDict()
    # path -> list of (file node, file parents, changeset) tuples.
    files = defaultdict(list)

    for line in progress_iter('Bundling %d changesets', commits):
        fields = line.split()
        node, parents = fields[0], fields[1:]
        assert len(parents) <= 2
        changeset_data = store.read_changeset_data(node)
        created = changeset_data is None
        if created:
            # No data stored for this commit yet: create it, then re-read.
            store.create_hg_metadata(node, parents)
            changeset_data = store.read_changeset_data(node)
        changeset = changeset_data['changeset']
        hg_changeset = store.changeset(changeset, include_parents=True)
        if created:
            store.add_head(hg_changeset.node, hg_changeset.parent1,
                           hg_changeset.parent2)
        yield hg_changeset
        manifest = changeset_data['manifest']
        if manifest != NULL_NODE_ID:
            # Only the first changeset seen for a manifest is remembered.
            manifests.setdefault(manifest, changeset)

    # End of the changesets section.
    yield None

    for manifest, changeset in progress_iter('Bundling %d manifests',
                                             manifests.iteritems()):
        hg_manifest = store.manifest(manifest, include_parents=True)
        hg_manifest.changeset = changeset
        yield hg_manifest
        manifest_ref = store.manifest_ref(manifest)
        if isinstance(manifest_ref, Mark):
            # The ref is a Mark: take the changed entries from the
            # manifest object's own `modified` mapping, keeping only the
            # entries whose sha1 is a plain string.
            for path, (sha1, _attr) in hg_manifest.modified.iteritems():
                if isinstance(sha1, types.StringType):
                    file_obj = store.file(sha1)
                    files[path].append((sha1, file_obj.parents, changeset))
        else:
            # Otherwise, compute the changes against the parent manifests.
            parent_refs = tuple(store.manifest_ref(parent)
                                for parent in hg_manifest.parents)
            changes = get_changes(manifest_ref, parent_refs, 'hg')
            for path, hg_file, hg_fileparents in changes:
                if hg_file != NULL_NODE_ID:
                    files[path].append((hg_file, hg_fileparents, changeset))

    # End of the manifests section.
    yield None

    def iter_files(files):
        """Yield, per sorted path: the path, its file objects, then None."""
        for path in sorted(files):
            yield path
            seen = set()
            for node, parents, changeset in files[path]:
                if node not in seen:
                    seen.add(node)
                    file_obj = store.file(node)
                    file_obj.set_parents(*parents)
                    file_obj.changeset = changeset
                    assert file_obj.node == file_obj.sha1
                    yield file_obj

            yield None

    class ChunkFilter(object):
        """Stateful callable passed to progress_iter along with the files.

        A call returns a true value only when the previously seen chunk
        was truthy and the current chunk is not None.
        NOTE(review): presumably progress_iter uses this to decide which
        items count towards the displayed total -- confirm there.
        """

        def __init__(self):
            self._previous = None

        def __call__(self, chunk):
            ret = self._previous and chunk is not None
            self._previous = chunk
            return ret

    for chunk in progress_iter('Bundling %d files', iter_files(files),
                               ChunkFilter()):
        yield chunk

    # End of the files section.
    yield None