Пример #1
0
def unbundle_fh(fh, path):
    """Return an unpacker for the Mercurial bundle readable from ``fh``.

    The first four bytes of the stream are consumed as a header: a two-byte
    magic followed by a two-byte version.

    fh: binary file-like object positioned at the start of the bundle.
    path: location of the bundle, only used to build error messages.

    Returns a ``cg1unpacker`` for version ``10`` bundles, or an
    ``unbundle20`` instance for ``2x`` bundles when ``unbundle20`` is
    available.

    Raises Exception when the magic is not ``HG`` or the version is not
    supported.
    """
    header = readexactly(fh, 4)
    magic, version = header[:2], header[2:4]
    if magic != b'HG':
        raise Exception('%s: not a Mercurial bundle' % fsdecode(path))
    if version == b'10':
        # Version 10 bundles carry two more bytes, handed to cg1unpacker
        # as its `alg` argument.
        alg = readexactly(fh, 2)
        return cg1unpacker(fh, alg)
    if unbundle20 and version.startswith(b'2'):
        return unbundle20(get_ui(), fh)
    # The version field is arbitrary input: decode leniently so that garbage
    # bytes still produce the intended error instead of UnicodeDecodeError.
    raise Exception('%s: unsupported bundle version %s' %
                    (fsdecode(path), version.decode('ascii', 'replace')))
Пример #2
0
def reclone(args):
    '''reclone all mercurial remotes

    Rolls the cinnabar metadata back to the null node, then runs
    `git remote update --prune` for every remote whose url starts with
    `hg::` or `hg://`, unless `remote.<name>.skipDefaultUpdate` is `true`.

    args: not used by this function.
    '''

    from cinnabar.cmd.rollback import do_rollback
    git_config = {}
    metadata_commit = Git.resolve_ref('refs/cinnabar/metadata')
    if metadata_commit:
        git_config['cinnabar.previous-metadata'] = \
            metadata_commit.decode('ascii')
    # TODO: Avoid resetting at all, possibly leaving the repo with no metadata
    # if this is interrupted somehow.
    do_rollback(NULL_NODE_ID.decode('ascii'))
    # Raw string: `\.` is not a valid escape sequence in a normal literal.
    for line in Git.iter('config', '--get-regexp', r'remote\..*\.url'):
        config, url = line.split()
        name = config[len('remote.'):-len('.url')]
        skip_pref = 'remote.%s.skipDefaultUpdate' % name.decode('ascii')
        # Git.config() returns bytes; comparing against the str 'true' would
        # never match and the skipDefaultUpdate preference would be ignored.
        if (url.startswith((b'hg::', b'hg://'))
                and Git.config(skip_pref) != b'true'):
            Git.run('remote',
                    'update',
                    '--prune',
                    fsdecode(name),
                    config=git_config)
            # The previous-metadata hint is only passed to the first update.
            git_config = {}

    print('Please note that reclone left your local branches untouched.')
    print('They may be based on entirely different commits.')
Пример #3
0
    def _ensure_ready(self):
        """Parse the changeset part of the bundle on first use.

        Does nothing when ``self._store`` is already None, which is how a
        previous run is recognized. Otherwise it reads the first part of
        the bundle, fills ``self._dag``, ``self._heads`` and
        ``self._branchmap`` from its changesets, and sets
        ``self._unbundler`` to a generator that replays the recorded first
        part followed by the next two parts of the bundle.
        """
        assert hasattr(self, '_store')
        store = self._store
        if store is None:
            return
        self._store = None

        raw_unbundler = unbundler(self._bundle)
        self._dag = gitdag()
        branches = set()

        # Everything read from the first part is kept so it can be replayed.
        chunks = []

        def iter_and_store(iterator):
            for item in iterator:
                chunks.append(item)
                yield item

        title = 'Analyzing {} changesets from ' + fsdecode(self._file)
        first_part = next(raw_unbundler, None)
        changeset_chunks = ChunksCollection(
            progress_iter(title, iter_and_store(first_part)))

        initialized = changeset_chunks.iter_initialized(
            lambda x: x, store.changeset, Changeset.from_chunk)
        for changeset in initialized:
            extra = changeset.extra or {}
            branch = extra.get(b'branch', b'default')
            branches.add(branch)
            parents = tuple(
                parent
                for parent in (changeset.parent1, changeset.parent2)
                if parent != NULL_NODE_ID)
            self._dag.add(changeset.node, parents, branch)

        untagged_heads = [
            unhexlify(head)
            for head in self._dag.all_heads(with_tags=False)
        ]
        untagged_heads.reverse()
        self._heads = tuple(untagged_heads)

        self._branchmap = defaultdict(list)
        for tag, node in self._dag.all_heads():
            self._branchmap[tag].append(unhexlify(node))

        def repo_unbundler():
            # First the replayed part, then the two following parts.
            yield iter(chunks)
            for _ in range(2):
                yield next(raw_unbundler, None)
            # Nothing is expected after the third part.
            if next(raw_unbundler, None) is not None:
                assert False

        self._unbundler = repo_unbundler()
Пример #4
0
def get_clonebundle(repo):
    """Fetch the clone bundle configured or advertised for ``repo``.

    The url comes from the `cinnabar.clonebundle` configuration for the
    repo's remote, falling back to ``get_clonebundle_url(repo)``.

    Returns the result of ``get_bundle(url)``, or None when there is no url
    or when its scheme is neither http nor https.
    """
    url = Git.config('cinnabar.clonebundle', remote=repo.remote)
    if not url:
        url = get_clonebundle_url(repo)

    if not url:
        return None

    parsed_url = urlparse(url)
    if parsed_url.scheme not in (b'http', b'https'):
        # logging.warn is a deprecated alias of logging.warning.
        logging.warning('Server advertizes clone bundle but provided a non '
                        'http/https url. Skipping.')
        return None

    sys.stderr.write('Getting clone bundle from %s\n' % fsdecode(url))
    return get_bundle(url)
Пример #5
0
def do_cinnabarclone(repo, manifest, store, limit_schemes=True):
    """Pick a git repository from a cinnabarclone manifest and merge from it.

    repo: object providing ``remote`` and ``url()``.
    manifest: bytes, one candidate per line, formatted as
        ``<url>[#<branch>] [graft=<sha1>[,<sha1>...]]``.
    store: object providing ``_graft`` and ``merge(url, hg_url, branch)``.
    limit_schemes: when true, only http, https and git urls are accepted.

    Candidates with a graft are preferred over those without, unless the
    `cinnabar.graft` configuration is explicitly false, in which case
    candidates with a graft are ignored.

    Returns False when no usable url was found, otherwise the result of
    ``store.merge``.
    """
    GRAFT = {
        None: None,
        b'false': False,
        b'true': True,
    }
    try:
        enable_graft = Git.config('cinnabar.graft',
                                  remote=repo.remote,
                                  values=GRAFT)
    except InvalidConfig:
        enable_graft = None

    url = None
    branch = None
    candidates = []
    for line in manifest.splitlines():
        line = line.strip()
        if not line:
            continue
        spec, _, params = line.partition(b' ')
        params = {
            k: v
            for k, _, v in (p.partition(b'=') for p in params.split())
        }
        graft = params.pop(b'graft', None)
        if params:
            # Future proofing: ignore lines with unknown params, even if we
            # support some that are present.
            continue
        # When grafting, ignore lines without a graft revision.
        if store._graft and not graft:
            continue
        # When explicitly disabling graft, ignore lines with a graft revision.
        if enable_graft is False and graft:
            continue

        graft = graft.split(b',') if graft else []
        graft_u = [g.decode('ascii') for g in graft if SHA1_RE.match(g)]
        # Any revision that doesn't look like a sha1 disqualifies the line.
        if len(graft) != len(graft_u):
            continue
        if graft:
            revs = list(Git.iter('rev-parse', '--revs-only', *graft_u))
            if len(revs) != len(graft):
                continue
            # We apparently have all the grafted revisions locally, ensure
            # they're actually reachable.
            with open(os.devnull, 'wb') as devnull:
                reachable = any(
                    Git.iter('rev-list',
                             '--branches',
                             '--tags',
                             '--remotes',
                             '--max-count=1',
                             '--ancestry-path',
                             '--stdin',
                             stdin=(b'^%s^@' % c for c in graft),
                             stderr=devnull))
            if not reachable:
                continue

        candidates.append((spec, len(graft) != 0))

    if enable_graft is not False:
        graft_filters = [True, False]
    else:
        graft_filters = [False]
    for graft_filter in graft_filters:
        for spec, graft in candidates:
            if graft == graft_filter:
                # Split the optional `#branch` suffix off the url exactly
                # once; splitting the result again always lost the branch.
                url, _, branch = spec.partition(b'#')
                branch = branch or None
                if url:
                    break
        if url:
            break

    if not url:
        logging.warning('Server advertizes cinnabarclone but didn\'t provide '
                        'a git repository url to fetch from.')
        return False

    parsed_url = urlparse(url)
    if limit_schemes and parsed_url.scheme not in (b'http', b'https', b'git'):
        logging.warning('Server advertizes cinnabarclone but provided a non '
                        'http/https git repository. Skipping.')
        return False
    sys.stderr.write('Fetching cinnabar metadata from %s\n' % fsdecode(url))
    sys.stderr.flush()
    return store.merge(url, repo.url(), branch)
Пример #6
0
 def lookup(self, key):
     """Return the binary node that ``HgRepoHelper.lookup`` gives for ``key``.

     Raises Exception when the helper returns nothing for ``key``.
     """
     hex_node = HgRepoHelper.lookup(key)
     if not hex_node:
         raise Exception('Unknown revision %s' % fsdecode(key))
     return unhexlify(hex_node)
Пример #7
0
def push(repo, store, what, repo_heads, repo_branches, dry_run=False):
    """Bundle the commits missing from ``repo`` and send them to it.

    repo: remote repository; ``local()``, ``capable()`` and ``unbundle()``
        are used here.
    store: provides ``heads()``, ``changeset_ref()`` and ``hg_changeset()``.
    what: re-iterable collection of 3-tuples. The first item is a git
        commit to push (falsy entries are skipped), the third is the force
        flag. The second item is not used here.
    repo_heads: hex heads of the remote. Replaced by ``[b'force']`` when
        every entry of ``what`` is forced.
    repo_branches: passed to ``store.heads()``.
    dry_run: when true, nothing is sent.

    Returns ``gitdag(push_commits)`` when something was pushed or on a dry
    run, and an empty tuple otherwise.

    Raises Exception when a new root would be pushed without force to a
    remote that has heads, or when the remote replies with an
    ``error:abort`` bundle2 part.
    """
    def heads():
        for sha1 in store.heads(repo_branches):
            yield b'^%s' % store.changeset_ref(sha1)

    def local_bases():
        h = chain(heads(), (w for w, _, _ in what if w))
        for c, t, p in GitHgHelper.rev_list(b'--topo-order', b'--full-history',
                                            b'--boundary', *h):
            # Only boundary commits (prefixed with `-`) are of interest.
            if c[:1] != b'-':
                continue
            yield store.hg_changeset(c[1:])

        for w, _, _ in what:
            if w:
                rev = store.hg_changeset(w)
                if rev:
                    yield rev

    common = findcommon(repo, store, set(local_bases()))
    logging.info('common: %s', common)

    def excluded_common():
        for sha1 in common:
            yield b'^%s' % store.changeset_ref(sha1)

    rev_args = chain(excluded_common(), (w for w, _, _ in what if w))
    push_commits = [(c, p) for c, t, p in GitHgHelper.rev_list(
        b'--topo-order', b'--full-history', b'--parents', b'--reverse',
        *rev_args)]

    pushed = False
    if push_commits:
        has_root = any(not p for (c, p) in push_commits)
        force = all(v for _, _, v in what)
        if has_root and repo_heads:
            if not force:
                raise Exception('Cannot push a new root')
            else:
                logging.warning('Pushing a new root')
        if force:
            repo_heads = [b'force']
        else:
            if not repo_heads:
                repo_heads = [NULL_NODE_ID]
            repo_heads = [unhexlify(h) for h in repo_heads]
    if push_commits and not dry_run:
        if repo.local():
            repo.local().ui.setconfig(b'server', b'validate', True)
        if unbundle20:
            b2caps = repo.capable(b'bundle2') or {}
        else:
            b2caps = {}
        if b2caps:
            b2caps = decodecaps(unquote_to_bytes(b2caps))
        bundle2_log = logging.getLogger('bundle2')
        bundle2_log.debug('%r', b2caps)
        if b2caps:
            b2caps[b'replycaps'] = encodecaps({b'error': [b'abort']})
        cg = create_bundle(store, push_commits, b2caps)
        if not isinstance(repo, HelperRepo):
            cg = chunkbuffer(cg)
            if not b2caps:
                cg = cg1unpacker(cg, b'UN')
        reply = repo.unbundle(cg, repo_heads, b'')
        if unbundle20 and isinstance(reply, unbundle20):
            parts = iter(reply.iterparts())
            for part in parts:
                bundle2_log.debug('part: %s', part.type)
                bundle2_log.debug('params: %r', part.params)
                if part.type == b'output':
                    sys.stderr.write(fsdecode(part.read()))
                elif part.type == b'reply:changegroup':
                    # TODO: should check params['in-reply-to']
                    reply = int(part.params[b'return'])
                elif part.type == b'error:abort':
                    message = part.params[b'message'].decode('utf-8')
                    hint = part.params.get(b'hint')
                    if hint:
                        message += '\n\n' + hint.decode('utf-8')
                    raise Exception(message)
                else:
                    # Logger names must be str; a bytes name raises
                    # TypeError instead of logging the warning.
                    bundle2_log.warning(
                        'ignoring bundle2 part: %s', part.type)
        pushed = reply != 0
    return gitdag(push_commits) if pushed or dry_run else ()
Пример #8
0
    def merge(self, git_repo_url, hg_repo_url, branch=None):
        """Import cinnabar metadata from a git repository or a git bundle.

        git_repo_url: url of a git repository. When `git ls-remote` lists
            no ref for it and the scheme is http or https, the url is read
            as a v2 git bundle instead.
        hg_repo_url: passed to ``self._try_merge_branches`` to get the
            candidate branches when ``branch`` is not given.
        branch: optional branch to look for in the remote refs.

        Returns True when the metadata was imported, False when it could
        not be found, fetched or validated.
        """
        # Eventually we'll want to handle a full merge, but for now, we only
        # handle the case where we don't have metadata to begin with.
        # The caller should avoid calling this function otherwise.
        assert not self._has_metadata
        remote_refs = OrderedDict()
        with open(os.devnull, 'wb') as devnull:
            for line in Git.iter('ls-remote', fsdecode(git_repo_url),
                                 stderr=devnull):
                sha1, ref = line.split(None, 1)
                remote_refs[ref] = sha1
        bundle = None
        if not remote_refs and urlparse(git_repo_url).scheme in (b'http',
                                                                 b'https'):
            try:
                bundle = HTTPReader(git_repo_url)
            except URLError as e:
                logging.error(e.reason)
                return False
            BUNDLE_SIGNATURE = b'# v2 git bundle\n'
            signature = bundle.read(len(BUNDLE_SIGNATURE))
            if signature != BUNDLE_SIGNATURE:
                logging.error('Could not find cinnabar metadata')
                return False
            bundle = io.BufferedReader(bundle)
            # The bundle header lists `<sha1> <ref>` lines up to an empty
            # line.
            while True:
                line = bundle.readline().rstrip()
                if not line:
                    break
                sha1, ref = line.split(b' ', 1)
                remote_refs[ref] = sha1
        if branch:
            branches = [branch]
        else:
            branches = self._try_merge_branches(hg_repo_url)

        ref = self._find_branch(branches, remote_refs)
        if ref is None:
            logging.error('Could not find cinnabar metadata')
            return False

        if bundle:
            args = ('-v',) if util.progress else ()
            # Keep the devnull handle open until index-pack has exited, then
            # close it rather than leaking it.
            with open(os.devnull, 'wb') as devnull:
                proc = GitProcess('index-pack', '--stdin', '--fix-thin',
                                  *args,
                                  stdin=subprocess.PIPE,
                                  stdout=devnull)
                shutil.copyfileobj(bundle, proc.stdin)
                failed = proc.wait()
        else:
            fetch = ['fetch', '--no-tags', '--no-recurse-submodules', '-q']
            fetch.append('--progress' if util.progress else '--no-progress')
            fetch.append(fsdecode(git_repo_url))
            cmd = fetch + [fsdecode(ref) + ':refs/cinnabar/fetch']
            proc = GitProcess(*cmd, stdout=sys.stdout)
            failed = proc.wait()
        if failed:
            logging.error('Failed to fetch cinnabar metadata.')
            return False

        # Do some basic validation on the metadata we just got.
        commit = GitCommit(remote_refs[ref])
        if b'cinnabar@git' not in commit.author:
            logging.error('Invalid cinnabar metadata.')
            return False

        flags = set(commit.body.split())
        if b'files-meta' not in flags or b'unified-manifests-v2' not in flags \
                or len(commit.parents) != len(self.METADATA_REFS):
            logging.error('Invalid cinnabar metadata.')
            return False

        # At this point, we'll just assume this is good enough.

        # Get replace refs.
        if commit.tree != EMPTY_TREE:
            errors = False
            # First remote ref seen for each sha1.
            by_sha1 = {}
            for k, v in util.iteritems(remote_refs):
                if v not in by_sha1:
                    by_sha1[v] = k
            needed = []
            for line in Git.ls_tree(commit.tree):
                mode, typ, sha1, path = line
                if sha1 in by_sha1:
                    replace_ref = b'refs/cinnabar/replace/%s' % path
                    if bundle:
                        Git.update_ref(replace_ref, sha1)
                    else:
                        needed.append(
                            fsdecode(b':'.join((by_sha1[sha1],
                                                replace_ref))))
                else:
                    logging.error('Missing commit: %s', sha1)
                    errors = True
            if errors:
                return False

            if not bundle:
                cmd = fetch + needed
                proc = GitProcess(*cmd, stdout=sys.stdout)
                if proc.wait():
                    logging.error('Failed to fetch cinnabar metadata.')
                    return False

        Git.update_ref(b'refs/cinnabar/metadata', commit.sha1)
        self._metadata_sha1 = commit.sha1
        GitHgHelper.reload()
        Git.delete_ref(b'refs/cinnabar/fetch')

        # TODO: avoid the duplication of code with __init__
        metadata = self.metadata()

        if not metadata:
            # This should never happen, but just in case.
            logging.warning('Could not find cinnabar metadata')
            Git.delete_ref(b'refs/cinnabar/metadata')
            GitHgHelper.reload()
            return False

        metadata, refs = metadata
        self._has_metadata = True
        self._metadata_refs = refs if metadata else {}
        changesets_ref = self._metadata_refs.get(b'refs/cinnabar/changesets')
        self._generation = 0
        if changesets_ref:
            commit = GitCommit(changesets_ref)
            for n, head in enumerate(commit.body.splitlines()):
                hghead, head_branch = head.split(b' ', 1)
                self._hgheads._previous[hghead] = (head_branch, 1)
                self._generation = n + 1

        self._manifest_heads_orig = set(GitHgHelper.heads(b'manifests'))

        for line in Git.ls_tree(metadata.tree):
            mode, typ, sha1, path = line
            self._replace[path] = sha1

        return True
Пример #9
0
    def push(self, *refspecs):
        """Handle a push request for the given ``src:dst`` refspecs.

        A leading ``+`` on the source marks the refspec as forced. One
        status line per destination is written to ``self._helper``,
        followed by an empty line. Afterwards the store is closed, with a
        rollback unless the `cinnabar.data` setting says the metadata
        should be kept.

        Returns 1 when the `cinnabar.data` configuration is invalid, and
        None otherwise.
        """
        default = b'never' if self._graft else b'phase'
        values = {
            None: default,
            b'': default,
            b'never': b'never',
            b'phase': b'phase',
            b'always': b'always',
        }
        try:
            data = Git.config('cinnabar.data', self._remote.name,
                              values=values)
        except InvalidConfig as e:
            logging.error(str(e))
            return 1

        pushes = []
        for refspec in refspecs:
            src, dst = refspec.split(b':', 1)
            pushes.append((Git.resolve_ref(fsdecode(src.lstrip(b'+'))),
                           dst,
                           src.startswith(b'+')))

        if not self._repo.capable(b'unbundle'):
            for source, dest, force in pushes:
                self._helper.write(
                    b'error %s Remote does not support the "unbundle" '
                    b'capability\n' % dest)
            self._helper.write(b'\n')
            self._helper.flush()
            return

        repo_heads = self._branchmap.heads()
        PushStore.adopt(self._store, self._graft)
        pushed = push(self._repo, self._store, pushes, repo_heads,
                      self._branchmap.names(), self._dry_run)

        # Per destination: True, an error message, or a falsy value when
        # nothing changed.
        status = {}
        for source, dest, _ in pushes:
            if dest.startswith(b'refs/tags/'):
                status[dest] = (b'Pushing tags is unsupported' if source
                                else b'Deleting remote tags is unsupported')
                continue
            bookmark_prefix = strip_suffix(
                (self._bookmark_template or b''), b'%s')
            if not (bookmark_prefix and dest.startswith(bookmark_prefix)):
                # Not a bookmark: a plain branch ref.
                if source:
                    status[dest] = bool(len(pushed))
                else:
                    status[dest] = \
                        b'Deleting remote branches is unsupported'
                continue
            name = unquote_to_bytes(dest[len(bookmark_prefix):])
            if source:
                source = self._store.hg_changeset(source)
            status[dest] = self._repo.pushkey(
                b'bookmarks', name, self._bookmarks.get(name, b''),
                source or b'')

        for source, dest, force in pushes:
            outcome = status[dest]
            if outcome is True:
                self._helper.write(b'ok %s\n' % dest)
            elif outcome:
                self._helper.write(b'error %s %s\n' % (dest, outcome))
            else:
                self._helper.write(b'error %s nothing changed on remote\n'
                                   % dest)
        self._helper.write(b'\n')
        self._helper.flush()

        # Decide whether the metadata created for this push is kept.
        keep_data = data
        if not pushed or self._dry_run:
            keep_data = False
        elif data == b'always':
            keep_data = True
        elif data == b'never':
            keep_data = False
        elif data == b'phase':
            phases = self._repo.listkeys(b'phases')
            drafts = {}
            if not phases.get(b'publishing', False):
                drafts = {node for node, is_draft in iteritems(phases)
                          if int(is_draft)}
            if not drafts:
                keep_data = True
            else:
                def draft_commits():
                    for draft in drafts:
                        commit = self._store.changeset_ref(draft)
                        if commit:
                            yield b'^%s^@' % commit
                    for head in pushed.heads():
                        yield head

                rev_list_args = [b'--ancestry-path', b'--topo-order']
                rev_list_args.extend(draft_commits())

                pushed_drafts = tuple(
                    c for c, t, p in GitHgHelper.rev_list(*rev_list_args))

                # Theoretically, we could have commits with no
                # metadata that the remote declares are public, while
                # the rest of our push is in a draft state. That is
                # however so unlikely that it's not worth the effort
                # to support partial metadata storage.
                keep_data = not pushed_drafts

        self._store.close(rollback=not keep_data)
Пример #10
0
    def import_(self, *refs):
        """Handle an import request for the given refs.

        Existing refs under refs/cinnabar/refs/heads, refs/cinnabar/hg and
        refs/cinnabar/HEAD are deleted first. If there are unknown heads,
        the corresponding bundle is requested from the repository. The
        wanted refs are then recreated under refs/cinnabar/, the store is
        closed, and `done` is written to ``self._helper``. Hints about
        pruning and tags may be written to stderr.
        """
        # If anything wrong happens at any time, we risk git picking
        # the existing refs/cinnabar refs, so remove them preventively.
        stale_refs = Git.for_each_ref('refs/cinnabar/refs/heads',
                                      'refs/cinnabar/hg',
                                      'refs/cinnabar/HEAD')
        for sha1, ref in stale_refs:
            Git.delete_ref(ref)

        def resolve_head(head):
            target = self._refs.get(head)
            if target is not None and target.startswith(b'@'):
                # An `@name` value points at another entry of self._refs.
                target = self._refs.get(target[1:])
            return target

        wanted_refs = {}
        for head in refs:
            resolved = resolve_head(head)
            if resolved:
                wanted_refs[head] = resolved

        heads = wanted_refs.values() or self._branchmap.heads()

        try:
            # Mercurial can be an order of magnitude slower when creating
            # a bundle when not giving topological heads, which some of
            # the branch heads might not be.
            # http://bz.selenic.com/show_bug.cgi?id=4595
            # So, when we're pulling all branch heads, just ask for the
            # topological heads instead.
            # `heads` might contain known heads, if e.g. the remote has
            # never been pulled from, but we happen to have some of its
            # heads locally already.
            if self._has_unknown_heads:
                unknown_heads = self._branchmap.unknown_heads()
                if set(heads).issuperset(unknown_heads):
                    heads = set(self._branchmap.heads()) & unknown_heads
                getbundle(self._repo, self._store, heads,
                          self._branchmap.names())
        except Exception:
            # Leave nothing for the `finally` clause to update.
            wanted_refs = {}
            raise
        finally:
            for name, node in iteritems(wanted_refs):
                Git.update_ref(b'refs/cinnabar/' + name,
                               self._store.changeset_ref(node))

        self._store.close()

        self._helper.write(b'done\n')
        self._helper.flush()

        remote_name = self._remote.name
        if remote_name and self._refs_style('heads'):
            if Git.config('fetch.prune', remote_name) != b'true':
                prune = 'remote.%s.prune' % fsdecode(remote_name)
                sys.stderr.write(
                    'It is recommended that you set "%(conf)s" or '
                    '"fetch.prune" to "true".\n'
                    '  git config %(conf)s true\n'
                    'or\n'
                    '  git config fetch.prune true\n'
                    % {'conf': prune}
                )

        if self._store.tag_changes:
            sys.stderr.write(
                '\nRun the following command to update tags:\n')
            sys.stderr.write('  git fetch --tags hg::tags: tag "*"\n')
Пример #11
0
def fsck_quick(force=False):
    """Run a quick consistency check of the git-cinnabar metadata.

    The visible steps are, in order: validating the metadata commit,
    checking changeset heads, loading the manifests dag, checking manifest
    heads, and checking the files that are new compared to the previously
    checked manifests.

    force: check again even when refs/cinnabar/checked already points at
        the current metadata.

    Returns 1 when the metadata is missing, incompatible, corrupted, or
    when one of the steps reported a problem, and 0 when the metadata was
    already checked and ``force`` is false.
    NOTE(review): no return statement follows the file check in the code
    visible here -- confirm whether the function continues beyond it.
    """
    status = FsckStatus()
    store = GitHgStore()

    metadata_commit = Git.resolve_ref('refs/cinnabar/metadata')
    if not metadata_commit:
        status.info('There does not seem to be any git-cinnabar metadata.\n'
                    'Is this a git-cinnabar clone?')
        return 1
    broken_metadata = Git.resolve_ref('refs/cinnabar/broken')
    checked_metadata = Git.resolve_ref('refs/cinnabar/checked')
    # Metadata recorded as both checked and broken doesn't count as checked.
    if checked_metadata == broken_metadata:
        checked_metadata = None
    if metadata_commit == checked_metadata and not force:
        status.info('The git-cinnabar metadata was already checked and is '
                    'presumably clean.\n'
                    'Try `--force` if you want to check anyways.')
        return 0
    elif force:
        # Forget the previous check so that everything is checked again.
        checked_metadata = None

    commit = GitCommit(metadata_commit)
    if commit.body != b'files-meta unified-manifests-v2':
        status.info(
            'The git-cinnabar metadata is incompatible with this version.\n'
            'Please use the git-cinnabar version it was used with last.\n')
        return 1
    if len(commit.parents) > 6 or len(commit.parents) < 5:
        status.report('The git-cinnabar metadata seems to be corrupted in '
                      'unexpected ways.\n')
        return 1
    changesets, manifests, hg2git, git2hg, files_meta = commit.parents[:5]

    commit = GitCommit(changesets)
    # Each line of the changesets commit body is `<node> <branch>`.
    heads = OrderedDict(
        (node, branch)
        for node, _, branch in (d.partition(b' ')
                                for d in commit.body.splitlines()))
    if len(heads) != len(commit.parents):
        status.report('The git-cinnabar metadata seems to be corrupted in '
                      'unexpected ways.\n')
        return 1

    manifest_nodes = []

    parents = None
    fix_changeset_heads = False

    def get_checked_metadata(num):
        # Returns the num-th parent of the previously checked metadata
        # commit as a GitCommit, or None when there is none.
        if not checked_metadata:
            return None
        commit = Git.resolve_ref('{}^{}'.format(
            checked_metadata.decode('ascii'), num))
        if commit:
            return GitCommit(commit)

    checked_commit = get_checked_metadata(1)
    # TODO: Check that the recorded heads are actually dag heads.
    # Heads that are parents of the previously checked commit are skipped.
    for c, changeset_node in progress_iter(
            'Checking {} changeset heads',
        ((c, node) for c, node in zip(commit.parents, heads)
         if not checked_commit or c not in checked_commit.parents)):
        gitsha1 = GitHgHelper.hg2git(changeset_node)
        if gitsha1 == NULL_NODE_ID:
            status.report('Missing hg2git metadata for changeset %s' %
                          changeset_node.decode('ascii'))
            continue
        if gitsha1 != c:
            if parents is None:
                parents = set(commit.parents)
            if gitsha1 not in parents:
                status.report('Inconsistent metadata:\n'
                              '  Head metadata says changeset %s maps to %s\n'
                              '  but hg2git metadata says it maps to %s' %
                              (changeset_node.decode('ascii'),
                               c.decode('ascii'), gitsha1.decode('ascii')))
                continue
            fix_changeset_heads = True
        changeset = store._changeset(c, include_parents=True)
        if not changeset:
            status.report('Missing git2hg metadata for git commit %s' %
                          c.decode('ascii'))
            continue
        if changeset.node != changeset_node:
            if changeset.node not in heads:
                status.report(
                    'Inconsistent metadata:\n'
                    '  Head metadata says %s maps to changeset %s\n'
                    '  but git2hg metadata says it maps to changeset %s' %
                    (c.decode('ascii'), changeset_node.decode('ascii'),
                     changeset.node.decode('ascii')))
                continue
            fix_changeset_heads = True
        if changeset.node != changeset.sha1:
            status.report('Sha1 mismatch for changeset %s' %
                          changeset.node.decode('ascii'))
            continue
        changeset_branch = changeset.branch or b'default'
        if heads[changeset.node] != changeset_branch:
            status.report(
                'Inconsistent metadata:\n'
                '  Head metadata says changeset %s is in branch %s\n'
                '  but git2hg metadata says it is in branch %s' %
                (changeset.node.decode('ascii'), fsdecode(
                    heads[changeset.node]), fsdecode(changeset_branch)))
            continue
        manifest_nodes.append(changeset.manifest)

    if status('broken'):
        return 1

    # Rebuilding manifests benefits from limiting the difference with
    # the last rebuilt manifest. Similarly, building the list of unique
    # files in all manifests benefits from that too.
    # Unfortunately, the manifest heads are not ordered in a topological
    # relevant matter, and the differences between two consecutive manifests
    # can be much larger than they could be. The consequence is spending a
    # large amount of time rebuilding the manifests and gathering the files
    # list. It's actually faster to attempt to reorder them according to
    # some heuristics first, such that the differences are smaller.
    # Here, we use the depth from the root node(s) to reorder the manifests.
    # This doesn't give the most optimal ordering, but it's already much
    # faster. On a clone of multiple mozilla-* repositories with > 1400 heads,
    # it's close to an order of magnitude difference on the "Checking
    # manifests" loop.
    depths = {}
    roots = set()
    manifest_queue = []
    revs = []
    revs.append(b'%s^@' % manifests)
    if checked_metadata:
        revs.append(b'^%s^2^@' % checked_metadata)
    for m, _, parents in progress_iter(
            'Loading {} manifests',
            GitHgHelper.rev_list(b'--topo-order', b'--reverse',
                                 b'--full-history', *revs)):
        manifest_queue.append((m, parents))
        for p in parents:
            # A parent without a recorded depth is treated as a root.
            if p not in depths:
                roots.add(p)
            depths[m] = max(depths.get(p, 0) + 1, depths.get(m, 0))

    if status('broken'):
        return 1

    # TODO: check that all manifest_nodes gathered above are available in the
    # manifests dag, and that the dag heads are the recorded heads.
    manifests_commit = GitCommit(manifests)
    checked_commit = get_checked_metadata(2)
    depths = [(depths.get(p, 0), p) for p in manifests_commit.parents
              if not checked_commit or p not in checked_commit.parents]
    manifests_commit_parents = [p for _, p in sorted(depths)]
    previous = None
    # (path, sha1) pairs of the files found in the checked manifest heads.
    all_interesting = set()
    for m in progress_iter('Checking {} manifest heads',
                           manifests_commit_parents):
        c = GitCommit(m)
        if not SHA1_RE.match(c.body):
            status.report('Invalid manifest metadata in git commit %s' %
                          m.decode('ascii'))
            continue
        gitsha1 = GitHgHelper.hg2git(c.body)
        if gitsha1 == NULL_NODE_ID:
            status.report('Missing hg2git metadata for manifest %s' %
                          c.body.decode('ascii'))
            continue
        if not GitHgHelper.check_manifest(c.body):
            status.report('Sha1 mismatch for manifest %s' %
                          c.body.decode('ascii'))

        files = {}
        if previous:
            # Only look at what changed since the previous manifest head.
            for _, _, before, after, d, path in GitHgHelper.diff_tree(
                    previous, m):
                if d in b'AM' and before != after and \
                        (path, after) not in all_interesting:
                    files[path] = after
        else:
            for _, t, sha1, path in GitHgHelper.ls_tree(m, recursive=True):
                if (path, sha1) not in all_interesting:
                    files[path] = sha1
        all_interesting.update(iteritems(files))
        previous = m

    if status('broken'):
        return 1

    # Don't check files that were already there in the previously checked
    # manifests.
    previous = None
    for r in roots:
        if previous:
            for _, _, before, after, d, path in GitHgHelper.diff_tree(
                    previous, r):
                if d in b'AM' and before != after:
                    all_interesting.discard((path, after))
        else:
            for _, t, sha1, path in GitHgHelper.ls_tree(r, recursive=True):
                all_interesting.discard((path, sha1))
        previous = r

    progress = Progress('Checking {} files')
    # Manifests are taken from the end of the queue, i.e. in the reverse of
    # the order in which rev_list returned them.
    while all_interesting and manifest_queue:
        (m, parents) = manifest_queue.pop()
        changes = get_changes(m, parents, all=True)
        for path, hg_file, hg_fileparents in changes:
            # Skip files that are identical to one of their parents.
            if hg_fileparents[1:] == (hg_file, ):
                continue
            elif hg_fileparents[:1] == (hg_file, ):
                continue
            # Reaching here means the file received a modification compared
            # to its parents. If it's a file we're going to check below,
            # it means we don't need to check its parents if somehow they were
            # going to be checked. If it's not a file we're going to check
            # below, it's because it's either a file we weren't interested in
            # in the first place, or it's the parent of a file we have checked.
            # Either way, we aren't interested in the parents.
            for p in hg_fileparents:
                all_interesting.discard((path, p))
            if (path, hg_file) not in all_interesting:
                continue
            all_interesting.remove((path, hg_file))
            if not GitHgHelper.check_file(hg_file, *hg_fileparents):
                p = store.manifest_path(path)
                status.report('Sha1 mismatch for file %s\n'
                              '  revision %s' %
                              (fsdecode(p), hg_file.decode('ascii')))

                print_parents = ' '.join(
                    p.decode('ascii') for p in hg_fileparents
                    if p != NULL_NODE_ID)
                if print_parents:
                    # More than 41 characters means more than one parent
                    # was joined (presumably 40-character hex sha1s).
                    status.report('  with parent%s %s' %
                                  ('s' if len(print_parents) > 41 else '',
                                   print_parents))
            progress.progress()
    progress.finish()
    if all_interesting:
        status.info('Could not find the following files:')
        for path, sha1 in sorted(all_interesting):
            p = store.manifest_path(path)
            status.info('  %s %s' % (sha1.decode('ascii'), fsdecode(p)))
        status.info('This might be a bug in `git cinnabar fsck`. Please open '
                    'an issue, with the message above, on\n'
                    'https://github.com/glandium/git-cinnabar/issues')
        return 1

    check_replace(store)

    if status('broken'):
        status.info('Your git-cinnabar repository appears to be corrupted.\n'
                    'Please open an issue, with the information above, on\n'
                    'https://github.com/glandium/git-cinnabar/issues')
        Git.update_ref(b'refs/cinnabar/broken', metadata_commit)
        if checked_metadata:
            status.info(
                '\nThen please try to run `git cinnabar rollback --fsck` to '
                'restore last known state, and to update from the mercurial '
                'repository.')
        else:
            status.info('\nThen please try to run `git cinnabar reclone`.')
        status.info(
            '\nPlease note this may affect the commit sha1s of mercurial '
            'changesets, and may require to rebase your local branches.')
        status.info(
            '\nAlternatively, you may start afresh with a new clone. In any '
            'case, please keep this corrupted repository around for further '
            'debugging.')
        return 1

    refresh = []
    if fix_changeset_heads:
        status.fix('Fixing changeset heads metadata order.')
        refresh.append('refs/cinnabar/changesets')
    interval_expired('fsck', 0)
    store.close(refresh=refresh)
    GitHgHelper._helper = False
    metadata_commit = Git.resolve_ref('refs/cinnabar/metadata')
    Git.update_ref(b'refs/cinnabar/checked', metadata_commit)
    return 0
Пример #12
0
def fsck(args):
    '''check cinnabar metadata consistency'''
    # NOTE(review): the docstring above is deliberately left untouched; it
    # is presumably surfaced as command-line help text -- confirm before
    # rewording it.
    #
    # Parameters (attributes read from `args`):
    #   args.commit: iterable of commit/changeset ids to restrict the check
    #                to (falsy when none were given).
    #   args.full:   truthy to check everything reachable from
    #                refs/cinnabar/metadata.
    #   args.force:  only forwarded to fsck_quick().
    #
    # Returns:
    #   - whatever fsck_quick(args.force) returns when neither a commit nor
    #     --full was requested;
    #   - 1 when arguments are invalid, a requested commit is unknown, or
    #     status('broken') is set at the end of the checks;
    #   - 2 when status('fixed') is set;
    #   - 0 otherwise.
    #
    # Raises:
    #   AssertionError: in --full mode, when refs/cinnabar/metadata is not
    #   among the refs listed under refs/cinnabar.

    if not args.commit and not args.full:
        return fsck_quick(args.force)

    status = FsckStatus()

    store = GitHgStore()

    if args.full and args.commit:
        logging.error('Cannot pass both --full and a commit')
        return 1

    if args.commit:
        commits = set()

        for c in args.commit:
            # NOTE(review): further down, the result of
            # store.hg_changeset() is handed directly to store.changeset(),
            # whereas here `.node` is read from it -- confirm the return
            # type supports both uses.
            cs = store.hg_changeset(c)
            if cs:
                commits.add(c)
                c = cs.node
            commit = GitHgHelper.hg2git(c)
            if commit == NULL_NODE_ID and not cs:
                status.info('Unknown commit or changeset: %s' %
                            c.decode('ascii'))
                return 1
            if not cs:
                cs = store.hg_changeset(commit)
                commits.add(commit)

        all_git_commits = GitHgHelper.rev_list(b'--no-walk=unsorted', *commits)
    else:
        all_refs = {
            ref: sha1 for sha1, ref in Git.for_each_ref('refs/cinnabar')
        }

        # Raise explicitly rather than `assert False`: a bare assert is
        # stripped under `python -O`, which would leave `git_heads` unbound
        # and surface as a confusing NameError below.
        if b'refs/cinnabar/metadata' not in all_refs:
            raise AssertionError('refs/cinnabar/metadata is missing')
        git_heads = b'%s^^@' % all_refs[b'refs/cinnabar/metadata']

        all_git_commits = GitHgHelper.rev_list(b'--topo-order',
                                               b'--full-history', b'--reverse',
                                               git_heads)

    dag = gitdag()

    GitHgHelper.reset_heads(b'manifests')

    # The slower, store-based file check is only used when debug logging is
    # enabled for FileFindParents.
    full_file_check = FileFindParents.logger.isEnabledFor(logging.DEBUG)

    for node, tree, parents in progress_iter('Checking {} changesets',
                                             all_git_commits):
        node = store._replace.get(node, node)
        hg_node = store.hg_changeset(node)
        if not hg_node:
            status.report('Missing note for git commit: ' +
                          node.decode('ascii'))
            continue
        GitHgHelper.seen(b'git2hg', node)

        changeset_data = store.changeset(hg_node)
        changeset = changeset_data.node

        GitHgHelper.seen(b'hg2git', changeset)
        changeset_ref = store.changeset_ref(changeset)
        if not changeset_ref:
            status.report('Missing changeset in hg2git branch: %s' %
                          changeset.decode('ascii'))
            continue
        elif bytes(changeset_ref) != node:
            status.report(
                'Commit mismatch for changeset %s\n'
                '  hg2git: %s\n  commit: %s' %
                (changeset.decode('ascii'), changeset_ref.decode('ascii'),
                 node.decode('ascii')))

        hg_changeset = store.changeset(changeset, include_parents=True)
        if hg_changeset.node != hg_changeset.sha1:
            status.report('Sha1 mismatch for changeset %s' %
                          changeset.decode('ascii'))

        dag.add(hg_changeset.node,
                (hg_changeset.parent1, hg_changeset.parent2),
                changeset_data.branch or b'default')

        # Recompute the changeset metadata from the git commit and rewrite
        # it when it differs from what is stored.
        raw_changeset = Changeset.from_git_commit(node)
        patcher = ChangesetPatcher.from_diff(raw_changeset, changeset_data)
        if patcher != store.read_changeset_data(node):
            status.fix('Adjusted changeset metadata for %s' %
                       changeset.decode('ascii'))
            GitHgHelper.set(b'changeset', changeset, NULL_NODE_ID)
            GitHgHelper.set(b'changeset', changeset, node)
            GitHgHelper.put_blob(patcher, want_sha1=False)
            GitHgHelper.set(b'changeset-metadata', changeset, NULL_NODE_ID)
            GitHgHelper.set(b'changeset-metadata', changeset, b':1')

        manifest = changeset_data.manifest
        if GitHgHelper.seen(b'hg2git', manifest) or manifest == NULL_NODE_ID:
            continue
        manifest_ref = store.manifest_ref(manifest)
        if not manifest_ref:
            status.report('Missing manifest in hg2git branch: %s' %
                          manifest.decode('ascii'))
            # Nothing below can work without a manifest ref (it is passed to
            # the helper, to GitCommit() and decoded for messages), so move
            # on to the next changeset, as done for a missing changeset ref.
            continue

        parents = tuple(
            store.changeset(p).manifest for p in hg_changeset.parents)
        git_parents = tuple(
            store.manifest_ref(p) for p in parents if p != NULL_NODE_ID)

        # This doesn't change the value but makes the helper track the manifest
        # dag.
        GitHgHelper.set(b'manifest', manifest, manifest_ref)

        if not GitHgHelper.check_manifest(manifest):
            status.report('Sha1 mismatch for manifest %s' %
                          manifest.decode('ascii'))

        manifest_commit_parents = GitCommit(manifest_ref).parents
        if sorted(manifest_commit_parents) != sorted(git_parents):
            # TODO: better error
            status.report(
                '%s(%s) %s != %s' %
                (manifest.decode('ascii'), manifest_ref.decode('ascii'),
                 ' '.join(p.decode('ascii')
                          for p in manifest_commit_parents), ' '.join(
                              p.decode('ascii') for p in git_parents)))

        # TODO: check that manifest content matches changeset content

        changes = get_changes(manifest_ref, git_parents)
        for path, hg_file, hg_fileparents in changes:
            if hg_file != NULL_NODE_ID and (hg_file == HG_EMPTY_FILE
                                            or GitHgHelper.seen(
                                                b'hg2git', hg_file)):
                if full_file_check:
                    file = store.file(hg_file, hg_fileparents)
                    valid = file.node == file.sha1
                else:
                    valid = GitHgHelper.check_file(hg_file, *hg_fileparents)
                if not valid:
                    status.report('Sha1 mismatch for file %s in manifest %s' %
                                  (hg_file.decode('ascii'),
                                   manifest_ref.decode('ascii')))

    if not args.commit and not status('broken'):
        # Compare the manifest heads recorded by the store with the ones the
        # helper accumulated during the walk above.
        store_manifest_heads = set(store._manifest_heads_orig)
        manifest_heads = set(GitHgHelper.heads(b'manifests'))
        if store_manifest_heads != manifest_heads:

            def iter_manifests(a, b):
                # rev-list arguments: heads only in `a`, excluding (with a
                # `^` prefix) everything reachable from `b`.
                for h in a - b:
                    yield h
                for h in b:
                    yield b'^%s' % h

            for m, t, p in GitHgHelper.rev_list(
                    b'--topo-order', b'--full-history', b'--reverse',
                    *iter_manifests(manifest_heads, store_manifest_heads)):
                status.fix('Missing manifest commit in manifest branch: %s' %
                           m.decode('ascii'))

            for m, t, p in GitHgHelper.rev_list(
                    b'--topo-order', b'--full-history', b'--reverse',
                    *iter_manifests(store_manifest_heads, manifest_heads)):
                status.fix('Removing manifest commit %s with no corresponding '
                           'changeset' % (m.decode('ascii')))

            for h in store_manifest_heads - manifest_heads:
                if GitHgHelper.seen(b'hg2git', store.hg_manifest(h)):
                    status.fix('Removing non-head reference to %s in manifests'
                               ' metadata.' % h.decode('ascii'))

    # Each dangling-metadata cleanup is nested under its own guard so that
    # the entries listed for one table can never be replayed against the
    # other.
    if not args.commit and not status('broken'):
        for obj in GitHgHelper.dangling(b'hg2git'):
            status.fix('Removing dangling metadata for ' +
                       obj.decode('ascii'))
            # Theoretically, we should figure out if they are files,
            # manifests or changesets and set the right variable
            # accordingly, but in practice, it makes no difference.
            # Reevaluate when GitHgStore.close is modified, though.
            GitHgHelper.set(b'file', obj, NULL_NODE_ID)
            GitHgHelper.set(b'file-meta', obj, NULL_NODE_ID)

    if not args.commit and not status('broken'):
        for c in GitHgHelper.dangling(b'git2hg'):
            status.fix('Removing dangling note for commit ' +
                       c.decode('ascii'))
            GitHgHelper.set(b'changeset-metadata', c, NULL_NODE_ID)

    check_replace(store)

    if status('broken'):
        status.info(
            'Your git-cinnabar repository appears to be corrupted. There\n'
            'are known issues in older revisions that have been fixed.\n'
            'Please try running the following command to reset:\n'
            '  git cinnabar reclone\n\n'
            'Please note this command may change the commit sha1s. Your\n'
            'local branches will however stay untouched.\n'
            'Please report any corruption that fsck would detect after a\n'
            'reclone.')

    if not args.commit:
        status.info('Checking head references...')
        # Group the heads computed from the walked changesets by branch and
        # reconcile them with the heads recorded in the store.
        computed_heads = defaultdict(set)
        for branch, head in dag.all_heads():
            computed_heads[branch].add(head)

        for branch in sorted(dag.tags()):
            stored_heads = store.heads({branch})
            for head in computed_heads[branch] - stored_heads:
                status.fix('Adding missing head %s in branch %s' %
                           (head.decode('ascii'), fsdecode(branch)))
                store.add_head(head)
            for head in stored_heads - computed_heads[branch]:
                status.fix('Removing non-head reference to %s in branch %s' %
                           (head.decode('ascii'), fsdecode(branch)))
                del store._hgheads[head]

    metadata_commit = Git.resolve_ref('refs/cinnabar/metadata')
    if status('broken'):
        # Record the metadata that failed the check and bail out without
        # closing the store.
        Git.update_ref(b'refs/cinnabar/broken', metadata_commit)
        return 1

    if args.full:
        Git.update_ref(b'refs/cinnabar/checked', metadata_commit)
    interval_expired('fsck', 0)
    store.close()

    if status('fixed'):
        return 2
    return 0