Пример #1
0
    def _populate_tree_entries(self, dirty_trees):
        """Attach dirty trees to their parents and yield them bottom-up.

        :param dirty_trees: set of directory paths whose trees changed. It is
          mutated in place: every ancestor directory of a dirty path, and the
          root (``''``), are added to it.

        This is a generator. It yields each tree object found in
        ``self._dirs`` for a dirty path, longest path first, so a directory is
        always yielded (and linked into its parent) before any of its
        ancestors. Dirty paths with no entry in ``self._dirs`` are skipped.
        """
        self._dirs.setdefault('', dulobjs.Tree())

        # Fill in missing directories. We iterate over a snapshot of the keys
        # because the loop body inserts new entries into self._dirs; iterating
        # a live keys() view while inserting raises RuntimeError on Python 3.
        for path in list(self._dirs):
            parent = os.path.dirname(path)

            while parent != '':
                # Once an ancestor exists we stop walking up from this path.
                if self._dirs.get(parent, None) is not None:
                    break

                self._dirs[parent] = dulobjs.Tree()
                parent = os.path.dirname(parent)

        # Every ancestor of a dirty tree is dirty too. Iterate over a copy
        # since the set grows while we walk up.
        for dirty in list(dirty_trees):
            parent = os.path.dirname(dirty)

            while parent != '':
                if parent in dirty_trees:
                    break

                dirty_trees.add(parent)
                parent = os.path.dirname(parent)

        # The root tree is always dirty but doesn't always get updated.
        dirty_trees.add('')

        # We only need to recalculate and export dirty trees. Sorting by
        # path length, longest first, guarantees children come before their
        # ancestors because a child's path is strictly longer.
        for d in sorted(dirty_trees, key=len, reverse=True):
            # Only happens for deleted directories.
            try:
                tree = self._dirs[d]
            except KeyError:
                continue

            yield tree

            # The root has no parent to be linked into.
            if d == '':
                continue

            parent_tree = self._dirs[os.path.dirname(d)]

            # Accessing the tree's ID is what triggers SHA-1 calculation and is
            # the expensive part (at least if the tree has been modified since
            # the last time we retrieved its ID). Also, assigning an entry to a
            # tree (even if it already exists) invalidates the existing tree
            # and incurs SHA-1 recalculation. So, it's in our interest to avoid
            # invalidating trees. Since we only update the entries of dirty
            # trees, this should hold true.
            parent_tree[os.path.basename(d)] = (stat.S_IFDIR, tree.id)
Пример #2
0
    def test_commit_config_identity_in_memoryrepo(self):
        """A commit without explicit identity uses the configured user."""
        repo = MemoryRepo.init_bare([], {})
        config = repo.get_config()
        config.set(("user", ), "name", "Jelmer")
        config.set(("user", ), "email", "*****@*****.**")

        # Neither author nor committer is passed, so both must fall back to
        # the identity stored in the repository configuration.
        expected_identity = "Jelmer <*****@*****.**>"
        sha = repo.do_commit('message', tree=objects.Tree().id)
        self.assertEqual(expected_identity, repo[sha].author)
        self.assertEqual(expected_identity, repo[sha].committer)
Пример #3
0
 def __setitem__(self, path, value):
     """Store ``value`` under ``path``, creating its parent entry if absent.

     If ``self.get`` returns ``None`` for the parent directory of ``path``,
     a fresh empty tree is first assigned to that parent (through this same
     method, so missing ancestors are created recursively).
     """
     parent_path = os.path.dirname(path)
     if self.get(parent_path) is None:
         # Empty trees will be filled during finalization (see comment below)
         self[parent_path] = dulobjs.Tree()
     # Ideally the child would be linked into its parent right here. That is
     # not possible: a parent refers to a child by the child's id, and the
     # id is not final while the tree is still being mutated. So we only make
     # sure (empty) parents exist, and later in _populate_tree_entries all
     # the children are attached to their parents bottom-up.
     self.trees[path] = value
Пример #4
0
 def _CommitFile(self, contents, commit_message, branch=None):
     """Commit a single file named ``myfile`` and point HEAD at the commit.

     :param contents: data for the blob stored as ``myfile``
     :param commit_message: message passed to ``do_commit``
     :param branch: passed to ``do_commit`` as ``ref``
     :return: the id returned by ``do_commit``
     """
     file_blob = objects.Blob.from_string(contents)
     root_tree = objects.Tree()
     root_tree.add(b'myfile', 0o100644, file_blob.id)

     # Both objects must be in the store before a commit can refer to them.
     for obj in (file_blob, root_tree):
         self.repo.object_store.add_object(obj)

     new_commit_id = self.repo.do_commit(
         tree=root_tree.id,
         message=commit_message,
         ref=branch,
         committer='User Larry <*****@*****.**>')
     self.repo[b'HEAD'] = new_commit_id
     return new_commit_id
Пример #5
0
    def _handle_subrepos(self, ctx, dirty_trees):
        """Add a gitlink tree entry for each subrepository in ``.hgsubstate``.

        :param ctx: changeset context; ``ctx['.hgsubstate']`` must exist and
          ``ctx['.hgsub']`` is consulted when present.
        :param dirty_trees: set of directory paths; the parent directory of
          every recorded subrepository is added to it.
        """
        substate = parse_hgsubstate(ctx['.hgsubstate'].data().splitlines())

        if '.hgsub' in ctx:
            sub = parse_hgsub(ctx['.hgsub'].data().splitlines())
        else:
            sub = OrderedDict()

        # items() instead of iteritems(): the latter only exists on Python 2
        # mappings, while items() is available on both Python 2 and 3.
        for path, sha in substate.items():
            # Ignore non-Git repositories keeping state in .hgsubstate.
            if path in sub and not sub[path].startswith('[git]'):
                continue

            d = os.path.dirname(path)
            dirty_trees.add(d)
            tree = self._dirs.setdefault(d, dulobjs.Tree())
            tree.add(os.path.basename(path), dulobjs.S_IFGITLINK, sha)
Пример #6
0
    def _remove_path(self, path, dirty_trees):
        """Drop the entry for ``path`` (file or git link) from its tree.

        The containing directory is recorded in ``dirty_trees``. When the
        removal leaves the containing tree empty, the tree is handed to
        ``_remove_tree``; otherwise it is stored back into ``self._dirs``.
        """
        dirname, basename = os.path.split(path)
        containing = self._dirs.get(dirname, dulobjs.Tree())

        del containing[basename]
        dirty_trees.add(dirname)

        if len(containing) == 0:
            # An emptied tree should go away; that may in turn leave parent
            # trees without children, and so on up the hierarchy.
            self._remove_tree(dirname)
            return

        self._dirs[dirname] = containing
Пример #7
0
    def _get_missing_trees(self, path, root_tree):
        """
        Creates missing ``Tree`` objects for the given path.

        :param path: path given as a string. It may be a path to a file node
          (i.e. ``foo/bar/baz.txt``) or directory path - in that case it must
          end with slash (i.e. ``foo/bar/``).
        :param root_tree: ``dulwich.objects.Tree`` object from which we start
          traversing (should be commit's root tree)
        :return: list with one ``Tree`` per directory component of ``path``,
          ordered from the top-most directory down; an empty list when
          ``path`` has no directory part.
        :raises RepositoryError: if a directory component is already present
          in its parent tree but is not a ``Tree``.
        """
        dirpath = posixpath.split(path)[0]
        dirs = dirpath.split('/')
        if not dirs or dirs == ['']:
            return []

        def get_tree_for_dir(tree, dirname):
            # Return the sub-tree stored under ``dirname`` or None if the
            # name is not present in ``tree``.
            for name, _mode, sha in tree.iteritems():
                if name != dirname:
                    continue
                obj = self.repository._repo[sha]
                if isinstance(obj, objects.Tree):
                    return obj
                raise RepositoryError(
                    "Cannot create directory %s "
                    "at tree %s as path is occupied and is not a "
                    "Tree" % (dirname, tree))
            return None

        # Tree-entry mode used for directories. Spelled with the ``0o``
        # prefix, which is valid on Python 2.6+ and Python 3 alike.
        dirmode = 0o040000

        trees = []
        parent = root_tree
        for dirname in dirs:
            tree = get_tree_for_dir(parent, dirname)
            if tree is None:
                tree = objects.Tree()
                # Keyword arguments keep the call independent of the
                # positional order of ``Tree.add``.
                parent.add(name=dirname, mode=dirmode, hexsha=tree.id)
            # Descend in every case: the next component must be looked up in
            # the tree for this directory whether it existed or was created.
            parent = tree
            # Always append tree
            trees.append(tree)
        return trees
Пример #8
0
 def __init__(self, store, commit):
     """Start from the root tree of ``commit``, or an empty tree if None.

     :param store: mapping-like object store; indexed with ``commit.tree``
     :param commit: commit whose tree becomes the root, or ``None``
     """
     self.store = store
     self.trees = {}
     if commit is None:
         root = dulobjs.Tree()
     else:
         root = store[commit.tree]
     # The root tree is registered under the empty path.
     self.trees[""] = root
Пример #9
0
    def update_changeset(self, newctx):
        """Advance the tracked tree to the Mercurial changeset ``newctx``.

        This is a generator of 2-tuples ``(object, nodeid)``. ``object`` is a
        dulwich Blob or Tree. ``nodeid`` is the Mercurial file node for a
        blob and ``None`` for a tree, since trees do not correspond to a
        specific nodeid.

        Callers exporting from Mercurial typically write each emitted object
        to the Git repo via the store's add_object(). Some emitted objects
        may already exist in the Git repository; this class does not know
        about the Git repository, so it is up to the caller to add them
        conditionally.

        Only objects that changed since the previous call to
        update_changeset are emitted. On the first call, all objects in the
        tree are emitted.
        """
        # Strategy: accumulate Blob and Tree instances for the current
        # changeset incrementally, driven by the changeset delta Mercurial
        # reports. We rely on a Tree's SHA-1 being calculated lazily after
        # the tree is modified; this is critical to performance.

        # changectx.files() would be *much* faster in theory, but it may be
        # inaccurate, especially with older repositories, which may not
        # record things like deleted files explicitly in the manifest (which
        # is where files() gets its data). Comparing the full manifests is
        # the only reliable way, and localrepo.status() is the easy way to
        # do that.
        status = self._hg.status(self._ctx, newctx)
        modified, added, removed = status[0:3]

        # Directories/trees touched by this update; only these get exported.
        dirty_trees = set()

        for gone in removed:
            self._remove_path(gone, dirty_trees)

        # Each changed or added file needs a Git blob and a tree entry. The
        # blob is emitted right away and the owning tree learns about it.
        for path in set(modified) | set(added):
            audit_git_path(self._hg.ui, path)
            dirname = os.path.dirname(path)
            owning_tree = self._dirs.setdefault(dirname, dulobjs.Tree())
            dirty_trees.add(dirname)

            fctx = newctx[path]

            entry, blob = IncrementalChangesetExporter.tree_entry(
                fctx, self._blob_cache)
            if blob is not None:
                yield (blob, fctx.filenode())

            owning_tree.add(*entry)

        # All trees now describe the new changeset, so recalculate the tree
        # IDs and emit them. Tree SHA-1s are deliberately not computed before
        # this point. This is an important difference between us and
        # dulwich.index.commit_tree(), which builds new Tree instances for
        # each series of blobs.
        for tree_obj in self._populate_tree_entries(dirty_trees):
            yield (tree_obj, None)

        self._ctx = newctx
Пример #10
0
    def commit(self,
               message,
               author,
               parents=None,
               branch=None,
               date=None,
               **kwargs):
        """
        Performs in-memory commit (doesn't check workdir in any way) and
        returns newly created ``Changeset``. Updates repository's
        ``revisions``.

        :param message: message of the commit
        :param author: full username, i.e. "Joe Doe <*****@*****.**>"
        :param parents: single parent or sequence of parents from which commit
          would be derived
        :param date: ``datetime.datetime`` instance or a timestamp. Defaults
          to the current time.
        :param branch: branch name, as string. If none given, default backend's
          branch would be used.
        :param kwargs: optional ``author_time`` (defaults to ``date``) and
          ``author_timezone`` (defaults to ``time.timezone``).

        :raises ``CommitError``: if any error occurs while committing
        """
        self.check_integrity(parents)

        from .repository import GitRepository
        if branch is None:
            branch = GitRepository.DEFAULT_BRANCH_NAME

        repo = self.repository._repo
        object_store = repo.object_store

        ENCODING = "UTF-8"
        # Tree-entry mode used for directories. The ``0o`` prefix is valid on
        # Python 2.6+ and Python 3; the bare ``040000`` form is Python 2 only.
        DIRMOD = 0o040000

        # Create tree and populates it with blobs
        if self.parents[0]:
            commit_tree = repo[self.parents[0]._commit.tree]
        else:
            commit_tree = objects.Tree()
        for node in self.added + self.changed:
            # Compute subdirs if needed
            dirpath = posixpath.split(node.path)[0]
            dirnames = dirpath.split('/') if dirpath else []
            parent = commit_tree
            ancestors = [('', parent)]

            # Tries to dig for the deepest existing tree
            while dirnames:
                curdir = dirnames.pop(0)
                try:
                    dir_id = parent[curdir][1]
                except KeyError:
                    # put curdir back into dirnames and stops
                    dirnames.insert(0, curdir)
                    break
                else:
                    # If found, updates parent
                    parent = self.repository._repo[dir_id]
                    ancestors.append((curdir, parent))
            # Now parent is deepest existing tree and we need to create subtrees
            # for dirnames (in reverse order) [this only applies for nodes from added]
            new_trees = []

            if not node.is_binary:
                content = node.content.encode(ENCODING)
            else:
                content = node.content
            blob = objects.Blob.from_string(content)

            node_path = node.name.encode(ENCODING)
            if dirnames:
                # If there are trees which should be created we need to build
                # them now (in reverse order)
                reversed_dirnames = list(reversed(dirnames))
                curtree = objects.Tree()
                curtree[node_path] = node.mode, blob.id
                new_trees.append(curtree)
                for dirname in reversed_dirnames[:-1]:
                    newtree = objects.Tree()
                    newtree[dirname] = DIRMOD, curtree.id
                    new_trees.append(newtree)
                    curtree = newtree
                parent[reversed_dirnames[-1]] = DIRMOD, curtree.id
            else:
                parent.add(name=node_path, mode=node.mode, hexsha=blob.id)

            new_trees.append(parent)
            # Update ancestors: walk the (parent, child) pairs from the
            # deepest up so every parent records its child's new id.
            for parent, tree, path in reversed([
                (a[1], b[1], b[0]) for a, b in zip(ancestors, ancestors[1:])
            ]):
                parent[path] = DIRMOD, tree.id
                object_store.add_object(tree)

            object_store.add_object(blob)
            for tree in new_trees:
                object_store.add_object(tree)
        for node in self.removed:
            paths = node.path.split('/')
            tree = commit_tree
            trees = [tree]
            # Traverse deep into the forest...
            for path in paths:
                try:
                    obj = self.repository._repo[tree[path][1]]
                    if isinstance(obj, objects.Tree):
                        trees.append(obj)
                        tree = obj
                except KeyError:
                    break
            # Cut down the blob and all rotten trees on the way back...
            # zip() is materialized because reversed() needs a sequence; on
            # Python 3 zip() returns a one-shot iterator.
            # NOTE(review): when this loop stops at a subtree that still has
            # entries, that modified subtree is neither re-linked into its
            # parent nor added to the object store here - confirm intended.
            for path, tree in reversed(list(zip(paths, trees))):
                del tree[path]
                if tree:
                    # This tree still has elements - don't remove it or any
                    # of it's parents
                    break

        object_store.add_object(commit_tree)

        # Create commit
        commit = objects.Commit()
        commit.tree = commit_tree.id
        commit.parents = [p._commit.id for p in self.parents if p]
        commit.author = commit.committer = safe_str(author)
        commit.encoding = ENCODING
        commit.message = safe_str(message)

        # Compute date
        if date is None:
            date = time.time()
        elif isinstance(date, datetime.datetime):
            date = time.mktime(date.timetuple())

        author_time = kwargs.pop('author_time', date)
        commit.commit_time = int(date)
        commit.author_time = int(author_time)
        tz = time.timezone
        author_tz = kwargs.pop('author_timezone', tz)
        commit.commit_timezone = tz
        commit.author_timezone = author_tz

        object_store.add_object(commit)

        ref = 'refs/heads/%s' % branch
        repo.refs[ref] = commit.id
        repo.refs.set_symbolic_ref('HEAD', ref)

        # Update vcs repository object & recreate dulwich repo
        self.repository.revisions.append(commit.id)
        # invalidate parsed refs after commit
        self.repository._parsed_refs = self.repository._get_parsed_refs()
        tip = self.repository.get_changeset()
        self.reset()
        return tip
Пример #11
0
    def commit(self, wire, commit_data, branch, commit_tree, updated, removed):
        """Build a commit object from node changes and point ``branch`` at it.

        :param wire: passed to ``self._factory.repo`` to obtain the
          repository and to ``self.create_branch``.
        :param commit_data: mapping of attribute name to value; every pair is
          set on the new commit object.
        :param branch: branch name; ``refs/heads/<branch>`` is set to the new
          commit.
        :param commit_tree: id of the tree to start from; a falsy value
          starts from an empty tree.
        :param updated: iterable of mappings with the keys ``path``,
          ``content``, ``node_path`` and ``mode`` for added/changed files.
        :param removed: iterable of ``/``-separated paths to delete.
        :return: id of the created commit.
        """
        repo = self._factory.repo(wire)
        object_store = repo.object_store

        # Create tree and populates it with blobs
        commit_tree = repo[commit_tree] if commit_tree else objects.Tree()

        for node in updated:
            # Compute subdirs if needed
            dirpath = vcspath.split(node['path'])[0]
            # A real list is required: it is tested for emptiness and
            # mutated with pop()/insert() below, which a Python 3 map()
            # iterator does not support.
            dirnames = (
                [safe_str(name) for name in dirpath.split('/')]
                if dirpath else [])
            parent = commit_tree
            ancestors = [('', parent)]

            # Tries to dig for the deepest existing tree
            while dirnames:
                curdir = dirnames.pop(0)
                try:
                    dir_id = parent[curdir][1]
                except KeyError:
                    # put curdir back into dirnames and stops
                    dirnames.insert(0, curdir)
                    break
                else:
                    # If found, updates parent
                    parent = repo[dir_id]
                    ancestors.append((curdir, parent))
            # Now parent is deepest existing tree and we need to create
            # subtrees for dirnames (in reverse order)
            # [this only applies for nodes from added]
            new_trees = []

            blob = objects.Blob.from_string(node['content'])

            if dirnames:
                # If there are trees which should be created we need to build
                # them now (in reverse order)
                reversed_dirnames = list(reversed(dirnames))
                curtree = objects.Tree()
                curtree[node['node_path']] = node['mode'], blob.id
                new_trees.append(curtree)
                for dirname in reversed_dirnames[:-1]:
                    newtree = objects.Tree()
                    newtree[dirname] = (DIR_STAT, curtree.id)
                    new_trees.append(newtree)
                    curtree = newtree
                parent[reversed_dirnames[-1]] = (DIR_STAT, curtree.id)
            else:
                parent.add(name=node['node_path'],
                           mode=node['mode'],
                           hexsha=blob.id)

            new_trees.append(parent)
            # Update ancestors: walk the (parent, child) pairs from the
            # deepest up so every parent records its child's new id.
            reversed_ancestors = reversed([
                (a[1], b[1], b[0]) for a, b in zip(ancestors, ancestors[1:])
            ])
            for parent, tree, path in reversed_ancestors:
                parent[path] = (DIR_STAT, tree.id)
                object_store.add_object(tree)

            object_store.add_object(blob)
            for tree in new_trees:
                object_store.add_object(tree)

        for node_path in removed:
            paths = node_path.split('/')
            tree = commit_tree
            trees = [tree]
            # Traverse deep into the forest...
            for path in paths:
                try:
                    obj = repo[tree[path][1]]
                    if isinstance(obj, objects.Tree):
                        trees.append(obj)
                        tree = obj
                except KeyError:
                    break
            # Cut down the blob and all rotten trees on the way back...
            # zip() is materialized because reversed() needs a sequence; on
            # Python 3 zip() returns a one-shot iterator.
            # NOTE(review): when this loop stops at a subtree that still has
            # entries, that modified subtree is neither re-linked into its
            # parent nor added to the object store here - confirm intended.
            for path, tree in reversed(list(zip(paths, trees))):
                del tree[path]
                if tree:
                    # This tree still has elements - don't remove it or any
                    # of it's parents
                    break

        object_store.add_object(commit_tree)

        # Create commit
        commit = objects.Commit()
        commit.tree = commit_tree.id
        # items() is available on Python 2 and 3; iteritems() only on 2.
        for k, v in commit_data.items():
            setattr(commit, k, v)
        object_store.add_object(commit)

        self.create_branch(wire, branch, commit.id)

        # dulwich set-ref
        ref = 'refs/heads/%s' % branch
        repo.refs[ref] = commit.id

        return commit.id