Exemplo n.º 1
0
def write_file(repo: pygit2.Repository, tree: pygit2.Tree, filepath: str,
               contents: str) -> pygit2.Oid:
    """Build a new root tree in which ``filepath`` holds ``contents``.

    The function only writes objects (one blob plus one tree per path
    level) into ``repo``; it creates no commit and updates no reference.

    Args:
        repo: Repository the blob and trees are written to.
        tree: Root tree used as the starting point. Entries that are not on
            the path to ``filepath`` are carried over unchanged.
        filepath: ``/``-separated path of the file, relative to ``tree``.
        contents: Text of the file; it is stored UTF-8 encoded.

    Returns:
        The id of the newly written root tree.
    """
    *dirnames, filename = filepath.split("/")

    # Walk down from the root and remember the existing tree at every level.
    # ``None`` marks a directory that does not exist yet; everything below a
    # missing directory is missing too, so never index into ``None``.
    levels = [tree]
    for dirname in dirnames:
        parent = levels[-1]
        child = None
        if parent is not None:
            try:
                child = repo[parent[dirname].id]
            except KeyError:
                child = None
        levels.append(child)

    # Rebuild bottom-up: the file goes into the deepest directory, then each
    # rewritten directory goes into its parent until the root is reached.
    to_insert = repo.create_blob(contents.encode("utf-8"))
    filemode = pygit2.GIT_FILEMODE_BLOB
    names = dirnames + [filename]
    for name, level in zip(reversed(names), reversed(levels)):
        if level is None:
            builder = repo.TreeBuilder()
        else:
            builder = repo.TreeBuilder(level)
        builder.insert(name, to_insert, filemode)
        to_insert = builder.write()
        # Everything inserted after the first round is a tree.
        filemode = pygit2.GIT_FILEMODE_TREE

    return to_insert
Exemplo n.º 2
0
class GitBlack:
    """Commit working-tree changes grouped by the commits they touch.

    ``commit_changes`` diffs the working tree, asks ``HunkBlamer`` for the
    blamed deltas of every modified file, groups the deltas by their set of
    original commits and creates one commit per group.
    """

    # Upper bound on worker threads and on queued blame jobs.
    MAX_WORKERS = 8
    # Minimum number of seconds between two progress lines.
    LOG_INTERVAL = 0.04

    def __init__(self):
        self.repo = Repository(".")
        self.patchers = {}
        # State below is reset by commit_changes(); it is initialised here so
        # that the helper methods never hit a missing attribute.
        self.grouped_deltas = {}
        self._file_modes = {}
        self.progress = 0
        self.total = 0
        self.last_log = 0

    def _log_progress(self, label):
        """Count one finished work item and print a throttled progress line."""
        self.progress += 1
        now = time.monotonic()
        if now - self.last_log > self.LOG_INTERVAL:
            sys.stdout.write("{} {}/{} \r".format(
                label, self.progress, self.total))
            sys.stdout.flush()
            self.last_log = now

    def get_blamed_deltas(self, patch):
        """Register a ``Patcher`` for the patched file and return its blames.

        Runs on worker threads (see ``commit_changes``).
        """
        filename = patch.delta.old_file.path
        self.patchers[filename] = Patcher(self.repo, filename)
        hb = HunkBlamer(self.repo, patch)
        return hb.blames()

    def group_blame_deltas(self, blames):
        """File every delta of one file under its sorted tuple of commits."""
        for delta_blame in blames:
            commits = tuple(sorted(delta_blame.commits))
            self.grouped_deltas.setdefault(commits,
                                           []).append(delta_blame.delta)

        self._log_progress("Reading file")

    def commit_changes(self):
        """Create one commit per group of original commits.

        Raises:
            GitIndexNotEmpty: if any path has a status matching
                ``index_statuses``.
        """
        start = time.monotonic()
        self.grouped_deltas = {}

        statuses = self.repo.status().values()
        if any(status & index_statuses for status in statuses):
            raise GitIndexNotEmpty

        patches = []
        self._file_modes = {}
        diff = self.repo.diff(context_lines=0,
                              flags=GIT_DIFF_IGNORE_SUBMODULES)
        for patch in diff:
            # Only modified files are handled.
            if patch.delta.status != GIT_DELTA_MODIFIED:
                continue
            old_file = patch.delta.old_file
            self._file_modes[old_file.path] = old_file.mode
            patches.append(patch)

        self.progress = 0
        self.last_log = 0
        self.total = len(patches)

        # The context manager shuts the pool down even when a task raises.
        with ThreadPoolExecutor(max_workers=self.MAX_WORKERS) as executor:
            tasks = set()
            for patch in patches:
                tasks.add(executor.submit(self.get_blamed_deltas, patch))
                # Keep the backlog bounded: drain finished jobs before
                # queueing more.
                if len(tasks) > self.MAX_WORKERS:
                    done, tasks = wait(tasks, return_when=FIRST_COMPLETED)
                    for task in done:
                        self.group_blame_deltas(task.result())

            for task in tasks:
                self.group_blame_deltas(task.result())

        secs = time.monotonic() - start
        sys.stdout.write("Reading file {}/{} ({:.2f} secs).\n".format(
            self.progress, self.total, secs))

        start = time.monotonic()
        self.total = len(self.grouped_deltas)
        self.progress = 0
        self.last_log = 0

        for commits, deltas in self.grouped_deltas.items():
            blobs = self._create_blobs(deltas)
            self._commit(commits, blobs)

        secs = time.monotonic() - start
        print("Making commit {}/{} ({:.2f} secs).".format(
            self.progress, self.total, secs))

    def _create_blobs(self, deltas):
        """Apply ``deltas`` to their patchers; return ``{filename: blob id}``."""
        filenames = set()
        for delta in deltas:
            self.patchers[delta.filename].apply(delta)
            filenames.add(delta.filename)

        return {
            filename: self.repo.create_blob(self.patchers[filename].content())
            for filename in filenames
        }

    def _commit(self, original_commits, blobs):
        """Stage ``blobs`` and commit them on top of HEAD.

        Author and message are taken from one of ``original_commits``: the
        last one when ordered by ``commit_datetime``. The committer comes
        from the repository's ``user.name`` / ``user.email`` configuration.
        """
        for filename, blob_id in blobs.items():
            file_mode = self._file_modes[filename]
            self.repo.index.add(IndexEntry(filename, blob_id, file_mode))

        commits = [self.repo.get(h) for h in original_commits]

        main_commit = commits[0]
        if len(commits) > 1:
            # most recent commit
            main_commit = sorted(commits, key=commit_datetime)[-1]

        commit_message = main_commit.message
        commit_message += "\n\nautomatic commit by git-black, original commits:\n"
        commit_message += "\n".join(
            "  {}".format(c) for c in original_commits)

        committer = Signature(
            name=self.repo.config["user.name"],
            email=self.repo.config["user.email"],
        )

        self.repo.index.write()
        tree = self.repo.index.write_tree()
        head = self.repo.head.peel()
        self.repo.create_commit("HEAD", main_commit.author, committer,
                                commit_message, tree, [head.id])
        self._log_progress("Making commit")
Exemplo n.º 3
0
class GitBareBackend(object):
    """Storage backend that reads and writes handlers in a bare git repository.

    The repository lives in the ``database`` sub-folder of the given path.
    Reads go through the tree of ``HEAD``; writes are staged in the
    repository index and committed by ``do_transaction``.
    """

    nb_transactions = 0

    def __init__(self, path):
        """Open the repository under ``<path>/database``.

        Raises:
            ValueError: if the database folder does not exist, or (through
                ``username`` / ``useremail``) if the git committer identity
                cannot be read.
        """
        self.path = abspath(path) + '/'
        # Open database
        self.path_data = '%s/database/' % self.path
        if not lfs.is_folder(self.path_data):
            error = '"%s" should be a folder, but it is not' % path
            raise ValueError(error)
        # Open repository
        self.repo = Repository(self.path_data)
        # Read index
        try:
            tree = self.repo.head.peel(GIT_OBJ_TREE)
            self.repo.index.read_tree(tree.id)
        except Exception:
            # Best effort: the index is simply left as it is when HEAD cannot
            # be read (presumably an empty repository -- confirm).
            pass
        # Check git committer
        try:
            _, _ = self.username, self.useremail
        except Exception:
            print('=========================================')
            print('ERROR: Please configure GIT commiter via')
            print(' $ git config --global user.name')
            print(' $ git config --global user.email')
            print('=========================================')
            raise

    @classmethod
    def init_backend(cls, path, init=False, soft=False):
        """Create the bare repository ``<path>/database``.

        ``init`` and ``soft`` are accepted but not used here.
        """
        init_repository('{0}/database'.format(path), bare=True)

    #######################################################################
    # Internal utility functions
    #######################################################################
    def _call(self, command):
        """Interface to call git for functions not yet implemented using
        libgit2.

        Runs ``command`` inside the database folder and returns its standard
        output. Raises EnvironmentError(returncode, stderr) when the command
        exits with a non-zero status.
        """
        popen = Popen(command, stdout=PIPE, stderr=PIPE, cwd=self.path_data)
        stdoutdata, stderrdata = popen.communicate()
        if popen.returncode != 0:
            raise EnvironmentError(popen.returncode, stderrdata)
        return stdoutdata

    @lazy
    def username(self):
        """Committer name as reported by ``git config --get user.name``."""
        cmd = ['git', 'config', '--get', 'user.name']
        try:
            username = self._call(cmd).rstrip()
        except EnvironmentError:
            raise ValueError(
                "Please configure 'git config --global user.name'")
        return username

    @lazy
    def useremail(self):
        """Committer email as reported by ``git config --get user.email``."""
        cmd = ['git', 'config', '--get', 'user.email']
        try:
            useremail = self._call(cmd).rstrip()
        except EnvironmentError:
            raise ValueError(
                "Please configure 'git config --global user.email'")
        return useremail

    def _resolve_reference(self, reference):
        """This method returns the SHA the given reference points to. For now
        only HEAD is supported.

        A 40-character argument is assumed to be a SHA already and returned
        unchanged. None is returned when resolving the reference raises
        KeyError.

        FIXME This is quick & dirty. TODO Implement references in pygit2 and
        use them here.
        """
        # Case 1: SHA
        if len(reference) == 40:
            return reference

        # Case 2: reference
        reference = self.repo.lookup_reference(reference)
        try:
            reference = reference.resolve()
        except KeyError:
            return None

        return reference.target

    def normalize_key(self, path, __root=None):
        """Return ``path`` as a '/'-joined key, refusing the '.git' folder.

        NOTE(review): with the default ``__root=None`` the call to
        ``__root.resolve`` fails; the default was presumably meant to be a
        root path object -- confirm against the original module.
        """
        # Performance is critical so assume the path is already relative to
        # the repository.
        key = __root.resolve(path)
        if key and key[0] == '.git':
            err = "bad '{0}' path, access to the '.git' folder is denied"
            raise ValueError(err.format(path))
        return '/'.join(key)

    def handler_exists(self, key):
        """Tell whether ``key`` can be looked up in the tree of HEAD."""
        tree = self.repo.head.peel(GIT_OBJ_TREE)
        try:
            tree[key]
        except Exception:
            return False
        return True

    def get_handler_names(self, key):
        """Yield the entry names of the folder ``key`` (root when empty).

        When the folder cannot be read, or ``key`` is a blob, a single
        ``None`` is yielded instead.
        """
        try:
            tree = self.repo.head.peel(GIT_OBJ_TREE)
            if key:
                tree_entry = tree[key]
                if tree_entry.type == 'blob':
                    raise ValueError
                tree = self.repo[tree_entry.id]
        except Exception:
            yield None
        else:
            for item in tree:
                yield item.name

    def get_handler_data(self, key):
        """Return the raw data of the blob stored at ``key`` in HEAD."""
        tree = self.repo.head.peel(GIT_OBJ_TREE)
        tree_entry = tree[key]
        blob = self.repo[tree_entry.id]
        return blob.data

    def get_handler_mimetype(self, key):
        """Guess the mimetype of ``key`` from its content."""
        data = self.get_handler_data(key)
        return magic_from_buffer(data)

    def handler_is_file(self, key):
        """Opposite of ``handler_is_folder``."""
        return not self.handler_is_folder(key)

    def handler_is_folder(self, key):
        """Tell whether ``key`` is a tree; the empty key is the root folder."""
        if key == '':
            return True
        tree = self.repo.head.peel(GIT_OBJ_TREE)
        return tree[key].type == 'tree'

    def get_handler_mtime(self, key):
        """Return the current UTC time (no real mtime is tracked yet)."""
        # FIXME
        return datetime.utcnow().replace(tzinfo=fixed_offset(0))

    def traverse_resources(self):
        """Yield the root resource, then one resource per '*.metadata' file.

        Relies on ``self.get_resource``, which is not defined in this class.
        """
        tree = self.repo.head.peel(GIT_OBJ_TREE)
        yield self.get_resource('/')
        for name in self.get_names(tree):
            if name[-9:] == '.metadata' and name != '.metadata':
                yield self.get_resource('/' + name[:-9])

    def get_names(self, tree, path=''):
        """Yield the '/'-prefixed path of every entry below ``tree``,
        recursing into sub-trees right after yielding them.
        """
        for entry in tree:
            base_path = '{0}/{1}'.format(path, entry.name)
            yield base_path
            if entry.filemode == GIT_FILEMODE_TREE:
                sub_tree = self.repo.get(entry.id)
                for x in self.get_names(sub_tree, base_path):
                    yield x

    def do_transaction(self, commit_message, data, added, changed, removed,
                       handlers):
        """Stage the added/changed/removed keys in the index and commit.

        ``data`` is a 5-tuple (author, date, message, docs_to_index,
        docs_to_unindex); only the first three items are used here. The
        commit message is ``commit_message``, else the message from
        ``data``, else 'no comment'.
        """
        self.nb_transactions += 1
        # Get informations
        git_author, git_date, git_msg, docs_to_index, docs_to_unindex = data
        git_msg = commit_message or git_msg or 'no comment'
        # List of Changed
        added_and_changed = list(added) + list(changed)
        # Build the tree from index
        index = self.repo.index
        for key in added_and_changed:
            handler = handlers.get(key)
            blob_id = self.repo.create_blob(handler.to_str())
            entry = IndexEntry(key, blob_id, GIT_FILEMODE_BLOB_EXECUTABLE)
            index.add(entry)
        for key in removed:
            index.remove(key)
        git_tree = index.write_tree()
        # Commit
        self.git_commit(git_msg, git_author, git_date, tree=git_tree)

    def git_commit(self, message, author=None, date=None, tree=None):
        """Equivalent to 'git commit', we must give the message and we can
        also give the author and date.

        ``author`` is a (name, email) pair and defaults to the committer.
        ``date`` must be a timezone-aware datetime; it only affects the
        author signature, the committer is always stamped with "now".

        Raises:
            ValueError: if no tree is given.
            NotImplementedError: if ``date`` is a naive datetime.
        """
        # Tree
        if tree is None:
            raise ValueError('Please give me a tree')

        # Parent
        parent = self._resolve_reference('HEAD')
        parents = [parent] if parent else []

        # Committer: whole seconds since the epoch and offset in minutes.
        when_time = int(time.time())
        # time.daylight only says the zone *defines* DST; tm_isdst says
        # whether it is in effect at that moment.
        in_dst = time.daylight and time.localtime(when_time).tm_isdst > 0
        when_offset = -(time.altzone if in_dst else time.timezone) // 60

        name = self.username
        email = self.useremail
        committer = Signature(name, email, when_time, when_offset)

        # Author
        if author is None:
            author = (name, email)

        if date:
            utc_offset = date.utcoffset()
            if utc_offset is None:
                err = "Worktree.git_commit doesn't support naive datatime yet"
                raise NotImplementedError(err)
            # Unix time of the given moment.
            when_time = timegm(date.utctimetuple())
            # total_seconds() keeps the sign of offsets west of UTC, which
            # timedelta.seconds does not.
            when_offset = int(utc_offset.total_seconds()) // 60

        author = Signature(author[0], author[1], when_time, when_offset)

        # Create the commit
        return self.repo.create_commit('HEAD', author, committer, message,
                                       tree, parents)

    def abort_transaction(self):
        """Do nothing; already created blobs are left in the repository."""
        # TODO: Remove created blobs
        pass
Exemplo n.º 4
0
class GitHandler(object):
    """Collect file changes on top of the last commit and commit them at once.

    Changes are queued in a ``TreeModifier`` and described in
    ``self.messages``; ``commit`` turns them into a single commit whose
    message is the list of collected change lines.
    """

    def __init__(self, path, repo_path=None, update_working_copy=True):
        """
        Start a git handler in given repository.
        `path`: base folder for working-copy files and the `dbcommit` file.
        `repo_path`: location of the repository, defaults to `path`.
        `update_working_copy`: whether to also update the working copy in
            addition to the git database.
            Updating the working copy can take a lot of time in
            large repositories.
        """
        self.path = path
        if repo_path is None:
            repo_path = self.path
        self.repo_path = repo_path
        self.update_working_copy = update_working_copy
        self.repo = Repository(self.repo_path)
        self.working_tree = self.get_last_tree()
        self.tree_modifier = TreeModifier(self.repo, self.working_tree)
        self.messages = []
        print("Started libgit2 git handler in ", self.path)

    def get_last_tree(self):
        """Return the tree of the current commit, or a freshly written empty
        tree when HEAD is unborn.
        """
        if self.repo.head_is_unborn:
            tree_id = self.repo.TreeBuilder().write()
            return self.repo[tree_id]
        commit = self.repo[self.getCurrentCommit()]
        return commit.tree

    def insert_into_working_tree(self, blob_id, filename):
        """Queue the insertion of ``blob_id`` at ``filename``."""
        self.tree_modifier.insert_blob(blob_id, filename)

    def remove_from_working_tree(self, filename):
        """Queue the removal of ``filename``."""
        self.tree_modifier.remove_blob(filename)

    def write_file(self, filename, content):
        """Queue ``content`` (text) as the new content of ``filename``.

        Nothing happens when the file already exists in the working tree
        with the same hash. Otherwise the change is recorded as 'A' (added)
        or 'M' (modified) and, for non-bare repositories with
        ``update_working_copy`` set, also written to disk as UTF-8.
        """
        # TODO: combine writing many files
        assert isinstance(content, text_type)
        data = content.encode('utf-8')
        existing_entry = get_tree_entry(self.repo, self.working_tree, filename)
        if existing_entry:
            change_type = 'M'
            if existing_entry.id == git_hash(data):
                return
        else:
            change_type = 'A'
        blob_id = self.repo.create_blob(data)
        self.insert_into_working_tree(blob_id, filename)

        if not self.repo.is_bare and self.update_working_copy:
            real_filename = os.path.join(self.path, filename)
            mkdir_p(os.path.dirname(real_filename))
            with codecs.open(real_filename, 'w', encoding='utf-8') as outfile:
                outfile.write(content)

        self.messages.append('    {}  {}'.format(change_type, filename))

    def remove_file(self, filename):
        """Queue the deletion of ``filename``; no-op if it is not in the tree."""
        existing_entry = get_tree_entry(self.repo, self.working_tree, filename)
        if not existing_entry:
            return

        self.remove_from_working_tree(filename)

        if not self.repo.is_bare and self.update_working_copy:
            remove_file_with_empty_parents(self.path, filename)

        self.messages.append('    D  {}'.format(filename))

    def move_file(self, old_filename, new_filename):
        """Queue a rename, mirroring it on disk when the working copy is kept
        up to date.
        """
        self.tree_modifier.move(old_filename, new_filename)

        if not self.repo.is_bare and self.update_working_copy:
            real_old_filename = os.path.join(self.path, old_filename)
            real_new_filename = os.path.join(self.path, new_filename)
            mkdir_p(os.path.dirname(real_new_filename))
            os.rename(real_old_filename, real_new_filename)
            remove_file_with_empty_parents(self.path, old_filename)

        self.messages.append('    R  {} -> {}'.format(old_filename,
                                                      new_filename))

    def commit(self):
        """Apply the queued changes and commit them.

        Returns without committing when the resulting tree equals the tree
        of the current commit.

        Raises:
            Exception: if the last committed tree is no longer the tree the
                queued changes were based on.
        """
        if self.tree_modifier.tree.id != self.get_last_tree().id:
            raise Exception("The repository was modified outside of this process. For safety reasons, we cannot commit!")
        self.working_tree = self.tree_modifier.apply()
        self.tree_modifier = TreeModifier(self.repo, self.working_tree)

        if self.repo.head_is_unborn:
            parents = []
        else:
            commit = self.repo[self.getCurrentCommit()]
            if commit.tree.id == self.working_tree.id:
                return
            parents = [commit.id]

        config = self.repo.config
        signature = Signature(config['user.name'], config['user.email'])
        message = '\n'.join(self.messages)
        # NOTE(review): the parent is read from HEAD but the commit is written
        # to refs/heads/master; these only agree while HEAD points at master.
        self.repo.create_commit('refs/heads/master',
                                signature, signature, message,
                                self.working_tree.id,
                                parents)
        self.saveCurrentCommit()
        self.messages = []
        if not self.repo.is_bare and self.update_working_copy:
            self.repo.index.read_tree(self.working_tree)
            self.repo.index.write()

    def reset(self):
        """Drop all queued changes and messages."""
        self.working_tree = self.get_last_tree()
        self.tree_modifier = TreeModifier(self.repo, self.working_tree)
        self.messages = []

    def getCurrentCommit(self):
        """Return the target of HEAD."""
        return self.repo.head.target

    def saveCurrentCommit(self):
        """Write the current commit id, as text, to ``<path>/dbcommit``."""
        with open(os.path.join(self.path, 'dbcommit'), 'w') as dbcommit_file:
            dbcommit_file.write(str(self.getCurrentCommit()) + '\n')