def write_file(repo: pygit2.Repository, tree: pygit2.Tree, filepath: str, contents: str) -> pygit2.Oid:
    """Return the id of a new root tree equal to *tree* plus one file.

    The file at *filepath* (components separated by ``/``) is created or
    replaced with *contents* encoded as UTF-8.  Every tree along the path is
    rebuilt; directories that do not exist yet are created from an empty
    ``TreeBuilder``.  Nothing is committed: only blob and tree objects are
    written to the object database.

    Args:
        repo: Repository whose object database receives the new objects.
        tree: Root tree to start from.
        filepath: Slash-separated path of the file inside the tree.
        contents: Text content of the file.

    Returns:
        The id of the newly written root tree.
    """
    *dirnames, filename = filepath.split("/")

    # Walk down the directory chain, remembering the existing tree at every
    # level.  ``None`` marks a directory that has to be created.
    levels = [tree]
    for dirname in dirnames:
        parent = levels[-1]
        child = None
        # Once one directory is missing, everything below it is missing too;
        # subscripting ``None`` here used to raise TypeError for paths with
        # two or more new directories.
        if parent is not None:
            try:
                child = repo[parent[dirname].id]
            except KeyError:
                child = None
        levels.append(child)

    # Rebuild from the leaf upwards: the first object inserted is the blob,
    # every later one is the tree produced by the previous iteration.
    to_insert = repo.create_blob(contents.encode("utf-8"))
    filemode = pygit2.GIT_FILEMODE_BLOB
    names = dirnames + [filename]
    for name, level in zip(reversed(names), reversed(levels)):
        builder = repo.TreeBuilder() if level is None else repo.TreeBuilder(level)
        builder.insert(name, to_insert, filemode)
        to_insert = builder.write()
        filemode = pygit2.GIT_FILEMODE_TREE
    return to_insert
class GitBlack:
    """Commit working-tree modifications grouped by the commits they blame to.

    ``commit_changes`` diffs the working tree, blames every modified hunk,
    groups the resulting deltas by the set of commits they originate from and
    creates one new commit per group.
    """

    # Minimum number of seconds between two progress lines written to stdout.
    _LOG_INTERVAL = 0.04
    # Number of blame worker threads; also the backlog size that triggers a
    # drain of finished tasks.
    _MAX_WORKERS = 8

    def __init__(self):
        self.repo = Repository(".")
        # filename -> Patcher for that file
        self.patchers = {}
        # State below is (re)initialised by commit_changes(); it is created
        # here as well so the helper methods work on a fresh instance.
        self.grouped_deltas = {}
        self._file_modes = {}
        self.progress = 0
        self.total = 0
        self.last_log = 0

    def _report_progress(self, template):
        """Write a progress line to stdout, rate-limited by ``_LOG_INTERVAL``.

        *template* is formatted with ``self.progress`` and ``self.total``.
        """
        now = time.monotonic()
        if now - self.last_log > self._LOG_INTERVAL:
            sys.stdout.write(template.format(self.progress, self.total))
            sys.stdout.flush()
            self.last_log = now

    def get_blamed_deltas(self, patch):
        """Register a Patcher for the patched file and return its blames."""
        filename = patch.delta.old_file.path
        self.patchers[filename] = Patcher(self.repo, filename)
        return HunkBlamer(self.repo, patch).blames()

    def group_blame_deltas(self, blames):
        """Add the deltas of one file to ``grouped_deltas``.

        Deltas are keyed by the sorted tuple of their commits.  The progress
        counter advances once per call, and a progress line may be printed.
        """
        for delta_blame in blames:
            key = tuple(sorted(delta_blame.commits))
            self.grouped_deltas.setdefault(key, []).append(delta_blame.delta)
        self.progress += 1
        self._report_progress("Reading file {}/{} \r")

    def commit_changes(self):
        """Blame all modified files and create one commit per commit group.

        Raises:
            GitIndexNotEmpty: if any status entry matches ``index_statuses``.
        """
        start = time.monotonic()
        self.grouped_deltas = {}

        if any(status & index_statuses
               for status in self.repo.status().values()):
            raise GitIndexNotEmpty

        patches = []
        self._file_modes = {}
        diff = self.repo.diff(context_lines=0,
                              flags=GIT_DIFF_IGNORE_SUBMODULES)
        for patch in diff:
            if patch.delta.status != GIT_DELTA_MODIFIED:
                continue
            old_file = patch.delta.old_file
            self._file_modes[old_file.path] = old_file.mode
            patches.append(patch)

        self.progress = 0
        self.last_log = 0
        self.total = len(patches)

        # The context manager guarantees the worker threads are shut down,
        # including when a task raises.
        with ThreadPoolExecutor(max_workers=self._MAX_WORKERS) as executor:
            tasks = set()
            for patch in patches:
                tasks.add(executor.submit(self.get_blamed_deltas, patch))
                if len(tasks) > self._MAX_WORKERS:
                    # Drain whatever has finished so the backlog stays small.
                    done, tasks = wait(tasks, return_when=FIRST_COMPLETED)
                    for task in done:
                        self.group_blame_deltas(task.result())
            for task in tasks:
                self.group_blame_deltas(task.result())

        secs = time.monotonic() - start
        sys.stdout.write("Reading file {}/{} ({:.2f} secs).\n".format(
            self.progress, self.total, secs))

        start = time.monotonic()
        self.total = len(self.grouped_deltas)
        self.progress = 0
        self.last_log = 0
        for commits, deltas in self.grouped_deltas.items():
            blobs = self._create_blobs(deltas)
            self._commit(commits, blobs)
        secs = time.monotonic() - start
        print("Making commit {}/{} ({:.2f} secs).".format(
            self.progress, self.total, secs))

    def _create_blobs(self, deltas):
        """Apply *deltas* to their patchers; return ``{filename: blob id}``."""
        filenames = set()
        for delta in deltas:
            self.patchers[delta.filename].apply(delta)
            filenames.add(delta.filename)
        return {
            filename: self.repo.create_blob(self.patchers[filename].content())
            for filename in filenames
        }

    def _commit(self, original_commits, blobs):
        """Stage *blobs* and commit them on HEAD.

        The message and author are taken from the original commit that sorts
        last under ``commit_datetime``; the committer comes from the
        repository's ``user.name`` / ``user.email`` configuration.
        """
        for filename, blob_id in blobs.items():
            entry = IndexEntry(filename, blob_id, self._file_modes[filename])
            self.repo.index.add(entry)

        commits = [self.repo.get(h) for h in original_commits]
        main_commit = commits[0]
        if len(commits) > 1:
            # most recent commit
            main_commit = sorted(commits, key=commit_datetime)[-1]

        commit_message = main_commit.message
        commit_message += "\n\nautomatic commit by git-black, original commits:\n"
        commit_message += "\n".join(
            " {}".format(c) for c in original_commits)

        committer = Signature(
            name=self.repo.config["user.name"],
            email=self.repo.config["user.email"],
        )
        self.repo.index.write()
        tree = self.repo.index.write_tree()
        head = self.repo.head.peel()
        self.repo.create_commit("HEAD", main_commit.author, committer,
                                commit_message, tree, [head.id])

        self.progress += 1
        self._report_progress("Making commit {}/{} \r")
class GitBareBackend(object):
    """Storage backend reading and writing handlers in a bare git repository.

    The repository lives in the ``database`` folder under the given path.
    Reads go through the tree of HEAD; writes are staged in the repository
    index and committed by ``do_transaction``.
    """

    nb_transactions = 0

    def __init__(self, path):
        """Open the repository found in ``<path>/database/``.

        Raises:
            ValueError: if the database folder does not exist.
            Exception: re-raised after printing a hint when the git committer
                name or e-mail cannot be read.
        """
        self.path = abspath(path) + '/'
        # Open database
        self.path_data = '%s/database/' % self.path
        if not lfs.is_folder(self.path_data):
            error = '"%s" should be a folder, but it is not' % path
            raise ValueError(error)
        # Open repository
        self.repo = Repository(self.path_data)
        # Read index
        try:
            tree = self.repo.head.peel(GIT_OBJ_TREE)
            self.repo.index.read_tree(tree.id)
        except Exception:
            # Best effort: if HEAD cannot be peeled to a tree the index is
            # simply left as it is.
            pass
        # Check git committer
        try:
            _, _ = self.username, self.useremail
        except Exception:
            print('=========================================')
            print('ERROR: Please configure GIT commiter via')
            print(' $ git config --global user.name')
            print(' $ git config --global user.email')
            print('=========================================')
            raise

    @classmethod
    def init_backend(cls, path, init=False, soft=False):
        """Create the bare repository ``<path>/database``.

        ``init`` and ``soft`` are accepted but not used here.
        """
        init_repository('{0}/database'.format(path), bare=True)

    #######################################################################
    # Internal utility functions
    #######################################################################
    def _call(self, command):
        """Interface to call git for functions not yet implemented using
        libgit2.

        Runs *command* in the database folder and returns its standard
        output.  Raises EnvironmentError(returncode, stderr) on a non-zero
        exit status.
        """
        popen = Popen(command, stdout=PIPE, stderr=PIPE, cwd=self.path_data)
        stdoutdata, stderrdata = popen.communicate()
        if popen.returncode != 0:
            raise EnvironmentError(popen.returncode, stderrdata)
        return stdoutdata

    @lazy
    def username(self):
        """Value of ``git config --get user.name`` (trailing space stripped)."""
        cmd = ['git', 'config', '--get', 'user.name']
        try:
            username = self._call(cmd).rstrip()
        except EnvironmentError:
            raise ValueError(
                "Please configure 'git config --global user.name'")
        return username

    @lazy
    def useremail(self):
        """Value of ``git config --get user.email`` (trailing space stripped)."""
        cmd = ['git', 'config', '--get', 'user.email']
        try:
            useremail = self._call(cmd).rstrip()
        except EnvironmentError:
            raise ValueError(
                "Please configure 'git config --global user.email'")
        return useremail

    def _resolve_reference(self, reference):
        """This method returns the SHA the given reference points to. For now
        only HEAD is supported.

        A 40-character string is assumed to be a SHA already and is returned
        unchanged.  Returns None when resolving the reference raises
        KeyError.

        FIXME This is quick & dirty.
        TODO Implement references in pygit2 and use them here.
        """
        # Case 1: SHA
        if len(reference) == 40:
            return reference
        # Case 2: reference
        reference = self.repo.lookup_reference(reference)
        try:
            reference = reference.resolve()
        except KeyError:
            return None
        return reference.target

    def normalize_key(self, path, __root=None):
        """Resolve *path* against ``__root`` and return it joined with '/'.

        Raises ValueError when the resolved key starts with ``.git``.
        """
        # Performance is critical so assume the path is already relative to
        # the repository.
        # NOTE(review): with the default ``__root=None`` the next line raises
        # AttributeError; a root object providing ``resolve`` has to be
        # passed positionally -- confirm the intended default.
        key = __root.resolve(path)
        if key and key[0] == '.git':
            err = "bad '{0}' path, access to the '.git' folder is denied"
            raise ValueError(err.format(path))
        return '/'.join(key)

    def handler_exists(self, key):
        """Return True when *key* can be looked up in the tree of HEAD."""
        tree = self.repo.head.peel(GIT_OBJ_TREE)
        try:
            tree[key]
        except Exception:
            return False
        return True

    def get_handler_names(self, key):
        """Yield the entry names of the tree at *key* (HEAD's tree if empty).

        Yields a single None when the tree cannot be obtained or *key*
        designates a blob.
        """
        try:
            tree = self.repo.head.peel(GIT_OBJ_TREE)
            if key:
                tree_entry = tree[key]
                if tree_entry.type == 'blob':
                    raise ValueError
                tree = self.repo[tree_entry.id]
        except Exception:
            yield None
        else:
            for item in tree:
                yield item.name

    def get_handler_data(self, key):
        """Return the raw data of the blob stored at *key* in HEAD's tree."""
        tree = self.repo.head.peel(GIT_OBJ_TREE)
        tree_entry = tree[key]
        blob = self.repo[tree_entry.id]
        return blob.data

    def get_handler_mimetype(self, key):
        """Return ``magic_from_buffer`` applied to the data stored at *key*."""
        data = self.get_handler_data(key)
        return magic_from_buffer(data)

    def handler_is_file(self, key):
        """Return True when *key* is not a folder."""
        return not self.handler_is_folder(key)

    def handler_is_folder(self, key):
        """Return True for the root key ('') or when *key* is a tree entry."""
        if key == '':
            return True
        tree = self.repo.head.peel(GIT_OBJ_TREE)
        return tree[key].type == 'tree'

    def get_handler_mtime(self, key):
        """Return the current UTC time; *key* is ignored for now."""
        # FIXME
        return datetime.utcnow().replace(tzinfo=fixed_offset(0))

    def traverse_resources(self):
        """Yield the root resource, then one resource per '*.metadata' path."""
        tree = self.repo.head.peel(GIT_OBJ_TREE)
        yield self.get_resource('/')
        for name in self.get_names(tree):
            if name[-9:] == '.metadata' and name != '.metadata':
                yield self.get_resource('/' + name[:-9])

    def get_names(self, tree, path=''):
        """Recursively yield '<path>/<name>' for every entry under *tree*."""
        for entry in tree:
            base_path = '{0}/{1}'.format(path, entry.name)
            yield base_path
            if entry.filemode == GIT_FILEMODE_TREE:
                # Look the sub-tree up by id, as the other methods of this
                # class do, instead of the deprecated hex accessor.
                sub_tree = self.repo.get(entry.id)
                for x in self.get_names(sub_tree, base_path):
                    yield x

    def do_transaction(self, commit_message, data, added, changed, removed,
                       handlers):
        """Stage added/changed/removed keys in the index and commit them.

        *data* is a 5-tuple ``(git_author, git_date, git_msg, docs_to_index,
        docs_to_unindex)``; only the first three items are used here.
        """
        self.nb_transactions += 1
        # Get informations
        git_author, git_date, git_msg, docs_to_index, docs_to_unindex = data
        git_msg = commit_message or git_msg or 'no comment'
        # List of Changed
        added_and_changed = list(added) + list(changed)
        # Build the tree from index
        index = self.repo.index
        for key in added_and_changed:
            handler = handlers.get(key)
            blob_id = self.repo.create_blob(handler.to_str())
            entry = IndexEntry(key, blob_id, GIT_FILEMODE_BLOB_EXECUTABLE)
            index.add(entry)
        for key in removed:
            index.remove(key)
        git_tree = index.write_tree()
        # Commit
        self.git_commit(git_msg, git_author, git_date, tree=git_tree)

    def git_commit(self, message, author=None, date=None, tree=None):
        """Equivalent to 'git commit', we must give the message and we can
        also give the author and date.

        *author* is a ``(name, email)`` pair and defaults to the configured
        committer.  *date* must be a timezone-aware datetime when given; it
        is used as the author time, while the committer time is always now.

        Raises:
            ValueError: if *tree* is None.
            NotImplementedError: if *date* is a naive datetime.
        """
        # Tree
        if tree is None:
            raise ValueError('Please give me a tree')
        # Parent
        parent = self._resolve_reference('HEAD')
        parents = [parent] if parent else []
        # Committer
        when_time = int(time.time())
        # time.daylight only says that a DST zone is *defined*; whether DST
        # is in effect right now is given by tm_isdst.
        dst_active = time.localtime(when_time).tm_isdst > 0
        local_offset = time.altzone if dst_active else time.timezone
        when_offset = (-local_offset) // 60
        name = self.username
        email = self.useremail
        committer = Signature(name, email, when_time, when_offset)
        # Author
        if author is None:
            author = (name, email)
        if date:
            if date.tzinfo:
                # Aware datetime -> unix time
                when_time = timegm(date.utctimetuple())
                # total_seconds() keeps the sign; ``.seconds`` alone is wrong
                # for zones west of UTC (e.g. -05:00 gave 1140 minutes).
                when_offset = int(date.utcoffset().total_seconds() // 60)
            else:
                err = "Worktree.git_commit doesn't support naive datatime yet"
                raise NotImplementedError(err)
        author = Signature(author[0], author[1], when_time, when_offset)
        # Create the commit
        return self.repo.create_commit('HEAD', author, committer, message,
                                       tree, parents)

    def abort_transaction(self):
        """Do nothing for now."""
        # TODO: Remove created blobs
        pass
class GitHandler(object):
    """Stage file changes against a git repository and commit them at once.

    Changes are collected in a ``TreeModifier`` built on the tree of the
    current commit.  One message line is recorded per change and the joined
    lines become the commit message.
    """

    def __init__(self, path, repo_path=None, update_working_copy=True):
        """
        Start a git handler in given repository.

        `path`: directory used for working-copy files and the `dbcommit`
        file.
        `repo_path`: location of the repository; defaults to `path`.
        `update_working_copy`: whether also to update the working copy. By
        default, the git handler will only work on the git database.
        Updating the working copy can take a lot of time in large
        repositories.
        """
        self.path = path
        if repo_path is None:
            repo_path = self.path
        self.repo_path = repo_path
        self.update_working_copy = update_working_copy
        self.repo = Repository(self.repo_path)
        self.working_tree = self.get_last_tree()
        self.tree_modifier = TreeModifier(self.repo, self.working_tree)
        self.messages = []
        print("Started libgit2 git handler in ", self.path)

    def _touches_working_copy(self):
        """Return True when changes must be mirrored to files on disk."""
        return not self.repo.is_bare and self.update_working_copy

    def get_last_tree(self):
        """Return the tree of the current commit, or a freshly written empty
        tree when HEAD is unborn."""
        if self.repo.head_is_unborn:
            tree_id = self.repo.TreeBuilder().write()
            return self.repo[tree_id]
        commit = self.repo[self.getCurrentCommit()]
        return commit.tree

    def insert_into_working_tree(self, blob_id, filename):
        """Stage *blob_id* under *filename* in the tree modifier."""
        self.tree_modifier.insert_blob(blob_id, filename)

    def remove_from_working_tree(self, filename):
        """Stage the removal of *filename* in the tree modifier."""
        self.tree_modifier.remove_blob(filename)

    def write_file(self, filename, content):
        """Stage *content* (text) as the new content of *filename*.

        Does nothing when the file already exists with identical content.
        Records an 'A' (added) or 'M' (modified) message line.
        """
        # TODO: combine writing many files
        assert isinstance(content, text_type)
        data = content.encode('utf-8')
        existing_entry = get_tree_entry(self.repo, self.working_tree, filename)
        if existing_entry:
            if existing_entry.id == git_hash(data):
                return
            change_type = 'M'
        else:
            change_type = 'A'
        blob_id = self.repo.create_blob(data)
        self.insert_into_working_tree(blob_id, filename)
        if self._touches_working_copy():
            real_filename = os.path.join(self.path, filename)
            mkdir_p(os.path.dirname(real_filename))
            with codecs.open(real_filename, 'w', encoding='utf-8') as outfile:
                outfile.write(content)
        self.messages.append(' {} {}'.format(change_type, filename))

    def remove_file(self, filename):
        """Stage the removal of *filename* if it exists in the working tree.

        Records a 'D' message line when something was removed.
        """
        existing_entry = get_tree_entry(self.repo, self.working_tree, filename)
        if not existing_entry:
            return
        self.remove_from_working_tree(filename)
        if self._touches_working_copy():
            remove_file_with_empty_parents(self.path, filename)
        self.messages.append(' D {}'.format(filename))

    def move_file(self, old_filename, new_filename):
        """Stage a move from *old_filename* to *new_filename*.

        Records an 'R' message line.
        """
        self.tree_modifier.move(old_filename, new_filename)
        if self._touches_working_copy():
            real_old_filename = os.path.join(self.path, old_filename)
            real_new_filename = os.path.join(self.path, new_filename)
            mkdir_p(os.path.dirname(real_new_filename))
            os.rename(real_old_filename, real_new_filename)
            remove_file_with_empty_parents(self.path, old_filename)
        self.messages.append(' R {} -> {}'.format(old_filename, new_filename))

    def commit(self):
        """Apply the staged changes and commit them to refs/heads/master.

        Returns without committing when the resulting tree equals the tree
        of the current commit.

        Raises:
            Exception: if the tree the modifier was built on is no longer
                the tree of the current commit.
        """
        if self.tree_modifier.tree.id != self.get_last_tree().id:
            raise Exception("The repository was modified outside of this process. For safety reasons, we cannot commit!")
        self.working_tree = self.tree_modifier.apply()
        self.tree_modifier = TreeModifier(self.repo, self.working_tree)

        if self.repo.head_is_unborn:
            parents = []
        else:
            commit = self.repo[self.getCurrentCommit()]
            if commit.tree.id == self.working_tree.id:
                return
            parents = [commit.id]

        config = self.repo.config
        # Author and committer are the same configured identity.
        signature = Signature(config['user.name'], config['user.email'])
        message = '\n'.join(self.messages)
        self.repo.create_commit('refs/heads/master', signature, signature,
                                message, self.working_tree.id, parents)
        self.saveCurrentCommit()
        self.messages = []
        if self._touches_working_copy():
            self.repo.index.read_tree(self.working_tree)
            self.repo.index.write()

    def reset(self):
        """Drop all staged changes and messages."""
        self.working_tree = self.get_last_tree()
        self.tree_modifier = TreeModifier(self.repo, self.working_tree)
        self.messages = []

    def getCurrentCommit(self):
        """Return the target of HEAD."""
        return self.repo.head.target

    def saveCurrentCommit(self):
        """Write the current commit id, plus a newline, to ``<path>/dbcommit``."""
        with open(os.path.join(self.path, 'dbcommit'), 'w') as dbcommit_file:
            # str(oid) is the hex form; the ``Oid.hex`` attribute is
            # deprecated in pygit2.
            dbcommit_file.write(str(self.getCurrentCommit()) + '\n')