def __init__(self, repo, a_path, b_path, a_blob_id, b_blob_id, a_mode,
             b_mode, new_file, deleted_file, rename_from, rename_to, diff):
    self.a_mode = a_mode
    self.b_mode = b_mode

    if self.a_mode:
        self.a_mode = mode_str_to_int(self.a_mode)
    if self.b_mode:
        self.b_mode = mode_str_to_int(self.b_mode)

    if a_blob_id is None:
        self.a_blob = None
    else:
        self.a_blob = Blob(repo, hex_to_bin(a_blob_id), mode=self.a_mode, path=a_path)

    if b_blob_id is None:
        self.b_blob = None
    else:
        self.b_blob = Blob(repo, hex_to_bin(b_blob_id), mode=self.b_mode, path=b_path)

    self.new_file = new_file
    self.deleted_file = deleted_file

    # be clear and use None instead of empty strings
    self.rename_from = rename_from or None
    self.rename_to = rename_to or None

    self.diff = diff
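Every example in this collection leans on gitdb's hex_to_bin / bin_to_hex pair, which converts between the 40-character hexadecimal form of a SHA-1 and the 20-byte binary form used by the object databases. A minimal round-trip sketch (the sample SHA is arbitrary):

from gitdb.util import hex_to_bin, bin_to_hex

hexsha = b"5690fd0d3304f378754b23b098bd7cb5f4aa1976"  # 40 hex characters
binsha = hex_to_bin(hexsha)                           # 20 raw bytes, as stored in object databases
assert len(binsha) == 20
assert bin_to_hex(binsha) == hexsha                   # lossless round trip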
def test_reading(self):
    gdb = GitDB(fixture_path('../../.git/objects'))

    # we have packs and loose objects, alternates don't necessarily exist
    assert 1 < len(gdb.databases()) < 4

    # access should be possible
    gitdb_sha = hex_to_bin("5690fd0d3304f378754b23b098bd7cb5f4aa1976")
    assert isinstance(gdb.info(gitdb_sha), OInfo)
    assert isinstance(gdb.stream(gitdb_sha), OStream)
    assert gdb.size() > 200
    sha_list = list(gdb.sha_iter())
    assert len(sha_list) == gdb.size()

    # This is actually a test for compound functionality, but it doesn't
    # have a separate test module
    # test partial shas
    # this one is uneven and quite short
    assert gdb.partial_to_complete_sha_hex('155b6') == hex_to_bin("155b62a9af0aa7677078331e111d0f7aa6eb4afc")

    # mix even/uneven hexshas
    for i, binsha in enumerate(sha_list):
        assert gdb.partial_to_complete_sha_hex(bin_to_hex(binsha)[:8 - (i % 2)]) == binsha
    # END for each sha

    self.failUnlessRaises(BadObject, gdb.partial_to_complete_sha_hex, "0000")
def test_reading(self):
    gdb = GitDB(fixture_path('../../../.git/objects'))

    # we have packs and loose objects, alternates don't necessarily exist
    assert 1 < len(gdb.databases()) < 4

    # access should be possible
    gitdb_sha = hex_to_bin("5690fd0d3304f378754b23b098bd7cb5f4aa1976")
    assert isinstance(gdb.info(gitdb_sha), OInfo)
    assert isinstance(gdb.stream(gitdb_sha), OStream)
    assert gdb.size() > 200
    sha_list = list(gdb.sha_iter())
    assert len(sha_list) == gdb.size()

    # This is actually a test for compound functionality, but it doesn't
    # have a separate test module
    # test partial shas
    # this one is uneven and quite short
    assert gdb.partial_to_complete_sha_hex('155b6') == hex_to_bin(
        "155b62a9af0aa7677078331e111d0f7aa6eb4afc")

    # mix even/uneven hexshas
    for i, binsha in enumerate(sha_list):
        assert gdb.partial_to_complete_sha_hex(
            bin_to_hex(binsha)[:8 - (i % 2)]) == binsha
    # END for each sha

    self.failUnlessRaises(BadObject, gdb.partial_to_complete_sha_hex, "0000")
def _deserialize(self, stream):
    """:param from_rev_list: if true, the stream format is coming from the rev-list command
    Otherwise it is assumed to be a plain data stream from our object"""
    readline = stream.readline
    self.tree = Tree(self.repo, hex_to_bin(readline().split()[1]), Tree.tree_id << 12, '')

    self.parents = list()
    next_line = None
    while True:
        parent_line = readline()
        if not parent_line.startswith('parent'):
            next_line = parent_line
            break
        # END abort reading parents
        self.parents.append(type(self)(self.repo, hex_to_bin(parent_line.split()[-1])))
    # END for each parent line
    self.parents = tuple(self.parents)

    self.author, self.authored_date, self.author_tz_offset = parse_actor_and_date(next_line)
    self.committer, self.committed_date, self.committer_tz_offset = parse_actor_and_date(readline())

    # now we can have the encoding line, or an empty line followed by the optional
    # message.
    self.encoding = self.default_encoding

    # read encoding or empty line to separate message
    enc = readline()
    enc = enc.strip()
    if enc:
        self.encoding = enc[enc.find(' ') + 1:]
        # now comes the message separator
        readline()
    # END handle encoding

    # decode the author's name
    try:
        self.author.name = self.author.name.decode(self.encoding)
    except UnicodeDecodeError:
        print >> sys.stderr, "Failed to decode author name '%s' using encoding %s" % (self.author.name, self.encoding)
    # END handle author's encoding

    # decode committer name
    try:
        self.committer.name = self.committer.name.decode(self.encoding)
    except UnicodeDecodeError:
        print >> sys.stderr, "Failed to decode committer name '%s' using encoding %s" % (self.committer.name, self.encoding)
    # END handle committer's encoding

    # a stream from our data simply gives us the plain message
    # The end of our message stream is marked with a newline that we strip
    self.message = stream.read()
    try:
        self.message = self.message.decode(self.encoding)
    except UnicodeDecodeError:
        print >> sys.stderr, "Failed to decode message '%s' using encoding %s" % (self.message, self.encoding)
    # END exception handling

    return self
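For reference, the serialized commit payload that the _deserialize variants in this collection walk through has a fixed header layout: a tree line, zero or more parent lines, author and committer lines, optional extra headers, then a blank line and the message. A made-up sample (shas, names, and message are invented):

raw_commit = (
    b"tree 5690fd0d3304f378754b23b098bd7cb5f4aa1976\n"
    b"parent 155b62a9af0aa7677078331e111d0f7aa6eb4afc\n"
    b"author A U Thor <author@example.com> 1192271832 -0700\n"
    b"committer C O Mitter <committer@example.com> 1192271832 -0700\n"
    b"\n"
    b"initial import\n"
)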
def archive(test_viewer: TestViewer) -> Experiment:
    """
    Pack the files of the commit that a given test corresponds to; the
    underlying command is: git archive -o <filename> <commit-id>
    :param test_viewer:
    :return:
    """
    repo = test_viewer.repo
    commit = Commit(repo, hex_to_bin(test_viewer.json_info['commit_hash']))

    old_path = os.getcwd()
    os.chdir(commit.tree.abspath)

    exp = Experiment('Archive')

    revert_path = exp.makedir('archive')
    revert_fn = os.path.join(revert_path, "file.zip")
    exp.regist_plugin('archive', {
        'file': revert_fn,
        'test_name': test_viewer.test_name
    })

    with open(revert_fn, 'wb') as w:
        commit.repo.archive(w, commit)

    exp.end()
    os.chdir(old_path)
    return exp
def _set_cache_(self, attr):
    """Cache all our attributes at once"""
    if attr in TagObject.__slots__:
        ostream = self.repo.odb.stream(self.binsha)
        lines = ostream.read().splitlines()

        obj, hexsha = lines[0].split(" ")            # object <hexsha>
        type_token, type_name = lines[1].split(" ")  # type <type_name>
        self.object = get_object_type_by_name(type_name)(self.repo, hex_to_bin(hexsha))

        self.tag = lines[2][4:]     # tag <tag name>

        tagger_info = lines[3][7:]  # tagger <actor> <date>
        self.tagger, self.tagged_date, self.tagger_tz_offset = parse_actor_and_date(tagger_info)

        # line 4 empty - it could mark the beginning of the next header
        # in case there really is no message, it would not exist. Otherwise
        # a newline separates header from message
        if len(lines) > 5:
            self.message = "\n".join(lines[5:])
        else:
            self.message = ''
    # END check our attributes
    else:
        super(TagObject, self)._set_cache_(attr)
def name_to_object(repo, name):
    """:return: object specified by the given name, hexshas ( short and long )
    as well as references are supported"""
    hexsha = None

    # is it a hexsha ? Try the most common ones, which is 7 to 40
    if repo.re_hexsha_shortened.match(name):
        if len(name) != 40:
            # find long sha for short sha
            hexsha = short_to_long(repo.odb, name)
        else:
            hexsha = name
        # END handle short shas
    else:
        for base in ('%s', 'refs/%s', 'refs/tags/%s', 'refs/heads/%s',
                     'refs/remotes/%s', 'refs/remotes/%s/HEAD'):
            try:
                hexsha = SymbolicReference.dereference_recursive(repo, base % name)
                break
            except ValueError:
                pass
        # END for each base
    # END handle hexsha

    # tried everything ? fail
    if hexsha is None:
        raise BadObject(name)
    # END assert hexsha was found

    return Object.new_from_sha(repo, hex_to_bin(hexsha))
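In GitPython this resolver is normally reached through Repo.rev_parse rather than called directly. A small usage sketch, assuming the current directory is a git repository:

import git

repo = git.Repo('.')
obj = repo.rev_parse('HEAD')   # resolves names via name_to_object internally
print(obj.hexsha, obj.type)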
def test_decompress_reader_special_case(self):
    odb = LooseObjectDB(fixture_path('objects'))
    ostream = odb.stream(hex_to_bin('7bb839852ed5e3a069966281bb08d50012fb309b'))

    # if there is a bug, we will be missing one byte exactly !
    data = ostream.read()
    assert len(data) == ostream.size
def tags(self):
    versions = []
    repo = self._repo

    # Build a cache of tag -> commit
    # GitPython is not very optimized for reading large numbers of tags
    ref_cache = {}  # 'ref/tags/<tag>' -> hexsha
    # This code is the same that is executed for each tag in gitpython,
    # we execute it only once for all tags.
    for hexsha, ref in git.TagReference._iter_packed_refs(repo):
        gitobject = git.Object.new_from_sha(repo, hex_to_bin(hexsha))
        if gitobject.type == 'commit':
            ref_cache[ref] = str(gitobject)
        elif gitobject.type == 'tag' and gitobject.object.type == 'commit':
            ref_cache[ref] = str(gitobject.object)

    for tag in repo.tags:
        if tag.path in ref_cache:
            hexsha = ref_cache[tag.path]
        else:
            try:
                hexsha = str(tag.commit)
            except ValueError:
                # ValueError: Cannot resolve commit as tag TAGNAME points to a
                # blob object - use the `.object` property instead to access it
                # This is not a real tag for us, so we skip it
                # https://github.com/rtfd/readthedocs.org/issues/4440
                log.warning('Git tag skipped: %s', tag, exc_info=True)
                continue

        versions.append(VCSVersion(self, hexsha, str(tag)))
    return versions
def _set_cache_(self, attr):
    """Cache all our attributes at once"""
    if attr in TagObject.__slots__:
        ostream = self.repo.odb.stream(self.binsha)
        lines = ostream.read().decode(defenc).splitlines()

        obj, hexsha = lines[0].split(" ")            # object <hexsha>
        type_token, type_name = lines[1].split(" ")  # type <type_name>
        self.object = \
            get_object_type_by_name(type_name.encode('ascii'))(self.repo, hex_to_bin(hexsha))

        self.tag = lines[2][4:]  # tag <tag name>

        tagger_info = lines[3]   # tagger <actor> <date>
        self.tagger, self.tagged_date, self.tagger_tz_offset = parse_actor_and_date(tagger_info)

        # line 4 empty - it could mark the beginning of the next header
        # in case there really is no message, it would not exist. Otherwise
        # a newline separates header from message
        if len(lines) > 5:
            self.message = "\n".join(lines[5:])
        else:
            self.message = ''
    # END check our attributes
    else:
        super(TagObject, self)._set_cache_(attr)
def test_writing(self, path):
    NULL_BIN_SHA = '\0' * 20

    alt_path = os.path.join(path, 'alternates')
    rdb = ReferenceDB(alt_path)
    assert len(rdb.databases()) == 0
    assert rdb.size() == 0
    assert len(list(rdb.sha_iter())) == 0

    # try empty, non-existing
    assert not rdb.has_object(NULL_BIN_SHA)

    # setup alternate file
    # add two, one is invalid
    own_repo_path = fixture_path('../../../.git/objects')  # use own repo
    self.make_alt_file(alt_path, [own_repo_path, "invalid/path"])
    rdb.update_cache()
    assert len(rdb.databases()) == 1

    # we should now find a default revision of ours
    gitdb_sha = hex_to_bin("5690fd0d3304f378754b23b098bd7cb5f4aa1976")
    assert rdb.has_object(gitdb_sha)

    # remove valid
    self.make_alt_file(alt_path, ["just/one/invalid/path"])
    rdb.update_cache()
    assert len(rdb.databases()) == 0

    # add valid
    self.make_alt_file(alt_path, [own_repo_path])
    rdb.update_cache()
    assert len(rdb.databases()) == 1
def test_index_merge_tree(self, rw_repo):
    # A bit out of place, but we need a different repo for this:
    self.assertNotEqual(self.rorepo, rw_repo)
    self.assertEqual(len(set((self.rorepo, self.rorepo, rw_repo, rw_repo))), 2)

    # SINGLE TREE MERGE
    # current index is at the (virtual) cur_commit
    next_commit = "4c39f9da792792d4e73fc3a5effde66576ae128c"
    parent_commit = rw_repo.head.commit.parents[0]
    manifest_key = IndexFile.entry_key('MANIFEST.in', 0)
    manifest_entry = rw_repo.index.entries[manifest_key]
    rw_repo.index.merge_tree(next_commit)
    # only one change should be recorded
    assert manifest_entry.binsha != rw_repo.index.entries[manifest_key].binsha

    rw_repo.index.reset(rw_repo.head)
    self.assertEqual(rw_repo.index.entries[manifest_key].binsha, manifest_entry.binsha)

    # FAKE MERGE
    #############
    # Add a change with a NULL sha that should conflict with next_commit. We
    # pretend there was a change, but we do not even bother adding a proper
    # sha for it ( which makes things faster of course )
    manifest_fake_entry = BaseIndexEntry((manifest_entry[0], b"\0" * 20, 0, manifest_entry[3]))
    # try write flag
    self._assert_entries(rw_repo.index.add([manifest_fake_entry], write=False))
    # add actually resolves the null-hex-sha for us as a feature, but we can
    # edit the index manually
    assert rw_repo.index.entries[manifest_key].binsha != Object.NULL_BIN_SHA
    # must operate on the same index for this ! It's a bit problematic as
    # it might confuse people
    index = rw_repo.index
    index.entries[manifest_key] = IndexEntry.from_base(manifest_fake_entry)
    index.write()
    self.assertEqual(rw_repo.index.entries[manifest_key].hexsha, Diff.NULL_HEX_SHA)

    # write an unchanged index ( just for the fun of it )
    rw_repo.index.write()

    # a three way merge would result in a conflict and fails as the command will
    # not overwrite any entries in our index and hence leave them unmerged. This is
    # mainly a protection feature as the current index is not yet in a tree
    self.failUnlessRaises(GitCommandError, index.merge_tree, next_commit, base=parent_commit)

    # the only way to get the merged entries is to save the current index away into a tree,
    # which is like a temporary commit for us. This fails as well as the NULL sha does not
    # have a corresponding object
    # NOTE: missing_ok is not a kwarg anymore, missing_ok is always true
    # self.failUnlessRaises(GitCommandError, index.write_tree)

    # if missing objects are okay, this would work though ( they are always okay now )
    # As we can't read back the tree with NULL_SHA, we rather set it to something else
    index.entries[manifest_key] = IndexEntry(manifest_entry[:1] + (hex_to_bin('f' * 40),) + manifest_entry[2:])
    tree = index.write_tree()

    # now make a proper three way merge with unmerged entries
    unmerged_tree = IndexFile.from_tree(rw_repo, parent_commit, tree, next_commit)
    unmerged_blobs = unmerged_tree.unmerged_blobs()
    self.assertEqual(len(unmerged_blobs), 1)
    self.assertEqual(list(unmerged_blobs.keys())[0], manifest_key[0])
def _iter_from_process_or_stream(cls, repo, proc_or_stream):
    """Parse out commit information into a list of Commit objects
    We expect one line per commit, and parse the actual commit information
    directly from our lightning-fast object database

    :param proc: git-rev-list process instance - one sha per line
    :return: iterator returning Commit objects"""
    stream = proc_or_stream
    if not hasattr(stream, 'readline'):
        stream = proc_or_stream.stdout

    readline = stream.readline
    while True:
        line = readline()
        if not line:
            break
        hexsha = line.strip()
        if len(hexsha) > 40:
            # split additional information, as returned by bisect for instance
            hexsha, rest = line.split(None, 1)
        # END handle extra info

        assert len(hexsha) == 40, "Invalid line: %s" % hexsha
        yield Commit(repo, hex_to_bin(hexsha))
    # END for each line in stream

    # TODO: Review this - it seems process handling got a bit out of control
    # due to many developers trying to fix the open file handles issue
    if hasattr(proc_or_stream, 'wait'):
        finalize_process(proc_or_stream)
def test_writing(self, path):
    alt_path = os.path.join(path, 'alternates')
    rdb = ReferenceDB(alt_path)
    assert len(rdb.databases()) == 0
    assert rdb.size() == 0
    assert len(list(rdb.sha_iter())) == 0

    # try empty, non-existing
    assert not rdb.has_object(NULL_BIN_SHA)

    # setup alternate file
    # add two, one is invalid
    own_repo_path = fixture_path('../../../.git/objects')  # use own repo
    self.make_alt_file(alt_path, [own_repo_path, "invalid/path"])
    rdb.update_cache()
    assert len(rdb.databases()) == 1

    # we should now find a default revision of ours
    gitdb_sha = hex_to_bin("5690fd0d3304f378754b23b098bd7cb5f4aa1976")
    assert rdb.has_object(gitdb_sha)

    # remove valid
    self.make_alt_file(alt_path, ["just/one/invalid/path"])
    rdb.update_cache()
    assert len(rdb.databases()) == 0

    # add valid
    self.make_alt_file(alt_path, [own_repo_path])
    rdb.update_cache()
    assert len(rdb.databases()) == 1
def test_list(self):
    assert isinstance(
        Commit.list_items(self.rorepo, "0.1.5", max_count=5)[
            hex_to_bin("5117c9c8a4d3af19a9958677e45cda9269de1541")
        ],
        Commit,
    )
def test_index_merge_tree(self, rw_repo):
    # A bit out of place, but we need a different repo for this:
    assert self.rorepo != rw_repo and not (self.rorepo == rw_repo)
    assert len(set((self.rorepo, self.rorepo, rw_repo, rw_repo))) == 2

    # SINGLE TREE MERGE
    # current index is at the (virtual) cur_commit
    next_commit = "4c39f9da792792d4e73fc3a5effde66576ae128c"
    parent_commit = rw_repo.head.commit.parents[0]
    manifest_key = IndexFile.entry_key('MANIFEST.in', 0)
    manifest_entry = rw_repo.index.entries[manifest_key]
    rw_repo.index.merge_tree(next_commit)
    # only one change should be recorded
    assert manifest_entry.binsha != rw_repo.index.entries[manifest_key].binsha

    rw_repo.index.reset(rw_repo.head)
    assert rw_repo.index.entries[manifest_key].binsha == manifest_entry.binsha

    # FAKE MERGE
    #############
    # Add a change with a NULL sha that should conflict with next_commit. We
    # pretend there was a change, but we do not even bother adding a proper
    # sha for it ( which makes things faster of course )
    manifest_fake_entry = BaseIndexEntry((manifest_entry[0], b"\0" * 20, 0, manifest_entry[3]))
    # try write flag
    self._assert_entries(rw_repo.index.add([manifest_fake_entry], write=False))
    # add actually resolves the null-hex-sha for us as a feature, but we can
    # edit the index manually
    assert rw_repo.index.entries[manifest_key].binsha != Object.NULL_BIN_SHA
    # must operate on the same index for this ! It's a bit problematic as
    # it might confuse people
    index = rw_repo.index
    index.entries[manifest_key] = IndexEntry.from_base(manifest_fake_entry)
    index.write()
    assert rw_repo.index.entries[manifest_key].hexsha == Diff.NULL_HEX_SHA

    # write an unchanged index ( just for the fun of it )
    rw_repo.index.write()

    # a three way merge would result in a conflict and fails as the command will
    # not overwrite any entries in our index and hence leave them unmerged. This is
    # mainly a protection feature as the current index is not yet in a tree
    self.failUnlessRaises(GitCommandError, index.merge_tree, next_commit, base=parent_commit)

    # the only way to get the merged entries is to save the current index away into a tree,
    # which is like a temporary commit for us. This fails as well as the NULL sha does not
    # have a corresponding object
    # NOTE: missing_ok is not a kwarg anymore, missing_ok is always true
    # self.failUnlessRaises(GitCommandError, index.write_tree)

    # if missing objects are okay, this would work though ( they are always okay now )
    # As we can't read back the tree with NULL_SHA, we rather set it to something else
    index.entries[manifest_key] = IndexEntry(manifest_entry[:1] + (hex_to_bin('f' * 40),) + manifest_entry[2:])
    tree = index.write_tree()

    # now make a proper three way merge with unmerged entries
    unmerged_tree = IndexFile.from_tree(rw_repo, parent_commit, tree, next_commit)
    unmerged_blobs = unmerged_tree.unmerged_blobs()
    assert len(unmerged_blobs) == 1 and list(unmerged_blobs.keys())[0] == manifest_key[0]
def _get_object(self):
    """
    :return:
        The object our ref currently refers to. Refs can be cached, they will
        always point to the actual object as it gets re-created on each query"""
    # have to be dynamic here as we may be a tag which can point to anything
    # Our path will be resolved to the hexsha which will be used accordingly
    return Object.new_from_sha(self.repo, hex_to_bin(self.dereference_recursive(self.repo, self.path)))
def __init__(self, repo, a_rawpath, b_rawpath, a_blob_id, b_blob_id, a_mode,
             b_mode, new_file, deleted_file, raw_rename_from, raw_rename_to, diff):
    self.a_mode = a_mode
    self.b_mode = b_mode

    assert a_rawpath is None or isinstance(a_rawpath, binary_type)
    assert b_rawpath is None or isinstance(b_rawpath, binary_type)
    self.a_rawpath = a_rawpath
    self.b_rawpath = b_rawpath

    if self.a_mode:
        self.a_mode = mode_str_to_int(self.a_mode)
    if self.b_mode:
        self.b_mode = mode_str_to_int(self.b_mode)

    if a_blob_id is None or a_blob_id == self.NULL_HEX_SHA:
        self.a_blob = None
    else:
        self.a_blob = Blob(repo, hex_to_bin(a_blob_id), mode=self.a_mode, path=self.a_path)

    if b_blob_id is None or b_blob_id == self.NULL_HEX_SHA:
        self.b_blob = None
    else:
        self.b_blob = Blob(repo, hex_to_bin(b_blob_id), mode=self.b_mode, path=self.b_path)

    self.new_file = new_file
    self.deleted_file = deleted_file

    # be clear and use None instead of empty strings
    assert raw_rename_from is None or isinstance(raw_rename_from, binary_type)
    assert raw_rename_to is None or isinstance(raw_rename_to, binary_type)
    self.raw_rename_from = raw_rename_from or None
    self.raw_rename_to = raw_rename_to or None

    self.diff = diff
def blame_incremental(self, rev, file, **kwargs):
    """Iterator for blame information for the given file at the given revision.

    Unlike .blame(), this does not return the actual file's contents, only
    a stream of (commit, range) tuples.

    :param rev: revision specifier, see git-rev-parse for viable options.
    :return: lazy iterator of (git.Commit, range) tuples, where the commit
             indicates the commit to blame for the line, and range
             indicates a span of line numbers in the resulting file.

    If you combine all line number ranges outputted by this command, you
    should get a continuous range spanning all line numbers in the file.
    """
    data = self.git.blame(rev, '--', file, p=True, incremental=True, stdout_as_string=False, **kwargs)
    commits = dict()

    stream = iter(data.splitlines())
    while True:
        line = next(stream)  # when exhausted, causes a StopIteration, terminating this function
        hexsha, _, lineno, num_lines = line.split()
        lineno = int(lineno)
        num_lines = int(num_lines)
        if hexsha not in commits:
            # Now read the next few lines and build up a dict of properties
            # for this commit
            props = dict()
            while True:
                line = next(stream)
                if line == b'boundary':
                    # "boundary" indicates a root commit and occurs
                    # instead of the "previous" tag
                    continue

                tag, value = line.split(b' ', 1)
                props[tag] = value
                if tag == b'filename':
                    # "filename" formally terminates the entry for --incremental
                    break

            c = Commit(self, hex_to_bin(hexsha),
                       author=Actor(safe_decode(props[b'author']),
                                    safe_decode(props[b'author-mail'].lstrip(b'<').rstrip(b'>'))),
                       authored_date=int(props[b'author-time']),
                       committer=Actor(safe_decode(props[b'committer']),
                                       safe_decode(props[b'committer-mail'].lstrip(b'<').rstrip(b'>'))),
                       committed_date=int(props[b'committer-time']),
                       message=safe_decode(props[b'summary']))
            commits[hexsha] = c
        else:
            # Discard the next line (it's a filename end tag)
            line = next(stream)
            assert line.startswith(b'filename'), 'Unexpected git blame output'

        yield commits[hexsha], range(lineno, lineno + num_lines)
def sha_iter(self):
    # find all files which look like an object, extract sha from there
    for root, dirs, files in os.walk(self.root_path()):
        root_base = basename(root)
        if len(root_base) != 2:
            continue

        for f in files:
            if len(f) != 38:
                continue
            yield hex_to_bin(root_base + f)
def partial_to_complete_sha_hex(self, partial_hexsha):
    """:return: Full binary 20 byte sha from the given partial hexsha
    :raise AmbiguousObjectName:
    :raise BadObject:
    :note: currently we only raise BadObject as git does not communicate
        AmbiguousObjects separately"""
    try:
        hexsha, typename, size = self._git.get_object_header(partial_hexsha)
        return hex_to_bin(hexsha)
    except (GitCommandError, ValueError):
        raise BadObject(partial_hexsha)
def count_rev_list(repo):
    print('Counting with rev-list - this will NOT count dangling objects.')
    typecount = defaultdict(int)
    for line in repo.git.rev_list('--objects', '--all').split('\n'):
        binsha = hex_to_bin(line.split()[0])
        oinfo = repo.odb.info(binsha)
        typecount[oinfo.type] += 1
    print(', '.join('{:s}s: {:d}'.format(k.decode('utf8').capitalize(), v)
                    for k, v in sorted(typecount.items())),
          'Total:', sum(typecount.values()))
def partial_to_complete_sha_hex(self, partial_hexsha):
    """:return: Full binary 20 byte sha from the given partial hexsha
    :raise AmbiguousObjectName:
    :raise BadObject:
    :note: currently we only raise BadObject as pygit does not communicate
        AmbiguousObjects separately"""
    try:
        hexsha, typename, size = self._git.get_object_header(partial_hexsha)
        return hex_to_bin(hexsha)
    except (GitPyCommandError, ValueError):
        raise BadObject(partial_hexsha)
def fu(repo, text):
    for header in Diff.re_header.finditer(text):
        a_path, b_path, similarity_index, rename_from, rename_to, \
            old_mode, new_mode, new_file_mode, deleted_file_mode, \
            a_blob_id, b_blob_id, b_mode = header.groups()
        # new_file, deleted_file = bool(new_file_mode), bool(deleted_file_mode)

        # Our only means to find the actual text is to see what has not been matched by our regex,
        # and then retro-actively assign it to our index
        # if previous_header is not None:
        #     index[-1].diff = text[previous_header.end():header.start()]
        # end assign actual diff

        # Make sure the mode is set if the path is set. Otherwise the resulting blob is invalid
        # We just use the one mode we should have parsed
        a_mode = old_mode or deleted_file_mode or (a_path and (b_mode or new_mode or new_file_mode))
        b_mode = b_mode or new_mode or new_file_mode or (b_path and a_mode)

        ablob = Blob(repo, hex_to_bin(a_blob_id), mode=a_mode, path=a_path)
        bblob = Blob(repo, hex_to_bin(b_blob_id), mode=b_mode, path=a_path)
        return ablob, bblob
def _deserialize(self, stream):
    """:param from_rev_list: if true, the stream format is coming from the rev-list command
    Otherwise it is assumed to be a plain data stream from our object"""
    readline = stream.readline
    self.tree = Tree(self.repo, hex_to_bin(readline().split()[1]), Tree.tree_id << 12, '')

    self.parents = list()
    next_line = None
    while True:
        parent_line = readline()
        if not parent_line.startswith('parent'):
            next_line = parent_line
            break
        # END abort reading parents
        self.parents.append(
            type(self)(self.repo, hex_to_bin(parent_line.split()[-1])))
    # END for each parent line
    self.parents = tuple(self.parents)

    self.author, self.authored_date, self.author_tz_offset = parse_actor_and_date(next_line)
    self.committer, self.committed_date, self.committer_tz_offset = parse_actor_and_date(readline())

    # now we can have the encoding line, or an empty line followed by the optional
    # message.
    self.encoding = self.default_encoding

    # read encoding or empty line to separate message
    enc = readline()
    enc = enc.strip()
    if enc:
        self.encoding = enc[enc.find(' ') + 1:]
        # now comes the message separator
        readline()
    # END handle encoding

    # a stream from our data simply gives us the plain message
    # The end of our message stream is marked with a newline that we strip
    self.message = stream.read()
    return self
def test_base_object(self):
    # test interface of base object classes
    types = (Blob, Tree, Commit, TagObject)
    assert len(types) == len(self.type_tuples)

    s = set()
    num_objs = 0
    num_index_objs = 0
    for obj_type, (typename, hexsha, path) in zip(types, self.type_tuples):
        binsha = hex_to_bin(hexsha)
        item = None
        if path is None:
            item = obj_type(self.rorepo, binsha)
        else:
            item = obj_type(self.rorepo, binsha, 0, path)
        # END handle index objects
        num_objs += 1
        assert item.hexsha == hexsha
        assert item.type == typename
        assert item.size
        assert item == item
        assert not item != item
        assert str(item) == item.hexsha
        assert repr(item)
        s.add(item)

        if isinstance(item, base.IndexObject):
            num_index_objs += 1
            if hasattr(item, 'path'):                 # never runs here
                assert not item.path.startswith("/")  # must be relative
                assert isinstance(item.mode, int)
        # END index object check

        # read from stream
        data_stream = item.data_stream
        data = data_stream.read()
        assert data

        tmpfilename = tempfile.mktemp(suffix='test-stream')
        tmpfile = open(tmpfilename, 'wb+')
        assert item == item.stream_data(tmpfile)
        tmpfile.seek(0)
        assert tmpfile.read() == data
        tmpfile.close()
        os.remove(tmpfilename)
        # END stream to file directly
    # END for each object type to create

    # each has a unique sha
    assert len(s) == num_objs
    assert len(s | s) == num_objs
    assert num_index_objs == 2
def _get_commit(self):
    """
    :return:
        Commit object we point to, works for detached and non-detached
        SymbolicReferences"""
    # we partially reimplement it to prevent unnecessary file access
    hexsha, target_ref_path = self._get_ref_info()

    # it is a detached reference
    if hexsha:
        return Commit(self.repo, hex_to_bin(hexsha))

    return self.from_path(self.repo, target_ref_path).commit
def __init__(self, repo, a_rawpath, b_rawpath, a_blob_id, b_blob_id, a_mode,
             b_mode, new_file, deleted_file, raw_rename_from, raw_rename_to,
             diff, change_type):
    self.a_mode = a_mode
    self.b_mode = b_mode

    assert a_rawpath is None or isinstance(a_rawpath, binary_type)
    assert b_rawpath is None or isinstance(b_rawpath, binary_type)
    self.a_rawpath = a_rawpath
    self.b_rawpath = b_rawpath

    if self.a_mode:
        self.a_mode = mode_str_to_int(self.a_mode)
    if self.b_mode:
        self.b_mode = mode_str_to_int(self.b_mode)

    if a_blob_id is None or a_blob_id == self.NULL_HEX_SHA:
        self.a_blob = None
    else:
        self.a_blob = Blob(repo, hex_to_bin(a_blob_id), mode=self.a_mode, path=self.a_path)

    if b_blob_id is None or b_blob_id == self.NULL_HEX_SHA:
        self.b_blob = None
    else:
        self.b_blob = Blob(repo, hex_to_bin(b_blob_id), mode=self.b_mode, path=self.b_path)

    self.new_file = new_file
    self.deleted_file = deleted_file

    # be clear and use None instead of empty strings
    assert raw_rename_from is None or isinstance(raw_rename_from, binary_type)
    assert raw_rename_to is None or isinstance(raw_rename_to, binary_type)
    self.raw_rename_from = raw_rename_from or None
    self.raw_rename_to = raw_rename_to or None

    self.diff = diff
    self.change_type = change_type
def partial_to_complete_sha_hex(self, partial_hexsha):
    """
    :return: 20 byte binary sha1 from the given less-than-40 byte hexsha
    :param partial_hexsha: hexsha with less than 40 byte
    :raise AmbiguousObjectName: """
    databases = list()
    _databases_recursive(self, databases)

    len_partial_hexsha = len(partial_hexsha)
    if len_partial_hexsha % 2 != 0:
        partial_binsha = hex_to_bin(partial_hexsha + "0")
    else:
        partial_binsha = hex_to_bin(partial_hexsha)
    # END assure successful binary conversion

    candidate = None
    for db in databases:
        full_bin_sha = None
        try:
            if hasattr(db, 'partial_to_complete_sha_hex'):
                full_bin_sha = db.partial_to_complete_sha_hex(partial_hexsha)
            else:
                full_bin_sha = db.partial_to_complete_sha(partial_binsha, len_partial_hexsha)
            # END handle database type
        except BadObject:
            continue
        # END ignore bad objects
        if full_bin_sha:
            if candidate and candidate != full_bin_sha:
                raise AmbiguousObjectName(partial_hexsha)
            candidate = full_bin_sha
        # END handle candidate
    # END for each db
    if not candidate:
        raise BadObject(partial_binsha)
    return candidate
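The same prefix resolution is reachable from a GitPython repo through its object database. A sketch, assuming the current directory is a repository and the prefix is unambiguous there:

from git import Repo
from gitdb.util import bin_to_hex

repo = Repo('.')
binsha = repo.odb.partial_to_complete_sha_hex('155b6')  # replace with a prefix from your repo
print(bin_to_hex(binsha))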
def test_decompress_reader_special_case(self):
    odb = LooseObjectDB(fixture_path('objects'))
    mdb = MemoryDB()
    for sha in (b'888401851f15db0eed60eb1bc29dec5ddcace911',
                b'7bb839852ed5e3a069966281bb08d50012fb309b',):
        ostream = odb.stream(hex_to_bin(sha))

        # if there is a bug, we will be missing one byte exactly !
        data = ostream.read()
        assert len(data) == ostream.size

        # Putting it back in should yield nothing new - after all, we have
        dump = mdb.store(IStream(ostream.type, ostream.size, BytesIO(data)))
        assert dump.hexsha == sha
def name_to_object(repo, name, return_ref=False):
    """
    :return: object specified by the given name, hexshas ( short and long )
        as well as references are supported
    :param return_ref: if name specifies a reference, we will return the reference
        instead of the object. Otherwise it will raise BadObject
    """
    hexsha = None

    # is it a hexsha ? Try the most common ones, which is 7 to 40
    if repo.re_hexsha_shortened.match(name):
        if len(name) != 40:
            # find long sha for short sha
            hexsha = short_to_long(repo.odb, name)
        else:
            hexsha = name
        # END handle short shas
    # END find sha if it matches

    # if we couldn't find an object for what seemed to be a short hexsha
    # try to find it as reference anyway, it could be named 'aaa' for instance
    if hexsha is None:
        for base in ('%s', 'refs/%s', 'refs/tags/%s', 'refs/heads/%s',
                     'refs/remotes/%s', 'refs/remotes/%s/HEAD'):
            try:
                hexsha = SymbolicReference.dereference_recursive(repo, base % name)
                if return_ref:
                    return SymbolicReference(repo, base % name)
                # END handle symbolic ref
                break
            except ValueError:
                pass
        # END for each base
    # END handle hexsha

    # didn't find any ref, this is an error
    if return_ref:
        raise BadObject("Couldn't find reference named %r" % name)
    # END handle return ref

    # tried everything ? fail
    if hexsha is None:
        raise BadObject(name)
    # END assert hexsha was found

    return Object.new_from_sha(repo, hex_to_bin(hexsha))
def reset(test_viewer: TestViewer) -> Experiment:
    """
    Restore the files in the working directory to a given commit.

    The git flow for restoring a snapshot:
        git branch experiment
        git add . & git commit -m ...  // make sure files are up to date to avoid conflict errors; Experiment() takes care of this step
        git checkout <commit-id>       // restore files to <commit-id>
        git checkout -b reset          // attach the current state to a new temporary branch named reset
        git branch experiment          // switch back to the experiment branch
        git add . & git commit -m ...  // commit the current state again as the newest commit
                                       // now the newest commit on experiment is the restored <commit-id>
        git branch -D reset            // delete the temporary branch
        git branch master              // finally return to the original branch, keeping the git state intact apart from the file changes
    :param test_viewer: TestViewer
    :return:
    """
    repo = test_viewer.repo
    commit = Commit(repo, hex_to_bin(test_viewer.json_info['commit_hash']))

    old_path = os.getcwd()
    os.chdir(commit.tree.abspath)

    exp = Experiment('Reset')

    repo = commit.repo  # type:Repo
    with branch(commit.repo, _GITKEY.thexp_branch) as new_branch:
        repo.git.checkout(commit.hexsha)
        repo.git.checkout('-b', 'reset')
        repo.head.reference = new_branch
        repo.git.add('.')
        ncommit = repo.index.commit("Reset from {}".format(commit.hexsha))
        repo.git.branch('-d', 'reset')

    exp.regist_plugin('reset', {
        'test_name': test_viewer.test_name,  # which state to restore from
        'from': exp.commit.hexsha,           # snapshot taken when reset ran
        'where': commit.hexsha,              # which commit to restore to, i.e. the state saved before the restore
        'to': ncommit.hexsha,                # the restored state committed again; 'from' and 'to' should then be identical states
    })
    exp.end()
    os.chdir(old_path)
    return exp
def getCategoryObjects(workingDir):
    repo = Repo(workingDir)
    co = CategoryObjects()
    allShaWithName = repo.git.execute(
        'git rev-list --objects --all --indexed-objects', shell=True)
    allSha = [
        item.split(' ')[0] for item in allShaWithName.split('\n') if item
    ]
    for shaStr in allSha:
        shaBin = hex_to_bin(shaStr)
        info = repo.odb.info(shaBin)
        type = info.type.decode('utf-8')
        if type == 'tree':
            co.trees.append(createTree(repo, shaBin, shaStr))
        elif type == 'commit':
            co.commits.append(createCommit(repo, shaBin, shaStr))
        elif type == 'blob':
            co.blobs.append(createBlob(repo, shaBin, shaStr))
        else:
            pass
    return co
def __setstate__(self, state: dict):
    from gitdb.util import hex_to_bin
    from git import Commit
    self._start_time = state['_start_time']
    self._time_fmt = state['_time_fmt']
    self._exp_name = state['_exp_name']
    self._exp_dir = state['_exp_dir']
    self._test_dir = state['_test_dir']
    self._project_dir = state['_project_dir']
    self._hold_dirs = state['_hold_dirs']
    self._plugins = state['_plugins']
    self._tags = state['_tags']
    self._exc_dict = state.get('_exc_dict', None)
    self._end_state = state['_end_state']
    self._in_main = state['_in_main']

    self._repo = None
    self._commit = Commit(self.repo, hex_to_bin(state['_commit']))
    self._config = globs
def _iter_from_process_or_stream(cls, repo, proc_or_stream):
    """Parse out commit information into a list of Commit objects
    We expect one line per commit, and parse the actual commit information
    directly from our lightning-fast object database

    :param proc: git-rev-list process instance - one sha per line
    :return: iterator returning Commit objects"""
    stream = proc_or_stream
    if not hasattr(stream, 'readline'):
        stream = proc_or_stream.stdout

    readline = stream.readline
    while True:
        line = readline()
        if not line:
            break
        hexsha = line.strip()
        if len(hexsha) > 40:
            # split additional information, as returned by bisect for instance
            hexsha, rest = line.split(None, 1)
        # END handle extra info

        assert len(hexsha) == 40, "Invalid line: %s" % hexsha
        yield Commit(repo, hex_to_bin(hexsha))
def _iter_from_process_or_stream(cls, repo, proc_or_stream):
    """Parse out commit information into a list of Commit objects
    We expect one line per commit, and parse the actual commit information
    directly from our lightning-fast object database

    :param proc: pygit-rev-list process instance - one sha per line
    :return: iterator returning Commit objects"""
    stream = proc_or_stream
    if not hasattr(stream, 'readline'):
        stream = proc_or_stream.stdout

    readline = stream.readline
    while True:
        line = readline()
        if not line:
            break
        hexsha = line.strip()
        if len(hexsha) > 40:
            # split additional information, as returned by bisect for instance
            hexsha, rest = line.split(None, 1)
        # END handle extra info

        assert len(hexsha) == 40, "Invalid line: %s" % hexsha
        yield Commit(repo, hex_to_bin(hexsha))
def rev_parse(repo, rev):
    """
    :return: Object at the given revision, either Commit, Tag, Tree or Blob
    :param rev: git-rev-parse compatible revision specification, please see
        http://www.kernel.org/pub/software/scm/git/docs/git-rev-parse.html
        for details
    :note: Currently there is no access to the rev-log, rev-specs may only contain
        topological tokens such as ~ and ^.
    :raise BadObject: if the given revision could not be found
    :raise ValueError: If rev couldn't be parsed
    :raise IndexError: If invalid reflog index is specified"""

    # colon search mode ?
    if rev.startswith(':/'):
        # colon search mode
        raise NotImplementedError("commit by message search ( regex )")
    # END handle search

    obj = None
    ref = None
    output_type = "commit"
    start = 0
    parsed_to = 0
    lr = len(rev)
    while start < lr:
        if rev[start] not in "^~:@":
            start += 1
            continue
        # END handle start

        token = rev[start]

        if obj is None:
            # token is a rev name
            if start == 0:
                ref = repo.head.ref
            else:
                if token == '@':
                    ref = name_to_object(repo, rev[:start], return_ref=True)
                else:
                    obj = name_to_object(repo, rev[:start])
                # END handle token
            # END handle refname

            if ref is not None:
                obj = ref.commit
            # END handle ref
        # END initialize obj on first token

        start += 1

        # try to parse {type}
        if start < lr and rev[start] == '{':
            end = rev.find('}', start)
            if end == -1:
                raise ValueError("Missing closing brace to define type in %s" % rev)
            output_type = rev[start + 1:end]  # exclude brace

            # handle type
            if output_type == 'commit':
                pass  # default
            elif output_type == 'tree':
                try:
                    obj = to_commit(obj).tree
                except (AttributeError, ValueError):
                    pass  # error raised later
                # END exception handling
            elif output_type in ('', 'blob'):
                if obj.type == 'tag':
                    obj = deref_tag(obj)
                else:
                    # cannot do anything for non-tags
                    pass
                # END handle tag
            elif token == '@':
                # try single int
                assert ref is not None, "Require Reference to access reflog"
                revlog_index = None
                try:
                    # transform reversed index into the format of our revlog
                    revlog_index = -(int(output_type) + 1)
                except ValueError:
                    # TODO: Try to parse the other date options, using parse_date
                    # maybe
                    raise NotImplementedError("Support for additional @{...} modes not implemented")
                # END handle revlog index

                try:
                    entry = ref.log_entry(revlog_index)
                except IndexError:
                    raise IndexError("Invalid revlog index: %i" % revlog_index)
                # END handle index out of bound

                obj = Object.new_from_sha(repo, hex_to_bin(entry.newhexsha))

                # make it pass the following checks
                output_type = None
            else:
                raise ValueError("Invalid output type: %s ( in %s )" % (output_type, rev))
            # END handle output type

            # empty output types don't require any specific type, it's just about dereferencing tags
            if output_type and obj.type != output_type:
                raise ValueError("Could not accommodate requested object type %r, got %s" % (output_type, obj.type))
            # END verify output type

            start = end + 1  # skip brace
            parsed_to = start
            continue
        # END parse type

        # try to parse a number
        num = 0
        if token != ":":
            found_digit = False
            while start < lr:
                if rev[start] in digits:
                    num = num * 10 + int(rev[start])
                    start += 1
                    found_digit = True
                else:
                    break
                # END handle number
            # END number parse loop

            # no explicit number given, 1 is the default
            # It could be 0 though
            if not found_digit:
                num = 1
            # END set default num
        # END number parsing only if non-blob mode

        parsed_to = start
        # handle hierarchy walk
        try:
            if token == "~":
                obj = to_commit(obj)
                for item in xrange(num):
                    obj = obj.parents[0]
                # END for each history item to walk
            elif token == "^":
                obj = to_commit(obj)
                # must be n'th parent
                if num:
                    obj = obj.parents[num - 1]
            elif token == ":":
                if obj.type != "tree":
                    obj = obj.tree
                # END get tree type
                obj = obj[rev[start:]]
                parsed_to = lr
            else:
                raise ValueError("Invalid token: %r" % token)
            # END end handle tag
        except (IndexError, AttributeError):
            raise BadObject("Invalid Revision in %s" % rev)
        # END exception handling
    # END parse loop

    # still no obj ? It's probably a simple name
    if obj is None:
        obj = name_to_object(repo, rev)
        parsed_to = lr
    # END handle simple name

    if obj is None:
        raise ValueError("Revision specifier could not be parsed: %s" % rev)

    if parsed_to != lr:
        raise ValueError("Didn't consume complete rev spec %s, consumed part: %s" % (rev, rev[:parsed_to]))

    return obj
def blame(self, rev, file):
    """The blame information for the given file at the given revision.

    :param rev: revision specifier, see git-rev-parse for viable options.
    :return:
        list: [git.Commit, list: [<line>]]
        A list of tuples associating a Commit object with a list of lines that
        changed within the given commit. The Commit objects will be given in order
        of appearance."""
    data = self.git.blame(rev, '--', file, p=True, stdout_as_string=False)
    commits = dict()
    blames = list()
    info = None

    keepends = True
    for line in data.splitlines(keepends):
        try:
            line = line.rstrip().decode(defenc)
        except UnicodeDecodeError:
            firstpart = ''
            is_binary = True
        else:
            # We don't have an idea when the binary data ends, as it could contain multiple newlines
            # in the process. So we rely on being able to decode to tell us what it is.
            # This can absolutely fail even on text files, but even if it does, we should be fine treating it
            # as binary instead
            parts = self.re_whitespace.split(line, 1)
            firstpart = parts[0]
            is_binary = False
        # end handle decode of line

        if self.re_hexsha_only.search(firstpart):
            # handles
            # 634396b2f541a9f2d58b00be1a07f0c358b999b3 1 1 7        - indicates blame-data start
            # 634396b2f541a9f2d58b00be1a07f0c358b999b3 2 2          - indicates
            # another line of blame with the same data
            digits = parts[-1].split(" ")
            if len(digits) == 3:
                info = {'id': firstpart}
                blames.append([None, []])
            elif info['id'] != firstpart:
                info = {'id': firstpart}
                blames.append([commits.get(firstpart), []])
            # END blame data initialization
        else:
            m = self.re_author_committer_start.search(firstpart)
            if m:
                # handles:
                # author Tom Preston-Werner
                # author-mail <*****@*****.**>
                # author-time 1192271832
                # author-tz -0700
                # committer Tom Preston-Werner
                # committer-mail <*****@*****.**>
                # committer-time 1192271832
                # committer-tz -0700  - IGNORED BY US
                role = m.group(0)
                if firstpart.endswith('-mail'):
                    info["%s_email" % role] = parts[-1]
                elif firstpart.endswith('-time'):
                    info["%s_date" % role] = int(parts[-1])
                elif role == firstpart:
                    info[role] = parts[-1]
                # END distinguish mail,time,name
            else:
                # handle
                # filename lib/grit.rb
                # summary add Blob
                # <and rest>
                if firstpart.startswith('filename'):
                    info['filename'] = parts[-1]
                elif firstpart.startswith('summary'):
                    info['summary'] = parts[-1]
                elif firstpart == '':
                    if info:
                        sha = info['id']
                        c = commits.get(sha)
                        if c is None:
                            c = Commit(self, hex_to_bin(sha),
                                       author=Actor._from_string(info['author'] + ' ' + info['author_email']),
                                       authored_date=info['author_date'],
                                       committer=Actor._from_string(info['committer'] + ' ' + info['committer_email']),
                                       committed_date=info['committer_date'],
                                       message=info['summary'])
                            commits[sha] = c
                        # END if commit objects needs initial creation
                        if not is_binary:
                            if line and line[0] == '\t':
                                line = line[1:]
                        else:
                            # NOTE: We are actually parsing lines out of binary data, which can lead to the
                            # binary being split up along the newline separator. We will append this to the blame
                            # we are currently looking at, even though it should be concatenated with the last line
                            # we have seen.
                            pass
                        # end handle line contents
                        blames[-1][0] = c
                        blames[-1][1].append(line)
                        info = {'id': sha}
                    # END if we collected commit info
                # END distinguish filename,summary,rest
            # END distinguish author|committer vs filename,summary,rest
        # END distinguish hexsha vs other information
    return blames
def stream(self, sha):
    """For now, all lookup is done by pygit itself"""
    hexsha, typename, size, stream = self._git.stream_object_data(bin_to_hex(sha))
    return OStream(hex_to_bin(hexsha), typename, size, stream)
def _deserialize(self, stream):
    """:param from_rev_list: if true, the stream format is coming from the rev-list command
    Otherwise it is assumed to be a plain data stream from our object"""
    readline = stream.readline
    self.tree = Tree(self.repo, hex_to_bin(readline().split()[1]), Tree.tree_id << 12, '')

    self.parents = list()
    next_line = None
    while True:
        parent_line = readline()
        if not parent_line.startswith(b'parent'):
            next_line = parent_line
            break
        # END abort reading parents
        self.parents.append(type(self)(self.repo, hex_to_bin(parent_line.split()[-1].decode('ascii'))))
    # END for each parent line
    self.parents = tuple(self.parents)

    # we don't know the actual author encoding before we have parsed it, so keep the lines around
    author_line = next_line
    committer_line = readline()

    # we might run into one or more mergetag blocks, skip those for now
    next_line = readline()
    while next_line.startswith(b'mergetag '):
        next_line = readline()
        while next_line.startswith(b' '):
            next_line = readline()
    # end skip mergetags

    # now we can have the encoding line, or an empty line followed by the optional
    # message.
    self.encoding = self.default_encoding

    # read headers
    enc = next_line
    buf = enc.strip()
    while buf:
        if buf[0:9] == b"encoding ":  # 9 == len(b"encoding ")
            self.encoding = buf[buf.find(b' ') + 1:].decode('ascii')
        elif buf[0:7] == b"gpgsig ":
            sig = buf[buf.find(b' ') + 1:] + b"\n"
            is_next_header = False
            while True:
                sigbuf = readline()
                if not sigbuf:
                    break
                if sigbuf[0:1] != b" ":
                    buf = sigbuf.strip()
                    is_next_header = True
                    break
                sig += sigbuf[1:]
            # end read all signature
            self.gpgsig = sig.rstrip(b"\n").decode('ascii')
            if is_next_header:
                continue
        buf = readline().strip()

    # decode the author's name
    try:
        self.author, self.authored_date, self.author_tz_offset = \
            parse_actor_and_date(author_line.decode(self.encoding))
    except UnicodeDecodeError:
        log.error("Failed to decode author line '%s' using encoding %s", author_line, self.encoding,
                  exc_info=True)

    try:
        self.committer, self.committed_date, self.committer_tz_offset = \
            parse_actor_and_date(committer_line.decode(self.encoding))
    except UnicodeDecodeError:
        log.error("Failed to decode committer line '%s' using encoding %s", committer_line, self.encoding,
                  exc_info=True)
    # END handle author's encoding

    # a stream from our data simply gives us the plain message
    # The end of our message stream is marked with a newline that we strip
    self.message = stream.read()
    try:
        self.message = self.message.decode(self.encoding)
    except UnicodeDecodeError:
        log.error("Failed to decode message '%s' using encoding %s", self.message, self.encoding,
                  exc_info=True)
    # END exception handling

    return self
def _deserialize(self, stream):
    """:param from_rev_list: if true, the stream format is coming from the rev-list command
    Otherwise it is assumed to be a plain data stream from our object"""
    readline = stream.readline
    self.tree = Tree(self.repo, hex_to_bin(readline().split()[1]), Tree.tree_id << 12, '')

    self.parents = list()
    next_line = None
    while True:
        parent_line = readline()
        if not parent_line.startswith('parent'):
            next_line = parent_line
            break
        # END abort reading parents
        self.parents.append(type(self)(self.repo, hex_to_bin(parent_line.split()[-1])))
    # END for each parent line
    self.parents = tuple(self.parents)

    self.author, self.authored_date, self.author_tz_offset = parse_actor_and_date(next_line)
    self.committer, self.committed_date, self.committer_tz_offset = parse_actor_and_date(readline())

    # we might run into one or more mergetag blocks, skip those for now
    next_line = readline()
    while next_line.startswith('mergetag '):
        next_line = readline()
        while next_line.startswith(' '):
            next_line = readline()

    # now we can have the encoding line, or an empty line followed by the optional
    # message.
    self.encoding = self.default_encoding

    # read headers
    enc = next_line
    buf = enc.strip()
    while buf != "":
        if buf[0:9] == "encoding ":  # 9 == len("encoding ")
            self.encoding = buf[buf.find(' ') + 1:]
        elif buf[0:7] == "gpgsig ":
            sig = buf[buf.find(' ') + 1:] + "\n"
            is_next_header = False
            while True:
                sigbuf = readline()
                if sigbuf == "":
                    break
                if sigbuf[0:1] != " ":
                    buf = sigbuf.strip()
                    is_next_header = True
                    break
                sig += sigbuf[1:]
            self.gpgsig = sig.rstrip("\n")
            if is_next_header:
                continue
        buf = readline().strip()

    # decode the author's name
    try:
        self.author.name = self.author.name.decode(self.encoding)
    except UnicodeDecodeError:
        print >> sys.stderr, "Failed to decode author name '%s' using encoding %s" % (self.author.name, self.encoding)
    # END handle author's encoding

    # decode committer name
    try:
        self.committer.name = self.committer.name.decode(self.encoding)
    except UnicodeDecodeError:
        print >> sys.stderr, "Failed to decode committer name '%s' using encoding %s" % (self.committer.name, self.encoding)
    # END handle committer's encoding

    # a stream from our data simply gives us the plain message
    # The end of our message stream is marked with a newline that we strip
    self.message = stream.read()
    try:
        self.message = self.message.decode(self.encoding)
    except UnicodeDecodeError:
        print >> sys.stderr, "Failed to decode message '%s' using encoding %s" % (self.message, self.encoding)
    # END exception handling

    return self
def test_base(self):
    rlp_head = fixture_path('reflog_HEAD')
    rlp_master = fixture_path('reflog_master')
    tdir = tempfile.mktemp(suffix="test_reflogs")
    os.mkdir(tdir)

    rlp_master_ro = RefLog.path(self.rorepo.head)
    assert os.path.isfile(rlp_master_ro)

    # simple read
    reflog = RefLog.from_file(rlp_master_ro)
    assert reflog._path is not None
    assert isinstance(reflog, RefLog)
    assert len(reflog)

    # iter_entries works with path and with stream
    assert len(list(RefLog.iter_entries(open(rlp_master, 'rb'))))
    assert len(list(RefLog.iter_entries(rlp_master)))

    # raise on invalid revlog
    # TODO: Try multiple corrupted ones !
    pp = 'reflog_invalid_'
    for suffix in ('oldsha', 'newsha', 'email', 'date', 'sep'):
        self.failUnlessRaises(ValueError, RefLog.from_file, fixture_path(pp + suffix))
    # END for each invalid file

    # cannot write an uninitialized reflog
    self.failUnlessRaises(ValueError, RefLog().write)

    # test serialize and deserialize - results must match exactly
    binsha = hex_to_bin(('f' * 40).encode('ascii'))
    msg = "my reflog message"
    cr = self.rorepo.config_reader()
    for rlp in (rlp_head, rlp_master):
        reflog = RefLog.from_file(rlp)
        tfile = os.path.join(tdir, os.path.basename(rlp))
        reflog.to_file(tfile)
        assert reflog.write() is reflog

        # parsed result must match ...
        treflog = RefLog.from_file(tfile)
        assert treflog == reflog

        # ... as well as each bytes of the written stream
        assert open(tfile).read() == open(rlp).read()

        # append an entry
        entry = RefLog.append_entry(cr, tfile, IndexObject.NULL_BIN_SHA, binsha, msg)
        assert entry.oldhexsha == IndexObject.NULL_HEX_SHA
        assert entry.newhexsha == 'f' * 40
        assert entry.message == msg
        assert RefLog.from_file(tfile)[-1] == entry

        # index entry
        # raises on invalid index
        self.failUnlessRaises(IndexError, RefLog.entry_at, rlp, 10000)

        # indices can be positive ...
        assert isinstance(RefLog.entry_at(rlp, 0), RefLogEntry)
        RefLog.entry_at(rlp, 23)

        # ... and negative
        for idx in (-1, -24):
            RefLog.entry_at(rlp, idx)
        # END for each index to read
    # END for each reflog

    # finally remove our temporary data
    shutil.rmtree(tdir)
def blame(self, rev, file):
    """The blame information for the given file at the given revision.

    :param rev: revision specifier, see git-rev-parse for viable options.
    :return:
        list: [git.Commit, list: [<line>]]
        A list of tuples associating a Commit object with a list of lines that
        changed within the given commit. The Commit objects will be given in order
        of appearance."""
    data = self.git.blame(rev, '--', file, p=True)
    commits = dict()
    blames = list()
    info = None

    for line in data.splitlines(False):
        parts = self.re_whitespace.split(line, 1)
        firstpart = parts[0]
        if self.re_hexsha_only.search(firstpart):
            # handles
            # 634396b2f541a9f2d58b00be1a07f0c358b999b3 1 1 7        - indicates blame-data start
            # 634396b2f541a9f2d58b00be1a07f0c358b999b3 2 2
            digits = parts[-1].split(" ")
            if len(digits) == 3:
                info = {'id': firstpart}
                blames.append([None, []])
            # END blame data initialization
        else:
            m = self.re_author_committer_start.search(firstpart)
            if m:
                # handles:
                # author Tom Preston-Werner
                # author-mail <*****@*****.**>
                # author-time 1192271832
                # author-tz -0700
                # committer Tom Preston-Werner
                # committer-mail <*****@*****.**>
                # committer-time 1192271832
                # committer-tz -0700  - IGNORED BY US
                role = m.group(0)
                if firstpart.endswith('-mail'):
                    info["%s_email" % role] = parts[-1]
                elif firstpart.endswith('-time'):
                    info["%s_date" % role] = int(parts[-1])
                elif role == firstpart:
                    info[role] = parts[-1]
                # END distinguish mail,time,name
            else:
                # handle
                # filename lib/grit.rb
                # summary add Blob
                # <and rest>
                if firstpart.startswith('filename'):
                    info['filename'] = parts[-1]
                elif firstpart.startswith('summary'):
                    info['summary'] = parts[-1]
                elif firstpart == '':
                    if info:
                        sha = info['id']
                        c = commits.get(sha)
                        if c is None:
                            c = Commit(self, hex_to_bin(sha),
                                       author=Actor._from_string(info['author'] + ' ' + info['author_email']),
                                       authored_date=info['author_date'],
                                       committer=Actor._from_string(info['committer'] + ' ' + info['committer_email']),
                                       committed_date=info['committer_date'],
                                       message=info['summary'])
                            commits[sha] = c
                        # END if commit objects needs initial creation
                        m = self.re_tab_full_line.search(line)
                        text, = m.groups()
                        blames[-1][0] = c
                        blames[-1][1].append(text)
                        info = None
                    # END if we collected commit info
                # END distinguish filename,summary,rest
            # END distinguish author|committer vs filename,summary,rest
        # END distinguish hexsha vs other information
    return blames
def blame_incremental(self, rev, file, **kwargs):
    """Iterator for blame information for the given file at the given revision.

    Unlike .blame(), this does not return the actual file's contents, only
    a stream of BlameEntry tuples.

    :param rev: revision specifier, see git-rev-parse for viable options.
    :return: lazy iterator of BlameEntry tuples, where the commit
             indicates the commit to blame for the line, and range
             indicates a span of line numbers in the resulting file.

    If you combine all line number ranges outputted by this command, you
    should get a continuous range spanning all line numbers in the file.
    """
    data = self.git.blame(rev, '--', file, p=True, incremental=True, stdout_as_string=False, **kwargs)
    commits = dict()

    stream = (line for line in data.split(b'\n') if line)
    while True:
        line = next(stream)  # when exhausted, causes a StopIteration, terminating this function
        hexsha, orig_lineno, lineno, num_lines = line.split()
        lineno = int(lineno)
        num_lines = int(num_lines)
        orig_lineno = int(orig_lineno)
        if hexsha not in commits:
            # Now read the next few lines and build up a dict of properties
            # for this commit
            props = dict()
            while True:
                line = next(stream)
                if line == b'boundary':
                    # "boundary" indicates a root commit and occurs
                    # instead of the "previous" tag
                    continue

                tag, value = line.split(b' ', 1)
                props[tag] = value
                if tag == b'filename':
                    # "filename" formally terminates the entry for --incremental
                    orig_filename = value
                    break

            c = Commit(self, hex_to_bin(hexsha),
                       author=Actor(safe_decode(props[b'author']),
                                    safe_decode(props[b'author-mail'].lstrip(b'<').rstrip(b'>'))),
                       authored_date=int(props[b'author-time']),
                       committer=Actor(safe_decode(props[b'committer']),
                                       safe_decode(props[b'committer-mail'].lstrip(b'<').rstrip(b'>'))),
                       committed_date=int(props[b'committer-time']),
                       message=safe_decode(props[b'summary']))
            commits[hexsha] = c
        else:
            # Discard the next line (it's a filename end tag)
            line = next(stream)
            tag, value = line.split(b' ', 1)
            assert tag == b'filename', 'Unexpected git blame output'
            orig_filename = value

        yield BlameEntry(commits[hexsha],
                         range(lineno, lineno + num_lines),
                         safe_decode(orig_filename),
                         range(orig_lineno, orig_lineno + num_lines))
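A usage sketch for the BlameEntry variant above (assumes a repository containing README.md; the path is only an example):

from git import Repo

repo = Repo('.')
for entry in repo.blame_incremental('HEAD', 'README.md'):
    # entry.commit is the blamed commit, entry.linenos the line span in the current file
    print(entry.commit.hexsha[:7], list(entry.linenos))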
def info(self, sha):
    hexsha, typename, size = self._git.get_object_header(bin_to_hex(sha))
    return OInfo(hex_to_bin(hexsha), typename, size)
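Paired with hex_to_bin, the info call gives cheap access to an object's header without reading its payload. A sketch, assuming the current directory is a repository:

from git import Repo
from gitdb.util import hex_to_bin

repo = Repo('.')
oinfo = repo.odb.info(hex_to_bin(repo.head.commit.hexsha))
print(oinfo.type, oinfo.size)  # object type and uncompressed size in bytes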
def store(self, istream):
    """note: The sha we produce will be hex by nature"""
    tmp_path = None
    writer = self.ostream()
    if writer is None:
        # open a tmp file to write the data to
        fd, tmp_path = tempfile.mkstemp(prefix="obj", dir=self._root_path)

        if istream.binsha is None:
            writer = FDCompressedSha1Writer(fd)
        else:
            writer = FDStream(fd)
        # END handle direct stream copies
    # END handle custom writer

    try:
        try:
            if istream.binsha is not None:
                # copy as much as possible, the actual uncompressed item size might
                # be smaller than the compressed version
                stream_copy(istream.read, writer.write, MAXSIZE, self.stream_chunk_size)
            else:
                # write object with header, we have to make a new one
                write_object(istream.type, istream.size, istream.read, writer.write,
                             chunk_size=self.stream_chunk_size)
            # END handle direct stream copies
        finally:
            if tmp_path:
                writer.close()
        # END assure target stream is closed
    except:
        if tmp_path:
            os.remove(tmp_path)
        raise
    # END assure tmpfile removal on error

    hexsha = None
    if istream.binsha:
        hexsha = istream.hexsha
    else:
        hexsha = writer.sha(as_hex=True)
    # END handle sha

    if tmp_path:
        obj_path = self.db_path(self.object_path(hexsha))
        obj_dir = dirname(obj_path)
        if not isdir(obj_dir):
            mkdir(obj_dir)
        # END handle destination directory

        # rename onto existing doesn't work on windows
        if os.name == 'nt':
            if isfile(obj_path):
                remove(tmp_path)
            else:
                rename(tmp_path, obj_path)
            # end rename only if needed
        else:
            rename(tmp_path, obj_path)
        # END handle win32

        # make sure its readable for all ! It started out as rw-- tmp file
        # but needs to be rwrr
        chmod(obj_path, self.new_objects_mode)
    # END handle dry_run

    istream.binsha = hex_to_bin(hexsha)
    return istream
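A store round trip through the gitdb API, as a sketch; the target directory is hypothetical and must already exist:

from io import BytesIO
from gitdb import IStream
from gitdb.db import LooseObjectDB

ldb = LooseObjectDB('/tmp/objects')  # hypothetical, pre-created objects directory
data = b'hello'
istream = ldb.store(IStream(b'blob', len(data), BytesIO(data)))
print(istream.hexsha)                # sha1 over b'blob 5\x00hello'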