def test_commit_serialization(self):
    """Run the shared commit serialization check on the head of the
    read-write repository, then time the serialization of 5000 commits
    which are stored through the repository's object database."""
    assert_commit_serialization(self.gitrwrepo, self.gitrwrepo.head, True)

    rwrepo = self.gitrwrepo
    store_object = rwrepo.odb.store
    # direct serialization - deserialization can be tested afterwards
    # serialization is probably limited on IO
    head_commit = rwrepo.commit(rwrepo.head)

    commit_count = 5000
    started_at = time()
    for number in xrange(commit_count):
        # every commit is a copy of the head commit, only the message differs
        commit = Commit(rwrepo, Commit.NULL_BIN_SHA, head_commit.tree,
                        head_commit.author, head_commit.authored_date, head_commit.author_tz_offset,
                        head_commit.committer, head_commit.committed_date, head_commit.committer_tz_offset,
                        str(number), parents=head_commit.parents, encoding=head_commit.encoding)

        buf = BytesIO()
        commit._serialize(buf)
        # the amount of bytes written is the current position; rewind for the reader
        size = buf.tell()
        buf.seek(0)

        commit.binsha = store_object(IStream(Commit.type, size, buf)).binsha
    # END commit creation
    elapsed = time() - started_at

    message = "Serialized %i commits to loose objects in %f s ( %f commits / s )" % (
        commit_count, elapsed, commit_count / elapsed)
    print(message, file=sys.stderr)
def entry_at(cls, filepath, index):
    """:return: RefLogEntry at the given index
    :param filepath: full path to the file from which to read the entry, each
        of its lines is parsed using RefLogEntry.from_line
    :param index: python list compatible index, i.e. it may be negative to
        specify an entry counted from the end of the list

    :raise IndexError: If the entry didn't exist

    .. note:: This method is faster as it only parses the entry at index, skipping
        all other lines. Nonetheless, the whole file has to be read if
        the index is negative
    """
    # the with-block makes sure the file handle is released on every path,
    # including the IndexError ones
    with open(filepath, 'rb') as fp:
        if index < 0:
            # counting from the end requires all lines to be known
            return RefLogEntry.from_line(fp.readlines()[index].strip())
        # END handle negative index

        # read until index is reached, lines past it are never touched
        for lineno, line in enumerate(fp):
            if lineno == index:
                return RefLogEntry.from_line(line.strip())
            # END handle match
        # END for each line
    # END close file

    # end of file was hit before the requested entry
    raise IndexError("No entry at index %i" % index)
def tree_to_stream(entries, write):
    """Write the given list of entries into a stream using its write method

    Each entry is emitted with a single write call as
    ``<octal mode> <name>\\0<binsha>``.

    :param entries: **sorted** list of tuples with (binsha, mode, name)
    :param write: write method which takes a data string"""
    zero_ord = ord('0')
    octal_mask = 7  # 3 bits set, i.e. one octal digit

    for sha, entry_mode, entry_name in entries:
        # six octal digits, computed least significant first and flipped afterwards
        octal_chars = [bchr(((entry_mode >> (shift * 3)) & octal_mask) + zero_ord)
                       for shift in xrange(6)]
        octal_chars.reverse()
        mode_bytes = b''.join(octal_chars)

        # git slices away the first octal if its zero
        if byte_ord(mode_bytes[0]) == zero_ord:
            mode_bytes = mode_bytes[1:]
        # END save a byte

        # a text name cannot be joined with the binary sha, hence it is
        # encoded using defenc first. Names which are not text are taken literally
        if isinstance(entry_name, text_type):
            entry_name = entry_name.encode(defenc)
        # END handle encoding

        write(b''.join((mode_bytes, b' ', entry_name, b'\0', sha)))
    # END for each entry
def tree_to_stream(entries, write):
    """Write the given list of entries into a stream using its write method

    Each entry is emitted with a single write call as
    ``<octal mode> <name>\\0<binsha>``.

    :param entries: **sorted** list of tuples with (binsha, mode, name)
    :param write: write method which takes a data string"""
    zero_ord = ord('0')
    octal_mask = 7  # 3 bits set, i.e. one octal digit

    for sha, entry_mode, entry_name in entries:
        # six octal digits, computed least significant first and flipped afterwards
        octal_chars = [bchr(((entry_mode >> (shift * 3)) & octal_mask) + zero_ord)
                       for shift in xrange(6)]
        octal_chars.reverse()
        mode_bytes = b''.join(octal_chars)

        # git slices away the first octal if its zero
        if byte_ord(mode_bytes[0]) == zero_ord:
            mode_bytes = mode_bytes[1:]
        # END save a byte

        # a text name cannot be joined with the binary sha, hence it is
        # encoded using defenc first. Names which are not text are taken literally
        if isinstance(entry_name, text_type):
            entry_name = entry_name.encode(defenc)
        # END handle encoding

        write(b''.join((mode_bytes, b' ', entry_name, b'\0', sha)))
    # END for each entry
def test_commit_serialization(self):
    """Run the shared commit serialization check starting at revision
    '58c78e6' of the read-write repository, then time the serialization of
    5000 commits which are stored through the repository's object database."""
    assert_commit_serialization(self.gitrwrepo, '58c78e6', True)

    rwrepo = self.gitrwrepo
    store_object = rwrepo.odb.store
    # direct serialization - deserialization can be tested afterwards
    # serialization is probably limited on IO
    head_commit = rwrepo.commit(rwrepo.head)

    commit_count = 5000
    started_at = time()
    for number in xrange(commit_count):
        # every commit is a copy of the head commit, only the message differs
        commit = Commit(rwrepo, Commit.NULL_BIN_SHA, head_commit.tree,
                        head_commit.author, head_commit.authored_date, head_commit.author_tz_offset,
                        head_commit.committer, head_commit.committed_date, head_commit.committer_tz_offset,
                        str(number), parents=head_commit.parents, encoding=head_commit.encoding)

        buf = BytesIO()
        commit._serialize(buf)
        # the amount of bytes written is the current position; rewind for the reader
        size = buf.tell()
        buf.seek(0)

        commit.binsha = store_object(IStream(Commit.type, size, buf)).binsha
    # END commit creation
    elapsed = time() - started_at

    message = "Serialized %i commits to loose objects in %f s ( %f commits / s )" % (
        commit_count, elapsed, commit_count / elapsed)
    print(message, file=sys.stderr)
def rev_parse(repo, rev):
    """
    Resolve a revision specification by walking it token by token, where the
    tokens are ``^``, ``~``, ``:`` and ``@``, optionally followed by ``{...}``
    or a number.

    :return: Object at the given revision, either Commit, Tag, Tree or Blob
    :param repo: repository used to look up names and objects. If rev starts
        with a token, ``repo.head.ref`` is the starting point
    :param rev: git-rev-parse compatible revision specification as string, please see
        http://www.kernel.org/pub/software/scm/git/docs/git-rev-parse.html
        for details
    :raise BadObject: if the given revision could not be found
        ( not raised directly in here, presumably by the name lookup )
    :raise BadName: if a parent or path requested through ``~``, ``^`` or ``:``
        could not be reached
    :raise ValueError: If rev couldn't be parsed
    :raise IndexError: If invalid reflog index is specified
    :raise NotImplementedError: for the ``:/`` message search as well as for
        ``@{...}`` contents which are not a plain integer"""

    # colon search mode ?
    if rev.startswith(':/'):
        # colon search mode
        raise NotImplementedError("commit by message search ( regex )")
    # END handle search

    obj = None                  # object the spec resolved to so far
    ref = None                  # reference, only set if one was looked up or HEAD is implied
    output_type = "commit"
    start = 0                   # read position within rev
    parsed_to = 0               # position up to which rev was consumed, verified at the end
    lr = len(rev)
    while start < lr:
        # skip everything which is not a token - it belongs to a name
        if rev[start] not in "^~:@":
            start += 1
            continue
        # END handle start

        token = rev[start]

        if obj is None:
            # token is a rev name
            if start == 0:
                # nothing precedes the token, hence the head's reference is used
                ref = repo.head.ref
            else:
                # the reflog syntax needs the reference itself, everything else just the object
                if token == '@':
                    ref = name_to_object(repo, rev[:start], return_ref=True)
                else:
                    obj = name_to_object(repo, rev[:start])
                # END handle token
            # END handle refname

            if ref is not None:
                obj = ref.commit
            # END handle ref
        # END initialize obj on first token

        start += 1

        # try to parse {type}
        if start < lr and rev[start] == '{':
            end = rev.find('}', start)
            if end == -1:
                raise ValueError("Missing closing brace to define type in %s" % rev)
            output_type = rev[start + 1:end]  # exclude brace

            # handle type
            if output_type == 'commit':
                pass  # default
            elif output_type == 'tree':
                try:
                    obj = to_commit(obj).tree
                except (AttributeError, ValueError):
                    pass  # error raised later
                # END exception handling
            elif output_type in ('', 'blob'):
                if obj.type == 'tag':
                    obj = deref_tag(obj)
                else:
                    # cannot do anything for non-tags
                    pass
                # END handle tag
            elif token == '@':
                # try single int
                assert ref is not None, "Requre Reference to access reflog"
                revlog_index = None
                try:
                    # transform reversed index into the format of our revlog
                    # i.e. @{0} becomes -1, @{1} becomes -2 and so forth
                    revlog_index = -(int(output_type) + 1)
                except ValueError:
                    # TODO: Try to parse the other date options, using parse_date
                    # maybe
                    raise NotImplementedError(
                        "Support for additional @{...} modes not implemented")
                # END handle revlog index

                try:
                    entry = ref.log_entry(revlog_index)
                except IndexError:
                    raise IndexError("Invalid revlog index: %i" % revlog_index)
                # END handle index out of bound

                obj = Object.new_from_sha(repo, hex_to_bin(entry.newhexsha))

                # make it pass the following checks
                output_type = None
            else:
                raise ValueError("Invalid output type: %s ( in %s )" % (output_type, rev))
            # END handle output type

            # empty output types don't require any specific type, its just about dereferencing tags
            if output_type and obj.type != output_type:
                raise ValueError(
                    "Could not accommodate requested object type %r, got %s" % (output_type, obj.type))
            # END verify output type

            start = end + 1  # skip brace
            parsed_to = start
            continue
        # END parse type

        # try to parse a number
        num = 0
        if token != ":":
            found_digit = False
            while start < lr:
                if rev[start] in digits:
                    num = num * 10 + int(rev[start])
                    start += 1
                    found_digit = True
                else:
                    break
                # END handle number
            # END number parse loop

            # no explicit number given, 1 is the default
            # It could be 0 though
            if not found_digit:
                num = 1
            # END set default num
        # END number parsing only if non-blob mode

        parsed_to = start
        # handle hierarchy walk
        try:
            if token == "~":
                # follow the first parent num times
                obj = to_commit(obj)
                for _ in xrange(num):
                    obj = obj.parents[0]
                # END for each history item to walk
            elif token == "^":
                obj = to_commit(obj)
                # must be n'th parent
                # a num of 0 keeps the commit itself
                if num:
                    obj = obj.parents[num - 1]
            elif token == ":":
                if obj.type != "tree":
                    obj = obj.tree
                # END get tree type
                # everything after the colon is used as key into the tree
                obj = obj[rev[start:]]
                parsed_to = lr
            else:
                raise ValueError("Invalid token: %r" % token)
            # END end handle tag
        except (IndexError, AttributeError):
            raise BadName(
                "Invalid revision spec '%s' - not enough parent commits to reach '%s%i'" % (rev, token, num))
        # END exception handling
    # END parse loop

    # still no obj ? Its probably a simple name
    if obj is None:
        obj = name_to_object(repo, rev)
        parsed_to = lr
    # END handle simple name

    if obj is None:
        raise ValueError("Revision specifier could not be parsed: %s" % rev)

    # trailing characters which were not consumed make the spec invalid
    if parsed_to != lr:
        raise ValueError(
            "Didn't consume complete rev spec %s, consumed part: %s" % (rev, rev[:parsed_to]))

    return obj
def move(self, items, skip_errors=False, **kwargs):
    """Rename/move the items, whereas the last item is considered the destination of
    the move operation. If the destination is a file, the first item ( of two )
    must be a file as well. If the destination is a directory, it may be preceded
    by one or more directories or files.

    The working tree will be affected in non-bare repositories.

    :param items:
        Multiple types of items are supported, please see the 'remove' method
        for reference.
    :param skip_errors:
        If True, errors such as ones resulting from missing source files will
        be skipped.
    :param kwargs:
        Additional arguments you would like to pass to git-mv, such as dry_run
        or force.

    :return: List(tuple(source_path_string, destination_path_string), ...)
        A list of pairs, containing the source file moved as well as its
        actual destination. Relative to the repository root.

    :raise ValueError: If only one item was given
    :raise GitCommandError: If git could not handle your request"""
    args = []
    if skip_errors:
        args.append('-k')

    paths = self._items_to_rela_paths(items)
    if len(paths) < 2:
        raise ValueError("Please provide at least one source and one destination of the move operation")

    # remember whether the caller asked for a dry run, either as 'dry_run' or as 'n'
    was_dry_run = kwargs.pop('dry_run', kwargs.pop('n', None))
    kwargs['dry_run'] = True

    # first execute rename in dryrun so the command tells us what it actually does
    # ( for later output )
    mvlines = self.repo.git.mv(args, paths, **kwargs).splitlines()

    # parse result - first 0:n/2 lines are 'checking ', the remaining ones
    # are the 'renaming' ones which we parse
    prefix_len = len('Renaming ')
    out = []
    for line in mvlines[len(mvlines) // 2:]:
        tokens = line.split(' to ')
        assert len(tokens) == 2, "Too many tokens in %s" % line

        # [0] = Renaming x
        # [1] = y
        out.append((tokens[0][prefix_len:], tokens[1]))
    # END for each line to parse

    # either prepare for the real run, or output the dry-run result
    if was_dry_run:
        return out
    # END handle dryrun

    # now apply the actual operation
    kwargs.pop('dry_run')
    self.repo.git.mv(args, paths, **kwargs)

    return out
def move(self, items, skip_errors=False, **kwargs):
    """Rename/move the items, whereas the last item is considered the destination of
    the move operation. If the destination is a file, the first item ( of two )
    must be a file as well. If the destination is a directory, it may be preceded
    by one or more directories or files.

    The working tree will be affected in non-bare repositories.

    :param items:
        Multiple types of items are supported, please see the 'remove' method
        for reference.
    :param skip_errors:
        If True, errors such as ones resulting from missing source files will
        be skipped.
    :param kwargs:
        Additional arguments you would like to pass to git-mv, such as dry_run
        or force.

    :return: List(tuple(source_path_string, destination_path_string), ...)
        A list of pairs, containing the source file moved as well as its
        actual destination. Relative to the repository root.

    :raise ValueError: If only one item was given
    :raise GitCommandError: If git could not handle your request"""
    args = []
    if skip_errors:
        args.append('-k')

    paths = self._items_to_rela_paths(items)
    if len(paths) < 2:
        raise ValueError("Please provide at least one source and one destination of the move operation")

    # remember whether the caller asked for a dry run, either as 'dry_run' or as 'n'
    was_dry_run = kwargs.pop('dry_run', kwargs.pop('n', None))
    kwargs['dry_run'] = True

    # first execute rename in dryrun so the command tells us what it actually does
    # ( for later output )
    mvlines = self.repo.git.mv(args, paths, **kwargs).splitlines()

    # parse result - first 0:n/2 lines are 'checking ', the remaining ones
    # are the 'renaming' ones which we parse
    prefix_len = len('Renaming ')
    out = []
    for line in mvlines[len(mvlines) // 2:]:
        tokens = line.split(' to ')
        assert len(tokens) == 2, "Too many tokens in %s" % line

        # [0] = Renaming x
        # [1] = y
        out.append((tokens[0][prefix_len:], tokens[1]))
    # END for each line to parse

    # either prepare for the real run, or output the dry-run result
    if was_dry_run:
        return out
    # END handle dryrun

    # now apply the actual operation
    kwargs.pop('dry_run')
    self.repo.git.mv(args, paths, **kwargs)

    return out
def rev_parse(repo, rev):
    """
    Resolve a revision specification by walking it token by token, where the
    tokens are ``^``, ``~``, ``:`` and ``@``, optionally followed by ``{...}``
    or a number.

    :return: Object at the given revision, either Commit, Tag, Tree or Blob
    :param repo: repository used to look up names and objects. If rev starts
        with a token, ``repo.head.ref`` is the starting point
    :param rev: git-rev-parse compatible revision specification as string, please see
        http://www.kernel.org/pub/software/scm/git/docs/git-rev-parse.html
        for details
    :raise BadObject: if the given revision could not be found
        ( not raised directly in here, presumably by the name lookup )
    :raise BadName: if a parent or path requested through ``~``, ``^`` or ``:``
        could not be reached
    :raise ValueError: If rev couldn't be parsed
    :raise IndexError: If invalid reflog index is specified
    :raise NotImplementedError: for the ``:/`` message search as well as for
        ``@{...}`` contents which are not a plain integer"""

    # colon search mode ?
    if rev.startswith(':/'):
        # colon search mode
        raise NotImplementedError("commit by message search ( regex )")
    # END handle search

    obj = None                  # object the spec resolved to so far
    ref = None                  # reference, only set if one was looked up or HEAD is implied
    output_type = "commit"
    start = 0                   # read position within rev
    parsed_to = 0               # position up to which rev was consumed, verified at the end
    lr = len(rev)
    while start < lr:
        # skip everything which is not a token - it belongs to a name
        if rev[start] not in "^~:@":
            start += 1
            continue
        # END handle start

        token = rev[start]

        if obj is None:
            # token is a rev name
            if start == 0:
                # nothing precedes the token, hence the head's reference is used
                ref = repo.head.ref
            else:
                # the reflog syntax needs the reference itself, everything else just the object
                if token == '@':
                    ref = name_to_object(repo, rev[:start], return_ref=True)
                else:
                    obj = name_to_object(repo, rev[:start])
                # END handle token
            # END handle refname

            if ref is not None:
                obj = ref.commit
            # END handle ref
        # END initialize obj on first token

        start += 1

        # try to parse {type}
        if start < lr and rev[start] == '{':
            end = rev.find('}', start)
            if end == -1:
                raise ValueError("Missing closing brace to define type in %s" % rev)
            output_type = rev[start + 1:end]  # exclude brace

            # handle type
            if output_type == 'commit':
                pass  # default
            elif output_type == 'tree':
                try:
                    obj = to_commit(obj).tree
                except (AttributeError, ValueError):
                    pass  # error raised later
                # END exception handling
            elif output_type in ('', 'blob'):
                if obj.type == 'tag':
                    obj = deref_tag(obj)
                else:
                    # cannot do anything for non-tags
                    pass
                # END handle tag
            elif token == '@':
                # try single int
                assert ref is not None, "Requre Reference to access reflog"
                revlog_index = None
                try:
                    # transform reversed index into the format of our revlog
                    # i.e. @{0} becomes -1, @{1} becomes -2 and so forth
                    revlog_index = -(int(output_type) + 1)
                except ValueError:
                    # TODO: Try to parse the other date options, using parse_date
                    # maybe
                    raise NotImplementedError("Support for additional @{...} modes not implemented")
                # END handle revlog index

                try:
                    entry = ref.log_entry(revlog_index)
                except IndexError:
                    raise IndexError("Invalid revlog index: %i" % revlog_index)
                # END handle index out of bound

                obj = Object.new_from_sha(repo, hex_to_bin(entry.newhexsha))

                # make it pass the following checks
                output_type = None
            else:
                raise ValueError("Invalid output type: %s ( in %s )" % (output_type, rev))
            # END handle output type

            # empty output types don't require any specific type, its just about dereferencing tags
            if output_type and obj.type != output_type:
                raise ValueError("Could not accomodate requested object type %r, got %s" % (output_type, obj.type))
            # END verify output type

            start = end + 1  # skip brace
            parsed_to = start
            continue
        # END parse type

        # try to parse a number
        num = 0
        if token != ":":
            found_digit = False
            while start < lr:
                if rev[start] in digits:
                    num = num * 10 + int(rev[start])
                    start += 1
                    found_digit = True
                else:
                    break
                # END handle number
            # END number parse loop

            # no explicit number given, 1 is the default
            # It could be 0 though
            if not found_digit:
                num = 1
            # END set default num
        # END number parsing only if non-blob mode

        parsed_to = start
        # handle hierarchy walk
        try:
            if token == "~":
                # follow the first parent num times
                obj = to_commit(obj)
                # NOTE(review): 'item' is not used, the loop only counts
                for item in xrange(num):
                    obj = obj.parents[0]
                # END for each history item to walk
            elif token == "^":
                obj = to_commit(obj)
                # must be n'th parent
                # a num of 0 keeps the commit itself
                if num:
                    obj = obj.parents[num - 1]
            elif token == ":":
                if obj.type != "tree":
                    obj = obj.tree
                # END get tree type
                # everything after the colon is used as key into the tree
                obj = obj[rev[start:]]
                parsed_to = lr
            else:
                raise ValueError("Invalid token: %r" % token)
            # END end handle tag
        except (IndexError, AttributeError):
            raise BadName("Invalid revision spec '%s' - not enough parent commits to reach '%s%i'" % (rev, token, num))
        # END exception handling
    # END parse loop

    # still no obj ? Its probably a simple name
    if obj is None:
        obj = name_to_object(repo, rev)
        parsed_to = lr
    # END handle simple name

    if obj is None:
        raise ValueError("Revision specifier could not be parsed: %s" % rev)

    # trailing characters which were not consumed make the spec invalid
    if parsed_to != lr:
        raise ValueError("Didn't consume complete rev spec %s, consumed part: %s" % (rev, rev[:parsed_to]))

    return obj