示例#1
0
文件: fun.py 项目: Kronuz/gitdb
def pack_object_header_info(data):
    """
    Decode the variable-length object header found at the start of ``data``.

    The first byte carries the numeric type in bits 4-6 and the lowest four
    bits of the size. While the top bit (0x80) of the most recently read byte
    is set, another byte follows that contributes seven more size bits, least
    significant group first.

    :return: tuple(type_id, uncompressed_size_in_bytes, byte_offset)
        The type_id should be interpreted according to the ``type_id_to_type_map`` map
        The byte-offset specifies the start of the actual zlib compressed datastream
    :param data: random-access memory, like a string or memory map"""
    head = byte_ord(data[0])
    type_id = (head >> 4) & 7
    size = head & 15

    # Choose the per-byte converter once so a single loop serves both
    # interpreter generations: byte_ord under PY3, the builtin ord otherwise.
    to_int = byte_ord if PY3 else ord

    position = 1        # index of the next header byte to read
    shift = 4           # the first byte already supplied four size bits
    current = head
    while current & 0x80:
        current = to_int(data[position])
        position += 1
        size += (current & 0x7f) << shift
        shift += 7
    # END continuation bytes
    return (type_id, size, position)
示例#2
0
def pack_object_header_info(data):
    """
    Parse the object header stored at the beginning of ``data``.

    Bits 4-6 of the first byte hold the numeric type, its low nibble holds the
    first four size bits. Every byte whose 0x80 bit is set announces one more
    byte, each of which adds seven further size bits at increasing shifts.

    :return: tuple(type_id, uncompressed_size_in_bytes, byte_offset)
        The type_id should be interpreted according to the ``type_id_to_type_map`` map
        The byte-offset specifies the start of the actual zlib compressed datastream
    :param data: random-access memory, like a string or memory map"""
    first = byte_ord(data[0])
    type_id = (first >> 4) & 7
    size = first & 15

    # byte_ord is used under PY3, the builtin ord otherwise; selecting it up
    # front avoids keeping two copies of the same loop.
    as_int = byte_ord if PY3 else ord

    offset = 1      # next byte to consume
    bits = 4        # number of size bits gathered so far
    last = first
    while last & 0x80:
        last = as_int(data[offset])
        offset += 1
        size += (last & 0x7f) << bits
        bits += 7
    # END continuation bytes
    return (type_id, size, offset)
示例#3
0
def pack_object_at(cursor, offset, as_stream):
    """
    Parse the pack entry located at ``offset`` and wrap it in an info or
    stream object.

    :return: Tuple(abs_data_offset, PackInfo|PackStream)
        an object of the correct type according to the type_id of the object.
        If as_stream is True, the object will contain a stream, allowing the
        data to be read decompressed.
        ``abs_data_offset`` is ``offset`` plus the number of header bytes that
        precede the compressed data.
    :param cursor: object whose ``use_region(offset).buffer()`` yields the
        random accessible data containing all required information
    :param offset: offset in to the data at which the object information is located
    :param as_stream: if True, a stream object will be returned that can read
        the data, otherwise you receive an info object only"""
    data = cursor.use_region(offset).buffer()
    type_id, uncomp_size, header_end = pack_object_header_info(data)

    delta_info = None
    if type_id == OFS_DELTA:
        # The delta reference is a variable-length integer placed right after
        # the header; 0x80 marks that another byte follows, and every
        # continuation adds one before shifting in the next seven bits.
        pos = header_end
        byte = byte_ord(data[pos])
        pos += 1
        delta_offset = byte & 0x7f
        while byte & 0x80:
            byte = byte_ord(data[pos])
            pos += 1
            delta_offset = ((delta_offset + 1) << 7) + (byte & 0x7f)
        # END offset bytes
        delta_info = delta_offset
        stream_start = pos
    elif type_id == REF_DELTA:
        # The 20 bytes after the header are kept verbatim as the delta info
        # (presumably the binary sha of the base object -- confirm).
        stream_start = header_end + 20
        delta_info = data[header_end:stream_start]
    else:
        # anything else is assumed to be a base object without extra header
        stream_start = header_end
    # END handle type id

    abs_data_offset = offset + stream_start

    if not as_stream:
        if delta_info is None:
            return abs_data_offset, OPackInfo(offset, type_id, uncomp_size)
        return abs_data_offset, ODeltaPackInfo(offset, type_id, uncomp_size,
                                               delta_info)

    stream = DecompressMemMapReader(buffer(data, stream_start), False,
                                    uncomp_size)
    if delta_info is None:
        return abs_data_offset, OPackStream(offset, type_id, uncomp_size,
                                            stream)
    return abs_data_offset, ODeltaPackStream(offset, type_id, uncomp_size,
                                             delta_info, stream)
示例#4
0
文件: pack.py 项目: yarikoptic/gitdb
def pack_object_at(cursor, offset, as_stream):
    """
    Read the pack entry starting at ``offset`` and return it as an info or
    stream object together with the absolute offset of its data.

    :return: Tuple(abs_data_offset, PackInfo|PackStream)
        an object of the correct type according to the type_id of the object.
        If as_stream is True, the object will contain a stream, allowing the
        data to be read decompressed.
        ``abs_data_offset`` equals ``offset`` plus the length of everything
        that precedes the compressed data.
    :param cursor: object whose ``use_region(offset).buffer()`` yields the
        random accessible data containing all required information
    :param offset: offset in to the data at which the object information is located
    :param as_stream: if True, a stream object will be returned that can read
        the data, otherwise you receive an info object only"""
    data = cursor.use_region(offset).buffer()
    type_id, uncomp_size, data_rela_offset = pack_object_header_info(data)

    delta_info = None
    if type_id == OFS_DELTA:
        # Variable-length integer directly behind the header: the 0x80 bit
        # flags a continuation byte, and each continuation adds one before
        # the accumulated value is shifted left by seven bits.
        index = data_rela_offset
        char = byte_ord(data[index])
        index += 1
        delta_info = char & 0x7f
        while char & 0x80:
            char = byte_ord(data[index])
            index += 1
            delta_info = ((delta_info + 1) << 7) + (char & 0x7f)
        # END offset bytes
        total_rela_offset = index
    elif type_id == REF_DELTA:
        # The next 20 bytes are stored as-is (presumably the base object's
        # binary sha -- confirm against the pack format).
        total_rela_offset = data_rela_offset + 20
        delta_info = data[data_rela_offset:total_rela_offset]
    else:
        # assume it is a base object: the data follows the header directly
        total_rela_offset = data_rela_offset
    # END handle type id

    abs_data_offset = offset + total_rela_offset
    is_delta = delta_info is not None

    if as_stream:
        stream = DecompressMemMapReader(buffer(data, total_rela_offset), False, uncomp_size)
        if is_delta:
            result = ODeltaPackStream(offset, type_id, uncomp_size, delta_info, stream)
        else:
            result = OPackStream(offset, type_id, uncomp_size, stream)
    elif is_delta:
        result = ODeltaPackInfo(offset, type_id, uncomp_size, delta_info)
    else:
        result = OPackInfo(offset, type_id, uncomp_size)
    return abs_data_offset, result
示例#5
0
def is_equal_canonical_sha(canonical_length, match, sha1):
    """
    Compare a partial binary sha against a full one, hex digit by hex digit.

    :return: True if the first ``canonical_length`` hexadecimal digits of
        ``match`` and ``sha1`` are equal.
        The comparison takes the canonical_length of the match sha into account:
        for uneven canonical lengths only the upper 4 bits of the last byte of
        ``match`` take part in the comparison.
    :param canonical_length: number of hexadecimal digits the partial sha
        originally had
    :param match: less than 20 byte sha; for an uneven canonical_length its last
        byte is expected to carry the dangling hex digit in its upper 4 bits
    :param sha1: 20 byte sha"""
    binary_length = canonical_length // 2
    if match[:binary_length] != sha1[:binary_length]:
        return False

    # Only an uneven number of hex digits leaves half a byte to compare.
    # The previous guard, ``canonical_length - binary_length``, is non-zero
    # for every length >= 1, so the nibble check also ran for even lengths
    # and could inspect a byte beyond the requested prefix.
    if canonical_length % 2 and (byte_ord(match[-1]) ^ byte_ord(sha1[len(match) - 1])) & 0xF0:
        return False
    # END handle uneven canonical length
    return True
示例#6
0
    def sha_to_index(self, sha):
        """
        Locate ``sha`` by bisecting the slice of entries that share its first byte.

        The fanout table supplies the bounds: the entry for the first byte is
        the exclusive upper bound, the entry for the preceding byte value (or 0
        when the first byte is 0) is the lower bound.

        :return: index usable with the ``offset`` or ``entry`` method, or None
            if the sha was not found in this pack index
        :param sha: 20 byte sha to lookup"""
        bucket = byte_ord(sha[0])
        sha_at = self.sha
        fanout = self._fanout_table

        lower = fanout[bucket - 1] if bucket != 0 else 0
        upper = fanout[bucket]

        while lower < upper:
            pivot = (lower + upper) // 2
            candidate = sha_at(pivot)
            if sha < candidate:
                upper = pivot
                continue
            if sha == candidate:
                return pivot
            lower = pivot + 1
        # END bisect
        return None
示例#7
0
def is_equal_canonical_sha(canonical_length, match, sha1):
    """
    Check whether a partial binary sha is a prefix of a full binary sha,
    measured in hexadecimal digits.

    :return: True if the first ``canonical_length`` hexadecimal digits of
        ``match`` and ``sha1`` are equal.
        The comparison takes the canonical_length of the match sha into account:
        for uneven canonical lengths only the upper 4 bits of the last byte of
        ``match`` take part in the comparison.
    :param canonical_length: number of hexadecimal digits the partial sha
        originally had
    :param match: less than 20 byte sha; for an uneven canonical_length its last
        byte is expected to carry the dangling hex digit in its upper 4 bits
    :param sha1: 20 byte sha"""
    binary_length = canonical_length // 2
    if match[:binary_length] != sha1[:binary_length]:
        return False

    # A half byte remains only when the number of hex digits is uneven. The
    # former guard ``canonical_length - binary_length`` was truthy for every
    # length >= 1, which made the nibble comparison run for even lengths too
    # and let it look at a byte outside the requested prefix.
    if canonical_length % 2:
        differing_bits = byte_ord(match[-1]) ^ byte_ord(sha1[len(match) - 1])
        if differing_bits & 0xf0:
            return False
    # END handle uneven canonical length
    return True
示例#8
0
文件: pack.py 项目: yarikoptic/gitdb
    def sha_to_index(self, sha):
        """
        Binary-search the index for ``sha``.

        Only entries sharing the first byte of ``sha`` are searched; the fanout
        table entry of that byte is the exclusive right bound and the entry of
        the previous byte value (0 if there is none) is the left bound.

        :return: index usable with the ``offset`` or ``entry`` method, or None
            if the sha was not found in this pack index
        :param sha: 20 byte sha to lookup"""
        lead = byte_ord(sha[0])
        lookup = self.sha

        left = 0
        if lead:
            left = self._fanout_table[lead - 1]
        right = self._fanout_table[lead]

        # narrow [left, right) until it is empty or the sha is hit
        while left < right:
            middle = (left + right) // 2
            found = lookup(middle)
            if sha < found:
                right = middle
            elif sha == found:
                return middle
            else:
                left = middle + 1
        # END bisect
        return None
示例#9
0
    def write(self, pack_sha, write):
        """Write the index file using the given write method

        Everything except the final checksum goes through a
        ``FlexibleSha1Writer`` wrapped around ``write``. The sections are
        emitted in this order: signature and version, 256 cumulative fanout
        counts, the shas, the crc32 values, the 32 bit offsets, the 64 bit
        offsets, and ``pack_sha``. The sha obtained from the writer is then
        passed to ``write`` directly.

        As a side effect ``self._objs`` is sorted in place by its first item.

        :param pack_sha: binary sha over the whole pack that we index
        :param write: callable receiving the bytes to write
        :return: sha1 binary sha over all index file contents"""
        # entries are written in sha order
        self._objs.sort(key=lambda o: o[0])

        sha_writer = FlexibleSha1Writer(write)
        sha_write = sha_writer.write
        sha_write(PackIndexFile.index_v2_signature)
        sha_write(pack(">L", PackIndexFile.index_version_default))

        # fanout: count shas per first byte, then emit the running totals
        per_byte = [0] * 256
        for entry in self._objs:
            per_byte[byte_ord(entry[0][0])] += 1
        # END count first bytes
        running_total = 0
        for count in per_byte:
            running_total += count
            sha_write(pack('>L', running_total))
        # END write each fanout entry

        # sha1 ordered, joined up front to save calls
        sha_write(b''.join(entry[0] for entry in self._objs))

        # crc32
        for entry in self._objs:
            sha_write(pack('>L', entry[1] & 0xffffffff))
        # END for each crc

        # offset 32: offsets above 0x7fffffff are replaced by 0x80000000 plus
        # their position in the 64 bit offset list written afterwards
        large_offsets = []
        for entry in self._objs:
            ofs = entry[2]
            if ofs > 0x7fffffff:
                large_offsets.append(ofs)
                ofs = 0x80000000 + len(large_offsets) - 1
            # END handle 64 bit offsets
            sha_write(pack('>L', ofs & 0xffffffff))
        # END for each offset

        # offset 64
        for ofs in large_offsets:
            sha_write(pack(">Q", ofs))
        # END for each offset

        # trailer
        assert (len(pack_sha) == 20)
        sha_write(pack_sha)
        sha = sha_writer.sha(as_hex=False)
        write(sha)
        return sha
示例#10
0
文件: pack.py 项目: yarikoptic/gitdb
    def write(self, pack_sha, write):
        """Write the index file using the given write method

        All sections but the closing checksum are sent through a
        ``FlexibleSha1Writer`` built around ``write``: signature, version, the
        256 cumulative fanout counts, the shas, one crc32 per object, one
        32 bit offset per object, the 64 bit offsets and finally ``pack_sha``.
        The sha reported by the writer is then handed to ``write`` directly.

        Note that ``self._objs`` is sorted in place by its first item.

        :param pack_sha: binary sha over the whole pack that we index
        :param write: callable receiving the bytes to write
        :return: sha1 binary sha over all index file contents"""
        objs = self._objs
        # sort for sha1 hash
        objs.sort(key=lambda o: o[0])

        sha_writer = FlexibleSha1Writer(write)
        sha_write = sha_writer.write
        sha_write(PackIndexFile.index_v2_signature)
        sha_write(pack(">L", PackIndexFile.index_version_default))

        # fanout: histogram of first bytes turned into cumulative counts
        histogram = [0] * 256
        for obj in objs:
            histogram[byte_ord(obj[0][0])] += 1
        # END prepare fanout
        cumulative = 0
        for amount in histogram:
            cumulative += amount
            sha_write(pack('>L', cumulative))
        # END write each fanout entry

        # sha1 ordered
        # save calls by handing over all shas at once
        sha_write(b''.join(obj[0] for obj in objs))

        # crc32
        for obj in objs:
            sha_write(pack('>L', obj[1] & 0xffffffff))
        # END for each crc

        # offset 32: an offset above 0x7fffffff is stored in the 64 bit table
        # and represented here by 0x80000000 plus its index in that table
        offsets64 = []
        for obj in objs:
            ofs = obj[2]
            if ofs > 0x7fffffff:
                ofs = 0x80000000 + len(offsets64)
                offsets64.append(obj[2])
            # END handle 64 bit offsets
            sha_write(pack('>L', ofs & 0xffffffff))
        # END for each offset

        # offset 64
        for ofs in offsets64:
            sha_write(pack(">Q", ofs))
        # END for each offset

        # trailer
        assert(len(pack_sha) == 20)
        sha_write(pack_sha)
        sha = sha_writer.sha(as_hex=False)
        write(sha)
        return sha
示例#11
0
    def partial_sha_to_index(self, partial_bin_sha, canonical_length):
        """
        Find the single index entry whose sha starts with the given partial sha.

        The partial sha is padded with null bytes to 20 bytes and bisected
        within the fanout range of its first byte, which yields the first entry
        that is not smaller than the padded value. That entry is the only
        possible first match; the entry after it decides about ambiguity.

        :return: index as in `sha_to_index` or None if the sha was not found in this
            index file
        :param partial_bin_sha: an at least two bytes of a partial binary sha as bytes
        :param canonical_length: length of the original hexadecimal representation of the
            given partial binary sha
        :raise ValueError: if fewer than two bytes are given
        :raise AmbiguousObjectName: if more than one entry matches the partial sha"""
        if len(partial_bin_sha) < 2:
            raise ValueError("Require at least 2 bytes of partial sha")

        assert isinstance(partial_bin_sha,
                          bytes), "partial_bin_sha must be bytes"
        first_byte = byte_ord(partial_bin_sha[0])

        get_sha = self.sha
        lo = 0  # lower index, the left bound of the bisection
        if first_byte != 0:
            lo = self._fanout_table[first_byte - 1]
        hi = self._fanout_table[
            first_byte]  # the upper, right bound of the bisection

        # fill the partial to full 20 bytes
        filled_sha = partial_bin_sha + NULL_BYTE * (20 - len(partial_bin_sha))

        # find lowest
        while lo < hi:
            mid = (lo + hi) // 2
            mid_sha = get_sha(mid)
            if filled_sha < mid_sha:
                hi = mid
            elif filled_sha == mid_sha:
                # perfect match
                lo = mid
                break
            else:
                lo = mid + 1
            # END handle midpoint
        # END bisect

        size = self.size()
        if lo >= size:
            return None
        if not is_equal_canonical_sha(canonical_length, partial_bin_sha,
                                      get_sha(lo)):
            return None

        # The name is ambiguous when the following entry matches the partial
        # sha as well. Comparing the two full shas for equality, as was done
        # before, could only trigger on a duplicated index entry.
        if lo + 1 < size and is_equal_canonical_sha(
                canonical_length, partial_bin_sha, get_sha(lo + 1)):
            raise AmbiguousObjectName(partial_bin_sha)
        return lo
示例#12
0
文件: pack.py 项目: yarikoptic/gitdb
    def partial_sha_to_index(self, partial_bin_sha, canonical_length):
        """
        Resolve a partial binary sha to the one index entry it identifies.

        The partial sha is padded with null bytes to 20 bytes and bisected
        within the fanout range of its first byte; the bisection ends on the
        first entry that is not smaller than the padded value. If that entry
        matches the partial sha, the entry after it is checked for ambiguity.

        :return: index as in `sha_to_index` or None if the sha was not found in this
            index file
        :param partial_bin_sha: an at least two bytes of a partial binary sha as bytes
        :param canonical_length: length of the original hexadecimal representation of the
            given partial binary sha
        :raise ValueError: if fewer than two bytes are given
        :raise AmbiguousObjectName: if more than one entry matches the partial sha"""
        if len(partial_bin_sha) < 2:
            raise ValueError("Require at least 2 bytes of partial sha")

        assert isinstance(partial_bin_sha, bytes), "partial_bin_sha must be bytes"
        first_byte = byte_ord(partial_bin_sha[0])

        get_sha = self.sha
        lo = 0                  # lower index, the left bound of the bisection
        if first_byte != 0:
            lo = self._fanout_table[first_byte - 1]
        hi = self._fanout_table[first_byte]     # the upper, right bound of the bisection

        # fill the partial to full 20 bytes
        filled_sha = partial_bin_sha + NULL_BYTE * (20 - len(partial_bin_sha))

        # find lowest
        while lo < hi:
            mid = (lo + hi) // 2
            mid_sha = get_sha(mid)
            if filled_sha < mid_sha:
                hi = mid
            elif filled_sha == mid_sha:
                # perfect match
                lo = mid
                break
            else:
                lo = mid + 1
            # END handle midpoint
        # END bisect

        entry_count = self.size()
        if lo < entry_count:
            if is_equal_canonical_sha(canonical_length, partial_bin_sha, get_sha(lo)):
                # A second matching entry makes the partial sha ambiguous. The
                # earlier check compared the two complete shas for equality,
                # which can only be true for a duplicated index entry.
                if lo + 1 < entry_count and \
                        is_equal_canonical_sha(canonical_length, partial_bin_sha, get_sha(lo + 1)):
                    raise AmbiguousObjectName(partial_bin_sha)
                return lo
            # END if we have a match
        # END if we found something
        return None