Example #1
    def _set_cache_too_slow_without_c(self, attr):
        # the direct algorithm is fastest and most direct if there is only one
        # delta. Also, the extra overhead might not be worth it for items smaller
        # than X - definitely the case in python, every function call costs
        # huge amounts of time
        # if len(self._dstreams) * self._bstream.size < self.k_max_memory_move:
        if len(self._dstreams) == 1:
            return self._set_cache_brute_(attr)

        # Aggregate all deltas into one delta in reverse order. Hence we take
        # the last delta, and reverse-merge its ancestor delta, until we receive
        # the final delta data stream.
        dcl = connect_deltas(self._dstreams)

        # use rbound() rather than len(), as the (optional) c version doesn't
        # implement the sequence protocol
        if dcl.rbound() == 0:
            self._size = 0
            self._mm_target = allocate_memory(0)
            return
        # END handle empty list

        self._size = dcl.rbound()
        self._mm_target = allocate_memory(self._size)

        bbuf = allocate_memory(self._bstream.size)
        stream_copy(self._bstream.read, bbuf.write, self._bstream.size, 256 * mmap.PAGESIZE)

        # APPLY CHUNKS
        write = self._mm_target.write
        dcl.apply(bbuf, write)

        self._mm_target.seek(0)
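
The helper shared by all of these examples is stream_copy(read, write, size, chunk_size). As a rough mental model (a sketch of the contract implied by the call sites, not the library's actual implementation), it can be pictured like this:

def stream_copy_sketch(read, write, size, chunk_size):
    """Hypothetical re-implementation of the stream_copy contract used above:
    pump up to `size` bytes from the `read` callable to the `write` callable
    in blocks of at most `chunk_size`, returning the number of bytes copied."""
    copied = 0
    while copied < size:
        chunk = read(min(chunk_size, size - copied))
        if not chunk:
            break  # source exhausted early
        write(chunk)
        copied += len(chunk)
    return copied
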
Example #2
File: pack.py  Project: yarikoptic/gitdb
    def _iter_objects(self, start_offset, as_stream=True):
        """Handle the actual iteration of objects within this pack"""
        c = self._cursor
        content_size = c.file_size() - self.footer_size
        cur_offset = start_offset or self.first_object_offset

        null = NullStream()
        while cur_offset < content_size:
            data_offset, ostream = pack_object_at(c, cur_offset, True)
            # scrub the stream to the end - this decompresses the object, but yields
            # the amount of compressed bytes we need to get to the next offset

            stream_copy(ostream.read, null.write, ostream.size, chunk_size)
            cur_offset += (data_offset - ostream.pack_offset) + ostream.stream.compressed_bytes_read()

            # if a stream is requested, rewind it before yielding
            # otherwise the stream object is yielded as-is; it's derived from
            # the info object
            if as_stream:
                ostream.stream.seek(0)
            yield ostream
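
The NullStream above is only used as a write sink: copying into it forces the object to be decompressed so that compressed_bytes_read() reflects the whole entry, while the decompressed data itself is thrown away. A minimal stand-in, assuming nothing beyond that interface, could look like:

class NullStreamSketch(object):
    """Hypothetical discard-everything sink providing the write() interface
    expected by stream_copy; every chunk is counted, then dropped."""

    def __init__(self):
        self.bytes_written = 0

    def write(self, data):
        # pretend the data was consumed and report how much was "written"
        self.bytes_written += len(data)
        return len(data)
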
Example #3
    def _set_cache_brute_(self, attr):
        """If we are here, we apply the actual deltas"""
        # TODO: There should be a special case if there is only one stream
        # Then the default-git algorithm should perform a tad faster, as the
        # delta is not peeked into, causing less overhead.
        buffer_info_list = list()
        max_target_size = 0
        for dstream in self._dstreams:
            buf = dstream.read(512)         # read the header information + X
            offset, src_size = msb_size(buf)
            offset, target_size = msb_size(buf, offset)
            buffer_info_list.append((buffer(buf, offset), offset, src_size, target_size))
            max_target_size = max(max_target_size, target_size)
        # END for each delta stream

        # sanity check - the first delta to apply should have the same source
        # size as our actual base stream
        base_size = self._bstream.size
        target_size = max_target_size

        # if we have more than 1 delta to apply, we will swap buffers, hence we must
        # assure that all buffers we use are large enough to hold all the results
        if len(self._dstreams) > 1:
            base_size = target_size = max(base_size, max_target_size)
        # END adjust buffer sizes

        # Allocate private memory map big enough to hold the first base buffer
        # We need random access to it
        bbuf = allocate_memory(base_size)
        stream_copy(self._bstream.read, bbuf.write, base_size, 256 * mmap.PAGESIZE)

        # allocate memory map large enough for the largest (intermediate) target
        # We will use it as scratch space for all delta ops. If the final
        # target buffer is smaller than our allocated space, we just use parts
        # of it upon return.
        tbuf = allocate_memory(target_size)

        # for each delta to apply, memory map the decompressed delta and
        # work on the op-codes to reconstruct everything.
        # For the actual copying, we use a seek and write pattern of buffer
        # slices.
        final_target_size = None
        for (dbuf, offset, src_size, target_size), dstream in zip(reversed(buffer_info_list), reversed(self._dstreams)):
            # allocate a buffer to hold all delta data - fill in the data for
            # fast access. We do this as we know that reading individual bytes
            # from our stream would be slower than necessary ( although possible )
            # The dbuf buffer contains commands after the first two MSB sizes, the
            # offset specifies the amount of bytes read to get the sizes.
            ddata = allocate_memory(dstream.size - offset)
            ddata.write(dbuf)
            # read the rest from the stream. The size we give is larger than necessary
            stream_copy(dstream.read, ddata.write, dstream.size, 256 * mmap.PAGESIZE)

            #######################################################################
            if 'c_apply_delta' in globals():
                c_apply_delta(bbuf, ddata, tbuf)
            else:
                apply_delta_data(bbuf, src_size, ddata, len(ddata), tbuf.write)
            #######################################################################

            # finally, swap out source and target buffers. The target is now the
            # base for the next delta to apply
            bbuf, tbuf = tbuf, bbuf
            bbuf.seek(0)
            tbuf.seek(0)
            final_target_size = target_size
        # END for each delta to apply

        # it's already seeked to 0; constrain it to the actual size
        # NOTE: at the end of the loop the buffers are swapped, hence our target
        # buffer is bbuf, not tbuf!
        self._mm_target = bbuf
        self._size = final_target_size
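
The msb_size() calls at the top of this method parse git's delta header: source and target sizes are stored as little-endian base-128 varints, seven payload bits per byte with the high bit set while more bytes follow. Judging from the call sites, msb_size returns the offset just past the size field together with the decoded value; a hedged sketch of that decoding:

def msb_size_sketch(data, offset=0):
    """Hypothetical re-implementation of the msb_size calls above: decode a
    git-delta varint (least-significant 7-bit group first, MSB set while more
    bytes follow) and return (offset past the field, decoded size)."""
    size = 0
    shift = 0
    i = offset
    while True:
        byte = data[i]
        if not isinstance(byte, int):  # bytes yield ints on py3, str on py2
            byte = ord(byte)
        size |= (byte & 0x7F) << shift
        i += 1
        if not byte & 0x80:
            return i, size
        shift += 7
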
Example #4
    def store(self, istream):
        """note: The sha we produce will be hex by nature"""
        tmp_path = None
        writer = self.ostream()
        if writer is None:
            # open a tmp file to write the data to
            fd, tmp_path = tempfile.mkstemp(prefix="obj", dir=self._root_path)

            if istream.binsha is None:
                writer = FDCompressedSha1Writer(fd)
            else:
                writer = FDStream(fd)
            # END handle direct stream copies
        # END handle custom writer

        try:
            try:
                if istream.binsha is not None:
                    # copy as much as possible, the actual uncompressed item size might
                    # be smaller than the compressed version
                    stream_copy(istream.read, writer.write, MAXSIZE, self.stream_chunk_size)
                else:
                    # write object with header, we have to make a new one
                    write_object(
                        istream.type, istream.size, istream.read, writer.write, chunk_size=self.stream_chunk_size
                    )
                # END handle direct stream copies
            finally:
                if tmp_path:
                    writer.close()
            # END assure target stream is closed
        except:
            if tmp_path:
                os.remove(tmp_path)
            raise
        # END assure tmpfile removal on error

        hexsha = None
        if istream.binsha:
            hexsha = istream.hexsha
        else:
            hexsha = writer.sha(as_hex=True)
        # END handle sha

        if tmp_path:
            obj_path = self.db_path(self.object_path(hexsha))
            obj_dir = dirname(obj_path)
            if not isdir(obj_dir):
                mkdir(obj_dir)
            # END handle destination directory
            # rename onto existing doesn't work on windows
            if os.name == "nt":
                if isfile(obj_path):
                    remove(tmp_path)
                else:
                    rename(tmp_path, obj_path)
                # end rename only if needed
            else:
                rename(tmp_path, obj_path)
            # END handle win32

            # make sure it's readable for all! It started out as an rw------- tmp file
            # but needs to be rw-r--r--
            chmod(obj_path, self.new_objects_mode)
        # END handle dry_run

        istream.binsha = hex_to_bin(hexsha)
        return istream
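
From the caller's side, store() only needs an IStream describing type, size and a readable payload; the binsha is computed and assigned on return. A minimal usage sketch, assuming gitdb's public LooseObjectDB and IStream types (the import paths and the bytes type tag are assumptions based on recent gitdb versions):

import tempfile
from io import BytesIO

from gitdb import IStream            # assumed import path - adjust to your gitdb version
from gitdb.db import LooseObjectDB   # assumed import path

data = b"hello world"
ldb = LooseObjectDB(tempfile.mkdtemp())               # any existing directory works as root
istream = IStream(b"blob", len(data), BytesIO(data))  # type tag may be a plain str in older versions
ldb.store(istream)                                    # fills in istream.binsha
print(istream.hexsha)
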
Example #5
    def store(self, istream):
        """note: The sha we produce will be hex by nature"""
        tmp_path = None
        writer = self.ostream()
        if writer is None:
            # open a tmp file to write the data to
            fd, tmp_path = tempfile.mkstemp(prefix='obj', dir=self._root_path)

            if istream.binsha is None:
                writer = FDCompressedSha1Writer(fd)
            else:
                writer = FDStream(fd)
            # END handle direct stream copies
        # END handle custom writer

        try:
            try:
                if istream.binsha is not None:
                    # copy as much as possible, the actual uncompressed item size might
                    # be smaller than the compressed version
                    stream_copy(istream.read, writer.write, sys.maxint,
                                self.stream_chunk_size)
                else:
                    # write object with header, we have to make a new one
                    write_object(istream.type,
                                 istream.size,
                                 istream.read,
                                 writer.write,
                                 chunk_size=self.stream_chunk_size)
                # END handle direct stream copies
            finally:
                if tmp_path:
                    writer.close()
            # END assure target stream is closed
        except:
            if tmp_path:
                os.remove(tmp_path)
            raise
        # END assure tmpfile removal on error

        hexsha = None
        if istream.binsha:
            hexsha = istream.hexsha
        else:
            hexsha = writer.sha(as_hex=True)
        # END handle sha

        if tmp_path:
            obj_path = self.db_path(self.object_path(hexsha))
            obj_dir = dirname(obj_path)
            if not isdir(obj_dir):
                mkdir(obj_dir)
            # END handle destination directory
            # rename onto existing doesn't work on windows
            if os.name == 'nt' and isfile(obj_path):
                remove(obj_path)
            # END handle win32
            rename(tmp_path, obj_path)

            # make sure it's readable for all! It started out as an rw------- tmp file
            # but needs to be rw-r--r--
            chmod(obj_path, self.new_objects_mode)
        # END handle dry_run

        istream.binsha = hex_to_bin(hexsha)
        return istream
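
This older variant differs from Example #4 mainly in its Python-2-only sys.maxint sentinel (the newer code uses a MAXSIZE constant for the same purpose) and in how it clears an existing loose object before rename() on Windows. Both variants fall back to write_object when no binsha is known yet; a sketch of that helper's apparent contract, assuming it emits the canonical loose-object header (type, a space, the decimal size, a NUL byte) before streaming the payload:

def write_object_sketch(type_, size, read, write, chunk_size=65536):
    """Hypothetical re-implementation of the write_object call above: emit the
    loose-object header "<type> <size>\\0", then stream `size` payload bytes
    from the `read` callable to the `write` callable in chunks."""
    if isinstance(type_, bytes):      # the type tag may arrive as bytes
        type_ = type_.decode("ascii")
    header = ("%s %i\0" % (type_, size)).encode("ascii")
    write(header)
    copied = 0
    while copied < size:
        chunk = read(min(chunk_size, size - copied))
        if not chunk:
            break
        write(chunk)
        copied += len(chunk)
    return len(header) + copied
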