def move_in_thin_pack(self, path):
    """Move a specific file containing a pack into the pack directory.

    :note: The file should be on the same file system as the
        packs directory.

    :param path: Path to the pack file.
    """
    data = PackData(path)

    # Write index for the thin pack (do we really need this?)
    temppath = os.path.join(self.pack_dir,
        sha_to_hex(urllib2.randombytes(20))+".tempidx")
    data.create_index_v2(temppath, self.get_raw)
    p = Pack.from_objects(data, load_pack_index(temppath))

    # Write a full pack version
    temppath = os.path.join(self.pack_dir,
        sha_to_hex(urllib2.randombytes(20))+".temppack")
    write_pack(temppath, ((o, None) for o in p.iterobjects(self.get_raw)),
        len(p))
    pack_sha = load_pack_index(temppath+".idx").objects_sha1()
    newbasename = os.path.join(self.pack_dir, "pack-%s" % pack_sha)
    os.rename(temppath+".pack", newbasename+".pack")
    os.rename(temppath+".idx", newbasename+".idx")
    self._add_known_pack(newbasename)
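
# A minimal standalone sketch (hypothetical helpers, not part of this
# module's API) of the pattern move_in_thin_pack relies on: write the pack
# under a random temporary name, then publish it with os.rename, which only
# behaves atomically on POSIX when both paths are on the same filesystem --
# the reason for the :note: above.
import binascii

def _sketch_tempname(directory, suffix):
    """Random temporary filename in directory, like the randombytes use."""
    return os.path.join(directory, binascii.hexlify(os.urandom(20)) + suffix)

def _sketch_publish_pack(temppath, directory, pack_sha_hex):
    """Rename a finished temporary pack to its final pack-<sha> name."""
    final = os.path.join(directory, "pack-%s.pack" % pack_sha_hex)
    os.rename(temppath, final)
    return final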
@property
def data(self):
    """The pack data object being used."""
    if self._data is None:
        self._data = PackData(self._data_path)
        # The index and the data must describe the same pack: same object
        # count and matching stored checksums.
        assert len(self.index) == len(self._data)
        idx_stored_checksum = self.index.get_pack_checksum()
        data_stored_checksum = self._data.get_stored_checksum()
        if idx_stored_checksum != data_stored_checksum:
            raise ChecksumMismatch(sha_to_hex(idx_stored_checksum),
                                   sha_to_hex(data_stored_checksum))
    return self._data
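
# Usage sketch for the property above: the first access constructs PackData
# and cross-checks it against the index; later accesses return the cached
# object. The basename below is hypothetical.
#
#     p = Pack("pack-0123abcd")   # basename, without .pack/.idx extension
#     d = p.data                  # loads the data file, validates checksums
#     assert d is p.data          # cached after the first access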
def get_raw(self, name):
    """Obtain the raw text for an object.

    :param name: sha for the object.
    :return: tuple with object type and object contents.
    """
    if len(name) == 40:
        sha = hex_to_sha(name)
        hexsha = name
    elif len(name) == 20:
        sha = name
        hexsha = None
    else:
        raise AssertionError("Invalid object name %r" % name)
    for pack in self.packs:
        try:
            return pack.get_raw(sha)
        except KeyError:
            pass
    if hexsha is None:
        hexsha = sha_to_hex(name)
    ret = self._get_shafile(hexsha)
    if ret is not None:
        return ret.type, ret.as_raw_string()
    raise KeyError(hexsha)
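
# get_raw accepts an object name in either form: 40-byte hex or 20-byte
# binary sha. A standalone sketch of that normalisation (hypothetical
# helper, using binascii rather than this module's hex_to_sha/sha_to_hex):
import binascii

def _sketch_normalize_name(name):
    """Return (binary_sha, hex_sha) for either input form."""
    if len(name) == 40:
        return binascii.unhexlify(name), name
    elif len(name) == 20:
        return name, binascii.hexlify(name)
    raise AssertionError("Invalid object name %r" % (name,))

assert _sketch_normalize_name("aa" * 20) == ("\xaa" * 20, "aa" * 20)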
def read_cache_entry(f):
    """Read an entry from a cache file.

    :param f: File-like object to read from
    :return: tuple with: name, ctime, mtime, inode, device, mode, uid,
        gid, size, sha, flags
    """
    beginoffset = f.tell()
    ctime = read_cache_time(f)
    mtime = read_cache_time(f)
    (ino, dev, mode, uid, gid, size, sha, flags) = \
        struct.unpack(">LLLLLL20sH", f.read(20 + 4 * 6 + 2))
    name = ""
    char = f.read(1)
    while char != "\0":
        name += char
        char = f.read(1)
    # Entries are padded with NUL bytes to a multiple of eight bytes;
    # skip past the padding to the start of the next entry.
    real_size = ((f.tell() - beginoffset + 7) & ~7)
    f.seek(beginoffset + real_size)
    return (name, ctime, mtime, ino, dev, mode, uid, gid, size,
            sha_to_hex(sha), flags)
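
# The padding step above rounds the entry length up to the next multiple of
# eight bytes. A sketch of the arithmetic: adding 7 then clearing the low
# three bits rounds up, and is the identity on already-aligned lengths.
def _sketch_pad_to_eight(length):
    """Round length up to the nearest multiple of 8."""
    return (length + 7) & ~7

assert _sketch_pad_to_eight(62) == 64
assert _sketch_pad_to_eight(64) == 64
assert _sketch_pad_to_eight(65) == 72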
class PackData(object):
    """The data contained in a packfile.

    Pack files can be accessed both sequentially for exploding a pack, and
    directly with the help of an index to retrieve a specific object.

    The objects within are either complete or a delta against another.

    The header is variable length. If the MSB of each byte is set then it
    indicates that the subsequent byte is still part of the header.
    For the first byte the next MS bits are the type, which tells you the
    type of object, and whether it is a delta. The LS 4 bits of the first
    byte are the lowest bits of the size. For each subsequent byte the LS 7
    bits are the next MS bits of the size, i.e. the last byte of the header
    contains the MS bits of the size.

    For the complete objects the data is stored as zlib deflated data.
    The size in the header is the uncompressed object size, so to uncompress
    you need to just keep feeding data to zlib until you get an object back,
    or it errors on bad data. This is done here by just giving the complete
    buffer from the start of the deflated object on. This is bad, but until I
    get mmap sorted out it will have to do.

    Currently there are no integrity checks done. Also no attempt is made to
    try and detect the delta case, or a request for an object at the wrong
    position. It will all just throw a zlib or KeyError.
    """

    def __init__(self, filename):
        """Create a PackData object that represents the pack in the given
        filename.

        The file must exist and stay readable until the object is disposed
        of. It must also stay the same size. It will be mapped whenever
        needed.

        Currently there is a restriction on the size of the pack as the
        python mmap implementation is flawed.
        """
        self._filename = filename
        assert os.path.exists(filename), "%s is not a packfile" % filename
        self._size = os.path.getsize(filename)
        self._header_size = 12
        assert self._size >= self._header_size, \
            "%s is too small for a packfile" % filename
        self._read_header()

    def _read_header(self):
        f = open(self._filename, 'rb')
        try:
            (version, self._num_objects) = read_pack_header(f)
            f.seek(self._size-20)
            (self._stored_checksum,) = read_pack_tail(f)
        finally:
            f.close()

    def __len__(self):
        """Returns the number of objects in this pack."""
        return self._num_objects

    def calculate_checksum(self):
        # Hash everything except the trailing 20-byte checksum itself.
        f = open(self._filename, 'rb')
        try:
            map = simple_mmap(f, 0, self._size)
            return hashlib.sha1(map[:-20]).digest()
        finally:
            f.close()

    def iterobjects(self):
        offset = self._header_size
        f = open(self._filename, 'rb')
        try:
            for i in range(len(self)):
                map = simple_mmap(f, offset, self._size-offset)
                (type, obj, total_size) = unpack_object(map)
                yield offset, type, obj
                offset += total_size
        finally:
            f.close()

    def iterentries(self, ext_resolve_ref=None):
        found = {}
        at = {}
        postponed = defaultdict(list)

        class Postpone(Exception):
            """Raised to postpone delta resolving."""

        def get_ref_text(sha):
            if sha in found:
                return found[sha]
            if ext_resolve_ref:
                try:
                    return ext_resolve_ref(sha)
                except KeyError:
                    pass
            raise Postpone, (sha, )

        todo = list(self.iterobjects())
        while todo:
            (offset, type, obj) = todo.pop(0)
            at[offset] = (type, obj)
            assert isinstance(offset, int)
            assert isinstance(type, int)
            assert isinstance(obj, tuple) or isinstance(obj, str)
            try:
                type, obj = resolve_object(offset, type, obj, get_ref_text,
                    at.__getitem__)
            except Postpone, (sha, ):
                # Delta base not seen yet; retry once its base is resolved.
                postponed[sha].append((offset, type, obj))
            else:
                shafile = ShaFile.from_raw_string(type, obj)
                sha = shafile.sha().digest()
                found[sha] = (type, obj)
                yield sha, offset, shafile.crc32()
                todo += postponed.get(sha, [])
        if postponed:
            raise KeyError([sha_to_hex(h) for h in postponed.keys()])
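
# A minimal sketch of the object-header encoding described in the PackData
# docstring: the MSB of each byte marks a continuation, the first byte
# carries the 3-bit type and the 4 lowest size bits, and each later byte
# contributes 7 more, increasingly significant, size bits. Hypothetical
# helper, independent of unpack_object above.
def _sketch_decode_object_header(data):
    """Decode (type, size, bytes_consumed) from a pack object header."""
    byte = ord(data[0])
    offset = 1
    type = (byte >> 4) & 0x07
    size = byte & 0x0f
    shift = 4
    while byte & 0x80:
        byte = ord(data[offset])
        offset += 1
        size += (byte & 0x7f) << shift
        shift += 7
    return type, size, offset

# Example: type 3 (blob), size 100. First byte 0xb4 = continuation bit set,
# type 3, low size bits 4; second byte 0x06 = high size bits 6, so the size
# is 4 + (6 << 4) = 100.
assert _sketch_decode_object_header("\xb4\x06") == (3, 100, 2)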