def runTest(self):
    k = Weave()

    k._parents = [frozenset(),
                  frozenset([0]),
                  ]
    k._weave = [('{', 0),
                'first line',
                ('[', 1),
                'line to be deleted',
                (']', 1),
                ('{', 1),
                'replacement line',
                ('}', 1),
                'last line',
                ('}', 0),
                ]
    k._sha1s = [sha_string('first lineline to be deletedlast line')
              , sha_string('first linereplacement linelast line')]

    self.assertEqual(k.get_lines(0),
                     ['first line',
                      'line to be deleted',
                      'last line',
                      ])

    self.assertEqual(k.get_lines(1),
                     ['first line',
                      'replacement line',
                      'last line',
                      ])
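# Aside (an illustrative sketch, not part of the original test): each _sha1s
# entry above is the sha1 of that version's lines joined with no separator.
# Assumes bzrlib's osutils.sha_string (hex sha1 digest of a string):
from bzrlib.osutils import sha_string

lines_v0 = ['first line', 'line to be deleted', 'last line']
assert sha_string(''.join(lines_v0)) == \
    sha_string('first lineline to be deletedlast line')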
def iter_diffs(self):
    """Compute the diffs one at a time."""
    # This is used instead of compute_diffs() because, since we guarantee
    # our ordering of inventories, we don't have to do any buffering
    self._find_needed_keys()
    # We actually use a slightly different ordering. We grab all of the
    # parents first, and then grab the ordered requests.
    needed_ids = [k[-1] for k in self.present_parents]
    needed_ids.extend([k[-1] for k in self.ordered_keys])
    inv_to_str = self.repo._serializer.write_inventory_to_string
    for inv in self.repo.iter_inventories(needed_ids):
        revision_id = inv.revision_id
        key = (revision_id,)
        if key in self.present_parents:
            # Not a key we will transmit, which is a shame, since because
            # of that bundles don't work with stacked branches
            parent_ids = None
        else:
            parent_ids = [k[-1] for k in self.parent_map[key]]
        as_bytes = inv_to_str(inv)
        self._process_one_record(key, (as_bytes,))
        if parent_ids is None:
            continue
        diff = self.diffs.pop(key)
        sha1 = osutils.sha_string(as_bytes)
        yield revision_id, parent_ids, sha1, diff
def make_file(self, file_id, name, parent_id, content='content\n',
              revision='new-test-rev'):
    ie = InventoryFile(file_id, name, parent_id)
    ie.text_sha1 = osutils.sha_string(content)
    ie.text_size = len(content)
    ie.revision = revision
    return ie
def runTest(self):
    k = Weave()

    k._parents = [frozenset(),
                  frozenset([0]),
                  frozenset([0]),
                  frozenset([0, 1, 2]),
                  ]
    k._weave = [('{', 0),
                'foo {',
                ('{', 1),
                ' added in version 1',
                ('{', 2),
                ' added in v2',
                ('}', 2),
                ' also from v1',
                ('}', 1),
                '}',
                ('}', 0)]
    k._sha1s = [sha_string('foo {}')
              , sha_string('foo { added in version 1 also from v1}')
              , sha_string('foo { added in v2}')
              , sha_string('foo { added in version 1 added in v2 also from v1}')
              ]

    self.assertEqual(k.get_lines(0),
                     ['foo {',
                      '}'])
    self.assertEqual(k.get_lines(1),
                     ['foo {',
                      ' added in version 1',
                      ' also from v1',
                      '}'])
    self.assertEqual(k.get_lines(2),
                     ['foo {',
                      ' added in v2',
                      '}'])
    self.assertEqual(k.get_lines(3),
                     ['foo {',
                      ' added in version 1',
                      ' added in v2',
                      ' also from v1',
                      '}'])
def _install_inventory_records(self, records):
    if (self._info['serializer'] == self._repository._serializer.format_num
        and self._repository._serializer.support_altered_by_hack):
        return self._install_mp_records_keys(self._repository.inventories,
            records)
    # Use a 10MB text cache, since these are string xml inventories. Note
    # that 10MB is fairly small for large projects (a single inventory can
    # be >5MB). Another possibility is to cache 10-20 inventory texts
    # instead.
    inventory_text_cache = lru_cache.LRUSizeCache(10*1024*1024)
    # Also cache the in-memory representation. This allows us to create
    # inventory deltas to apply rather than calling add_inventory from
    # scratch each time.
    inventory_cache = lru_cache.LRUCache(10)
    pb = ui.ui_factory.nested_progress_bar()
    try:
        num_records = len(records)
        for idx, (key, metadata, bytes) in enumerate(records):
            pb.update('installing inventory', idx, num_records)
            revision_id = key[-1]
            parent_ids = metadata['parents']
            # Note: This assumes the local ghosts are identical to the
            #       ghosts in the source, as the Bundle serialization
            #       format doesn't record ghosts.
            p_texts = self._get_parent_inventory_texts(inventory_text_cache,
                                                       inventory_cache,
                                                       parent_ids)
            # Why does to_lines() take strings as the source? It seems that
            # it would have to cast them to a list of lines, which we get
            # back as lines and then cast back to a string.
            target_lines = multiparent.MultiParent.from_patch(bytes
                ).to_lines(p_texts)
            inv_text = ''.join(target_lines)
            del target_lines
            sha1 = osutils.sha_string(inv_text)
            if sha1 != metadata['sha1']:
                raise errors.BadBundle("Can't convert to target format")
            # Add this to the cache so we don't have to extract it again.
            inventory_text_cache[revision_id] = inv_text
            target_inv = self._source_serializer.read_inventory_from_string(
                inv_text)
            self._handle_root(target_inv, parent_ids)
            parent_inv = None
            if parent_ids:
                parent_inv = inventory_cache.get(parent_ids[0], None)
            try:
                if parent_inv is None:
                    self._repository.add_inventory(revision_id, target_inv,
                                                   parent_ids)
                else:
                    delta = target_inv._make_delta(parent_inv)
                    self._repository.add_inventory_by_delta(parent_ids[0],
                        delta, revision_id, parent_ids)
            except errors.UnsupportedInventoryKind:
                raise errors.IncompatibleRevision(repr(self._repository))
            inventory_cache[revision_id] = target_inv
    finally:
        pb.finished()
def test_fetch_revision_hash(self):
    """Ensure that inventory hashes are updated by fetch"""
    from_tree = self.make_branch_and_tree('tree')
    from_tree.commit('foo', rev_id='foo-id')
    to_repo = self.make_to_repository('to')
    to_repo.fetch(from_tree.branch.repository)
    recorded_inv_sha1 = to_repo.get_inventory_sha1('foo-id')
    xml = to_repo.get_inventory_xml('foo-id')
    computed_inv_sha1 = osutils.sha_string(xml)
    self.assertEqual(computed_inv_sha1, recorded_inv_sha1)
def runTest(self):
    k = Weave()
    k._parents = [frozenset(), frozenset([0])]
    k._weave = [('{', 0), "first line", ('}', 0),
                ('{', 1), "second line", ('}', 1)]
    k._sha1s = [sha_string('first line')
              , sha_string('first linesecond line')]
    self.assertEqual(k.get_lines(1), ["first line", "second line"])
    self.assertEqual(k.get_lines(0), ["first line"])
def create_deterministic_revid(revid, new_parents):
    """Create a new deterministic revision id with specified new parents.

    Prevents a suffix from being appended needlessly.

    :param revid: Original revision id.
    :param new_parents: New parent revision ids.
    :return: New revision id
    """
    if "-rebase-" in revid:
        revid = revid[0:revid.rfind("-rebase-")]
    return revid + "-rebase-" + osutils.sha_string(":".join(new_parents))[:8]
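# Usage sketch (illustrative, not from the source): the result is the original
# revid plus "-rebase-" and the first 8 hex chars of the sha1 of the
# colon-joined parents; an existing "-rebase-" suffix is stripped first so
# repeated rebases replace the suffix rather than stacking new ones.
# Assumes bzrlib's osutils is importable.
from bzrlib import osutils

revid = create_deterministic_revid('myrev-1', ['p1', 'p2'])
assert revid == 'myrev-1-rebase-' + osutils.sha_string('p1:p2')[:8]
# Rebasing the result again replaces the suffix instead of appending:
revid2 = create_deterministic_revid(revid, ['p3'])
assert revid2 == 'myrev-1-rebase-' + osutils.sha_string('p3')[:8]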
def _modify_item(self, path, kind, is_executable, data, inv):
    """Add to or change an item in the inventory."""
    # If we've already added this, warn the user that we're ignoring it.
    # In the future, it might be nice to double check that the new data
    # is the same as the old but, frankly, exporters should be fixed
    # not to produce bad data streams in the first place ...
    existing = self._new_file_ids.get(path)
    if existing:
        # We don't warn about directories because it's fine for them
        # to be created already by a previous rename
        if kind != 'directory':
            self.warning("%s already added in this commit - ignoring" %
                (path,))
        return
    # Create the new InventoryEntry
    basename, parent_id = self._ensure_directory(path, inv)
    file_id = self.bzr_file_id(path)
    ie = inventory.make_entry(kind, basename, parent_id, file_id)
    ie.revision = self.revision_id
    if kind == 'file':
        ie.executable = is_executable
        # lines = osutils.split_lines(data)
        ie.text_sha1 = osutils.sha_string(data)
        ie.text_size = len(data)
        self.data_for_commit[file_id] = data
    elif kind == 'directory':
        self.directory_entries[path] = ie
        # There are no lines stored for a directory so
        # make sure the cache used by get_lines knows that
        self.data_for_commit[file_id] = ''
    elif kind == 'symlink':
        ie.symlink_target = self._decode_path(data)
        # There are no lines stored for a symlink so
        # make sure the cache used by get_lines knows that
        self.data_for_commit[file_id] = ''
    else:
        self.warning("Cannot import items of kind '%s' yet - ignoring '%s'"
            % (kind, path))
        return
    # Record it
    if inv.has_id(file_id):
        old_ie = inv[file_id]
        if old_ie.kind == 'directory':
            self.record_delete(path, old_ie)
        self.record_changed(path, ie, parent_id)
    else:
        try:
            self.record_new(path, ie)
        except:
            print "failed to add path '%s' with entry '%s' in command %s" \
                % (path, ie, self.command.id)
            print "parent's children are:\n%r\n" % (ie.parent_id.children,)
            raise
def test_filtered_size_sha(self):
    # check that the size and sha match what's expected
    text = 'Foo Bar Baz\n'
    a = open('a', 'wb')
    a.write(text)
    a.close()
    post_filtered_content = ''.join(_swapcase([text], None))
    expected_len = len(post_filtered_content)
    expected_sha = sha_string(post_filtered_content)
    self.assertEqual((expected_len, expected_sha),
        internal_size_sha_file_byname('a',
            [ContentFilter(_swapcase, _swapcase)]))
def test_fetch_inconsistent_last_changed_entries(self):
    """If an inventory has odd data we should still get what it references.

    This test checks that we do fetch a file text created in a revision not
    being fetched, but referenced from the revision we are fetching, when
    the adjacent revisions to the one being fetched do not reference that
    text.
    """
    tree = self.make_branch_and_tree('source')
    revid = tree.commit('old')
    to_repo = self.make_to_repository('to_repo')
    to_repo.fetch(tree.branch.repository, revid)
    # Make a broken revision and fetch it.
    source = tree.branch.repository
    source.lock_write()
    self.addCleanup(source.unlock)
    source.start_write_group()
    try:
        # We need two revisions: OLD and NEW. NEW will claim to need a file
        # 'FOO' changed in 'OLD'. OLD will not have that file at all.
        source.texts.insert_record_stream([
            versionedfile.FulltextContentFactory(('foo', revid), (), None,
                                                 'contents')])
        basis = source.revision_tree(revid)
        parent_id = basis.path2id('')
        entry = inventory.make_entry('file', 'foo-path', parent_id, 'foo')
        entry.revision = revid
        entry.text_size = len('contents')
        entry.text_sha1 = osutils.sha_string('contents')
        inv_sha1, _ = source.add_inventory_by_delta(revid, [
            (None, 'foo-path', 'foo', entry)], 'new', [revid])
        rev = Revision(timestamp=0, timezone=None,
            committer="Foo Bar <*****@*****.**>",
            message="Message", inventory_sha1=inv_sha1,
            revision_id='new', parent_ids=[revid])
        source.add_revision(rev.revision_id, rev)
    except:
        source.abort_write_group()
        raise
    else:
        source.commit_write_group()
    to_repo.fetch(source, 'new')
    to_repo.lock_read()
    self.addCleanup(to_repo.unlock)
    self.assertEqual('contents',
        to_repo.texts.get_record_stream([('foo', revid)],
            'unordered', True).next().get_bytes_as('fulltext'))
def runTest(self):
    # FIXME make the weave, don't poke at it.
    k = Weave()
    k._names = ['0', '1', '2']
    k._name_map = {'0': 0, '1': 1, '2': 2}
    k._parents = [frozenset(),
                  frozenset([0]),
                  frozenset([0]),
                  ]
    k._weave = [('{', 0), "first line", ('}', 0),
                ('{', 1), "second line", ('}', 1),
                ('{', 2), "alternative second line", ('}', 2),
                ]
    k._sha1s = [sha_string('first line')
              , sha_string('first linesecond line')
              , sha_string('first linealternative second line')]
    self.assertEqual(k.get_lines(0), ["first line"])
    self.assertEqual(k.get_lines(1), ["first line", "second line"])
    self.assertEqual(k.get_lines('2'),
                     ["first line", "alternative second line"])
    self.assertEqual(list(k.get_ancestry(['2'])), ['0', '2'])
def _build_inventory(self, tree_id, ie, path):
    assert isinstance(path, str)
    tree = self._repository._git.tree(tree_id)
    for mode, name, hexsha in tree.entries():
        basename = name.decode("utf-8")
        if path == "":
            child_path = name
        else:
            child_path = urlutils.join(path, name)
        file_id = self.mapping.generate_file_id(child_path)
        entry_kind = (mode & 0700000) / 0100000
        if entry_kind == 0:
            child_ie = inventory.InventoryDirectory(file_id, basename,
                                                    ie.file_id)
        elif entry_kind == 1:
            file_kind = (mode & 070000) / 010000
            b = self._repository._git.get_blob(hexsha)
            if file_kind == 0:
                child_ie = inventory.InventoryFile(file_id, basename,
                                                   ie.file_id)
                child_ie.text_sha1 = osutils.sha_string(b.data)
            elif file_kind == 2:
                child_ie = inventory.InventoryLink(file_id, basename,
                                                   ie.file_id)
                child_ie.text_sha1 = osutils.sha_string("")
            else:
                raise AssertionError("Unknown file kind, perms=%o." %
                                     (mode,))
            child_ie.text_id = b.id
            child_ie.text_size = len(b.data)
        else:
            raise AssertionError("Unknown blob kind, perms=%r." % (mode,))
        fs_mode = mode & 0777
        child_ie.executable = bool(fs_mode & 0111)
        child_ie.revision = self.revision_id
        self._inventory.add(child_ie)
        if entry_kind == 0:
            self._build_inventory(hexsha, child_ie, child_path)
def test_fetch_revision_hash(self):
    """Ensure that inventory hashes are updated by fetch"""
    from_tree = self.make_branch_and_tree('tree')
    from_tree.commit('foo', rev_id='foo-id')
    to_repo = self.make_to_repository('to')
    to_repo.fetch(from_tree.branch.repository)
    recorded_inv_sha1 = to_repo.get_revision('foo-id').inventory_sha1
    to_repo.lock_read()
    self.addCleanup(to_repo.unlock)
    stream = to_repo.inventories.get_record_stream([('foo-id',)],
                                                   'unordered', True)
    bytes = stream.next().get_bytes_as('fulltext')
    computed_inv_sha1 = osutils.sha_string(bytes)
    self.assertEqual(computed_inv_sha1, recorded_inv_sha1)
def _validate_inventory(self, inv, revision_id):
    """At this point we should have generated the BundleTree,
    so build up an inventory, and make sure the hashes match.
    """
    # Now we should have a complete inventory entry.
    s = serializer_v5.write_inventory_to_string(inv)
    sha1 = sha_string(s)
    # Target revision is the last entry in the real_revisions list
    rev = self.get_revision(revision_id)
    if rev.revision_id != revision_id:
        raise AssertionError()
    if sha1 != rev.inventory_sha1:
        open(',,bogus-inv', 'wb').write(s)
        warning('Inventory sha hash mismatch for revision %s. %s'
                ' != %s' % (revision_id, sha1, rev.inventory_sha1))
def test_ids(self):
    """Test detection of files within selected directories."""
    inv = inventory.Inventory('TREE_ROOT')
    for args in [('src', 'directory', 'src-id'),
                 ('doc', 'directory', 'doc-id'),
                 ('src/hello.c', 'file'),
                 ('src/bye.c', 'file', 'bye-id'),
                 ('Makefile', 'file')]:
        ie = inv.add_path(*args)
        if args[1] == 'file':
            ie.text_sha1 = osutils.sha_string('content\n')
            ie.text_size = len('content\n')
    inv = self.inv_to_test_inv(inv)
    self.assertEqual(inv.path2id('src'), 'src-id')
    self.assertEqual(inv.path2id('src/bye.c'), 'bye-id')
def test_merge_modified(self):
    # merge_modified stores a map from file id to hash
    tree = self.make_branch_and_tree('tree')
    d = {'file-id': osutils.sha_string('hello')}
    self.build_tree_contents([('tree/somefile', 'hello')])
    tree.lock_write()
    try:
        tree.add(['somefile'], ['file-id'])
        tree.set_merge_modified(d)
        mm = tree.merge_modified()
        self.assertEquals(mm, d)
    finally:
        tree.unlock()
    mm = tree.merge_modified()
    self.assertEquals(mm, d)
def test_add_revision_inventory_sha1(self):
    repo = self.make_repository('repo')
    inv = Inventory(revision_id='A')
    inv.root.revision = 'A'
    inv.root.file_id = 'fixed-root'
    repo.lock_write()
    repo.start_write_group()
    repo.add_revision('A', Revision('A', committer='B', timestamp=0,
                      timezone=0, message='C'), inv=inv)
    repo.commit_write_group()
    repo.unlock()
    repo.lock_read()
    self.assertEquals(osutils.sha_string(
        repo._serializer.write_inventory_to_string(inv)),
        repo.get_revision('A').inventory_sha1)
    repo.unlock()
def prepare_inv_with_nested_dirs(self):
    inv = inventory.Inventory('tree-root')
    for args in [('src', 'directory', 'src-id'),
                 ('doc', 'directory', 'doc-id'),
                 ('src/hello.c', 'file', 'hello-id'),
                 ('src/bye.c', 'file', 'bye-id'),
                 ('zz', 'file', 'zz-id'),
                 ('src/sub/', 'directory', 'sub-id'),
                 ('src/zz.c', 'file', 'zzc-id'),
                 ('src/sub/a', 'file', 'a-id'),
                 ('Makefile', 'file', 'makefile-id')]:
        ie = inv.add_path(*args)
        if args[1] == 'file':
            ie.text_sha1 = osutils.sha_string('content\n')
            ie.text_size = len('content\n')
    return self.inv_to_test_inv(inv)
def get_size_and_sha1(self, file_id):
    """Return the size and sha1 hash of the given file id.

    If the file was not locally modified, this is extracted from the
    base_tree rather than re-reading the file.
    """
    new_path = self.id2path(file_id)
    if new_path is None:
        return None, None
    if new_path not in self.patches:
        # If the entry does not have a patch, then the
        # contents must be the same as in the base_tree
        text_size = self.base_tree.get_file_size(file_id)
        text_sha1 = self.base_tree.get_file_sha1(file_id)
        return text_size, text_sha1
    fileobj = self.get_file(file_id)
    content = fileobj.read()
    return len(content), sha_string(content)
def import_git_blob(repo, mapping, path, blob, inv, parent_invs, executable):
    """Import a git blob object into a bzr repository.

    :param repo: bzr repository
    :param path: Path in the tree
    :param blob: A git blob
    """
    file_id = mapping.generate_file_id(path)
    text_revision = inv.revision_id
    repo.texts.add_lines((file_id, text_revision),
        [(file_id, p[file_id].revision) for p in parent_invs
         if file_id in p],
        osutils.split_lines(blob.data))
    ie = inv.add_path(path, "file", file_id)
    ie.revision = text_revision
    ie.text_size = len(blob.data)
    ie.text_sha1 = osutils.sha_string(blob.data)
    ie.executable = executable
def make_one_file_inventory(self, repo, revision, parents,
                            inv_revision=None, root_revision=None,
                            file_contents=None, make_file_version=True):
    """Make an inventory containing a version of a file with ID 'a-file'.

    The file's ID will be 'a-file', and its filename will be 'a file name',
    stored at the tree root.

    :param repo: a repository to add the new file version to.
    :param revision: the revision ID of the new inventory.
    :param parents: the parents for this revision of 'a-file'.
    :param inv_revision: if not None, the revision ID to store in the
        inventory entry.  Otherwise, this defaults to revision.
    :param root_revision: if not None, the inventory's root.revision will
        be set to this.
    :param file_contents: if not None, the contents of this file version.
        Otherwise a unique default (based on revision ID) will be
        generated.
    """
    inv = Inventory(revision_id=revision)
    if root_revision is not None:
        inv.root.revision = root_revision
    file_id = 'a-file-id'
    entry = InventoryFile(file_id, 'a file name', 'TREE_ROOT')
    if inv_revision is not None:
        entry.revision = inv_revision
    else:
        entry.revision = revision
    entry.text_size = 0
    if file_contents is None:
        file_contents = '%sline\n' % entry.revision
    entry.text_sha1 = osutils.sha_string(file_contents)
    inv.add(entry)
    if make_file_version:
        repo.texts.add_lines((file_id, revision),
            [(file_id, parent) for parent in parents], [file_contents])
    return inv
def build_helper(self, layout):
    """This is a helper with the common build_??_dirstate funcs.

    :param layout: [(num_dirs, files_per_dir)]
        The number of directories per level, and the number of files to
        put in it.
    :return: A DirState object with the given layout. The blocks will be
        modified in memory, and the object will be write locked. (Callers
        must save and unlock the object).
    """
    self.build_tree(['dir/'])
    contents = 'x' * 10000
    self.build_tree_contents([('file', contents)])
    file_stat = os.lstat('file')
    dir_stat = os.lstat('dir')
    file_sha1 = osutils.sha_string(contents)

    state = dirstate.DirState.initialize('state')

    def create_entries(base, layout):
        if not layout:
            return
        num_dirs, num_files = layout[0]
        for dnum in xrange(num_dirs):
            if base:
                path = '%s/%02d_directory' % (base, dnum)
            else:
                path = '%02d_directory' % (dnum,)
            dir_id = generate_ids.gen_file_id(path)
            state.add(path, dir_id, 'directory', dir_stat, '')
            for fnum in xrange(num_files):
                fname = '%s/%02d_filename' % (path, fnum)
                file_id = generate_ids.gen_file_id(fname)
                state.add(fname, file_id, 'file', file_stat, file_sha1)
            create_entries(path, layout[1:])

    create_entries(None, layout)
    return state
timezone: 0
parents:
message:
  initial null commit
inventory:
  directory . TREE_ROT test@user-1 no
properties:
  branch-nick:
    test branch
"""

REV_1_SHORT = """\
bazaar-ng testament short form 1
revision-id: test@user-1
sha1: %s
""" % osutils.sha_string(REV_1_TESTAMENT)

REV_1_SHORT_STRICT = """\
bazaar-ng testament short form 2.1
revision-id: test@user-1
sha1: %s
""" % osutils.sha_string(REV_1_STRICT_TESTAMENT)

REV_1_SHORT_STRICT3 = """\
bazaar testament short form 3 strict
revision-id: test@user-1
sha1: %s
""" % osutils.sha_string(REV_1_STRICT_TESTAMENT3)

REV_2_TESTAMENT = """\
bazaar-ng testament version 1
def get_sha1(self, path, stat_value=None):
    """Return the sha1 of a file.
    """
    if path.__class__ is str:
        abspath = osutils.pathjoin(self.root_utf8, path)
    else:
        abspath = osutils.pathjoin(self.root, path)
    self.stat_count += 1
    file_fp = self._fingerprint(abspath, stat_value)

    if not file_fp:
        # not a regular file or not existing
        if path in self._cache:
            self.removed_count += 1
            self.needs_write = True
            del self._cache[path]
        return None

    if path in self._cache:
        cache_sha1, cache_fp = self._cache[path]
    else:
        cache_sha1, cache_fp = None, None

    if cache_fp == file_fp:
        ## mutter("hashcache hit for %s %r -> %s", path, file_fp, cache_sha1)
        ## mutter("now = %s", time.time())
        self.hit_count += 1
        return cache_sha1

    self.miss_count += 1

    mode = file_fp[FP_MODE_COLUMN]
    if stat.S_ISREG(mode):
        if self._filter_provider is None:
            filters = []
        else:
            filters = self._filter_provider(path=path, file_id=None)
        digest = self._really_sha1_file(abspath, filters)
    elif stat.S_ISLNK(mode):
        target = osutils.readlink(osutils.safe_unicode(abspath))
        digest = osutils.sha_string(target.encode('UTF-8'))
    else:
        raise errors.BzrError("file %r: unknown file stat mode: %o"
                              % (abspath, mode))

    # window of 3 seconds to allow for 2s resolution on windows,
    # unsynchronized file servers, etc.
    cutoff = self._cutoff_time()
    if file_fp[FP_MTIME_COLUMN] >= cutoff \
            or file_fp[FP_CTIME_COLUMN] >= cutoff:
        # changed too recently; can't be cached.  we can
        # return the result and it could possibly be cached
        # next time.
        #
        # the point is that we only want to cache when we are sure that any
        # subsequent modifications of the file can be detected.  If a
        # modification neither changes the inode, the device, the size, nor
        # the mode, then we can only distinguish it by time; therefore we
        # need to let sufficient time elapse before we may cache this entry
        # again.  If we didn't do this, then, for example, a very quick 1
        # byte replacement in the file might go undetected.
        ## mutter('%r modified too recently; not caching', path)
        self.danger_count += 1
        if cache_fp:
            self.removed_count += 1
            self.needs_write = True
            del self._cache[path]
    else:
        ## mutter('%r added to cache: now=%f, mtime=%d, ctime=%d',
        ##        path, time.time(), file_fp[FP_MTIME_COLUMN],
        ##        file_fp[FP_CTIME_COLUMN])
        self.update_count += 1
        self.needs_write = True
        self._cache[path] = (digest, file_fp)

    return digest
def _inventory_add_lines(self, inv_vf, version_id, parents, lines,
                         parent_texts):
    """See Repository._inventory_add_lines()."""
    # setup parameters used in original code but not this API
    self.revision_count += 1
    if self.fulltext_when is not None:
        delta = not self.fulltext_when(self.revision_count)
    else:
        delta = inv_vf.delta
    left_matching_blocks = None
    random_id = self.random_ids
    check_content = False

    # bzrlib.knit.add_lines() but error checking optimised
    inv_vf._check_add(version_id, lines, random_id, check_content)

    ####################################################################
    # bzrlib.knit._add() but skip checking if fulltext better than delta
    ####################################################################
    line_bytes = ''.join(lines)
    digest = osutils.sha_string(line_bytes)
    present_parents = []
    for parent in parents:
        if inv_vf.has_version(parent):
            present_parents.append(parent)
    if parent_texts is None:
        parent_texts = {}

    # can only compress against the left most present parent.
    if (delta and
        (len(present_parents) == 0 or
         present_parents[0] != parents[0])):
        delta = False

    text_length = len(line_bytes)
    options = []
    if lines:
        if lines[-1][-1] != '\n':
            # copy the contents of lines.
            lines = lines[:]
            options.append('no-eol')
            lines[-1] = lines[-1] + '\n'
            line_bytes += '\n'

    #if delta:
    #    # To speed the extract of texts the delta chain is limited
    #    # to a fixed number of deltas. This should minimize both
    #    # I/O and the time spent applying deltas.
    #    delta = inv_vf._check_should_delta(present_parents)

    assert isinstance(version_id, str)
    content = inv_vf.factory.make(lines, version_id)
    if delta or (inv_vf.factory.annotated and len(present_parents) > 0):
        # Merge annotations from parent texts if needed.
        delta_hunks = inv_vf._merge_annotations(content, present_parents,
            parent_texts, delta, inv_vf.factory.annotated,
            left_matching_blocks)

    if delta:
        options.append('line-delta')
        store_lines = inv_vf.factory.lower_line_delta(delta_hunks)
        size, bytes = inv_vf._data._record_to_data(version_id, digest,
            store_lines)
    else:
        options.append('fulltext')
        # isinstance is slower and we have no hierarchy.
        if inv_vf.factory.__class__ == knit.KnitPlainFactory:
            # Use the already joined bytes saving iteration time in
            # _record_to_data.
            size, bytes = inv_vf._data._record_to_data(version_id, digest,
                lines, [line_bytes])
        else:
            # get mixed annotation + content and feed it into the
            # serialiser.
            store_lines = inv_vf.factory.lower_fulltext(content)
            size, bytes = inv_vf._data._record_to_data(version_id, digest,
                store_lines)

    access_memo = inv_vf._data.add_raw_records([size], bytes)[0]
    inv_vf._index.add_versions(
        ((version_id, options, access_memo, parents),),
        random_id=random_id)
    return digest, text_length, content