def fetch_history(
    session: Session
) -> Tuple[List[RemoteNodeHistory], List[StoredNodeHistory]]:
    """Gather the remote and locally stored history records for a session.

    Object versions are listed under
    ``<s3_prefix>/<sync_metadata_prefix>/history/`` in the session's internal
    bucket and wrapped as ``RemoteNodeHistory``; ``StoredNodeHistory`` rows
    are selected for the session's root folder. Both sets are sorted and
    grouped by ``key`` into ``HistoryRow`` entries.

    For every row that has a remote record, the record's history is either
    loaded via ``remote.load(session)`` (no stored counterpart, or the etags
    differ) or taken as a copy of the stored record's history.

    Returns:
        A pair ``(remote_records, stored_records)`` holding the non-``None``
        remote and stored members of the rows, in key order.
    """
    versions = list_versions(
        session.s3_client,
        session.internal_bucket,
        f"{session.s3_prefix}/{session.sync_metadata_prefix}/history/",
    )
    remote_items = (
        RemoteNodeHistory.from_s3_object(version) for version in versions
    )
    stored_items = StoredNodeHistory.select().where(
        StoredNodeHistory.root_folder == RootFolder.for_session(session))

    def by_key(item):
        return item.key

    # groupby only merges adjacent items, so the combined list is sorted
    # by the same key first.
    merged = sorted(chain(remote_items, stored_items), key=by_key)
    rows = [
        HistoryRow.create(group_key, members)
        for group_key, members in groupby(merged, key=by_key)
    ]

    for _, remote, stored in rows:
        if not remote:
            continue
        if stored and remote.etag == stored.remote_history_etag:
            # Etag matches the stored one: reuse the stored history
            # instead of calling remote.load().
            remote.history = copy(stored.history)
        else:
            remote.load(session)

    remote_records = [
        remote for _, remote, _stored in rows if remote is not None
    ]
    stored_records = [
        stored for _, _remote, stored in rows if stored is not None
    ]
    return (remote_records, stored_records)
class FileGenerator:
    """Builds remote, local and stored records that describe one generated file.

    Call ``new()`` to draw a fresh set of attributes (path, key, etags,
    timestamps, size); ``remote()``, ``local()`` and ``stored()`` then build
    the corresponding objects from those attributes. Keyword arguments passed
    to the builders override the generated values.
    """

    # One value generator per generated attribute, shared by all instances.
    gen = Bunch(
        path=Generator(generate_path),
        etag=Generator(generate_etag),
        modified_time=Generator(generate_timestamp),
        created_time=Generator(generate_timestamp),
        size=Generator(generate_size),
    )

    # Root folder used by local() and stored().
    root_folder = RootFolder(path="/local")

    def new(self):
        """Draw fresh attribute values from the generators and return ``self``."""
        generators = self.gen
        self.path = generators.path.new()
        self.key = hash_path(self.path)
        self.etag = generators.etag.new()
        self.history_etag = generators.etag.new()
        self.modified_time = generators.modified_time.new()
        self.created_time = generators.created_time.new()
        self.size = generators.size.new()
        return self

    @property
    def base_attrs(self):
        """Return a dict containing only the current ``key``."""
        return dict(key=self.key)

    def remote(self, deleted=False, **extra_attrs):
        """Return a ``RemoteNodeHistory`` for the current attributes.

        ``deleted`` is stored on the nested history ``Bunch``; entries in
        ``extra_attrs`` override the generated ones.
        """
        fields = dict(
            key=self.key,
            etag=self.history_etag,
            history=Bunch(etag=self.etag, deleted=deleted),
        )
        fields.update(extra_attrs)
        return RemoteNodeHistory(**fields)

    def local(self, **extra_attrs):
        """Return a ``LocalNode`` for the current attributes.

        Entries in ``extra_attrs`` override the generated ones.
        """
        fields = dict(
            root_folder=Path(self.root_folder.path),
            path=self.path,
            modified_time=self.modified_time,
            created_time=self.created_time,
            size=self.size,
            etag=self.etag,
        )
        fields.update(extra_attrs)
        return LocalNode(**fields)

    def stored(self, **extra_attrs):
        """Return a ``StoredNodeHistory`` for the current attributes.

        Entries in ``extra_attrs`` override the generated ones.
        """
        fields = dict(
            key=self.key,
            root_folder=self.root_folder,
            local_modified_time=self.modified_time,
            local_created_time=self.created_time,
            remote_history_etag=self.history_etag,
        )
        fields.update(extra_attrs)
        return StoredNodeHistory(**fields)
def save_history(remote_history: RemoteNodeHistory, node: LocalNode,
                 session: Session) -> SyncActionResult:
    """Create a ``StoredNodeHistory`` row mirroring ``remote_history``.

    Args:
        remote_history: Remote record whose key, history and etag are stored.
        node: Local node supplying the modified/created timestamps.
        session: Session used to resolve the root folder.

    Returns:
        An empty ``SyncActionResult``.
    """
    history = cast(NodeHistory, remote_history.history)
    StoredNodeHistory.create(
        key=remote_history.key,
        root_folder=RootFolder.for_session(session),
        data=history.dict(),
        # Each stored timestamp is taken from the node field of the same
        # name; previously the two were crossed.
        local_modified_time=node.modified_time,
        local_created_time=node.created_time,
        remote_history_etag=remote_history.etag,
    )
    return SyncActionResult()
def download(
    remote_history: RemoteNodeHistory,
    stored_history: Optional[StoredNodeHistory],
    session: Session,
) -> SyncActionResult:
    """Bring the local file up to date with ``remote_history`` and record it.

    1. Without local history
        - Find latest base
        - Download latest base
        - Apply any remaining deltas
        - Store history in local DB
    2. With local history
        - Diff remote and local history and find shortest path
        - Download the base first when the diff is absolute
        - Apply the deltas as a patch
        - Store history in local DB

    In both cases the signature of the last diff entry is downloaded into
    the session's signature folder before the stored history is saved.

    Args:
        remote_history: Remote record; its ``history`` drives the download.
        stored_history: Existing local DB row, or ``None`` when the file has
            no local history yet.
        session: Active sync session.

    Returns:
        An empty ``SyncActionResult``.
    """
    history = cast(NodeHistory, remote_history.history)

    known_history = stored_history.history if stored_history is not None else None
    entries, is_absolute = history.diff(known_history)

    if stored_history is None or is_absolute:
        # Start from a fresh base download; the rest of the entries are deltas.
        local_path = file_transfer.download_to_root(
            session, history.path, entries[0].base_version)
        deltas = entries[1:]
    else:
        local_path = session.root_folder.path / history.path
        deltas = entries

    # `entries` is deliberately left untouched: the last entry is needed
    # below even when the base was the only entry (previously the list was
    # trimmed in place, which made `entries[-1]` fail in that case).
    if deltas:
        patch_file(session, os.fspath(local_path), [e.key for e in deltas])

    local_node = LocalNode.create(local_path, session)

    if stored_history is None:
        stored_history = StoredNodeHistory(
            key=remote_history.key,
            root_folder=RootFolder.for_session(session),
            data=history.dict(),
            local_modified_time=local_node.modified_time,
            local_created_time=local_node.created_time,
            remote_history_etag=remote_history.etag)
    else:
        stored_history.data = history.dict()  # type: ignore
        # Each stored timestamp is taken from the node field of the same
        # name; previously the two were crossed.
        stored_history.local_modified_time = local_node.modified_time
        stored_history.local_created_time = local_node.created_time
        stored_history.remote_history_etag = remote_history.etag

    last_entry = entries[-1]
    file_transfer.download_metadata(
        session, last_entry.key, "signature",
        os.fspath(session.signature_folder / last_entry.key))

    stored_history.save()
    return SyncActionResult()
def upload(remote_history: Optional[RemoteNodeHistory], node: LocalNode,
           session: Session) -> SyncActionResult:
    """Upload the local ``node`` and record the new history entry.

    1. Without remote history:
        - Generate key
        - Calc signature
        - Upload signature
        - Upload base
        - Create new history
        - Upload history
        - Store history in local DB
    2. With remote history:
        - Generate key
        - Calc delta
        - Upload delta
        - Calc signature
        - Upload signature
        - Add history record
        - Upload history
        - Store history in local DB

    Args:
        remote_history: Existing remote record, or ``None`` when the file
            has no remote history yet.
        node: Local node to upload.
        session: Active sync session.

    Returns:
        An empty ``SyncActionResult``.
    """
    new_key = NodeHistoryEntry.generate_key()

    if remote_history is not None:
        history = cast(NodeHistory, remote_history.history)

        with create_temp_file() as delta_path:
            calc_delta(session, node.local_fspath, history.last.key,
                       delta_path)
            file_transfer.upload_metadata(session, delta_path, new_key,
                                          "delta")
            delta_size = Path(delta_path).stat().st_size

        with create_temp_file() as signature_path:
            calc_signature(session, node.local_fspath, new_key,
                           signature_path)
            file_transfer.upload_metadata(session, signature_path, new_key,
                                          "signature")

        history.add_entry(
            NodeHistoryEntry.create_delta_only(new_key, node.calc_etag(),
                                               delta_size))
    else:
        with create_temp_file() as signature_path:
            calc_signature(session, node.local_fspath, new_key,
                           signature_path)
            file_transfer.upload_metadata(session, signature_path, new_key,
                                          "signature")

        version = file_transfer.upload_to_root(session, node)
        history = NodeHistory(key=node.key, path=node.path, entries=[])
        history.add_entry(
            NodeHistoryEntry.create_base_only(new_key, node.calc_etag(),
                                              version, node.size))
        remote_history = RemoteNodeHistory(history=history, key=node.key,
                                           etag=None)

    remote_history.save(session)

    stored_history = StoredNodeHistory.get_or_none(
        StoredNodeHistory.key == history.key)
    if stored_history is not None:
        stored_history.data = history.dict()
        stored_history.remote_history_etag = remote_history.etag
        # Each stored timestamp is taken from the node field of the same
        # name; previously the two were crossed.
        stored_history.local_modified_time = node.modified_time
        stored_history.local_created_time = node.created_time
        stored_history.save()
    else:
        StoredNodeHistory.create(
            key=remote_history.key,
            root_folder=RootFolder.for_session(session),
            data=cast(NodeHistory, remote_history.history).dict(),
            local_modified_time=node.modified_time,
            local_created_time=node.created_time,
            remote_history_etag=remote_history.etag)

    return SyncActionResult()