def test_write_read_records(self, get_tmpdir):
    tmpdir = get_tmpdir
    r = LocalFeedDataRepo(metadata=LocalFeedDataRepoMetadata(data_write_dir=tmpdir))
    r.initialize()
    ts = datetime.datetime.utcnow().replace(tzinfo=datetime.timezone.utc)
    r.metadata.download_result = DownloadOperationResult(
        started=ts, status=FeedDownloader.State.in_progress.value, results=[]
    )
    r.write_data(
        "feed1",
        "group1",
        chunk_id=0,
        data=b'{"next_token": "something", "data": [{"somekey": "somevalue"}]}',
    )

    with timer("Read single record group", log_level="info"):
        found_count = 0
        for i in r.read("feed1", "group1", start_index=0):
            logger.info("Got record {}".format(i))
            found_count += 1

    logger.info("Repo metadata: {}".format(r.metadata))
    assert found_count > 0
def __init__(
    self,
    download_root_dir: str,
    config: DownloadOperationConfiguration,
    client: IFeedSource,
    fetch_all: bool = False,
):
    """
    :param download_root_dir: root directory under which this operation's data is written
    :param config: configuration for doing the fetch
    :param client: the client to use to pull data
    :param fetch_all: if true, ignore last sync timestamps and fetch all data from source
    """
    if not config:
        raise ValueError("Must have non-None config")
    if not download_root_dir:
        raise ValueError("Must have non-None download root directory path")

    self.config = config
    op_dir = os.path.join(download_root_dir, self.config.uuid)
    logger.debug(
        "Initializing downloader for operation {}. Will write to path: {}".format(
            config.uuid, op_dir
        )
    )
    repo_meta = LocalFeedDataRepoMetadata(
        download_configuration=config, data_write_dir=op_dir
    )
    self.local_repo = LocalFeedDataRepo(metadata=repo_meta)
    self.service_client = client
    self.fetch_all = fetch_all
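# Hedged construction sketch (not part of the original source): shows how the
# FeedDownloader __init__ above might be exercised in isolation. Only
# config.uuid is read during __init__, so MagicMock stands in for the real
# DownloadOperationConfiguration and IFeedSource; "op-1234" is illustrative,
# and the final assertion assumes LocalFeedDataRepoMetadata keeps
# data_write_dir as an attribute, as the tests in this section suggest.
def _example_construct_feed_downloader():
    import tempfile
    from unittest.mock import MagicMock

    cfg = MagicMock()
    cfg.uuid = "op-1234"  # operation id; becomes the subdirectory name
    downloader = FeedDownloader(
        download_root_dir=tempfile.mkdtemp(),
        config=cfg,
        client=MagicMock(),  # stand-in for an IFeedSource implementation
        fetch_all=False,
    )
    # The repo's write dir is <download_root_dir>/<config.uuid>
    assert downloader.local_repo.metadata.data_write_dir.endswith("op-1234")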
def test_initialize(self, get_tmpdir):
    tmpdir = get_tmpdir
    r = LocalFeedDataRepo(metadata=LocalFeedDataRepoMetadata(data_write_dir=tmpdir))
    assert os.listdir(tmpdir) == []
    r.initialize()
    assert os.listdir(tmpdir) == ["metadata.json"]
def reload_metadata(self):
    """
    Reloads the metadata from disk into this instance, overwriting any value held in memory

    :return:
    """
    with open(self.metadata_file_path) as f:
        self.metadata = LocalFeedDataRepoMetadata.from_json(json.load(f))
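# Hedged companion sketch (not in the original source): per test_initialize in
# this section, initialize() creates metadata.json under data_write_dir, and
# reload_metadata() above deserializes that same file (metadata_file_path)
# back into self.metadata. This helper only assumes the file holds JSON.
def _example_inspect_metadata_file(repo):
    import json

    with open(repo.metadata_file_path) as f:
        raw = json.load(f)  # the serialized LocalFeedDataRepoMetadata
    print(sorted(raw.keys()))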
def test_teardown(self, get_tmpdir):
    tmpdir = get_tmpdir
    r = LocalFeedDataRepo(metadata=LocalFeedDataRepoMetadata(data_write_dir=tmpdir))
    r.initialize()
    assert os.path.isdir(tmpdir) is True
    r.teardown()
    assert os.path.isdir(tmpdir) is False
def test_write_read_files(self, get_tmpdir, get_file):
    """
    Test writing chunks of binary data to LocalFeedDataRepo and reading them back
    using LocalFeedDataRepo.read_files()
    """
    tmpdir = get_tmpdir
    r = LocalFeedDataRepo(metadata=LocalFeedDataRepoMetadata(data_write_dir=tmpdir))
    r.initialize()
    ts = datetime.datetime.utcnow().replace(tzinfo=datetime.timezone.utc)
    meta = GroupDownloadResult(
        feed="feed1",
        group="group1",
        total_records=0,
        status=FeedDownloader.State.in_progress.value,
        started=datetime.datetime.utcnow(),
        group_metadata={},
    )
    r.metadata.download_result = DownloadOperationResult(
        started=ts,
        status=FeedDownloader.State.in_progress.value,
        results=[meta],
    )

    expected_output = []
    for chunk_number in range(2):
        with open(get_file(f"{chunk_number}.data"), "rb") as f:
            binary_content = f.read()
        expected_output.append(binary_content)
        group_metadata = {str(chunk_number): {"test_value": str(chunk_number)}}
        r.write_data(
            "feed1",
            "group1",
            chunk_id=chunk_number,
            data=binary_content,
        )
        meta.total_records += 1
        meta.group_metadata.update(group_metadata)

    meta.status = FeedDownloader.State.complete.value
    meta.ended = datetime.datetime.utcnow()
    r.flush_metadata()
    r.reload_metadata()

    assert r.metadata.download_result.results[0].total_records == 2
    assert all(
        x in r.metadata.download_result.results[0].group_metadata for x in ["0", "1"]
    )

    found_count = 0
    for idx, file_data in enumerate(r.read_files("feed1", "group1")):
        found_count += 1
        assert isinstance(file_data, FileData)
        assert file_data.data == expected_output[idx]
        assert str(idx) in meta.group_metadata
        assert file_data.metadata["test_value"] == str(idx)

    assert found_count == 2
@classmethod
def from_disk(cls, path):
    """
    Create a new repo instance from an existing one on disk, loading its metadata

    :param path: path to the existing repo's data directory
    :return:
    """
    r = LocalFeedDataRepo(metadata=LocalFeedDataRepoMetadata(data_write_dir=path))
    r.reload_metadata()
    return r
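# Hedged usage sketch (not in the original source) for from_disk above:
# re-open a repo that was previously initialized and flushed at `path`, so
# that metadata.json exists there (see test_initialize and
# test_metadata_flush_reload in this section). The caller supplies the path;
# any concrete value is illustrative.
def _example_reopen_repo(path):
    existing = LocalFeedDataRepo.from_disk(path)
    logger.info("Loaded metadata: {}".format(existing.metadata))
    return existing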
def test_metadata_flush_reload(self, get_tmpdir):
    tmpdir = get_tmpdir
    r = LocalFeedDataRepo(metadata=LocalFeedDataRepoMetadata(data_write_dir=tmpdir))
    r.initialize()
    ts = datetime.datetime.utcnow().replace(tzinfo=datetime.timezone.utc)
    r.metadata.download_result = DownloadOperationResult(
        started=ts, status=FeedDownloader.State.in_progress.value, results=[]
    )
    r.flush_metadata()
    r.metadata = None
    r.reload_metadata()

    assert r.metadata.download_result.started == ts
    assert r.metadata.download_result.status == FeedDownloader.State.in_progress.value