def test_write_to_file():
    """Round-trip the text fixture through the Klio text read/write transforms.

    Checks that the lines found in the written shard files equal the lines of
    the fixture, and that the pipeline reports exactly two counters:
    ``kmsg-read`` in the ``KlioReadFromText`` namespace and ``kmsg-write`` in
    the ``KlioWriteToText`` namespace, each with a committed value equal to
    the total number of lines written.
    """
    file_path_read = os.path.join(FIXTURE_PATH, "elements_text_file.txt")

    with tempfile.TemporaryDirectory() as tmp_path:
        with test_pipeline.TestPipeline() as p:
            (
                p
                | io_transforms.KlioReadFromText(file_path_read)
                | io_transforms.KlioWriteToText(tmp_path)
            )

        # WriteToText will shard files so we iterate through each
        # file matching the output prefix.
        # NOTE(review): tmp_path is used as a file *prefix*, so shards are
        # presumably created next to the temporary directory rather than
        # inside it -- confirm they are cleaned up.
        write_results = []
        for file_name in glob.glob(tmp_path + "*"):
            # The pattern also matches the temporary directory itself, and
            # glob already returns paths that include the prefix, so test the
            # match directly and skip anything that is not a regular file.
            if os.path.isfile(file_name):
                with open(file_name, "rb") as f:
                    write_results.extend(f.readlines())

        with open(file_path_read, "rb") as fr:
            read_results = fr.readlines()

        assert write_results == read_results

    # Count the lines of *all* shards; assigning the per-file length inside
    # the loop would keep only the last shard's size when output is sharded.
    exp_element_count = len(write_results)

    actual_counters = p.result.metrics().query()["counters"]
    assert 2 == len(actual_counters)

    # NOTE(review): relies on the query returning the read counter first.
    read_counter = actual_counters[0]
    write_counter = actual_counters[1]

    assert exp_element_count == read_counter.committed
    assert "KlioReadFromText" == read_counter.key.metric.namespace
    assert "kmsg-read" == read_counter.key.metric.name

    assert exp_element_count == write_counter.committed
    assert "KlioWriteToText" == write_counter.key.metric.namespace
    assert "kmsg-write" == write_counter.key.metric.name
def test_read_from_file():
    """Read the text fixture with ``KlioReadFromText`` and validate elements.

    Every element produced by the read step is handed to
    ``assert_expected_klio_msg_from_file``; afterwards the transform's
    ``_REQUIRES_IO_READ_WRAP`` flag is asserted to be ``False``.
    """
    fixture_path = os.path.join(FIXTURE_PATH, "elements_text_file.txt")
    transform = io_transforms.KlioReadFromText(fixture_path)

    with test_pipeline.TestPipeline() as pipeline:
        messages = pipeline | "Read" >> transform
        _ = messages | beam.Map(assert_expected_klio_msg_from_file)

    assert transform._REQUIRES_IO_READ_WRAP is False
def test_write_to_file():
    """Round-trip the text fixture through the Klio text read/write transforms.

    The lines collected from the written output files must equal the lines of
    the fixture file.
    """
    source_path = os.path.join(FIXTURE_PATH, "elements_text_file.txt")

    with tempfile.TemporaryDirectory() as tmp_path:
        with test_pipeline.TestPipeline() as pipeline:
            messages = pipeline | io_transforms.KlioReadFromText(source_path)
            _ = messages | io_transforms.KlioWriteToText(tmp_path)

        # WriteToText will shard files, so gather every regular file that
        # matches the output prefix before reading them back.
        shard_paths = [
            candidate
            for candidate in glob.glob(tmp_path + "*")
            if os.path.isfile(os.path.join(tmp_path, candidate))
        ]

        written_lines = []
        for shard_path in shard_paths:
            with open(shard_path, "rb") as shard:
                written_lines += shard.readlines()

        with open(source_path, "rb") as source:
            expected_lines = source.readlines()

        assert written_lines == expected_lines
def test_read_from_file():
    """Read the text fixture with ``KlioReadFromText`` and check its counter.

    Every element produced by the read step is handed to
    ``assert_expected_klio_msg_from_file``.  The test then asserts that the
    transform's reader has ``_REQUIRES_IO_READ_WRAP`` set to ``False`` and
    that the pipeline reports a single ``kmsg-read`` counter in the
    ``KlioReadFromText`` namespace whose committed value equals the number of
    lines in the fixture.
    """
    fixture_path = os.path.join(FIXTURE_PATH, "elements_text_file.txt")

    # One element is expected per line of the fixture.
    with open(fixture_path, "r") as fixture:
        expected_count = sum(1 for _ in fixture)

    transform = io_transforms.KlioReadFromText(fixture_path)
    with test_pipeline.TestPipeline() as pipeline:
        messages = pipeline | "Read" >> transform
        _ = messages | beam.Map(assert_expected_klio_msg_from_file)

    assert transform._reader._REQUIRES_IO_READ_WRAP is False

    counters = pipeline.result.metrics().query()["counters"]
    assert 1 == len(counters)

    read_counter = counters[0]
    metric = read_counter.key.metric
    assert expected_count == read_counter.committed
    assert "KlioReadFromText" == metric.namespace
    assert "kmsg-read" == metric.name
def test_write_to_avro():
    """Write the text fixture to Avro and read it back through Klio.

    A first pipeline reads the text fixture and writes it with
    ``KlioWriteToAvro``.  The test asserts that at least one regular file was
    produced under the output prefix, then a second pipeline reads the output
    with ``KlioReadFromAvro`` and hands every element to
    ``assert_expected_klio_msg_from_avro_write``.
    """
    file_path_read = os.path.join(FIXTURE_PATH, "elements_text_file.txt")

    with tempfile.TemporaryDirectory() as tmp_path:
        with test_pipeline.TestPipeline() as p:
            (
                p
                | io_transforms.KlioReadFromText(file_path_read)
                | io_transforms.KlioWriteToAvro(file_path_prefix=tmp_path)
            )

        # ``tmp_path + "*"`` also matches the temporary directory itself, so a
        # plain "any match" check is always satisfied.  Asserting a bare
        # generator expression is likewise always true.  Keep only regular
        # files and require at least one of them.
        written_files = [
            path for path in glob.glob(tmp_path + "*") if os.path.isfile(path)
        ]
        assert written_files, "no output file found for the Avro write"

        with test_pipeline.TestPipeline() as p2:
            (
                p2
                | io_transforms.KlioReadFromAvro(file_pattern=(tmp_path + "*"))
                | beam.Map(assert_expected_klio_msg_from_avro_write)
            )
def test_write_to_avro():
    """Write the text fixture to Avro, read it back, and check the counters.

    A first pipeline reads the text fixture and writes it with
    ``KlioWriteToAvro``.  The test asserts that at least one regular file was
    produced under the output prefix, then a second pipeline reads the output
    with ``KlioReadFromAvro`` and hands every element to
    ``assert_expected_klio_msg_from_avro_write``.  Finally the first
    pipeline's metrics must contain exactly two counters: ``kmsg-read`` in the
    ``KlioReadFromText`` namespace and ``kmsg-write`` in the
    ``KlioWriteToAvro`` namespace, each with a committed value equal to the
    number of lines in the fixture.
    """
    file_path_read = os.path.join(FIXTURE_PATH, "elements_text_file.txt")

    # One element is expected per line of the fixture.
    with open(file_path_read, "r") as f:
        exp_element_count = len(f.readlines())

    with tempfile.TemporaryDirectory() as tmp_path:
        with test_pipeline.TestPipeline() as p:
            (
                p
                | io_transforms.KlioReadFromText(file_path_read)
                | io_transforms.KlioWriteToAvro(file_path_prefix=tmp_path)
            )

        # ``tmp_path + "*"`` also matches the temporary directory itself, so a
        # plain "any match" check is always satisfied.  Asserting a bare
        # generator expression is likewise always true.  Keep only regular
        # files and require at least one of them.
        written_files = [
            path for path in glob.glob(tmp_path + "*") if os.path.isfile(path)
        ]
        assert written_files, "no output file found for the Avro write"

        with test_pipeline.TestPipeline() as p2:
            (
                p2
                | io_transforms.KlioReadFromAvro(file_pattern=(tmp_path + "*"))
                | beam.Map(assert_expected_klio_msg_from_avro_write)
            )

    # Counters are taken from the first (text -> Avro) pipeline.
    actual_counters = p.result.metrics().query()["counters"]
    assert 2 == len(actual_counters)

    # NOTE(review): relies on the query returning the read counter first.
    read_counter = actual_counters[0]
    write_counter = actual_counters[1]

    assert exp_element_count == read_counter.committed
    assert "KlioReadFromText" == read_counter.key.metric.namespace
    assert "kmsg-read" == read_counter.key.metric.name

    assert exp_element_count == write_counter.committed
    assert "KlioWriteToAvro" == write_counter.key.metric.namespace
    assert "kmsg-write" == write_counter.key.metric.name