def test_avro_io_immutability():
    """Round-trip the ``twitter.avro`` fixture through Klio's Avro IO.

    The fixture is read with ``KlioReadFromAvro`` and written back out with
    ``KlioWriteToAvro``; the written output is then read again and every
    element is checked with ``assert_expected_klio_msg_from_avro``.
    """
    initial_data_path = os.path.join(FIXTURE_PATH, "twitter.avro")

    with tempfile.TemporaryDirectory() as tmp_path:
        # ``file_path_prefix`` is a file-name prefix, not a directory
        # (assuming KlioWriteToAvro follows Beam's WriteToAvro here). Using
        # ``tmp_path`` itself as the prefix would create sibling files next
        # to the temporary directory that are never cleaned up, so the
        # output is placed inside the directory instead.
        output_prefix = os.path.join(tmp_path, "output")

        with test_pipeline.TestPipeline() as p:
            (
                p
                | io_transforms.KlioReadFromAvro(initial_data_path)
                | io_transforms.KlioWriteToAvro(
                    file_path_prefix=output_prefix, num_shards=0
                )
            )

        with test_pipeline.TestPipeline() as p2:
            (
                p2
                | io_transforms.KlioReadFromAvro(
                    file_pattern=output_prefix + "*"
                )
                | beam.Map(assert_expected_klio_msg_from_avro)
            )
def test_read_from_avro():
    """Read the ``twitter.avro`` fixture and validate every element.

    Each element produced by ``KlioReadFromAvro`` is passed to
    ``assert_expected_klio_msg_from_avro``. Afterwards the transform class
    is checked to have ``_REQUIRES_IO_READ_WRAP`` set to ``True``.
    """
    # NOTE(review): a later function in this file reuses the name
    # ``test_read_from_avro``; if both live in the same module, only the
    # later definition is collected by pytest -- confirm this is intended.
    avro_fixture = os.path.join(FIXTURE_PATH, "twitter.avro")

    with test_pipeline.TestPipeline() as pipeline:
        messages = pipeline | io_transforms.KlioReadFromAvro(
            file_pattern=avro_fixture
        )
        messages | beam.Map(assert_expected_klio_msg_from_avro)

    read_transform_cls = io_transforms.KlioReadFromAvro
    assert read_transform_cls._REQUIRES_IO_READ_WRAP is True
def test_read_from_avro():
    """Read the ``twitter.avro`` fixture and check elements and metrics.

    Every element produced by ``KlioReadFromAvro`` is validated with
    ``assert_expected_klio_msg_from_avro``. After the pipeline finishes, the
    transform's reader must have ``_REQUIRES_IO_READ_WRAP`` set to ``True``
    and the pipeline result must expose exactly one counter,
    ``KlioReadFromAvro``/``kmsg-read``, with a committed value of 2.
    """
    avro_fixture = os.path.join(FIXTURE_PATH, "twitter.avro")
    transform = io_transforms.KlioReadFromAvro(file_pattern=avro_fixture)

    with test_pipeline.TestPipeline() as pipeline:
        messages = pipeline | transform
        messages | beam.Map(assert_expected_klio_msg_from_avro)

    assert transform._reader._REQUIRES_IO_READ_WRAP is True

    # Metrics are queried from the result of the finished pipeline.
    counters = pipeline.result.metrics().query()["counters"]
    assert len(counters) == 1

    read_counter = counters[0]
    assert read_counter.committed == 2
    assert read_counter.key.metric.namespace == "KlioReadFromAvro"
    assert read_counter.key.metric.name == "kmsg-read"
def test_write_to_avro():
    """Write text-file elements to Avro and read them back.

    ``elements_text_file.txt`` is read with ``KlioReadFromText`` and written
    with ``KlioWriteToAvro``. The test then checks that output files exist
    and that reading them back with ``KlioReadFromAvro`` yields elements
    accepted by ``assert_expected_klio_msg_from_avro_write``.
    """
    # NOTE(review): a later function in this file reuses the name
    # ``test_write_to_avro``; if both live in the same module, only the
    # later definition is collected by pytest -- confirm this is intended.
    file_path_read = os.path.join(FIXTURE_PATH, "elements_text_file.txt")

    with tempfile.TemporaryDirectory() as tmp_path:
        # Write *inside* the temporary directory. Using ``tmp_path`` itself
        # as the prefix would create sibling files outside the directory
        # (never cleaned up), and the glob below would also match the
        # directory itself, which is not a file.
        output_prefix = os.path.join(tmp_path, "output")
        output_pattern = output_prefix + "*"

        with test_pipeline.TestPipeline() as p:
            (
                p
                | io_transforms.KlioReadFromText(file_path_read)
                | io_transforms.KlioWriteToAvro(file_path_prefix=output_prefix)
            )

        files = glob.glob(output_pattern)
        assert len(files) > 0
        # ``all(...)`` is required: asserting a bare generator expression
        # always passes because a generator object is truthy.
        assert all(os.path.isfile(file_name) for file_name in files)

        with test_pipeline.TestPipeline() as p2:
            (
                p2
                | io_transforms.KlioReadFromAvro(file_pattern=output_pattern)
                | beam.Map(assert_expected_klio_msg_from_avro_write)
            )
def test_write_to_avro():
    """Write text-file elements to Avro, read them back, and check metrics.

    ``elements_text_file.txt`` is read with ``KlioReadFromText`` and written
    with ``KlioWriteToAvro``. The test checks that output files exist, that
    reading them back with ``KlioReadFromAvro`` yields elements accepted by
    ``assert_expected_klio_msg_from_avro_write``, and that the first
    pipeline reports exactly two counters -- ``KlioReadFromText``/
    ``kmsg-read`` and ``KlioWriteToAvro``/``kmsg-write`` -- each committed
    once per line of the input file.
    """
    file_path_read = os.path.join(FIXTURE_PATH, "elements_text_file.txt")

    # One element is expected per line of the input text file.
    with open(file_path_read, "r") as f:
        exp_element_count = sum(1 for _ in f)

    with tempfile.TemporaryDirectory() as tmp_path:
        # Write *inside* the temporary directory. Using ``tmp_path`` itself
        # as the prefix would create sibling files outside the directory
        # (never cleaned up), and the glob below would also match the
        # directory itself, which is not a file.
        output_prefix = os.path.join(tmp_path, "output")
        output_pattern = output_prefix + "*"

        with test_pipeline.TestPipeline() as p:
            (
                p
                | io_transforms.KlioReadFromText(file_path_read)
                | io_transforms.KlioWriteToAvro(file_path_prefix=output_prefix)
            )

        files = glob.glob(output_pattern)
        assert len(files) > 0
        # ``all(...)`` is required: asserting a bare generator expression
        # always passes because a generator object is truthy.
        assert all(os.path.isfile(file_name) for file_name in files)

        with test_pipeline.TestPipeline() as p2:
            (
                p2
                | io_transforms.KlioReadFromAvro(file_pattern=output_pattern)
                | beam.Map(assert_expected_klio_msg_from_avro_write)
            )

    # Metrics come from the first (read-text / write-avro) pipeline.
    actual_counters = p.result.metrics().query()["counters"]
    assert len(actual_counters) == 2

    # Compare by (namespace, name) so the check does not depend on the
    # order in which the metrics query returns the counters.
    committed_by_metric = {
        (c.key.metric.namespace, c.key.metric.name): c.committed
        for c in actual_counters
    }
    assert committed_by_metric == {
        ("KlioReadFromText", "kmsg-read"): exp_element_count,
        ("KlioWriteToAvro", "kmsg-write"): exp_element_count,
    }