Пример #1
0
def test_avro_io_immutability():
    """Round-trip the avro fixture through a Klio avro write and read.

    The fixture is read with ``KlioReadFromAvro`` and written out with
    ``KlioWriteToAvro``; a second pipeline then reads whatever matches the
    output prefix and hands each element to
    ``assert_expected_klio_msg_from_avro``.
    """
    fixture_file = os.path.join(FIXTURE_PATH, "twitter.avro")

    with tempfile.TemporaryDirectory() as out_prefix:
        # First pipeline: copy the fixture to avro output under the prefix.
        with test_pipeline.TestPipeline() as write_pipeline:
            source = write_pipeline | io_transforms.KlioReadFromAvro(
                fixture_file
            )
            source | io_transforms.KlioWriteToAvro(
                file_path_prefix=out_prefix, num_shards=0
            )

        # Second pipeline: read back everything matching the prefix.
        written_pattern = out_prefix + "*"
        with test_pipeline.TestPipeline() as read_pipeline:
            reread = read_pipeline | io_transforms.KlioReadFromAvro(
                file_pattern=written_pattern
            )
            reread | beam.Map(assert_expected_klio_msg_from_avro)
Пример #2
0
def test_read_from_avro():
    """Read the avro fixture and check every element that comes out.

    Each element is passed to ``assert_expected_klio_msg_from_avro``; after
    the pipeline has run, the class attribute
    ``KlioReadFromAvro._REQUIRES_IO_READ_WRAP`` is asserted to be ``True``.
    """
    fixture_file = os.path.join(FIXTURE_PATH, "twitter.avro")

    with test_pipeline.TestPipeline() as pipeline:
        messages = pipeline | io_transforms.KlioReadFromAvro(
            file_pattern=fixture_file
        )
        messages | beam.Map(assert_expected_klio_msg_from_avro)

    assert io_transforms.KlioReadFromAvro._REQUIRES_IO_READ_WRAP is True
Пример #3
0
def test_read_from_avro():
    """Read the avro fixture and verify both the elements and the metrics.

    Besides checking every element with
    ``assert_expected_klio_msg_from_avro``, the test asserts that the
    transform's ``_reader._REQUIRES_IO_READ_WRAP`` is ``True`` and that the
    pipeline reports exactly one counter: ``kmsg-read`` in the
    ``KlioReadFromAvro`` namespace with a committed value of 2.
    """
    avro_reader = io_transforms.KlioReadFromAvro(
        file_pattern=os.path.join(FIXTURE_PATH, "twitter.avro")
    )
    with test_pipeline.TestPipeline() as pipeline:
        messages = pipeline | avro_reader
        messages | beam.Map(assert_expected_klio_msg_from_avro)

    assert avro_reader._reader._REQUIRES_IO_READ_WRAP is True

    # Metrics are queried only after the pipeline context has exited.
    counters = pipeline.result.metrics().query()["counters"]
    assert 1 == len(counters)

    read_counter = counters[0]
    assert 2 == read_counter.committed
    assert "KlioReadFromAvro" == read_counter.key.metric.namespace
    assert "kmsg-read" == read_counter.key.metric.name
Пример #4
0
def test_write_to_avro():
    """Write the text fixture's elements to avro and read them back.

    The first pipeline reads ``elements_text_file.txt`` with
    ``KlioReadFromText`` and writes it with ``KlioWriteToAvro`` using the
    temporary directory path as the file prefix. The test then checks that
    at least one regular file matching that prefix exists, and a second
    pipeline reads the matching files and hands each element to
    ``assert_expected_klio_msg_from_avro_write``.
    """
    file_path_read = os.path.join(FIXTURE_PATH, "elements_text_file.txt")

    with tempfile.TemporaryDirectory() as tmp_path:
        with test_pipeline.TestPipeline() as p:
            (
                p
                | io_transforms.KlioReadFromText(file_path_read)
                | io_transforms.KlioWriteToAvro(file_path_prefix=tmp_path)
            )

        # ``tmp_path + "*"`` also matches the temporary directory itself, so
        # a plain length check on the glob result can never fail. Keep only
        # regular files so the assertion really proves output was written.
        # (glob already returns full paths, so no join with tmp_path needed.)
        written_files = [
            path for path in glob.glob(tmp_path + "*") if os.path.isfile(path)
        ]
        assert written_files, "KlioWriteToAvro produced no output files"

        with test_pipeline.TestPipeline() as p2:
            (
                p2
                | io_transforms.KlioReadFromAvro(file_pattern=tmp_path + "*")
                | beam.Map(assert_expected_klio_msg_from_avro_write)
            )
Пример #5
0
def test_write_to_avro():
    """Write the text fixture to avro, read it back, and check the metrics.

    The first pipeline reads ``elements_text_file.txt`` with
    ``KlioReadFromText`` and writes it with ``KlioWriteToAvro`` using the
    temporary directory path as the file prefix. The test then checks that
    at least one regular file matching that prefix exists, reads the
    matching files back through
    ``assert_expected_klio_msg_from_avro_write``, and finally asserts that
    the first pipeline reported a ``kmsg-read`` and a ``kmsg-write`` counter
    whose committed values both equal the number of lines in the fixture.
    """
    file_path_read = os.path.join(FIXTURE_PATH, "elements_text_file.txt")
    with open(file_path_read, "r") as f:
        exp_element_count = len(f.readlines())

    with tempfile.TemporaryDirectory() as tmp_path:
        with test_pipeline.TestPipeline() as p:
            (
                p
                | io_transforms.KlioReadFromText(file_path_read)
                | io_transforms.KlioWriteToAvro(file_path_prefix=tmp_path)
            )

        # ``tmp_path + "*"`` also matches the temporary directory itself, so
        # a plain length check on the glob result can never fail. Keep only
        # regular files so the assertion really proves output was written.
        # (glob already returns full paths, so no join with tmp_path needed.)
        written_files = [
            path for path in glob.glob(tmp_path + "*") if os.path.isfile(path)
        ]
        assert written_files, "KlioWriteToAvro produced no output files"

        with test_pipeline.TestPipeline() as p2:
            (
                p2
                | io_transforms.KlioReadFromAvro(file_pattern=tmp_path + "*")
                | beam.Map(assert_expected_klio_msg_from_avro_write)
            )

    # Metrics come from the first (read-text / write-avro) pipeline.
    actual_counters = p.result.metrics().query()["counters"]
    assert 2 == len(actual_counters)

    # NOTE: these checks rely on the query returning the read counter
    # before the write counter.
    read_counter = actual_counters[0]
    write_counter = actual_counters[1]

    assert exp_element_count == read_counter.committed
    assert "KlioReadFromText" == read_counter.key.metric.namespace
    assert "kmsg-read" == read_counter.key.metric.name

    assert exp_element_count == write_counter.committed
    assert "KlioWriteToAvro" == write_counter.key.metric.namespace
    assert "kmsg-write" == write_counter.key.metric.name