def test_sequential_reads(self, connector_config, configured_catalog, docker_runner: ConnectorRunner):
    configured_catalog = full_refresh_only_catalog(configured_catalog)
    output = docker_runner.call_read(connector_config, configured_catalog)
    records_1 = [message.record.data for message in output if message.type == Type.RECORD]

    output = docker_runner.call_read(connector_config, configured_catalog)
    records_2 = [message.record.data for message in output if message.type == Type.RECORD]

    serialize = partial(json.dumps, sort_keys=True)
    assert not (
        set(map(serialize, records_1)) - set(map(serialize, records_2))
    ), "The two sequential reads should produce either an identical set of records, or the records of the first read should be a subset of those of the second"
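# NOTE: `full_refresh_only_catalog` is used above but not defined in this listing. The sketch
# below is an illustrative assumption (based on airbyte_cdk's protocol models), not the
# canonical implementation: it keeps only streams that support full refresh and forces that
# sync mode on them.
from airbyte_cdk.models import ConfiguredAirbyteCatalog, SyncMode


def full_refresh_only_catalog(configured_catalog: ConfiguredAirbyteCatalog) -> ConfiguredAirbyteCatalog:
    """Keep only streams that support full_refresh and configure them to use that sync mode."""
    streams = []
    for configured_stream in configured_catalog.streams:
        if SyncMode.full_refresh in configured_stream.stream.supported_sync_modes:
            configured_stream.sync_mode = SyncMode.full_refresh
            streams.append(configured_stream)
    configured_catalog.streams = streams
    return configured_catalog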
def test_sequential_reads(
    self,
    inputs: ConnectionTestConfig,
    connector_config: SecretDict,
    configured_catalog: ConfiguredAirbyteCatalog,
    docker_runner: ConnectorRunner,
    detailed_logger: Logger,
):
    ignored_fields = getattr(inputs, "ignored_fields") or {}
    configured_catalog = full_refresh_only_catalog(configured_catalog)

    output = docker_runner.call_read(connector_config, configured_catalog)
    records_1 = [message.record for message in output if message.type == Type.RECORD]
    records_by_stream_1 = defaultdict(list)
    for record in records_1:
        records_by_stream_1[record.stream].append(record.data)

    output = docker_runner.call_read(connector_config, configured_catalog)
    records_2 = [message.record for message in output if message.type == Type.RECORD]
    records_by_stream_2 = defaultdict(list)
    for record in records_2:
        records_by_stream_2[record.stream].append(record.data)

    pks_by_stream = primary_keys_by_stream(configured_catalog)

    for stream in records_by_stream_1:
        if pks_by_stream.get(stream):
            serializer = partial(primary_keys_only, pks=pks_by_stream.get(stream))
        else:
            serializer = partial(make_hashable, exclude_fields=ignored_fields.get(stream))
        stream_records_1 = records_by_stream_1.get(stream)
        stream_records_2 = records_by_stream_2.get(stream)
        # Using a symmetric difference of the serialized record sets to detect any mismatch between the two reads
        output_diff = set(map(serializer, stream_records_1)).symmetric_difference(set(map(serializer, stream_records_2)))
        if output_diff:
            msg = f"{stream}: the two sequential reads should produce the same set of records"
            detailed_logger.info(msg)
            detailed_logger.info("First read")
            detailed_logger.log_json_list(stream_records_1)
            detailed_logger.info("Second read")
            detailed_logger.log_json_list(stream_records_2)
            detailed_logger.info("Difference")
            detailed_logger.log_json_list(output_diff)
            pytest.fail(msg)
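# NOTE: `primary_keys_only` and `make_hashable` are referenced above but not shown. The sketch
# below is an illustrative assumption of how such serializers could work on plain-dict record
# payloads: the first reduces a record to its primary-key values, the second produces a stable
# JSON string with ignored top-level fields removed, so records can be collected into sets and
# compared across the two reads.
import json
from typing import Any, Iterable, List, Mapping, Optional


def primary_keys_only(record: Mapping[str, Any], pks: Iterable[List[str]]) -> str:
    """Serialize only the primary-key values of a record (each pk is a path of keys)."""
    pk_values = []
    for pk_path in pks:
        value: Any = record
        for key in pk_path:
            value = value.get(key) if isinstance(value, Mapping) else None
        pk_values.append(value)
    return json.dumps(pk_values, sort_keys=True, default=str)


def make_hashable(record: Mapping[str, Any], exclude_fields: Optional[Iterable[str]] = None) -> str:
    """Serialize a record deterministically, dropping top-level fields that are allowed to differ."""
    exclude_fields = set(exclude_fields or [])
    return json.dumps({k: v for k, v in record.items() if k not in exclude_fields}, sort_keys=True, default=str)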
def test_airbyte_trace_message_on_failure(self, connector_config, inputs: BasicReadTestConfig, docker_runner: ConnectorRunner):
    if not inputs.expect_trace_message_on_failure:
        pytest.skip("Skipping `test_airbyte_trace_message_on_failure` because `inputs.expect_trace_message_on_failure=False`")
        return

    invalid_configured_catalog = ConfiguredAirbyteCatalog(
        streams=[
            # create ConfiguredAirbyteStream without validation
            ConfiguredAirbyteStream.construct(
                stream=AirbyteStream(
                    name="__AIRBYTE__stream_that_does_not_exist",
                    json_schema={"type": "object", "properties": {"f1": {"type": "string"}}},
                    supported_sync_modes=[SyncMode.full_refresh],
                ),
                sync_mode="INVALID",
                destination_sync_mode="INVALID",
            )
        ]
    )

    output = docker_runner.call_read(connector_config, invalid_configured_catalog, raise_container_error=False)
    trace_messages = filter_output(output, Type.TRACE)
    error_trace_messages = list(filter(lambda m: m.trace.type == TraceType.ERROR, trace_messages))

    assert len(error_trace_messages) >= 1, "Connector should emit at least one error trace message"
def test_read(
    self,
    connector_config,
    configured_catalog,
    inputs: BasicReadTestConfig,
    expected_records: List[AirbyteRecordMessage],
    docker_runner: ConnectorRunner,
    detailed_logger,
):
    output = docker_runner.call_read(connector_config, configured_catalog)
    records = [message.record for message in filter_output(output, Type.RECORD)]

    assert records, "At least one record should be read using provided catalog"

    if inputs.validate_schema:
        self._validate_schema(records=records, configured_catalog=configured_catalog)

    self._validate_empty_streams(records=records, configured_catalog=configured_catalog, allowed_empty_streams=inputs.empty_streams)
    for pks, record in primary_keys_for_records(streams=configured_catalog.streams, records=records):
        for pk_path, pk_value in pks.items():
            assert (
                pk_value is not None
            ), f"Primary key subkeys {repr(pk_path)} have null values or are not present in {record.stream} stream records."

    # TODO: remove this condition after https://github.com/airbytehq/airbyte/issues/8312 is done
    if inputs.validate_data_points:
        self._validate_field_appears_at_least_once(records=records, configured_catalog=configured_catalog)

    if expected_records:
        self._validate_expected_records(
            records=records, expected_records=expected_records, flags=inputs.expect_records, detailed_logger=detailed_logger
        )
def test_two_sequential_reads(self, connector_config, configured_catalog_for_incremental, cursor_paths, docker_runner: ConnectorRunner):
    stream_mapping = {stream.stream.name: stream for stream in configured_catalog_for_incremental.streams}

    output = docker_runner.call_read(connector_config, configured_catalog_for_incremental)
    records_1 = filter_output(output, type_=Type.RECORD)
    states_1 = filter_output(output, type_=Type.STATE)

    assert states_1, "Should produce at least one state"
    assert records_1, "Should produce at least one record"

    latest_state = states_1[-1].state.data
    for record_value, state_value in records_with_state(records_1, latest_state, stream_mapping, cursor_paths):
        assert (
            record_value <= state_value
        ), "First incremental sync should only produce records with a cursor value at or before the latest state cursor value"

    output = docker_runner.call_read_with_state(connector_config, configured_catalog_for_incremental, state=latest_state)
    records_2 = filter_output(output, type_=Type.RECORD)

    for record_value, state_value in records_with_state(records_2, latest_state, stream_mapping, cursor_paths):
        assert (
            record_value >= state_value
        ), "Second incremental sync should only produce records with a cursor value at or after the state cursor value"
def test_read(
    self,
    connector_config,
    configured_catalog,
    inputs: BasicReadTestConfig,
    expected_records: List[AirbyteMessage],
    docker_runner: ConnectorRunner,
    detailed_logger,
):
    output = docker_runner.call_read(connector_config, configured_catalog)
    records = [message.record for message in filter_output(output, Type.RECORD)]

    assert records, "At least one record should be read using provided catalog"

    if inputs.validate_schema:
        self._validate_schema(records=records, configured_catalog=configured_catalog)

    self._validate_empty_streams(records=records, configured_catalog=configured_catalog, allowed_empty_streams=inputs.empty_streams)
    for pks, record in primary_keys_for_records(streams=configured_catalog.streams, records=records):
        for pk_path, pk_value in pks.items():
            assert pk_value is not None, (
                f"Primary key subkeys {repr(pk_path)} "
                f"have null values or are not present in {record.stream} stream records."
            )

    if expected_records:
        self._validate_expected_records(
            records=records, expected_records=expected_records, flags=inputs.expect_records, detailed_logger=detailed_logger
        )
def test_read(
    self,
    connector_config,
    configured_catalog,
    inputs: BasicReadTestConfig,
    expected_records: List[AirbyteMessage],
    docker_runner: ConnectorRunner,
):
    output = docker_runner.call_read(connector_config, configured_catalog)
    records = [message.record for message in output if message.type == Type.RECORD]
    counter = Counter(record.stream for record in records)

    all_streams = set(stream.stream.name for stream in configured_catalog.streams)
    streams_with_records = set(counter.keys())
    streams_without_records = all_streams - streams_with_records

    assert records, "At least one record should be read using provided catalog"

    for pks, record in primary_keys_for_records(streams=configured_catalog.streams, records=records):
        for pk_path, pk_value in pks.items():
            assert pk_value is not None, (
                f"Primary key subkeys {repr(pk_path)} "
                f"have null values or are not present in {record.stream} stream records."
            )

    if inputs.validate_output_from_all_streams:
        assert (
            not streams_without_records
        ), f"All streams should return some records, streams without records: {streams_without_records}"

    if expected_records:
        actual_by_stream = self.group_by_stream(records)
        expected_by_stream = self.group_by_stream(expected_records)
        for stream_name, expected in expected_by_stream.items():
            actual = actual_by_stream.get(stream_name, [])
            self.compare_records(
                stream_name=stream_name,
                actual=actual,
                expected=expected,
                extra_fields=inputs.expect_records.extra_fields,
                exact_order=inputs.expect_records.exact_order,
                extra_records=inputs.expect_records.extra_records,
            )
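# NOTE: `group_by_stream` and `compare_records` are helpers on the test class that are not shown
# in this listing. A minimal sketch of the grouping step, assuming the inputs are
# AirbyteRecordMessage-like objects with `stream` and `data` attributes, could look like the
# function below (it would live on the class as a @staticmethod).
from collections import defaultdict
from typing import Any, Dict, Iterable, List


def group_by_stream(records: Iterable[Any]) -> Dict[str, List[Any]]:
    """Group record payloads by stream name, preserving the order they were read in."""
    result: Dict[str, List[Any]] = defaultdict(list)
    for record in records:
        result[record.stream].append(record.data)
    return dict(result)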
def test_read(self, connector_config, configured_catalog, inputs: BasicReadTestConfig, docker_runner: ConnectorRunner):
    output = docker_runner.call_read(connector_config, configured_catalog)
    records = [message.record for message in output if message.type == Type.RECORD]
    counter = Counter(record.stream for record in records)

    all_streams = set(stream.stream.name for stream in configured_catalog.streams)
    streams_with_records = set(counter.keys())
    streams_without_records = all_streams - streams_with_records

    assert records, "At least one record should be read using provided catalog"

    if inputs.validate_output_from_all_streams:
        assert (
            not streams_without_records
        ), f"All streams should return some records, streams without records: {streams_without_records}"
def test_two_sequential_reads(
    self,
    inputs: IncrementalConfig,
    connector_config: SecretDict,
    configured_catalog_for_incremental: ConfiguredAirbyteCatalog,
    cursor_paths: dict[str, list[str]],
    docker_runner: ConnectorRunner,
):
    threshold_days = getattr(inputs, "threshold_days") or 0
    stream_mapping = {stream.stream.name: stream for stream in configured_catalog_for_incremental.streams}

    output = docker_runner.call_read(connector_config, configured_catalog_for_incremental)
    records_1 = filter_output(output, type_=Type.RECORD)
    states_1 = filter_output(output, type_=Type.STATE)

    assert states_1, "Should produce at least one state"
    assert records_1, "Should produce at least one record"

    latest_state = states_1[-1].state.data
    for record_value, state_value, stream_name in records_with_state(records_1, latest_state, stream_mapping, cursor_paths):
        assert (
            record_value <= state_value
        ), f"First incremental sync should only produce records with a cursor value at or before the latest state cursor value. Stream: {stream_name}"

    output = docker_runner.call_read_with_state(connector_config, configured_catalog_for_incremental, state=latest_state)
    records_2 = filter_output(output, type_=Type.RECORD)

    for record_value, state_value, stream_name in records_with_state(records_2, latest_state, stream_mapping, cursor_paths):
        assert compare_cursor_with_threshold(
            record_value, state_value, threshold_days
        ), f"Second incremental sync should only produce records with a cursor value at or after the state cursor value (within threshold_days). Stream: {stream_name}"
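# NOTE: `compare_cursor_with_threshold` is referenced above but not defined in this listing.
# The sketch below is an illustrative assumption: cursors are either datetimes, ISO-8601
# strings (parsed here with python-dateutil), or plainly comparable values, and `threshold_days`
# grants a grace window for APIs that keep mutating recently-synced records.
from datetime import datetime, timedelta
from typing import Any

from dateutil import parser


def compare_cursor_with_threshold(record_value: Any, state_value: Any, threshold_days: int) -> bool:
    """Return True if the record cursor is at or after the state cursor, minus the allowed threshold."""
    if threshold_days:
        record_dt = record_value if isinstance(record_value, datetime) else parser.parse(str(record_value))
        state_dt = state_value if isinstance(state_value, datetime) else parser.parse(str(state_value))
        return record_dt >= state_dt - timedelta(days=threshold_days)
    return record_value >= state_value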
def test_read_sequential_slices(
    self, inputs: IncrementalConfig, connector_config, configured_catalog_for_incremental, cursor_paths, docker_runner: ConnectorRunner
):
    """
    Incremental test that calls the read method without a state checkpoint. The results are then partitioned
    by stream and slice checkpoints, producing batches of messages that look like:
    <state message>
    <record message>
    ...
    <record message>

    Using these batches, we make additional read calls seeded with each state message and verify the
    correctness of the records in the response.
    """
    if inputs.skip_comprehensive_incremental_tests:
        pytest.skip("Skipping new incremental test based on acceptance-test-config.yml")
        return

    threshold_days = getattr(inputs, "threshold_days") or 0
    stream_mapping = {stream.stream.name: stream for stream in configured_catalog_for_incremental.streams}

    output = docker_runner.call_read(connector_config, configured_catalog_for_incremental)
    records_1 = filter_output(output, type_=Type.RECORD)
    states_1 = filter_output(output, type_=Type.STATE)

    assert states_1, "Should produce at least one state"
    assert records_1, "Should produce at least one record"

    latest_state = states_1[-1].state.data
    for record_value, state_value, stream_name in records_with_state(records_1, latest_state, stream_mapping, cursor_paths):
        assert (
            record_value <= state_value
        ), f"First incremental sync should only produce records with a cursor value at or before the latest state cursor value. Stream: {stream_name}"

    # Create partitions made up of one state message followed by any records that come before the next state
    filtered_messages = [message for message in output if message.type == Type.STATE or message.type == Type.RECORD]
    right_index = len(filtered_messages)
    checkpoint_messages = []
    for index, message in reversed(list(enumerate(filtered_messages))):
        if message.type == Type.STATE:
            message_group = (filtered_messages[index], filtered_messages[index + 1 : right_index])
            checkpoint_messages.insert(0, message_group)
            right_index = index

    # We sometimes have duplicate identical state messages in a stream which we can filter out to speed things up
    checkpoint_messages = [message for index, message in enumerate(checkpoint_messages) if message not in checkpoint_messages[:index]]

    # To avoid spamming APIs we only test a fraction of the slices (the value below is used as a stride)
    num_slices_to_test = 1 if len(checkpoint_messages) <= 5 else len(checkpoint_messages) // 5
    for message_batch in checkpoint_messages[::num_slices_to_test]:
        assert len(message_batch) > 0 and message_batch[0].type == Type.STATE
        current_state = message_batch[0]
        output = docker_runner.call_read_with_state(connector_config, configured_catalog_for_incremental, current_state.state.data)
        records = filter_output(output, type_=Type.RECORD)

        for record_value, state_value, stream_name in records_with_state(records, current_state.state.data, stream_mapping, cursor_paths):
            assert compare_cursor_with_threshold(
                record_value, state_value, threshold_days
            ), f"Second incremental sync should only produce records with a cursor value at or after the state cursor value (within threshold_days). Stream: {stream_name}"
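# NOTE: a self-contained, illustrative walk-through of the checkpoint-partitioning step above
# (not part of the test suite). `Msg` is a stand-in for AirbyteMessage; the loop groups each
# STATE message with the records emitted after it, and records emitted before the first state
# are not attached to any checkpoint, mirroring test_read_sequential_slices.
from collections import namedtuple

Msg = namedtuple("Msg", ["type", "payload"])
STATE, RECORD = "STATE", "RECORD"

messages = [
    Msg(RECORD, "r1"), Msg(RECORD, "r2"), Msg(STATE, "s1"),
    Msg(RECORD, "r3"), Msg(STATE, "s2"),
]

right_index = len(messages)
checkpoints = []
for index, message in reversed(list(enumerate(messages))):
    if message.type == STATE:
        checkpoints.insert(0, (message, messages[index + 1 : right_index]))
        right_index = index

# checkpoints == [(Msg(STATE, "s1"), [Msg(RECORD, "r3")]), (Msg(STATE, "s2"), [])]
print(checkpoints)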
def test_read(
    self,
    connector_config,
    configured_catalog,
    inputs: BasicReadTestConfig,
    expected_records: List[AirbyteMessage],
    docker_runner: ConnectorRunner,
):
    output = docker_runner.call_read(connector_config, configured_catalog)
    records = [message.record for message in output if message.type == Type.RECORD]
    counter = Counter(record.stream for record in records)

    if inputs.validate_schema:
        bar = "-" * 80
        streams_errors = verify_records_schema(records, configured_catalog)
        for stream_name, errors in streams_errors.items():
            errors = map(str, errors.values())
            str_errors = f"\n{bar}\n".join(errors)
            logging.error(f"The {stream_name} stream has the following schema errors:\n{str_errors}")

        if streams_errors:
            pytest.fail(f"Please check your json_schema in selected streams {streams_errors.keys()}.")

    all_streams = set(stream.stream.name for stream in configured_catalog.streams)
    streams_with_records = set(counter.keys())
    streams_without_records = all_streams - streams_with_records

    assert records, "At least one record should be read using provided catalog"

    for pks, record in primary_keys_for_records(streams=configured_catalog.streams, records=records):
        for pk_path, pk_value in pks.items():
            assert pk_value is not None, (
                f"Primary key subkeys {repr(pk_path)} "
                f"have null values or are not present in {record.stream} stream records."
            )

    if inputs.validate_output_from_all_streams:
        assert (
            not streams_without_records
        ), f"All streams should return some records, streams without records: {streams_without_records}"

    if expected_records:
        actual_by_stream = self.group_by_stream(records)
        expected_by_stream = self.group_by_stream(expected_records)
        for stream_name, expected in expected_by_stream.items():
            actual = actual_by_stream.get(stream_name, [])
            self.compare_records(
                stream_name=stream_name,
                actual=actual,
                expected=expected,
                extra_fields=inputs.expect_records.extra_fields,
                exact_order=inputs.expect_records.exact_order,
                extra_records=inputs.expect_records.extra_records,
            )