Exemplo n.º 1
0
def test_validator_batch_filter(multi_batch_taxi_validator):
    """Check three ways of selecting batch definitions with a BatchFilter.

    The batch definitions of every batch held by the validator fixture are
    collected once and then narrowed with filters built from:

    * ``batch_filter_parameters`` (exact match on a batch identifier),
    * ``index`` (a slice over the list of batch definitions),
    * ``custom_filter_function`` (an arbitrary predicate on batch identifiers).

    Args:
        multi_batch_taxi_validator: fixture defined outside this snippet; the
            assertions below expect it to hold one batch each for months
            "01", "02" and "03", with the "01" batch first.
    """
    # Only the batch objects are used, so iterate the values rather than
    # unpacking (and discarding) the keys.
    total_batch_definition_list: List[BatchDefinition] = [
        batch.batch_definition
        for batch in multi_batch_taxi_validator.batches.values()
    ]

    # --- batch_filter_parameters: exact match on month == "01" -------------
    jan_batch_filter: BatchFilter = build_batch_filter(
        data_connector_query_dict={"batch_filter_parameters": {"month": "01"}}
    )
    jan_batch_definition_list: List[BatchDefinition] = (
        jan_batch_filter.select_from_data_connector_query(
            batch_definition_list=total_batch_definition_list
        )
    )

    assert len(jan_batch_definition_list) == 1
    jan_batch_definition = jan_batch_definition_list[0]
    assert jan_batch_definition["batch_identifiers"]["month"] == "01"
    assert jan_batch_definition["id"] == "18653cbf8fb5baf5fbbc5ed95f9ee94d"

    # --- index: slice(-1, 0, -1) walks backwards from the last element and
    # stops before index 0, i.e. it selects everything except the first entry.
    feb_march_batch_filter: BatchFilter = build_batch_filter(
        data_connector_query_dict={"index": slice(-1, 0, -1)}
    )
    feb_march_batch_definition_list: List[BatchDefinition] = (
        feb_march_batch_filter.select_from_data_connector_query(
            batch_definition_list=total_batch_definition_list
        )
    )

    assert len(feb_march_batch_definition_list) == 2
    feb_march_months: Set[str] = {
        batch_definition.batch_identifiers["month"]
        for batch_definition in feb_march_batch_definition_list
    }
    assert feb_march_months == {"02", "03"}

    # --- custom_filter_function: keep only months "01" and "03" ------------
    jan_march_batch_filter: BatchFilter = build_batch_filter(
        data_connector_query_dict={
            "custom_filter_function": lambda batch_identifiers: (
                batch_identifiers["month"] == "01"
                or batch_identifiers["month"] == "03"
            )
        }
    )
    jan_march_batch_definition_list: List[BatchDefinition] = (
        jan_march_batch_filter.select_from_data_connector_query(
            batch_definition_list=total_batch_definition_list
        )
    )

    assert len(jan_march_batch_definition_list) == 2
    jan_march_months: Set[str] = {
        batch_definition.batch_identifiers["month"]
        for batch_definition in jan_march_batch_definition_list
    }
    assert jan_march_months == {"01", "03"}
Exemplo n.º 2
0
def test_custom_filter_function(
    multi_batch_taxi_validator,
):
    """Verify that a ``custom_filter_function`` predicate narrows the batches.

    Collects the batch definition of every batch held by the validator
    fixture (three are expected), then filters them with a predicate that
    keeps months numerically below 3 and checks that exactly the "01" and
    "02" batch definitions remain.
    """
    all_batch_definitions: List[BatchDefinition] = [
        batch.batch_definition
        for _batch_key, batch in multi_batch_taxi_validator.batches.items()
    ]
    assert len(all_batch_definitions) == 3

    # Keep every batch definition whose month comes before March.
    query = {
        "custom_filter_function": lambda batch_identifiers: int(batch_identifiers["month"]) < 3
    }
    before_march_filter: BatchFilter = build_batch_filter(
        data_connector_query_dict=query
    )
    before_march_definitions: list = before_march_filter.select_from_data_connector_query(
        batch_definition_list=all_batch_definitions
    )
    assert len(before_march_definitions) == 2

    selected_months: Set[str] = {
        definition.batch_identifiers["month"]
        for definition in before_march_definitions
    }
    assert selected_months == {"01", "02"}
Exemplo n.º 3
0
    def _get_batch_definition_list_from_batch_request(
        self,
        batch_request: BatchRequestBase,
    ) -> List[BatchDefinition]:
        """
        Collect the cached batch_definitions that satisfy batch_request.

        Steps, in the order they are performed:
            1. validate batch_request and refresh the data-references cache if it is empty;
            2. keep every cached batch_definition accepted by batch_definition_matches_batch_request;
            3. if sorters are configured, sort the remaining batch_definitions;
            4. if batch_request has a data_connector_query, build a BatchFilter from a copy of it
               (filling in batch_request.limit when the query has no "limit" of its own) and
               apply that filter to the list.

        Args:
            batch_request (BatchRequestBase): BatchRequestBase (BatchRequest without attribute validation) to process

        Returns:
            A list of BatchDefinition objects that match BatchRequest

        """
        self._validate_batch_request(batch_request=batch_request)
        if len(self._data_references_cache) == 0:
            self._refresh_data_references_cache()

        matching_definitions: List[BatchDefinition] = [
            candidate
            for candidate in self._get_batch_definition_list_from_cache()
            if batch_definition_matches_batch_request(
                batch_definition=candidate,
                batch_request=batch_request,
            )
        ]

        if len(self.sorters) > 0:
            matching_definitions = self._sort_batch_definition_list(
                batch_definition_list=matching_definitions)

        # No query means no further filtering.
        # NOTE(review): batch_request.limit is only honoured inside a query, so it is
        # ignored on this path -- confirm that this is intended.
        if batch_request.data_connector_query is None:
            return matching_definitions

        # Work on a copy so the caller's query object is not modified.
        query = batch_request.data_connector_query.copy()
        if batch_request.limit is not None and query.get("limit") is None:
            query["limit"] = batch_request.limit

        batch_filter_obj: BatchFilter = build_batch_filter(
            data_connector_query_dict=query)
        return batch_filter_obj.select_from_data_connector_query(
            batch_definition_list=matching_definitions)
Exemplo n.º 4
0
def test_validator_batch_filter(multi_batch_taxi_validator):
    """Check four ways of selecting batch definitions with a BatchFilter.

    The batch definitions of every batch held by the validator fixture are
    collected once and then narrowed with filters built from:

    * ``batch_filter_parameters`` (exact match on a batch identifier),
    * ``index`` (a slice over the list of batch definitions),
    * ``custom_filter_function`` (an arbitrary predicate on batch identifiers),
    * ``limit`` (an upper bound on the number of batch definitions returned).

    Args:
        multi_batch_taxi_validator: fixture defined outside this snippet; the
            assertions below expect it to hold one batch each for months
            "01", "02" and "03", in that order.
    """
    # Only the batch objects are used, so iterate the values rather than
    # unpacking (and discarding) the keys.
    total_batch_definition_list: List[BatchDefinition] = [
        batch.batch_definition
        for batch in multi_batch_taxi_validator.batches.values()
    ]

    # --- batch_filter_parameters: exact match on month == "01" -------------
    jan_batch_filter: BatchFilter = build_batch_filter(
        data_connector_query_dict={"batch_filter_parameters": {"month": "01"}}
    )
    jan_batch_definition_list: List[BatchDefinition] = (
        jan_batch_filter.select_from_data_connector_query(
            batch_definition_list=total_batch_definition_list
        )
    )

    assert len(jan_batch_definition_list) == 1
    jan_batch_definition = jan_batch_definition_list[0]
    assert jan_batch_definition["batch_identifiers"]["month"] == "01"
    assert jan_batch_definition["id"] == "0327cfb13205ec8512e1c28e438ab43b"

    # --- index: slice(-1, 0, -1) walks backwards from the last element and
    # stops before index 0, i.e. it selects everything except the first entry.
    feb_march_batch_filter: BatchFilter = build_batch_filter(
        data_connector_query_dict={"index": slice(-1, 0, -1)}
    )
    feb_march_batch_definition_list: List[BatchDefinition] = (
        feb_march_batch_filter.select_from_data_connector_query(
            batch_definition_list=total_batch_definition_list
        )
    )

    assert len(feb_march_batch_definition_list) == 2
    feb_march_months: Set[str] = {
        batch_definition.batch_identifiers["month"]
        for batch_definition in feb_march_batch_definition_list
    }
    assert feb_march_months == {"02", "03"}

    # --- custom_filter_function: keep only months "01" and "03" ------------
    jan_march_batch_filter: BatchFilter = build_batch_filter(
        data_connector_query_dict={
            "custom_filter_function": lambda batch_identifiers: (
                batch_identifiers["month"] == "01"
                or batch_identifiers["month"] == "03"
            )
        }
    )
    jan_march_batch_definition_list: List[BatchDefinition] = (
        jan_march_batch_filter.select_from_data_connector_query(
            batch_definition_list=total_batch_definition_list
        )
    )

    assert len(jan_march_batch_definition_list) == 2
    jan_march_months: Set[str] = {
        batch_definition.batch_identifiers["month"]
        for batch_definition in jan_march_batch_definition_list
    }
    assert jan_march_months == {"01", "03"}

    # --- limit: at most two batch definitions, in their original order -----
    limit_batch_filter: BatchFilter = build_batch_filter(
        data_connector_query_dict={"limit": 2}
    )
    limit_batch_filter_definition_list: List[BatchDefinition] = (
        limit_batch_filter.select_from_data_connector_query(
            batch_definition_list=total_batch_definition_list
        )
    )

    assert len(limit_batch_filter_definition_list) == 2
    first_limited, second_limited = limit_batch_filter_definition_list
    assert first_limited["batch_identifiers"]["month"] == "01"
    assert first_limited["id"] == "0327cfb13205ec8512e1c28e438ab43b"
    assert second_limited["batch_identifiers"]["month"] == "02"
    assert second_limited["id"] == "0808e185a52825d22356de2fe00a8f5f"
Exemplo n.º 5
0
)

# The active batch should be December, as this should be the last one loaded. Confirming here.
assert validator_multi_batch.active_batch_definition.batch_identifiers["month"] == "12"

# Get the list of all batches contained by the Validator for use in the BatchFilter.
# Only the batch objects are needed, so iterate the values directly.
total_batch_definition_list: list = [
    batch.batch_definition for batch in validator_multi_batch.batches.values()
]

# Filter to all 2019 batch_definitions prior to December
pre_dec_batch_filter: BatchFilter = build_batch_filter(
    data_connector_query_dict={
        "custom_filter_function": lambda batch_identifiers: (
            int(batch_identifiers["month"]) < 12
            and batch_identifiers["year"] == "2019"
        )
    }
)
pre_dec_batch_definition_list: list = (
    pre_dec_batch_filter.select_from_data_connector_query(
        batch_definition_list=total_batch_definition_list
    )
)

# Get the highest max and lowest min before December.
# Start the running minimum at +infinity so any real value replaces it.
# `np.inf` is used because the `np.Inf` alias was removed in NumPy 2.0.
cumulative_max = 0
cumulative_min = np.inf
for batch_definition in pre_dec_batch_definition_list:
    batch_id: str = batch_definition.id
Exemplo n.º 6
0
        jan_batch_request, feb_batch_request, march_batch_request
    ],
    expectation_suite=suite,
)
# The active batch is expected to be March 2019.
assert validator.active_batch_definition.batch_identifiers["month"] == "03"
assert validator.active_batch_definition.batch_identifiers["year"] == "2019"

# Get the list of all batches contained by the Validator for use in the BatchFilter.
# Only the batch objects are needed, so iterate the values directly.
total_batch_definition_list: List = [
    batch.batch_definition for batch in validator.batches.values()
]

# Filter to all batch_definitions prior to March
jan_feb_batch_filter: BatchFilter = build_batch_filter(
    data_connector_query_dict={
        "custom_filter_function":
        lambda batch_identifiers: int(batch_identifiers["month"]) < 3
    })
jan_feb_batch_definition_list: list = (
    jan_feb_batch_filter.select_from_data_connector_query(
        batch_definition_list=total_batch_definition_list))

# Get the highest max and lowest min between January and February.
# Start the running minimum at +infinity so any real value replaces it.
# `np.inf` is used because the `np.Inf` alias was removed in NumPy 2.0.
cumulative_max = 0
cumulative_min = np.inf
for batch_definition in jan_feb_batch_definition_list:
    batch_id: str = batch_definition.id
    current_max = validator.get_metric(
        MetricConfiguration(
            "column.max",
            metric_domain_kwargs={