示例#1
0
    def get_batch_definition_list_from_batch_request(
        self, batch_request: BatchRequest
    ) -> List[BatchDefinition]:
        """Build the batch definitions of one data asset that match ``batch_request``.

        Args:
            batch_request: Request naming the data asset to look up. It is passed
                through ``self._validate_batch_request`` before anything else.

        Returns:
            One ``BatchDefinition`` per cached entry of the requested data asset
            for which ``batch_definition_matches_batch_request`` is truthy, in the
            order the entries appear in the cache.

        Raises:
            KeyError: If ``batch_request.data_asset_name`` is not a key of the
                data references cache.
        """
        self._validate_batch_request(batch_request=batch_request)

        # Populate the cache lazily. Truthiness (rather than ``len(...) == 0``)
        # also covers a cache that is still ``None`` instead of raising TypeError.
        if not self._data_references_cache:
            self._refresh_data_references_cache()

        data_asset_name = batch_request.data_asset_name
        try:
            sub_cache = self._data_references_cache[data_asset_name]
        except KeyError as e:
            # Re-raise with a descriptive message while keeping the original
            # lookup failure attached as the explicit cause.
            raise KeyError(
                f"data_asset_name {data_asset_name} is not recognized."
            ) from e

        # Lazily build one candidate per cached identifier set; only the
        # candidates accepted by the matcher are kept.
        candidates = (
            BatchDefinition(
                datasource_name=self.datasource_name,
                data_connector_name=self.name,
                data_asset_name=data_asset_name,
                batch_identifiers=IDDict(batch_identifiers),
                batch_spec_passthrough=batch_request.batch_spec_passthrough,
            )
            for batch_identifiers in sub_cache
        )
        return [
            candidate
            for candidate in candidates
            if batch_definition_matches_batch_request(candidate, batch_request)
        ]
示例#2
0
    def get_batch_definition_list_from_batch_request(
        self, batch_request: BatchRequest
    ) -> List[BatchDefinition]:
        """Build the batch definitions of one data asset that match ``batch_request``.

        Args:
            batch_request: Request naming the data asset to look up. It is passed
                through ``self._validate_batch_request`` before anything else.

        Returns:
            One ``BatchDefinition`` per cached partition definition of the
            requested data asset for which
            ``batch_definition_matches_batch_request`` is truthy, in the order
            the entries appear in the cache.

        Raises:
            KeyError: If ``batch_request.data_asset_name`` is not a key of the
                data references cache.
        """
        self._validate_batch_request(batch_request=batch_request)

        # The cache is filled on first use only; ``None`` marks "never loaded".
        if self._data_references_cache is None:
            self._refresh_data_references_cache()

        data_asset_name = batch_request.data_asset_name
        try:
            sub_cache = self._data_references_cache[data_asset_name]
        except KeyError as e:
            # Re-raise with a descriptive message while keeping the original
            # lookup failure attached as the explicit cause.
            raise KeyError(
                f"data_asset_name {data_asset_name} is not recognized."
            ) from e

        # Lazily build one candidate per cached partition definition; only the
        # candidates accepted by the matcher are kept.
        candidates = (
            BatchDefinition(
                datasource_name=self.datasource_name,
                data_connector_name=self.name,
                data_asset_name=data_asset_name,
                partition_definition=PartitionDefinition(partition_definition),
            )
            for partition_definition in sub_cache
        )
        return [
            candidate
            for candidate in candidates
            if batch_definition_matches_batch_request(candidate, batch_request)
        ]
示例#3
0
    def get_batch_definition_list_from_batch_request(
        self,
        batch_request: BatchRequest,
    ) -> List[BatchDefinition]:
        """Return the single candidate batch definition if it matches the request.

        The candidate is built for ``DEFAULT_DATA_ASSET_NAME`` from the
        ``"partition_identifiers"`` entry of ``batch_request.partition_request``
        (an empty dict when the request or the entry is missing or falsy).

        Args:
            batch_request: The request to validate and match against.

        Returns:
            A one-element list holding the candidate when
            ``batch_definition_matches_batch_request`` accepts it, otherwise an
            empty list.
        """
        self._validate_batch_request(batch_request=batch_request)

        requested_identifiers: Optional[dict] = None
        partition_request = batch_request.partition_request
        if partition_request:
            # Validate the identifiers before they are used for the candidate.
            self._validate_partition_identifiers(
                partition_identifiers=partition_request.get("partition_identifiers")
            )
            requested_identifiers = partition_request.get("partition_identifiers")

        candidate: BatchDefinition = BatchDefinition(
            datasource_name=self.datasource_name,
            data_connector_name=self.name,
            data_asset_name=DEFAULT_DATA_ASSET_NAME,
            # Any falsy identifiers value is replaced by an empty dict.
            partition_definition=PartitionDefinition(requested_identifiers or {}),
        )

        is_match = batch_definition_matches_batch_request(
            batch_definition=candidate, batch_request=batch_request
        )
        return [candidate] if is_match else []
示例#4
0
    def get_batch_definition_list_from_batch_request(
        self,
        batch_request: BatchRequest,
    ) -> List[BatchDefinition]:
        """Return the cached batch definitions that satisfy ``batch_request``.

        Steps, in order: validate the request, fill the data references cache if
        it is still ``None``, keep the cached definitions accepted by
        ``batch_definition_matches_batch_request``, narrow them with the
        partition query when ``batch_request.partition_request`` is set, and
        finally sort them when ``self.sorters`` is non-empty.

        Args:
            batch_request: The request to validate and match against.

        Returns:
            The matching batch definitions, sorted only if sorters exist.
        """
        self._validate_batch_request(batch_request=batch_request)

        if self._data_references_cache is None:
            self._refresh_data_references_cache()

        matching: List[BatchDefinition] = [
            candidate
            for candidate in self._get_batch_definition_list_from_cache()
            if batch_definition_matches_batch_request(
                batch_definition=candidate, batch_request=batch_request
            )
        ]

        partition_request = batch_request.partition_request
        if partition_request is not None:
            partition_query: PartitionQuery = build_partition_query(
                partition_request_dict=partition_request
            )
            matching = partition_query.select_from_partition_request(
                batch_definition_list=matching
            )

        # Without sorters the list is returned in its current order.
        if len(self.sorters) == 0:
            return matching
        return self._sort_batch_definition_list(batch_definition_list=matching)
示例#5
0
    def _get_batch_definition_list_from_batch_request(
        self,
        batch_request: BatchRequestBase,
    ) -> List[BatchDefinition]:
        """
        Collect the cached batch_definitions that satisfy batch_request.

        Processing order:
            - validate batch_request and refresh the data references cache when it is empty.
            - keep the cached batch_definitions accepted by batch_definition_matches_batch_request.
            - if data_connector has sorters configured, sort the remaining batch_definitions.
            - if batch_request has a data_connector_query, apply the batch filter built from a copy of it;
              batch_request.limit is used as the "limit" entry when the query does not set one.

        Args:
            batch_request (BatchRequestBase): BatchRequestBase (BatchRequest without attribute validation) to process

        Returns:
            A list of BatchDefinition objects that match BatchRequest

        """
        self._validate_batch_request(batch_request=batch_request)
        if len(self._data_references_cache) == 0:
            self._refresh_data_references_cache()

        selected: List[BatchDefinition] = [
            candidate
            for candidate in self._get_batch_definition_list_from_cache()
            if batch_definition_matches_batch_request(
                batch_definition=candidate, batch_request=batch_request
            )
        ]

        if len(self.sorters) > 0:
            selected = self._sort_batch_definition_list(batch_definition_list=selected)

        # No query on the request means there is nothing left to filter.
        if batch_request.data_connector_query is None:
            return selected

        # Work on a copy so that the request's own query dict is not modified.
        query_dict = batch_request.data_connector_query.copy()
        if batch_request.limit is not None and query_dict.get("limit") is None:
            query_dict["limit"] = batch_request.limit

        batch_filter: BatchFilter = build_batch_filter(
            data_connector_query_dict=query_dict
        )
        return batch_filter.select_from_data_connector_query(
            batch_definition_list=selected
        )
示例#6
0
    def _get_batch_definition_list_from_batch_request(
        self,
        batch_request: BatchRequestBase,
    ) -> List[BatchDefinition]:
        """
        Collect the cached batch_definitions that satisfy batch_request.

        Processing order:
            - validate batch_request and refresh the data references cache when it is None.
            - keep the cached batch_definitions accepted by batch_definition_matches_batch_request.
            - if batch_request has a partition_request, narrow the list with the partition query built from it.
            - if data_connector has sorters configured, sort the list before returning it.

        Args:
            batch_request (BatchRequestBase): BatchRequestBase (BatchRequest without attribute validation) to process

        Returns:
            A list of BatchDefinition objects that match BatchRequest

        """
        self._validate_batch_request(batch_request=batch_request)

        if self._data_references_cache is None:
            self._refresh_data_references_cache()

        def _accepts(candidate):
            # Predicate: does this cached definition satisfy the request?
            return batch_definition_matches_batch_request(
                batch_definition=candidate, batch_request=batch_request
            )

        selected: List[BatchDefinition] = list(
            filter(_accepts, self._get_batch_definition_list_from_cache())
        )

        partition_request = batch_request.partition_request
        if partition_request is not None:
            partition_query: PartitionQuery = build_partition_query(
                partition_request_dict=partition_request
            )
            selected = partition_query.select_from_partition_request(
                batch_definition_list=selected
            )

        # Without sorters the list is returned in its current order.
        if len(self.sorters) == 0:
            return selected
        return self._sort_batch_definition_list(batch_definition_list=selected)
示例#7
0
def test_batch_definition_matches_batch_request():
    """Check the matcher against one fixed definition and several requests.

    Each scenario overrides some fields of a fully matching request and states
    whether ``batch_definition_matches_batch_request`` must return ``True`` or
    ``False`` for it.
    """
    my_batch_definition = BatchDefinition(
        datasource_name="test_environment",
        data_connector_name="general_filesystem_data_connector",
        data_asset_name="TestFiles",
        batch_identifiers=IDDict(
            {"name": "eugene", "timestamp": "20200809", "price": "1500"}
        ),
    )

    # Keyword arguments of a request that matches the definition above.
    matching_kwargs = {
        "datasource_name": "test_environment",
        "data_connector_name": "general_filesystem_data_connector",
        "data_asset_name": "TestFiles",
        "data_connector_query": None,
    }

    scenarios = [
        # fully matching batch_request
        ({}, True),
        # datasource_name doesn't match
        ({"datasource_name": "i_dont_match"}, False),
        # data_connector_name doesn't match
        ({"data_connector_name": "i_dont_match"}, False),
        # data_asset_name doesn't match
        ({"data_asset_name": "i_dont_match"}, False),
        # batch_request.data_connector_query.batch_filter_parameters is not dict
        ({"data_connector_query": {"batch_filter_parameters": 1}}, False),
        # batch_filter_parameters do not match batch_definition.batch_identifiers
        (
            {"data_connector_query": {"batch_filter_parameters": {"i": "wont_work"}}},
            False,
        ),
    ]

    for overrides, expected in scenarios:
        # Requests are built one at a time, right before they are checked.
        my_batch_request = BatchRequest(**{**matching_kwargs, **overrides})
        outcome = batch_definition_matches_batch_request(
            my_batch_definition, my_batch_request
        )
        assert outcome is expected
def test__batch_definition_matches_batch_request():
    """Check the matcher for partition-based definitions and partial requests."""
    # TODO: <Alex>We need to cleanup PyCharm warnings.</Alex>
    definition_a = BatchDefinition(
        datasource_name="A",
        data_connector_name="a",
        data_asset_name="aaa",
        partition_definition=PartitionDefinition({"id": "A"}),
    )

    def matches_a(**request_kwargs):
        # Build a BatchRequest from the keywords and match it against definition_a.
        return batch_definition_matches_batch_request(
            batch_definition=definition_a,
            batch_request=BatchRequest(**request_kwargs),
        )

    # Requests that only name the datasource.
    assert matches_a(datasource_name="A")
    assert not matches_a(datasource_name="B")

    # Requests that additionally name the data connector / data asset.
    assert matches_a(datasource_name="A", data_connector_name="a")
    assert matches_a(
        datasource_name="A", data_connector_name="a", data_asset_name="aaa"
    )
    assert not matches_a(
        datasource_name="A", data_connector_name="a", data_asset_name="bbb"
    )

    # Requests that carry partition identifiers.
    assert not matches_a(
        datasource_name="A",
        data_connector_name="a",
        data_asset_name="aaa",
        partition_request={"partition_identifiers": {"id": "B"}},
    )
    assert matches_a(partition_request={"partition_identifiers": {"id": "A"}})

    # A separate definition/request pair whose partition_request is None.
    other_definition = BatchDefinition(
        datasource_name="FAKE_DATASOURCE",
        data_connector_name="TEST_DATA_CONNECTOR",
        data_asset_name="DEFAULT_ASSET_NAME",
        partition_definition=PartitionDefinition({"index": "3"}),
    )
    other_request = BatchRequest(
        datasource_name="FAKE_DATASOURCE",
        data_connector_name="TEST_DATA_CONNECTOR",
        data_asset_name="DEFAULT_ASSET_NAME",
        partition_request=None,
    )
    assert batch_definition_matches_batch_request(
        batch_definition=other_definition, batch_request=other_request
    )
def test__batch_definition_matches_batch_request():
    """Check the matcher for batch_identifiers-based definitions.

    The table lists, for one fixed definition, the keyword arguments of each
    ``BatchRequestBase`` and whether the matcher result must be truthy.
    """
    # TODO: <Alex>We need to cleanup PyCharm warnings.</Alex>
    definition_a = BatchDefinition(
        datasource_name="A",
        data_connector_name="a",
        data_asset_name="aaa",
        batch_identifiers=IDDict({"id": "A"}),
    )

    expectations = [
        (True, {"datasource_name": "A", "data_connector_name": "", "data_asset_name": ""}),
        (False, {"datasource_name": "B", "data_connector_name": "", "data_asset_name": ""}),
        (True, {"datasource_name": "A", "data_connector_name": "a", "data_asset_name": ""}),
        (True, {"datasource_name": "A", "data_connector_name": "a", "data_asset_name": "aaa"}),
        (False, {"datasource_name": "A", "data_connector_name": "a", "data_asset_name": "bbb"}),
        (
            False,
            {
                "datasource_name": "A",
                "data_connector_name": "a",
                "data_asset_name": "aaa",
                "data_connector_query": {"batch_filter_parameters": {"id": "B"}},
            },
        ),
        (
            True,
            {
                "datasource_name": "",
                "data_connector_name": "",
                "data_asset_name": "",
                "data_connector_query": {"batch_filter_parameters": {"id": "A"}},
            },
        ),
    ]

    for should_match, request_kwargs in expectations:
        # Requests are built one at a time, right before they are checked.
        outcome = batch_definition_matches_batch_request(
            batch_definition=definition_a,
            batch_request=BatchRequestBase(**request_kwargs),
        )
        assert bool(outcome) is should_match

    # A separate definition/request pair whose data_connector_query is None.
    other_definition = BatchDefinition(
        datasource_name="FAKE_DATASOURCE",
        data_connector_name="TEST_DATA_CONNECTOR",
        data_asset_name="DEFAULT_ASSET_NAME",
        batch_identifiers=IDDict({"index": "3"}),
    )
    other_request = BatchRequest(
        datasource_name="FAKE_DATASOURCE",
        data_connector_name="TEST_DATA_CONNECTOR",
        data_asset_name="DEFAULT_ASSET_NAME",
        data_connector_query=None,
    )
    assert batch_definition_matches_batch_request(
        batch_definition=other_definition, batch_request=other_request
    )