Пример #1
0
    def test_download_table_as_df(self, mocker):
        """Verify that download_table_as_df exports the table as CSV to the
        staging location and then hands the exported path to gcs_to_df.
        """
        # NOTE(review): presumably freezes time so the generated staging file
        # name ("temp_0") is deterministic -- confirm against _stop_time.
        self._stop_time(mocker)
        gcs_to_df_mock = mocker.patch("feast.sdk.utils.bq_util.gcs_to_df",
                                      return_value=None)

        bucket_uri = "gs://temp/"
        source_table = "project_id.dataset_id.table_id"

        downloader = TableDownloader()
        expected_uri = os.path.join(bucket_uri, "temp_0")

        # Swap in a fake BigQuery client whose extract_table is a mock.
        downloader._bq = _Mock_BQ_Client()
        mocker.patch.object(downloader._bq,
                            "extract_table",
                            return_value=_Job())

        downloader.download_table_as_df(source_table,
                                        staging_location=bucket_uri)

        # Exactly one extract job must have been started.
        extract_mock = downloader._bq.extract_table
        assert extract_mock.call_count == 1

        positional, keywords = extract_mock.call_args
        expected_table = Table.from_string(source_table)
        assert positional[0].full_table_id == expected_table.full_table_id
        assert positional[1] == expected_uri
        assert keywords["job_config"].destination_format == "CSV"

        gcs_to_df_mock.assert_called_once_with(expected_uri)
Пример #2
0
    def _test_download_file(self, mocker, type):
        """Shared check for download_table_as_file with a given file type.

        Asserts that a single extract job is issued towards the staging
        location with the requested destination format, and that the staged
        blob is downloaded to the requested local path.

        Args:
            mocker: pytest-mock fixture used to patch the fake clients.
            type: file type passed as ``file_type``; its ``str()`` value is
                compared with the job's destination format. (The name
                shadows the builtin inside this helper.)
        """
        bucket_uri = "gs://temp/"
        local_target = "/tmp/myfile.csv"
        source_table = "project_id.dataset_id.table_id"

        downloader = TableDownloader()

        # Fake blob whose download call is recorded by a mock.
        blob = _Blob()
        mocker.patch.object(blob, "download_to_filename")

        # Fake BigQuery client: extract_table returns a stub job.
        downloader._bq = _Mock_BQ_Client()
        mocker.patch.object(downloader._bq,
                            "extract_table",
                            return_value=_Job())

        # Fake GCS client: any bucket lookup yields a bucket holding the blob.
        downloader._gcs = _Mock_GCS_Client()
        mocker.patch.object(downloader._gcs,
                            "get_bucket",
                            return_value=_Bucket(blob))

        downloader.download_table_as_file(source_table,
                                          local_target,
                                          staging_location=bucket_uri,
                                          file_type=type)

        expected_uri = os.path.join(bucket_uri, "temp_0")
        extract_mock = downloader._bq.extract_table
        assert extract_mock.call_count == 1

        positional, keywords = extract_mock.call_args
        expected_table = Table.from_string(source_table)
        assert positional[0].full_table_id == expected_table.full_table_id
        assert positional[1] == expected_uri
        assert keywords["job_config"].destination_format == str(type)

        blob.download_to_filename.assert_called_once_with(local_target)
Пример #3
0
    def __init__(self, core_url=None, serving_url=None, verbose=False):
        """Create a Feast client for the given core and serving endpoints.

        Any URL left as None is read from the environment instead: the core
        URL from the variable named by FEAST_CORE_URL_ENV_KEY and the serving
        URL from the variable named by FEAST_SERVING_URL_ENV_KEY.

        Args:
            core_url (str, optional): feast's grpc endpoint URL
                                  (e.g.: "my.feast.com:8433")
            serving_url (str, optional): feast serving's grpc endpoint URL
                                  (e.g.: "my.feast.com:8433")
            verbose (bool, optional): stored on the client as ``_verbose``;
                                  defaults to False
        """
        # Only None triggers the environment lookup; any other value
        # (including an empty string) is stored as given.
        self._core_url = (os.getenv(FEAST_CORE_URL_ENV_KEY)
                          if core_url is None else core_url)
        self._serving_url = (os.getenv(FEAST_SERVING_URL_ENV_KEY)
                             if serving_url is None else serving_url)

        # Channels and service stubs start out unset.
        # NOTE(review): presumably created on first use elsewhere -- confirm.
        self.__core_channel = None
        self.__serving_channel = None
        self._core_service_stub = None
        self._job_service_stub = None
        self._dataset_service_stub = None
        self._serving_service_stub = None

        self._verbose = verbose
        self._table_downloader = TableDownloader()
Пример #4
0
    def _test_download_file(self, mocker, type):
        """Shared check for download_table_as_file with a given file type.

        Asserts that a single extract job writes to ``shard_*`` under the
        staging folder with the requested destination format, and that
        gcs_folder_to_file is called with that folder and the local path.

        Args:
            mocker: pytest-mock fixture used for patching.
            type: file type passed as ``file_type``; its ``str()`` value is
                compared with the job's destination format. (The name
                shadows the builtin inside this helper.)
        """
        folder_to_file_mock = mocker.patch(
            "feast.sdk.utils.bq_util.gcs_folder_to_file", return_value=None)

        bucket_uri = "gs://temp"
        source_table = "project_id.dataset_id.table_id"
        local_target = "/tmp/myfile.csv"

        # Expected layout: <staging>/temp_0/shard_*
        expected_folder = os.path.join(bucket_uri, "temp_0")
        expected_shards = os.path.join(expected_folder, "shard_*")

        # Fake BigQuery client: extract_table returns a stub job.
        downloader = TableDownloader()
        downloader._bqclient = _Mock_BQ_Client()
        mocker.patch.object(downloader._bqclient,
                            "extract_table",
                            return_value=_Job())

        downloader.download_table_as_file(source_table,
                                          local_target,
                                          staging_location=bucket_uri,
                                          file_type=type)

        extract_mock = downloader._bqclient.extract_table
        assert extract_mock.call_count == 1

        positional, keywords = extract_mock.call_args
        expected_table = Table.from_string(source_table)
        assert positional[0].full_table_id == expected_table.full_table_id
        assert positional[1] == expected_shards
        assert keywords["job_config"].destination_format == str(type)

        folder_to_file_mock.assert_called_once_with(
            expected_folder, local_target)
Пример #5
0
    def test_download_invalid_staging_url(self):
        """Both download entry points must reject a local staging directory.

        Each call is expected to raise ValueError whose message matches the
        staging_uri error text.
        """
        expected_error = "staging_uri must be a directory in GCS"
        local_staging = "/local/directory"
        source_table = "project_id.dataset_id.table_id"
        downloader = TableDownloader()

        # File download, with staging location and file type positional.
        with pytest.raises(ValueError, match=expected_error):
            downloader.download_table_as_file(
                source_table, "/tmp/dst", local_staging, FileType.CSV)

        # DataFrame download with the same invalid staging location.
        with pytest.raises(ValueError, match=expected_error):
            downloader.download_table_as_df(source_table, local_staging)
Пример #6
0
    def test_download_dataset_as_file(self, client, mocker):
        """Verify that client.download_dataset delegates to the table
        downloader and returns whatever download_table_as_file returns.
        """
        dest_path = "/tmp/dest_file"
        table_ref = "project.dataset.table"
        staging_uri = "gs://gcs_bucket/"

        # Give the client a downloader whose file download is mocked out.
        downloader = TableDownloader()
        mocker.patch.object(downloader,
                            "download_table_as_file",
                            return_value=dest_path)
        client._table_downloader = downloader

        returned = client.download_dataset(
            DatasetInfo("mydataset", table_ref),
            dest_path,
            staging_location=staging_uri,
            file_type=FileType.CSV)

        assert returned == dest_path
        # The downloader must receive the dataset's table id positionally,
        # followed by destination, staging location and file type.
        downloader.download_table_as_file.assert_called_once_with(
            table_ref, dest_path, staging_uri, FileType.CSV)