def test_empty_credentials_load(self, bad_credentials):
    """Test that loading fails when empty credentials are supplied."""
    parquet_data_set = ParquetDataSet(
        filepath=S3_PATH, credentials={"client_kwargs": bad_credentials}
    )
    pattern = r"Failed while loading data from data set ParquetDataSet\(.+\)"
    with pytest.raises(DataSetError, match=pattern):
        parquet_data_set.load().compute()
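# NOTE: ``bad_credentials`` is a fixture defined elsewhere in this module.
# A minimal module-level sketch follows; the exact keys and values are an
# assumption, not the project's actual fixture.
@pytest.fixture
def bad_credentials():
    # Empty client kwargs so that s3fs fails when the data set is loaded.
    return {"aws_access_key_id": None, "aws_secret_access_key": None}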
def test_pass_credentials_via_client_kwargs(self, mocker):
    """Test that AWS credentials wrapped in ``client_kwargs`` are passed
    successfully into boto3 client instantiation on creating S3 connection
    (covers ``s3fs`` versions that build the client via ``boto3.Session``)."""
    client_mock = mocker.patch("s3fs.core.boto3.Session.client")
    s3_data_set = ParquetDataSet(
        filepath=S3_PATH, credentials={"client_kwargs": AWS_CREDENTIALS}
    )
    pattern = r"Failed while loading data from data set ParquetDataSet\(.+\)"
    with pytest.raises(DataSetError, match=pattern):
        s3_data_set.load().compute()

    assert client_mock.call_count == 1
    args, kwargs = client_mock.call_args_list[0]
    assert args == ("s3",)
    for key, value in AWS_CREDENTIALS.items():
        assert kwargs[key] == value
def test_pass_credentials(self, mocker):
    """Test that AWS credentials are passed successfully into boto3
    client instantiation on creating S3 connection."""
    client_mock = mocker.patch("botocore.session.Session.create_client")
    s3_data_set = ParquetDataSet(filepath=S3_PATH, credentials=AWS_CREDENTIALS)
    pattern = r"Failed while loading data from data set ParquetDataSet\(.+\)"
    with pytest.raises(DataSetError, match=pattern):
        s3_data_set.load().compute()

    assert client_mock.call_count == 1
    args, kwargs = client_mock.call_args_list[0]
    assert args == ("s3",)
    assert kwargs["aws_access_key_id"] == AWS_CREDENTIALS["key"]
    assert kwargs["aws_secret_access_key"] == AWS_CREDENTIALS["secret"]
@pytest.fixture
def s3_data_set(load_args, save_args):
    return ParquetDataSet(
        filepath=S3_PATH,
        credentials={"client_kwargs": AWS_CREDENTIALS},
        load_args=load_args,
        save_args=save_args,
    )
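# NOTE: ``load_args`` and ``save_args`` are fixtures defined elsewhere in
# this module, typically parametrized so the data set can be exercised with
# several argument combinations. A minimal sketch under that assumption
# (the ``params`` defaults here are illustrative, not the project's actual
# values):
@pytest.fixture(params=[None])
def load_args(request):
    return request.param


@pytest.fixture(params=[None])
def save_args(request):
    return request.param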
def test_incorrect_credentials_load(self):
    """Test that incorrect credential keys won't instantiate dataset."""
    pattern = r"unexpected keyword argument"
    with pytest.raises(DataSetError, match=pattern):
        ParquetDataSet(
            filepath=S3_PATH,
            credentials={
                "client_kwargs": {"access_token": "TOKEN", "access_key": "KEY"}
            },
        ).load().compute()
def test_save_load_locally(self, tmp_path, dummy_dd_dataframe):
    """Test saving and reloading the data locally."""
    file_path = str(tmp_path / "some" / "dir" / FILE_NAME)
    data_set = ParquetDataSet(filepath=file_path)

    assert not data_set.exists()
    data_set.save(dummy_dd_dataframe)
    assert data_set.exists()
    loaded_data = data_set.load()
    # The round-tripped data must match the original.
    assert dummy_dd_dataframe.compute().equals(loaded_data.compute())
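# NOTE: ``dummy_dd_dataframe`` is a fixture defined elsewhere in this
# module. A minimal sketch building a small Dask DataFrame (the column
# names and values are an assumption; this also assumes
# ``import pandas as pd`` and ``import dask.dataframe as dd`` at the top
# of the module):
@pytest.fixture
def dummy_dd_dataframe():
    df = pd.DataFrame({"col1": [1, 2], "col2": [4, 5], "col3": [5, 6]})
    return dd.from_pandas(df, npartitions=1)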