def test_s3_not_found(self, aws_session, parquet_file_s3_1):
    """A non-wildcard key with no matching object resolves to itself.

    `_resolve_wildcard` is expected to return a one-element list that
    compares equal to an `S3ParquetFile` built with the same arguments.
    """
    bucket, _unused_key = parquet_file_s3_1
    missing_key = 'not_found.parquet'

    resolved = _resolve_wildcard(
        S3ParquetFile(aws_session=aws_session, bucket=bucket, key=missing_key))
    expected = [
        S3ParquetFile(aws_session=aws_session, bucket=bucket, key=missing_key)
    ]

    assert resolved == expected
def test_s3_double_file(self, aws_session, parquet_file_s3_1, parquet_file_s3_2):
    """Two explicitly named S3 objects are loaded into a single DataFrame.

    The combined frame must hold 3 * 2 rows (presumably 3 rows per fixture
    file -- confirm against the fixtures).
    """
    first_bucket, first_key = parquet_file_s3_1
    second_bucket, second_key = parquet_file_s3_2
    sources = [
        S3ParquetFile(aws_session=aws_session, bucket=first_bucket, key=first_key),
        S3ParquetFile(aws_session=aws_session, bucket=second_bucket, key=second_key),
    ]

    with get_datafame_from_objs(sources) as frame:
        assert isinstance(frame, pd.DataFrame)
        assert len(frame) == 3 * 2
def test_single_s3file(self, aws_session, parquet_file_s3_1):
    """Resolving a concrete S3 key yields exactly one `S3ParquetFile`.

    The single result must carry a key ending in `.parquet`.
    """
    bucket, key = parquet_file_s3_1
    target = S3ParquetFile(aws_session=aws_session, bucket=bucket, key=key)

    resolved = _resolve_wildcard(target)

    assert len(resolved) == 1
    only_result = resolved[0]
    assert isinstance(only_result, S3ParquetFile)
    assert only_result.key.endswith('.parquet')
def test_resolve_wildcard(self, aws_session, parquet_file_s3_1):
    """A `*` key resolves to the one object provided by the fixture.

    The resolved entry must report the fixture's bucket and key.
    """
    bucket, key = parquet_file_s3_1
    wildcard_file = S3ParquetFile(aws_session=aws_session, bucket=bucket, key='*')

    matches = wildcard_file.resolve_wildcard()

    assert len(matches) == 1
    found = matches[0]
    assert found.bucket == bucket
    assert found.key == key
def test_s3_single_file(self, aws_session, parquet_file_s3_1):
    """A single S3 object is loaded into a DataFrame with 3 * 1 rows."""
    bucket, key = parquet_file_s3_1
    sources = [S3ParquetFile(aws_session=aws_session, bucket=bucket, key=key)]

    with get_datafame_from_objs(sources) as frame:
        assert isinstance(frame, pd.DataFrame)
        assert len(frame) == 3 * 1
def test_local_and_s3_files(self, aws_session, parquet_file_s3_1):
    """One local file and one S3 object are combined into one DataFrame.

    The combined frame must hold 3 * 2 rows.
    """
    bucket, key = parquet_file_s3_1
    local_source = LocalParquetFile(path='./tests/test1.parquet')
    s3_source = S3ParquetFile(aws_session=aws_session, bucket=bucket, key=key)

    with get_datafame_from_objs([local_source, s3_source]) as frame:
        assert isinstance(frame, pd.DataFrame)
        assert len(frame) == 3 * 2
def test_s3_wildcard_file(self, aws_session, parquet_file_s3_1, parquet_file_s3_2):
    """A `*` key on S3 loads every matching object into one DataFrame.

    Both S3 fixtures are requested and the frame must hold 3 * 2 rows
    (presumably both fixtures upload into the same bucket -- confirm).
    """
    bucket, _unused_key = parquet_file_s3_1
    sources = [
        S3ParquetFile(aws_session=aws_session, bucket=bucket, key='*'),
    ]

    with get_datafame_from_objs(sources) as frame:
        assert isinstance(frame, pd.DataFrame)
        assert len(frame) == 3 * 2
def test_local_and_s3_wildcard_files(self, aws_session, parquet_file_s3_1, parquet_file_s3_2):
    """Local and S3 wildcards are both expanded and merged into one DataFrame.

    The combined frame must hold 3 * (3 + 2) rows.
    """
    bucket, _unused_key = parquet_file_s3_1
    # hit local 3 files
    local_wildcard = LocalParquetFile(path='./tests/*')
    # hit 2 files on s3
    s3_wildcard = S3ParquetFile(aws_session=aws_session, bucket=bucket, key='*')

    with get_datafame_from_objs([local_wildcard, s3_wildcard]) as frame:
        assert isinstance(frame, pd.DataFrame)
        assert len(frame) == 3 * (3 + 2)
def test_local_path(self, aws_session, parquet_file_s3_1):
    """`get_local_path` yields a value ending in `.parquet` for an S3 object."""
    bucket, key = parquet_file_s3_1
    s3_file = S3ParquetFile(aws_session=aws_session, bucket=bucket, key=key)

    with s3_file.get_local_path() as local_path:
        assert local_path.endswith('.parquet')
def test_resolve_wildcard_not_found(self, aws_session, parquet_file_s3_1):
    """A wildcard key that matches nothing resolves to an empty result."""
    bucket, _unused_key = parquet_file_s3_1
    unmatched = S3ParquetFile(aws_session=aws_session, bucket=bucket, key='not_found*')

    matches = unmatched.resolve_wildcard()

    assert len(matches) == 0
def test_validation(self, aws_session, bucket, key, expected):
    """Check `S3ParquetFile` construction against the `expected` flag.

    When `expected` is truthy the constructor must succeed; otherwise it
    must raise `InvalidCommandExcpetion`. The `bucket`, `key` and
    `expected` arguments are presumably supplied by a parametrize
    decorator outside this view -- confirm.
    """
    def build():
        return S3ParquetFile(aws_session=aws_session, bucket=bucket, key=key)

    if expected:
        build()
        return

    with pytest.raises(InvalidCommandExcpetion):
        build()