Пример #1
0
 def test_s3_not_found(self, aws_session, parquet_file_s3_1):
     """``_resolve_wildcard`` returns a wildcard-free S3 key as-is.

     The key ``not_found.parquet`` contains no wildcard character; the
     expected result is a one-element list equal to the object passed in.
     """
     bucket_name, _ = parquet_file_s3_1
     file_kwargs = dict(aws_session=aws_session,
                        bucket=bucket_name,
                        key='not_found.parquet')
     # Two separate instances are built on purpose: the expected object is
     # created after the call, so the check cannot pass on identity alone.
     resolved = _resolve_wildcard(S3ParquetFile(**file_kwargs))
     expected = [S3ParquetFile(**file_kwargs)]
     assert resolved == expected
Пример #2
0
 def test_s3_double_file(self, aws_session, parquet_file_s3_1,
                         parquet_file_s3_2):
     """Two explicitly named S3 objects are loaded into one DataFrame.

     The expected length is ``3 * 2`` (presumably 3 rows in each of the
     2 fixture files -- confirm against the fixtures).
     """
     first_bucket, first_key = parquet_file_s3_1
     second_bucket, second_key = parquet_file_s3_2
     locations = (
         (first_bucket, first_key),
         (second_bucket, second_key),
     )
     sources = [
         S3ParquetFile(aws_session=aws_session, bucket=name, key=obj_key)
         for name, obj_key in locations
     ]
     with get_datafame_from_objs(sources) as frame:
         assert isinstance(frame, pd.DataFrame)
         row_count = len(frame)
         assert row_count == 3 * 2
Пример #3
0
 def test_single_s3file(self, aws_session, parquet_file_s3_1):
     """Resolving a concrete S3 key yields exactly one ``S3ParquetFile``.

     The single result must also carry a key ending in ``.parquet``.
     """
     bucket_name, object_key = parquet_file_s3_1
     source = S3ParquetFile(aws_session=aws_session,
                            bucket=bucket_name,
                            key=object_key)
     resolved = _resolve_wildcard(source)
     assert len(resolved) == 1
     only_entry = resolved[0]
     assert isinstance(only_entry, S3ParquetFile)
     assert only_entry.key.endswith('.parquet')
Пример #4
0
 def test_resolve_wildcard(self, aws_session, parquet_file_s3_1):
     """A ``'*'`` key resolves to the single object from the fixture.

     The one resolved entry must match the fixture's bucket and key.
     """
     bucket_name, object_key = parquet_file_s3_1
     wildcard_file = S3ParquetFile(aws_session=aws_session,
                                   bucket=bucket_name,
                                   key='*')
     matches = wildcard_file.resolve_wildcard()
     assert len(matches) == 1
     match = matches[0]
     assert match.bucket == bucket_name
     assert match.key == object_key
Пример #5
0
 def test_s3_single_file(self, aws_session, parquet_file_s3_1):
     """A single S3 object is loaded into a DataFrame.

     The expected length is ``3 * 1`` (presumably 3 rows in the one
     fixture file -- confirm against the fixture).
     """
     bucket_name, object_key = parquet_file_s3_1
     source = S3ParquetFile(aws_session=aws_session,
                            bucket=bucket_name,
                            key=object_key)
     with get_datafame_from_objs([source]) as frame:
         assert isinstance(frame, pd.DataFrame)
         row_count = len(frame)
         assert row_count == 3 * 1
Пример #6
0
 def test_local_and_s3_files(self, aws_session, parquet_file_s3_1):
     """One local file and one S3 object are combined into one DataFrame.

     The expected length is ``3 * 2`` (presumably 3 rows per file across
     the 2 sources -- confirm against the test data).
     """
     bucket_name, object_key = parquet_file_s3_1
     local_source = LocalParquetFile(path='./tests/test1.parquet')
     s3_source = S3ParquetFile(aws_session=aws_session,
                               bucket=bucket_name,
                               key=object_key)
     with get_datafame_from_objs([local_source, s3_source]) as frame:
         assert isinstance(frame, pd.DataFrame)
         row_count = len(frame)
         assert row_count == 3 * 2
Пример #7
0
 def test_s3_wildcard_file(self, aws_session, parquet_file_s3_1,
                           parquet_file_s3_2):
     """A ``'*'`` S3 key is loaded into one DataFrame.

     ``parquet_file_s3_2`` is requested as a fixture but never read in the
     body; presumably it puts a second object in the same bucket, which
     would explain the expected length of ``3 * 2`` -- confirm against the
     fixtures.
     """
     bucket_name, _ = parquet_file_s3_1
     wildcard_source = S3ParquetFile(aws_session=aws_session,
                                     bucket=bucket_name,
                                     key='*')
     sources = [wildcard_source]
     with get_datafame_from_objs(sources) as frame:
         assert isinstance(frame, pd.DataFrame)
         row_count = len(frame)
         assert row_count == 3 * 2
Пример #8
0
 def test_local_and_s3_wildcard_files(self, aws_session, parquet_file_s3_1,
                                      parquet_file_s3_2):
     """Local and S3 wildcard sources are combined into one DataFrame.

     The expected length is ``3 * (3 + 2)``: the inline notes count 3 local
     files and 2 S3 files (presumably 3 rows each -- confirm against the
     test data). ``parquet_file_s3_2`` is requested as a fixture but never
     read in the body.
     """
     bucket_name, _ = parquet_file_s3_1
     # hit local 3 files
     local_wildcard = LocalParquetFile(path='./tests/*')
     # hit 2 files on s3
     s3_wildcard = S3ParquetFile(aws_session=aws_session,
                                 bucket=bucket_name,
                                 key='*')
     with get_datafame_from_objs([local_wildcard, s3_wildcard]) as frame:
         assert isinstance(frame, pd.DataFrame)
         row_count = len(frame)
         assert row_count == 3 * (3 + 2)
Пример #9
0
    def test_local_path(self, aws_session, parquet_file_s3_1):
        """``get_local_path`` yields a value ending with ``.parquet``.

        ``get_local_path()`` is used as a context manager here; the value it
        binds is checked for the ``.parquet`` suffix.
        """
        bucket_name, object_key = parquet_file_s3_1
        source = S3ParquetFile(aws_session=aws_session,
                               bucket=bucket_name,
                               key=object_key)
        with source.get_local_path() as local_path:
            assert local_path.endswith('.parquet')
Пример #10
0
 def test_resolve_wildcard_not_found(self, aws_session, parquet_file_s3_1):
     """The pattern ``not_found*`` resolves to an empty result."""
     bucket_name, _ = parquet_file_s3_1
     unmatched = S3ParquetFile(aws_session=aws_session,
                               bucket=bucket_name,
                               key='not_found*')
     matches = unmatched.resolve_wildcard()
     match_count = len(matches)
     assert match_count == 0
Пример #11
0
 def test_validation(self, aws_session, bucket, key, expected):
     """Check argument validation in the ``S3ParquetFile`` constructor.

     When ``expected`` is truthy the construction must succeed; otherwise
     it must raise ``InvalidCommandExcpetion``. The ``bucket``/``key``/
     ``expected`` values are presumably supplied by a parametrize decorator
     outside this view -- confirm.
     """
     def build():
         return S3ParquetFile(aws_session=aws_session,
                              bucket=bucket,
                              key=key)

     if expected:
         build()
         return

     # The exception name is kept exactly as the test already references it.
     with pytest.raises(InvalidCommandExcpetion):
         build()