Пример #1
0
def test_cat(run_test):
    """Check that files added to a bundle come back from ``api.cat`` unchanged.

    Three CSV files are written to a scratch directory, the directory is
    passed to ``api.add``, and every path returned by ``api.cat`` is hashed
    and compared against the source file sharing its numeric prefix.

    ``run_test`` is not referenced in the body; presumably it is a fixture
    requested for its setup side effects -- TODO confirm.
    """

    import tempfile

    # tempfile.TemporaryDirectory() is Python 3 only, so the directory is
    # created with mkdtemp() and removed by hand in the ``finally`` clause.
    scratch_dir = tempfile.mkdtemp()
    try:
        # Write the source files: 0_test.csv, 1_test.csv, 2_test.csv.
        for idx in range(3):
            csv_path = os.path.join(str(scratch_dir), '{}_test.csv'.format(idx))
            # NOTE(review): these randint signatures presumably come from
            # numpy.random rather than the stdlib module -- confirm.
            frame = pd.DataFrame({'a': random.randint(0,10,10), 'b': random.randint(10)})
            frame.to_csv(csv_path)
            assert os.path.exists(csv_path)

        # Hand the whole directory to the bundle.
        api.add(TEST_CONTEXT, TEST_BUNDLE_NAME, scratch_dir)

        # Read the bundle contents back.
        bundled_paths = api.cat(TEST_CONTEXT, TEST_BUNDLE_NAME)

        # Each returned file must hash the same as the source it came from;
        # the leading number of its basename identifies that source.
        for bundled_path in bundled_paths:
            prefix = os.path.basename(bundled_path).split('_')[0]
            bundle_hash = get_hash(bundled_path)
            file_hash = get_hash(os.path.join(scratch_dir, '{}_test.csv'.format(prefix)))
            assert bundle_hash == file_hash, 'Hashes do not match'
    finally:
        shutil.rmtree(scratch_dir)
Пример #2
0
def test_remote_no_push_non_managed_s3():
    """Apply ``NonManagedS3`` with a remote bound and no incremental push.

    After the apply, the first ``file`` entry of bundle ``b2`` must not exist
    on the local filesystem and must be an ``s3://`` path.
    """
    # Reset the test context.
    api.delete_context(TEST_CONTEXT)
    api.context(context_name=TEST_CONTEXT)

    # Set up the S3 buckets (the original comment says these are moto
    # resources -- TODO confirm the mock is active for this test).
    s3_client = boto3.client('s3')
    s3_resource = boto3.resource('s3')
    bucket_names = (TEST_BUCKET, TEST_BUCKET_OTHER)
    for bucket_name in bucket_names:
        s3_resource.create_bucket(Bucket=bucket_name)

    # Both buckets must start out empty: a listing of an empty bucket carries
    # no 'Contents' key.
    for bucket_name in bucket_names:
        listing = s3_client.list_objects(Bucket=bucket_name)
        assert 'Contents' not in listing, 'Bucket should be empty'

    # Bind the remote to the context.
    api.remote(TEST_CONTEXT, TEST_REMOTE, TEST_BUCKET_URL)

    # Run the pipeline and show what the bundle holds.
    api.apply(TEST_CONTEXT, NonManagedS3)
    print(api.cat(TEST_CONTEXT, 'b2'))

    # With a remote bound, the bundle's file must not be present locally.
    bundle = api.search(TEST_CONTEXT, human_name='b2')[0]
    is_local = os.path.exists(bundle.data['file'][0])
    assert not is_local, 'Non Managed S3 file w/ remote should be copied to remote'
    assert bundle.data['file'][0].startswith("s3://")
Пример #3
0
def test_non_managed_local():
    """Apply ``NonManagedLocal`` and check that its output file exists locally.

    The context is reset and verified empty, the pipeline is applied, exactly
    one bundle is expected afterwards, and the first ``file`` entry of bundle
    ``b1`` must be an existing local path.
    """
    # Reset the test context.
    api.delete_context(TEST_CONTEXT)
    api.context(context_name=TEST_CONTEXT)

    bundles_before = api.search(TEST_CONTEXT)
    assert len(bundles_before) == 0, 'Context should be empty'

    api.apply(TEST_CONTEXT, NonManagedLocal)

    bundles_after = api.search(TEST_CONTEXT)
    assert len(bundles_after) == 1, 'Only one bundle should be present'
    print(api.cat(TEST_CONTEXT, 'b1'))

    # Look the bundle up by its human name and check its first file on disk.
    b1 = api.search(TEST_CONTEXT, human_name='b1')[0]
    local_path = b1.data['file'][0]
    assert os.path.exists(local_path), \
        'Local file should be present in bundle'
Пример #4
0
def test_remote_push_non_managed_s3():
    """Apply ``NonManagedS3`` with a remote bound and ``incremental_push=True``.

    Checks, in order:
      * the first ``file`` entry of bundle ``b2`` is not on the local
        filesystem right after the apply;
      * after ``b.pull(localize=True)`` that entry exists locally;
      * ``TEST_BUCKET_OTHER`` holds an object whose key ends in
        ``test.parquet``.
    """
    api.delete_context(TEST_CONTEXT)
    api.context(context_name=TEST_CONTEXT)

    # Setup moto s3 resources
    s3_client = boto3.client('s3')
    s3_resource = boto3.resource('s3')
    s3_resource.create_bucket(Bucket=TEST_BUCKET)
    s3_resource.create_bucket(Bucket=TEST_BUCKET_OTHER)

    # Make sure bucket is empty
    objects = s3_client.list_objects(Bucket=TEST_BUCKET)
    assert 'Contents' not in objects, 'Bucket should be empty'
    objects = s3_client.list_objects(Bucket=TEST_BUCKET_OTHER)
    assert 'Contents' not in objects, 'Bucket should be empty'

    # Bind remote context
    api.remote(TEST_CONTEXT, TEST_REMOTE, TEST_BUCKET_URL)

    # Apply
    api.apply(TEST_CONTEXT, NonManagedS3, incremental_push=True)
    print(api.cat(TEST_CONTEXT, 'b2'))

    # Local context should not contain file if a remote exists.
    b = api.search(TEST_CONTEXT, human_name='b2')[0]
    assert not os.path.exists(
        b.data['file']
        [0]), 'Non Managed S3 file w/ remote should be copied to remote'
    # b.data is re-read after the pull rather than cached, because the pull is
    # expected to change what the entry points at.
    b.pull(localize=True)
    assert os.path.exists(
        b.data['file']
        [0]), 'Non Managed S3 file after pull should be copied to local'

    # Get objects from remote.  An empty bucket listing has no 'Contents' key
    # (the emptiness checks above rely on that), so fall back to an empty list:
    # a missing push then fails on the assertion below with its message instead
    # of raising a bare KeyError here.
    objects = s3_client.list_objects(Bucket=TEST_BUCKET_OTHER)
    keys = [o['Key'].split('/')[-1] for o in objects.get('Contents', [])]

    # Make sure files exist in S3
    for output_file in ['test.parquet']:
        assert output_file in keys, 'Pipeline should have pushed file'
Пример #5
0
def test_no_remote_push_non_managed_s3():
    """Apply ``NonManagedS3`` with ``incremental_push=True`` but no remote bound.

    Exactly one bundle is expected afterwards, and the first ``file`` entry of
    bundle ``b2`` must be an existing local path.
    """
    # Reset the test context.
    api.delete_context(TEST_CONTEXT)
    api.context(context_name=TEST_CONTEXT)

    # Set up the S3 buckets (the original comment says these are moto
    # resources -- TODO confirm the mock is active for this test).
    s3_client = boto3.client('s3')
    s3_resource = boto3.resource('s3')
    bucket_names = (TEST_BUCKET, TEST_BUCKET_OTHER)
    for bucket_name in bucket_names:
        s3_resource.create_bucket(Bucket=bucket_name)

    # Both buckets must start out empty: a listing of an empty bucket carries
    # no 'Contents' key.
    for bucket_name in bucket_names:
        listing = s3_client.list_objects(Bucket=bucket_name)
        assert 'Contents' not in listing, 'Bucket should be empty'

    # No api.remote() call here: the pipeline runs without a bound remote.
    api.apply(TEST_CONTEXT, NonManagedS3, incremental_push=True)
    print(api.cat(TEST_CONTEXT, 'b2'))

    found = api.search(TEST_CONTEXT)
    assert len(found) == 1, 'One bundle should be present'

    # Look the bundle up by its human name and check its first file on disk.
    b2 = api.search(TEST_CONTEXT, human_name='b2')[0]
    local_path = b2.data['file'][0]
    assert os.path.exists(local_path), \
        'Non Managed S3 file should be copied to local'