def deprecated_data_as_bundle_not_csv(tmpdir):
    """Adding a plain .txt file with treat_file_as_bundle=True must fail and create no bundle."""
    api.context(TEST_CONTEXT)

    # Write a plain-text file that is not a valid bundle source.
    test_txt_path = os.path.join(str(tmpdir), 'test.txt')
    with open(test_txt_path, 'w') as f:
        f.write('this should not create a bundle')
    assert os.path.exists(test_txt_path)

    # api.add is expected to reject the file with an AssertionError.
    with pytest.raises(AssertionError) as ex:
        api.add(TEST_CONTEXT, 'bad_path', test_txt_path, treat_file_as_bundle=True)
    assert ex.type == AssertionError

    # No bundle should have been created as a side effect.
    assert api.get(TEST_CONTEXT, 'test_file_as_bundle_txt_file') is None, 'Bundle should not exist'

    api.delete_context(TEST_CONTEXT)
def test_add_remote_fail():
    """Binding a remote must raise RuntimeError for both bucket-only and bucket+key URLs.

    Fix: the original collected the exception in a variable and asserted its type in
    ``finally``; if the *second* ``api.remote`` call did not raise, the stale error
    from the first call made the assertion pass vacuously. ``pytest.raises`` checks
    each call independently.
    """
    api.delete_context(TEST_CONTEXT)
    api.context(context_name=TEST_CONTEXT)

    # Setup moto s3 resources (kept for parity with the sibling remote tests).
    s3_client = boto3.client('s3')
    s3_resource = boto3.resource('s3', region_name='us-east-1')

    # Bind remote context with just bucket -- expected to fail.
    with pytest.raises(RuntimeError):
        api.remote(TEST_CONTEXT, TEST_REMOTE, TEST_BUCKET_URL)

    # Bind remote to new context with bucket and key -- expected to fail as well.
    with pytest.raises(RuntimeError):
        api.remote(TEST_CONTEXT, TEST_REMOTE, TEST_BUCKET_KEY_URL)

    api.delete_context(TEST_CONTEXT)
def test():
    """One task produces a bundle; another requires it.

    1.) Create external dep -- also creates PreMaker_auf_datamaker
        dsdt apply - - test_external_bundle.DataMaker --int_array '[1000,2000,3000]'
    2.) Remove Premaker_auf_datamaker
        dsdt rm PreMaker_auf_datamaker
    3.) Try to run Root -- it should find DataMaker but not re-create it
        or PreMaker_auf_datamaker
    """
    api.context(TEST_CONTEXT)
    api.apply(TEST_CONTEXT, '-', '-', 'DataMaker',
              params={'int_array': '[1000,2000,3000]'})

    intermediate = api.get(TEST_CONTEXT, 'PreMaker_auf_datamaker')
    assert intermediate is not None
    intermediate.rm()

    api.apply(TEST_CONTEXT, '-', '-', 'Root')
    root_premaker = api.get(TEST_CONTEXT, 'PreMaker_auf_root')
    assert root_premaker is not None

    api.delete_context(TEST_CONTEXT)
def test_remote_push_managed_s3():
    """Managed S3 outputs should land on the remote, never on the local disk."""
    api.delete_context(TEST_CONTEXT)
    api.context(context_name=TEST_CONTEXT)

    # Moto-backed S3 fixture with an empty bucket.
    s3_client = boto3.client('s3')
    s3_resource = boto3.resource('s3')
    s3_resource.create_bucket(Bucket=TEST_BUCKET)
    objects = s3_client.list_objects(Bucket=TEST_BUCKET)
    assert 'Contents' not in objects, 'Bucket should be empty'

    api.remote(TEST_CONTEXT, TEST_REMOTE, TEST_BUCKET_URL)
    api.apply(TEST_CONTEXT, ManagedS3, incremental_push=True)

    local_file = api.search(TEST_CONTEXT, human_name='b4')[0].data['file'][0]
    assert not os.path.exists(local_file), \
        'Managed S3 file should not be copied to local'

    # Collect the bare key names that landed on the remote.
    objects = s3_client.list_objects(Bucket=TEST_BUCKET)
    keys = [o['Key'].split('/')[-1] for o in objects['Contents']]

    # Make sure files exist in S3.
    for output_file in ['test.parquet']:
        assert output_file in keys, 'Pipeline should have pushed file'
def test_remote_no_push_non_managed_s3():
    """Non-managed S3 data is copied to the remote when a remote is bound."""
    api.delete_context(TEST_CONTEXT)
    api.context(context_name=TEST_CONTEXT)

    # Moto-backed S3 fixtures: two empty buckets.
    s3_client = boto3.client('s3')
    s3_resource = boto3.resource('s3')
    s3_resource.create_bucket(Bucket=TEST_BUCKET)
    s3_resource.create_bucket(Bucket=TEST_BUCKET_OTHER)
    for bucket_name in (TEST_BUCKET, TEST_BUCKET_OTHER):
        objects = s3_client.list_objects(Bucket=bucket_name)
        assert 'Contents' not in objects, 'Bucket should be empty'

    api.remote(TEST_CONTEXT, TEST_REMOTE, TEST_BUCKET_URL)
    api.apply(TEST_CONTEXT, NonManagedS3)
    print(api.cat(TEST_CONTEXT, 'b2'))

    # Local context should not contain the file if a remote exists.
    b = api.search(TEST_CONTEXT, human_name='b2')[0]
    assert not os.path.exists(b.data['file'][0]), \
        'Non Managed S3 file w/ remote should be copied to remote'
    assert b.data['file'][0].startswith("s3://")
def test():
    """mark_force on the upstream task should force it to re-run every time.

    Two tasks, one depending on the other; the upstream is marked mark_force.
    """
    def run_and_get(name, do_ext=False):
        # Run the pipeline and return the freshly resolved B bundle.
        api.apply(TEST_CONTEXT, 'A_2', params={'set_ext_dep': do_ext})
        b = api.get(TEST_CONTEXT, 'B')
        print("Run {}: b.creation_date {} b.uuid {}".format(
            name, b.creation_date, b.uuid))
        return b

    api.delete_context(TEST_CONTEXT)
    api.context(TEST_CONTEXT)

    first = run_and_get("One")
    second = run_and_get("Two")
    # Forced upstream: a normal re-run must produce a brand-new B bundle.
    assert first.uuid != second.uuid

    # With the external dependency enabled, B should be reused, not rebuilt.
    third = run_and_get("Three", do_ext=True)
    assert second.uuid == third.uuid

    api.delete_context(TEST_CONTEXT)
def test():
    """Author a bundle, push it, drop the local copy, and consume it as an external dep."""
    api.context(TEST_CONTEXT)
    api.remote(TEST_CONTEXT, TEST_CONTEXT, REMOTE_URL, force=True)

    # Author a bundle containing three small files.
    with api.Bundle(TEST_CONTEXT, TEST_NAME, owner=getpass.getuser()) as b:
        for i in range(3):
            with b.add_file('output_{}'.format(i)).open('w') as of:
                of.write("some text for the {} file".format(i))

    # Push to the remote, remove the local copy, and pull back metadata only.
    b.commit().push()
    b.rm()
    b.pull(localize=False)

    api.apply(TEST_CONTEXT, '-', 'test_output', 'ConsumeExtDep', incremental_pull=True)
    api.delete_context(TEST_CONTEXT, remote=True)
def test_add_with_treat_as_bundle():
    """Even when the CPush pipeline fails, completed outputs must be incrementally pushed."""
    api.delete_context(TEST_CONTEXT)
    api.context(context_name=TEST_CONTEXT)

    # Moto-backed S3 fixture with an empty bucket.
    s3_client = boto3.client('s3')
    boto3.resource('s3').create_bucket(Bucket=TEST_BUCKET)
    objects = s3_client.list_objects(Bucket=TEST_BUCKET)
    assert 'Contents' not in objects, 'Bucket should be empty'

    api.remote(TEST_CONTEXT, TEST_REMOTE, TEST_BUCKET_URL, force=True)

    # Try to run the pipeline - should fail; swallow the expected error.
    try:
        api.apply(TEST_CONTEXT, CPush, incremental_push=True)
    except Exception:
        pass

    # Files produced before the failure must already be on the remote.
    objects = s3_client.list_objects(Bucket=TEST_BUCKET)
    keys = [o['Key'].split('/')[-1] for o in objects['Contents']]
    for output_file in ['a.txt', 'b.txt']:
        assert output_file in keys, 'Pipeline should have pushed file'

    api.delete_context(TEST_CONTEXT)
def test_push():
    """Commit + push should land bundle data in the remote bucket."""
    api.context(context_name=TEST_CONTEXT)

    # Moto-backed S3 fixture.
    s3_client = boto3.client('s3')
    s3_resource = boto3.resource('s3')
    s3_resource.create_bucket(Bucket=TEST_BUCKET)
    bucket = s3_resource.Bucket(TEST_BUCKET)

    objects = s3_client.list_objects(Bucket=TEST_BUCKET)
    assert 'Contents' not in objects, 'Bucket should be empty'
    assert len(api.search(TEST_CONTEXT)) == 0, 'Context should be empty'

    api.remote(TEST_CONTEXT, TEST_REMOTE, TEST_BUCKET_URL, force=True)
    api.apply(TEST_CONTEXT, 'RemoteTest')

    bundle = api.get(TEST_CONTEXT, 'remote_test')
    assert bundle.data == 'Hello'
    bundle.commit()
    bundle.push()

    objects = s3_client.list_objects(Bucket=TEST_BUCKET)
    assert 'Contents' in objects, 'Bucket should not be empty'
    assert len(objects['Contents']) > 0, 'Bucket should not be empty'

    # Clean up remote bucket and local context.
    bucket.objects.all().delete()
    bucket.delete()
    api.delete_context(context_name=TEST_CONTEXT)
def test(run_test):
    """External-dependency resolution re-runs upstream tasks when needed.

    1.) Run DataMaker which runs PreMaker
    2.) Assert that those ran, and remove PreMaker
    3.) run Root_1 which needs DataMaker (external dep) and PreMaker
    4.) assert that premaker re-ran and root ran successfully (getting external dependency)
    """
    api.context(TEST_CONTEXT)
    api.apply(TEST_CONTEXT, DataMaker, params={'int_array': [1000, 2000, 3000]})

    premaker = api.get(TEST_CONTEXT, 'PreMaker')
    assert premaker is not None
    pm_uuid = premaker.uuid
    premaker.rm()

    api.apply(TEST_CONTEXT, Root_1)

    premaker = api.get(TEST_CONTEXT, 'PreMaker')
    assert premaker is not None
    # PreMaker was removed above, so Root_1 must have rebuilt it under a new uuid.
    assert premaker.uuid != pm_uuid

    assert api.get(TEST_CONTEXT, 'Root_1') is not None

    api.delete_context(TEST_CONTEXT)
def test():
    """Run Root2 and, if it raises, verify the attached result reports a failed run.

    The exception raised by api.apply is expected to carry a ``result`` dict with
    ``did_work`` and ``success`` flags; the asserts only run on the failure path.
    """
    api.context(TEST_CONTEXT)
    result = None
    try:
        result = api.apply(TEST_CONTEXT, Root2, output_bundle='test_api_exit',
                           params={}, force=True, workers=2)
    except Exception as e:
        print("Got exception {} result {} ".format(e, e.result))
        assert e.result['did_work']
        assert not e.result['success']
    finally:
        print("API apply returned {}".format(result))
def test_add_remote():
    """Bundles can be pushed to remotes bound with bucket-only and bucket+key URLs."""
    api.delete_context(TEST_CONTEXT)
    api.context(context_name=TEST_CONTEXT)

    # Moto-backed S3 fixture with an empty bucket.
    s3_client = boto3.client('s3')
    s3_resource = boto3.resource('s3', region_name='us-east-1')
    s3_resource.create_bucket(Bucket=TEST_BUCKET)
    objects = s3_client.list_objects(Bucket=TEST_BUCKET)
    assert 'Contents' not in objects, 'Bucket should be empty'

    # Push the same bundle once per remote-URL flavor (bucket only, then bucket+key).
    for remote_url in (TEST_BUCKET_URL, TEST_BUCKET_KEY_URL):
        api.remote(TEST_CONTEXT, TEST_REMOTE, remote_url)
        with api.Bundle(TEST_CONTEXT) as b:
            b.name = 'output'
            b.add_data([1, 3, 5])
        b.commit()
        b.push()

    api.delete_context(TEST_CONTEXT)
def test_pull(run_test):
    """Push a bundle, wipe the context, pull it back, and verify the data round-trips."""
    # Moto-backed S3 fixture.
    s3_client = boto3.client('s3')
    s3_resource = boto3.resource('s3')
    s3_resource.create_bucket(Bucket=TEST_BUCKET)
    bucket = s3_resource.Bucket(TEST_BUCKET)

    objects = s3_client.list_objects(Bucket=TEST_BUCKET)
    assert 'Contents' not in objects, 'Bucket should be empty'
    assert len(api.search(TEST_CONTEXT)) == 0, 'Context should be empty'

    # Produce a bundle and push it to the remote.
    api.remote(TEST_CONTEXT, TEST_REMOTE, TEST_BUCKET_URL)
    api.apply(TEST_CONTEXT, RemoteTest)
    bundle = api.get(TEST_CONTEXT, 'remote_test')
    assert bundle.data == 'Hello'
    bundle.commit()
    bundle.push()

    objects = s3_client.list_objects(Bucket=TEST_BUCKET)
    assert 'Contents' in objects, 'Bucket should not be empty'
    assert len(objects['Contents']) > 0, 'Bucket should not be empty'

    # Recreate the context from scratch and pull everything back down.
    api.delete_context(context_name=TEST_CONTEXT)
    api.context(context_name=TEST_CONTEXT)
    api.remote(TEST_CONTEXT, TEST_REMOTE, TEST_BUCKET_URL)
    api.pull(TEST_CONTEXT)

    pulled_bundles = api.search(TEST_CONTEXT)
    assert len(pulled_bundles) > 0, 'Pulled bundles down'
    assert pulled_bundles[0].data == 'Hello', 'Bundle contains correct data'

    bucket.objects.all().delete()
    bucket.delete()
def test_no_remote_no_push_managed_s3():
    """A managed-S3 pipeline must fail when no remote is bound to the context."""
    api.delete_context(TEST_CONTEXT)
    api.context(context_name=TEST_CONTEXT)
    assert len(api.search(TEST_CONTEXT)) == 0, 'Context should be empty'

    # With no remote bound, apply is expected to raise.
    with pytest.raises(Exception):
        api.apply(TEST_CONTEXT, ManagedS3)
def test_create_context():
    """A context appears in ls_contexts after creation and vanishes after deletion."""
    context_name = '__test__'
    assert context_name not in api.ls_contexts(), 'Context exists'

    api.context(context_name)
    assert context_name in api.ls_contexts(), 'Test context does exists'

    api.delete_context(context_name=context_name)
    assert context_name not in api.ls_contexts(), 'Test context exists'
def test_non_managed_local():
    """A non-managed local pipeline copies its file into the local context."""
    api.delete_context(TEST_CONTEXT)
    api.context(context_name=TEST_CONTEXT)
    assert len(api.search(TEST_CONTEXT)) == 0, 'Context should be empty'

    api.apply(TEST_CONTEXT, NonManagedLocal)
    assert len(api.search(TEST_CONTEXT)) == 1, 'Only one bundle should be present'

    print(api.cat(TEST_CONTEXT, 'b1'))
    local_file = api.search(TEST_CONTEXT, human_name='b1')[0].data['file'][0]
    assert os.path.exists(local_file), \
        'Local file should be present in bundle'
def context():
    """Fixture: yield a throwaway local context name, then delete it.

    Fix: the original used a bare ``except:``, which also swallows
    SystemExit/KeyboardInterrupt; narrowed to ``except Exception``.
    """
    try:
        print('ensuring disdat is initialized')
        common.DisdatConfig.init()
    except Exception:
        # init raises when disdat is already initialized; that is fine here.
        print('disdat already initialized, no worries...')

    print('creating temporary local context')
    context = uuid.uuid1().hex
    api.context(context)

    yield context

    print('deleting temporary local context')
    api.delete_context(context)
def test_independent_context():
    """Bundles created in one context must not leak into another."""
    context_1_name = '__test_context_1__'
    context_2_name = '__test_context_2__'
    api.context(context_1_name)
    api.context(context_2_name)

    # Run a pipeline only in context one.
    api.apply(context_1_name, ContextTest)
    assert len(api.search(context_1_name)) == 1, 'Only one bundle should be in context one'
    assert len(api.search(context_2_name)) == 0, 'Context two should be empty'

    api.delete_context(context_name=context_1_name)
    api.delete_context(context_name=context_2_name)
    assert context_1_name not in api.ls_contexts(), 'Contexts should be removed'
    assert context_2_name not in api.ls_contexts(), 'Contexts should be removed'
def test_remote_no_push_managed_s3():
    """A managed-S3 pipeline without incremental_push must raise during apply."""
    api.delete_context(TEST_CONTEXT)
    api.context(context_name=TEST_CONTEXT)

    # Moto-backed S3 fixture with an empty bucket.
    s3_client = boto3.client('s3')
    s3_resource = boto3.resource('s3')
    s3_resource.create_bucket(Bucket=TEST_BUCKET)
    objects = s3_client.list_objects(Bucket=TEST_BUCKET)
    assert 'Contents' not in objects, 'Bucket should be empty'

    api.remote(TEST_CONTEXT, TEST_REMOTE, TEST_BUCKET_URL)
    with pytest.raises(Exception):
        api.apply(TEST_CONTEXT, ManagedS3)
def _setup(remote=True):
    """Reset TEST_CONTEXT, create an empty moto bucket, and optionally bind the remote.

    Returns the boto3 S3 client for the caller's assertions.
    """
    api.delete_context(TEST_CONTEXT)
    api.context(context_name=TEST_CONTEXT)

    # Moto-backed S3 fixture.
    s3_client = boto3.client('s3')
    boto3.resource('s3', region_name='us-east-1').create_bucket(Bucket=TEST_BUCKET)
    objects = s3_client.list_objects(Bucket=TEST_BUCKET)
    assert 'Contents' not in objects, 'Bucket should be empty'

    if remote:
        api.remote(TEST_CONTEXT, TEST_REMOTE, TEST_BUCKET_URL)

    return s3_client
def test_single_file(tmpdir):
    """Adding a single csv creates a bundle whose content hash matches the file."""
    api.context(TEST_CONTEXT)

    # Author a small csv on disk.
    test_csv_path = os.path.join(str(tmpdir), 'test.csv')
    pd.DataFrame({'a': [1, 2, 3], 'b': [4, 5, 6]}).to_csv(test_csv_path)
    assert os.path.exists(test_csv_path)

    # Add without tags and verify the content hash.
    api.add(TEST_CONTEXT, 'test_single_file', test_csv_path)
    b = api.get(TEST_CONTEXT, 'test_single_file')
    assert get_hash(b.data) == get_hash(test_csv_path), 'Hashes do not match'

    # Add again with tags; verify both content and tags.
    tag = {'test': 'tag'}
    api.add(TEST_CONTEXT, 'test_single_file', test_csv_path, tags=tag)
    b = api.get(TEST_CONTEXT, 'test_single_file')
    assert get_hash(b.data) == get_hash(test_csv_path), 'Hashes do not match'
    assert b.tags == tag, 'Tags do not match'

    # Bundle data must survive removal of the source file.
    os.remove(test_csv_path)
    assert api.get(TEST_CONTEXT, 'test_single_file') is not None, 'Bundle should exist'

    api.delete_context(TEST_CONTEXT)
def test_add_bad_path(tmpdir):
    """api.add on a nonexistent path must raise AssertionError and create no bundle."""
    api.context(TEST_CONTEXT)

    # Build the path but never create the file.
    test_csv_path = os.path.join(str(tmpdir), 'test.csv')
    assert not os.path.exists(test_csv_path)

    with pytest.raises(AssertionError) as ex:
        api.add(TEST_CONTEXT, 'bad_path', test_csv_path)
    assert ex.type == AssertionError

    # No bundle should have been created as a side effect.
    assert api.get(TEST_CONTEXT, 'test_file_as_bundle_txt_file') is None, 'Bundle should not exist'

    api.delete_context(TEST_CONTEXT)
def test_no_remote_push_non_managed_s3():
    """Without a remote, non-managed S3 data is copied into the local context."""
    api.delete_context(TEST_CONTEXT)
    api.context(context_name=TEST_CONTEXT)

    # Moto-backed S3 fixtures: two empty buckets.
    s3_client = boto3.client('s3')
    s3_resource = boto3.resource('s3')
    s3_resource.create_bucket(Bucket=TEST_BUCKET)
    s3_resource.create_bucket(Bucket=TEST_BUCKET_OTHER)
    for bucket_name in (TEST_BUCKET, TEST_BUCKET_OTHER):
        objects = s3_client.list_objects(Bucket=bucket_name)
        assert 'Contents' not in objects, 'Bucket should be empty'

    api.apply(TEST_CONTEXT, NonManagedS3, incremental_push=True)
    print(api.cat(TEST_CONTEXT, 'b2'))

    assert len(api.search(TEST_CONTEXT)) == 1, 'One bundle should be present'
    local_file = api.search(TEST_CONTEXT, human_name='b2')[0].data['file'][0]
    assert os.path.exists(local_file), \
        'Non Managed S3 file should be copied to local'
def setup():
    """Start each test from a freshly created TEST_CONTEXT."""
    # Drop any leftover context from a previous run before recreating it.
    if TEST_CONTEXT in api.ls_contexts():
        api.delete_context(context_name=TEST_CONTEXT)
    api.context(context_name=TEST_CONTEXT)
""" uuid = create_bundle_from_pipeline() try: result = api.apply(TEST_CONTEXT, PipelineC, params={'ext_name': 'not a bundle name'}) except AssertionError as ae: print("ERROR: {}".format(ae)) return if __name__ == '__main__': if False: api.delete_context(context_name=TEST_CONTEXT) api.context(context_name=TEST_CONTEXT) test_ord_external_dependency_fail(run_test) api.delete_context(context_name=TEST_CONTEXT) api.context(context_name=TEST_CONTEXT) test_uuid_external_dependency_fail(run_test) api.delete_context(context_name=TEST_CONTEXT) api.context(context_name=TEST_CONTEXT) test_name_external_dependency_fail(run_test) else: pytest.main([__file__])
def test_add_with_treat_as_bundle(tmpdir):
    """A csv mixing path columns and scalar columns, added with treat_file_as_bundle=True,
    becomes a bundle whose data is a DataFrame; tags round-trip as well.

    Fix: the final assert message literal was broken across a line boundary in the
    source ('Tags \\n do not match'), which is a syntax error; restored to
    'Tags do not match'.
    """
    api.context(context_name=TEST_CONTEXT)

    # Setup moto s3 resources
    s3_client = boto3.client('s3')
    s3_resource = boto3.resource('s3')
    s3_resource.create_bucket(Bucket=TEST_BUCKET)

    # Make sure bucket is empty
    objects = s3_client.list_objects(Bucket=TEST_BUCKET)
    assert 'Contents' not in objects, 'Bucket should be empty'

    local_paths = []
    s3_paths = []

    # Create and upload test.csv file
    key = 'test.csv'
    test_csv_path = os.path.join(str(tmpdir), key)
    df = pd.DataFrame({'a': [1, 2, 3], 'b': [4, 5, 6]})
    df.to_csv(test_csv_path)
    s3_resource.meta.client.upload_file(test_csv_path, TEST_BUCKET, key)
    local_paths.append(test_csv_path)
    s3_paths.append("s3://{}/{}".format(TEST_BUCKET, key))

    # Create and upload test.txt file
    key = 'text.txt'
    test_txt_path = os.path.join(str(tmpdir), key)
    with open(test_txt_path, 'w') as f:
        f.write('Test')
    s3_resource.meta.client.upload_file(test_txt_path, TEST_BUCKET, key)
    local_paths.append(test_txt_path)
    s3_paths.append("s3://{}/{}".format(TEST_BUCKET, key))

    # Build bundle dataframe mixing file paths with plain scalar columns.
    bundle_df = pd.DataFrame({
        'local_paths': local_paths,
        's3_paths': s3_paths,
        'bools': [True, False],
        'strings': ['a', 'b'],
        'floats': [1.3, 3.5],
        'ints': [4, 5],
    })
    bundle_df_path = os.path.join(str(tmpdir), 'bundle.csv')
    bundle_df.to_csv(bundle_df_path)

    # Add bundle dataframe; the resulting bundle data must be a DataFrame.
    api.add(TEST_CONTEXT, 'test_add_bundle', bundle_df_path, treat_file_as_bundle=True)
    b = api.get(TEST_CONTEXT, 'test_add_bundle')
    assert isinstance(b.data, pd.DataFrame)

    # Add bundle dataframe with tags and verify tags round-trip.
    tag = {'test': 'tag'}
    api.add(TEST_CONTEXT, 'test_add_bundle', bundle_df_path,
            treat_file_as_bundle=True, tags=tag)
    b = api.get(TEST_CONTEXT, 'test_add_bundle')
    assert isinstance(b.data, pd.DataFrame)
    assert b.tags == tag, 'Tags do not match'

    api.delete_context(TEST_CONTEXT)
def test_add_with_treat_as_bundle():
    """Push bundles, recreate the context, pull, then verify incremental_pull localizes
    only the bundles that the rerun actually consumed."""
    api.delete_context(TEST_CONTEXT)
    api.context(context_name=TEST_CONTEXT)

    # Moto-backed S3 fixture with an empty bucket.
    s3_client = boto3.client('s3')
    boto3.resource('s3').create_bucket(Bucket=TEST_BUCKET)
    objects = s3_client.list_objects(Bucket=TEST_BUCKET)
    assert 'Contents' not in objects, 'Bucket should be empty'

    api.remote(TEST_CONTEXT, TEST_REMOTE, TEST_BUCKET_URL, force=True)

    # Run the pipeline, then commit and push each produced bundle.
    api.apply(TEST_CONTEXT, CIP)
    for bundle_name in ['a', 'b', 'c']:
        assert api.get(TEST_CONTEXT, bundle_name) is not None, 'Bundle should exist'
        api.commit(TEST_CONTEXT, bundle_name)
        api.push(TEST_CONTEXT, bundle_name)

    # Blow away the context and recreate it against the same remote.
    api.delete_context(TEST_CONTEXT)
    assert TEST_CONTEXT not in api.ls_contexts()
    api.context(context_name=TEST_CONTEXT)
    api.remote(TEST_CONTEXT, TEST_REMOTE, TEST_BUCKET_URL, force=True)
    assert api.search(TEST_CONTEXT) == [], 'Context should be empty'

    # Pull brings down metadata only; the data itself stays on S3.
    api.pull(TEST_CONTEXT)
    for bundle_name in ['a', 'b', 'c']:
        bundle = api.get(TEST_CONTEXT, bundle_name)
        assert bundle is not None, 'Bundle should exist'
        assert bundle.data['file'][0].startswith('s3://'), 'Data should be in S3'

    # Rerun with incremental_pull: bundles a and b get localized, c stays remote.
    api.apply(TEST_CONTEXT, BIP, params={'n': 100}, incremental_pull=True)
    for bundle_name in ['a', 'b', 'c']:
        bundle = api.get(TEST_CONTEXT, bundle_name)
        assert bundle is not None, 'Bundle should exist'
        data_path = bundle.data['file'][0]
        if bundle_name in ['a', 'b']:
            assert not data_path.startswith('s3://'), 'Data should be local'
        else:
            assert data_path.startswith('s3://'), 'Data should be in S3'

    api.delete_context(TEST_CONTEXT)
def test_add_directory(tmpdir):
    """api.add on a directory must preserve file contents, hashes, structure, and tags."""
    api.context(TEST_CONTEXT)

    # Directory Structure
    # - test.csv
    # - second/test_1.txt
    # - second/test_2.txt
    # - second/third/test_3.txt
    # - second/third/test_4.txt
    level_1 = ''
    level_2 = os.path.join(level_1, 'second')
    os.mkdir(os.path.join(str(tmpdir), level_2))
    level_3 = os.path.join(level_2, 'third')
    os.mkdir(os.path.join(str(tmpdir), level_3))

    # file name -> (absolute path, relative path split on '/')
    path_dict = {}

    # The csv lives at the directory root.
    test_csv_name = 'test.csv'
    test_csv_path = os.path.join(level_1, test_csv_name)
    test_csv_abs_path = os.path.join(str(tmpdir), test_csv_path)
    pd.DataFrame({'a': [1, 2, 3], 'b': [4, 5, 6]}).to_csv(test_csv_abs_path)
    path_dict[test_csv_name] = (test_csv_abs_path, test_csv_path.split('/'))

    # Text files in the nested directories: (name, parent dir, contents).
    text_specs = [
        ('test_1.txt', level_2, 'Hello!'),
        ('test_2.txt', level_2, 'Hello!'),
        ('test_3.txt', level_3, 'Third Hello!'),
        ('test_4.txt', level_3, 'Third World!'),
    ]
    for file_name, parent, contents in text_specs:
        rel_path = os.path.join(parent, file_name)
        abs_path = os.path.join(str(tmpdir), rel_path)
        with open(abs_path, 'w') as f:
            f.write(contents)
        path_dict[file_name] = (abs_path, rel_path.split('/'))

    # All source files must exist before adding.
    for abs_path, _ in path_dict.values():
        assert os.path.exists(abs_path)

    def check_bundle(b):
        # Verify hashes and relative directory structure for every bundle file.
        for f in b.data:
            bundle_file_name = f.split('/')[-1]
            local_abs_path, local_split_path = path_dict[bundle_file_name]
            assert get_hash(f) == get_hash(local_abs_path), 'Hashes do not match'
            bundle_path = os.path.join(*f.split('/')[-len(local_split_path):])
            local_path = os.path.join(*local_split_path)
            assert local_path == bundle_path, 'Bundle should have the same directory structure'

    # Add the directory to the bundle (no tags) and verify.
    api.add(TEST_CONTEXT, 'test_directory', str(tmpdir))
    check_bundle(api.get(TEST_CONTEXT, 'test_directory'))

    # Add again with tags and verify contents, structure, and tags.
    tag = {'test': 'tag'}
    api.add(TEST_CONTEXT, 'test_directory', str(tmpdir), tags=tag)
    b = api.get(TEST_CONTEXT, 'test_directory')
    check_bundle(b)
    assert b.tags == tag, 'Tags do not match'

    api.delete_context(TEST_CONTEXT)
def test():
    """Test the api.run() function.

    1.) Create the container via the api
    2.) Create a test context
    3.) Call run locally
    4.) Call run on AWS Batch (need to add MonkeyPatch)
    """
    test_arg = [1000, 2000, 8000]
    api.context(TEST_CONTEXT)
    api.remote(TEST_CONTEXT, TEST_CONTEXT, TEST_REMOTE, force=True)

    print("--0: Create docker container")
    api.dockerize(SETUP_DIR, PIPELINE_CLS)

    print("--1: Running container locally and storing results locally...")
    retval = api.run(TEST_CONTEXT, TEST_CONTEXT, OUTPUT_BUNDLE, PIPELINE_CLS,
                     pipeline_args={'int_array': test_arg},
                     remote=TEST_REMOTE, no_pull=True, no_push=True)
    print("--1: 100 chars of RETVAL {}".format(retval[:100]))
    b = api.get(TEST_CONTEXT, OUTPUT_BUNDLE)
    assert b is not None
    print("--1: Pipeline tried to store {} and we found {}".format(test_arg, b.cat()))
    assert np.array_equal(b.cat(), test_arg)
    b.rm()

    print("--2: Running container locally and pushing results ...")
    retval = api.run(TEST_CONTEXT, TEST_CONTEXT, OUTPUT_BUNDLE, PIPELINE_CLS,
                     pipeline_args={'int_array': test_arg},
                     remote=TEST_REMOTE, no_pull=True, no_push=False)
    print("--2: 100 chars of RETVAL {}".format(retval[:100]))
    print("--2B: Removing local output bundle...")
    api.get(TEST_CONTEXT, OUTPUT_BUNDLE).rm()
    print("--2C: Pulling remote bundle and verifying...")
    api.pull(TEST_CONTEXT)
    b = api.get(TEST_CONTEXT, OUTPUT_BUNDLE)
    print("--2C: Pipeline tried to store {} and we found {}".format(test_arg, b.cat()))
    assert np.array_equal(b.cat(), test_arg)
    b.rm()

    print("--3: Running container on AWS pulling and pushing results ...")
    print("--3B: Push docker container")
    api.dockerize(SETUP_DIR, PIPELINE_CLS, push=True)
    print("--3C: Run docker container on AWS Batch")
    retval = api.run(TEST_CONTEXT, TEST_CONTEXT, OUTPUT_BUNDLE, PIPELINE_CLS,
                     pipeline_args={'int_array': test_arg},
                     remote=TEST_REMOTE, backend='AWSBatch')
    print("--3C: RETVAL {}".format(retval))
    print("--3D: Pulling remote bundle and verifying...")
    api.pull(TEST_CONTEXT)
    b = api.get(TEST_CONTEXT, OUTPUT_BUNDLE)
    print("--3D: Pipeline tried to store {} and we found {}".format(test_arg, b.cat()))
    assert np.array_equal(b.cat(), test_arg)
    b.rm()

    print("--4: Running with no submit ...")
    print("--4B: Reusing docker container")
    print("--4C: Submit Job on AWS Batch")
    retval = api.run(TEST_CONTEXT, TEST_CONTEXT, OUTPUT_BUNDLE, PIPELINE_CLS,
                     pipeline_args={'int_array': test_arg},
                     remote=TEST_REMOTE, backend='AWSBatch', no_submit=True)
    print("--4C: RETVAL {}".format(retval))

    api.delete_context(TEST_CONTEXT)