def test_remote_no_push_non_managed_s3():
    """Apply NonManagedS3 with a remote bound and inspect bundle 'b2'.

    The first 'file' entry of the resulting bundle must not exist on the
    local filesystem and must be an ``s3://`` link.
    """
    # Start from a freshly created context.
    api.delete_context(TEST_CONTEXT)
    api.context(context_name=TEST_CONTEXT)

    # Create both buckets (the original note says these are moto s3 resources).
    client = boto3.client('s3')
    resource = boto3.resource('s3')
    bucket_names = (TEST_BUCKET, TEST_BUCKET_OTHER)
    for bucket_name in bucket_names:
        resource.create_bucket(Bucket=bucket_name)

    # Neither bucket may list any object before the pipeline runs.
    for bucket_name in bucket_names:
        listing = client.list_objects(Bucket=bucket_name)
        assert 'Contents' not in listing, 'Bucket should be empty'

    # Bind the remote, then run the pipeline and dump the bundle.
    api.remote(TEST_CONTEXT, TEST_REMOTE, TEST_BUCKET_URL)
    api.apply(TEST_CONTEXT, NonManagedS3)
    print(api.cat(TEST_CONTEXT, 'b2'))

    # With a remote bound, the file link must not resolve locally.
    bundle = api.search(TEST_CONTEXT, human_name='b2')[0]
    file_link = bundle.data['file'][0]
    assert not os.path.exists(file_link), \
        'Non Managed S3 file w/ remote should be copied to remote'
    assert file_link.startswith("s3://")
def test_pull(run_test):
    """Push a bundle to the remote, recreate the context, and pull it back.

    Args:
        run_test: not referenced in the body; presumably a pytest fixture
            that prepares TEST_CONTEXT -- confirm against its definition.
    """
    client = boto3.client('s3')
    resource = boto3.resource('s3')
    resource.create_bucket(Bucket=TEST_BUCKET)
    remote_bucket = resource.Bucket(TEST_BUCKET)

    # Preconditions: nothing in the bucket, nothing in the context.
    listing = client.list_objects(Bucket=TEST_BUCKET)
    assert 'Contents' not in listing, 'Bucket should be empty'
    assert len(api.search(TEST_CONTEXT)) == 0, 'Context should be empty'

    # Produce the bundle locally and push it.
    api.remote(TEST_CONTEXT, TEST_REMOTE, TEST_BUCKET_URL)
    api.apply(TEST_CONTEXT, RemoteTest)
    produced = api.get(TEST_CONTEXT, 'remote_test')
    assert produced.data == 'Hello'
    produced.commit()
    produced.push()

    # The push must have left at least one object in the bucket.
    listing = client.list_objects(Bucket=TEST_BUCKET)
    assert 'Contents' in listing, 'Bucket should not be empty'
    assert len(listing['Contents']) > 0, 'Bucket should not be empty'

    # Recreate the context, re-bind the remote, and pull.
    api.delete_context(context_name=TEST_CONTEXT)
    api.context(context_name=TEST_CONTEXT)
    api.remote(TEST_CONTEXT, TEST_REMOTE, TEST_BUCKET_URL)
    api.pull(TEST_CONTEXT)

    found = api.search(TEST_CONTEXT)
    assert len(found) > 0, 'Pulled bundles down'
    assert found[0].data == 'Hello', 'Bundle contains correct data'

    # Empty and remove the bucket.
    remote_bucket.objects.all().delete()
    remote_bucket.delete()
def test():
    """Create a three-file bundle, push it, then apply with incremental pull.

    The bundle is committed and pushed, removed locally, pulled back with
    ``localize=False``, and finally 'ConsumeExtDep' is applied with
    ``incremental_pull=True``.  The context is deleted (``remote=True``)
    at the end.
    """
    api.context(TEST_CONTEXT)
    # The context name is also used as the remote name here.
    api.remote(TEST_CONTEXT, TEST_CONTEXT, REMOTE_URL, force=True)

    owner_name = getpass.getuser()
    with api.Bundle(TEST_CONTEXT, TEST_NAME, owner=owner_name) as bundle:
        for idx in range(3):
            target = bundle.add_file('output_{}'.format(idx))
            with target.open('w') as handle:
                handle.write("some text for the {} file".format(idx))

    # NOTE(review): the collapsed source does not show where the bundle
    # context ends; this assumes commit/push happen after it closes.
    bundle.commit().push()
    bundle.rm()
    bundle.pull(localize=False)

    api.apply(TEST_CONTEXT, '-', 'test_output', 'ConsumeExtDep',
              incremental_pull=True)
    api.delete_context(TEST_CONTEXT, remote=True)
def test_add_with_treat_as_bundle():
    """Apply CPush with incremental push and check that files reach S3.

    The apply call is allowed to raise; whatever happens, 'a.txt' and
    'b.txt' must be present (by basename) among the bucket's keys afterwards.
    """
    api.delete_context(TEST_CONTEXT)
    api.context(context_name=TEST_CONTEXT)

    # Setup moto s3 resources
    s3_client = boto3.client('s3')
    s3_resource = boto3.resource('s3')
    s3_resource.create_bucket(Bucket=TEST_BUCKET)

    # Make sure bucket is empty
    objects = s3_client.list_objects(Bucket=TEST_BUCKET)
    assert 'Contents' not in objects, 'Bucket should be empty'

    # Bind remote context
    api.remote(TEST_CONTEXT, TEST_REMOTE, TEST_BUCKET_URL, force=True)

    # The pipeline is expected to fail; the failure is tolerated on purpose,
    # but it is reported so it stays visible in the captured test output.
    try:
        api.apply(TEST_CONTEXT, CPush, incremental_push=True)
    except Exception as exc:
        print("api.apply raised (tolerated): {}".format(exc))

    # Collect the basenames of everything in the remote.  A missing
    # 'Contents' entry is treated as an empty listing so that the assertion
    # below fails with its own message rather than with a KeyError.
    objects = s3_client.list_objects(Bucket=TEST_BUCKET)
    keys = [entry['Key'].split('/')[-1]
            for entry in objects.get('Contents', [])]

    # Make sure files exist in S3
    for output_file in ['a.txt', 'b.txt']:
        assert output_file in keys, 'Pipeline should have pushed file'

    api.delete_context(TEST_CONTEXT)
def test_remote_push_managed_s3():
    """Apply ManagedS3 with incremental push and verify the file lands in S3.

    The first 'file' entry of bundle 'b4' must not exist locally, and
    'test.parquet' must be present (by basename) among the bucket's keys.
    """
    api.delete_context(TEST_CONTEXT)
    api.context(context_name=TEST_CONTEXT)

    # Setup moto s3 resources
    s3_client = boto3.client('s3')
    s3_resource = boto3.resource('s3')
    s3_resource.create_bucket(Bucket=TEST_BUCKET)

    # Make sure bucket is empty
    objects = s3_client.list_objects(Bucket=TEST_BUCKET)
    assert 'Contents' not in objects, 'Bucket should be empty'

    # Bind remote context
    api.remote(TEST_CONTEXT, TEST_REMOTE, TEST_BUCKET_URL)

    # Apply
    api.apply(TEST_CONTEXT, ManagedS3, incremental_push=True)

    bundle = api.search(TEST_CONTEXT, human_name='b4')[0]
    assert not os.path.exists(bundle.data['file'][0]), \
        'Managed S3 file should not be copied to local'

    # Collect the basenames of everything in the remote.  A missing
    # 'Contents' entry is treated as an empty listing so that the assertion
    # below fails with its own message rather than with a KeyError.
    objects = s3_client.list_objects(Bucket=TEST_BUCKET)
    keys = [entry['Key'].split('/')[-1]
            for entry in objects.get('Contents', [])]

    # Make sure files exist in S3
    for output_file in ['test.parquet']:
        assert output_file in keys, 'Pipeline should have pushed file'
def test():
    """Exercise an external dependency between a producer and a consumer task.

    Steps:
        1. Apply 'DataMaker' with int_array '[1000,2000,3000]'; this is
           expected to also create bundle 'PreMaker_auf_datamaker'.
        2. Remove the 'PreMaker_auf_datamaker' bundle.
        3. Apply 'Root' and check that 'PreMaker_auf_root' now exists.
           Per the original description, Root should find DataMaker without
           re-creating it or PreMaker_auf_datamaker (that part is not
           asserted here).
    """
    api.context(TEST_CONTEXT)

    maker_params = {'int_array': '[1000,2000,3000]'}
    api.apply(TEST_CONTEXT, '-', '-', 'DataMaker', params=maker_params)

    pre_maker = api.get(TEST_CONTEXT, 'PreMaker_auf_datamaker')
    assert pre_maker is not None
    pre_maker.rm()

    api.apply(TEST_CONTEXT, '-', '-', 'Root')

    root_pre_maker = api.get(TEST_CONTEXT, 'PreMaker_auf_root')
    assert root_pre_maker is not None

    api.delete_context(TEST_CONTEXT)
def test_push():
    """Apply 'RemoteTest', commit and push its bundle, and check the bucket.

    The bucket must be empty before the push and contain at least one object
    afterwards.  The bucket and the context are removed at the end.
    """
    api.context(context_name=TEST_CONTEXT)

    client = boto3.client('s3')
    resource = boto3.resource('s3')
    resource.create_bucket(Bucket=TEST_BUCKET)
    remote_bucket = resource.Bucket(TEST_BUCKET)

    # Preconditions: nothing in the bucket, nothing in the context.
    listing = client.list_objects(Bucket=TEST_BUCKET)
    assert 'Contents' not in listing, 'Bucket should be empty'
    assert len(api.search(TEST_CONTEXT)) == 0, 'Context should be empty'

    api.remote(TEST_CONTEXT, TEST_REMOTE, TEST_BUCKET_URL, force=True)
    api.apply(TEST_CONTEXT, 'RemoteTest')

    produced = api.get(TEST_CONTEXT, 'remote_test')
    assert produced.data == 'Hello'
    produced.commit()
    produced.push()

    listing = client.list_objects(Bucket=TEST_BUCKET)
    assert 'Contents' in listing, 'Bucket should not be empty'
    assert len(listing['Contents']) > 0, 'Bucket should not be empty'

    # Clean up the bucket first, then the context.
    remote_bucket.objects.all().delete()
    remote_bucket.delete()
    api.delete_context(context_name=TEST_CONTEXT)
def test_ABC7(run_test):
    """Scenario 7: run A->B->C, then A*->B, then A->B->C again.

    The first C run and the patched B run must both do work, and the patched
    run must produce a new B bundle.  With B's original requirements
    restored, the final C run must succeed without doing any work.

    Args:
        run_test: not referenced in the body; presumably a pytest fixture
            that prepares TEST_CONTEXT -- confirm against its definition.
    """
    result = api.apply(TEST_CONTEXT, C)
    assert result['success'] is True
    assert result['did_work'] is True
    B_uuid = api.get(TEST_CONTEXT, 'B').uuid

    def custom_B_requires(self):
        # Declare APrime as the 'a' dependency.
        self.add_dependency('a', APrime, params={})

    old_requires = B.pipe_requires
    B.pipe_requires = custom_B_requires
    try:
        result = api.apply(TEST_CONTEXT, B)
        assert result['success'] is True
        assert result['did_work'] is True
        assert B_uuid != api.get(TEST_CONTEXT, 'B').uuid  # should have a new B
    finally:
        # Always undo the class-level patch, even when an assertion fails,
        # so that other tests using B are not affected.
        B.pipe_requires = old_requires

    result = api.apply(TEST_CONTEXT, C)
    assert result['success'] is True
    assert result['did_work'] is False
def test_AB6(run_test):
    """Scenario 6: run A->B, run A*, then run A*->B; B should re-run.

    After B is patched to depend on APrime, applying B must do work, keep
    the existing APrime bundle (same uuid), and produce a new B bundle.

    Args:
        run_test: not referenced in the body; presumably a pytest fixture
            that prepares TEST_CONTEXT -- confirm against its definition.
    """
    result = api.apply(TEST_CONTEXT, B)
    assert result['success'] is True
    assert result['did_work'] is True
    B_uuid = api.get(TEST_CONTEXT, 'B').uuid

    result = api.apply(TEST_CONTEXT, APrime)
    assert result['success'] is True
    assert result['did_work'] is True
    APrime_uuid = api.get(TEST_CONTEXT, 'APrime').uuid

    def custom_B_requires(self):
        # Declare APrime as the 'a' dependency.
        self.add_dependency('a', APrime, params={})

    old_requires = B.pipe_requires
    B.pipe_requires = custom_B_requires
    try:
        result = api.apply(TEST_CONTEXT, B)
        assert result['success'] is True
        assert result['did_work'] is True
        assert APrime_uuid == api.get(TEST_CONTEXT, 'APrime').uuid
        assert B_uuid != api.get(TEST_CONTEXT, 'B').uuid
    finally:
        # Always undo the class-level patch, even when an assertion fails,
        # so that other tests using B are not affected.
        B.pipe_requires = old_requires
def test_ord_external_dependency_fail(run_test):
    """Exercise a failed external-dependency lookup in PipelineA.

    Per the original note, Disdat/Luigi swallows exceptions raised inside
    tasks; the tasks assert that their lookup returned a bundle.  An
    AssertionError reaching this test is caught and printed.

    NOTE(review): the test also passes when the second apply raises nothing;
    only the first apply's result is asserted.

    Args:
        run_test: not referenced in the body; presumably a pytest fixture
            that prepares TEST_CONTEXT -- confirm against its definition.
    """
    # Create the external bundle; its uuid is not needed here.
    create_bundle_from_pipeline()

    first_params = {'test_param': 'never run before', 'throw_assert': False}
    outcome = api.apply(TEST_CONTEXT, PipelineA, params=first_params)
    assert outcome['success'] is True

    try:
        api.apply(TEST_CONTEXT, PipelineA,
                  params={'test_param': 'never run before'})
    except AssertionError as ae:
        print("ERROR: {}".format(ae))
def test(run_test):
    """Check that a removed upstream bundle is rebuilt when Root_1 runs.

    Steps:
        1. Apply DataMaker, which is expected to produce a 'PreMaker' bundle.
        2. Record the PreMaker uuid and remove that bundle.
        3. Apply Root_1.
        4. Assert that 'PreMaker' exists again with a different uuid and
           that a 'Root_1' bundle exists.

    Args:
        run_test: not referenced in the body; presumably a pytest fixture
            that prepares TEST_CONTEXT -- confirm against its definition.
    """
    api.context(TEST_CONTEXT)

    api.apply(TEST_CONTEXT, DataMaker,
              params={'int_array': [1000, 2000, 3000]})

    original_pre_maker = api.get(TEST_CONTEXT, 'PreMaker')
    assert original_pre_maker is not None
    pm_uuid = original_pre_maker.uuid
    original_pre_maker.rm()

    api.apply(TEST_CONTEXT, Root_1)

    rebuilt_pre_maker = api.get(TEST_CONTEXT, 'PreMaker')
    assert rebuilt_pre_maker is not None
    assert rebuilt_pre_maker.uuid != pm_uuid

    root_bundle = api.get(TEST_CONTEXT, 'Root_1')
    assert root_bundle is not None

    api.delete_context(TEST_CONTEXT)
def test_name_external_dependency():
    """Apply PipelineC, passing the external bundle's human name."""
    bundle_uuid = create_bundle_from_pipeline()
    print("UUID of created bundle is {}".format(bundle_uuid))

    # The external dependency is identified by human name.
    name_params = {'ext_name': EXT_BUNDLE_NAME}
    api.apply(TEST_CONTEXT, PipelineC, params=name_params)
def test_uuid_external_dependency():
    """Apply PipelineB, passing the external bundle's uuid."""
    bundle_uuid = create_bundle_from_pipeline()
    print("UUID of created bundle is {}".format(bundle_uuid))

    # The external dependency is identified by a specific uuid.
    uuid_params = {'ext_uuid': bundle_uuid}
    api.apply(TEST_CONTEXT, PipelineB, params=uuid_params)
def test_ord_external_dependency():
    """Apply PipelineA after the external bundle has been created."""
    bundle_uuid = create_bundle_from_pipeline()
    print("UUID of created bundle is {}".format(bundle_uuid))

    # Ordinary external dependency: no extra parameters are passed.
    api.apply(TEST_CONTEXT, PipelineA)
def test_no_remote_no_push_managed_s3():
    """Applying ManagedS3 in a fresh context with no remote bound must raise."""
    # Recreate the context so that it starts out empty.
    api.delete_context(TEST_CONTEXT)
    api.context(context_name=TEST_CONTEXT)

    existing = api.search(TEST_CONTEXT)
    assert len(existing) == 0, 'Context should be empty'

    # Any exception type is accepted here.
    with pytest.raises(Exception):
        api.apply(TEST_CONTEXT, ManagedS3)
def test_uuid_external_dependency(run_test):
    """Re-applying PipelineB with the same external uuid must do no work.

    Args:
        run_test: not referenced in the body; presumably a pytest fixture
            that prepares TEST_CONTEXT -- confirm against its definition.
    """
    ext_params = {'ext_uuid': create_bundle_from_pipeline()}

    # First run; its result is not inspected.
    api.apply(TEST_CONTEXT, PipelineB, params=ext_params)

    # Second run with identical parameters.
    rerun = api.apply(TEST_CONTEXT, PipelineB, params=ext_params)
    assert rerun['success'] is True
    assert rerun['did_work'] is False
def create_bundle_from_pipeline():
    """Apply ExternalPipeline into EXT_BUNDLE_NAME and return that bundle's uuid.

    Returns:
        The ``uuid`` attribute of the bundle fetched by EXT_BUNDLE_NAME from
        TEST_CONTEXT after the apply.
    """
    pipeline_params = {'test_param': EXT_TASK_PARAM_VAL}
    api.apply(TEST_CONTEXT,
              ExternalPipeline,
              params=pipeline_params,
              output_bundle=EXT_BUNDLE_NAME)
    return api.get(TEST_CONTEXT, EXT_BUNDLE_NAME).uuid
def test_float_task():
    """Apply FloatTask and check that 'float_task' holds the float 2.5."""
    before = api.search(TEST_CONTEXT)
    assert len(before) == 0, 'Context should be empty'

    api.apply(TEST_CONTEXT, FloatTask)

    value = api.get(TEST_CONTEXT, 'float_task').data
    assert value == 2.5, 'Data did not match output'
    # Exact type match: subclasses of float are rejected.
    assert type(value) is float, 'Data is not float'

    after = api.search(TEST_CONTEXT)
    assert len(after) == 1, 'Only one bundle should be present'
def test_string_task():
    """Apply StringTask and check that 'string_task' holds the text 'output'."""
    before = api.search(TEST_CONTEXT)
    assert len(before) == 0, 'Context should be empty'

    api.apply(TEST_CONTEXT, StringTask)

    value = api.get(TEST_CONTEXT, 'string_task').data
    assert value == 'output', 'Data did not match output'
    # Exact type match against six's text type.
    assert type(value) is six.text_type, 'Data is not string'

    after = api.search(TEST_CONTEXT)
    assert len(after) == 1, 'Only one bundle should be present'
def test_list_task():
    """Apply ListTask and check that 'list_task' holds [1, 2, 3] as an ndarray.

    The bundle data is compared element-wise with ``[1, 2, 3]`` and must be
    exactly of type ``np.ndarray``.
    """
    assert len(api.search(TEST_CONTEXT)) == 0, 'Context should be empty'

    api.apply(TEST_CONTEXT, ListTask)
    data = api.get(TEST_CONTEXT, 'list_task').data

    assert np.array_equal(data, [1, 2, 3]), 'Data did not match output'
    # The check is for an ndarray, so the failure message says so (the old
    # message claimed a list was expected).
    assert type(data) is np.ndarray, 'Data is not ndarray'

    assert len(api.search(TEST_CONTEXT)) == 1, \
        'Only one bundle should be present'
def test_dependant_tasks():
    """Apply 'C' and check its result and the number of bundles created.

    Bundle 'c' must hold the int 6, and the context must contain three
    bundles afterwards.
    """
    assert len(api.search(TEST_CONTEXT)) == 0, 'Context should be empty'

    api.apply(TEST_CONTEXT, 'C')
    data = api.get(TEST_CONTEXT, 'c').data

    assert data == 6, 'Data did not match output'
    # Exact type match; the message now names the type actually checked
    # (it previously said 'path').
    assert type(data) is int, 'Data is not int'

    assert len(api.search(TEST_CONTEXT)) == 3, \
        'Three bundles should be present'
def test_dict_task():
    """Apply DictTask and check that 'dict_task' holds {'hello': ['world']}."""
    setup()

    before = api.search(TEST_CONTEXT)
    assert len(before) == 0, 'Context should be empty'

    api.apply(TEST_CONTEXT, DictTask)

    value = api.get(TEST_CONTEXT, 'dict_task').data
    expected = {'hello': ['world']}
    assert value == expected, 'Data did not match output'
    # Exact type match: dict subclasses are rejected.
    assert type(value) is dict, 'Data is not dict'

    after = api.search(TEST_CONTEXT)
    assert len(after) == 1, 'Only one bundle should be present'
def test_file_task():
    """Apply FileTask and check that 'file_task' points at a file containing '5'."""
    before = api.search(TEST_CONTEXT)
    assert len(before) == 0, 'Context should be empty'

    api.apply(TEST_CONTEXT, FileTask)

    # The bundle data is used as a path to the produced file.
    output_path = api.get(TEST_CONTEXT, 'file_task').data
    with open(output_path) as handle:
        contents = handle.read()

    assert contents == '5', 'Data did not match output'
    assert type(output_path) is str, 'Data is not path'

    after = api.search(TEST_CONTEXT)
    assert len(after) == 1, 'Only one bundle should be present'
def test_df_task():
    """Apply DataFrameTask and check 'df_task' against a one-column frame."""
    before = api.search(TEST_CONTEXT)
    assert len(before) == 0, 'Context should be empty'

    api.apply(TEST_CONTEXT, DataFrameTask)
    actual = api.get(TEST_CONTEXT, 'df_task').data

    # Reference frame: a single column 'a' holding 1, 2, 3.
    expected = pd.DataFrame()
    expected['a'] = [1, 2, 3]

    assert expected.equals(actual), 'Data did not match output'
    assert type(actual) is pd.DataFrame, 'Data is not df'

    after = api.search(TEST_CONTEXT)
    assert len(after) == 1, 'Only one bundle should be present'
def test_non_managed_local():
    """Apply NonManagedLocal and check that bundle 'b1' links to a local file."""
    # Recreate the context so that it starts out empty.
    api.delete_context(TEST_CONTEXT)
    api.context(context_name=TEST_CONTEXT)

    before = api.search(TEST_CONTEXT)
    assert len(before) == 0, 'Context should be empty'

    api.apply(TEST_CONTEXT, NonManagedLocal)

    after = api.search(TEST_CONTEXT)
    assert len(after) == 1, 'Only one bundle should be present'
    print(api.cat(TEST_CONTEXT, 'b1'))

    # The first 'file' entry of the bundle must exist on local disk.
    bundle = api.search(TEST_CONTEXT, human_name='b1')[0]
    local_path = bundle.data['file'][0]
    assert os.path.exists(local_path), \
        'Local file should be present in bundle'
def test(run_test):
    """Check the effect of ``force`` and ``force_all`` on a two-task pipeline.

    Task A is applied four times and the uuids of bundles 'A' and 'B' are
    compared between runs:

    * ``force=True`` must yield a new A but keep B;
    * ``force_all=True`` must yield a new A and a new B;
    * ``force_all=True`` with ``set_ext_dep`` must yield a new A, keep B,
      and not crash.

    Args:
        run_test: not referenced in the body; presumably a pytest fixture
            that prepares TEST_CONTEXT -- confirm against its definition.
    """

    def current_uuids():
        # Read B first, then A, matching the order of the original lookups.
        b_uuid = api.get(TEST_CONTEXT, 'B').uuid
        a_uuid = api.get(TEST_CONTEXT, 'A').uuid
        return b_uuid, a_uuid

    # Initial run.  (An emptiness check on the context is currently disabled.)
    api.apply(TEST_CONTEXT, A, params={})
    first_B_uuid, first_A_uuid = current_uuids()

    # Force only the last task to re-run.
    api.apply(TEST_CONTEXT, A, force=True, params={})
    one_B_uuid, one_A_uuid = current_uuids()
    assert first_B_uuid == one_B_uuid
    assert first_A_uuid != one_A_uuid

    # Force every task to re-run.
    api.apply(TEST_CONTEXT, A, force_all=True, params={})
    all_B_uuid, all_A_uuid = current_uuids()
    assert all_B_uuid != one_B_uuid
    assert all_A_uuid != one_A_uuid

    # force_all must not crash when there is an external bundle.
    api.apply(TEST_CONTEXT, A, force_all=True, params={'set_ext_dep': True})
    final_B_uuid, final_A_uuid = current_uuids()
    assert final_B_uuid == all_B_uuid
    assert final_A_uuid != all_A_uuid
def test_single_task():
    """Apply 'A' twice and check its result and that only one bundle exists.

    Bundle 'a' must hold the int 2.  Applying 'A' a second time must leave
    the context with a single bundle.
    """
    assert len(api.search(TEST_CONTEXT)) == 0, 'Context should be empty'

    api.apply(TEST_CONTEXT, 'A')
    data = api.get(TEST_CONTEXT, 'a').data

    assert data == 2, 'Data did not match output'
    # Exact type match; the message now names the type actually checked
    # (it previously said 'path').
    assert type(data) is int, 'Data is not int'

    assert len(api.search(TEST_CONTEXT)) == 1, \
        'Only one bundle should be present'

    # A second apply must not add another bundle.
    api.apply(TEST_CONTEXT, 'A')
    assert len(api.search(TEST_CONTEXT)) == 1, \
        'Only one bundle should be present'
def test():
    """Apply Root2 and inspect the result attached to any raised exception.

    Root2 is applied with ``force=True`` and two workers, writing to output
    bundle 'test_api_exit'.  If the apply raises, the exception is expected
    to carry a ``result`` mapping whose 'did_work' is truthy and whose
    'success' is falsy.  The value returned by apply (``None`` when it
    raised) is printed in all cases.

    NOTE(review): when apply raises nothing, no assertion runs and the test
    passes; confirm whether a failure is actually required here.
    """
    api.context(TEST_CONTEXT)

    result = None
    try:
        result = api.apply(TEST_CONTEXT,
                           Root2,
                           output_bundle='test_api_exit',
                           params={},
                           force=True,
                           workers=2)
    except Exception as e:
        # An exception without a `result` attribute surfaces here as an
        # AttributeError, which also fails the test.
        print("Got exception {} result {} ".format(e, e.result))
        assert e.result['did_work']
        assert not e.result['success']
    finally:
        print("API apply returned {}".format(result))
def test_AB4(run_test):
    """Scenario 4: run A->B, re-run A with new parameters, then run A->B again.

    The first B run and the parameterised A run must both do work; the final
    B run must succeed without doing any work.

    Args:
        run_test: not referenced in the body; presumably a pytest fixture
            that prepares TEST_CONTEXT -- confirm against its definition.
    """
    first_run = api.apply(TEST_CONTEXT, B)
    assert first_run['success'] is True
    assert first_run['did_work'] is True

    # Run A on its own with explicit parameters.
    a_params = {'a': 2, 'b': 3}
    a_run = api.apply(TEST_CONTEXT, A, params=a_params)
    assert a_run['success'] is True
    assert a_run['did_work'] is True

    second_run = api.apply(TEST_CONTEXT, B)
    assert second_run['success'] is True
    assert second_run['did_work'] is False
def test_task_with_parameter():
    """Apply 'B' with two different values of ``n`` and check both results.

    With ``n=10`` bundle 'b' must hold the int 20 and the context one
    bundle; with ``n=20`` it must hold the int 40 and the context two
    bundles.
    """
    assert len(api.search(TEST_CONTEXT)) == 0, 'Context should be empty'

    api.apply(TEST_CONTEXT, 'B', params={'n': 10})
    data = api.get(TEST_CONTEXT, 'b').data

    assert data == 20, 'Data did not match output'
    # Exact type match; the message now names the type actually checked
    # (it previously said 'path').
    assert type(data) is int, 'Data is not int'

    assert len(api.search(TEST_CONTEXT)) == 1, 'One bundle should be present'

    # A different parameter value must produce an additional bundle.
    api.apply(TEST_CONTEXT, 'B', params={'n': 20})
    data = api.get(TEST_CONTEXT, 'b').data

    assert data == 40, 'Data did not match output'
    assert type(data) is int, 'Data is not int'

    assert len(api.search(TEST_CONTEXT)) == 2, 'Two bundles should be present'