def test_fetch_by_status():
    """get_runs_for_status partitions stored runs by their current status."""
    storage = PostgresRunStorage.create_nuked_storage(get_test_conn_string())
    assert storage

    one, two, three, four = (str(uuid.uuid4()) for _ in range(4))

    # seed one NOT_STARTED, two STARTED, one FAILURE run
    for run_id, status in [
        (one, PipelineRunStatus.NOT_STARTED),
        (two, PipelineRunStatus.STARTED),
        (three, PipelineRunStatus.STARTED),
        (four, PipelineRunStatus.FAILURE),
    ]:
        storage.add_run(build_run(run_id=run_id, pipeline_name='some_pipeline', status=status))

    def ids_with_status(status):
        # collect the run ids the storage reports for a given status
        return {run.run_id for run in storage.get_runs_for_status(status)}

    assert ids_with_status(PipelineRunStatus.NOT_STARTED) == {one}
    assert ids_with_status(PipelineRunStatus.STARTED) == {two, three}
    assert ids_with_status(PipelineRunStatus.FAILURE) == {four}
    assert ids_with_status(PipelineRunStatus.SUCCESS) == set()
def test_add_get_postgres_run_storage(pg_db):
    """Round-trip a manually constructed PipelineRun through PostgresRunStorage.

    NOTE(review): another test with this exact name appears later in this file;
    pytest collects only the last same-named function in a module — confirm the
    duplication is intended.
    """
    run_storage = PostgresRunStorage.create_nuked_storage(get_test_conn_string())
    run_id = str(uuid.uuid4())
    run_to_add = PipelineRun(
        pipeline_name='pipeline_name',
        run_id=run_id,
        environment_dict={},
        mode='some_mode',
        # https://github.com/dagster-io/dagster/issues/1709
        # ExecutionSelector should be threaded all the way
        # down from the top
        selector=ExecutionSelector('pipeline_name'),
        reexecution_config=None,
        step_keys_to_execute=None,
        status=PipelineRunStatus.NOT_STARTED,
    )
    run_storage.add_run(run_to_add)

    # fetch-by-id returns an equal run
    assert run_storage.get_run_by_id(run_id) == run_to_add

    # membership checks
    assert run_storage.has_run(run_id)
    assert not run_storage.has_run(str(uuid.uuid4()))

    # listing: unfiltered, filtered by pipeline, and filtered by a missing name
    assert run_storage.all_runs() == [run_to_add]
    assert run_storage.all_runs_for_pipeline('pipeline_name') == [run_to_add]
    assert run_storage.all_runs_for_pipeline('nope') == []

    # wipe clears everything
    run_storage.wipe()
    assert run_storage.all_runs() == []
def test_postgres_instance(multi_postgres):
    """Execute a pipeline on a DagsterInstance wired to postgres-backed storages."""
    run_conn, event_conn = multi_postgres
    run_storage = PostgresRunStorage.create_clean_storage(run_conn)
    event_storage = PostgresEventLogStorage.create_clean_storage(event_conn)

    with seven.TemporaryDirectory() as temp_dir:
        instance = DagsterInstance(
            instance_type=InstanceType.PERSISTENT,
            local_artifact_storage=LocalArtifactStorage(temp_dir),
            run_storage=run_storage,
            event_storage=event_storage,
            compute_log_manager=LocalComputeLogManager(temp_dir),
        )
        result = execute_pipeline(simple, instance=instance)

        # run storage recorded the run and its terminal SUCCESS status
        assert run_storage.has_run(result.run_id)
        assert run_storage.get_run_by_id(result.run_id).status == PipelineRunStatus.SUCCESS

        # event log contains a PIPELINE_SUCCESS dagster event for the run
        dagster_event_types = [
            record.dagster_event.event_type
            for record in event_storage.get_logs_for_run(result.run_id)
            if record.is_dagster_event
        ]
        assert DagsterEventType.PIPELINE_SUCCESS in dagster_event_types

        # aggregate stats reflect one succeeded step and a recorded end time
        run_stats = event_storage.get_stats_for_run(result.run_id)
        assert run_stats.steps_succeeded == 1
        assert run_stats.end_time is not None
def test_nuke():
    """wipe() removes every stored run."""
    storage = PostgresRunStorage.create_nuked_storage(get_test_conn_string())
    assert storage

    storage.add_run(build_run(run_id=str(uuid.uuid4()), pipeline_name='some_pipeline'))
    assert len(storage.all_runs()) == 1

    storage.wipe()
    assert list(storage.all_runs()) == []
def test_handle_run_event_pipeline_success_test():
    """handle_run_event updates the status only of the run whose id matches."""
    run_storage = PostgresRunStorage.create_nuked_storage(get_test_conn_string())
    run_id = str(uuid.uuid4())
    run_storage.add_run(
        PipelineRun.create_empty_run(pipeline_name='pipeline_name', run_id=run_id)
    )

    def make_event(event_type):
        # every event we emit is identical except for its type
        return DagsterEvent(
            message='a message',
            event_type_value=event_type.value,
            pipeline_name='pipeline_name',
            step_key=None,
            solid_handle=None,
            step_kind_value=None,
            logging_tags=None,
        )

    # a start event moves the run to STARTED
    run_storage.handle_run_event(run_id, make_event(DagsterEventType.PIPELINE_START))
    assert run_storage.get_run_by_id(run_id).status == PipelineRunStatus.STARTED

    # a success event addressed to a *different* run id must not touch our run
    run_storage.handle_run_event(
        str(uuid.uuid4()), make_event(DagsterEventType.PIPELINE_SUCCESS)
    )
    assert run_storage.get_run_by_id(run_id).status == PipelineRunStatus.STARTED

    # the success event for the correct run id flips it to SUCCESS
    run_storage.handle_run_event(run_id, make_event(DagsterEventType.PIPELINE_SUCCESS))
    assert run_storage.get_run_by_id(run_id).status == PipelineRunStatus.SUCCESS
def clean_run_storage(conn_string):
    """Return a freshly wiped PostgresRunStorage, requiring dagster_postgres."""
    check.invariant(
        TestPostgresInstance.dagster_postgres_installed(),
        "dagster_postgres must be installed to test with postgres",
    )
    # imported lazily so the module is importable without dagster_postgres
    from dagster_postgres.run_storage import PostgresRunStorage  # pylint: disable=import-error

    run_storage = PostgresRunStorage.create_clean_storage(conn_string)
    assert run_storage
    return run_storage
def test_fetch_by_pipeline():
    """all_runs_for_pipeline filters runs by pipeline name."""
    storage = PostgresRunStorage.create_nuked_storage(get_test_conn_string())
    assert storage

    target, other = str(uuid.uuid4()), str(uuid.uuid4())
    storage.add_run(build_run(run_id=target, pipeline_name='some_pipeline'))
    storage.add_run(build_run(run_id=other, pipeline_name='some_other_pipeline'))
    assert len(storage.all_runs()) == 2

    # only the run belonging to 'some_pipeline' comes back
    matching = storage.all_runs_for_pipeline('some_pipeline')
    assert len(matching) == 1
    assert matching[0].run_id == target
def test_fetch_by_tag():
    """all_runs_for_tag matches on an exact tag key/value pair."""
    storage = PostgresRunStorage.create_nuked_storage(get_test_conn_string())
    assert storage

    tagged_hello = str(uuid.uuid4())
    tagged_goodbye = str(uuid.uuid4())
    untagged = str(uuid.uuid4())
    storage.add_run(
        build_run(run_id=tagged_hello, pipeline_name='some_pipeline', tags={'mytag': 'hello'})
    )
    storage.add_run(
        build_run(run_id=tagged_goodbye, pipeline_name='some_pipeline', tags={'mytag': 'goodbye'})
    )
    storage.add_run(build_run(run_id=untagged, pipeline_name='some_pipeline'))
    assert len(storage.all_runs()) == 3

    # only the run tagged mytag=hello matches; other value / no tag are excluded
    hello_runs = storage.all_runs_for_tag('mytag', 'hello')
    assert len(hello_runs) == 1
    assert hello_runs[0].run_id == tagged_hello
def test_add_get_postgres_run_storage(pg_db):
    """Round-trip a run built via build_run through PostgresRunStorage.

    NOTE(review): an earlier test in this file shares this exact name; pytest
    collects only the last same-named function in a module — confirm the
    duplication is intended.
    """
    run_storage = PostgresRunStorage.create_nuked_storage(get_test_conn_string())
    run_id = str(uuid.uuid4())
    run_to_add = build_run(pipeline_name='pipeline_name', run_id=run_id)

    # add_run reports success
    assert run_storage.add_run(run_to_add)

    # fetch-by-id and membership checks
    assert run_storage.get_run_by_id(run_id) == run_to_add
    assert run_storage.has_run(run_id)
    assert not run_storage.has_run(str(uuid.uuid4()))

    # listing: unfiltered, filtered by pipeline, and filtered by a missing name
    assert run_storage.all_runs() == [run_to_add]
    assert run_storage.all_runs_for_pipeline('pipeline_name') == [run_to_add]
    assert run_storage.all_runs_for_pipeline('nope') == []

    # wipe clears everything
    run_storage.wipe()
    assert run_storage.all_runs() == []
def test_fetch_by_status_cursored():
    """cursor/limit paginate get_runs_for_status over earlier-inserted runs."""
    storage = PostgresRunStorage.create_nuked_storage(get_test_conn_string())
    assert storage

    one, two, three, four = (str(uuid.uuid4()) for _ in range(4))
    # three STARTED runs with one NOT_STARTED run interleaved
    for run_id, status in [
        (one, PipelineRunStatus.STARTED),
        (two, PipelineRunStatus.STARTED),
        (three, PipelineRunStatus.NOT_STARTED),
        (four, PipelineRunStatus.STARTED),
    ]:
        storage.add_run(build_run(run_id=run_id, pipeline_name='some_pipeline', status=status))

    # STARTED runs preceding `four`: both earlier STARTED runs
    before_four = storage.get_runs_for_status(PipelineRunStatus.STARTED, cursor=four)
    assert len(before_four) == 2
    assert {run.run_id for run in before_four} == {one, two}

    # STARTED runs preceding `two`: just `one`
    before_two = storage.get_runs_for_status(PipelineRunStatus.STARTED, cursor=two)
    assert len(before_two) == 1
    assert {run.run_id for run in before_two} == {one}

    # nothing STARTED precedes `one`
    assert not storage.get_runs_for_status(PipelineRunStatus.STARTED, cursor=one)

    # limit=1 trims to the most recent matching run before the cursor
    limited = storage.get_runs_for_status(PipelineRunStatus.STARTED, cursor=four, limit=1)
    assert len(limited) == 1
    assert limited[0].run_id == two
def test_slice():
    """cursor/limit slicing behaves consistently across the run-fetching APIs."""
    storage = PostgresRunStorage.create_nuked_storage(get_test_conn_string())
    # sort so the cursor semantics are deterministic regardless of uuid draw order
    one, two, three = sorted(str(uuid.uuid4()) for _ in range(3))
    for run_id in (one, two, three):
        storage.add_run(
            build_run(run_id=run_id, pipeline_name='some_pipeline', tags={'mytag': 'hello'})
        )

    def check_sliced(fetch_all, fetch_sliced):
        # full fetch sees all three runs; a one-item slice before `three` yields `two`
        assert len(fetch_all()) == 3
        sliced = fetch_sliced()
        assert len(sliced) == 1
        assert sliced[0].run_id == two

    check_sliced(
        storage.all_runs,
        lambda: storage.all_runs(cursor=three, limit=1),
    )
    check_sliced(
        lambda: storage.all_runs_for_pipeline('some_pipeline'),
        lambda: storage.all_runs_for_pipeline('some_pipeline', cursor=three, limit=1),
    )
    check_sliced(
        lambda: storage.all_runs_for_tag('mytag', 'hello'),
        lambda: storage.all_runs_for_tag('mytag', 'hello', cursor=three, limit=1),
    )
def test_handle_run_event_pipeline_success_test():
    """handle_run_event updates status only for the run whose id matches.

    NOTE(review): an earlier test in this file shares this exact name; pytest
    collects only the last same-named function in a module — confirm the
    duplication is intended.
    """
    run_storage = PostgresRunStorage.create_nuked_storage(get_test_conn_string())
    run_id = str(uuid.uuid4())
    run_storage.add_run(
        PipelineRun(
            pipeline_name='pipeline_name',
            run_id=run_id,
            environment_dict={},
            mode='some_mode',
            # https://github.com/dagster-io/dagster/issues/1709
            # ExecutionSelector should be threaded all the way
            # down from the top
            selector=ExecutionSelector('pipeline_name'),
            reexecution_config=None,
            step_keys_to_execute=None,
            status=PipelineRunStatus.NOT_STARTED,
        )
    )

    def event_of(event_type):
        # every event we emit is identical except for its type
        return DagsterEvent(
            message='a message',
            event_type_value=event_type.value,
            pipeline_name='pipeline_name',
            step_key=None,
            solid_handle=None,
            step_kind_value=None,
            logging_tags=None,
        )

    # a start event moves the run to STARTED
    run_storage.handle_run_event(run_id, event_of(DagsterEventType.PIPELINE_START))
    assert run_storage.get_run_by_id(run_id).status == PipelineRunStatus.STARTED

    # a success event addressed to an unrelated run id must not touch our run
    run_storage.handle_run_event(
        str(uuid.uuid4()), event_of(DagsterEventType.PIPELINE_SUCCESS)
    )
    assert run_storage.get_run_by_id(run_id).status == PipelineRunStatus.STARTED

    # the success event for the correct run id flips it to SUCCESS
    run_storage.handle_run_event(run_id, event_of(DagsterEventType.PIPELINE_SUCCESS))
    assert run_storage.get_run_by_id(run_id).status == PipelineRunStatus.SUCCESS
def clean_storage(conn_string):  # pylint: disable=redefined-outer-name
    """Return a freshly wiped PostgresRunStorage for ``conn_string``."""
    run_storage = PostgresRunStorage.create_clean_storage(conn_string)
    assert run_storage
    return run_storage
def run_storage(self, conn_string):  # pylint: disable=arguments-differ
    """Build a wiped PostgresRunStorage backed by ``conn_string``."""
    created = PostgresRunStorage.create_clean_storage(conn_string)
    assert created
    return created