def test_retry_early_terminate(self, graphql_context):
    """Terminate a run after its first step, then resume it with a retry run.

    Launches ``retry_multi_input_early_terminate_pipeline`` with
    ``wait_to_terminate`` set to ``True`` on both input solids, terminates the
    run once one step has succeeded, and then re-executes it with
    ``RESUME_RETRY_TAG`` and ``wait_to_terminate`` set to ``False``. The retry
    is expected to skip ``return_one`` and succeed on the remaining steps.

    Args:
        graphql_context: GraphQL test context; its ``instance`` is used to
            inspect run stats, logs, and run status.

    Raises:
        AssertionError: if any expectation fails, including the first step
            not succeeding within the polling budget.
    """
    instance = graphql_context.instance
    selector = infer_pipeline_selector(
        graphql_context, "retry_multi_input_early_terminate_pipeline"
    )
    run_id = make_new_run_id()
    execute_dagster_graphql(
        graphql_context,
        LAUNCH_PIPELINE_EXECUTION_MUTATION,
        variables={
            "executionParams": {
                "mode": "default",
                "selector": selector,
                "runConfigData": {
                    "solids": {
                        "get_input_one": {"config": {"wait_to_terminate": True}},
                        "get_input_two": {"config": {"wait_to_terminate": True}},
                    },
                },
                "executionMetadata": {"runId": run_id},
            }
        },
    )

    # Wait until the first step succeeded. The wait is bounded so that a run
    # which never makes progress fails the test instead of hanging it.
    max_polls = 600  # roughly 60 seconds at 0.1 seconds per poll
    for _ in range(max_polls):
        if instance.get_run_stats(run_id).steps_succeeded >= 1:
            break
        sleep(0.1)
    else:
        raise AssertionError(
            "Timed out waiting for the first step of run {} to succeed".format(run_id)
        )

    # Terminate the current pipeline run at the second step
    graphql_context.instance.run_launcher.terminate(run_id)

    records = instance.all_logs(run_id)

    # The first step should succeed, the second should fail or not start,
    # and the following steps should not appear in records
    assert step_did_succeed_in_records(records, "return_one")
    assert not step_did_fail_in_records(records, "return_one")
    assert any(
        [
            step_did_fail_in_records(records, "get_input_one"),
            step_did_not_run_in_records(records, "get_input_one"),
        ]
    )
    assert step_did_not_run_in_records(records, "get_input_two")
    assert step_did_not_run_in_records(records, "sum_inputs")

    # Wait for the original run to finish
    poll_for_finished_run(instance, run_id, timeout=30)
    assert instance.get_run_by_id(run_id).status == PipelineRunStatus.CANCELED

    # Start retry
    new_run_id = make_new_run_id()

    execute_dagster_graphql_and_finish_runs(
        graphql_context,
        LAUNCH_PIPELINE_REEXECUTION_MUTATION,
        variables={
            "executionParams": {
                "mode": "default",
                "selector": selector,
                "runConfigData": {
                    "solids": {
                        "get_input_one": {"config": {"wait_to_terminate": False}},
                        "get_input_two": {"config": {"wait_to_terminate": False}},
                    },
                },
                "executionMetadata": {
                    "runId": new_run_id,
                    "rootRunId": run_id,
                    "parentRunId": run_id,
                    "tags": [{"key": RESUME_RETRY_TAG, "value": "true"}],
                },
            }
        },
    )

    retry_records = instance.all_logs(new_run_id)

    # The first step should not run and the other three steps should succeed in retry
    assert step_did_not_run_in_records(retry_records, "return_one")
    assert step_did_succeed_in_records(retry_records, "get_input_one")
    assert step_did_succeed_in_records(retry_records, "get_input_two")
    assert step_did_succeed_in_records(retry_records, "sum_inputs")
def test_successful_pipeline_reexecution(snapshot):
    """Run ``csv_hello_world`` once, then re-execute only ``sum_sq_solid.compute``.

    The second execution passes a ``reexecutionConfig`` that points at the
    ``result`` output of ``sum_solid.compute`` from the first run. Both
    responses are matched against the snapshot, and the filesystem
    intermediate store of each run is inspected.
    """
    # String form expected for the sum_sq_solid output in both runs.
    expected_value_repr = (
        "[OrderedDict([('num1', '1'), ('num2', '2'), ('sum', 3), "
        "('sum_sq', 9)]), OrderedDict([('num1', '3'), ('num2', '4'), ('sum', 7), "
        "('sum_sq', 49)])]"
    )

    # --- first, full execution -------------------------------------------
    run_id = str(uuid.uuid4())
    result_one = execute_dagster_graphql(
        define_context(),
        START_PIPELINE_EXECUTION_SNAPSHOT_QUERY,
        variables={
            "executionParams": {
                "selector": {"name": "csv_hello_world"},
                "environmentConfigData": csv_hello_world_solids_config_fs_storage(),
                "executionMetadata": {"runId": run_id},
                "mode": "default",
            }
        },
    )

    first_typename = result_one.data["startPipelineExecution"]["__typename"]
    assert first_typename == "StartPipelineExecutionSuccess"

    snapshot.assert_match(result_one.data)

    store = FileSystemIntermediateStore(run_id)
    assert store.has_intermediate(None, "sum_solid.compute")
    assert store.has_intermediate(None, "sum_sq_solid.compute")
    first_sum_sq = store.get_intermediate(None, "sum_sq_solid.compute", PoorMansDataFrame)
    assert str(first_sum_sq) == expected_value_repr

    # --- second, partial re-execution ------------------------------------
    new_run_id = str(uuid.uuid4())
    result_two = execute_dagster_graphql(
        define_context(),
        START_PIPELINE_EXECUTION_SNAPSHOT_QUERY,
        variables={
            "executionParams": {
                "selector": {"name": "csv_hello_world"},
                "environmentConfigData": csv_hello_world_solids_config_fs_storage(),
                "stepKeys": ["sum_sq_solid.compute"],
                "executionMetadata": {"runId": new_run_id},
                "mode": "default",
            },
            "reexecutionConfig": {
                "previousRunId": run_id,
                "stepOutputHandles": [
                    {"stepKey": "sum_solid.compute", "outputName": "result"}
                ],
            },
        },
    )

    query_result = result_two.data["startPipelineExecution"]
    assert query_result["__typename"] == "StartPipelineExecutionSuccess"

    logs = query_result["run"]["logs"]["nodes"]
    assert isinstance(logs, list)
    assert has_event_of_type(logs, "PipelineStartEvent")
    assert has_event_of_type(logs, "PipelineSuccessEvent")
    assert not has_event_of_type(logs, "PipelineFailureEvent")

    # Only the re-executed step should have emitted an output event.
    assert not get_step_output_event(logs, "sum_solid.compute")
    assert get_step_output_event(logs, "sum_sq_solid.compute")

    snapshot.assert_match(result_two.data)

    store = FileSystemIntermediateStore(new_run_id)
    assert not store.has_intermediate(
        None, "sum_solid.inputs.num.read", "input_thunk_output"
    )
    assert store.has_intermediate(None, "sum_solid.compute")
    assert store.has_intermediate(None, "sum_sq_solid.compute")
    second_sum_sq = store.get_intermediate(None, "sum_sq_solid.compute", PoorMansDataFrame)
    assert str(second_sum_sq) == expected_value_repr
def test_production_query(graphql_context):
    """Execute ``PRODUCTION_QUERY`` and expect data back with no errors."""
    response = execute_dagster_graphql(graphql_context, PRODUCTION_QUERY)

    assert not response.errors
    assert response.data
def test_asset_in_progress(self, graphql_context):
    """Check ``inProgressRunsByStep`` while ``hanging_job`` is executing.

    Launches ``hanging_job`` with ``hanging_asset_resource`` configured with a
    temp file path, waits for that path to exist, queries
    ``GET_ASSET_IN_PROGRESS_RUNS``, terminates the run, and then asserts that
    ``hanging_asset`` reports the run as in progress while ``never_runs_asset``
    reports it as unstarted.

    Args:
        graphql_context: GraphQL test context used for the mutation, the
            query, and access to the run launcher.

    Raises:
        AssertionError: if any expectation fails, including the marker file
            not appearing within the polling budget.
    """
    selector = infer_pipeline_selector(graphql_context, "hanging_job")
    run_id = "foo"

    with safe_tempfile_path() as path:
        result = execute_dagster_graphql(
            graphql_context,
            LAUNCH_PIPELINE_EXECUTION_MUTATION,
            variables={
                "executionParams": {
                    "selector": selector,
                    "mode": "default",
                    "runConfigData": {
                        "resources": {
                            "hanging_asset_resource": {"config": {"file": path}}
                        }
                    },
                    "executionMetadata": {"runId": run_id},
                }
            },
        )

        assert not result.errors
        assert result.data

        # From here on a run has been launched; always terminate it, even if
        # waiting or querying fails, so a hanging run is not left behind.
        try:
            # ensure the execution has happened; bounded so that a run which
            # never creates the file fails the test instead of hanging it
            max_polls = 600  # roughly 60 seconds at 0.1 seconds per poll
            for _ in range(max_polls):
                if os.path.exists(path):
                    break
                time.sleep(0.1)
            else:
                raise AssertionError(
                    "Timed out waiting for {} to be created".format(path)
                )

            result = execute_dagster_graphql(
                graphql_context,
                GET_ASSET_IN_PROGRESS_RUNS,
                variables={
                    "repositorySelector": infer_repository_selector(graphql_context)
                },
            )
        finally:
            graphql_context.instance.run_launcher.terminate(run_id)

        assert result.data
        assert result.data["repositoryOrError"]
        assert result.data["repositoryOrError"]["inProgressRunsByStep"]

        in_progress_runs_by_step = result.data["repositoryOrError"][
            "inProgressRunsByStep"
        ]

        assert len(in_progress_runs_by_step) == 2

        hanging_asset_status = in_progress_runs_by_step[0]
        never_runs_asset_status = in_progress_runs_by_step[1]
        # graphql endpoint returns unordered list of steps
        # swap if never_runs_asset_status is first in list
        if hanging_asset_status["stepKey"] != "hanging_asset":
            never_runs_asset_status, hanging_asset_status = (
                hanging_asset_status,
                never_runs_asset_status,
            )

        assert hanging_asset_status["stepKey"] == "hanging_asset"
        assert len(hanging_asset_status["inProgressRuns"]) == 1
        assert hanging_asset_status["inProgressRuns"][0]["runId"] == run_id
        assert len(hanging_asset_status["unstartedRuns"]) == 0

        assert never_runs_asset_status["stepKey"] == "never_runs_asset"
        assert len(never_runs_asset_status["inProgressRuns"]) == 0
        assert len(never_runs_asset_status["unstartedRuns"]) == 1
        assert never_runs_asset_status["unstartedRuns"][0]["runId"] == run_id
def execute(self, gql_query, variable_values=None):
    """Run ``gql_query`` through ``execute_dagster_graphql`` and return its data.

    Args:
        gql_query: The GraphQL query to execute.
        variable_values: Optional variables forwarded to the query.

    Returns:
        The ``data`` attribute of the execution result.
    """
    # NOTE(review): ``graphql_context`` is not a parameter; it is presumably
    # captured from an enclosing scope that is not visible here.
    outcome = execute_dagster_graphql(graphql_context, gql_query, variable_values)
    return outcome.data
def test_type_rendering(graphql_context):
    """Execute ``TYPE_RENDER_QUERY`` for ``more_complicated_nested_config``."""
    pipeline_selector = infer_pipeline_selector(
        graphql_context, "more_complicated_nested_config"
    )
    query_variables = {"selector": pipeline_selector}

    response = execute_dagster_graphql(graphql_context, TYPE_RENDER_QUERY, query_variables)

    assert not response.errors
    assert response.data
def test_load_workspace(self, graphql_context):
    """Reload a workspace that contains one broken location and inspect it.

    The origins from ``multi_location.yaml`` are extended with an origin that
    points at ``made_up_file.py``; the workspace query is then expected to
    report three entries: two loaded repository locations and one
    ``PythonError`` entry named ``error_location``.
    """
    # Add an error origin
    original_origins = location_origins_from_yaml_paths(
        [file_relative_path(__file__, "multi_location.yaml")]
    )
    with mock.patch(
        "dagster.core.workspace.load_target.location_origins_from_yaml_paths",
    ) as origins_mock:
        broken_origin = ManagedGrpcPythonEnvRepositoryLocationOrigin(
            location_name="error_location",
            loadable_target_origin=LoadableTargetOrigin(
                python_file="made_up_file.py", executable_path=sys.executable
            ),
        )
        original_origins.append(broken_origin)
        origins_mock.return_value = original_origins

        # Lower bound for the update timestamps checked further down.
        reload_time = time.time()

        new_context = graphql_context.reload_workspace()

        result = execute_dagster_graphql(new_context, WORKSPACE_QUERY)

        assert result
        assert result.data
        assert result.data["workspaceOrError"]
        assert result.data["workspaceOrError"]["__typename"] == "Workspace", str(
            result.data
        )

        nodes = result.data["workspaceOrError"]["locationEntries"]

        assert len(nodes) == 3
        assert all(
            entry["__typename"] == "WorkspaceLocationEntry" for entry in nodes
        ), str(nodes)

        # Split the entries by what their load attempt produced.
        success_nodes = []
        failures = []
        for entry in nodes:
            outcome_type = entry["locationOrLoadError"]["__typename"]
            if outcome_type == "RepositoryLocation":
                success_nodes.append(entry)
            if outcome_type == "PythonError":
                failures.append(entry)

        assert len(success_nodes) == 2
        assert len(failures) == 1

        failure_node = failures[0]
        assert failure_node["name"] == "error_location"
        assert failure_node["loadStatus"] == "LOADED"
        assert "No such file or directory" in failure_node["locationOrLoadError"]["message"]

        for entry in nodes:
            assert entry["loadStatus"] == "LOADED"

            update_time = entry["updatedTimestamp"]
            assert reload_time <= update_time <= time.time()

            metadata_dict = {
                item["key"]: item["value"] for item in entry["displayMetadata"]
            }
            assert (
                "python_file" in metadata_dict
                or "module_name" in metadata_dict
                or "package_name" in metadata_dict
            )
def test_reload_workspace(self, graphql_context):
    """Reload the workspace repeatedly while the set of origins changes.

    Covers, in order: the unmodified workspace, a workspace reduced to its
    first origin, adding an origin that fails to load, adding a copy of a
    working origin under a new name, and renaming an existing origin.
    """

    def reload_workspace():
        # Issue the reload mutation against the shared context.
        return execute_dagster_graphql(graphql_context, RELOAD_WORKSPACE_QUERY)

    def checked_nodes(result):
        # Validate the response envelope and return the location nodes.
        assert result
        assert result.data
        assert result.data["reloadWorkspace"]
        assert (
            result.data["reloadWorkspace"]["__typename"]
            == "RepositoryLocationConnection"
        )
        return result.data["reloadWorkspace"]["nodes"]

    def nodes_of_type(nodes, typename):
        # Select the nodes whose GraphQL type is ``typename``.
        return [node for node in nodes if node["__typename"] == typename]

    nodes = checked_nodes(reload_workspace())
    assert len(nodes) == 2
    assert all(node["__typename"] == "RepositoryLocation" for node in nodes)

    original_origins = location_origins_from_yaml_paths(
        [file_relative_path(__file__, "multi_location.yaml")]
    )

    # From here on the origins the workspace loads are controlled by the mock.
    with mock.patch(
        "dagster.cli.workspace.cli_target.location_origins_from_yaml_paths",
    ) as origins_mock:
        # Keep only the first origin (i.e. remove the other one), reload
        origins_mock.return_value = original_origins[0:1]
        nodes = checked_nodes(reload_workspace())
        assert len(nodes) == 1
        assert all(
            node["__typename"] == "RepositoryLocation"
            and node["loadStatus"] == "LOADED"
            for node in nodes
        )

        # Simulate adding an origin with an error, reload
        original_origins.append(
            ManagedGrpcPythonEnvRepositoryLocationOrigin(
                location_name="error_location",
                loadable_target_origin=LoadableTargetOrigin(
                    python_file="made_up_file.py", executable_path=sys.executable
                ),
            )
        )
        origins_mock.return_value = original_origins

        nodes = checked_nodes(reload_workspace())
        assert len(nodes) == 3

        loaded_locations = [
            node
            for node in nodes
            if node["__typename"] == "RepositoryLocation"
            and node["loadStatus"] == "LOADED"
        ]
        assert len(loaded_locations) == 2

        failures = nodes_of_type(nodes, "RepositoryLocationLoadFailure")
        assert len(failures) == 1
        assert failures[0]["name"] == "error_location"
        assert failures[0]["loadStatus"] == "LOADED"

        # Add another origin without an error, reload
        original_origins.append(
            original_origins[0]._replace(location_name="location_copy")
        )
        origins_mock.return_value = original_origins

        nodes = reload_workspace().data["reloadWorkspace"]["nodes"]
        assert len(nodes) == 4
        assert len(nodes_of_type(nodes, "RepositoryLocation")) == 3
        failures = nodes_of_type(nodes, "RepositoryLocationLoadFailure")
        assert len(failures) == 1

        node_names = [node["name"] for node in nodes]
        assert "location_copy" in node_names
        assert original_origins[0].location_name in node_names

        # Finally, update one of the origins' location names. The mock still
        # returns this same list object, so the in-place change is picked up.
        original_origins[0] = original_origins[0]._replace(
            location_name="new_location_name"
        )

        nodes = reload_workspace().data["reloadWorkspace"]["nodes"]
        assert len(nodes) == 4
        assert len(nodes_of_type(nodes, "RepositoryLocation")) == 3
        failures = nodes_of_type(nodes, "RepositoryLocationLoadFailure")
        assert len(failures) == 1

        assert "new_location_name" in [node["name"] for node in nodes]
def test_query_all_solids(graphql_context, snapshot):
    """Match the data returned for ``all_solids_query()`` against the snapshot."""
    response = execute_dagster_graphql(graphql_context, all_solids_query())

    snapshot.assert_match(response.data)
def test_out_of_process_reload_location(self, graphql_context):
    """Reload the ``test`` location before and after its repositories change.

    The first reload is expected to expose ``test_repo``. The gRPC sync
    helpers are then mocked to describe a single repository ``new_repo``, and
    a second reload is expected to call each helper once and expose only
    ``new_repo``.
    """

    def reload_location():
        # A fresh variables dict is built for every call.
        return execute_dagster_graphql(
            graphql_context,
            RELOAD_REPOSITORY_LOCATION_QUERY,
            {"repositoryLocationName": "test"},
        )

    result = reload_location()

    assert result
    assert result.data
    assert result.data["reloadRepositoryLocation"]
    location = result.data["reloadRepositoryLocation"]
    assert location["__typename"] == "RepositoryLocation"
    assert location["name"] == "test"
    repositories = location["repositories"]
    assert len(repositories) == 1
    assert repositories[0]["name"] == "test_repo"
    assert location["isReloadSupported"] is True

    # Both patches target the module where the function is *used*, not the
    # module where it is defined.
    # see https://docs.python.org/3/library/unittest.mock.html#where-to-patch
    with mock.patch(
        "dagster.core.host_representation.repository_location.sync_list_repositories_grpc"
    ) as cli_command_mock, mock.patch(
        "dagster.core.host_representation.repository_location.sync_get_streaming_external_repositories_data_grpc"
    ) as external_repository_mock:

        @repository
        def new_repo():
            return []

        new_repo_data = external_repository_data_from_def(new_repo)

        external_repository_mock.return_value = {"new_repo": new_repo_data}

        cli_command_mock.return_value = ListRepositoriesResponse(
            repository_symbols=[],
            executable_path=sys.executable,
            repository_code_pointer_dict={
                "new_repo": CodePointer.from_python_file(__file__, "new_repo", None)
            },
        )

        result = reload_location()

        assert cli_command_mock.call_count == 1
        assert external_repository_mock.call_count == 1

        repositories = result.data["reloadRepositoryLocation"]["repositories"]
        assert len(repositories) == 1
        assert repositories[0]["name"] == "new_repo"
def test_reload_failure(self, graphql_context):
    """Reload the ``test`` location through a failure and back to success.

    A first reload succeeds. While ``sync_list_repositories_grpc`` is mocked
    to raise, two consecutive reloads are expected to report a
    ``RepositoryLocationLoadFailure`` carrying the mocked message. Once the
    mock is removed, a final reload is expected to succeed again.
    """

    def reload_location():
        # A fresh variables dict is built for every call.
        return execute_dagster_graphql(
            graphql_context,
            RELOAD_REPOSITORY_LOCATION_QUERY,
            {"repositoryLocationName": "test"},
        )

    def assert_loaded(result):
        # Checks shared by the successful reloads; returns the location payload.
        assert result
        assert result.data
        assert result.data["reloadRepositoryLocation"]
        location = result.data["reloadRepositoryLocation"]
        assert location["__typename"] == "RepositoryLocation"
        assert location["name"] == "test"
        repositories = location["repositories"]
        assert len(repositories) == 1
        assert repositories[0]["name"] == "test_repo"
        assert location["isReloadSupported"] is True
        return location

    def assert_load_failure(result):
        # Checks shared by the failing reloads.
        assert result
        assert result.data
        assert result.data["reloadRepositoryLocation"]
        location = result.data["reloadRepositoryLocation"]
        assert location["__typename"] == "RepositoryLocationLoadFailure"
        assert location["name"] == "test"
        assert "Mocked repository load failure" in location["error"]["message"]

    assert_loaded(reload_location())

    # The patch targets the module where the function is *used*, not the
    # module where it is defined.
    # see https://docs.python.org/3/library/unittest.mock.html#where-to-patch
    with mock.patch(
        "dagster.core.host_representation.repository_location.sync_list_repositories_grpc"
    ) as cli_command_mock:
        cli_command_mock.side_effect = Exception("Mocked repository load failure")

        assert_load_failure(reload_location())

        # Verify failure is idempotent
        assert_load_failure(reload_location())

    # can be reloaded again successfully
    reloaded = assert_loaded(reload_location())
    assert reloaded["loadStatus"] == "LOADED"
def test_launch_from_failure(self, graphql_context):
    """Launch a partition backfill, then launch it again with ``fromFailure``.

    The first backfill over partitions ``2`` and ``3`` runs while a marker
    file exists in the system temp directory; the second is launched with
    ``fromFailure`` set after the marker has been removed. The progress query
    for the second backfill is then checked.
    """
    partition_set_selector = {
        "repositorySelector": infer_repository_selector(graphql_context),
        "partitionSetName": "chained_integer_partition",
    }

    # trigger failure in the conditionally_fail solid
    # NOTE(review): presumably the solid fails while this file exists — confirm
    # against the pipeline definition.
    failure_marker = os.path.join(
        get_system_temp_directory(), "chained_failure_pipeline_conditionally_fail"
    )
    try:
        with open(failure_marker, "w"):
            result = execute_dagster_graphql_and_finish_runs(
                graphql_context,
                LAUNCH_PARTITION_BACKFILL_MUTATION,
                variables={
                    "backfillParams": {
                        "selector": partition_set_selector,
                        "partitionNames": ["2", "3"],
                    }
                },
            )
    finally:
        os.remove(failure_marker)

    assert not result.errors
    assert result.data
    launch_payload = result.data["launchPartitionBackfill"]
    assert launch_payload["__typename"] == "PartitionBackfillSuccess"

    # re-execute from failure (without the failure file)
    result = execute_dagster_graphql_and_finish_runs(
        graphql_context,
        LAUNCH_PARTITION_BACKFILL_MUTATION,
        variables={
            "backfillParams": {
                "selector": partition_set_selector,
                "partitionNames": ["2", "3"],
                "fromFailure": True,
            }
        },
    )
    assert not result.errors
    assert result.data
    relaunch_payload = result.data["launchPartitionBackfill"]
    assert relaunch_payload["__typename"] == "PartitionBackfillSuccess"
    backfill_id = relaunch_payload["backfillId"]

    result = execute_dagster_graphql(
        graphql_context,
        PARTITION_PROGRESS_QUERY,
        variables={"backfillId": backfill_id},
    )
    assert not result.errors
    assert result.data
    backfill = result.data["partitionBackfillOrError"]
    assert backfill["__typename"] == "PartitionBackfill"
    assert backfill["status"] == "REQUESTED"
    assert backfill["isPersisted"]
    assert backfill["numRequested"] == 0
    assert backfill["numTotal"] == 2
    assert backfill["fromFailure"]
def test_execute_hammer_through_dagit():
    """Launch ``hammer_pipeline`` through the GraphQL schema and check its events.

    The pipeline is launched with filesystem storage and a local dask cluster
    in its run config. After outstanding executions are drained, the run's
    log subscription is expected to contain both a ``PipelineStartEvent`` and
    a ``PipelineSuccessEvent``.
    """
    hammer_path = file_relative_path(
        __file__, "../../../dagster-test/dagster_test/toys/hammer.py"
    )
    recon_repo = ReconstructableRepository.for_file(hammer_path, "hammer_pipeline")
    instance = DagsterInstance.local_temp()

    context = DagsterGraphQLContext(
        locations=[InProcessRepositoryLocation(recon_repo)],
        instance=instance,
    )

    selector = infer_pipeline_selector(context, "hammer_pipeline")

    executor = SyncExecutor()

    run_config = {
        "storage": {"filesystem": {}},
        "execution": {"dask": {"config": {"cluster": {"local": {}}}}},
    }
    variables = {
        "executionParams": {
            "runConfigData": run_config,
            "selector": selector,
            "mode": "default",
        }
    }

    start_pipeline_result = graphql(
        request_string=LAUNCH_PIPELINE_EXECUTION_MUTATION,
        schema=create_schema(),
        context=context,
        variables=variables,
        executor=executor,
    )

    # Surface GraphQL errors instead of failing later on a missing key.
    if start_pipeline_result.errors:
        raise Exception("{}".format(start_pipeline_result.errors))

    launch_payload = start_pipeline_result.data["launchPipelineExecution"]
    run_id = launch_payload["run"]["runId"]

    context.drain_outstanding_executions()

    subscription = execute_dagster_graphql(
        context, SUBSCRIPTION_QUERY, variables={"runId": run_id}
    )

    subscribe_results = []
    subscription.subscribe(subscribe_results.append)

    first_payload = subscribe_results[0].data["pipelineRunLogs"]["messages"]
    event_types = [message["__typename"] for message in first_payload]

    assert "PipelineStartEvent" in event_types
    assert "PipelineSuccessEvent" in event_types
def test_basic_execute_plan_with_materialization(graphql_context):
    """Execute the ``csv_hello_world`` plan and inspect its materialization.

    ``sum_solid`` is configured to write its ``result`` output to a temp file.
    The execution plan is expected to contain both compute steps, and
    executing them is expected to yield exactly one
    ``StepMaterializationEvent`` whose single metadata entry carries the temp
    file path.
    """
    selector = infer_pipeline_selector(graphql_context, "csv_hello_world")

    with get_temp_file_name() as out_csv_path:
        sum_solid_config = {
            "inputs": {"num": file_relative_path(__file__, "../data/num.csv")},
            "outputs": [{"result": out_csv_path}],
        }
        run_config = {"solids": {"sum_solid": sum_solid_config}}

        result = execute_dagster_graphql(
            graphql_context,
            EXECUTION_PLAN_QUERY,
            variables={
                "pipeline": selector,
                "runConfigData": run_config,
                "mode": "default",
            },
        )

        # Step order is not asserted, only the set of step keys.
        planned_keys = {
            step["key"] for step in result.data["executionPlanOrError"]["steps"]
        }
        assert planned_keys == {"sum_solid.compute", "sum_sq_solid.compute"}

        instance = graphql_context.instance

        pipeline_run = instance.create_run_for_pipeline(
            pipeline_def=csv_hello_world, run_config=run_config
        )

        result = execute_dagster_graphql(
            graphql_context,
            EXECUTE_PLAN_QUERY,
            variables={
                "executionParams": {
                    "selector": selector,
                    "runConfigData": run_config,
                    "stepKeys": ["sum_solid.compute", "sum_sq_solid.compute"],
                    "executionMetadata": {"runId": pipeline_run.run_id},
                    "mode": "default",
                },
            },
        )

        assert result.data

        materialization_events = [
            event
            for event in result.data["executePlan"]["stepEvents"]
            if event["__typename"] == "StepMaterializationEvent"
        ]

        # exactly one materialization event is expected
        assert len(materialization_events) == 1
        step_mat_event = materialization_events[0]
        assert step_mat_event

        assert step_mat_event["materialization"]
        metadata_entries = step_mat_event["materialization"]["metadataEntries"]
        assert len(metadata_entries) == 1
        assert metadata_entries[0]["path"] == out_csv_path
def test_basic_execute_plan_with_materialization():
    """Execute the ``csv_hello_world`` plan on an ephemeral instance.

    ``sum_solid`` is configured to write its ``result`` output to a temp file.
    The execution plan is expected to list the two compute steps in order,
    and executing them is expected to yield exactly one
    ``StepMaterializationEvent`` whose single metadata entry carries the temp
    file path.
    """
    with get_temp_file_name() as out_csv_path:
        sum_solid_config = {
            "inputs": {"num": file_relative_path(__file__, "../data/num.csv")},
            "outputs": [{"result": out_csv_path}],
        }
        environment_dict = {"solids": {"sum_solid": sum_solid_config}}

        instance = DagsterInstance.ephemeral()

        result = execute_dagster_graphql(
            define_test_context(instance=instance),
            EXECUTION_PLAN_QUERY,
            variables={
                "pipeline": {"name": "csv_hello_world"},
                "environmentConfigData": environment_dict,
                "mode": "default",
            },
        )

        # Here the step order is part of the expectation.
        planned_keys = [step["key"] for step in result.data["executionPlan"]["steps"]]
        assert planned_keys == ["sum_solid.compute", "sum_sq_solid.compute"]

        pipeline_run = instance.create_run_for_pipeline(
            pipeline_def=csv_hello_world, environment_dict=environment_dict
        )

        result = execute_dagster_graphql(
            define_test_context(instance=instance),
            EXECUTE_PLAN_QUERY,
            variables={
                "executionParams": {
                    "selector": {"name": "csv_hello_world"},
                    "environmentConfigData": environment_dict,
                    "stepKeys": ["sum_solid.compute", "sum_sq_solid.compute"],
                    "executionMetadata": {"runId": pipeline_run.run_id},
                    "mode": "default",
                }
            },
        )

        assert result.data

        materialization_events = [
            event
            for event in result.data["executePlan"]["stepEvents"]
            if event["__typename"] == "StepMaterializationEvent"
        ]

        # exactly one materialization event is expected
        assert len(materialization_events) == 1
        step_mat_event = materialization_events[0]
        assert step_mat_event

        assert step_mat_event["materialization"]
        metadata_entries = step_mat_event["materialization"]["metadataEntries"]
        assert len(metadata_entries) == 1
        assert metadata_entries[0]["path"] == out_csv_path
def test_query_get_solid_exists(graphql_context):
    """Expect ``get_solid_query_exists()`` to resolve to the ``sum_solid`` definition."""
    response = execute_dagster_graphql(graphql_context, get_solid_query_exists())

    assert not response.errors
    definition = response.data["usedSolid"]["definition"]
    assert definition["name"] == "sum_solid"
def test_run_not_found(self, graphql_context):
    """Cancelling the run id ``nope`` should yield ``PipelineRunNotFoundError``."""
    response = execute_dagster_graphql(
        graphql_context,
        RUN_CANCELLATION_QUERY,
        variables={"runId": "nope"},
    )

    termination = response.data["terminatePipelineExecution"]
    assert termination["__typename"] == "PipelineRunNotFoundError"
def test_run_not_found(self, graphql_context):
    """Expect ``PipelineRunNotFoundError`` when terminating the run id ``nope``."""
    cancellation_variables = {"runId": "nope"}
    outcome = execute_dagster_graphql(
        graphql_context, RUN_CANCELLATION_QUERY, variables=cancellation_variables
    )

    typename = outcome.data["terminatePipelineExecution"]["__typename"]
    assert typename == "PipelineRunNotFoundError"
def test_basic_start_pipeline_execution_with_preset_failure(self, graphql_context):
    """Launching with a preset plus a conflicting parameter must be rejected.

    Three conflicts are exercised against the ``test_inline`` preset: a
    selector carrying a solid selection, explicit ``runConfigData``, and an
    explicit ``mode``. Each is expected to return a
    ``ConflictingExecutionParamsError`` with a specific message.
    """

    def launch(execution_params):
        # Send the launch mutation with the given execution params.
        return execute_dagster_graphql(
            graphql_context,
            LAUNCH_PIPELINE_EXECUTION_MUTATION,
            variables={"executionParams": execution_params},
        )

    def assert_conflict(result, expected_message):
        # The request itself succeeds; the conflict is reported in the payload.
        assert not result.errors
        assert result.data
        payload = result.data["launchPipelineExecution"]
        assert payload["__typename"] == "ConflictingExecutionParamsError"
        assert payload["message"] == expected_message

    # while illegally defining selector.solid_selection
    subset_selector = infer_pipeline_selector(
        graphql_context, "csv_hello_world", ["sum_sq_solid"]
    )
    assert_conflict(
        launch({"selector": subset_selector, "preset": "test_inline"}),
        "Invalid ExecutionParams. Cannot define selector.solid_selection when using a preset.",
    )

    # while illegally defining runConfigData
    selector = infer_pipeline_selector(graphql_context, "csv_hello_world")
    assert_conflict(
        launch(
            {
                "selector": selector,
                "preset": "test_inline",
                "runConfigData": csv_hello_world_solids_config(),
            }
        ),
        "Invalid ExecutionParams. Cannot define runConfigData when using a preset.",
    )

    # while illegally defining mode
    assert_conflict(
        launch({"selector": selector, "preset": "test_inline", "mode": "default"}),
        "Invalid ExecutionParams. Cannot define mode when using a preset.",
    )
def test_get_runs_over_graphql(self, graphql_context):
    """Create two tagged runs, query them back, then delete one of them.

    Executes ``multi_mode_with_resources`` twice with different tags and
    checks the runs query, the all-tags query, and the delete mutation,
    including deleting a run that has already been deleted.

    Args:
        graphql_context: GraphQL test context used for every query and
            mutation in the test.

    Raises:
        AssertionError: if any expectation about runs, tags, or deletion fails.
    """
    # This include needs to be here because its inclusion screws up
    # other code in this file which reads itself to load a repo
    from .utils import sync_execute_get_run_log_data

    selector = infer_pipeline_selector(graphql_context, "multi_mode_with_resources")

    payload_one = sync_execute_get_run_log_data(
        context=graphql_context,
        variables={
            "executionParams": {
                "selector": selector,
                "mode": "add_mode",
                "runConfigData": {"resources": {"op": {"config": 2}}},
                "executionMetadata": {"tags": [{"key": "fruit", "value": "apple"}]},
            }
        },
    )
    run_id_one = payload_one["run"]["runId"]

    read_context = graphql_context

    result = execute_dagster_graphql(
        read_context, RUNS_QUERY, variables={"selector": selector}
    )

    runs = result.data["pipelineOrError"]["runs"]
    assert len(runs) == 1

    tags = runs[0]["tags"]
    assert len(tags) == 1
    assert tags[0]["key"] == "fruit"
    assert tags[0]["value"] == "apple"

    payload_two = sync_execute_get_run_log_data(
        context=graphql_context,
        variables={
            "executionParams": {
                "selector": selector,
                "mode": "add_mode",
                "runConfigData": {"resources": {"op": {"config": 3}}},
                "executionMetadata": {"tags": [{"key": "veggie", "value": "carrot"}]},
            }
        },
    )
    run_id_two = payload_two["run"]["runId"]

    result = execute_dagster_graphql(
        read_context, RUNS_QUERY, variables={"selector": selector}
    )

    runs = result.data["pipelineOrError"]["runs"]
    assert len(runs) == 2

    all_tags_result = execute_dagster_graphql(read_context, ALL_TAGS_QUERY)
    tags = all_tags_result.data["pipelineRunTags"]

    assert len(tags) == 2
    tags_dict = {item["key"]: item["values"] for item in tags}

    assert tags_dict == {
        "fruit": ["apple"],
        "veggie": ["carrot"],
    }

    # delete the second run
    result = execute_dagster_graphql(
        read_context, DELETE_RUN_MUTATION, variables={"runId": run_id_two}
    )
    assert result.data["deletePipelineRun"]["__typename"] == "DeletePipelineRunSuccess"
    assert result.data["deletePipelineRun"]["runId"] == run_id_two

    # query it back out
    result = execute_dagster_graphql(
        read_context, RUNS_QUERY, variables={"selector": selector}
    )

    # first is the same
    run_one_data = _get_runs_data(result, run_id_one)
    assert run_one_data

    # second is gone
    run_two_data = _get_runs_data(result, run_id_two)
    assert run_two_data is None

    # try to delete the second run again; a single repeat is enough to show
    # that deleting an already-deleted run reports "not found"
    result = execute_dagster_graphql(
        read_context, DELETE_RUN_MUTATION, variables={"runId": run_id_two}
    )
    assert result.data["deletePipelineRun"]["__typename"] == "PipelineRunNotFoundError"
def test_asset_partitions_in_pipeline(self, graphql_context):
    """Check ``partitionKeys`` of asset nodes for three differently partitioned jobs.

    ``two_assets_job`` is expected to have no partition keys,
    ``static_partitioned_assets_job`` the keys ``a`` through ``d`` on both
    assets, and ``time_partitioned_assets_job`` more than 100 keys starting
    at ``2021-05-05-01:00``.
    """

    def load_asset_nodes(job_name):
        # Query the asset nodes of ``job_name`` and validate the envelope.
        selector = infer_pipeline_selector(graphql_context, job_name)
        result = execute_dagster_graphql(
            graphql_context,
            GET_ASSET_PARTITIONS_FROM_KEYS,
            variables={"pipelineSelector": selector},
        )
        assert result.data
        assert result.data["pipelineOrError"]
        assert result.data["pipelineOrError"]["assetNodes"]
        asset_nodes = result.data["pipelineOrError"]["assetNodes"]
        assert len(asset_nodes) == 2
        return asset_nodes

    # Unpartitioned job: the first asset has an empty key list.
    unpartitioned_nodes = load_asset_nodes("two_assets_job")
    assert unpartitioned_nodes[0]["partitionKeys"] == []

    # Statically partitioned job: both assets expose the same four keys.
    static_nodes = load_asset_nodes("static_partitioned_assets_job")
    for asset_node in (static_nodes[0], static_nodes[1]):
        static_keys = asset_node["partitionKeys"]
        assert static_keys and static_keys == ["a", "b", "c", "d"]

    time_nodes = load_asset_nodes("time_partitioned_assets_job")
    hourly_keys = time_nodes[0]["partitionKeys"]

    # test partition starts at "2021-05-05-01:00". Should be > 100 partition keys
    # since partition is hourly
    assert hourly_keys and len(hourly_keys) > 100
    assert hourly_keys[0] == "2021-05-05-01:00"
    assert hourly_keys[1] == "2021-05-05-02:00"
def test_run_groups_over_time():
    """Run-group queries keep describing runs by their stored pipeline snapshot.

    Four runs are executed against the repository returned by
    ``get_repo_at_time_1``; the run groups are then queried through a context
    built from ``get_repo_at_time_1`` and again through one built from
    ``get_repo_at_time_2``. In both cases each run must report the pipeline
    name and solid selection it was launched with.
    """

    def _snapshot(pipeline_name, solid_selection=None):
        # Expected value of a run's "pipeline" field.
        return {
            "__typename": "PipelineSnapshot",
            "name": pipeline_name,
            "solidSelection": solid_selection,
        }

    def _index_runs(response):
        # Validate the run-groups payload (four groups) and flatten it into a
        # mapping of run id -> run.
        assert "runGroupsOrError" in response.data
        assert "results" in response.data["runGroupsOrError"]
        groups = response.data["runGroupsOrError"]["results"]
        assert len(groups) == 4
        return {run["runId"]: run for group in groups for run in group["runs"]}

    with seven.TemporaryDirectory() as tempdir:
        instance = DagsterInstance.local_temp(tempdir=tempdir)

        repo_1 = get_repo_at_time_1()

        # Two full runs followed by one single-solid subset run for each of
        # solid_A and solid_B.
        full_evolve_run_id = execute_pipeline(
            repo_1.get_pipeline("evolving_pipeline"), instance=instance
        ).run_id
        foo_run_id = execute_pipeline(
            repo_1.get_pipeline("foo_pipeline"), instance=instance
        ).run_id
        evolve_a_run_id = execute_pipeline(
            repo_1.get_pipeline("evolving_pipeline").get_pipeline_subset_def({"solid_A"}),
            instance=instance,
        ).run_id
        evolve_b_run_id = execute_pipeline(
            repo_1.get_pipeline("evolving_pipeline").get_pipeline_subset_def({"solid_B"}),
            instance=instance,
        ).run_id

        # --- query through the time-1 repository ---
        context_at_time_1 = define_context_for_file(__file__, "get_repo_at_time_1", instance)
        result = execute_dagster_graphql(context_at_time_1, ALL_RUN_GROUPS_QUERY)
        assert result.data
        t1_runs = _index_runs(result)

        assert t1_runs[full_evolve_run_id]["pipeline"] == _snapshot("evolving_pipeline")
        assert t1_runs[foo_run_id]["pipeline"] == _snapshot("foo_pipeline")
        assert t1_runs[evolve_a_run_id]["pipeline"] == _snapshot(
            "evolving_pipeline", ["solid_A"]
        )
        assert t1_runs[evolve_a_run_id]["pipelineSnapshotId"]
        assert t1_runs[evolve_b_run_id]["pipeline"] == _snapshot(
            "evolving_pipeline", ["solid_B"]
        )

        # --- query through the time-2 repository ---
        context_at_time_2 = define_context_for_file(__file__, "get_repo_at_time_2", instance)
        result = execute_dagster_graphql(context_at_time_2, ALL_RUN_GROUPS_QUERY)
        t2_runs = _index_runs(result)

        assert t2_runs[full_evolve_run_id]["pipeline"] == _snapshot("evolving_pipeline")
        assert t2_runs[evolve_a_run_id]["pipeline"] == _snapshot(
            "evolving_pipeline", ["solid_A"]
        )
        assert t2_runs[evolve_a_run_id]["pipelineSnapshotId"]

        # names same
        t1_full_run = t1_runs[full_evolve_run_id]
        t2_subset_run = t2_runs[evolve_a_run_id]
        assert t1_full_run["pipeline"]["name"] == t2_subset_run["pipeline"]["name"]

        # snapshots differ
        assert t1_full_run["pipelineSnapshotId"] != t2_subset_run["pipelineSnapshotId"]

        # pipeline name changed (per the original note); the run still reports
        # the name it was executed under
        assert t2_runs[foo_run_id]["pipeline"] == _snapshot("foo_pipeline")

        # subset no longer valid - b renamed (per the original note); the run
        # still reports its original selection
        assert t2_runs[evolve_b_run_id]["pipeline"] == _snapshot(
            "evolving_pipeline", ["solid_B"]
        )
def test_get_all_partition_sets(graphql_context, snapshot):
    """Snapshot the data returned by the partition-sets query."""
    response = execute_dagster_graphql(graphql_context, GET_PARTITION_SETS_QUERY)

    payload = response.data
    assert payload
    snapshot.assert_match(payload)
def test_successful_pipeline_reexecution(snapshot):
    """Execute ``csv_hello_world``, then re-execute only ``sum_sq_solid.compute``.

    The second execution passes ``retryRunId`` pointing at the first run and is
    expected to emit output for ``sum_sq_solid.compute`` only, while the
    intermediate store of the new run holds both solids' results.
    """

    def _scrub_paths(result_data):
        # Recursively replace every 'path' value with a fixed placeholder so
        # the snapshot does not depend on where intermediates were written.
        # Lists are updated in place; dicts are rebuilt after their own 'path'
        # entry has been overwritten.
        if isinstance(result_data, dict):
            if 'path' in result_data:
                result_data['path'] = 'DUMMY_PATH'
            return {key: _scrub_paths(value) for key, value in result_data.items()}
        if isinstance(result_data, list):
            for position, item in enumerate(result_data):
                result_data[position] = _scrub_paths(item)
        return result_data

    run_id = make_new_run_id()
    instance = DagsterInstance.ephemeral()

    # --- first, full execution ---
    first_context = define_test_context(instance=instance)
    first_variables = {
        'executionParams': {
            'selector': {'name': 'csv_hello_world'},
            'environmentConfigData': csv_hello_world_solids_config_fs_storage(),
            'executionMetadata': {'runId': run_id},
            'mode': 'default',
        }
    }
    result_one = execute_dagster_graphql(
        first_context, START_PIPELINE_EXECUTION_SNAPSHOT_QUERY, variables=first_variables
    )

    first_typename = result_one.data['startPipelineExecution']['__typename']
    assert first_typename == 'StartPipelineExecutionSuccess'

    snapshot.assert_match(_scrub_paths(result_one.data))

    expected_value_repr = (
        '''[OrderedDict([('num1', '1'), ('num2', '2'), ('sum', 3), '''
        '''('sum_sq', 9)]), OrderedDict([('num1', '3'), ('num2', '4'), ('sum', 7), '''
        '''('sum_sq', 49)])]'''
    )

    store = build_fs_intermediate_store(instance.intermediates_directory, run_id)
    assert store.has_intermediate(None, 'sum_solid.compute')
    assert store.has_intermediate(None, 'sum_sq_solid.compute')
    first_sum_sq = store.get_intermediate(None, 'sum_sq_solid.compute', PoorMansDataFrame).obj
    assert str(first_sum_sq) == expected_value_repr

    # --- re-execution restricted to sum_sq_solid.compute ---
    new_run_id = make_new_run_id()

    second_context = define_test_context(instance=instance)
    second_variables = {
        'executionParams': {
            'selector': {'name': 'csv_hello_world'},
            'environmentConfigData': csv_hello_world_solids_config_fs_storage(),
            'stepKeys': ['sum_sq_solid.compute'],
            'executionMetadata': {'runId': new_run_id},
            'mode': 'default',
            'retryRunId': run_id,
        }
    }
    result_two = execute_dagster_graphql(
        second_context, START_PIPELINE_EXECUTION_SNAPSHOT_QUERY, variables=second_variables
    )

    query_result = result_two.data['startPipelineExecution']
    assert query_result['__typename'] == 'StartPipelineExecutionSuccess'

    logs = query_result['run']['logs']['nodes']
    assert isinstance(logs, list)
    assert has_event_of_type(logs, 'PipelineStartEvent')
    assert has_event_of_type(logs, 'PipelineSuccessEvent')
    assert not has_event_of_type(logs, 'PipelineFailureEvent')

    # Only the re-executed step may produce an output event.
    assert not get_step_output_event(logs, 'sum_solid.compute')
    assert get_step_output_event(logs, 'sum_sq_solid.compute')

    snapshot.assert_match(_scrub_paths(result_two.data))

    store = build_fs_intermediate_store(instance.intermediates_directory, new_run_id)
    assert not store.has_intermediate(
        None, 'sum_solid.inputs.num.read', 'input_thunk_output'
    )
    assert store.has_intermediate(None, 'sum_solid.compute')
    assert store.has_intermediate(None, 'sum_sq_solid.compute')
    second_sum_sq = store.get_intermediate(None, 'sum_sq_solid.compute', PoorMansDataFrame).obj
    assert str(second_sum_sq) == expected_value_repr
def test_get_partition_status(self, graphql_context):
    """Partition statuses of ``integer_partition`` follow launched backfills.

    After backfilling partitions "2" and "3", only those two report SUCCESS
    and the rest report no run status; after backfilling "0".."9", all ten
    reported partitions are SUCCESS.
    """
    repository_selector = infer_repository_selector(graphql_context)

    def _launch_backfill(partition_names, expected_run_count):
        # Launch a synchronously submitted backfill, wait for its runs to
        # finish, and check how many runs were launched.
        response = execute_dagster_graphql_and_finish_runs(
            graphql_context,
            LAUNCH_PARTITION_BACKFILL_MUTATION,
            variables={
                "backfillParams": {
                    "selector": {
                        "repositorySelector": repository_selector,
                        "partitionSetName": "integer_partition",
                    },
                    "partitionNames": partition_names,
                    "forceSynchronousSubmission": True,
                }
            },
        )
        assert not response.errors
        launch_payload = response.data["launchPartitionBackfill"]
        assert launch_payload["__typename"] == "LaunchBackfillSuccess"
        assert len(launch_payload["launchedRunIds"]) == expected_run_count

    def _fetch_partition_statuses():
        # Query the partition set's statuses; ten entries are expected.
        response = execute_dagster_graphql(
            graphql_context,
            query=GET_PARTITION_SET_STATUS_QUERY,
            variables={
                "partitionSetName": "integer_partition",
                "repositorySelector": repository_selector,
            },
        )
        assert not response.errors
        assert response.data
        statuses = response.data["partitionSetOrError"]["partitionStatusesOrError"]["results"]
        assert len(statuses) == 10
        return statuses

    # Backfill two partitions: only those two should have a run status.
    _launch_backfill(["2", "3"], 2)
    for status in _fetch_partition_statuses():
        if status["partitionName"] not in ("2", "3"):
            assert status["runStatus"] is None
        else:
            assert status["runStatus"] == "SUCCESS"

    # Backfill every partition: all of them should now be successful.
    _launch_backfill([str(num) for num in range(10)], 10)
    for status in _fetch_partition_statuses():
        assert status["runStatus"] == "SUCCESS"
def test_basic_start_pipeline_execution_with_preset_failure(self, graphql_context):
    """Launching with a preset rejects params that conflict with the preset.

    Combining ``preset`` with a solid selection, with ``runConfigData`` or with
    ``mode`` must each yield a ``ConflictingExecutionParamsError`` carrying the
    matching message.
    """

    def _assert_conflict(execution_params, expected_message):
        # Launch with the given params and require the conflict error.
        response = execute_dagster_graphql(
            graphql_context,
            LAUNCH_PIPELINE_EXECUTION_MUTATION,
            variables={'executionParams': execution_params},
        )
        assert not response.errors
        assert response.data
        launch_payload = response.data['launchPipelineExecution']
        assert launch_payload['__typename'] == 'ConflictingExecutionParamsError'
        assert launch_payload['message'] == expected_message

    # while illegally defining selector.solid_selection
    subset_selector = infer_pipeline_selector(
        graphql_context, 'csv_hello_world', ['sum_sq_solid']
    )
    _assert_conflict(
        {'selector': subset_selector, 'preset': 'test_inline'},
        'Invalid ExecutionParams. Cannot define selector.solid_selection when using a preset.',
    )

    # while illegally defining runConfigData
    selector = infer_pipeline_selector(graphql_context, 'csv_hello_world')
    _assert_conflict(
        {
            'selector': selector,
            'preset': 'test_inline',
            'runConfigData': csv_hello_world_solids_config(),
        },
        'Invalid ExecutionParams. Cannot define runConfigData when using a preset.',
    )

    # while illegally defining mode
    _assert_conflict(
        {'selector': selector, 'preset': 'test_inline', 'mode': 'default'},
        'Invalid ExecutionParams. Cannot define mode when using a preset.',
    )
def test_pipeline_reexecution_info_query(snapshot):
    """Re-execution info exposes ``stepKeysToExecute`` only for the step-limited run.

    A full ``csv_hello_world`` run is expected to report no step keys; a second
    run limited to ``sum_sq_solid.compute`` must report step keys, which are
    compared against the stored snapshot.
    """
    context = define_context()

    def _fetch_run_info(queried_run_id):
        # Look up one run and require that it resolves to a PipelineRun.
        response = execute_dagster_graphql(
            context, PIPELINE_REEXECUTION_INFO_QUERY, variables={'runId': queried_run_id}
        )
        run_info = response.data['pipelineRunOrError']
        assert run_info['__typename'] == 'PipelineRun'
        return run_info

    # Full execution.
    run_id = str(uuid.uuid4())
    execute_dagster_graphql(
        context,
        START_PIPELINE_EXECUTION_SNAPSHOT_QUERY,
        variables={
            'executionParams': {
                'selector': {'name': 'csv_hello_world'},
                'environmentConfigData': csv_hello_world_solids_config_fs_storage(),
                'executionMetadata': {'runId': run_id},
                'mode': 'default',
            }
        },
    )

    # Execution limited to sum_sq_solid.compute, with a reexecutionConfig that
    # references the first run.
    new_run_id = str(uuid.uuid4())
    execute_dagster_graphql(
        context,
        START_PIPELINE_EXECUTION_SNAPSHOT_QUERY,
        variables={
            'executionParams': {
                'selector': {'name': 'csv_hello_world'},
                'environmentConfigData': csv_hello_world_solids_config_fs_storage(),
                'stepKeys': ['sum_sq_solid.compute'],
                'executionMetadata': {'runId': new_run_id},
                'mode': 'default',
            },
            'reexecutionConfig': {
                'previousRunId': run_id,
                'stepOutputHandles': [
                    {'stepKey': 'sum_solid.compute', 'outputName': 'result'}
                ],
            },
        },
    )

    original_run_info = _fetch_run_info(run_id)
    assert original_run_info['stepKeysToExecute'] is None

    step_keys_to_execute = _fetch_run_info(new_run_id)['stepKeysToExecute']
    assert step_keys_to_execute is not None
    snapshot.assert_match(step_keys_to_execute)
def test_successful_two_part_execute_plan(snapshot):
    """Execute ``csv_hello_world`` in two ``executePlan`` calls on one run.

    The first call runs ``sum_solid.compute`` and the second runs
    ``sum_sq_solid.compute`` under the same run id; the second call's step
    events and the stored ``sum_sq_solid.compute`` intermediate are verified.
    """
    instance = DagsterInstance.local_temp()
    environment_dict = csv_hello_world_solids_config_fs_storage()
    pipeline_run = instance.create_run_for_pipeline(
        pipeline_def=csv_hello_world, environment_dict=environment_dict
    )
    shared_run_id = pipeline_run.run_id

    # --- part one: sum_solid.compute ---
    result_one = execute_dagster_graphql(
        define_test_context(instance=instance),
        EXECUTE_PLAN_QUERY,
        variables={
            'executionParams': {
                'selector': {'name': 'csv_hello_world'},
                'environmentConfigData': environment_dict,
                'stepKeys': ['sum_solid.compute'],
                'executionMetadata': {'runId': shared_run_id},
                'mode': 'default',
            }
        },
    )

    assert result_one.data['executePlan']['__typename'] == 'ExecutePlanSuccess'
    snapshot.assert_match(clean_log_messages(result_one.data))

    # --- part two: sum_sq_solid.compute ---
    result_two = execute_dagster_graphql(
        define_test_context(instance=instance),
        EXECUTE_PLAN_QUERY,
        variables={
            'executionParams': {
                'selector': {'name': 'csv_hello_world'},
                'environmentConfigData': csv_hello_world_solids_config_fs_storage(),
                'stepKeys': ['sum_sq_solid.compute'],
                'executionMetadata': {'runId': shared_run_id},
                'mode': 'default',
            }
        },
    )

    plan_result = result_two.data['executePlan']
    assert plan_result['__typename'] == 'ExecutePlanSuccess'
    assert plan_result['pipeline']['name'] == 'csv_hello_world'
    assert plan_result['hasFailures'] is False

    step_events = plan_result['stepEvents']
    observed_event_types = [event['__typename'] for event in step_events]
    assert observed_event_types == [
        'ExecutionStepStartEvent',
        'ObjectStoreOperationEvent',
        'ExecutionStepInputEvent',
        'ExecutionStepOutputEvent',
        'ObjectStoreOperationEvent',
        'ExecutionStepSuccessEvent',
    ]
    # The step key is checked on events 0, 1, 2 and 4; event 3 is checked for
    # its output name instead.
    for index in (0, 1, 2):
        assert step_events[index]['step']['key'] == 'sum_sq_solid.compute'
    assert step_events[3]['outputName'] == 'result'
    assert step_events[4]['step']['key'] == 'sum_sq_solid.compute'

    snapshot.assert_match(clean_log_messages(result_two.data))

    expected_value_repr = (
        '''[OrderedDict([('num1', '1'), ('num2', '2'), ('sum', 3), '''
        '''('sum_sq', 9)]), OrderedDict([('num1', '3'), ('num2', '4'), ('sum', 7), '''
        '''('sum_sq', 49)])]'''
    )

    store = build_fs_intermediate_store(instance.intermediates_directory, shared_run_id)
    assert store.has_intermediate(None, 'sum_sq_solid.compute')
    stored_sum_sq = store.get_intermediate(None, 'sum_sq_solid.compute', PoorMansDataFrame).obj
    assert str(stored_sum_sq) == expected_value_repr
def test_sensor_tick_range(graphql_context):
    """The sensor tick query honours its ``dayRange`` / ``dayOffset`` arguments.

    Three ticks are created at (now - 2d - 1h), (now - 1d - 1h) and (now - 1h);
    the query is then issued with several range/offset combinations and the
    number (and, where a single tick is expected, the timestamp) of returned
    ticks is checked.
    """
    repository_location = graphql_context.get_repository_location(main_repo_location_name())
    external_repository = repository_location.get_repository(main_repo_name())
    graphql_context.instance.reconcile_scheduler_state(external_repository)

    sensor_name = "always_no_config_sensor"
    external_sensor = external_repository.get_external_sensor(sensor_name)
    sensor_selector = infer_sensor_selector(graphql_context, sensor_name)

    def _query_ticks(day_range, day_offset):
        # Return the tick list reported for the sensor under the given window.
        response = execute_dagster_graphql(
            graphql_context,
            GET_SENSOR_TICK_RANGE_QUERY,
            variables={
                "sensorSelector": sensor_selector,
                "dayRange": day_range,
                "dayOffset": day_offset,
            },
        )
        return response.data["sensorOrError"]["sensorState"]["ticks"]

    # test with no job state
    assert len(_query_ticks(None, None)) == 0

    # turn the sensor on
    graphql_context.instance.add_job_state(
        JobState(external_sensor.get_external_origin(), JobType.SENSOR, JobStatus.RUNNING)
    )

    # Create one tick per frozen time, oldest first.
    now = pendulum.now("US/Central")
    one = now.subtract(days=2).subtract(hours=1)
    two = now.subtract(days=1).subtract(hours=1)
    three = now.subtract(hours=1)
    for frozen_time in (one, two, three):
        with pendulum.test(frozen_time):
            _create_tick(graphql_context.instance)

    # No range restriction: all three ticks.
    assert len(_query_ticks(None, None)) == 3

    # One-day range without offset: only the most recent tick.
    latest_day_ticks = _query_ticks(1, None)
    assert len(latest_day_ticks) == 1
    assert latest_day_ticks[0]["timestamp"] == three.timestamp()

    # One-day range with a one-day offset: only the middle tick.
    offset_day_ticks = _query_ticks(1, 1)
    assert len(offset_day_ticks) == 1
    assert offset_day_ticks[0]["timestamp"] == two.timestamp()

    # Two-day range without offset: two ticks.
    assert len(_query_ticks(2, None)) == 2
def test_smoke_test_dagster_type_system(graphql_context):
    """Smoke test: the all-runtime-types query returns data and no errors."""
    response = execute_dagster_graphql(graphql_context, ALL_RUNTIME_TYPES_QUERY)

    assert not response.errors
    assert response.data