def test_pipeline_with_invalid_definition_error(self, graphql_context):
    """Check that an invalid solid subset surfaces as an ``InvalidSubsetError``.

    The schema query is issued for ``pipeline_with_invalid_definition_error``
    restricted to ``fail_subset``; the returned message must match the
    ``DagsterInvalidSubsetError`` / dagster_type_loader hint pattern.
    """
    pipeline_selector = infer_pipeline_selector(
        graphql_context, "pipeline_with_invalid_definition_error", ["fail_subset"]
    )
    response = execute_dagster_graphql(
        graphql_context, SCHEMA_OR_ERROR_SUBSET_QUERY, {"selector": pipeline_selector}
    )

    assert not response.errors
    assert response.data

    schema_or_error = response.data["runConfigSchemaOrError"]
    assert schema_or_error["__typename"] == "InvalidSubsetError"

    # Adjacent raw literals are concatenated into a single pattern.
    message_pattern = (
        r".*DagsterInvalidSubsetError[\s\S]*"
        r"add a dagster_type_loader for the type 'InputTypeWithoutHydration'"
    )
    assert re.match(message_pattern, schema_or_error["message"])
def test_basic_valid_config_on_run_config_schema(self, graphql_context, snapshot):
    """Validate the ``csv_hello_world`` solids config through the run config schema query.

    Expects ``isRunConfigValid`` to be ``PipelineConfigValidationValid`` and
    the full response payload to match the stored snapshot.
    """
    query_variables = {
        "selector": infer_pipeline_selector(graphql_context, "csv_hello_world"),
        "mode": "default",
        "runConfigData": csv_hello_world_solids_config(),
    }
    response = execute_dagster_graphql(
        graphql_context,
        RUN_CONFIG_SCHEMA_CONFIG_VALIDATION_QUERY,
        variables=query_variables,
    )

    assert not response.errors
    assert response.data

    validation = response.data["runConfigSchemaOrError"]["isRunConfigValid"]
    assert validation["__typename"] == "PipelineConfigValidationValid"

    snapshot.assert_match(response.data)
def test_get_partitioned_asset_key_lineage(self, graphql_context, snapshot):
    """Launch ``partitioned_asset_lineage_pipeline`` and snapshot asset ``b``'s materialization data."""
    pipeline_selector = infer_pipeline_selector(
        graphql_context, "partitioned_asset_lineage_pipeline"
    )
    launch_variables = {"executionParams": {"selector": pipeline_selector, "mode": "default"}}
    launch_response = execute_dagster_graphql(
        graphql_context,
        LAUNCH_PIPELINE_EXECUTION_MUTATION,
        variables=launch_variables,
    )
    assert launch_response.data["launchPipelineExecution"]["__typename"] == "LaunchRunSuccess"

    # presumably waits for the launched run to complete before querying -- confirm
    graphql_context.instance.run_launcher.join()

    asset_response = execute_dagster_graphql(
        graphql_context,
        GET_ASSET_MATERIALIZATION,
        variables={"assetKey": {"path": ["b"]}},
    )
    assert asset_response.data
    snapshot.assert_match(asset_response.data)
def test_pipeline_or_error_by_name(graphql_context):
    """Fetch ``csv_hello_world_two`` via ``pipelineOrError`` and check the returned name."""
    # The query text is kept exactly as it appears in the original test.
    named_pipeline_query = ''' query NamedPipelineQuery($selector: PipelineSelector!) { pipelineOrError(params: $selector) { ... on Pipeline { name } } }'''
    pipeline_selector = infer_pipeline_selector(graphql_context, "csv_hello_world_two")

    response = execute_dagster_graphql(
        graphql_context,
        named_pipeline_query,
        {'selector': pipeline_selector},
    )

    assert not response.errors
    assert response.data
    assert response.data['pipelineOrError']['name'] == 'csv_hello_world_two'
def test_pipeline_with_invalid_definition_error(self, graphql_context):
    """Check that an invalid solid subset surfaces as an ``InvalidSubsetError``.

    The schema query is issued for ``pipeline_with_invalid_definition_error``
    restricted to ``fail_subset``; the returned message must match the
    ``DagsterInvalidSubsetError`` / dagster_type_loader hint pattern.
    """
    pipeline_selector = infer_pipeline_selector(
        graphql_context, 'pipeline_with_invalid_definition_error', ['fail_subset']
    )
    response = execute_dagster_graphql(
        graphql_context, SCHEMA_OR_ERROR_SUBSET_QUERY, {'selector': pipeline_selector}
    )

    assert not response.errors
    assert response.data

    schema_or_error = response.data['runConfigSchemaOrError']
    assert schema_or_error['__typename'] == 'InvalidSubsetError'

    # Adjacent raw literals are concatenated into a single pattern.
    message_pattern = (
        r'.*DagsterInvalidSubsetError[\s\S]*'
        r'add a dagster_type_loader for the type "InputTypeWithoutHydration"'
    )
    assert re.match(message_pattern, schema_or_error['message'])
def test_pipeline_or_error_by_name(graphql_context):
    """Fetch ``csv_hello_world_two`` via ``pipelineOrError`` and check the returned name."""
    # The query text is kept exactly as it appears in the original test.
    named_pipeline_query = """ query NamedPipelineQuery($selector: PipelineSelector!) { pipelineOrError(params: $selector) { ... on Pipeline { name } } }"""
    pipeline_selector = infer_pipeline_selector(graphql_context, "csv_hello_world_two")

    response = execute_dagster_graphql(
        graphql_context,
        named_pipeline_query,
        {"selector": pipeline_selector},
    )

    assert not response.errors
    assert response.data
    assert response.data["pipelineOrError"]["name"] == "csv_hello_world_two"
def test_basic_start_pipeline_execution_with_non_existent_preset(self, graphql_context):
    """Launching with an unknown preset must raise ``UserFacingGraphQLError``.

    The wrapped dauphin error is expected to name both the missing preset and
    the pipeline.
    """
    pipeline_selector = infer_pipeline_selector(graphql_context, 'csv_hello_world')
    execution_params = {
        'selector': pipeline_selector,
        'preset': 'undefined_preset',
    }

    with pytest.raises(UserFacingGraphQLError) as exc_info:
        execute_dagster_graphql(
            graphql_context,
            LAUNCH_PIPELINE_EXECUTION_QUERY,
            variables={'executionParams': execution_params},
        )

    # Inspect the captured exception only after the context manager has exited.
    raised_message = exc_info.value.dauphin_error.message
    assert raised_message == 'Preset undefined_preset not found in pipeline csv_hello_world.'
def test_basic_sync_execution_no_config(self, graphql_context):
    """Run ``no_config_pipeline`` with ``runConfigData`` set to ``None``.

    The run log must contain start and success events and no failure event.
    """
    execution_params = {
        'selector': infer_pipeline_selector(graphql_context, 'no_config_pipeline'),
        'runConfigData': None,
        'mode': 'default',
    }
    run_log = sync_execute_get_run_log_data(
        context=graphql_context,
        variables={'executionParams': execution_params},
    )

    messages = run_log['messages']
    assert isinstance(messages, list)
    assert has_event_of_type(messages, 'PipelineStartEvent')
    assert has_event_of_type(messages, 'PipelineSuccessEvent')
    assert not has_event_of_type(messages, 'PipelineFailureEvent')
def test_required_resources(graphql_context, snapshot):
    """Check that the single solid of ``required_resource_pipeline`` requires resource ``R1``."""
    pipeline_selector = infer_pipeline_selector(graphql_context, "required_resource_pipeline")
    response = execute_dagster_graphql(
        graphql_context,
        REQUIRED_RESOURCE_QUERY,
        {"selector": pipeline_selector},
    )

    assert not response.errors
    assert response.data
    assert response.data["pipelineOrError"]["solids"]

    # Single-element unpacking doubles as a check that exactly one solid came back.
    (only_solid,) = response.data["pipelineOrError"]["solids"]
    assert only_solid

    assert only_solid["definition"]["requiredResources"]
    assert only_solid["definition"]["requiredResources"] == [{"resourceKey": "R1"}]

    snapshot.assert_match(response.data)
def test_dagster_type_builtin_query(graphql_context):
    """Query the ``Int`` dagster type on ``csv_hello_world`` and check it is reported as builtin."""
    query_variables = {
        "selector": infer_pipeline_selector(graphql_context, "csv_hello_world"),
        "dagsterTypeName": "Int",
    }
    response = execute_dagster_graphql(
        graphql_context,
        RUNTIME_TYPE_QUERY,
        query_variables,
    )

    assert not response.errors
    assert response.data

    assert (
        response.data["pipelineOrError"]["dagsterTypeOrError"]["__typename"]
        == "RegularDagsterType"
    )
    assert response.data["pipelineOrError"]["dagsterTypeOrError"]["name"] == "Int"
    assert response.data["pipelineOrError"]["dagsterTypeOrError"]["isBuiltin"]
def test_backcompat_termination(self, graphql_context):
    """Launch ``infinite_loop_pipeline`` and terminate it via the legacy mutation.

    The ``loop`` solid is configured with a temp file path; the test polls for
    that file before terminating, then checks the terminate response echoes
    the run id.
    """
    pipeline_selector = infer_pipeline_selector(graphql_context, "infinite_loop_pipeline")

    with safe_tempfile_path() as path:
        loop_config = {"solids": {"loop": {"config": {"file": path}}}}
        launch_response = execute_dagster_graphql(
            graphql_context,
            LAUNCH_PIPELINE_EXECUTION_MUTATION,
            variables={
                "executionParams": {
                    "selector": pipeline_selector,
                    "mode": "default",
                    "runConfigData": loop_config,
                }
            },
        )

        assert not launch_response.errors
        assert launch_response.data

        # just test existence
        launch_payload = launch_response.data["launchPipelineExecution"]
        assert launch_payload["__typename"] == "LaunchRunSuccess"
        run_id = launch_response.data["launchPipelineExecution"]["run"]["runId"]
        assert run_id

        # ensure the execution has happened
        # NOTE(review): this poll has no timeout; if the file never appears the test hangs.
        while True:
            if os.path.exists(path):
                break
            time.sleep(0.1)

        terminate_response = execute_dagster_graphql(
            graphql_context,
            BACKCOMPAT_LEGACY_TERMINATE_PIPELINE,
            variables={"runId": run_id},
        )
        assert terminate_response.data["terminatePipelineExecution"]["run"]["runId"] == run_id
def test_basic_termination(self, graphql_context):
    """Launch ``infinite_loop_pipeline`` and cancel it once it has started.

    The ``loop`` solid is configured with a temp file path; the test polls for
    that file before sending the cancellation query and expects a
    ``TerminatePipelineExecutionSuccess`` response.
    """
    pipeline_selector = infer_pipeline_selector(graphql_context, 'infinite_loop_pipeline')

    with safe_tempfile_path() as path:
        loop_config = {'solids': {'loop': {'config': {'file': path}}}}
        launch_response = execute_dagster_graphql(
            graphql_context,
            LAUNCH_PIPELINE_EXECUTION_MUTATION,
            variables={
                'executionParams': {
                    'selector': pipeline_selector,
                    'mode': 'default',
                    'runConfigData': loop_config,
                }
            },
        )

        assert not launch_response.errors
        assert launch_response.data

        # just test existence
        launch_payload = launch_response.data['launchPipelineExecution']
        assert launch_payload['__typename'] == 'LaunchPipelineRunSuccess'
        run_id = launch_response.data['launchPipelineExecution']['run']['runId']
        assert run_id

        # ensure the execution has happened
        # NOTE(review): this poll has no timeout; if the file never appears the test hangs.
        while True:
            if os.path.exists(path):
                break
            time.sleep(0.1)

        cancel_response = execute_dagster_graphql(
            graphql_context, RUN_CANCELLATION_QUERY, variables={'runId': run_id}
        )
        cancel_payload = cancel_response.data['terminatePipelineExecution']
        assert cancel_payload['__typename'] == 'TerminatePipelineExecutionSuccess'
def test_retry_resource_pipeline(self, graphql_context):
    """Run ``retry_resource_pipeline``, then re-execute it with the resume/retry tag.

    First run: ``start.compute`` succeeds and ``will_fail.compute`` fails.
    Retry: ``start.compute`` is not run again and ``will_fail.compute`` fails again.
    """
    pipeline_selector = infer_pipeline_selector(graphql_context, 'retry_resource_pipeline')

    initial_launch = execute_dagster_graphql_and_finish_runs(
        graphql_context,
        LAUNCH_PIPELINE_EXECUTION_QUERY,
        variables={
            'executionParams': {
                'mode': 'default',
                'selector': pipeline_selector,
                'runConfigData': {'storage': {'filesystem': {}}},
            }
        },
    )
    initial_run_id = initial_launch.data['launchPipelineExecution']['run']['runId']
    initial_logs = get_all_logs_for_finished_run_via_subscription(
        graphql_context, initial_run_id
    )['pipelineRunLogs']['messages']

    assert step_did_succeed(initial_logs, 'start.compute')
    assert step_did_fail(initial_logs, 'will_fail.compute')

    # The retry points back at the first run as both root and parent.
    retry_metadata = {
        'rootRunId': initial_run_id,
        'parentRunId': initial_run_id,
        'tags': [{'key': RESUME_RETRY_TAG, 'value': 'true'}],
    }
    retry_launch = execute_dagster_graphql_and_finish_runs(
        graphql_context,
        LAUNCH_PIPELINE_REEXECUTION_QUERY,
        variables={
            'executionParams': {
                'mode': 'default',
                'selector': pipeline_selector,
                'runConfigData': {'storage': {'filesystem': {}}},
                'executionMetadata': retry_metadata,
            }
        },
    )
    retry_run_id = retry_launch.data['launchPipelineReexecution']['run']['runId']
    retry_logs = get_all_logs_for_finished_run_via_subscription(
        graphql_context, retry_run_id
    )['pipelineRunLogs']['messages']

    assert step_did_not_run(retry_logs, 'start.compute')
    assert step_did_fail(retry_logs, 'will_fail.compute')
def test_latest_materialization_per_partition(self, graphql_context):
    """Check ``latestMaterializationByPartition`` for ``partition_materialization_job``.

    Three queries are made against the first returned asset node:

    1. partition ``a`` yields a single ``None`` entry;
    2. partition ``c`` yields one materialization whose partition is ``c``;
    3. after a second run, ``["c", "a"]`` yields a newer start time for ``c``
       followed by ``None`` for ``a``.
    """
    _create_run(graphql_context, "partition_materialization_job")

    selector = infer_pipeline_selector(graphql_context, "partition_materialization_job")

    result = execute_dagster_graphql(
        graphql_context,
        GET_LATEST_MATERIALIZATION_PER_PARTITION,
        variables={"pipelineSelector": selector, "partitions": ["a"]},
    )
    assert result.data
    assert result.data["assetNodes"]
    asset_node = result.data["assetNodes"][0]
    assert len(asset_node["latestMaterializationByPartition"]) == 1
    # Compare against the None singleton by identity, not equality.
    assert asset_node["latestMaterializationByPartition"][0] is None

    result = execute_dagster_graphql(
        graphql_context,
        GET_LATEST_MATERIALIZATION_PER_PARTITION,
        variables={"pipelineSelector": selector, "partitions": ["c"]},
    )
    assert result.data
    assert result.data["assetNodes"]
    asset_node = result.data["assetNodes"][0]
    assert len(asset_node["latestMaterializationByPartition"]) == 1
    materialization = asset_node["latestMaterializationByPartition"][0]
    start_time = materialization["stepStats"]["startTime"]
    assert materialization["partition"] == "c"

    # A second run is created so the next query can be compared against start_time.
    _create_run(graphql_context, "partition_materialization_job")

    result = execute_dagster_graphql(
        graphql_context,
        GET_LATEST_MATERIALIZATION_PER_PARTITION,
        variables={"pipelineSelector": selector, "partitions": ["c", "a"]},
    )
    assert result.data and result.data["assetNodes"]
    asset_node = result.data["assetNodes"][0]
    assert len(asset_node["latestMaterializationByPartition"]) == 2
    materialization = asset_node["latestMaterializationByPartition"][0]
    new_start_time = materialization["stepStats"]["startTime"]
    assert new_start_time > start_time
    assert asset_node["latestMaterializationByPartition"][1] is None
def test_config_map(self, graphql_context):
    """Validate a map-typed solid config and check the map type shows up in the schema.

    The first half validates run config for ``config_with_map``. The second
    half looks through ``allConfigTypes`` for an entry labelled ``username``
    with ``String`` keys and ``Int`` values.
    """
    # Check validity
    map_run_config = {
        "solids": {"a_solid_with_map_config": {"config": {"field_one": {"test": 5}}}}
    }
    validation_response = execute_config_graphql(
        graphql_context,
        pipeline_name="config_with_map",
        run_config=map_run_config,
        mode="default",
    )

    assert not validation_response.errors
    assert validation_response.data
    validity = validation_response.data["isPipelineConfigValid"]
    assert validity["__typename"] == "PipelineConfigValidationValid"
    assert validity["pipelineName"] == "config_with_map"

    # Sanity check GraphQL result for types
    pipeline_selector = infer_pipeline_selector(graphql_context, "config_with_map")
    types_response = execute_dagster_graphql(
        graphql_context,
        ALL_CONFIG_TYPES_QUERY,
        {"selector": pipeline_selector, "mode": "default"},
    )
    all_config_types = types_response.data["runConfigSchemaOrError"]["allConfigTypes"]

    # Ensure the first config type, Map(str, int, name="username") is in the result
    assert any(
        entry.get("keyLabelName") == "username"
        and entry.get("keyType", {}).get("key", "") == "String"
        and entry.get("valueType", {}).get("key", "") == "Int"
        for entry in all_config_types
    )
def _do_retry_intermediates_test(graphql_context, run_id, reexecution_run_id):
    """Run ``eventually_successful`` once, check its step outcomes, then launch a retry.

    Args:
        graphql_context: Context passed through to the GraphQL helpers.
        run_id: Run id for the initial execution; also used as the root and
            parent run id of the retry.
        reexecution_run_id: Run id assigned to the retry.

    Returns:
        The result of the re-execution mutation.
    """
    pipeline_selector = infer_pipeline_selector(graphql_context, "eventually_successful")

    initial_events = sync_execute_get_events(
        context=graphql_context,
        variables={
            "executionParams": {
                "mode": "default",
                "selector": pipeline_selector,
                "executionMetadata": {"runId": run_id},
            }
        },
    )

    assert step_did_succeed(initial_events, "spawn.compute")
    assert step_did_fail(initial_events, "fail.compute")
    assert step_did_skip(initial_events, "fail_2.compute")
    assert step_did_skip(initial_events, "fail_3.compute")
    assert step_did_skip(initial_events, "reset.compute")

    retry_metadata = {
        "runId": reexecution_run_id,
        "rootRunId": run_id,
        "parentRunId": run_id,
        "tags": [{"key": RESUME_RETRY_TAG, "value": "true"}],
    }
    return execute_dagster_graphql_and_finish_runs(
        graphql_context,
        LAUNCH_PIPELINE_REEXECUTION_MUTATION,
        variables={
            "executionParams": {
                "mode": "default",
                "selector": pipeline_selector,
                "executionMetadata": retry_metadata,
            }
        },
    )
def _do_retry_intermediates_test(graphql_context, run_id, reexecution_run_id):
    """Run ``eventually_successful`` once, check its step outcomes, then launch a retry.

    Args:
        graphql_context: Context passed through to the GraphQL helpers.
        run_id: Run id for the initial execution; also used as the root and
            parent run id of the retry.
        reexecution_run_id: Run id assigned to the retry.

    Returns:
        The result of the re-execution mutation.
    """
    pipeline_selector = infer_pipeline_selector(graphql_context, 'eventually_successful')

    initial_events = sync_execute_get_events(
        context=graphql_context,
        variables={
            'executionParams': {
                'mode': 'default',
                'selector': pipeline_selector,
                'executionMetadata': {'runId': run_id},
            }
        },
    )

    assert step_did_succeed(initial_events, 'spawn.compute')
    assert step_did_fail(initial_events, 'fail.compute')
    assert step_did_skip(initial_events, 'fail_2.compute')
    assert step_did_skip(initial_events, 'fail_3.compute')
    assert step_did_skip(initial_events, 'reset.compute')

    retry_metadata = {
        'runId': reexecution_run_id,
        'rootRunId': run_id,
        'parentRunId': run_id,
        'tags': [{'key': RESUME_RETRY_TAG, 'value': 'true'}],
    }
    return execute_dagster_graphql_and_finish_runs(
        graphql_context,
        LAUNCH_PIPELINE_REEXECUTION_MUTATION,
        variables={
            'executionParams': {
                'mode': 'default',
                'selector': pipeline_selector,
                'executionMetadata': retry_metadata,
            }
        },
    )
def test_start_pipeline_execution_malformed_config(self, graphql_context):
    """Launching with an unparseable JSON config string yields a ``PythonError``.

    The error message is expected to mention ``JSONDecodeError``.
    """
    execution_params = {
        "selector": infer_pipeline_selector(graphql_context, "csv_hello_world"),
        # Deliberately broken JSON text.
        "runConfigData": '{"foo": {{{{',
        "mode": "default",
    }
    response = execute_dagster_graphql(
        graphql_context,
        LAUNCH_PIPELINE_EXECUTION_MUTATION,
        variables={"executionParams": execution_params},
    )

    assert not response.errors
    assert response.data
    assert response.data["launchPipelineExecution"]["__typename"] == "PythonError"
    assert "JSONDecodeError" in response.data["launchPipelineExecution"]["message"]
def test_basic_start_pipeline_and_fetch(self, graphql_context):
    """Launch ``csv_hello_world``, wait for it, then fetch and verify its event sequence.

    Engine events are filtered out before comparing against
    ``self._csv_hello_world_event_sequence()``.
    """
    pipeline_selector = infer_pipeline_selector(graphql_context, "csv_hello_world")
    num_csv_path = file_relative_path(__file__, "../data/num.csv")

    launch_response = execute_dagster_graphql(
        graphql_context,
        LAUNCH_PIPELINE_EXECUTION_MUTATION,
        variables={
            "executionParams": {
                "selector": pipeline_selector,
                "runConfigData": {"solids": {"sum_solid": {"inputs": {"num": num_csv_path}}}},
                "mode": "default",
            }
        },
    )

    assert not launch_response.errors
    assert launch_response.data
    assert launch_response.data["launchPipelineExecution"]["__typename"] == "LaunchRunSuccess"

    # block until run finishes
    graphql_context.instance.run_launcher.join()

    launched_run_id = launch_response.data["launchPipelineExecution"]["run"]["runId"]
    events_response = execute_dagster_graphql(
        graphql_context,
        RUN_EVENTS_QUERY,
        variables={"runId": launched_run_id},
    )

    assert not events_response.errors
    assert events_response.data
    assert events_response.data["pipelineRunOrError"]["__typename"] == "Run"

    observed_types = [
        event["__typename"]
        for event in events_response.data["pipelineRunOrError"]["events"]
        if event["__typename"] != "EngineEvent"
    ]
    assert observed_types == self._csv_hello_world_event_sequence()
def test_pipeline_or_error_by_name_not_found(graphql_context):
    """Querying ``pipelineOrError`` for the unknown name ``foobar`` yields ``PipelineNotFoundError``."""
    # The query text is kept exactly as it appears in the original test.
    named_pipeline_query = """ query NamedPipelineQuery($selector: PipelineSelector!) { pipelineOrError(params: $selector) { __typename ... on Pipeline { name } } }"""
    pipeline_selector = infer_pipeline_selector(graphql_context, "foobar")

    response = execute_dagster_graphql(
        graphql_context,
        named_pipeline_query,
        {"selector": pipeline_selector},
    )

    assert not response.errors
    assert response.data
    assert response.data["pipelineOrError"]["__typename"] == "PipelineNotFoundError"
def test_pipeline_reexecution_invalid_step_in_subset(self, graphql_context):
    """Re-executing with an unknown step key reports ``DagsterExecutionStepNotFoundError``.

    A first run of ``csv_hello_world`` is executed, then a retry is launched
    with ``stepKeys`` set to ``["nope"]``.
    """
    original_run_id = make_new_run_id()
    pipeline_selector = infer_pipeline_selector(graphql_context, "csv_hello_world")

    execute_dagster_graphql_and_finish_runs(
        graphql_context,
        LAUNCH_PIPELINE_REEXECUTION_MUTATION,
        variables={
            "executionParams": {
                "selector": pipeline_selector,
                "runConfigData": csv_hello_world_solids_config(),
                "executionMetadata": {"runId": original_run_id},
                "mode": "default",
            }
        },
    )

    # retry
    retry_run_id = make_new_run_id()
    retry_response = execute_dagster_graphql_and_finish_runs(
        graphql_context,
        LAUNCH_PIPELINE_REEXECUTION_MUTATION,
        variables={
            "executionParams": {
                "selector": pipeline_selector,
                "runConfigData": csv_hello_world_solids_config(),
                "stepKeys": ["nope"],
                "executionMetadata": {
                    "runId": retry_run_id,
                    "rootRunId": original_run_id,
                    "parentRunId": original_run_id,
                    "tags": [{"key": RESUME_RETRY_TAG, "value": "true"}],
                },
                "mode": "default",
            }
        },
    )

    reexecution_payload = retry_response.data["launchPipelineReexecution"]
    assert reexecution_payload["__typename"] == "PythonError"
    assert reexecution_payload["className"] == "DagsterExecutionStepNotFoundError"
    assert (
        "Can not build subset plan from unknown step: nope" in reexecution_payload["message"]
    )
def test_retry_resource_pipeline(self, graphql_context):
    """Run ``retry_resource_pipeline``, then re-execute it with the resume/retry tag.

    First run: ``start`` succeeds and ``will_fail`` fails.
    Retry: ``start`` is not run again and ``will_fail`` fails again.
    """
    pipeline_selector = infer_pipeline_selector(graphql_context, "retry_resource_pipeline")

    initial_launch = execute_dagster_graphql_and_finish_runs(
        graphql_context,
        LAUNCH_PIPELINE_EXECUTION_MUTATION,
        variables={
            "executionParams": {
                "mode": "default",
                "selector": pipeline_selector,
            }
        },
    )
    initial_run_id = initial_launch.data["launchPipelineExecution"]["run"]["runId"]
    initial_logs = get_all_logs_for_finished_run_via_subscription(
        graphql_context, initial_run_id
    )["pipelineRunLogs"]["messages"]

    assert step_did_succeed(initial_logs, "start")
    assert step_did_fail(initial_logs, "will_fail")

    # The retry points back at the first run as both root and parent.
    retry_metadata = {
        "rootRunId": initial_run_id,
        "parentRunId": initial_run_id,
        "tags": [{"key": RESUME_RETRY_TAG, "value": "true"}],
    }
    retry_launch = execute_dagster_graphql_and_finish_runs(
        graphql_context,
        LAUNCH_PIPELINE_REEXECUTION_MUTATION,
        variables={
            "executionParams": {
                "mode": "default",
                "selector": pipeline_selector,
                "executionMetadata": retry_metadata,
            }
        },
    )
    retry_run_id = retry_launch.data["launchPipelineReexecution"]["run"]["runId"]
    retry_logs = get_all_logs_for_finished_run_via_subscription(
        graphql_context, retry_run_id
    )["pipelineRunLogs"]["messages"]

    assert step_did_not_run(retry_logs, "start")
    assert step_did_fail(retry_logs, "will_fail")
def test_enum_query(graphql_context):
    """Check that ``TestEnum`` appears in the config types of ``pipeline_with_enum_config``.

    The entry's ``values`` must list the three enum values with their descriptions.
    """
    pipeline_selector = infer_pipeline_selector(graphql_context, "pipeline_with_enum_config")

    # The query text is kept exactly as it appears in the original test.
    enum_query = """ query EnumQuery($selector: PipelineSelector!) { runConfigSchemaOrError(selector: $selector) { ... on RunConfigSchema { allConfigTypes { __typename key ... on EnumConfigType { values { value description } } } } } } """

    response = execute_dagster_graphql(
        graphql_context,
        enum_query,
        {"selector": pipeline_selector},
    )

    assert not response.errors
    assert response.data

    # First config type keyed "TestEnum", or None when there is no such entry.
    test_enum_entry = next(
        (
            config_type
            for config_type in response.data["runConfigSchemaOrError"]["allConfigTypes"]
            if config_type["key"] == "TestEnum"
        ),
        None,
    )

    assert test_enum_entry
    assert test_enum_entry["key"] == "TestEnum"
    assert test_enum_entry["values"] == [
        {"value": "ENUM_VALUE_ONE", "description": "An enum value."},
        {"value": "ENUM_VALUE_TWO", "description": "An enum value."},
        {"value": "ENUM_VALUE_THREE", "description": "An enum value."},
    ]
def test_basic_start_pipeline_execution_and_subscribe(self, graphql_context):
    """Execute ``csv_hello_world`` and compare the non-engine run log events to a fixed sequence."""
    pipeline_selector = infer_pipeline_selector(graphql_context, "csv_hello_world")
    num_csv_path = file_relative_path(__file__, "../data/num.csv")

    run_log = sync_execute_get_run_log_data(
        context=graphql_context,
        variables={
            "executionParams": {
                "selector": pipeline_selector,
                "runConfigData": {"solids": {"sum_solid": {"inputs": {"num": num_csv_path}}}},
                "mode": "default",
            }
        },
    )

    assert run_log["__typename"] == "PipelineRunLogsSubscriptionSuccess"

    # Engine events are ignored when comparing the sequence.
    observed_types = [
        event["__typename"]
        for event in run_log["messages"]
        if event["__typename"] != "EngineEvent"
    ]

    expected_types = [
        "PipelineStartingEvent",
        "PipelineStartEvent",
        "ExecutionStepStartEvent",
        "ExecutionStepInputEvent",
        "ExecutionStepOutputEvent",
        "HandledOutputEvent",
        "ExecutionStepSuccessEvent",
        "ExecutionStepStartEvent",
        "LoadedInputEvent",
        "ExecutionStepInputEvent",
        "ExecutionStepOutputEvent",
        "HandledOutputEvent",
        "ExecutionStepSuccessEvent",
        "PipelineSuccessEvent",
    ]

    assert observed_types == expected_types
def test_basic_start_pipeline_execution_and_subscribe(self, graphql_context):
    """Execute ``csv_hello_world`` and compare the non-engine run log events to a fixed sequence."""
    pipeline_selector = infer_pipeline_selector(graphql_context, 'csv_hello_world')
    num_csv_path = file_relative_path(__file__, '../data/num.csv')

    run_log = sync_execute_get_run_log_data(
        context=graphql_context,
        variables={
            'executionParams': {
                'selector': pipeline_selector,
                'runConfigData': {'solids': {'sum_solid': {'inputs': {'num': num_csv_path}}}},
                'mode': 'default',
            }
        },
    )

    assert run_log['__typename'] == 'PipelineRunLogsSubscriptionSuccess'

    # Engine events are ignored when comparing the sequence.
    observed_types = [
        event['__typename']
        for event in run_log['messages']
        if event['__typename'] != 'EngineEvent'
    ]

    expected_types = [
        'PipelineStartEvent',
        'ExecutionStepStartEvent',
        'ExecutionStepInputEvent',
        'ExecutionStepOutputEvent',
        'ExecutionStepSuccessEvent',
        'ExecutionStepStartEvent',
        'ExecutionStepInputEvent',
        'ExecutionStepOutputEvent',
        'ExecutionStepSuccessEvent',
        'PipelineSuccessEvent',
    ]

    assert observed_types == expected_types
def test_start_pipeline_execution_readonly_failure(self, graphql_context):
    """Launching a run from a read-only context must return ``UnauthorizedError``.

    The test first asserts that the supplied context is read-only, then
    launches ``csv_hello_world`` and checks the response typename.
    """
    # Precondition, checked by truthiness instead of comparing to True with ``==``.
    assert graphql_context.read_only

    selector = infer_pipeline_selector(graphql_context, "csv_hello_world")
    result = execute_dagster_graphql(
        graphql_context,
        LAUNCH_PIPELINE_EXECUTION_MUTATION,
        variables={
            "executionParams": {
                "selector": selector,
                "runConfigData": csv_hello_world_solids_config(),
                "mode": "default",
            }
        },
    )

    assert not result.errors
    assert result.data
    assert result.data["launchPipelineExecution"]["__typename"] == "UnauthorizedError"
def test_basis_start_pipeline_not_found_error(self, graphql_context):
    """Launching the unknown pipeline ``sjkdfkdjkf`` yields ``PipelineNotFoundError``.

    The error payload must echo the requested pipeline name.
    """
    missing_selector = infer_pipeline_selector(graphql_context, "sjkdfkdjkf")
    execution_params = {
        "selector": missing_selector,
        "runConfigData": {"solids": {"sum_solid": {"inputs": {"num": "test.csv"}}}},
        "mode": "default",
    }
    response = execute_dagster_graphql(
        graphql_context,
        LAUNCH_PIPELINE_EXECUTION_MUTATION,
        variables={"executionParams": execution_params},
    )

    assert not response.errors
    assert response.data

    # just test existence
    assert response.data["launchPipelineExecution"]["__typename"] == "PipelineNotFoundError"
    assert response.data["launchPipelineExecution"]["pipelineName"] == "sjkdfkdjkf"
def test_smoke_test_config_type_system(self, graphql_context):
    """Smoke-test ``allConfigTypes`` for ``more_complicated_nested_config``.

    Expects at least one type whose key starts with ``Shape.`` and one entry
    for every builtin in ``ALL_CONFIG_BUILTINS``.
    """
    pipeline_selector = infer_pipeline_selector(
        graphql_context, 'more_complicated_nested_config'
    )
    response = execute_dagster_graphql(
        graphql_context,
        ALL_CONFIG_TYPES_QUERY,
        {'selector': pipeline_selector, 'mode': 'default'},
    )

    all_config_types = response.data['runConfigSchemaOrError']['allConfigTypes']

    assert has_config_type_with_key_prefix(all_config_types, 'Shape.')

    for builtin in ALL_CONFIG_BUILTINS:
        assert has_config_type(all_config_types, builtin.given_name)
def test_basic_start_pipeline_execution_config_failure(self, graphql_context):
    """Launching with an integer for the ``num`` input yields ``PipelineConfigValidationInvalid``."""
    execution_params = {
        "selector": infer_pipeline_selector(graphql_context, "csv_hello_world"),
        "runConfigData": {"solids": {"sum_solid": {"inputs": {"num": 384938439}}}},
        "mode": "default",
    }
    response = execute_dagster_graphql(
        graphql_context,
        LAUNCH_PIPELINE_EXECUTION_MUTATION,
        variables={"executionParams": execution_params},
    )

    assert not response.errors
    assert response.data

    launch_typename = response.data["launchPipelineExecution"]["__typename"]
    assert launch_typename == "PipelineConfigValidationInvalid"
def test_pipeline_or_error_by_name_not_found(graphql_context):
    """Querying ``pipelineOrError`` for the unknown name ``foobar`` yields ``PipelineNotFoundError``."""
    # The query text is kept exactly as it appears in the original test.
    named_pipeline_query = ''' query NamedPipelineQuery($selector: PipelineSelector!) { pipelineOrError(params: $selector) { __typename ... on Pipeline { name } } }'''
    pipeline_selector = infer_pipeline_selector(graphql_context, "foobar")

    response = execute_dagster_graphql(
        graphql_context,
        named_pipeline_query,
        {'selector': pipeline_selector},
    )

    assert not response.errors
    assert response.data
    assert response.data['pipelineOrError']['__typename'] == 'PipelineNotFoundError'