def test_get_asset_runs(self, graphql_context):
    """Launch the single- and multi-asset pipelines and check both runs are listed for asset key ``['a']``."""
    # Resolve both selectors up front, before any run is launched.
    selectors = [
        get_legacy_pipeline_selector(graphql_context, pipeline_name)
        for pipeline_name in ('single_asset_pipeline', 'multi_asset_pipeline')
    ]

    launched_run_ids = []
    for selector in selectors:
        launch_result = execute_dagster_graphql(
            graphql_context,
            LAUNCH_PIPELINE_EXECUTION_MUTATION,
            variables={'executionParams': {'selector': selector, 'mode': 'default'}},
        )
        launch = launch_result.data['launchPipelineExecution']
        assert launch['__typename'] == 'LaunchPipelineRunSuccess'
        launched_run_ids.append(launch['run']['runId'])
    single_run_id, multi_run_id = launched_run_ids

    asset_result = execute_dagster_graphql(
        graphql_context, GET_ASSET_RUNS, variables={'assetKey': {'path': ['a']}}
    )
    assert asset_result.data
    fetched_runs = [run['runId'] for run in asset_result.data['assetOrError']['runs']]
    assert len(fetched_runs) == 2
    assert multi_run_id in fetched_runs
    assert single_run_id in fetched_runs
def test_subscription_query_error(self, graphql_context):
    """Run ``naughty_programmer_pipeline`` and inspect the failure event logged for ``throw_a_thing.compute``."""
    execution_params = {
        'selector': get_legacy_pipeline_selector(graphql_context, 'naughty_programmer_pipeline'),
        'mode': 'default',
    }
    run_logs = sync_execute_get_run_log_data(
        context=graphql_context,
        variables={'executionParams': execution_params},
    )
    assert run_logs['__typename'] == 'PipelineRunLogsSubscriptionSuccess'

    failure_entry = _get_step_run_log_entry(
        run_logs, 'throw_a_thing.compute', 'ExecutionStepFailureEvent'
    )
    assert failure_entry

    # Confirm that it is the user stack
    assert failure_entry['message'] == 'Execution of step "throw_a_thing.compute" failed.'
    assert failure_entry['error']
    assert failure_entry['level'] == 'ERROR'
    stack = failure_entry['error']['stack']
    assert isinstance(stack, list)
    assert 'bad programmer' in stack[-1]
def get_retry_multi_execution_params(graphql_context, should_fail, retry_id=None):
    """Build the execution params dict for ``retry_multi_output_pipeline``.

    Args:
        graphql_context: Context used to resolve the pipeline selector.
        should_fail: Value placed in the ``can_fail`` solid's ``fail`` config.
        retry_id: Run id used as both ``rootRunId`` and ``parentRunId``
            (``None`` by default).

    Returns:
        A dict with ``mode``, ``selector``, ``runConfigData`` and
        ``executionMetadata`` entries; the metadata always carries the
        ``RESUME_RETRY_TAG`` tag set to ``'true'``.
    """
    selector = get_legacy_pipeline_selector(graphql_context, 'retry_multi_output_pipeline')
    run_config = {
        'storage': {'filesystem': {}},
        'solids': {'can_fail': {'config': {'fail': should_fail}}},
    }
    execution_metadata = {
        'rootRunId': retry_id,
        'parentRunId': retry_id,
        'tags': [{'key': RESUME_RETRY_TAG, 'value': 'true'}],
    }
    return {
        'mode': 'default',
        'selector': selector,
        'runConfigData': run_config,
        'executionMetadata': execution_metadata,
    }
def test_start_pipeline_execution(self, graphql_context):
    """Launch ``csv_hello_world`` and check the success payload has a UUID run id and the pipeline name."""
    execution_params = {
        'selector': get_legacy_pipeline_selector(graphql_context, 'csv_hello_world'),
        'runConfigData': csv_hello_world_solids_config(),
        'mode': 'default',
    }
    result = execute_dagster_graphql(
        graphql_context,
        LAUNCH_PIPELINE_EXECUTION_QUERY,
        variables={'executionParams': execution_params},
    )

    assert not result.errors
    assert result.data

    # just test existence
    launch = result.data['launchPipelineExecution']
    assert launch['__typename'] == 'LaunchPipelineRunSuccess'
    assert uuid.UUID(launch['run']['runId'])
    assert launch['run']['pipeline']['name'] == 'csv_hello_world'
def test_basic_filesystem_sync_execution(self, graphql_context):
    """Execute ``csv_hello_world`` with filesystem storage and check the emitted run log events."""
    selector = get_legacy_pipeline_selector(graphql_context, 'csv_hello_world')
    run_config = merge_dicts(csv_hello_world_solids_config(), {'storage': {'filesystem': {}}})
    run_log_data = sync_execute_get_run_log_data(
        context=graphql_context,
        variables={
            'executionParams': {
                'selector': selector,
                'runConfigData': run_config,
                'mode': 'default',
            }
        },
    )

    logs = run_log_data['messages']
    assert isinstance(logs, list)

    # The run must start and succeed, and must not report a failure.
    assert has_event_of_type(logs, 'PipelineStartEvent')
    assert has_event_of_type(logs, 'PipelineSuccessEvent')
    assert not has_event_of_type(logs, 'PipelineFailureEvent')

    start_event = first_event_of_type(logs, 'PipelineStartEvent')
    assert start_event['level'] == 'DEBUG'

    sum_solid_output = get_step_output_event(logs, 'sum_solid.compute')
    assert sum_solid_output['step']['key'] == 'sum_solid.compute'
    assert sum_solid_output['outputName'] == 'result'
def test_solid_id(self, graphql_context):
    """Look up solid handles by id on ``composites_pipeline``.

    Covers a top-level handle, a nested handle, and an unknown id, for which
    the ``solidHandle`` field is expected to be ``None``.
    """
    selector = get_legacy_pipeline_selector(graphql_context, "composites_pipeline")

    result = execute_dagster_graphql(
        graphql_context, SOLID_ID_QUERY, {'selector': selector, 'id': 'add_four'}
    )
    assert result.data['pipelineOrError']['solidHandle']['handleID'] == 'add_four'

    result = execute_dagster_graphql(
        graphql_context,
        SOLID_ID_QUERY,
        {'selector': selector, 'id': 'add_four.adder_1.adder_1'},
    )
    assert (
        result.data['pipelineOrError']['solidHandle']['handleID'] == 'add_four.adder_1.adder_1'
    )

    result = execute_dagster_graphql(
        graphql_context, SOLID_ID_QUERY, {'selector': selector, 'id': 'bonkahog'}
    )
    # Identity check: comparisons against the None singleton use ``is``, not ``==``.
    assert result.data['pipelineOrError']['solidHandle'] is None
def test_successful_one_part_execute_plan(graphql_context, snapshot):
    """Execute only ``sum_solid.compute`` of ``csv_hello_world`` and verify events and the stored output."""
    instance = graphql_context.instance
    environment_dict = csv_hello_world_solids_config_fs_storage()
    pipeline_run = instance.create_run_for_pipeline(
        pipeline_def=csv_hello_world, environment_dict=environment_dict
    )
    execution_params = {
        'selector': get_legacy_pipeline_selector(graphql_context, 'csv_hello_world'),
        'runConfigData': environment_dict,
        'stepKeys': ['sum_solid.compute'],
        'executionMetadata': {'runId': pipeline_run.run_id},
        'mode': 'default',
    }
    result = execute_dagster_graphql(
        graphql_context,
        EXECUTE_PLAN_QUERY,
        variables={'executionParams': execution_params},
    )

    query_result = result.data['executePlan']
    assert query_result['__typename'] == 'ExecutePlanSuccess'
    assert query_result['pipeline']['name'] == 'csv_hello_world'
    assert query_result['hasFailures'] is False

    step_events = query_result['stepEvents']
    expected_event_types = [
        'ExecutionStepStartEvent',
        'ExecutionStepInputEvent',
        'ExecutionStepOutputEvent',
        'ObjectStoreOperationEvent',
        'ExecutionStepSuccessEvent',
    ]
    assert [event['__typename'] for event in step_events] == expected_event_types
    assert step_events[1]['step']['key'] == 'sum_solid.compute'
    assert step_events[2]['outputName'] == 'result'
    assert step_events[3]['step']['key'] == 'sum_solid.compute'
    assert step_events[4]['step']['key'] == 'sum_solid.compute'

    snapshot.assert_match(clean_log_messages(result.data))

    # Read the step output back from the filesystem intermediate store.
    store = build_fs_intermediate_store(instance.intermediates_directory, pipeline_run.run_id)
    intermediates_manager = IntermediateStoreIntermediatesManager(store)
    assert intermediates_manager.has_intermediate(None, StepOutputHandle('sum_solid.compute'))

    expected_value_repr = (
        "[OrderedDict([('num1', '1'), ('num2', '2'), ('sum', 3)]), "
        "OrderedDict([('num1', '3'), ('num2', '4'), ('sum', 7)])]"
    )
    stored = intermediates_manager.get_intermediate(
        None, PoorMansDataFrame, StepOutputHandle('sum_solid.compute')
    )
    assert str(stored.obj) == expected_value_repr
def test_basic_start_pipeline_execution_with_pipeline_def_tags(self, graphql_context):
    """Start ``hello_world_with_tags`` and check the run reports the ``tag_key``/``tag_value`` tag."""
    execution_params = {
        'selector': get_legacy_pipeline_selector(graphql_context, 'hello_world_with_tags'),
        'mode': 'default',
    }
    result = execute_dagster_graphql(
        graphql_context,
        START_PIPELINE_EXECUTION_QUERY,
        variables={'executionParams': execution_params},
    )

    assert not result.errors
    expected_tags = [{'key': 'tag_key', 'value': 'tag_value'}]
    assert result.data['startPipelineExecution']['run']['tags'] == expected_tags

    # just test existence
    started = result.data['startPipelineExecution']
    assert started['__typename'] == 'StartPipelineRunSuccess'
    assert uuid.UUID(started['run']['runId'])
    assert started['run']['pipeline']['name'] == 'hello_world_with_tags'
def test_invalid_config_fetch_execute_plan(graphql_context, snapshot):
    """Request an execution plan with an invalid csv path and expect a single validation error."""
    invalid_run_config = {
        'solids': {'sum_solid': {'inputs': {'num': {'csv': {'path': 384938439}}}}}
    }
    result = execute_dagster_graphql(
        graphql_context,
        EXECUTION_PLAN_QUERY,
        variables={
            'pipeline': get_legacy_pipeline_selector(graphql_context, 'csv_hello_world'),
            'runConfigData': invalid_run_config,
            'mode': 'default',
        },
    )

    assert not result.errors
    assert result.data

    plan_or_error = result.data['executionPlanOrError']
    assert plan_or_error['__typename'] == 'PipelineConfigValidationInvalid'
    assert len(plan_or_error['errors']) == 1

    message_prefix = 'Invalid scalar at path root:solids:sum_solid:inputs:num'
    assert message_prefix in plan_or_error['errors'][0]['message']

    # Overwrite the message in place (inside result.data) with just the fixed
    # prefix before it is snapshotted.
    plan_or_error['errors'][0]['message'] = message_prefix
    snapshot.assert_match(result.data)
def test_run_launcher_subset(self, graphql_context):
    """Launch the ``noop_solid`` subset of ``more_complicated_config`` and check one step succeeds."""
    selector = get_legacy_pipeline_selector(
        graphql_context, 'more_complicated_config', ['noop_solid']
    )
    launch_result = execute_dagster_graphql(
        context=graphql_context,
        query=LAUNCH_PIPELINE_EXECUTION_MUTATION,
        variables={'executionParams': {'selector': selector, 'mode': 'default'}},
    )

    launch = launch_result.data['launchPipelineExecution']
    assert launch['__typename'] == 'LaunchPipelineRunSuccess'
    assert launch['run']['status'] == 'NOT_STARTED'
    run_id = launch['run']['runId']

    # Wait on the run launcher before querying the run's final state.
    graphql_context.instance.run_launcher.join()

    run_result = execute_dagster_graphql(
        context=graphql_context, query=RUN_QUERY, variables={'runId': run_id}
    )
    run = run_result.data['pipelineRunOrError']
    assert run['__typename'] == 'PipelineRun'
    assert run['status'] == 'SUCCESS'
    assert run['stats']['stepsSucceeded'] == 1
def test_start_pipeline_execution_with_start_disabled(self, graphql_context):
    """Start ``csv_hello_world`` and expect a ``StartPipelineRunDisabledError`` response."""
    execution_params = {
        'selector': get_legacy_pipeline_selector(graphql_context, 'csv_hello_world'),
        'runConfigData': csv_hello_world_solids_config(),
        'executionMetadata': {'tags': [{'key': 'dagster/test_key', 'value': 'test_value'}]},
        'mode': 'default',
    }
    result = execute_dagster_graphql(
        graphql_context,
        START_PIPELINE_EXECUTION_QUERY,
        variables={'executionParams': execution_params},
    )

    assert result.data
    response_type = result.data['startPipelineExecution']['__typename']
    assert response_type == 'StartPipelineRunDisabledError'
def test_type_rendering(graphql_context):
    """Run ``TYPE_RENDER_QUERY`` against ``more_complicated_nested_config`` and expect data without errors."""
    variables = {
        'selector': get_legacy_pipeline_selector(
            graphql_context, "more_complicated_nested_config"
        )
    }
    result = execute_dagster_graphql(graphql_context, TYPE_RENDER_QUERY, variables)
    assert not result.errors
    assert result.data
def test_basis_start_pipeline_not_found_error(self, graphql_context):
    """Start the non-existent pipeline ``sjkdfkdjkf`` and expect a ``PipelineNotFoundError``."""
    execution_params = {
        'selector': get_legacy_pipeline_selector(graphql_context, 'sjkdfkdjkf'),
        'runConfigData': {'solids': {'sum_solid': {'inputs': {'num': 'test.csv'}}}},
        'mode': 'default',
    }
    result = execute_dagster_graphql(
        graphql_context,
        START_PIPELINE_EXECUTION_QUERY,
        variables={'executionParams': execution_params},
    )

    assert not result.errors
    assert result.data

    # just test existence
    response = result.data['startPipelineExecution']
    assert response['__typename'] == 'PipelineNotFoundError'
    assert response['pipelineName'] == 'sjkdfkdjkf'
def test_basic_start_pipeline_execution_config_failure(self, graphql_context):
    """Start ``csv_hello_world`` with an integer ``num`` input and expect a config validation error."""
    execution_params = {
        'selector': get_legacy_pipeline_selector(graphql_context, 'csv_hello_world'),
        'runConfigData': {'solids': {'sum_solid': {'inputs': {'num': 384938439}}}},
        'mode': 'default',
    }
    result = execute_dagster_graphql(
        graphql_context,
        START_PIPELINE_EXECUTION_QUERY,
        variables={'executionParams': execution_params},
    )

    assert not result.errors
    assert result.data
    response_type = result.data['startPipelineExecution']['__typename']
    assert response_type == 'PipelineConfigValidationInvalid'
def test_multi_mode_successful(graphql_context):
    """Execute ``multi_mode_with_resources`` in each of its three modes.

    For every mode the ``op`` resource is configured and the run's events must
    contain an output for the ``apply_to_three.compute`` step. The original
    test discarded the ``get_step_output`` result for ``double_adder``, so that
    mode was never checked; all three modes are now asserted the same way.
    """
    selector = get_legacy_pipeline_selector(graphql_context, 'multi_mode_with_resources')

    # (mode name, config passed to the ``op`` resource in that mode)
    mode_configs = [
        ('add_mode', 2),
        ('mult_mode', 2),
        ('double_adder', {'num_one': 2, 'num_two': 4}),
    ]

    for mode, op_config in mode_configs:
        mode_logs = sync_execute_get_events(
            context=graphql_context,
            variables={
                'executionParams': {
                    'selector': selector,
                    'mode': mode,
                    'runConfigData': {'resources': {'op': {'config': op_config}}},
                }
            },
        )
        assert get_step_output(mode_logs, 'apply_to_three.compute')
def test_basic_start_pipeline_execution_with_preset_failure(self, graphql_context):
    """Check that a preset combined with a solid subset, run config, or mode raises ``CheckError``."""
    # These should check for proper graphql errors rather than check errors
    # https://github.com/dagster-io/dagster/issues/2507

    # Case 1: preset together with a solid subset selector.
    subset_selector = get_legacy_pipeline_selector(
        graphql_context, 'csv_hello_world', ['sum_sq_solid']
    )
    preset_with_subset = {'selector': subset_selector, 'preset': 'test_inline'}
    with pytest.raises(check.CheckError):
        execute_dagster_graphql(
            graphql_context,
            START_PIPELINE_EXECUTION_QUERY,
            variables={'executionParams': preset_with_subset},
        )

    selector = get_legacy_pipeline_selector(graphql_context, 'csv_hello_world')

    # Case 2: preset together with explicit run config.
    with pytest.raises(check.CheckError):
        execute_dagster_graphql(
            graphql_context,
            START_PIPELINE_EXECUTION_QUERY,
            variables={
                'executionParams': {
                    'selector': selector,
                    'preset': 'test_inline',
                    'runConfigData': csv_hello_world_solids_config(),
                }
            },
        )

    # Case 3: preset together with an explicit mode.
    preset_with_mode = {'selector': selector, 'preset': 'test_inline', 'mode': 'default'}
    with pytest.raises(check.CheckError):
        execute_dagster_graphql(
            graphql_context,
            START_PIPELINE_EXECUTION_QUERY,
            variables={'executionParams': preset_with_mode},
        )
def test_enum_query(graphql_context):
    """Fetch the config types of ``pipeline_with_enum_config`` and check the ``TestEnum`` values."""
    selector = get_legacy_pipeline_selector(graphql_context, "pipeline_with_enum_config")

    # Adjacent literals concatenate to the query text exactly as it appears in
    # the file.
    enum_query = (
        ' query EnumQuery($selector: PipelineSelector!) {'
        ' runConfigSchemaOrError(selector: $selector) {'
        ' ... on RunConfigSchema {'
        ' allConfigTypes {'
        ' __typename key'
        ' ... on EnumConfigType {'
        ' values { value description }'
        ' } } } } } '
    )

    result = execute_dagster_graphql(graphql_context, enum_query, {'selector': selector})

    assert not result.errors
    assert result.data

    # First config type keyed 'TestEnum', or None when there is none.
    enum_type_data = next(
        (
            config_type
            for config_type in result.data['runConfigSchemaOrError']['allConfigTypes']
            if config_type['key'] == 'TestEnum'
        ),
        None,
    )

    assert enum_type_data
    assert enum_type_data['key'] == 'TestEnum'
    assert enum_type_data['values'] == [
        {'value': enum_value, 'description': 'An enum value.'}
        for enum_value in ('ENUM_VALUE_ONE', 'ENUM_VALUE_TWO', 'ENUM_VALUE_THREE')
    ]
def test_csv_hello_world_pipeline_or_error_subset_wrong_solid_name(graphql_context):
    """Select the unknown solid ``nope`` in ``csv_hello_world`` and expect an ``InvalidSubsetError``."""
    variables = {
        'selector': get_legacy_pipeline_selector(graphql_context, 'csv_hello_world', ['nope'])
    }
    result = execute_dagster_graphql(graphql_context, SCHEMA_OR_ERROR_SUBSET_QUERY, variables)

    assert not result.errors
    assert result.data

    schema_or_error = result.data['runConfigSchemaOrError']
    assert schema_or_error['__typename'] == 'InvalidSubsetError'
    assert '"nope" does not exist' in schema_or_error['message']
def execute_modes_query(context, pipeline_name, mode):
    """Run ``MODE_QUERY`` for the named pipeline and mode.

    Args:
        context: GraphQL context used to resolve the selector and run the query.
        pipeline_name: Name of the pipeline to select.
        mode: Mode passed to the query as the ``mode`` variable.

    Returns:
        Whatever ``execute_dagster_graphql`` returns for the query.
    """
    query_variables = {
        'selector': get_legacy_pipeline_selector(context, pipeline_name),
        'mode': mode,
    }
    return execute_dagster_graphql(context, MODE_QUERY, variables=query_variables)
def execute_config_graphql(context, pipeline_name, environment_dict, mode):
    """Run ``CONFIG_VALIDATION_QUERY`` for a pipeline, run config and mode.

    Args:
        context: GraphQL context used to resolve the selector and run the query.
        pipeline_name: Name of the pipeline to select.
        environment_dict: Run config sent as the ``runConfigData`` variable.
        mode: Mode passed to the query as the ``mode`` variable.

    Returns:
        Whatever ``execute_dagster_graphql`` returns for the query.
    """
    selector = get_legacy_pipeline_selector(context, pipeline_name)
    query_variables = {
        'runConfigData': environment_dict,
        'pipeline': selector,
        'mode': mode,
    }
    return execute_dagster_graphql(context, CONFIG_VALIDATION_QUERY, query_variables)
def test_retry_resource_pipeline(self, graphql_context):
    """Run ``retry_resource_pipeline``, then re-execute it with the resume/retry tag.

    The first run must succeed on ``start.compute`` and fail on
    ``will_fail.compute``. The re-execution must not run ``start.compute``
    and must fail on ``will_fail.compute`` again.
    """
    context = graphql_context
    selector = get_legacy_pipeline_selector(graphql_context, 'retry_resource_pipeline')

    first_result = execute_dagster_graphql_and_finish_runs(
        context,
        START_PIPELINE_EXECUTION_QUERY,
        variables={
            'executionParams': {
                'mode': 'default',
                'selector': selector,
                'runConfigData': {'storage': {'filesystem': {}}},
            }
        },
    )
    run_id = first_result.data['startPipelineExecution']['run']['runId']
    first_logs = get_all_logs_for_finished_run_via_subscription(context, run_id)
    logs = first_logs['pipelineRunLogs']['messages']
    assert step_did_succeed(logs, 'start.compute')
    assert step_did_fail(logs, 'will_fail.compute')

    # Re-execute with the first run as both root and parent.
    retry_metadata = {
        'rootRunId': run_id,
        'parentRunId': run_id,
        'tags': [{'key': RESUME_RETRY_TAG, 'value': 'true'}],
    }
    retry_one = execute_dagster_graphql_and_finish_runs(
        context,
        START_PIPELINE_REEXECUTION_QUERY,
        variables={
            'executionParams': {
                'mode': 'default',
                'selector': selector,
                'runConfigData': {'storage': {'filesystem': {}}},
                'executionMetadata': retry_metadata,
            }
        },
    )
    run_id = retry_one.data['startPipelineReexecution']['run']['runId']
    retry_logs = get_all_logs_for_finished_run_via_subscription(context, run_id)
    logs = retry_logs['pipelineRunLogs']['messages']
    assert step_did_not_run(logs, 'start.compute')
    assert step_did_fail(logs, 'will_fail.compute')
def test_run_config_schema_pipeline_not_found(self, graphql_context):
    """Query the run config schema of unknown pipeline ``jkdjfkdjfd`` and expect ``PipelineNotFoundError``."""
    query_variables = {
        'selector': get_legacy_pipeline_selector(graphql_context, 'jkdjfkdjfd'),
        'mode': 'add_mode',
    }
    result = execute_dagster_graphql(
        graphql_context, RUN_CONFIG_SCHEMA_QUERY, variables=query_variables
    )
    response_type = result.data['runConfigSchemaOrError']['__typename']
    assert response_type == 'PipelineNotFoundError'
def test_get_all_asset_keys(self, graphql_context, snapshot):
    """Launch ``multi_asset_pipeline`` and snapshot the result of ``GET_ASSET_KEY_QUERY``."""
    execution_params = {
        'selector': get_legacy_pipeline_selector(graphql_context, 'multi_asset_pipeline'),
        'mode': 'default',
    }
    launch_result = execute_dagster_graphql(
        graphql_context,
        LAUNCH_PIPELINE_EXECUTION_MUTATION,
        variables={'executionParams': execution_params},
    )
    launch_type = launch_result.data['launchPipelineExecution']['__typename']
    assert launch_type == 'LaunchPipelineRunSuccess'

    keys_result = execute_dagster_graphql(graphql_context, GET_ASSET_KEY_QUERY)
    assert keys_result.data
    snapshot.assert_match(keys_result.data)
def test_composites(self, graphql_context, snapshot):
    """Query the solid handles of ``composites_pipeline``; expect 10 distinct handle ids."""
    selector = get_legacy_pipeline_selector(graphql_context, "composites_pipeline")
    result = execute_dagster_graphql(graphql_context, COMPOSITES_QUERY, {'selector': selector})

    # Key by handle id so duplicate ids collapse before counting.
    handle_map = {
        handle['handleID']: handle['solid']
        for handle in result.data['pipelineOrError']['solidHandles']
    }
    assert len(handle_map) == 10

    snapshot.assert_match(result.data)
def test_terminate_failed(self, graphql_context):
    """Check the cancellation mutation reports failure when the launcher cannot terminate.

    The run launcher's ``terminate`` is replaced with a stub returning
    ``False``, ``infinite_loop_pipeline`` is launched, and the cancellation
    query must report ``TerminatePipelineExecutionFailure``. The real
    ``terminate`` is then restored and a second cancellation must succeed.

    The stub is removed in a ``finally`` block so that a failing assertion
    cannot leave the patched ``terminate`` installed on the launcher.
    """
    selector = get_legacy_pipeline_selector(graphql_context, 'infinite_loop_pipeline')
    with safe_tempfile_path() as path:
        old_terminate = graphql_context.instance.run_launcher.terminate
        graphql_context.instance.run_launcher.terminate = lambda _run_id: False
        try:
            result = execute_dagster_graphql(
                graphql_context,
                LAUNCH_PIPELINE_EXECUTION_QUERY,
                variables={
                    'executionParams': {
                        'selector': selector,
                        'mode': 'default',
                        'runConfigData': {'solids': {'loop': {'config': {'file': path}}}},
                    }
                },
            )

            assert not result.errors
            assert result.data

            # just test existence
            launch = result.data['launchPipelineExecution']
            assert launch['__typename'] == 'LaunchPipelineRunSuccess'
            run_id = launch['run']['runId']

            # ensure the execution has happened
            while not os.path.exists(path):
                time.sleep(0.1)

            result = execute_dagster_graphql(
                graphql_context, RUN_CANCELLATION_QUERY, variables={'runId': run_id}
            )
            termination = result.data['terminatePipelineExecution']
            assert termination['__typename'] == 'TerminatePipelineExecutionFailure'
            assert termination['message'].startswith('Unable to terminate run')
        finally:
            # Always put the real terminate back, even if an assertion failed.
            graphql_context.instance.run_launcher.terminate = old_terminate

        # With the real terminate restored, cancellation must now succeed.
        result = execute_dagster_graphql(
            graphql_context, RUN_CANCELLATION_QUERY, variables={'runId': run_id}
        )
        assert (
            result.data['terminatePipelineExecution']['__typename']
            == 'TerminatePipelineExecutionSuccess'
        )
def test_run_config_schema_mode_not_found(graphql_context):
    """Query the run config schema with unknown mode ``kdjfdk`` and expect ``ModeNotFoundError``."""
    query_variables = {
        'selector': get_legacy_pipeline_selector(graphql_context, 'multi_mode_with_resources'),
        'mode': 'kdjfdk',
    }
    result = execute_dagster_graphql(
        graphql_context, RUN_CONFIG_SCHEMA_QUERY, variables=query_variables
    )
    response_type = result.data['runConfigSchemaOrError']['__typename']
    assert response_type == 'ModeNotFoundError'
def test_successful_run_config_schema(self, graphql_context):
    """Query the run config schema of ``multi_mode_with_resources`` in ``add_mode`` and expect success."""
    query_variables = {
        'selector': get_legacy_pipeline_selector(graphql_context, 'multi_mode_with_resources'),
        'mode': 'add_mode',
    }
    result = execute_dagster_graphql(
        graphql_context, RUN_CONFIG_SCHEMA_QUERY, variables=query_variables
    )
    response_type = result.data['runConfigSchemaOrError']['__typename']
    assert response_type == 'RunConfigSchema'
def test_get_asset_key_materialization(self, graphql_context, snapshot):
    """Launch ``single_asset_pipeline`` and snapshot ``GET_ASSET_MATERIALIZATION`` for asset key ``['a']``."""
    execution_params = {
        'selector': get_legacy_pipeline_selector(graphql_context, 'single_asset_pipeline'),
        'mode': 'default',
    }
    launch_result = execute_dagster_graphql(
        graphql_context,
        LAUNCH_PIPELINE_EXECUTION_MUTATION,
        variables={'executionParams': execution_params},
    )
    launch_type = launch_result.data['launchPipelineExecution']['__typename']
    assert launch_type == 'LaunchPipelineRunSuccess'

    asset_result = execute_dagster_graphql(
        graphql_context, GET_ASSET_MATERIALIZATION, variables={'assetKey': {'path': ['a']}}
    )
    assert asset_result.data
    snapshot.assert_match(asset_result.data)
def test_full_pipeline_reexecution_fs_storage(self, graphql_context, snapshot):
    """Run ``csv_hello_world`` with fs storage, re-execute it, and check the run lineage ids."""
    selector = get_legacy_pipeline_selector(graphql_context, 'csv_hello_world')

    run_id = make_new_run_id()
    first_params = {
        'selector': selector,
        'runConfigData': csv_hello_world_solids_config_fs_storage(),
        'executionMetadata': {'runId': run_id},
        'mode': 'default',
    }
    result_one = execute_dagster_graphql(
        graphql_context,
        START_PIPELINE_EXECUTION_SNAPSHOT_QUERY,
        variables={'executionParams': first_params},
    )
    first_type = result_one.data['startPipelineExecution']['__typename']
    assert first_type == 'StartPipelineRunSuccess'

    snapshot.assert_match(sanitize_result_data(result_one.data))

    # reexecution
    new_run_id = make_new_run_id()
    reexecution_params = {
        'selector': selector,
        'runConfigData': csv_hello_world_solids_config_fs_storage(),
        'executionMetadata': {
            'runId': new_run_id,
            'rootRunId': run_id,
            'parentRunId': run_id,
        },
        'mode': 'default',
    }
    result_two = execute_dagster_graphql(
        graphql_context,
        START_PIPELINE_REEXECUTION_SNAPSHOT_QUERY,
        variables={'executionParams': reexecution_params},
    )

    query_result = result_two.data['startPipelineReexecution']
    assert query_result['__typename'] == 'StartPipelineRunSuccess'
    reexecuted_run = query_result['run']
    assert reexecuted_run['rootRunId'] == run_id
    assert reexecuted_run['parentRunId'] == run_id
def test_run_config_schema_solid_not_found(self, graphql_context):
    """Query the run config schema with unknown solid ``kdjfkdj`` and expect ``InvalidSubsetError``."""
    query_variables = {
        'selector': get_legacy_pipeline_selector(
            graphql_context, 'multi_mode_with_resources', ['kdjfkdj']
        ),
        'mode': 'add_mode',
    }
    result = execute_dagster_graphql(
        graphql_context, RUN_CONFIG_SCHEMA_QUERY, variables=query_variables
    )
    response_type = result.data['runConfigSchemaOrError']['__typename']
    assert response_type == 'InvalidSubsetError'