def test_filtered_runs():
    """Filtering runs by run id and by tag each return exactly the matching run."""
    with seven.TemporaryDirectory() as temp_dir:
        instance = DagsterInstance.local_temp(temp_dir)
        repo = get_repo_at_time_1()
        run_id_1 = execute_pipeline(
            repo.get_pipeline('foo_pipeline'), instance=instance, tags={'run': 'one'}
        ).run_id
        _run_id_2 = execute_pipeline(
            repo.get_pipeline('foo_pipeline'), instance=instance, tags={'run': 'two'}
        ).run_id
        context = define_context_for_file(__file__, 'get_repo_at_time_1', instance)

        def matching_run_ids(gql_result):
            # Collect the run ids returned by the filtered query.
            return [run['runId'] for run in gql_result.data['pipelineRunsOrError']['results']]

        # Filter on the exact run id.
        result = execute_dagster_graphql(
            context, FILTERED_RUN_QUERY, variables={'filter': {'runId': run_id_1}}
        )
        assert result.data
        assert matching_run_ids(result) == [run_id_1]

        # Filter on the tag attached to the first run only.
        result = execute_dagster_graphql(
            context,
            FILTERED_RUN_QUERY,
            variables={'filter': {'tags': [{'key': 'run', 'value': 'one'}]}},
        )
        assert result.data
        assert matching_run_ids(result) == [run_id_1]
def test_run_groups():
    """Three root runs with five children each yield three groups of six runs."""
    with seven.TemporaryDirectory() as tempdir:
        instance = DagsterInstance.local_temp(tempdir=tempdir)
        repo = get_repo_at_time_1()
        foo_pipeline = repo.get_pipeline('foo_pipeline')

        # Launch three independent root runs.
        root_run_ids = [execute_pipeline(foo_pipeline, instance=instance).run_id for _ in range(3)]

        # Re-execute each root five times, tagging the children with their lineage.
        for _ in range(5):
            for root_run_id in root_run_ids:
                execute_pipeline(
                    foo_pipeline,
                    tags={PARENT_RUN_ID_TAG: root_run_id, ROOT_RUN_ID_TAG: root_run_id},
                    instance=instance,
                )

        context_at_time_1 = define_context_for_file(__file__, 'get_repo_at_time_1', instance)
        result = execute_dagster_graphql(context_at_time_1, ALL_RUN_GROUPS_QUERY)
        assert result.data
        assert 'runGroupsOrError' in result.data
        groups_payload = result.data['runGroupsOrError']
        assert 'results' in groups_payload
        assert len(groups_payload['results']) == 3
        for run_group in groups_payload['results']:
            # Each group is keyed by one root run and contains root + 5 children.
            assert run_group['rootRunId'] in root_run_ids
            assert len(run_group['runs']) == 6
def test_get_all_asset_keys(snapshot):
    """Asset key listing after a multi-asset pipeline run matches the snapshot."""
    with seven.TemporaryDirectory() as temp_dir:
        instance = get_instance(temp_dir)
        asset_repository = asset_repo()
        execute_pipeline(asset_repository.get_pipeline('multi_asset_pipeline'), instance=instance)
        graphql_context = define_context_for_file(__file__, 'asset_repo', instance)
        result = execute_dagster_graphql(graphql_context, GET_ASSET_KEY_QUERY)
        assert result.data
        snapshot.assert_match(result.data)
def test_get_asset_key_materialization(snapshot):
    """Materialization lookup for asset key 'a' matches the stored snapshot."""
    with seven.TemporaryDirectory() as temp_dir:
        instance = get_instance(temp_dir)
        asset_repository = asset_repo()
        execute_pipeline(asset_repository.get_pipeline('single_asset_pipeline'), instance=instance)
        graphql_context = define_context_for_file(__file__, 'asset_repo', instance)
        result = execute_dagster_graphql(
            graphql_context, GET_ASSET_MATERIALIZATION, variables={'assetKey': 'a'}
        )
        assert result.data
        snapshot.assert_match(result.data)
def test_run_group_not_found():
    """Querying a run group for an unknown run id yields RunGroupNotFoundError."""
    with seven.TemporaryDirectory() as temp_dir:
        instance = DagsterInstance.local_temp(temp_dir)
        context_at_time_1 = define_context_for_file(__file__, 'get_repo_at_time_1', instance)
        result = execute_dagster_graphql(
            context_at_time_1,
            RUN_GROUP_QUERY,
            variables={'runId': 'foo'},
        )
        assert result.data
        not_found = result.data['runGroupOrError']
        assert not_found
        assert not_found['__typename'] == 'RunGroupNotFoundError'
        assert not_found['runId'] == 'foo'
        assert not_found['message'] == 'Run group of run {run_id} could not be found.'.format(
            run_id='foo'
        )
def test_run_group():
    """A run group queried from the root or from any child resolves to the same group.

    Fixes a copy/paste slip: the typename assertion after the second query
    previously re-checked ``result_one`` instead of ``result_two``.
    """
    with seven.TemporaryDirectory() as temp_dir:
        instance = DagsterInstance.local_temp(temp_dir)
        repo = get_repo_at_time_1()
        foo_pipeline = repo.get_pipeline('foo_pipeline')
        runs = [execute_pipeline(foo_pipeline, instance=instance)]
        root_run_id = runs[-1].run_id
        for _ in range(3):
            # https://github.com/dagster-io/dagster/issues/2433
            run = instance.create_run_for_pipeline(
                foo_pipeline,
                parent_run_id=root_run_id,
                root_run_id=root_run_id,
                tags={PARENT_RUN_ID_TAG: root_run_id, ROOT_RUN_ID_TAG: root_run_id},
            )
            execute_run(InMemoryExecutablePipeline(foo_pipeline), run, instance)
            runs.append(run)
        context_at_time_1 = define_context_for_file(__file__, 'get_repo_at_time_1', instance)

        # Query the group via the root run.
        result_one = execute_dagster_graphql(
            context_at_time_1, RUN_GROUP_QUERY, variables={'runId': root_run_id},
        )
        assert result_one.data['runGroupOrError']['__typename'] == 'RunGroup'
        assert len(result_one.data['runGroupOrError']['runs']) == 4

        # Query the group via the last child run.
        result_two = execute_dagster_graphql(
            context_at_time_1, RUN_GROUP_QUERY, variables={'runId': runs[-1].run_id},
        )
        # Fixed: previously asserted on result_one here, leaving result_two unchecked.
        assert result_two.data['runGroupOrError']['__typename'] == 'RunGroup'
        assert len(result_two.data['runGroupOrError']['runs']) == 4

        # Both entry points resolve to the same group.
        assert (
            result_one.data['runGroupOrError']['rootRunId']
            == result_two.data['runGroupOrError']['rootRunId']
        )
        assert (
            result_one.data['runGroupOrError']['runs']
            == result_two.data['runGroupOrError']['runs']
        )
def test_filtered_runs():
    """Run filters by run id and by tag each select exactly the tagged run."""
    with seven.TemporaryDirectory() as temp_dir:
        instance = DagsterInstance.local_temp(temp_dir)
        repo = get_repo_at_time_1()
        run_id_1 = execute_pipeline(
            repo.get_pipeline("foo_pipeline"), instance=instance, tags={"run": "one"}
        ).run_id
        _run_id_2 = execute_pipeline(
            repo.get_pipeline("foo_pipeline"), instance=instance, tags={"run": "two"}
        ).run_id
        context = define_context_for_file(__file__, "get_repo_at_time_1", instance)

        # Filter on the exact run id.
        result = execute_dagster_graphql(
            context, FILTERED_RUN_QUERY, variables={"filter": {"runId": run_id_1}}
        )
        assert result.data
        run_ids = [run["runId"] for run in result.data["pipelineRunsOrError"]["results"]]
        assert run_ids == [run_id_1]

        # Filter on the tag attached to the first run only.
        result = execute_dagster_graphql(
            context,
            FILTERED_RUN_QUERY,
            variables={"filter": {"tags": [{"key": "run", "value": "one"}]}},
        )
        assert result.data
        run_ids = [run["runId"] for run in result.data["pipelineRunsOrError"]["results"]]
        assert run_ids == [run_id_1]
def test_roundtrip_run():
    """Execution params survive a PipelineRun -> graphql input -> params round trip."""
    run_with_snapshot = PipelineRun(
        pipeline_name='pipey_mcpipeface',
        run_id='8675309',
        environment_dict={'good': True},
        mode='default',
        solid_subset=['solid_1'],
        step_keys_to_execute=['step_1', 'step_2', 'step_3'],
        tags={'tag_it': 'bag_it'},
        status=PipelineRunStatus.NOT_STARTED,
        root_run_id='previousID',
        parent_run_id='previousID',
        pipeline_snapshot_id='pipey_mcpipeface_snapshot_id',
        execution_plan_snapshot_id='mcexecutionplanface_snapshot_id',
    )
    # Ensure we have a truthy test value to round trip for each field.
    for field in run_with_snapshot:
        assert field

    # The invariant that all the execution parameter structs pipeline run can be
    # constructed from each other is no longer true. Clients of the GraphQL API
    # cannot know the value of the pipeline_snapshot_id prior to execution,
    # because it is constructed on the server. Hence these roundtrip tests do
    # not include snapshot ids.
    run = run_with_snapshot._replace(pipeline_snapshot_id=None, execution_plan_snapshot_id=None)
    context = define_context_for_file(__file__, 'pipey_mcpipeface', DagsterInstance.ephemeral())

    def roundtrip(pipeline_run):
        # Convert to execution params, through the graphql input shape, and back.
        params = execution_params_from_pipeline_run(context, pipeline_run)
        return params, execution_params_from_graphql(context, params.to_graphql_input())

    exec_params, exec_params_gql = roundtrip(run)
    assert exec_params_gql == exec_params

    empty_run = PipelineRun(pipeline_name='foo', run_id='bar', mode='default')
    exec_params, exec_params_gql = roundtrip(empty_run)
    assert exec_params_gql == exec_params
def test_run_group_not_found():
    """An unknown run id produces a RunGroupNotFoundError payload."""
    with seven.TemporaryDirectory() as temp_dir:
        instance = DagsterInstance.local_temp(temp_dir)
        context_at_time_1 = define_context_for_file(__file__, "get_repo_at_time_1", instance)
        result = execute_dagster_graphql(
            context_at_time_1,
            RUN_GROUP_QUERY,
            variables={"runId": "foo"},
        )
        assert result.data
        not_found = result.data["runGroupOrError"]
        assert not_found
        assert not_found["__typename"] == "RunGroupNotFoundError"
        assert not_found["runId"] == "foo"
        assert not_found["message"] == "Run group of run {run_id} could not be found.".format(
            run_id="foo"
        )
def test_get_asset_runs():
    """Both runs that materialized asset 'a' are returned by the asset runs query."""
    with seven.TemporaryDirectory() as temp_dir:
        instance = get_instance(temp_dir)
        asset_repository = asset_repo()
        single_run_id = execute_pipeline(
            asset_repository.get_pipeline('single_asset_pipeline'), instance=instance
        ).run_id
        multi_run_id = execute_pipeline(
            asset_repository.get_pipeline('multi_asset_pipeline'), instance=instance
        ).run_id
        graphql_context = define_context_for_file(__file__, 'asset_repo', instance)
        result = execute_dagster_graphql(
            graphql_context, GET_ASSET_RUNS, variables={'assetKey': 'a'}
        )
        assert result.data
        fetched_runs = [run['runId'] for run in result.data['assetOrError']['runs']]
        # Exactly the two materializing runs, in either order.
        assert len(fetched_runs) == 2
        assert multi_run_id in fetched_runs
        assert single_run_id in fetched_runs
def define_test_context(instance=None):
    """Return a graphql context for this file's define_repository repository.

    instance: optional DagsterInstance; None defers to define_context_for_file's
    default behavior.
    """
    repository_symbol = "define_repository"
    return define_context_for_file(__file__, repository_symbol, instance)
def define_test_context(instance):
    """Return a graphql context for this file's test_repo repository.

    Fails a check error unless instance is a DagsterInstance.
    """
    check.inst_param(instance, 'instance', DagsterInstance)
    repository_symbol = "test_repo"
    return define_context_for_file(__file__, repository_symbol, instance)
def test_run_groups_over_time():
    """Run groups keep resolving pipeline snapshots as the repository evolves."""
    with seven.TemporaryDirectory() as tempdir:
        instance = DagsterInstance.local_temp(tempdir=tempdir)
        repo_1 = get_repo_at_time_1()
        full_evolve_run_id = execute_pipeline(
            repo_1.get_pipeline('evolving_pipeline'), instance=instance
        ).run_id
        foo_run_id = execute_pipeline(
            repo_1.get_pipeline('foo_pipeline'), instance=instance
        ).run_id
        evolve_a_run_id = execute_pipeline(
            repo_1.get_pipeline('evolving_pipeline').get_pipeline_subset_def({'solid_A'}),
            instance=instance,
        ).run_id
        evolve_b_run_id = execute_pipeline(
            repo_1.get_pipeline('evolving_pipeline').get_pipeline_subset_def({'solid_B'}),
            instance=instance,
        ).run_id

        def runs_by_id(gql_result):
            # Flatten all run groups into a run_id -> run mapping.
            return {
                run['runId']: run
                for group in gql_result.data['runGroupsOrError']['results']
                for run in group['runs']
            }

        def snapshot_entry(name, solid_selection):
            # Expected shape of a run's resolved pipeline snapshot.
            return {
                '__typename': 'PipelineSnapshot',
                'name': name,
                'solidSelection': solid_selection,
            }

        context_at_time_1 = define_context_for_file(__file__, 'get_repo_at_time_1', instance)
        result = execute_dagster_graphql(context_at_time_1, ALL_RUN_GROUPS_QUERY)
        assert result.data
        assert 'runGroupsOrError' in result.data
        assert 'results' in result.data['runGroupsOrError']
        assert len(result.data['runGroupsOrError']['results']) == 4
        t1_runs = runs_by_id(result)

        # test full_evolve_run_id
        assert t1_runs[full_evolve_run_id]['pipeline'] == snapshot_entry('evolving_pipeline', None)
        # test foo_run_id
        assert t1_runs[foo_run_id]['pipeline'] == snapshot_entry('foo_pipeline', None)
        # test evolve_a_run_id
        assert t1_runs[evolve_a_run_id]['pipeline'] == snapshot_entry(
            'evolving_pipeline', ['solid_A']
        )
        assert t1_runs[evolve_a_run_id]['pipelineSnapshotId']
        # test evolve_b_run_id
        assert t1_runs[evolve_b_run_id]['pipeline'] == snapshot_entry(
            'evolving_pipeline', ['solid_B']
        )

        context_at_time_2 = define_context_for_file(__file__, 'get_repo_at_time_2', instance)
        result = execute_dagster_graphql(context_at_time_2, ALL_RUN_GROUPS_QUERY)
        assert 'runGroupsOrError' in result.data
        assert 'results' in result.data['runGroupsOrError']
        assert len(result.data['runGroupsOrError']['results']) == 4
        t2_runs = runs_by_id(result)

        # test full_evolve_run_id
        assert t2_runs[full_evolve_run_id]['pipeline'] == snapshot_entry('evolving_pipeline', None)
        # test evolve_a_run_id
        assert t2_runs[evolve_a_run_id]['pipeline'] == snapshot_entry(
            'evolving_pipeline', ['solid_A']
        )
        assert t2_runs[evolve_a_run_id]['pipelineSnapshotId']
        # names same
        assert (
            t1_runs[full_evolve_run_id]['pipeline']['name']
            == t2_runs[evolve_a_run_id]['pipeline']['name']
        )
        # snapshots differ
        assert (
            t1_runs[full_evolve_run_id]['pipelineSnapshotId']
            != t2_runs[evolve_a_run_id]['pipelineSnapshotId']
        )
        # pipeline name changed
        assert t2_runs[foo_run_id]['pipeline'] == snapshot_entry('foo_pipeline', None)
        # subset no longer valid - b renamed
        assert t2_runs[evolve_b_run_id]['pipeline'] == snapshot_entry(
            'evolving_pipeline', ['solid_B']
        )
def test_runs_over_time():
    """Stored runs keep resolving their pipeline snapshots after the repo evolves."""
    with seven.TemporaryDirectory() as temp_dir:
        instance = DagsterInstance.local_temp(temp_dir)
        repo_1 = get_repo_at_time_1()
        full_evolve_run_id = execute_pipeline(
            repo_1.get_pipeline('evolving_pipeline'), instance=instance
        ).run_id
        foo_run_id = execute_pipeline(
            repo_1.get_pipeline('foo_pipeline'), instance=instance
        ).run_id
        evolve_a_run_id = execute_pipeline(
            repo_1.get_pipeline('evolving_pipeline').subset_for_execution(['solid_A']),
            instance=instance,
        ).run_id
        evolve_b_run_id = execute_pipeline(
            repo_1.get_pipeline('evolving_pipeline').subset_for_execution(['solid_B']),
            instance=instance,
        ).run_id

        def runs_by_id(gql_result):
            # Index the returned runs by their run id.
            return {
                run['runId']: run for run in gql_result.data['pipelineRunsOrError']['results']
            }

        def pipeline_entry(name, solid_subset):
            # Expected shape of a run's resolved pipeline snapshot.
            return {
                '__typename': 'PipelineSnapshot',
                'name': name,
                'solidSubset': solid_subset,
            }

        context_at_time_1 = define_context_for_file(__file__, 'get_repo_at_time_1', instance)
        result = execute_dagster_graphql(context_at_time_1, ALL_RUNS_QUERY)
        assert result.data
        t1_runs = runs_by_id(result)

        assert t1_runs[full_evolve_run_id]['pipeline'] == pipeline_entry('evolving_pipeline', None)
        assert t1_runs[foo_run_id]['pipeline'] == pipeline_entry('foo_pipeline', None)
        assert t1_runs[evolve_a_run_id]['pipeline'] == pipeline_entry(
            'evolving_pipeline', ['solid_A']
        )
        assert t1_runs[evolve_b_run_id]['pipeline'] == pipeline_entry(
            'evolving_pipeline', ['solid_B']
        )

        context_at_time_2 = define_context_for_file(__file__, 'get_repo_at_time_2', instance)
        result = execute_dagster_graphql(context_at_time_2, ALL_RUNS_QUERY)
        assert result.data
        t2_runs = runs_by_id(result)

        assert t2_runs[full_evolve_run_id]['pipeline'] == pipeline_entry('evolving_pipeline', None)
        assert t2_runs[evolve_a_run_id]['pipeline'] == pipeline_entry(
            'evolving_pipeline', ['solid_A']
        )
        # pipeline name changed
        assert t2_runs[foo_run_id]['pipeline'] == pipeline_entry('foo_pipeline', None)
        # subset no longer valid - b renamed
        assert t2_runs[evolve_b_run_id]['pipeline'] == pipeline_entry(
            'evolving_pipeline', ['solid_B']
        )
def test_runs_over_time():
    """Legacy executionSelection resolution across repository evolution.

    Fixes two copy/paste slips: in the time-2 section, the assertions for
    full_evolve_run_id and foo_run_id previously read from t1_runs instead of
    t2_runs, so the time-2 values were never actually checked.
    """
    instance = DagsterInstance.local_temp()
    repo_1 = get_repo_at_time_1()
    full_evolve_run_id = execute_pipeline(
        repo_1.get_pipeline('evolving_pipeline'), instance=instance
    ).run_id
    foo_run_id = execute_pipeline(repo_1.get_pipeline('foo_pipeline'), instance=instance).run_id
    evolve_a_run_id = execute_pipeline(
        repo_1.get_pipeline('evolving_pipeline').build_sub_pipeline(['solid_A']),
        instance=instance,
    ).run_id
    evolve_b_run_id = execute_pipeline(
        repo_1.get_pipeline('evolving_pipeline').build_sub_pipeline(['solid_B']),
        instance=instance,
    ).run_id

    context_at_time_1 = define_context_for_file(__file__, 'get_repo_at_time_1', instance)
    result = execute_dagster_graphql(context_at_time_1, ALL_RUNS_QUERY)
    assert result.data
    t1_runs = {run['runId']: run for run in result.data['pipelineRunsOrError']['results']}

    # test full_evolve_run_id
    assert t1_runs[full_evolve_run_id]['pipeline']['__typename'] == 'Pipeline'
    assert t1_runs[full_evolve_run_id]['executionSelection'] == {
        'name': 'evolving_pipeline',
        'solidSubset': None,
    }
    # test foo_run_id
    assert t1_runs[foo_run_id]['pipeline']['__typename'] == 'Pipeline'
    assert t1_runs[foo_run_id]['executionSelection'] == {
        'name': 'foo_pipeline',
        'solidSubset': None,
    }
    # test evolve_a_run_id
    assert t1_runs[evolve_a_run_id]['pipeline']['__typename'] == 'Pipeline'
    assert t1_runs[evolve_a_run_id]['executionSelection'] == {
        'name': 'evolving_pipeline',
        'solidSubset': ['solid_A'],
    }
    assert t1_runs[evolve_a_run_id]['pipelineSnapshotId']
    # test evolve_b_run_id
    assert t1_runs[evolve_b_run_id]['pipeline']['__typename'] == 'Pipeline'
    assert t1_runs[evolve_b_run_id]['executionSelection'] == {
        'name': 'evolving_pipeline',
        'solidSubset': ['solid_B'],
    }

    context_at_time_2 = define_context_for_file(__file__, 'get_repo_at_time_2', instance)
    result = execute_dagster_graphql(context_at_time_2, ALL_RUNS_QUERY)
    assert result.data
    t2_runs = {run['runId']: run for run in result.data['pipelineRunsOrError']['results']}

    # test full_evolve_run_id
    assert t2_runs[full_evolve_run_id]['pipeline']['__typename'] == 'Pipeline'
    assert t2_runs[full_evolve_run_id]['executionSelection'] == {  # fixed: was t1_runs
        'name': 'evolving_pipeline',
        'solidSubset': None,
    }
    # test evolve_a_run_id
    assert t2_runs[evolve_a_run_id]['pipeline']['__typename'] == 'Pipeline'
    assert t2_runs[evolve_a_run_id]['executionSelection'] == {
        'name': 'evolving_pipeline',
        'solidSubset': ['solid_A'],
    }
    assert t2_runs[evolve_a_run_id]['pipelineSnapshotId']
    # names same
    assert (
        t1_runs[full_evolve_run_id]['pipeline']['name']
        == t2_runs[evolve_a_run_id]['pipeline']['name']
    )
    # snapshots differ
    assert (
        t1_runs[full_evolve_run_id]['pipelineSnapshotId']
        != t2_runs[evolve_a_run_id]['pipelineSnapshotId']
    )
    # pipeline name changed
    assert t2_runs[foo_run_id]['pipeline']['__typename'] == 'UnknownPipeline'
    assert t2_runs[foo_run_id]['executionSelection'] == {  # fixed: was t1_runs
        'name': 'foo_pipeline',
        'solidSubset': None,
    }
    # subset no longer valid - b renamed
    assert t2_runs[evolve_b_run_id]['pipeline']['__typename'] == 'UnknownPipeline'
    assert t2_runs[evolve_b_run_id]['executionSelection'] == {
        'name': 'evolving_pipeline',
        'solidSubset': ['solid_B'],
    }
def test_run_groups_over_time():
    """Run groups keep resolving pipeline snapshots as the repository evolves."""
    with seven.TemporaryDirectory() as tempdir:
        instance = DagsterInstance.local_temp(tempdir=tempdir)
        repo_1 = get_repo_at_time_1()
        full_evolve_run_id = execute_pipeline(
            repo_1.get_pipeline("evolving_pipeline"), instance=instance
        ).run_id
        foo_run_id = execute_pipeline(
            repo_1.get_pipeline("foo_pipeline"), instance=instance
        ).run_id
        evolve_a_run_id = execute_pipeline(
            repo_1.get_pipeline("evolving_pipeline").get_pipeline_subset_def({"solid_A"}),
            instance=instance,
        ).run_id
        evolve_b_run_id = execute_pipeline(
            repo_1.get_pipeline("evolving_pipeline").get_pipeline_subset_def({"solid_B"}),
            instance=instance,
        ).run_id

        def runs_by_id(gql_result):
            # Flatten all run groups into a run_id -> run mapping.
            return {
                run["runId"]: run
                for group in gql_result.data["runGroupsOrError"]["results"]
                for run in group["runs"]
            }

        def snapshot_entry(name, solid_selection):
            # Expected shape of a run's resolved pipeline snapshot.
            return {
                "__typename": "PipelineSnapshot",
                "name": name,
                "solidSelection": solid_selection,
            }

        context_at_time_1 = define_context_for_file(__file__, "get_repo_at_time_1", instance)
        result = execute_dagster_graphql(context_at_time_1, ALL_RUN_GROUPS_QUERY)
        assert result.data
        assert "runGroupsOrError" in result.data
        assert "results" in result.data["runGroupsOrError"]
        assert len(result.data["runGroupsOrError"]["results"]) == 4
        t1_runs = runs_by_id(result)

        # test full_evolve_run_id
        assert t1_runs[full_evolve_run_id]["pipeline"] == snapshot_entry("evolving_pipeline", None)
        # test foo_run_id
        assert t1_runs[foo_run_id]["pipeline"] == snapshot_entry("foo_pipeline", None)
        # test evolve_a_run_id
        assert t1_runs[evolve_a_run_id]["pipeline"] == snapshot_entry(
            "evolving_pipeline", ["solid_A"]
        )
        assert t1_runs[evolve_a_run_id]["pipelineSnapshotId"]
        # test evolve_b_run_id
        assert t1_runs[evolve_b_run_id]["pipeline"] == snapshot_entry(
            "evolving_pipeline", ["solid_B"]
        )

        context_at_time_2 = define_context_for_file(__file__, "get_repo_at_time_2", instance)
        result = execute_dagster_graphql(context_at_time_2, ALL_RUN_GROUPS_QUERY)
        assert "runGroupsOrError" in result.data
        assert "results" in result.data["runGroupsOrError"]
        assert len(result.data["runGroupsOrError"]["results"]) == 4
        t2_runs = runs_by_id(result)

        # test full_evolve_run_id
        assert t2_runs[full_evolve_run_id]["pipeline"] == snapshot_entry("evolving_pipeline", None)
        # test evolve_a_run_id
        assert t2_runs[evolve_a_run_id]["pipeline"] == snapshot_entry(
            "evolving_pipeline", ["solid_A"]
        )
        assert t2_runs[evolve_a_run_id]["pipelineSnapshotId"]
        # names same
        assert (
            t1_runs[full_evolve_run_id]["pipeline"]["name"]
            == t2_runs[evolve_a_run_id]["pipeline"]["name"]
        )
        # snapshots differ
        assert (
            t1_runs[full_evolve_run_id]["pipelineSnapshotId"]
            != t2_runs[evolve_a_run_id]["pipelineSnapshotId"]
        )
        # pipeline name changed
        assert t2_runs[foo_run_id]["pipeline"] == snapshot_entry("foo_pipeline", None)
        # subset no longer valid - b renamed
        assert t2_runs[evolve_b_run_id]["pipeline"] == snapshot_entry(
            "evolving_pipeline", ["solid_B"]
        )