def test_asset_batching():
    """Check that loading an asset's materializations batches the run-record fetch.

    Executes ``foo_job`` three times, queries the ``foo`` asset over GraphQL and
    asserts that three materializations come back while
    ``DagsterInstance.get_run_records`` is traced exactly once.
    """
    with instance_for_test() as instance:
        asset_job = get_asset_repo().get_job("foo_job")
        for _ in range(3):
            asset_job.execute_in_process(instance=instance)

        with define_out_of_process_context(__file__, "asset_repo", instance) as context:
            # Install a fresh counter so only calls traced from here on are counted.
            traced_counter.set(Counter())
            query_variables = {"assetKey": {"path": ["foo"]}}
            result = execute_dagster_graphql(
                context, ASSET_RUNS_QUERY, variables=query_variables
            )

            assert result.data
            assert "assetOrError" in result.data
            assert "assetMaterializations" in result.data["assetOrError"]
            asset_materializations = result.data["assetOrError"]["assetMaterializations"]
            assert len(asset_materializations) == 3

            call_counts = traced_counter.get().counts()
            assert call_counts
            assert call_counts.get("DagsterInstance.get_run_records") == 1
def test_filtered_runs():
    """Check that runs can be filtered by run id and by tag.

    Two ``foo_pipeline`` runs are executed with different ``run`` tags; both a
    ``runId`` filter and a ``tags`` filter must return only the first run.
    """
    with instance_for_test() as instance:
        repo = get_repo_at_time_1()
        first_run_id = execute_pipeline(
            repo.get_pipeline("foo_pipeline"), instance=instance, tags={"run": "one"}
        ).run_id
        # Second run exists only so that the filters have something to exclude.
        _second_run_id = execute_pipeline(
            repo.get_pipeline("foo_pipeline"), instance=instance, tags={"run": "two"}
        ).run_id

        with define_out_of_process_context(__file__, "get_repo_at_time_1", instance) as context:
            run_filters = (
                {"runId": first_run_id},
                {"tags": [{"key": "run", "value": "one"}]},
            )
            # Both filters are expected to select exactly the first run.
            for run_filter in run_filters:
                result = execute_dagster_graphql(
                    context,
                    FILTERED_RUN_QUERY,
                    variables={"filter": run_filter},
                )
                assert result.data
                matched_ids = [
                    run["runId"] for run in result.data["pipelineRunsOrError"]["results"]
                ]
                assert len(matched_ids) == 1
                assert matched_ids[0] == first_run_id
def test_run_groups():
    """Check that the run-groups query groups descendant runs under their root run.

    Three root runs are executed, then five more runs per root are executed with
    parent/root tags pointing at that root. The query must return three groups
    of six runs each.
    """
    with instance_for_test() as instance:
        foo_pipeline = get_repo_at_time_1().get_pipeline("foo_pipeline")

        root_run_ids = [
            execute_pipeline(foo_pipeline, instance=instance).run_id for _ in range(3)
        ]

        for _ in range(5):
            for root_run_id in root_run_ids:
                lineage_tags = {
                    PARENT_RUN_ID_TAG: root_run_id,
                    ROOT_RUN_ID_TAG: root_run_id,
                }
                execute_pipeline(foo_pipeline, tags=lineage_tags, instance=instance)

        with define_out_of_process_context(
            __file__, "get_repo_at_time_1", instance
        ) as context_at_time_1:
            result = execute_dagster_graphql(context_at_time_1, ALL_RUN_GROUPS_QUERY)

            assert result.data
            assert "runGroupsOrError" in result.data
            assert "results" in result.data["runGroupsOrError"]
            run_groups = result.data["runGroupsOrError"]["results"]
            assert len(run_groups) == 3

            # 1 root + 5 tagged descendants per group.
            for run_group in run_groups:
                assert run_group["rootRunId"] in root_run_ids
                assert len(run_group["runs"]) == 6
def test_filtered_runs_multiple_statuses():
    """Check that a ``statuses`` filter with several values matches any of them.

    Creates one STARTED, one FAILURE and one SUCCESS run and expects a
    ``["FAILURE", "SUCCESS"]`` filter to return exactly the latter two.
    """
    with instance_for_test() as instance:
        repo = get_repo_at_time_1()
        created_statuses = (
            PipelineRunStatus.STARTED,
            PipelineRunStatus.FAILURE,
            PipelineRunStatus.SUCCESS,
        )
        _, failed_run_id, succeeded_run_id = [
            instance.create_run_for_pipeline(
                repo.get_pipeline("foo_pipeline"), status=status
            ).run_id
            for status in created_statuses
        ]

        with define_out_of_process_context(__file__, "get_repo_at_time_1", instance) as context:
            status_filter = {"statuses": ["FAILURE", "SUCCESS"]}
            result = execute_dagster_graphql(
                context,
                FILTERED_RUN_QUERY,
                variables={"filter": status_filter},
            )
            assert result.data
            matched_ids = [
                run["runId"] for run in result.data["pipelineRunsOrError"]["results"]
            ]
            assert len(matched_ids) == 2
            assert failed_run_id in matched_ids
            assert succeeded_run_id in matched_ids
def test_paginated_runs_query():
    """Check cursor/limit pagination of the runs query.

    Three runs are created in order; paging with the last-created run as the
    cursor and a limit of one must return only the run created just before it.
    """
    with instance_for_test() as instance:
        repo = get_repo()

        def _new_run_id(status):
            # Create a foo_pipeline run directly in ``status`` and return its id.
            return instance.create_run_for_pipeline(
                repo.get_pipeline("foo_pipeline"), status=status
            ).run_id

        _ = _new_run_id(PipelineRunStatus.STARTED)
        failed_run_id = _new_run_id(PipelineRunStatus.FAILURE)
        cursor_run_id = _new_run_id(PipelineRunStatus.SUCCESS)

        with define_out_of_process_context(__file__, "get_repo", instance) as context:
            paging = {"cursor": cursor_run_id, "limit": 1}
            result = execute_dagster_graphql(context, PAGINATED_RUNS_QUERY, variables=paging)
            assert result.data
            page_ids = [
                run["runId"] for run in result.data["pipelineRunsOrError"]["results"]
            ]
            assert len(page_ids) == 1
            assert page_ids[0] == failed_run_id
def test_filtered_runs_multiple_filters():
    """Check that status and tag filters are combined (both must match).

    Of the three runs created, only the STARTED run tagged ``foo=bar`` should be
    returned for a filter of ``statuses=["STARTED"]`` plus tag ``foo=bar``.
    """
    with instance_for_test() as instance:
        repo = get_repo_at_time_1()

        matching_run = instance.create_run_for_pipeline(
            repo.get_pipeline("foo_pipeline"),
            status=PipelineRunStatus.STARTED,
            tags={"foo": "bar"},
        )
        # Right tag, wrong status.
        wrong_status_run = instance.create_run_for_pipeline(
            repo.get_pipeline("foo_pipeline"),
            status=PipelineRunStatus.FAILURE,
            tags={"foo": "bar"},
        )
        # Right status, wrong tag.
        wrong_tag_run = instance.create_run_for_pipeline(
            repo.get_pipeline("foo_pipeline"),
            status=PipelineRunStatus.STARTED,
            tags={"baz": "boom"},
        )

        with define_out_of_process_context(__file__, "get_repo_at_time_1", instance) as context:
            combined_filter = {
                "statuses": ["STARTED"],
                "tags": [{"key": "foo", "value": "bar"}],
            }
            result = execute_dagster_graphql(
                context,
                FILTERED_RUN_QUERY,
                variables={"filter": combined_filter},
            )
            assert result.data
            matched_ids = [
                run["runId"] for run in result.data["pipelineRunsOrError"]["results"]
            ]
            assert len(matched_ids) == 1
            assert matching_run.run_id in matched_ids
            assert wrong_status_run.run_id not in matched_ids
            assert wrong_tag_run.run_id not in matched_ids
def test_repository_batching():
    """Check that per-pipeline runs of a repository are loaded with one batched call.

    Executes ``foo_pipeline`` three times and ``evolving_pipeline`` twice, queries
    the repository's pipelines with their runs, and asserts both the returned run
    ids and that ``DagsterInstance.get_run_records`` was traced exactly once.
    """
    with instance_for_test() as instance:
        repo = get_repo_at_time_1()
        foo_pipeline = repo.get_pipeline("foo_pipeline")
        evolving_pipeline = repo.get_pipeline("evolving_pipeline")

        foo_run_ids = [
            execute_pipeline(foo_pipeline, instance=instance).run_id for _ in range(3)
        ]
        evolving_run_ids = [
            execute_pipeline(evolving_pipeline, instance=instance).run_id for _ in range(2)
        ]

        with define_out_of_process_context(__file__, "get_repo_at_time_1", instance) as context:
            # Install a fresh counter so only calls traced from here on are counted.
            traced_counter.set(Counter())
            selector = infer_repository_selector(context)
            result = execute_dagster_graphql(
                context,
                REPOSITORY_RUNS_QUERY,
                variables={"repositorySelector": selector},
            )

            assert result.data
            assert "repositoryOrError" in result.data
            assert "pipelines" in result.data["repositoryOrError"]
            pipelines = result.data["repositoryOrError"]["pipelines"]
            assert len(pipelines) == 2

            runs_by_pipeline = {}
            for pipeline in pipelines:
                runs_by_pipeline[pipeline["name"]] = pipeline["runs"]

            assert len(runs_by_pipeline["foo_pipeline"]) == 3
            assert len(runs_by_pipeline["evolving_pipeline"]) == 2
            assert set(foo_run_ids) == {
                run["runId"] for run in runs_by_pipeline["foo_pipeline"]
            }
            assert set(evolving_run_ids) == {
                run["runId"] for run in runs_by_pipeline["evolving_pipeline"]
            }

            call_counts = traced_counter.get().counts()
            assert call_counts
            assert len(call_counts) == 1
            # We should have a single batch call to fetch run records, instead of 3 separate
            # calls to fetch run records (which is fetched to instantiate GrapheneRun)
            assert call_counts.get("DagsterInstance.get_run_records") == 1
def test_repositories_query():
    """Check that the repositories query lists the single repository ``my_repo``."""
    with instance_for_test() as instance, define_out_of_process_context(
        __file__, "get_repo", instance
    ) as context:
        result = execute_dagster_graphql(context, REPOSITORIES_QUERY)

        assert not result.errors
        assert result.data

        repository_nodes = result.data["repositoriesOrError"]["nodes"]
        assert len(repository_nodes) == 1
        only_repository = repository_nodes[0]
        assert only_repository["name"] == "my_repo"
def test_pipelines_query():
    """Check that the pipelines query returns two pipelines for ``my_repo``."""
    with instance_for_test() as instance, define_out_of_process_context(
        __file__, "get_repo", instance
    ) as context:
        repository_variables = {
            "repositoryLocationName": "test_location",
            "repositoryName": "my_repo",
        }
        result = execute_dagster_graphql(
            context, PIPELINES_QUERY, variables=repository_variables
        )

        assert not result.errors
        assert result.data

        pipeline_nodes = result.data["repositoryOrError"]["pipelines"]
        assert len(pipeline_nodes) == 2
def test_filtered_runs_count():
    """Check that the filtered count query counts only runs matching the filter.

    One STARTED and one FAILURE run are created; counting with a ``FAILURE``
    status filter must yield 1.
    """
    with instance_for_test() as instance:
        repo = get_repo_at_time_1()
        for status in (PipelineRunStatus.STARTED, PipelineRunStatus.FAILURE):
            # The run ids are not needed; only the stored runs matter for the count.
            _ = instance.create_run_for_pipeline(
                repo.get_pipeline("foo_pipeline"), status=status
            ).run_id

        with define_out_of_process_context(__file__, "get_repo_at_time_1", instance) as context:
            failure_filter = {"statuses": ["FAILURE"]}
            result = execute_dagster_graphql(
                context, FILTERED_RUN_COUNT_QUERY, variables={"filter": failure_filter}
            )
            assert result.data
            failure_count = result.data["pipelineRunsOrError"]["count"]
            assert failure_count == 1
def test_run_group_not_found():
    """Check the error payload returned when a run group is requested for an unknown run id."""
    with instance_for_test() as instance, define_out_of_process_context(
        __file__, "get_repo_at_time_1", instance
    ) as context_at_time_1:
        query_variables = {"runId": "foo"}
        result = execute_dagster_graphql(
            context_at_time_1, RUN_GROUP_QUERY, variables=query_variables
        )

        assert result.data
        assert result.data["runGroupOrError"]
        not_found = result.data["runGroupOrError"]
        assert not_found["__typename"] == "RunGroupNotFoundError"
        assert not_found["runId"] == "foo"
        expected_message = "Run group of run {run_id} could not be found.".format(run_id="foo")
        assert not_found["message"] == expected_message
def test_run_group():
    """Check that a run group can be fetched from either its root run or a descendant.

    One root run of ``foo_pipeline`` is executed, followed by three runs created
    with the root as both parent and root. Querying the group by the root run id
    and by the last descendant's run id must return the same four-run ``RunGroup``.
    """
    with instance_for_test() as instance:
        repo = get_repo_at_time_1()
        foo_pipeline = repo.get_pipeline("foo_pipeline")
        runs = [execute_pipeline(foo_pipeline, instance=instance)]
        root_run_id = runs[-1].run_id
        for _ in range(3):
            # https://github.com/dagster-io/dagster/issues/2433
            run = instance.create_run_for_pipeline(
                foo_pipeline,
                parent_run_id=root_run_id,
                root_run_id=root_run_id,
                tags={PARENT_RUN_ID_TAG: root_run_id, ROOT_RUN_ID_TAG: root_run_id},
            )
            execute_run(InMemoryPipeline(foo_pipeline), run, instance)
            runs.append(run)

        with define_out_of_process_context(
            __file__, "get_repo_at_time_1", instance
        ) as context_at_time_1:
            # Look the group up via its root run.
            result_one = execute_dagster_graphql(
                context_at_time_1,
                RUN_GROUP_QUERY,
                variables={"runId": root_run_id},
            )
            assert result_one.data["runGroupOrError"]["__typename"] == "RunGroup"
            assert len(result_one.data["runGroupOrError"]["runs"]) == 4

            # Look the same group up via the most recently created descendant.
            result_two = execute_dagster_graphql(
                context_at_time_1,
                RUN_GROUP_QUERY,
                variables={"runId": runs[-1].run_id},
            )
            # Verify the *second* response's type; the previous version re-checked
            # result_one here, leaving result_two's __typename unverified.
            assert result_two.data["runGroupOrError"]["__typename"] == "RunGroup"
            assert len(result_two.data["runGroupOrError"]["runs"]) == 4

            assert (
                result_one.data["runGroupOrError"]["rootRunId"]
                == result_two.data["runGroupOrError"]["rootRunId"]
            )
            assert (
                result_one.data["runGroupOrError"]["runs"]
                == result_two.data["runGroupOrError"]["runs"]
            )
def test_run_group_not_found():
    """Check the error payload returned when a run group is requested for an unknown run id.

    Uses a ``DagsterInstance.local_temp`` instance rooted in a temporary directory.
    """
    with seven.TemporaryDirectory() as temp_dir:
        instance = DagsterInstance.local_temp(temp_dir)

        # Enter the context the same way the other tests in this file do, instead of
        # handing the un-entered return value to execute_dagster_graphql.
        # NOTE(review): assumes define_out_of_process_context is a context manager,
        # as its other usages in this file imply -- confirm against its definition.
        with define_out_of_process_context(
            __file__, "get_repo_at_time_1", instance
        ) as context_at_time_1:
            result = execute_dagster_graphql(
                context_at_time_1,
                RUN_GROUP_QUERY,
                variables={"runId": "foo"},
            )
            assert result.data
            assert result.data["runGroupOrError"]
            assert result.data["runGroupOrError"]["__typename"] == "RunGroupNotFoundError"
            assert result.data["runGroupOrError"]["runId"] == "foo"
            assert result.data["runGroupOrError"][
                "message"
            ] == "Run group of run {run_id} could not be found.".format(run_id="foo")
def test_launch_preset_mutation():
    """Check that launching ``foo_pipeline`` from preset ``my_preset`` returns a run with an id."""
    with instance_for_test() as instance, define_out_of_process_context(
        __file__, "get_repo", instance
    ) as context:
        launch_variables = {
            "repositoryLocationName": "test_location",
            "repositoryName": "my_repo",
            "pipelineName": "foo_pipeline",
            "presetName": "my_preset",
        }
        result = execute_dagster_graphql(
            context, LAUNCH_PIPELINE_PRESET, variables=launch_variables
        )

        assert not result.errors
        assert result.data

        launched_run = result.data["launchPipelineExecution"]["run"]
        assert launched_run
        assert launched_run["runId"]
def test_runs_query():
    """Check that the runs query returns both runs, most recently created first."""
    with instance_for_test() as instance:
        repo = get_repo()
        older_run_id = instance.create_run_for_pipeline(
            repo.get_pipeline("foo_pipeline"), status=PipelineRunStatus.STARTED
        ).run_id
        newer_run_id = instance.create_run_for_pipeline(
            repo.get_pipeline("foo_pipeline"), status=PipelineRunStatus.FAILURE
        ).run_id

        with define_out_of_process_context(__file__, "get_repo", instance) as context:
            result = execute_dagster_graphql(context, RUNS_QUERY)
            assert result.data
            returned_ids = [
                run["runId"] for run in result.data["pipelineRunsOrError"]["results"]
            ]
            assert len(returned_ids) == 2
            # The run created second is expected at the front of the results.
            first_id, second_id = returned_ids
            assert first_id == newer_run_id
            assert second_id == older_run_id
def test_launch_mutation_error():
    """Check that launching with invalid run config reports one error in the payload.

    The GraphQL call itself must succeed (no ``result.errors``); the failure is
    expected as a single entry under ``launchPipelineExecution.errors``.
    """
    with instance_for_test() as instance, define_out_of_process_context(
        __file__, "get_repo", instance
    ) as context:
        launch_variables = {
            "repositoryLocationName": "test_location",
            "repositoryName": "my_repo",
            "pipelineName": "foo_pipeline",
            "runConfigData": {"invalid": "config"},
            "mode": "default",
        }
        result = execute_dagster_graphql(context, LAUNCH_PIPELINE, variables=launch_variables)

        assert not result.errors
        assert result.data

        launch_errors = result.data["launchPipelineExecution"]["errors"]
        assert len(launch_errors) == 1
        error_message = launch_errors[0]["message"]
        assert error_message
def define_test_out_of_process_context(instance):
    """Yield an out-of-process context for this file's main repository.

    Validates that ``instance`` is a ``DagsterInstance``, then enters
    ``define_out_of_process_context`` for the repo named by ``main_repo_name()``
    and yields the resulting context while it is open.

    NOTE(review): this is a generator; presumably it is wrapped with
    ``contextlib.contextmanager`` at a decoration site not visible here -- confirm.
    """
    check.inst_param(instance, "instance", DagsterInstance)
    repo_name = main_repo_name()
    with define_out_of_process_context(__file__, repo_name, instance) as workspace_context:
        yield workspace_context
def define_test_out_of_process_context(instance):
    """Return the out-of-process context for ``test_repo`` defined in this file.

    Validates that ``instance`` is a ``DagsterInstance`` and forwards it to
    ``define_out_of_process_context``; whatever that call returns is passed
    through unchanged.
    """
    check.inst_param(instance, "instance", DagsterInstance)
    test_repo_context = define_out_of_process_context(__file__, "test_repo", instance)
    return test_repo_context
def test_runs_over_time():
    """Check that stored runs keep reporting their pipeline snapshot as the repo changes.

    Four runs are executed against the repository at time 1 (the full
    ``evolving_pipeline``, ``foo_pipeline``, and two single-solid subsets of
    ``evolving_pipeline``). The all-runs query is then issued against contexts for
    the repository at time 1 and at time 2, and each run's ``pipeline`` field is
    expected to be identical in both.
    """
    with seven.TemporaryDirectory() as temp_dir:
        instance = DagsterInstance.local_temp(temp_dir)

        repo_1 = get_repo_at_time_1()

        full_evolve_run_id = execute_pipeline(
            repo_1.get_pipeline("evolving_pipeline"), instance=instance
        ).run_id
        foo_run_id = execute_pipeline(
            repo_1.get_pipeline("foo_pipeline"), instance=instance
        ).run_id
        evolve_a_run_id = execute_pipeline(
            repo_1.get_pipeline("evolving_pipeline").get_pipeline_subset_def({"solid_A"}),
            instance=instance,
        ).run_id
        evolve_b_run_id = execute_pipeline(
            repo_1.get_pipeline("evolving_pipeline").get_pipeline_subset_def({"solid_B"}),
            instance=instance,
        ).run_id

        # Enter each context the same way the other tests in this file do, instead of
        # handing the un-entered return value to execute_dagster_graphql.
        # NOTE(review): assumes define_out_of_process_context is a context manager,
        # as its other usages in this file imply -- confirm against its definition.
        with define_out_of_process_context(
            __file__, "get_repo_at_time_1", instance
        ) as context_at_time_1:
            result = execute_dagster_graphql(context_at_time_1, ALL_RUNS_QUERY)
            assert result.data

            t1_runs = {run["runId"]: run for run in result.data["pipelineRunsOrError"]["results"]}

            assert t1_runs[full_evolve_run_id]["pipeline"] == {
                "__typename": "PipelineSnapshot",
                "name": "evolving_pipeline",
                "solidSelection": None,
            }

            assert t1_runs[foo_run_id]["pipeline"] == {
                "__typename": "PipelineSnapshot",
                "name": "foo_pipeline",
                "solidSelection": None,
            }

            assert t1_runs[evolve_a_run_id]["pipeline"] == {
                "__typename": "PipelineSnapshot",
                "name": "evolving_pipeline",
                "solidSelection": ["solid_A"],
            }

            assert t1_runs[evolve_b_run_id]["pipeline"] == {
                "__typename": "PipelineSnapshot",
                "name": "evolving_pipeline",
                "solidSelection": ["solid_B"],
            }

        with define_out_of_process_context(
            __file__, "get_repo_at_time_2", instance
        ) as context_at_time_2:
            result = execute_dagster_graphql(context_at_time_2, ALL_RUNS_QUERY)
            assert result.data

            t2_runs = {run["runId"]: run for run in result.data["pipelineRunsOrError"]["results"]}

            assert t2_runs[full_evolve_run_id]["pipeline"] == {
                "__typename": "PipelineSnapshot",
                "name": "evolving_pipeline",
                "solidSelection": None,
            }

            assert t2_runs[evolve_a_run_id]["pipeline"] == {
                "__typename": "PipelineSnapshot",
                "name": "evolving_pipeline",
                "solidSelection": ["solid_A"],
            }

            # pipeline name changed (presumably in the time-2 repo -- confirm); the run
            # is still expected to report the name it was executed under
            assert t2_runs[foo_run_id]["pipeline"] == {
                "__typename": "PipelineSnapshot",
                "name": "foo_pipeline",
                "solidSelection": None,
            }

            # subset no longer valid - b renamed (presumably in the time-2 repo --
            # confirm); the run is still expected to report its original selection
            assert t2_runs[evolve_b_run_id]["pipeline"] == {
                "__typename": "PipelineSnapshot",
                "name": "evolving_pipeline",
                "solidSelection": ["solid_B"],
            }
def test_run_groups_over_time():
    """Check that run groups keep reporting each run's pipeline snapshot across repo versions.

    Four runs are executed against the repository at time 1. The run-groups query
    is issued against contexts for the repository at time 1 and at time 2; both
    must return four groups, and each run's ``pipeline`` field is compared with
    the snapshot it was executed with.
    """

    def _snapshot(name, solid_selection):
        # Expected shape of a run's ``pipeline`` field in the query response.
        return {
            "__typename": "PipelineSnapshot",
            "name": name,
            "solidSelection": solid_selection,
        }

    def _runs_by_id(query_result):
        # Flatten every group's runs into a single run-id -> run mapping.
        flattened = {}
        for group in query_result.data["runGroupsOrError"]["results"]:
            for run in group["runs"]:
                flattened[run["runId"]] = run
        return flattened

    with instance_for_test() as instance:
        repo_1 = get_repo_at_time_1()

        full_evolve_run_id = execute_pipeline(
            repo_1.get_pipeline("evolving_pipeline"), instance=instance
        ).run_id
        foo_run_id = execute_pipeline(
            repo_1.get_pipeline("foo_pipeline"), instance=instance
        ).run_id
        evolve_a_run_id = execute_pipeline(
            repo_1.get_pipeline("evolving_pipeline").get_pipeline_subset_def({"solid_A"}),
            instance=instance,
        ).run_id
        evolve_b_run_id = execute_pipeline(
            repo_1.get_pipeline("evolving_pipeline").get_pipeline_subset_def({"solid_B"}),
            instance=instance,
        ).run_id

        with define_out_of_process_context(
            __file__, "get_repo_at_time_1", instance
        ) as context_at_time_1:
            result = execute_dagster_graphql(context_at_time_1, ALL_RUN_GROUPS_QUERY)
            assert result.data
            assert "runGroupsOrError" in result.data
            assert "results" in result.data["runGroupsOrError"]
            assert len(result.data["runGroupsOrError"]["results"]) == 4

            t1_runs = _runs_by_id(result)

            # test full_evolve_run_id
            assert t1_runs[full_evolve_run_id]["pipeline"] == _snapshot("evolving_pipeline", None)

            # test foo_run_id
            assert t1_runs[foo_run_id]["pipeline"] == _snapshot("foo_pipeline", None)

            # test evolve_a_run_id
            assert t1_runs[evolve_a_run_id]["pipeline"] == _snapshot(
                "evolving_pipeline", ["solid_A"]
            )
            assert t1_runs[evolve_a_run_id]["pipelineSnapshotId"]

            # test evolve_b_run_id
            assert t1_runs[evolve_b_run_id]["pipeline"] == _snapshot(
                "evolving_pipeline", ["solid_B"]
            )

        with define_out_of_process_context(
            __file__, "get_repo_at_time_2", instance
        ) as context_at_time_2:
            result = execute_dagster_graphql(context_at_time_2, ALL_RUN_GROUPS_QUERY)
            assert "runGroupsOrError" in result.data
            assert "results" in result.data["runGroupsOrError"]
            assert len(result.data["runGroupsOrError"]["results"]) == 4

            t2_runs = _runs_by_id(result)

            # test full_evolve_run_id
            assert t2_runs[full_evolve_run_id]["pipeline"] == _snapshot("evolving_pipeline", None)

            # test evolve_a_run_id
            assert t2_runs[evolve_a_run_id]["pipeline"] == _snapshot(
                "evolving_pipeline", ["solid_A"]
            )
            assert t2_runs[evolve_a_run_id]["pipelineSnapshotId"]

            # names same
            t1_full_run = t1_runs[full_evolve_run_id]
            t2_subset_run = t2_runs[evolve_a_run_id]
            assert t1_full_run["pipeline"]["name"] == t2_subset_run["pipeline"]["name"]

            # snapshots differ
            assert t1_full_run["pipelineSnapshotId"] != t2_subset_run["pipelineSnapshotId"]

            # pipeline name changed
            assert t2_runs[foo_run_id]["pipeline"] == _snapshot("foo_pipeline", None)

            # subset no longer valid - b renamed
            assert t2_runs[evolve_b_run_id]["pipeline"] == _snapshot(
                "evolving_pipeline", ["solid_B"]
            )