def test_counter():
    @traced
    async def foo():
        pass

    @traced
    async def bar():
        pass

    async def call_foo(num):
        await asyncio.gather(*[foo() for _ in range(num)])

    async def call_bar(num):
        await asyncio.gather(*[bar() for _ in range(num)])

    async def run():
        await call_foo(10)
        await call_foo(10)
        await call_bar(10)

    traced_counter.set(Counter())
    loop = asyncio.get_event_loop()
    loop.run_until_complete(run())
    counter = traced_counter.get()
    assert isinstance(counter, Counter)
    counts = counter.counts()
    assert counts["foo"] == 20
    assert counts["bar"] == 10
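# Minimal sketch (an assumption, not the repo's actual implementation) of the
# pieces the test above relies on: a ContextVar-backed `traced_counter`, a
# `Counter` that tallies call counts by key, and a `@traced` decorator that
# bumps the wrapped coroutine's name on every call.
import functools
from collections import defaultdict
from contextvars import ContextVar

class Counter:
    def __init__(self):
        self._counts = defaultdict(int)

    def increment(self, key: str):
        self._counts[key] += 1

    def counts(self):
        return dict(self._counts)

traced_counter: ContextVar = ContextVar("traced_counter", default=None)

def traced(fn):
    # Hypothetical decorator: only handles coroutine functions, which is all
    # the test above exercises.
    @functools.wraps(fn)
    async def inner(*args, **kwargs):
        counter = traced_counter.get()
        if isinstance(counter, Counter):
            counter.increment(fn.__name__)
        return await fn(*args, **kwargs)

    return inner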
def test_asset_batching():
    with instance_for_test() as instance:
        repo = get_asset_repo()
        foo_job = repo.get_job("foo_job")
        for _ in range(3):
            foo_job.execute_in_process(instance=instance)
        with define_out_of_process_context(__file__, "asset_repo", instance) as context:
            traced_counter.set(Counter())
            result = execute_dagster_graphql(
                context,
                ASSET_RUNS_QUERY,
                variables={"assetKey": {"path": ["foo"]}},
            )
            assert result.data
            assert "assetOrError" in result.data
            assert "assetMaterializations" in result.data["assetOrError"]
            materializations = result.data["assetOrError"]["assetMaterializations"]
            assert len(materializations) == 3
            counter = traced_counter.get()
            counts = counter.counts()
            assert counts
            assert counts.get("DagsterInstance.get_run_records") == 1
def test_repository_batching(graphql_context):
    instance = graphql_context.instance
    if not instance.supports_batch_tick_queries or not instance.supports_bucket_queries:
        pytest.skip("storage cannot batch fetch")
    traced_counter.set(Counter())
    selector = infer_repository_selector(graphql_context)
    result = execute_dagster_graphql(
        graphql_context,
        REPOSITORY_SENSORS_QUERY,
        variables={"repositorySelector": selector},
    )
    assert result.data
    assert "repositoryOrError" in result.data
    assert "sensors" in result.data["repositoryOrError"]
    counter = traced_counter.get()
    counts = counter.counts()
    assert counts
    assert len(counts) == 3
    # We should have a single batch call to fetch run records (to fetch sensor runs), a single
    # batch call to fetch instigator state, and a single batch call to fetch ticks, instead of
    # separate calls for each sensor (~5 distinct sensors in the repo):
    # 1) `get_run_records` is fetched to instantiate GrapheneRun
    # 2) `all_instigator_state` is fetched to instantiate GrapheneSensor
    # 3) `get_batch_ticks` is fetched to populate sensor tick data
    assert counts.get("DagsterInstance.get_run_records") == 1
    assert counts.get("DagsterInstance.all_instigator_state") == 1
    assert counts.get("DagsterInstance.get_batch_ticks") == 1
def test_repository_batching():
    with instance_for_test() as instance:
        repo = get_repo_at_time_1()
        foo_pipeline = repo.get_pipeline("foo_pipeline")
        evolving_pipeline = repo.get_pipeline("evolving_pipeline")
        foo_run_ids = [
            execute_pipeline(foo_pipeline, instance=instance).run_id for _ in range(3)
        ]
        evolving_run_ids = [
            execute_pipeline(evolving_pipeline, instance=instance).run_id for _ in range(2)
        ]
        with define_out_of_process_context(__file__, "get_repo_at_time_1", instance) as context:
            traced_counter.set(Counter())
            result = execute_dagster_graphql(
                context,
                REPOSITORY_RUNS_QUERY,
                variables={"repositorySelector": infer_repository_selector(context)},
            )
            assert result.data
            assert "repositoryOrError" in result.data
            assert "pipelines" in result.data["repositoryOrError"]
            pipelines = result.data["repositoryOrError"]["pipelines"]
            assert len(pipelines) == 2
            pipeline_runs = {pipeline["name"]: pipeline["runs"] for pipeline in pipelines}
            assert len(pipeline_runs["foo_pipeline"]) == 3
            assert len(pipeline_runs["evolving_pipeline"]) == 2
            assert set(foo_run_ids) == set(run["runId"] for run in pipeline_runs["foo_pipeline"])
            assert set(evolving_run_ids) == set(
                run["runId"] for run in pipeline_runs["evolving_pipeline"]
            )
            counter = traced_counter.get()
            counts = counter.counts()
            assert counts
            assert len(counts) == 1
            # We should have a single batch call to fetch run records, instead of 3 separate calls
            # to fetch run records (which is fetched to instantiate GrapheneRun)
            assert counts.get("DagsterInstance.get_run_records") == 1
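# A plausible shape for REPOSITORY_RUNS_QUERY, inferred from the fields the test
# above asserts on ("repositoryOrError" -> "pipelines" -> "name"/"runs" -> "runId").
# The exact query text in the repo may differ; this is a sketch, not the source.
REPOSITORY_RUNS_QUERY = """
query RepositoryRunsQuery($repositorySelector: RepositorySelector!) {
  repositoryOrError(repositorySelector: $repositorySelector) {
    ... on Repository {
      pipelines {
        name
        runs {
          runId
        }
      }
    }
  }
}
"""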
def send_wrapper(message: Message):
    if message["type"] == "http.response.start":
        counter = traced_counter.get()
        if counter and isinstance(counter, Counter):
            headers = MutableHeaders(scope=message)
            headers.append("x-dagster-call-counts", json.dumps(counter.counts()))
    return send(message)
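# Hypothetical wiring for send_wrapper above: a pure-ASGI middleware that seeds a
# fresh Counter per request and swaps in the wrapper so the response start message
# carries the x-dagster-call-counts header. CounterMiddleware is an assumed name;
# only traced_counter/Counter come from the code above.
import json
from starlette.datastructures import MutableHeaders
from starlette.types import ASGIApp, Message, Receive, Scope, Send

class CounterMiddleware:
    def __init__(self, app: ASGIApp):
        self.app = app

    async def __call__(self, scope: Scope, receive: Receive, send: Send):
        if scope["type"] != "http":
            return await self.app(scope, receive, send)

        traced_counter.set(Counter())  # fresh counter for this request

        def send_wrapper(message: Message):
            if message["type"] == "http.response.start":
                counter = traced_counter.get()
                if counter and isinstance(counter, Counter):
                    headers = MutableHeaders(scope=message)
                    headers.append("x-dagster-call-counts", json.dumps(counter.counts()))
            return send(message)

        await self.app(scope, receive, send_wrapper)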
def test_repository_batching(graphql_context):
    traced_counter.set(Counter())
    selector = infer_repository_selector(graphql_context)
    result = execute_dagster_graphql(
        graphql_context,
        REPOSITORY_SCHEDULES_QUERY,
        variables={"repositorySelector": selector},
    )
    assert result.data
    assert "repositoryOrError" in result.data
    assert "schedules" in result.data["repositoryOrError"]
    counter = traced_counter.get()
    counts = counter.counts()
    assert counts
    assert len(counts) == 2
    # We should have a single batch call to fetch run records (to fetch schedule runs) and a single
    # batch call to fetch instigator state, instead of separate calls for each schedule (~18
    # distinct schedules in the repo)
    # 1) `get_run_records` is fetched to instantiate GrapheneRun
    # 2) `all_instigator_state` is fetched to instantiate GrapheneSchedule
    assert counts.get("DagsterInstance.get_run_records") == 1
    assert counts.get("DagsterInstance.all_instigator_state") == 1
def return_counts(response):
    counter = traced_counter.get()
    if counter and isinstance(counter, Counter):
        response.headers["x-dagster-call-counts"] = json.dumps(counter.counts())
    return response
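# Hypothetical usage: return_counts reads like a Flask after-request hook, so one
# way to wire it up (assuming Flask; the repo's actual framework may differ) is to
# seed a Counter before each request and register the hook so every response
# carries the x-dagster-call-counts header.
from flask import Flask

app = Flask(__name__)

@app.before_request
def init_counter():
    traced_counter.set(Counter())  # start each request with a fresh counter

app.after_request(return_counts)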