def test_fetch_by_tags(run_storage_factory_cm_fn):
    """Check that `get_runs` honours tag filters.

    Three runs are stored: two tagged (sharing 'mytag2', differing on 'mytag')
    and one untagged. The assertions expect a multi-tag filter to match only
    the run carrying all of the given tags, and an empty tag dict to match
    every run.
    """
    with run_storage_factory_cm_fn() as storage:
        assert storage
        one, two, three = (str(uuid.uuid4()) for _ in range(3))

        tagged_runs = [
            (one, {'mytag': 'hello', 'mytag2': 'world'}),
            (two, {'mytag': 'goodbye', 'mytag2': 'world'}),
        ]
        for run_id, run_tags in tagged_runs:
            storage.add_run(
                build_run(run_id=run_id, pipeline_name='some_pipeline', tags=run_tags)
            )
        # The third run deliberately carries no tags at all.
        storage.add_run(build_run(run_id=three, pipeline_name='some_pipeline'))
        assert len(storage.get_runs()) == 3

        # Both tags given: only `one` has this exact pair of values.
        both_tags = PipelineRunsFilter(tags={'mytag': 'hello', 'mytag2': 'world'})
        matched = storage.get_runs(both_tags)
        assert len(matched) == 1
        assert matched[0].run_id == one

        # Shared tag only: both tagged runs are expected.
        shared_tag = PipelineRunsFilter(tags={'mytag2': 'world'})
        matched = storage.get_runs(shared_tag)
        assert len(matched) == 2
        matched_ids = [run.run_id for run in matched]
        assert one in matched_ids
        assert two in matched_ids

        # An empty tag dict is expected to behave like no tag filter.
        matched = storage.get_runs(PipelineRunsFilter(tags={}))
        assert len(matched) == 3
def test_fetch_count_by_tag(clean_storage):
    """Check that `get_runs_count` honours tag filters.

    Stores two tagged runs and one untagged run, then compares the count
    returned for a two-tag filter, a one-tag filter and an empty filter.
    """
    storage = clean_storage
    one, two, three = (str(uuid.uuid4()) for _ in range(3))

    tagged_runs = [
        (one, {'mytag': 'hello', 'mytag2': 'world'}),
        (two, {'mytag': 'goodbye', 'mytag2': 'world'}),
    ]
    for run_id, run_tags in tagged_runs:
        storage.add_run(
            build_run(run_id=run_id, pipeline_name='some_pipeline', tags=run_tags)
        )
    # Untagged run: should only be counted by the unfiltered query.
    storage.add_run(build_run(run_id=three, pipeline_name='some_pipeline'))
    assert len(storage.get_runs()) == 3

    both_tags = PipelineRunsFilter(tags={'mytag': 'hello', 'mytag2': 'world'})
    assert storage.get_runs_count(both_tags) == 1

    shared_tag = PipelineRunsFilter(tags={'mytag2': 'world'})
    assert storage.get_runs_count(shared_tag) == 2

    assert storage.get_runs_count(PipelineRunsFilter()) == 3
def test_fetch_count_by_tag(run_storage_factory_cm_fn):
    """Check `get_runs_count` with tag filters and the `get_run_tags` listing.

    Stores two tagged runs and one untagged run, verifies the counts for a
    two-tag filter, a one-tag filter and no filter, and finally compares
    `get_run_tags()` against the expected (key, set-of-values) pairs.
    """
    with run_storage_factory_cm_fn() as storage:
        assert storage
        one, two, three = (str(uuid.uuid4()) for _ in range(3))

        tagged_runs = [
            (one, {'mytag': 'hello', 'mytag2': 'world'}),
            (two, {'mytag': 'goodbye', 'mytag2': 'world'}),
        ]
        for run_id, run_tags in tagged_runs:
            storage.add_run(
                build_run(run_id=run_id, pipeline_name='some_pipeline', tags=run_tags)
            )
        # Untagged run: should only be counted by the unfiltered query.
        storage.add_run(build_run(run_id=three, pipeline_name='some_pipeline'))
        assert len(storage.get_runs()) == 3

        both_tags = PipelineRunsFilter(tags={'mytag': 'hello', 'mytag2': 'world'})
        assert storage.get_runs_count(filters=both_tags) == 1

        shared_tag = PipelineRunsFilter(tags={'mytag2': 'world'})
        assert storage.get_runs_count(filters=shared_tag) == 2

        assert storage.get_runs_count() == 3

        # Tag keys are expected in this order, each with the set of seen values.
        expected_tags = [('mytag', {'hello', 'goodbye'}), ('mytag2', {'world'})]
        assert storage.get_run_tags() == expected_tags
def test_fetch_by_status(clean_storage):
    """Check that `get_runs` filters by run status.

    Four runs are stored (one NOT_STARTED, two STARTED, one FAILURE); each
    status filter must return exactly the matching run ids, and SUCCESS must
    return nothing.
    """
    storage = clean_storage
    one, two, three, four = (str(uuid.uuid4()) for _ in range(4))

    runs_to_store = [
        (one, PipelineRunStatus.NOT_STARTED),
        (two, PipelineRunStatus.STARTED),
        (three, PipelineRunStatus.STARTED),
        (four, PipelineRunStatus.FAILURE),
    ]
    for run_id, run_status in runs_to_store:
        storage.add_run(
            build_run(run_id=run_id, pipeline_name='some_pipeline', status=run_status)
        )

    def run_ids_with(status):
        # Set of run ids returned for a status-only filter.
        found = storage.get_runs(PipelineRunsFilter(status=status))
        return {run.run_id for run in found}

    assert run_ids_with(PipelineRunStatus.NOT_STARTED) == {one}
    assert run_ids_with(PipelineRunStatus.STARTED) == {two, three}
    assert run_ids_with(PipelineRunStatus.FAILURE) == {four}
    # No run was stored with SUCCESS.
    assert run_ids_with(PipelineRunStatus.SUCCESS) == set()
def test_basic_start_pipeline_execution_with_tags():
    """Start a pipeline over GraphQL with a tag and check the tag is persisted.

    The tag must be present both in the GraphQL response and on the run
    returned by the instance when filtering by that tag.
    """
    instance = DagsterInstance.ephemeral()
    context = define_test_context(instance=instance)
    execution_params = {
        'selector': {'name': 'csv_hello_world'},
        'environmentConfigData': csv_hello_world_solids_config(),
        'executionMetadata': {'tags': [{'key': 'dagster/test_key', 'value': 'test_value'}]},
        'mode': 'default',
    }
    result = execute_dagster_graphql(
        context,
        START_PIPELINE_EXECUTION_QUERY,
        variables={'executionParams': execution_params},
    )

    assert not result.errors
    assert result.data
    payload = result.data['startPipelineExecution']
    assert payload['__typename'] == 'StartPipelineExecutionSuccess'

    run = payload['run']
    run_id = run['runId']
    assert len(run['tags']) > 0
    assert any(
        tag['key'] == 'dagster/test_key' and tag['value'] == 'test_value'
        for tag in run['tags']
    )

    # Check run storage
    tag_filter = PipelineRunsFilter(tags={'dagster/test_key': 'test_value'})
    runs_with_tag = instance.get_runs(filters=tag_filter)
    assert len(runs_with_tag) == 1
    assert runs_with_tag[0].run_id == run_id
def resolve_runs(self, graphene_info):
    """Resolve the runs whose pipeline name matches this object's pipeline.

    Each run returned by the instance is wrapped in the schema's
    'PipelineRun' type.
    """
    name_filter = PipelineRunsFilter(pipeline_name=self._pipeline.name)
    runs = graphene_info.context.instance.get_runs(filters=name_filter)
    return [graphene_info.schema.type_named('PipelineRun')(run) for run in runs]
def resolve_runs(self, graphene_info, **kwargs):
    """Resolve the runs tagged with this schedule's name.

    An optional 'limit' keyword argument is forwarded to the instance query
    (None when absent). Each run is wrapped in the schema's 'PipelineRun' type.
    """
    schedule_filter = PipelineRunsFilter(tags={'dagster/schedule_name': self._schedule.name})
    runs = graphene_info.context.instance.get_runs(
        filters=schedule_filter, limit=kwargs.get('limit')
    )
    return [graphene_info.schema.type_named('PipelineRun')(run) for run in runs]
def test_fetch_by_status_cursored(run_storage_factory_cm_fn):
    """Check status filtering combined with `cursor` and `limit`.

    Four runs are added in order (one, two, three, four); all but `three` are
    STARTED. The assertions expect a cursor to yield only STARTED runs added
    before the cursor run, and `limit=1` to yield the closest such run.
    """
    with run_storage_factory_cm_fn() as storage:
        assert storage
        one, two, three, four = (str(uuid.uuid4()) for _ in range(4))

        runs_to_store = [
            (one, PipelineRunStatus.STARTED),
            (two, PipelineRunStatus.STARTED),
            (three, PipelineRunStatus.NOT_STARTED),
            (four, PipelineRunStatus.STARTED),
        ]
        for run_id, run_status in runs_to_store:
            storage.add_run(
                build_run(run_id=run_id, pipeline_name='some_pipeline', status=run_status)
            )

        def started_with_cursor(cursor, **extra):
            # STARTED-only query relative to the given cursor run id.
            started = PipelineRunsFilter(status=PipelineRunStatus.STARTED)
            return storage.get_runs(started, cursor=cursor, **extra)

        after_four = started_with_cursor(four)
        assert len(after_four) == 2
        assert {run.run_id for run in after_four} == {one, two}

        after_two = started_with_cursor(two)
        assert len(after_two) == 1
        assert {run.run_id for run in after_two} == {one}

        # Nothing was stored before the first run.
        assert not started_with_cursor(one)

        after_four_limited = started_with_cursor(four, limit=1)
        assert len(after_four_limited) == 1
        assert after_four_limited[0].run_id == two
def test_run_priority_pipeline():
    """Check that a later-enqueued 'hi_pipeline' run overtakes 'low_pipeline'.

    Two daemon threads call `execute_on_thread`, first for 'low_pipeline'
    (passed -3) and then for 'hi_pipeline' (passed 3) -- presumably the run
    priority; confirm against `execute_on_thread`. The celery worker is only
    started after both have been submitted. The final assertions require the
    low run to have started before the hi run and to have ended after it.
    """
    with seven.TemporaryDirectory() as tempdir:
        instance = DagsterInstance.local_temp(tempdir)

        # Events handed to the worker threads; the test polls them below.
        # NOTE(review): presumably set by execute_on_thread on completion -- confirm.
        low_done = threading.Event()
        hi_done = threading.Event()

        # enqueue low-priority tasks
        low_thread = threading.Thread(target=execute_on_thread, args=(tempdir, 'low_pipeline', -3, low_done))
        low_thread.daemon = True
        low_thread.start()

        time.sleep(1)  # sleep so that we don't hit any sqlite concurrency issues

        # enqueue hi-priority tasks
        hi_thread = threading.Thread(target=execute_on_thread, args=(tempdir, 'hi_pipeline', 3, hi_done))
        hi_thread.daemon = True
        hi_thread.start()

        time.sleep(5)  # sleep to give queue time to prioritize tasks

        with start_celery_worker():
            # Poll until both threads have signalled their events.
            while not low_done.is_set() or not hi_done.is_set():
                time.sleep(1)

            low_runs = instance.get_runs(filters=PipelineRunsFilter(pipeline_name='low_pipeline'))
            assert len(low_runs) == 1
            low_run = low_runs[0]
            lowstats = instance.get_run_stats(low_run.run_id)

            hi_runs = instance.get_runs(filters=PipelineRunsFilter(pipeline_name='hi_pipeline'))
            assert len(hi_runs) == 1
            hi_run = hi_runs[0]
            histats = instance.get_run_stats(hi_run.run_id)

            # Low started first but finished last.
            assert lowstats.start_time < histats.start_time
            assert lowstats.end_time > histats.end_time
def test_slice(clean_storage):
    """Check `cursor`/`limit` slicing with no filter, a name filter and a tag filter.

    Three runs with sorted ids are stored in ascending order. For every filter
    variant the unsliced query must return all three, and the query with
    `cursor=three, limit=1` must return exactly run `two`.
    """
    storage = clean_storage
    one, two, three = sorted(str(uuid.uuid4()) for _ in range(3))

    for run_id in (one, two, three):
        storage.add_run(
            build_run(run_id=run_id, pipeline_name='some_pipeline', tags={'mytag': 'hello'})
        )

    def check_slice(*filter_args):
        # Same pair of assertions for each filter variant (possibly none).
        everything = storage.get_runs(*filter_args)
        assert len(everything) == 3
        page = storage.get_runs(*filter_args, cursor=three, limit=1)
        assert len(page) == 1
        assert page[0].run_id == two

    check_slice()
    check_slice(PipelineRunsFilter(pipeline_name='some_pipeline'))
    check_slice(PipelineRunsFilter(tags={'mytag': 'hello'}))
def test_fetch_by_pipeline(run_storage_factory_cm_fn):
    """Check that `get_runs` filters by pipeline name.

    Two runs of different pipelines are stored; filtering on 'some_pipeline'
    must return only the first.
    """
    with run_storage_factory_cm_fn() as storage:
        assert storage
        one, two = (str(uuid.uuid4()) for _ in range(2))

        for run_id, name in ((one, 'some_pipeline'), (two, 'some_other_pipeline')):
            storage.add_run(build_run(run_id=run_id, pipeline_name=name))
        assert len(storage.get_runs()) == 2

        name_filter = PipelineRunsFilter(pipeline_name='some_pipeline')
        matched = storage.get_runs(name_filter)
        assert len(matched) == 1
        assert matched[0].run_id == one
def test_fetch_by_pipeline(clean_storage):
    """Check that `get_runs` filters by pipeline name.

    Two runs of different pipelines are stored; filtering on 'some_pipeline'
    must return only the first.
    """
    storage = clean_storage
    one, two = (str(uuid.uuid4()) for _ in range(2))

    for run_id, name in ((one, 'some_pipeline'), (two, 'some_other_pipeline')):
        storage.add_run(build_run(run_id=run_id, pipeline_name=name))
    assert len(storage.get_runs()) == 2

    name_filter = PipelineRunsFilter(pipeline_name='some_pipeline')
    matched = storage.get_runs(name_filter)
    assert len(matched) == 1
    assert matched[0].run_id == one
def to_selector(self):
    """Build a `PipelineRunsFilter` from this object's fields.

    A truthy `status` is looked up by name in `PipelineRunStatus`; otherwise
    the filter's status is None. The remaining fields are passed through.
    """
    status = PipelineRunStatus[self.status] if self.status else None
    return PipelineRunsFilter(
        run_id=self.runId,
        pipeline=self.pipeline,
        tag_key=self.tagKey,
        tag_value=self.tagValue,
        status=status,
    )
def _fetch_runs_by_partition(instance, partition_set_def):
    """Group the runs of a partition set by their 'dagster/partition' tag.

    Args:
        instance: object whose `get_runs` is queried with a tag filter.
        partition_set_def: object whose `name` is used as the
            'dagster/partition_set' tag value.

    Returns:
        A `defaultdict(list)` mapping each partition tag value to its runs.

    Raises:
        KeyError: if a returned run has no 'dagster/partition' tag.
    """
    grouped = defaultdict(list)
    # Query the runs db for every run tagged with this partition set.
    partition_set_filter = PipelineRunsFilter(
        tags={'dagster/partition_set': partition_set_def.name}
    )
    for run in instance.get_runs(partition_set_filter):
        grouped[run.tags['dagster/partition']].append(run)
    return grouped
def test_add_get_postgres_run_storage(clean_storage):
    """Round-trip a single run through the storage: add, fetch, filter, wipe."""
    storage = clean_storage
    run_id = str(uuid.uuid4())
    new_run = build_run(pipeline_name='pipeline_name', run_id=run_id)

    assert storage.add_run(new_run)

    # The stored run must come back unchanged and be discoverable by id.
    assert new_run == storage.get_run_by_id(run_id)
    assert storage.has_run(run_id)
    unknown_id = str(uuid.uuid4())
    assert not storage.has_run(unknown_id)

    assert storage.get_runs() == [new_run]
    matching_name = PipelineRunsFilter(pipeline_name='pipeline_name')
    assert storage.get_runs(matching_name) == [new_run]
    other_name = PipelineRunsFilter(pipeline_name='nope')
    assert storage.get_runs(other_name) == []

    # After a wipe nothing must remain.
    storage.wipe()
    assert storage.get_runs() == []
def to_selector(self):
    """Build a `PipelineRunsFilter` from this object's fields.

    A truthy `status` is looked up by name in `PipelineRunStatus`, and truthy
    `tags` (items with 'key' and 'value' entries) are collapsed into a dict;
    either becomes None when falsy.
    """
    status = PipelineRunStatus[self.status] if self.status else None

    tags = None
    if self.tags:
        # We are wrapping self.tags in a list because dauphin.List is not marked as iterable
        tags = dict((tag['key'], tag['value']) for tag in list(self.tags))

    return PipelineRunsFilter(
        run_id=self.run_id,
        pipeline_name=self.pipeline_name,
        tags=tags,
        status=status,
    )
def test_paginated_fetch(run_storage_factory_cm_fn):
    """Check `cursor`/`limit` pagination with no filter, a name filter and a tag filter.

    Three runs are added in order (one, two, three). For every filter variant
    the unsliced query must return all three, and the query with
    `cursor=three, limit=1` must return exactly run `two`.
    """
    # The storage under test comes solely from the factory fixture; no other
    # storage object is created here.
    with run_storage_factory_cm_fn() as storage:
        assert storage
        one, two, three = [str(uuid.uuid4()), str(uuid.uuid4()), str(uuid.uuid4())]
        storage.add_run(
            build_run(run_id=one, pipeline_name='some_pipeline', tags={'mytag': 'hello'})
        )
        storage.add_run(
            build_run(run_id=two, pipeline_name='some_pipeline', tags={'mytag': 'hello'})
        )
        storage.add_run(
            build_run(run_id=three, pipeline_name='some_pipeline', tags={'mytag': 'hello'})
        )

        # No filter.
        all_runs = storage.get_runs()
        assert len(all_runs) == 3
        sliced_runs = storage.get_runs(cursor=three, limit=1)
        assert len(sliced_runs) == 1
        assert sliced_runs[0].run_id == two

        # Pipeline-name filter.
        all_runs = storage.get_runs(PipelineRunsFilter(pipeline_name='some_pipeline'))
        assert len(all_runs) == 3
        sliced_runs = storage.get_runs(
            PipelineRunsFilter(pipeline_name='some_pipeline'), cursor=three, limit=1
        )
        assert len(sliced_runs) == 1
        assert sliced_runs[0].run_id == two

        # Tag filter.
        all_runs = storage.get_runs(PipelineRunsFilter(tags={'mytag': 'hello'}))
        assert len(all_runs) == 3
        sliced_runs = storage.get_runs(
            PipelineRunsFilter(tags={'mytag': 'hello'}), cursor=three, limit=1
        )
        assert len(sliced_runs) == 1
        assert sliced_runs[0].run_id == two
def get_runs(self, filters=None, cursor=None, limit=None):
    """Return the runs matching `filters`, optionally sliced by cursor/limit.

    Args:
        filters: optional `PipelineRunsFilter`; an empty filter is used when None.
        cursor: optional string, forwarded to `_add_cursor_limit_to_query`.
        limit: optional int, forwarded to `_add_cursor_limit_to_query`.

    Returns:
        Whatever `_rows_to_runs` produces for the executed query's rows.
    """
    filters = check.opt_inst_param(
        filters, 'filters', PipelineRunsFilter, default=PipelineRunsFilter()
    )
    check.opt_str_param(cursor, 'cursor')
    check.opt_int_param(limit, 'limit')

    # If we have a tags filter, then we need to select from a joined table
    if filters.tags:
        source = RunsTable.outerjoin(RunTagsTable, RunsTable.c.run_id == RunTagsTable.c.run_id)
    else:
        source = RunsTable

    query = db.select([RunsTable.c.run_body]).select_from(source)
    filtered = self._add_filters_to_query(query, filters)
    sliced = self._add_cursor_limit_to_query(filtered, cursor, limit)
    return self._rows_to_runs(self.execute(sliced))
def last_empty_partition(context, partition_set_def):
    """Pick the last partition that has no run with SUCCESS status.

    Partitions are scanned in reverse `get_partitions()` order. For each one,
    runs tagged with both the partition name and the partition set name are
    fetched from `context.instance`.

    Args:
        context: a `ScheduleExecutionContext`.
        partition_set_def: a `PartitionSetDefinition`.

    Returns:
        The first partition (in reverse order) without a successful run, or
        None when there are no partitions or every partition has one.
    """
    check.inst_param(context, 'context', ScheduleExecutionContext)
    partition_set_def = check.inst_param(
        partition_set_def, 'partition_set_def', PartitionSetDefinition
    )

    partitions = partition_set_def.get_partitions()
    if not partitions:
        return None

    for candidate in reversed(partitions):
        tag_filter = PipelineRunsFilter(
            tags={
                "dagster/partition": candidate.name,
                'dagster/partition_set': partition_set_def.name,
            }
        )
        runs = context.instance.get_runs(tag_filter)
        succeeded = any(run.status == PipelineRunStatus.SUCCESS for run in runs)
        if not succeeded:
            return candidate

    # Every partition already has at least one successful run.
    return None
def get_runs_count(self, filters=None):
    """Return the number of runs matching `filters`.

    Args:
        filters: optional `PipelineRunsFilter`; an empty filter is used when None.

    Returns:
        The first column of the first row of a `count()` over the filtered
        subquery.
    """
    filters = check.opt_inst_param(
        filters, 'filters', PipelineRunsFilter, default=PipelineRunsFilter()
    )

    # If we have a tags filter, then we need to select from a joined table
    if filters.tags:
        source = RunsTable.outerjoin(RunTagsTable, RunsTable.c.run_id == RunTagsTable.c.run_id)
    else:
        source = RunsTable

    filtered = self._add_filters_to_query(db.select([1]).select_from(source), filters)

    # We use an alias here because Postgres requires subqueries to be
    # aliased.
    aliased = filtered.alias("subquery")

    count_query = db.select([db.func.count()]).select_from(aliased)
    result_rows = self.execute(count_query)
    return result_rows[0][0]
def resolve_runs_count(self, graphene_info):
    """Resolve the number of runs tagged with this schedule's id.

    The filter is passed through the `filters` keyword, and the tag is given
    as a {key: value} dict, which is the form used by the other
    `PipelineRunsFilter(tags=...)` call sites in this code base.
    NOTE(review): confirm that the instance's `get_runs_count` takes `filters`
    in the version this resolver targets.
    """
    schedule_filter = PipelineRunsFilter(
        tags={"dagster/schedule_id": self._schedule.schedule_id}
    )
    return graphene_info.context.instance.get_runs_count(filters=schedule_filter)
def resolve_runs_count(self, graphene_info):
    """Resolve the number of runs tagged with this schedule's name."""
    schedule_filter = PipelineRunsFilter(tags={"dagster/schedule_name": self._schedule.name})
    instance = graphene_info.context.instance
    return instance.get_runs_count(filters=schedule_filter)
def test_fetch_by_filter(clean_storage):
    """Check `get_runs` and `get_runs_count` across filter combinations.

    Three runs are stored:
      * one:   'some_pipeline',  tags tag=hello + tag2=world, SUCCESS
      * two:   'some_pipeline',  tags tag=hello,              FAILURE
      * three: 'other_pipeline', no tags,                     SUCCESS
    Every filter combination is run through both query methods, and the two
    must agree on the number of matches.
    """
    storage = clean_storage
    one, two, three = (str(uuid.uuid4()) for _ in range(3))

    storage.add_run(
        build_run(
            run_id=one,
            pipeline_name='some_pipeline',
            tags={'tag': 'hello', 'tag2': 'world'},
            status=PipelineRunStatus.SUCCESS,
        )
    )
    storage.add_run(
        build_run(
            run_id=two,
            pipeline_name='some_pipeline',
            tags={'tag': 'hello'},
            status=PipelineRunStatus.FAILURE,
        )
    )
    storage.add_run(
        build_run(run_id=three, pipeline_name='other_pipeline', status=PipelineRunStatus.SUCCESS)
    )
    assert len(storage.get_runs()) == 3

    def query(**filter_kwargs):
        # Run the same filter through get_runs and then get_runs_count.
        found = storage.get_runs(PipelineRunsFilter(**filter_kwargs))
        total = storage.get_runs_count(PipelineRunsFilter(**filter_kwargs))
        return found, total

    def assert_only_run_one(found, total):
        assert len(found) == 1
        assert total == 1
        assert found[0].run_id == one

    def assert_pair(found, total, first, second):
        assert len(found) == 2
        assert total == 2
        found_ids = [run.run_id for run in found]
        assert first in found_ids
        assert second in found_ids

    # Single-field filters.
    found, total = query(run_id=one)
    assert_only_run_one(found, total)

    found, total = query(pipeline_name='some_pipeline')
    assert_pair(found, total, one, two)

    found, total = query(status=PipelineRunStatus.SUCCESS)
    assert_pair(found, total, one, three)

    found, total = query(tags={'tag': 'hello'})
    assert_pair(found, total, one, two)

    found, total = query(tags={'tag': 'hello', 'tag2': 'world'})
    assert_only_run_one(found, total)

    # Combined filters.
    found, total = query(pipeline_name="some_pipeline", tags={'tag': 'hello'})
    assert_pair(found, total, one, two)

    found, total = query(
        pipeline_name="some_pipeline",
        tags={'tag': 'hello'},
        status=PipelineRunStatus.SUCCESS,
    )
    assert_only_run_one(found, total)

    # All filters
    found, total = query(
        run_id=one,
        pipeline_name="some_pipeline",
        tags={'tag': 'hello'},
        status=PipelineRunStatus.SUCCESS,
    )
    assert_only_run_one(found, total)

    # Empty filter matches everything.
    found, total = query()
    assert len(found) == 3
    assert total == 3