def test_single_write_read_with_snapshot(self, storage):
    '''In-memory storage round-trips a pipeline snapshot and its run; wipe() clears both.'''
    if not isinstance(storage, InMemoryRunStorage):
        pytest.skip()

    pipeline_def = PipelineDefinition(name='some_pipeline', solid_defs=[])
    snapshot = pipeline_def.get_pipeline_snapshot()
    snapshot_id = create_pipeline_snapshot_id(snapshot)
    run_id = 'lkasjdflkjasdf'
    run = PipelineRun.create_empty_run(
        run_id=run_id,
        pipeline_name=pipeline_def.name,
        pipeline_snapshot_id=snapshot_id,
    )

    # Snapshot round-trip: absent until added, then retrievable by its id.
    assert not storage.has_pipeline_snapshot(snapshot_id)
    assert storage.add_pipeline_snapshot(snapshot) == snapshot_id
    assert storage.get_pipeline_snapshot(snapshot_id) == snapshot

    # Run round-trip.
    storage.add_run(run)
    assert storage.get_run_by_id(run_id) == run

    # Wiping storage removes both the snapshot and the run.
    storage.wipe()
    assert not storage.has_pipeline_snapshot(snapshot_id)
    assert not storage.has_run(run_id)
def create_run_with_snapshot(self, create_run_args):
    '''Persist the pipeline snapshot carried by ``create_run_args`` (if not already
    stored) and create a run stamped with that snapshot's id.'''
    check.inst_param(create_run_args, 'create_run_args', InstanceCreateRunArgs)
    from dagster.core.snap.pipeline_snapshot import create_pipeline_snapshot_id

    pipeline_snapshot = create_run_args.pipeline_snapshot
    snapshot_id = create_pipeline_snapshot_id(pipeline_snapshot)
    if not self._run_storage.has_pipeline_snapshot(snapshot_id):
        # Storage derives the content-addressed id itself; both derivations must agree.
        returned_id = self._run_storage.add_pipeline_snapshot(pipeline_snapshot)
        check.invariant(snapshot_id == returned_id)

    run = PipelineRun(
        pipeline_name=pipeline_snapshot.name,
        pipeline_snapshot_id=snapshot_id,
        run_id=create_run_args.run_id,
        environment_dict=create_run_args.environment_dict,
        mode=create_run_args.mode,
        selector=create_run_args.selector,
        step_keys_to_execute=create_run_args.step_keys_to_execute,
        status=create_run_args.status,
        tags=create_run_args.tags,
        parent_run_id=create_run_args.parent_run_id,
        root_run_id=create_run_args.root_run_id,
    )
    return self.create_run(run)
def add_pipeline_snapshot(self, pipeline_snapshot):
    '''Store a pipeline snapshot and return its content-addressed id.'''
    check.inst_param(pipeline_snapshot, 'pipeline_snapshot', PipelineSnapshot)
    # Delegate to the generic snapshot writer with the PIPELINE type tag.
    snapshot_id = create_pipeline_snapshot_id(pipeline_snapshot)
    return self._add_snapshot(
        snapshot_id=snapshot_id,
        snapshot_obj=pipeline_snapshot,
        snapshot_type=SnapshotType.PIPELINE,
    )
def test_empty_pipeline_snap_props(snapshot):
    '''A snapshot of an empty pipeline exposes default metadata and round-trips.'''
    pipeline_snapshot = PipelineSnapshot.from_pipeline_def(get_noop_pipeline())

    assert pipeline_snapshot.name == 'noop_pipeline'
    assert pipeline_snapshot.description is None
    assert pipeline_snapshot.tags == {}

    # Serialization round-trip is lossless.
    assert serialize_rt(pipeline_snapshot) == pipeline_snapshot

    snapshot.assert_match(serialize_pp(pipeline_snapshot))
    snapshot.assert_match(create_pipeline_snapshot_id(pipeline_snapshot))
def add_pipeline_snapshot(self, pipeline_snapshot):
    '''Compress and insert a pipeline snapshot row; return its content-addressed id.'''
    check.inst_param(pipeline_snapshot, 'pipeline_snapshot', PipelineSnapshot)

    snapshot_id = create_pipeline_snapshot_id(pipeline_snapshot)
    serialized_body = serialize_dagster_namedtuple(pipeline_snapshot).encode()

    with self.connect() as conn:
        conn.execute(
            SnapshotsTable.insert().values(  # pylint: disable=no-value-for-parameter
                snapshot_id=snapshot_id,
                snapshot_body=zlib.compress(serialized_body),
                snapshot_type='PIPELINE',
            )
        )
    return snapshot_id
def test_create_noop_execution_plan_with_tags(snapshot):
    '''Snapshot-test the plan snap for a pipeline whose single solid carries tags.'''

    @solid(tags={'foo': 'bar', 'bar': 'baaz'})
    def noop_solid(_):
        pass

    @pipeline
    def noop_pipeline():
        noop_solid()

    plan = create_execution_plan(noop_pipeline)
    plan_snapshot = snapshot_from_execution_plan(
        plan, create_pipeline_snapshot_id(noop_pipeline.get_pipeline_snapshot())
    )
    snapshot.assert_match(serialize_pp(plan_snapshot))
def test_create_noop_execution_plan(snapshot):
    '''Snapshot-test the plan snap for a trivial single-solid pipeline.'''

    @solid
    def noop_solid(_):
        pass

    @pipeline
    def noop_pipeline():
        noop_solid()

    plan = create_execution_plan(noop_pipeline)
    plan_snapshot = snapshot_from_execution_plan(
        plan, create_pipeline_snapshot_id(noop_pipeline.get_pipeline_snapshot())
    )
    snapshot.assert_match(serialize_pp(plan_snapshot))
def test_create_pipeline_snapshot():
    '''Executing a pipeline stamps the stored run with the pipeline's snapshot id.'''

    @solid
    def noop_solid(_):
        pass

    @pipeline
    def noop_pipeline():
        noop_solid()

    instance = DagsterInstance.local_temp()
    result = execute_pipeline(noop_pipeline, instance=instance)
    assert result.success

    stored_run = instance.get_run_by_id(result.run_id)
    expected_snapshot_id = create_pipeline_snapshot_id(noop_pipeline.get_pipeline_snapshot())
    assert stored_run.pipeline_snapshot_id == expected_snapshot_id
def test_pipeline_snap_all_props(snapshot):
    '''Decorator-declared metadata (description, tags) surfaces on the snapshot.'''

    @solid
    def noop_solid(_):
        pass

    @pipeline(description='desc', tags={'key': 'value'})
    def noop_pipeline():
        noop_solid()

    pipeline_snapshot = PipelineSnapshot.from_pipeline_def(noop_pipeline)

    assert pipeline_snapshot.name == 'noop_pipeline'
    assert pipeline_snapshot.description == 'desc'
    assert pipeline_snapshot.tags == {'key': 'value'}

    # Serialization round-trip is lossless.
    assert serialize_rt(pipeline_snapshot) == pipeline_snapshot

    snapshot.assert_match(serialize_pp(pipeline_snapshot))
    snapshot.assert_match(create_pipeline_snapshot_id(pipeline_snapshot))
def test_basic_dep_fan_out(snapshot):
    '''One output feeding two aliased consumers is indexed correctly in both directions.'''

    @solid
    def return_one(_):
        return 1

    @solid(input_defs=[InputDefinition('value', int)])
    def passthrough(_, value):
        return value

    @pipeline
    def single_dep_pipeline():
        one = return_one()
        passthrough.alias('passone')(one)
        passthrough.alias('passtwo')(one)

    dep_snapshot = build_dep_structure_snapshot_from_icontains_solids(single_dep_pipeline)
    index = DependencyStructureIndex(dep_snapshot)

    for invocation_name in ('return_one', 'passone', 'passtwo'):
        assert index.get_invocation(invocation_name)

    # Upstream lookups: both consumers point back at return_one's result.
    source = OutputHandleSnap('return_one', 'result')
    assert index.get_upstream_output('passone', 'value') == source
    assert index.get_upstream_output('passtwo', 'value') == source

    # Downstream lookup: return_one's result fans out to both aliases.
    assert set(index.get_downstream_inputs('return_one', 'result')) == set([
        InputHandle('passthrough', 'passone', 'value'),
        InputHandle('passthrough', 'passtwo', 'value'),
    ])

    # Dependency-structure snapshot survives a serialization round-trip.
    round_tripped = deserialize_json_to_dagster_namedtuple(
        serialize_dagster_namedtuple(dep_snapshot)
    )
    assert round_tripped == dep_snapshot

    pipeline_snapshot = PipelineSnapshot.from_pipeline_def(single_dep_pipeline)
    assert serialize_rt(pipeline_snapshot) == pipeline_snapshot
    snapshot.assert_match(serialize_pp(pipeline_snapshot))
    snapshot.assert_match(create_pipeline_snapshot_id(pipeline_snapshot))
def test_two_invocations_deps_snap(snapshot):
    '''Two aliased invocations of one solid are indexed and snapshot independently.'''

    @solid
    def noop_solid(_):
        pass

    @pipeline
    def two_solid_pipeline():
        noop_solid.alias('one')()
        noop_solid.alias('two')()

    dep_snapshot = build_dep_structure_snapshot_from_icontains_solids(two_solid_pipeline)
    index = DependencyStructureIndex(dep_snapshot)
    assert index.get_invocation('one')
    assert index.get_invocation('two')

    pipeline_snapshot = PipelineSnapshot.from_pipeline_def(two_solid_pipeline)
    # Serialization round-trip is lossless.
    assert serialize_rt(pipeline_snapshot) == pipeline_snapshot
    snapshot.assert_match(serialize_pp(pipeline_snapshot))
    snapshot.assert_match(create_pipeline_snapshot_id(pipeline_snapshot))
def test_create_execution_plan_with_dep(snapshot):
    '''Snapshot-test the plan snap for a two-solid pipeline with a dependency.'''

    @solid
    def solid_one(_):
        return 1

    @solid
    def solid_two(_, num):
        return num + 1

    @pipeline
    def noop_pipeline():
        solid_two(solid_one())

    plan = create_execution_plan(noop_pipeline)
    plan_snapshot = snapshot_from_execution_plan(
        plan, create_pipeline_snapshot_id(noop_pipeline.get_pipeline_snapshot())
    )
    snapshot.assert_match(serialize_pp(plan_snapshot))
def test_create_with_composite(snapshot):
    '''Snapshot-test the plan snap for a pipeline built from two composite solids.'''

    @solid(output_defs=[OutputDefinition(name='out_num', dagster_type=int)])
    def return_one(_):
        return 1

    @solid(
        input_defs=[InputDefinition(name='num', dagster_type=int)],
        output_defs=[OutputDefinition(int)],
    )
    def add_one(_, num):
        return num + 1

    # Two structurally identical composites so the plan contains parallel subtrees.
    @composite_solid(output_defs=[OutputDefinition(name='named_output', dagster_type=int)])
    def comp_1():
        return add_one(return_one())

    @composite_solid(output_defs=[OutputDefinition(name='named_output', dagster_type=int)])
    def comp_2():
        return add_one(return_one())

    @solid
    def add(_, num_one, num_two):
        return num_one + num_two

    @pipeline
    def do_comps():
        add(num_one=comp_1(), num_two=comp_2())

    plan = create_execution_plan(do_comps)
    plan_snapshot = snapshot_from_execution_plan(
        plan, create_pipeline_snapshot_id(do_comps.get_pipeline_snapshot())
    )
    snapshot.assert_match(serialize_pp(plan_snapshot))
def test_basic_fan_in(snapshot):
    '''Two Nothing outputs fanning into one input are indexed in order and round-trip.'''

    @solid(output_defs=[OutputDefinition(Nothing)])
    def return_nothing(_):
        return None

    @solid(input_defs=[InputDefinition('nothing', Nothing)])
    def take_nothings(_):
        return None

    @pipeline
    def fan_in_test():
        take_nothings([
            return_nothing.alias('nothing_one')(),
            return_nothing.alias('nothing_two')(),
        ])

    dep_snapshot = build_dep_structure_snapshot_from_icontains_solids(fan_in_test)
    index = DependencyStructureIndex(dep_snapshot)

    assert index.get_invocation('nothing_one')
    assert index.get_invocation('take_nothings')
    # Fan-in preserves the declared upstream order.
    assert index.get_upstream_outputs('take_nothings', 'nothing') == [
        OutputHandleSnap('nothing_one', 'result'),
        OutputHandleSnap('nothing_two', 'result'),
    ]

    # Dependency-structure snapshot survives a serialization round-trip.
    round_tripped = deserialize_json_to_dagster_namedtuple(
        serialize_dagster_namedtuple(dep_snapshot)
    )
    assert round_tripped == dep_snapshot

    pipeline_snapshot = PipelineSnapshot.from_pipeline_def(fan_in_test)
    assert serialize_rt(pipeline_snapshot) == pipeline_snapshot
    snapshot.assert_match(serialize_pp(pipeline_snapshot))
    snapshot.assert_match(create_pipeline_snapshot_id(pipeline_snapshot))
def add_pipeline_snapshot(self, pipeline_snapshot):
    '''Store a pipeline snapshot in the in-memory map, keyed and returned by its
    content-addressed id.'''
    check.inst_param(pipeline_snapshot, 'pipeline_snapshot', PipelineSnapshot)
    snapshot_id = create_pipeline_snapshot_id(pipeline_snapshot)
    self._pipeline_snapshots[snapshot_id] = pipeline_snapshot
    return snapshot_id
def get_or_create_run(
    self,
    pipeline_name=None,
    run_id=None,
    environment_dict=None,
    mode=None,
    selector=None,
    step_keys_to_execute=None,
    status=None,
    tags=None,
    root_run_id=None,
    parent_run_id=None,
    pipeline_snapshot=None,
    execution_plan_snapshot=None,
):
    '''Return the existing run with ``run_id`` or construct and persist a new one.

    When ``pipeline_snapshot`` / ``execution_plan_snapshot`` are provided they are
    stored (content-addressed; re-adding an existing snapshot is skipped) and the
    run is stamped with their ids. If a run with ``run_id`` already exists and is
    identical to the one requested, it is returned as-is.

    Raises:
        DagsterRunConflict: an existing run with ``run_id`` differs from the
            run described by these arguments.
    '''
    if tags and IS_AIRFLOW_INGEST_PIPELINE_STR in tags:
        # Airflow-ingest runs must carry an execution date; default it to now (UTC).
        # NOTE: this mutates the caller-supplied tags dict in place.
        if AIRFLOW_EXECUTION_DATE_STR not in tags:
            tags[AIRFLOW_EXECUTION_DATE_STR] = get_current_datetime_in_utc().isoformat()

    pipeline_run = PipelineRun(
        pipeline_name=pipeline_name,
        run_id=run_id,
        environment_dict=environment_dict,
        mode=mode,
        selector=selector,
        step_keys_to_execute=step_keys_to_execute,
        status=status,
        tags=tags,
        root_run_id=root_run_id,
        parent_run_id=parent_run_id,
    )

    # BUGFIX: initialize up front. This was previously bound only inside the
    # pipeline_snapshot branch, so passing execution_plan_snapshot without
    # pipeline_snapshot raised NameError at the invariant below; now that case
    # fails the invariant cleanly instead.
    pipeline_snapshot_id = None

    if pipeline_snapshot is not None:
        from dagster.core.snap.pipeline_snapshot import create_pipeline_snapshot_id

        pipeline_snapshot_id = create_pipeline_snapshot_id(pipeline_snapshot)
        if not self._run_storage.has_pipeline_snapshot(pipeline_snapshot_id):
            # Storage derives the content-addressed id itself; both must agree.
            returned_pipeline_snapshot_id = self._run_storage.add_pipeline_snapshot(
                pipeline_snapshot
            )
            check.invariant(pipeline_snapshot_id == returned_pipeline_snapshot_id)
        pipeline_run = pipeline_run.with_pipeline_snapshot_id(pipeline_snapshot_id)

    if execution_plan_snapshot is not None:
        from dagster.core.snap.execution_plan_snapshot import create_execution_plan_snapshot_id

        # The plan snapshot must reference the pipeline snapshot stored above.
        check.invariant(execution_plan_snapshot.pipeline_snapshot_id == pipeline_snapshot_id)
        execution_plan_snapshot_id = create_execution_plan_snapshot_id(execution_plan_snapshot)
        if not self._run_storage.has_execution_plan_snapshot(execution_plan_snapshot_id):
            returned_execution_plan_snapshot_id = self._run_storage.add_execution_plan_snapshot(
                execution_plan_snapshot
            )
            check.invariant(
                execution_plan_snapshot_id == returned_execution_plan_snapshot_id
            )
        pipeline_run = pipeline_run.with_execution_plan_snapshot_id(execution_plan_snapshot_id)

    if self.has_run(pipeline_run.run_id):
        candidate_run = self.get_run_by_id(pipeline_run.run_id)
        field_diff = _check_run_equality(pipeline_run, candidate_run)
        if field_diff:
            raise DagsterRunConflict(
                'Found conflicting existing run with same id {run_id}. Runs differ in:'
                '\n{field_diff}'.format(
                    run_id=pipeline_run.run_id,
                    field_diff=_format_field_diff(field_diff),
                ),
            )
        return candidate_run

    return self._run_storage.add_run(pipeline_run)