def test_0_10_0_schedule_wipe():
    src_dir = file_relative_path(__file__, "snapshot_0_10_0_wipe_schedules/sqlite")
    with copy_directory(src_dir) as test_dir:
        db_path = os.path.join(test_dir, "schedules", "schedules.db")
        assert get_current_alembic_version(db_path) == "b22f16781a7c"
        assert "schedules" in get_sqlite3_tables(db_path)
        assert "schedule_ticks" in get_sqlite3_tables(db_path)
        assert "jobs" not in get_sqlite3_tables(db_path)
        assert "job_ticks" not in get_sqlite3_tables(db_path)

        with pytest.raises(DagsterInstanceMigrationRequired):
            with DagsterInstance.from_ref(InstanceRef.from_dir(test_dir)) as instance:
                instance.optimize_for_dagit(statement_timeout=500)

        with DagsterInstance.from_ref(InstanceRef.from_dir(test_dir)) as instance:
            instance.upgrade()

            assert "schedules" not in get_sqlite3_tables(db_path)
            assert "schedule_ticks" not in get_sqlite3_tables(db_path)
            assert "jobs" in get_sqlite3_tables(db_path)
            assert "job_ticks" in get_sqlite3_tables(db_path)

        with DagsterInstance.from_ref(InstanceRef.from_dir(test_dir)) as upgraded_instance:
            assert len(upgraded_instance.all_stored_job_state()) == 0
def test_0_10_0_schedule_wipe():
    src_dir = file_relative_path(__file__, "snapshot_0_10_0_wipe_schedules/sqlite")
    with copy_directory(src_dir) as test_dir:
        db_path = os.path.join(test_dir, "schedules", "schedules.db")
        assert get_current_alembic_version(db_path) == "b22f16781a7c"
        assert "schedules" in get_sqlite3_tables(db_path)
        assert "schedule_ticks" in get_sqlite3_tables(db_path)
        assert "jobs" not in get_sqlite3_tables(db_path)
        assert "job_ticks" not in get_sqlite3_tables(db_path)

        with DagsterInstance.from_ref(InstanceRef.from_dir(test_dir)) as instance:
            instance.upgrade()

            assert "schedules" not in get_sqlite3_tables(db_path)
            assert "schedule_ticks" not in get_sqlite3_tables(db_path)
            assert "jobs" in get_sqlite3_tables(db_path)
            assert "job_ticks" in get_sqlite3_tables(db_path)

        with DagsterInstance.from_ref(InstanceRef.from_dir(test_dir)) as upgraded_instance:
            assert len(upgraded_instance.all_instigator_state()) == 0
def test_get_run_by_id():
    with seven.TemporaryDirectory() as tmpdir_path:
        instance = DagsterInstance.from_ref(InstanceRef.from_dir(tmpdir_path))

        assert instance.get_runs() == []
        pipeline_run = PipelineRun("foo_pipeline", "new_run")
        assert instance.get_run_by_id(pipeline_run.run_id) is None

        instance._run_storage.add_run(pipeline_run)  # pylint: disable=protected-access

        assert instance.get_runs() == [pipeline_run]
        assert instance.get_run_by_id(pipeline_run.run_id) == pipeline_run

    # Run is created after we check whether it exists
    with seven.TemporaryDirectory() as tmpdir_path:
        instance = DagsterInstance.from_ref(InstanceRef.from_dir(tmpdir_path))
        run = PipelineRun(pipeline_name="foo_pipeline", run_id="bar_run")

        def _has_run(self, run_id):
            # This is uglier than we would like because there is no nonlocal keyword in py2
            global MOCK_HAS_RUN_CALLED  # pylint: disable=global-statement
            # pylint: disable=protected-access
            if not self._run_storage.has_run(run_id) and not MOCK_HAS_RUN_CALLED:
                self._run_storage.add_run(PipelineRun(pipeline_name="foo_pipeline", run_id=run_id))
                return False
            else:
                return self._run_storage.has_run(run_id)

        instance.has_run = types.MethodType(_has_run, instance)

        assert instance.get_run_by_id(run.run_id) is None

    # Run is created after we check whether it exists, but deleted before we can get it
    global MOCK_HAS_RUN_CALLED  # pylint:disable=global-statement
    MOCK_HAS_RUN_CALLED = False
    with seven.TemporaryDirectory() as tmpdir_path:
        instance = DagsterInstance.from_ref(InstanceRef.from_dir(tmpdir_path))
        run = PipelineRun(pipeline_name="foo_pipeline", run_id="bar_run")

        def _has_run(self, run_id):
            global MOCK_HAS_RUN_CALLED  # pylint: disable=global-statement
            # pylint: disable=protected-access
            if not self._run_storage.has_run(run_id) and not MOCK_HAS_RUN_CALLED:
                self._run_storage.add_run(PipelineRun(pipeline_name="foo_pipeline", run_id=run_id))
                MOCK_HAS_RUN_CALLED = True
                return False
            elif self._run_storage.has_run(run_id) and MOCK_HAS_RUN_CALLED:
                MOCK_HAS_RUN_CALLED = False
                return True
            else:
                return False

        instance.has_run = types.MethodType(_has_run, instance)

        assert instance.get_run_by_id(run.run_id) is None
def test_get_or_create_run():
    with seven.TemporaryDirectory() as tmpdir_path:
        instance = DagsterInstance.from_ref(InstanceRef.from_dir(tmpdir_path))

        run = PipelineRun.create_empty_run('foo_pipeline', 'bar_run')
        assert instance.get_or_create_run(run) == run
        assert instance.has_run(run.run_id)
        assert instance.get_or_create_run(run) == run

    # Run is created after we check whether it exists
    with seven.TemporaryDirectory() as tmpdir_path:
        instance = DagsterInstance.from_ref(InstanceRef.from_dir(tmpdir_path))
        run = PipelineRun.create_empty_run('foo_pipeline', 'bar_run')

        def _has_run(self, run_id):
            # This is uglier than we would like because there is no nonlocal keyword in py2
            global MOCK_HAS_RUN_CALLED  # pylint: disable=global-statement
            # pylint: disable=protected-access
            if not self._run_storage.has_run(run_id) and not MOCK_HAS_RUN_CALLED:
                self._run_storage.add_run(PipelineRun.create_empty_run('foo_pipeline', run_id))
                return False
            else:
                return self._run_storage.has_run(run_id)

        instance.has_run = types.MethodType(_has_run, instance)

        assert instance.get_or_create_run(run) == run

    # Run is created after we check whether it exists, but deleted before we can get it
    global MOCK_HAS_RUN_CALLED  # pylint:disable=global-statement
    MOCK_HAS_RUN_CALLED = False
    with seven.TemporaryDirectory() as tmpdir_path:
        instance = DagsterInstance.from_ref(InstanceRef.from_dir(tmpdir_path))
        run = PipelineRun.create_empty_run('foo_pipeline', 'bar_run')

        def _has_run(self, run_id):
            global MOCK_HAS_RUN_CALLED  # pylint: disable=global-statement
            # pylint: disable=protected-access
            if not self._run_storage.has_run(run_id) and not MOCK_HAS_RUN_CALLED:
                self._run_storage.add_run(PipelineRun.create_empty_run('foo_pipeline', run_id))
                MOCK_HAS_RUN_CALLED = True
                return False
            elif self._run_storage.has_run(run_id) and MOCK_HAS_RUN_CALLED:
                MOCK_HAS_RUN_CALLED = False
                return True
            else:
                return False

        instance.has_run = types.MethodType(_has_run, instance)

        with pytest.raises(check.CheckError, match='Inconsistent run storage'):
            instance.get_or_create_run(run)
def test_run_created_in_0_7_9_snapshot_id_change():
    src_dir = file_relative_path(__file__, "snapshot_0_7_9_shapshot_id_creation_change/sqlite")
    with copy_directory(src_dir) as test_dir:
        instance = DagsterInstance.from_ref(InstanceRef.from_dir(test_dir))

        # run_id = 'e297fa70-49e8-43f8-abfe-1634f02644f6'

        old_pipeline_snapshot_id = "88528edde2ed64da3c39cca0da8ba2f7586c1a5d"
        old_execution_plan_snapshot_id = "2246f8e5a10d21e15fbfa3773d7b2d0bc1fa9d3d"

        historical_pipeline = instance.get_historical_pipeline(old_pipeline_snapshot_id)
        pipeline_snapshot = historical_pipeline.pipeline_snapshot
        ep_snapshot = instance.get_execution_plan_snapshot(old_execution_plan_snapshot_id)

        # It is the pipeline snapshot that changed
        # Verify that snapshot ids are not equal. This changed in 0.7.10
        created_snapshot_id = create_pipeline_snapshot_id(pipeline_snapshot)
        assert created_snapshot_id != old_pipeline_snapshot_id

        # verify that both are accessible off of the historical pipeline
        assert historical_pipeline.computed_pipeline_snapshot_id == created_snapshot_id
        assert historical_pipeline.identifying_pipeline_snapshot_id == old_pipeline_snapshot_id

        # We also changed execution plan schema in 0.7.11.post1
        assert create_execution_plan_snapshot_id(ep_snapshot) != old_execution_plan_snapshot_id

        # This previously failed with a check error
        assert ExternalExecutionPlan(ep_snapshot)
def test_backcompat_get_asset_records():
    src_dir = file_relative_path(__file__, "compat_tests/snapshot_0_11_0_asset_materialization")

    # should contain materialization events for asset keys a, b, c, d, e, f
    # events a and b have been wiped, but b has been rematerialized
    def _validate_materialization(asset_key, event, expected_tags):
        assert isinstance(event, EventLogEntry)
        assert event.dagster_event
        assert event.dagster_event.is_step_materialization
        assert event.dagster_event.step_materialization_data.materialization.asset_key == asset_key
        assert event.dagster_event.step_materialization_data.materialization.tags == expected_tags

    b = AssetKey("b")

    with copy_directory(src_dir) as test_dir:
        with DagsterInstance.from_ref(InstanceRef.from_dir(test_dir)) as instance:
            storage = instance.event_log_storage

            records = storage.get_asset_records([b])
            asset_entry = records[0].asset_entry
            assert asset_entry.asset_key == b
            _validate_materialization(b, asset_entry.last_materialization, expected_tags={})
def test_input_manager_with_failure():
    @root_input_manager
    def should_fail(_):
        raise Failure(
            description="Foolure",
            metadata_entries=[
                EventMetadataEntry.text(label="label", text="text", description="description")
            ],
        )

    @solid(input_defs=[InputDefinition("_fail_input", root_manager_key="should_fail")])
    def fail_on_input(_, _fail_input):
        assert False, "should not be called"

    @pipeline(mode_defs=[ModeDefinition(resource_defs={"should_fail": should_fail})])
    def simple():
        fail_on_input()

    with tempfile.TemporaryDirectory() as tmpdir_path:
        instance = DagsterInstance.from_ref(InstanceRef.from_dir(tmpdir_path))
        result = execute_pipeline(simple, instance=instance, raise_on_error=False)

        assert not result.success

        failure_data = result.result_for_solid("fail_on_input").failure_data

        assert failure_data.error.cls_name == "Failure"
        assert failure_data.user_failure_data.description == "Foolure"
        assert failure_data.user_failure_data.metadata_entries[0].label == "label"
        assert failure_data.user_failure_data.metadata_entries[0].entry_data.text == "text"
        assert failure_data.user_failure_data.metadata_entries[0].description == "description"
def test_run_created_in_0_7_9_snapshot_id_change():
    test_dir = file_relative_path(__file__, 'snapshot_0_7_9_shapshot_id_creation_change/sqlite')
    with restore_directory(test_dir):
        instance = DagsterInstance.from_ref(InstanceRef.from_dir(test_dir))

        # run_id = 'e297fa70-49e8-43f8-abfe-1634f02644f6'

        old_pipeline_snapshot_id = '88528edde2ed64da3c39cca0da8ba2f7586c1a5d'
        old_execution_plan_snapshot_id = '2246f8e5a10d21e15fbfa3773d7b2d0bc1fa9d3d'

        pipeline_snapshot = instance.get_pipeline_snapshot(old_pipeline_snapshot_id)
        ep_snapshot = instance.get_execution_plan_snapshot(old_execution_plan_snapshot_id)

        # It is the pipeline snapshot that changed
        # Verify that snapshot ids are not equal. This changed in 0.7.10
        assert create_pipeline_snapshot_id(pipeline_snapshot) != old_pipeline_snapshot_id

        # We also changed execution plan schema in 0.7.11.post1
        assert create_execution_plan_snapshot_id(ep_snapshot) != old_execution_plan_snapshot_id

        # This previously failed with a check error
        assert ExecutionPlanIndex(ep_snapshot, PipelineIndex(pipeline_snapshot))
def test_snapshot_0_7_6_pre_add_pipeline_snapshot():
    run_id = "fb0b3905-068b-4444-8f00-76fcbaef7e8b"
    test_dir = file_relative_path(__file__, "snapshot_0_7_6_pre_add_pipeline_snapshot/sqlite")
    with restore_directory(test_dir):
        # invariant check to make sure migration has not been run yet

        db_path = os.path.join(test_dir, "history", "runs.db")
        assert get_current_alembic_version(db_path) == "9fe9e746268c"
        assert "snapshots" not in get_sqlite3_tables(db_path)

        instance = DagsterInstance.from_ref(InstanceRef.from_dir(test_dir))

        @solid
        def noop_solid(_):
            pass

        @pipeline
        def noop_pipeline():
            noop_solid()

        with pytest.raises(
            DagsterInstanceMigrationRequired,
            match=_run_storage_migration_regex(current_revision="9fe9e746268c"),
        ):
            execute_pipeline(noop_pipeline, instance=instance)

        assert len(instance.get_runs()) == 1

        # Make sure the schema is migrated
        instance.upgrade()

        assert get_current_alembic_version(db_path) == "c63a27054f08"
        assert "snapshots" in get_sqlite3_tables(db_path)
        assert {"id", "snapshot_id", "snapshot_body", "snapshot_type"} == set(
            get_sqlite3_columns(db_path, "snapshots")
        )

        assert len(instance.get_runs()) == 1
        run = instance.get_run_by_id(run_id)

        assert run.run_id == run_id
        assert run.pipeline_snapshot_id is None

        result = execute_pipeline(noop_pipeline, instance=instance)

        assert result.success

        runs = instance.get_runs()
        assert len(runs) == 2

        new_run_id = result.run_id

        new_run = instance.get_run_by_id(new_run_id)

        assert new_run.pipeline_snapshot_id
def test_0_6_6_sqlite_migrate():
    test_dir = file_relative_path(__file__, 'snapshot_0_6_6/sqlite')
    assert os.path.exists(file_relative_path(__file__, 'snapshot_0_6_6/sqlite/runs.db'))
    assert not os.path.exists(file_relative_path(__file__, 'snapshot_0_6_6/sqlite/history/runs.db'))

    with restore_directory(test_dir):
        instance = DagsterInstance.from_ref(InstanceRef.from_dir(test_dir))
        instance.upgrade()

        runs = instance.get_runs()

        assert len(runs) == 1

        run_ids = instance._event_storage.get_all_run_ids()
        assert run_ids == ['89296095-892d-4a15-aa0d-9018d1580945']

        instance._event_storage.get_logs_for_run('89296095-892d-4a15-aa0d-9018d1580945')

        assert not os.path.exists(file_relative_path(__file__, 'snapshot_0_6_6/sqlite/runs.db'))
        assert os.path.exists(
            file_relative_path(__file__, 'snapshot_0_6_6/sqlite/history/runs.db')
        )
def test_start_time_end_time():
    src_dir = file_relative_path(__file__, "snapshot_0_13_12_pre_add_start_time_and_end_time")
    with copy_directory(src_dir) as test_dir:

        @job
        def _test():
            pass

        db_path = os.path.join(test_dir, "history", "runs.db")
        assert get_current_alembic_version(db_path) == "7f2b1a4ca7a5"
        assert "start_time" not in set(get_sqlite3_columns(db_path, "runs"))
        assert "end_time" not in set(get_sqlite3_columns(db_path, "runs"))

        # this migration was optional, so make sure things work before migrating
        instance = DagsterInstance.from_ref(InstanceRef.from_dir(test_dir))
        assert "start_time" not in set(get_sqlite3_columns(db_path, "runs"))
        assert "end_time" not in set(get_sqlite3_columns(db_path, "runs"))
        assert instance.get_run_records()
        assert instance.create_run_for_pipeline(_test)

        instance.upgrade()

        # Make sure the schema is migrated
        assert "start_time" in set(get_sqlite3_columns(db_path, "runs"))
        assert instance.get_run_records()
        assert instance.create_run_for_pipeline(_test)

        instance._run_storage._alembic_downgrade(rev="7f2b1a4ca7a5")

        assert get_current_alembic_version(db_path) == "7f2b1a4ca7a5"
        assert True
def test_run_step_stats_with_retries():
    _called = None

    @pipeline
    def simple():
        @solid
        def should_succeed(context):
            context.log.info("succeed")
            return "yay"

        @solid(
            input_defs=[InputDefinition("_input", str)],
            output_defs=[OutputDefinition(str)],
        )
        def should_retry(context, _input):
            raise RetryRequested(max_retries=3)

        @solid
        def should_not_execute(_, x):
            _called = True
            return x

        should_not_execute(should_retry(should_succeed()))

    with seven.TemporaryDirectory() as tmpdir_path:
        instance = DagsterInstance.from_ref(InstanceRef.from_dir(tmpdir_path))
        result = execute_pipeline(simple, instance=instance, raise_on_error=False)

        step_stats = instance.get_run_step_stats(result.run_id, step_keys=["should_retry.compute"])
        assert len(step_stats) == 1
        assert step_stats[0].step_key == "should_retry.compute"
        assert step_stats[0].status == StepEventStatus.FAILURE
        assert step_stats[0].end_time > step_stats[0].start_time
        assert step_stats[0].attempts == 4
        assert not _called
def test_run_created_in_0_7_9_snapshot_id_change():
    test_dir = file_relative_path(__file__, 'snapshot_0_7_9_shapshot_id_creation_change/sqlite')
    with restore_directory(test_dir):
        instance = DagsterInstance.from_ref(InstanceRef.from_dir(test_dir))

        # run_id = 'e297fa70-49e8-43f8-abfe-1634f02644f6'

        old_pipeline_snapshot_id = '88528edde2ed64da3c39cca0da8ba2f7586c1a5d'
        old_execution_plan_snapshot_id = '2246f8e5a10d21e15fbfa3773d7b2d0bc1fa9d3d'

        with pytest.warns(
            UserWarning,
            match=re.escape(
                '"input_hydration_schema_key" is deprecated and will be removed in 0.10.0, use '
                '"loader_schema_key" instead.'
            ),
        ):
            historical_pipeline = instance.get_historical_pipeline(old_pipeline_snapshot_id)

        pipeline_snapshot = historical_pipeline.pipeline_snapshot
        ep_snapshot = instance.get_execution_plan_snapshot(old_execution_plan_snapshot_id)

        # It is the pipeline snapshot that changed
        # Verify that snapshot ids are not equal. This changed in 0.7.10
        assert create_pipeline_snapshot_id(pipeline_snapshot) != old_pipeline_snapshot_id

        # We also changed execution plan schema in 0.7.11.post1
        assert create_execution_plan_snapshot_id(ep_snapshot) != old_execution_plan_snapshot_id

        # This previously failed with a check error
        assert ExternalExecutionPlan(ep_snapshot, historical_pipeline)
def test_0_6_4():
    instance = DagsterInstance.from_ref(
        InstanceRef.from_dir(file_relative_path(__file__, 'snapshot_0_6_4'))
    )

    runs = instance.all_runs()
    for run in runs:
        instance.all_logs(run.run_id)
def test_event_log_asset_key_migration():
    src_dir = file_relative_path(__file__, "snapshot_0_9_22_lazy_asset_index_migration/sqlite")
    with copy_directory(src_dir) as test_dir:
        instance = DagsterInstance.from_ref(
            InstanceRef.from_dir(
                test_dir,
                overrides={
                    "event_log_storage": {
                        "module": "dagster.core.storage.event_log.sqlite.consolidated_sqlite_event_log",
                        "class": "ConsolidatedSqliteEventLogStorage",
                        "config": {"base_dir": os.path.join(test_dir, "history")},
                    }
                },
            )
        )

        # ensure everything is upgraded
        instance.upgrade()

        assert isinstance(instance._event_storage, SqlEventLogStorage)
        assert not instance._event_storage.has_secondary_index(SECONDARY_INDEX_ASSET_KEY)

        old_keys = instance.all_asset_keys()

        assert instance._event_storage.has_secondary_index(SECONDARY_INDEX_ASSET_KEY)

        new_keys = instance.all_asset_keys()

        assert set(old_keys) == set(new_keys)
def test_run_step_stats():
    @pipeline
    def simple():
        @solid
        def should_succeed(context):
            context.log.info('succeed')
            return 'yay'

        @solid(
            input_defs=[InputDefinition('_input', str)],
            output_defs=[OutputDefinition(str)],
        )
        def should_fail(context, _input):
            context.log.info('fail')
            raise Exception('booo')

        @solid
        def should_skip(context, _input):
            context.log.info('skip')
            return _input

        should_skip(should_fail(should_succeed()))

    with seven.TemporaryDirectory() as tmpdir_path:
        instance = DagsterInstance.from_ref(InstanceRef.from_dir(tmpdir_path))
        result = execute_pipeline(simple, instance=instance, raise_on_error=False)

        step_stats = sorted(instance.get_run_step_stats(result.run_id), key=lambda x: x.end_time)
        assert len(step_stats) == 3
        assert step_stats[0].step_key == 'should_succeed.compute'
        assert step_stats[0].status == StepEventStatus.SUCCESS
        assert step_stats[0].end_time > step_stats[0].start_time
        assert step_stats[1].step_key == 'should_fail.compute'
        assert step_stats[1].status == StepEventStatus.FAILURE
        assert step_stats[1].end_time > step_stats[0].start_time
        assert step_stats[2].step_key == 'should_skip.compute'
        assert step_stats[2].status == StepEventStatus.SKIPPED
        assert step_stats[2].end_time > step_stats[0].start_time
def test_connection_leak(hostname, conn_string):
    num_instances = 20

    tempdir = tempfile.TemporaryDirectory()
    copies = []
    for _ in range(num_instances):
        copies.append(
            DagsterInstance.from_ref(
                InstanceRef.from_dir(
                    tempdir.name, overrides=yaml.safe_load(full_pg_config(hostname))
                )
            )
        )

    with get_conn(conn_string).cursor() as curs:
        # count open connections
        curs.execute("SELECT count(*) from pg_stat_activity")
        res = curs.fetchall()

    # This includes a number of internal connections, so just ensure it did not scale
    # with number of instances
    assert res[0][0] < num_instances

    for copy in copies:
        copy.dispose()

    tempdir.cleanup()
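# Illustrative sketch (not part of the original suite): full_pg_config is defined
# elsewhere in the test package and is only consumed above via yaml.safe_load. A
# payload of roughly the following shape is assumed, pointing run, event-log, and
# schedule storage at Postgres; the helper name, credentials, and URL format below
# are assumptions for illustration only.
def full_pg_config_sketch(hostname):
    return """
    run_storage:
      module: dagster_postgres.run_storage
      class: PostgresRunStorage
      config:
        postgres_url: "postgresql://test:test@{hostname}:5432/test"

    event_log_storage:
      module: dagster_postgres.event_log
      class: PostgresEventLogStorage
      config:
        postgres_url: "postgresql://test:test@{hostname}:5432/test"

    schedule_storage:
      module: dagster_postgres.schedule_storage
      class: PostgresScheduleStorage
      config:
        postgres_url: "postgresql://test:test@{hostname}:5432/test"
    """.format(hostname=hostname)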
def test_downgrade_and_upgrade():
    test_dir = file_relative_path(__file__, 'snapshot_0_7_6_pre_add_pipeline_snapshot/sqlite')
    with restore_directory(test_dir):
        # invariant check to make sure migration has not been run yet

        db_path = os.path.join(test_dir, 'history', 'runs.db')
        assert get_current_alembic_version(db_path) == '9fe9e746268c'
        assert 'snapshots' not in get_sqlite3_tables(db_path)

        instance = DagsterInstance.from_ref(InstanceRef.from_dir(test_dir))

        assert len(instance.get_runs()) == 1

        # Make sure the schema is migrated
        instance.upgrade()

        assert get_current_alembic_version(db_path) == 'c63a27054f08'
        assert 'snapshots' in get_sqlite3_tables(db_path)
        assert {'id', 'snapshot_id', 'snapshot_body', 'snapshot_type'} == set(
            get_sqlite3_columns(db_path, 'snapshots')
        )

        assert len(instance.get_runs()) == 1

        instance._run_storage._alembic_downgrade(rev='9fe9e746268c')

        assert get_current_alembic_version(db_path) == '9fe9e746268c'
        assert 'snapshots' not in get_sqlite3_tables(db_path)

        instance = DagsterInstance.from_ref(InstanceRef.from_dir(test_dir))

        assert len(instance.get_runs()) == 1

        instance.upgrade()

        assert get_current_alembic_version(db_path) == 'c63a27054f08'
        assert 'snapshots' in get_sqlite3_tables(db_path)
        assert {'id', 'snapshot_id', 'snapshot_body', 'snapshot_type'} == set(
            get_sqlite3_columns(db_path, 'snapshots')
        )

        assert len(instance.get_runs()) == 1
def config_defaults(base_dir):
    defaults = InstanceRef.config_defaults(base_dir)
    defaults["run_coordinator"] = ConfigurableClassData(
        "dagster.core.run_coordinator.queued_run_coordinator",
        "QueuedRunCoordinator",
        yaml.dump({}),
    )
    return defaults
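# Note (illustrative): the run_coordinator default injected above is equivalent to the
# following dagster.yaml stanza, with module and class names taken directly from the
# ConfigurableClassData arguments in config_defaults. This queues runs instead of
# launching them immediately.
#
#   run_coordinator:
#     module: dagster.core.run_coordinator.queued_run_coordinator
#     class: QueuedRunCoordinator
#     config: {}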
def test_valid_managed_loggers_instance_yaml():
    ref = InstanceRef.from_dir(
        base_dir=file_relative_path(__file__, "../../../docs_snippets/concepts/logging"),
        config_filename="python_logging_managed_loggers_config.yaml",
    )
    instance = DagsterInstance.from_ref(ref)
    assert instance.managed_python_loggers == ["root"]
def test_valid_handler_instance_yaml():
    ref = InstanceRef.from_dir(
        base_dir=file_relative_path(__file__, "../../../docs_snippets/concepts/logging"),
        config_filename="python_logging_handler_config.yaml",
    )
    instance = DagsterInstance.from_ref(ref)
    assert len(instance.get_handlers()) == 2
def test_valid_log_level_instance_yaml():
    ref = InstanceRef.from_dir(
        base_dir=file_relative_path(__file__, "../../../docs_snippets/concepts/logging"),
        config_filename="python_logging_python_log_level_config.yaml",
    )
    instance = DagsterInstance.from_ref(ref)
    assert instance.python_log_level == "INFO"
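# Illustrative sketch (not part of the original suite): the three yaml-based tests
# above load instance config from docs_snippets files. An equivalent check using
# in-memory overrides might look like the following; the "python_logs" override key
# is assumed to match the schema those yaml files use.
def test_log_level_via_overrides_sketch():
    with tempfile.TemporaryDirectory() as tmpdir_path:
        ref = InstanceRef.from_dir(
            tmpdir_path,
            overrides={"python_logs": {"python_log_level": "DEBUG"}},
        )
        instance = DagsterInstance.from_ref(ref)
        assert instance.python_log_level == "DEBUG"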
def test_downgrade_and_upgrade():
    test_dir = file_relative_path(__file__, "snapshot_0_7_6_pre_add_pipeline_snapshot/sqlite")
    with restore_directory(test_dir):
        # invariant check to make sure migration has not been run yet

        db_path = os.path.join(test_dir, "history", "runs.db")
        assert get_current_alembic_version(db_path) == "9fe9e746268c"
        assert "snapshots" not in get_sqlite3_tables(db_path)

        instance = DagsterInstance.from_ref(InstanceRef.from_dir(test_dir))

        assert len(instance.get_runs()) == 1

        # Make sure the schema is migrated
        instance.upgrade()

        assert get_current_alembic_version(db_path) == "c63a27054f08"
        assert "snapshots" in get_sqlite3_tables(db_path)
        assert {"id", "snapshot_id", "snapshot_body", "snapshot_type"} == set(
            get_sqlite3_columns(db_path, "snapshots")
        )

        assert len(instance.get_runs()) == 1

        instance._run_storage._alembic_downgrade(rev="9fe9e746268c")

        assert get_current_alembic_version(db_path) == "9fe9e746268c"
        assert "snapshots" not in get_sqlite3_tables(db_path)

        instance = DagsterInstance.from_ref(InstanceRef.from_dir(test_dir))

        assert len(instance.get_runs()) == 1

        instance.upgrade()

        assert get_current_alembic_version(db_path) == "c63a27054f08"
        assert "snapshots" in get_sqlite3_tables(db_path)
        assert {"id", "snapshot_id", "snapshot_body", "snapshot_type"} == set(
            get_sqlite3_columns(db_path, "snapshots")
        )

        assert len(instance.get_runs()) == 1
def test_asset_lazy_migration():
    src_dir = file_relative_path(__file__, "compat_tests/snapshot_0_11_0_asset_materialization")
    # should contain materialization events for asset keys a, b, c, d, e, f
    # events a and b have been wiped, but b has been rematerialized

    @op
    def materialize():
        yield AssetMaterialization(AssetKey("a"))
        yield AssetMaterialization(AssetKey("b"))
        yield AssetMaterialization(AssetKey("c"))
        yield AssetMaterialization(AssetKey("d"))
        yield AssetMaterialization(AssetKey("e"))
        yield AssetMaterialization(AssetKey("f"))
        yield Output(None)

    @job
    def my_job():
        materialize()

    with copy_directory(src_dir) as test_dir:
        with DagsterInstance.from_ref(InstanceRef.from_dir(test_dir)) as instance:
            storage = instance.event_log_storage
            assert not storage.has_asset_key_index_cols()
            assert not storage.has_secondary_index(ASSET_KEY_INDEX_COLS)

            # run the schema migration without reindexing the asset keys
            storage.upgrade()
            assert storage.has_asset_key_index_cols()
            assert not storage.has_secondary_index(ASSET_KEY_INDEX_COLS)

            # fetch all asset keys
            instance.all_asset_keys()
            assert not storage.has_secondary_index(ASSET_KEY_INDEX_COLS)

            # wipe a, b in order to populate wipe_timestamp
            storage.wipe_asset(AssetKey("a"))
            storage.wipe_asset(AssetKey("b"))

            # materialize all the assets to populate materialization_timestamp
            my_job.execute_in_process(instance=instance)

            # still should not be migrated (on write)
            assert not storage.has_secondary_index(ASSET_KEY_INDEX_COLS)

            # fetching partial results should not trigger migration
            instance.get_asset_keys(prefix=["b"])
            instance.get_asset_keys(cursor=str(AssetKey("b")))
            instance.get_latest_materialization_events(asset_keys=[AssetKey("b")])
            assert not storage.has_secondary_index(ASSET_KEY_INDEX_COLS)

            # on read, we should see that all the data has already been migrated and we can now
            # mark the asset key index as migrated
            instance.all_asset_keys()
            assert storage.has_secondary_index(ASSET_KEY_INDEX_COLS)
def test_backcompat_asset_read():
    src_dir = file_relative_path(__file__, "compat_tests/snapshot_0_11_0_asset_materialization")

    # should contain materialization events for asset keys a, b, c, d, e, f
    # events a and b have been wiped, but b has been rematerialized
    def _validate_instance_assets(instance):
        assert instance.all_asset_keys() == [
            AssetKey("b"),
            AssetKey("c"),
            AssetKey("d"),
            AssetKey("e"),
            AssetKey("f"),
        ]
        assert instance.get_asset_keys() == [
            AssetKey("b"),
            AssetKey("c"),
            AssetKey("d"),
            AssetKey("e"),
            AssetKey("f"),
        ]
        assert instance.get_asset_keys(prefix=["d"]) == [AssetKey("d")]
        assert instance.get_asset_keys(limit=3) == [
            AssetKey("b"),
            AssetKey("c"),
            AssetKey("d"),
        ]
        assert instance.get_asset_keys(cursor='["b"]', limit=3) == [
            AssetKey("c"),
            AssetKey("d"),
            AssetKey("e"),
        ]

    @op
    def materialize():
        yield AssetMaterialization(AssetKey("e"))
        yield AssetMaterialization(AssetKey("f"))
        yield Output(None)

    @job
    def my_job():
        materialize()

    with copy_directory(src_dir) as test_dir:
        with DagsterInstance.from_ref(InstanceRef.from_dir(test_dir)) as instance:
            _validate_instance_assets(instance)

            my_job.execute_in_process(instance=instance)
            _validate_instance_assets(instance)

            instance.upgrade()
            _validate_instance_assets(instance)

            my_job.execute_in_process(instance=instance)
            _validate_instance_assets(instance)

            instance.reindex()
            _validate_instance_assets(instance)

            my_job.execute_in_process(instance=instance)
            _validate_instance_assets(instance)
def test_get_materialization_count_by_partition(asset_aware_context):
    src_dir = file_relative_path(__file__, "compat_tests/snapshot_0_11_0_asset_materialization")

    d = AssetKey("c")
    with copy_directory(src_dir) as test_dir:
        with DagsterInstance.from_ref(InstanceRef.from_dir(test_dir)) as instance:
            storage = instance.event_log_storage

            materialization_count_by_key = storage.get_materialization_count_by_partition([d])

            assert materialization_count_by_key.get(d) == {}

    a = AssetKey("no_materializations_asset")
    b = AssetKey("no_partitions_asset")
    c = AssetKey("two_partitions_asset")

    @op
    def materialize():
        yield AssetMaterialization(b)
        yield AssetMaterialization(c, partition="a")
        yield Output(None)

    @job
    def my_job():
        materialize()

    @op
    def materialize_two():
        yield AssetMaterialization(c, partition="a")
        yield AssetMaterialization(c, partition="b")
        yield Output(None)

    @job
    def job_two():
        materialize_two()

    with asset_aware_context() as ctx:
        instance, event_log_storage = ctx

        my_job.execute_in_process(instance=instance)

        materialization_count_by_key = event_log_storage.get_materialization_count_by_partition(
            [a, b, c]
        )

        assert materialization_count_by_key.get(a) == {}
        assert materialization_count_by_key.get(b) == {}
        assert materialization_count_by_key.get(c)["a"] == 1
        assert len(materialization_count_by_key.get(c)) == 1

        job_two.execute_in_process(instance=instance)

        materialization_count_by_key = event_log_storage.get_materialization_count_by_partition(
            [a, b, c]
        )
        assert materialization_count_by_key.get(c)["a"] == 2
        assert materialization_count_by_key.get(c)["b"] == 1
def test_0_8_0_scheduler_migration():
    test_dir = file_relative_path(__file__, 'snapshot_0_8_0_scheduler_change')
    with restore_directory(test_dir):
        instance = DagsterInstance.from_ref(InstanceRef.from_dir(test_dir))

        with pytest.raises(
            DagsterInstanceMigrationRequired,
            match=_schedule_storage_migration_regex(current_revision='da7cd32b690d'),
        ):
            instance.all_stored_schedule_state()

        instance.upgrade()

        # upgrade just drops tables, and user upgrade flow is cli entry - so
        # emulate by new-ing up instance which will create new tables
        instance = DagsterInstance.from_ref(InstanceRef.from_dir(test_dir))

        instance.all_stored_schedule_state()
def test_output_manager_with_failure():
    _called_input_manager = False
    _called_solid = False

    @output_manager
    def should_fail(_, _obj):
        raise Failure(
            description="Foolure",
            metadata_entries=[
                EventMetadataEntry.text(label="label", text="text", description="description")
            ],
        )

    @input_manager
    def should_not_enter(_):
        _called_input_manager = True

    @solid(output_defs=[OutputDefinition(manager_key="should_fail")])
    def emit_str(_):
        return "emit"

    @solid(
        input_defs=[
            InputDefinition(name="_input_str", dagster_type=str, manager_key="should_not_enter")
        ]
    )
    def should_not_call(_, _input_str):
        _called_solid = True

    @pipeline(
        mode_defs=[
            ModeDefinition(
                resource_defs={"should_fail": should_fail, "should_not_enter": should_not_enter}
            )
        ]
    )
    def simple():
        should_not_call(emit_str())

    with tempfile.TemporaryDirectory() as tmpdir_path:
        instance = DagsterInstance.from_ref(InstanceRef.from_dir(tmpdir_path))
        result = execute_pipeline(simple, instance=instance, raise_on_error=False)

        assert not result.success

        failure_data = result.result_for_solid("emit_str").failure_data

        assert failure_data.error.cls_name == "Failure"
        assert failure_data.user_failure_data.description == "Foolure"
        assert failure_data.user_failure_data.metadata_entries[0].label == "label"
        assert failure_data.user_failure_data.metadata_entries[0].entry_data.text == "text"
        assert failure_data.user_failure_data.metadata_entries[0].description == "description"

        assert not _called_input_manager and not _called_solid
def test_0_10_6_add_bulk_actions_table():
    src_dir = file_relative_path(__file__, "snapshot_0_10_6_add_bulk_actions_table/sqlite")
    with copy_directory(src_dir) as test_dir:
        db_path = os.path.join(test_dir, "history", "runs.db")
        assert get_current_alembic_version(db_path) == "0da417ae1b81"
        assert "bulk_actions" not in get_sqlite3_tables(db_path)

        with DagsterInstance.from_ref(InstanceRef.from_dir(test_dir)) as instance:
            instance.upgrade()

        assert "bulk_actions" in get_sqlite3_tables(db_path)
def test_init_compute_log_with_bad_config_override():
    with seven.TemporaryDirectory() as tmpdir_path:
        with pytest.raises(DagsterInvalidConfigError, match='Undefined field "garbage"'):
            DagsterInstance.from_ref(
                InstanceRef.from_dir(
                    tmpdir_path, overrides={'compute_logs': {'garbage': 'flargh'}}
                )
            )
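# Illustrative counterpart (not part of the original suite): a well-formed
# "compute_logs" override, for contrast with the bad-config case above. The module
# and class paths are assumed from the local compute log manager that shipped with
# dagster in this era; treat them as an assumption rather than a guaranteed API.
def test_init_compute_log_with_valid_config_override_sketch():
    with seven.TemporaryDirectory() as tmpdir_path:
        instance = DagsterInstance.from_ref(
            InstanceRef.from_dir(
                tmpdir_path,
                overrides={
                    'compute_logs': {
                        'module': 'dagster.core.storage.local_compute_log_manager',
                        'class': 'LocalComputeLogManager',
                        'config': {'base_dir': tmpdir_path},
                    }
                },
            )
        )
        assert instance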