def __init__(self, *args, **kwargs):
    super(DagsterAutoRestartTrick, self).__init__(*args, **kwargs)
    self.restarting = False

def on_any_event(self, event):
    self.restarting = True
    super(DagsterAutoRestartTrick, self).on_any_event(event)
    self.restarting = False


def handle_sigterm(_signum, _frame):
    raise KeyboardInterrupt()


# Note: This must be declared outside of `main` or it is cleaned up by
# some weakref magic when the watchmedo restarts Dagit.
host_tempdir = seven.TemporaryDirectory()
watch_tempdir = seven.TemporaryDirectory()


def main():
    # Build the dagit-cli command
    watch_for_reload = True
    fallback_set = False
    command = ['dagit-cli']
    for arg in sys.argv[1:]:
        if arg == '--help':
            watch_for_reload = False
            command.append(arg)
        elif arg == '--version':
            watch_for_reload = False
            command.append(arg)
def temp_storage():
    if DagsterInstance._PROCESS_TEMPDIR is None:
        DagsterInstance._PROCESS_TEMPDIR = seven.TemporaryDirectory()
    return DagsterInstance._PROCESS_TEMPDIR.name
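The snippet above memoizes a single `TemporaryDirectory` on the class so every caller in the process shares one scratch path; the object itself (not just its name) must stay referenced, or its finalizer deletes the directory. A minimal standalone sketch of the same pattern, using a hypothetical `_ProcessScopedTempdir` class in place of `DagsterInstance`:

import tempfile

class _ProcessScopedTempdir:
    # Hypothetical illustration class, not part of dagster.
    _TEMPDIR = None  # class-level cache shared across calls in this process

    @classmethod
    def path(cls):
        # Keep the TemporaryDirectory object alive on the class; if only the
        # name were stored, the finalizer would remove the directory.
        if cls._TEMPDIR is None:
            cls._TEMPDIR = tempfile.TemporaryDirectory()
        return cls._TEMPDIR.name

# Repeated calls return the same path.
assert _ProcessScopedTempdir.path() == _ProcessScopedTempdir.path()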
def test_pipeline_reexecution_successful_launch():
    test_queue = InMemoryRunLauncher()

    with seven.TemporaryDirectory() as temp_dir:
        instance = DagsterInstance(
            instance_type=InstanceType.EPHEMERAL,
            local_artifact_storage=LocalArtifactStorage(temp_dir),
            run_storage=InMemoryRunStorage(),
            event_storage=InMemoryEventLogStorage(),
            compute_log_manager=NoOpComputeLogManager(temp_dir),
            run_launcher=test_queue,
        )
        context = define_context_for_repository_yaml(
            path=file_relative_path(__file__, '../repository.yaml'), instance=instance
        )

        run_id = make_new_run_id()
        result = execute_dagster_graphql(
            context=context,
            query=LAUNCH_PIPELINE_EXECUTION_MUTATION,
            variables={
                'executionParams': {
                    'selector': {'name': 'no_config_pipeline'},
                    'environmentConfigData': {'storage': {'filesystem': {}}},
                    'executionMetadata': {'runId': run_id},
                    'mode': 'default',
                }
            },
        )
        assert (
            result.data['launchPipelineExecution']['__typename']
            == 'LaunchPipelineExecutionSuccess'
        )
        assert result.data['launchPipelineExecution']['run']['status'] == 'NOT_STARTED'

        test_queue.run_one(instance)

        result = execute_dagster_graphql(
            context=context, query=RUN_QUERY, variables={'runId': run_id}
        )
        assert result.data['pipelineRunOrError']['__typename'] == 'PipelineRun'
        assert result.data['pipelineRunOrError']['status'] == 'SUCCESS'

        # reexecution
        new_run_id = make_new_run_id()
        result = execute_dagster_graphql(
            context=context,
            query=LAUNCH_PIPELINE_REEXECUTION_MUTATION,
            variables={
                'executionParams': {
                    'selector': {'name': 'no_config_pipeline'},
                    'environmentConfigData': {'storage': {'filesystem': {}}},
                    'executionMetadata': {
                        'runId': new_run_id,
                        'rootRunId': run_id,
                        'parentRunId': run_id,
                    },
                    'mode': 'default',
                }
            },
        )
        assert (
            result.data['launchPipelineReexecution']['__typename']
            == 'LaunchPipelineReexecutionSuccess'
        )

        test_queue.run_one(instance)

        result = execute_dagster_graphql(
            context=context, query=RUN_QUERY, variables={'runId': new_run_id}
        )
        assert result.data['pipelineRunOrError']['__typename'] == 'PipelineRun'
        assert result.data['pipelineRunOrError']['status'] == 'SUCCESS'
def create_sqlite_run_event_logstorage():
    with seven.TemporaryDirectory() as tmpdir_path:
        yield SqliteEventLogStorage(tmpdir_path)
def create_in_memory_event_log_instance():
    with seven.TemporaryDirectory() as temp_dir:
        asset_storage = InMemoryEventLogStorage()
        instance = get_instance(temp_dir, asset_storage)
        yield [instance, asset_storage]
def test_run_groups_over_time():
    with seven.TemporaryDirectory() as tempdir:
        instance = DagsterInstance.local_temp(tempdir=tempdir)

        repo_1 = get_repo_at_time_1()

        full_evolve_run_id = execute_pipeline(
            repo_1.get_pipeline("evolving_pipeline"), instance=instance
        ).run_id
        foo_run_id = execute_pipeline(
            repo_1.get_pipeline("foo_pipeline"), instance=instance
        ).run_id
        evolve_a_run_id = execute_pipeline(
            repo_1.get_pipeline("evolving_pipeline").get_pipeline_subset_def({"solid_A"}),
            instance=instance,
        ).run_id
        evolve_b_run_id = execute_pipeline(
            repo_1.get_pipeline("evolving_pipeline").get_pipeline_subset_def({"solid_B"}),
            instance=instance,
        ).run_id

        context_at_time_1 = define_out_of_process_context(
            __file__, "get_repo_at_time_1", instance
        )

        result = execute_dagster_graphql(context_at_time_1, ALL_RUN_GROUPS_QUERY)
        assert result.data
        assert "runGroupsOrError" in result.data
        assert "results" in result.data["runGroupsOrError"]
        assert len(result.data["runGroupsOrError"]["results"]) == 4

        t1_runs = {
            run["runId"]: run
            for group in result.data["runGroupsOrError"]["results"]
            for run in group["runs"]
        }

        # test full_evolve_run_id
        assert t1_runs[full_evolve_run_id]["pipeline"] == {
            "__typename": "PipelineSnapshot",
            "name": "evolving_pipeline",
            "solidSelection": None,
        }

        # test foo_run_id
        assert t1_runs[foo_run_id]["pipeline"] == {
            "__typename": "PipelineSnapshot",
            "name": "foo_pipeline",
            "solidSelection": None,
        }

        # test evolve_a_run_id
        assert t1_runs[evolve_a_run_id]["pipeline"] == {
            "__typename": "PipelineSnapshot",
            "name": "evolving_pipeline",
            "solidSelection": ["solid_A"],
        }
        assert t1_runs[evolve_a_run_id]["pipelineSnapshotId"]

        # test evolve_b_run_id
        assert t1_runs[evolve_b_run_id]["pipeline"] == {
            "__typename": "PipelineSnapshot",
            "name": "evolving_pipeline",
            "solidSelection": ["solid_B"],
        }

        context_at_time_2 = define_out_of_process_context(
            __file__, "get_repo_at_time_2", instance
        )

        result = execute_dagster_graphql(context_at_time_2, ALL_RUN_GROUPS_QUERY)
        assert "runGroupsOrError" in result.data
        assert "results" in result.data["runGroupsOrError"]
        assert len(result.data["runGroupsOrError"]["results"]) == 4

        t2_runs = {
            run["runId"]: run
            for group in result.data["runGroupsOrError"]["results"]
            for run in group["runs"]
        }

        # test full_evolve_run_id
        assert t2_runs[full_evolve_run_id]["pipeline"] == {
            "__typename": "PipelineSnapshot",
            "name": "evolving_pipeline",
            "solidSelection": None,
        }

        # test evolve_a_run_id
        assert t2_runs[evolve_a_run_id]["pipeline"] == {
            "__typename": "PipelineSnapshot",
            "name": "evolving_pipeline",
            "solidSelection": ["solid_A"],
        }
        assert t2_runs[evolve_a_run_id]["pipelineSnapshotId"]

        # names same
        assert (
            t1_runs[full_evolve_run_id]["pipeline"]["name"]
            == t2_runs[evolve_a_run_id]["pipeline"]["name"]
        )

        # snapshots differ
        assert (
            t1_runs[full_evolve_run_id]["pipelineSnapshotId"]
            != t2_runs[evolve_a_run_id]["pipelineSnapshotId"]
        )

        # pipeline name changed
        assert t2_runs[foo_run_id]["pipeline"] == {
            "__typename": "PipelineSnapshot",
            "name": "foo_pipeline",
            "solidSelection": None,
        }

        # subset no longer valid - b renamed
        assert t2_runs[evolve_b_run_id]["pipeline"] == {
            "__typename": "PipelineSnapshot",
            "name": "evolving_pipeline",
            "solidSelection": ["solid_B"],
        }
def test_priority_pipeline():
    with seven.TemporaryDirectory() as tempdir:
        result = execute_pipeline_on_celery(tempdir, 'priority_pipeline')
        assert result.success
def test_0_7_6_postgres_pre_add_pipeline_snapshot(hostname, conn_string):
    engine = create_engine(conn_string)
    engine.execute("drop schema public cascade;")
    engine.execute("create schema public;")

    env = os.environ.copy()
    env["PGPASSWORD"] = "******"
    subprocess.check_call(
        [
            "psql",
            "-h",
            hostname,
            "-p",
            "5432",
            "-U",
            "test",
            "-f",
            file_relative_path(
                __file__, "snapshot_0_7_6_pre_add_pipeline_snapshot/postgres/pg_dump.txt"
            ),
        ],
        env=env,
    )

    run_id = "d5f89349-7477-4fab-913e-0925cef0a959"

    with seven.TemporaryDirectory() as tempdir:
        with open(file_relative_path(__file__, "dagster.yaml"), "r") as template_fd:
            with open(os.path.join(tempdir, "dagster.yaml"), "w") as target_fd:
                template = template_fd.read().format(hostname=hostname)
                target_fd.write(template)

        instance = DagsterInstance.from_config(tempdir)

        @solid
        def noop_solid(_):
            pass

        @pipeline
        def noop_pipeline():
            noop_solid()

        with pytest.raises(
            DagsterInstanceMigrationRequired,
            match=_migration_regex("run", current_revision=None),
        ):
            execute_pipeline(noop_pipeline, instance=instance)

        # ensure migration is run
        instance.upgrade()

        runs = instance.get_runs()
        assert len(runs) == 1
        assert runs[0].run_id == run_id

        run = instance.get_run_by_id(run_id)
        assert run.run_id == run_id
        assert run.pipeline_snapshot_id is None

        result = execute_pipeline(noop_pipeline, instance=instance)
        assert result.success

        runs = instance.get_runs()
        assert len(runs) == 2

        new_run_id = result.run_id
        new_run = instance.get_run_by_id(new_run_id)
        assert new_run.pipeline_snapshot_id
def graphql_context():
    with seven.TemporaryDirectory() as temp_dir:
        yield define_test_context(DagsterInstance.local_temp(temp_dir))
def _post_artifacts(self, log, step_run_ref, run_id, step_key):
    '''
    Synchronize the step run ref and pyspark code to an S3 staging bucket for use on EMR.

    For the zip file, consider the following toy example:

        # Folder: my_pyspark_project/
        # a.py
        def foo():
            print(1)

        # b.py
        def bar():
            print(2)

        # main.py
        from a import foo
        from b import bar

        foo()
        bar()

    This will zip up `my_pyspark_project/` as `my_pyspark_project.zip`. Then, when running
    `spark-submit --py-files my_pyspark_project.zip emr_step_main.py` on EMR, this will
    print 1, 2.
    '''
    with seven.TemporaryDirectory() as temp_dir:
        s3 = boto3.client('s3', region_name=self.region_name)

        # Upload step run ref
        def _upload_file_to_s3(local_path, s3_filename):
            key = self._artifact_s3_key(run_id, step_key, s3_filename)
            s3_uri = self._artifact_s3_uri(run_id, step_key, s3_filename)
            log.debug(
                'Uploading file {local_path} to {s3_uri}'.format(
                    local_path=local_path, s3_uri=s3_uri
                )
            )
            s3.upload_file(Filename=local_path, Bucket=self.staging_bucket, Key=key)

        # Upload main file.
        # The remote Dagster installation should also have the file, but locating it there
        # could be a pain.
        main_local_path = self._main_file_local_path()
        _upload_file_to_s3(main_local_path, self._main_file_name())

        if self.deploy_local_pipeline_package:
            # Zip and upload package containing pipeline
            zip_local_path = os.path.join(temp_dir, CODE_ZIP_NAME)
            build_pyspark_zip(zip_local_path, self.local_pipeline_package_path)
            _upload_file_to_s3(zip_local_path, CODE_ZIP_NAME)

        # Create step run ref pickle file
        step_run_ref_local_path = os.path.join(temp_dir, PICKLED_STEP_RUN_REF_FILE_NAME)
        with open(step_run_ref_local_path, 'wb') as step_pickle_file:
            pickle.dump(step_run_ref, step_pickle_file)

        _upload_file_to_s3(step_run_ref_local_path, PICKLED_STEP_RUN_REF_FILE_NAME)
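For context on the `build_pyspark_zip` call above: the docstring's toy example only works if the zip's entries are rooted at the package directory, so that `from a import foo` resolves once the archive is on the Python path. A hedged sketch of that zipping step, with a hypothetical `build_project_zip` helper standing in for the real implementation:

import os
import zipfile

def build_project_zip(zip_path, package_dir):
    # Hypothetical stand-in for dagster's zip helper; illustration only.
    with zipfile.ZipFile(zip_path, 'w', zipfile.ZIP_DEFLATED) as zf:
        for root, _dirs, files in os.walk(package_dir):
            for name in files:
                local = os.path.join(root, name)
                # Store paths relative to the package dir so that modules like
                # `a.py` sit at the zip root, matching the docstring's example
                # of `spark-submit --py-files my_pyspark_project.zip`.
                zf.write(local, os.path.relpath(local, package_dir))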
def dagster_cli_runner():
    with seven.TemporaryDirectory() as dagster_home_temp:
        with instance_for_test_tempdir(dagster_home_temp):
            yield CliRunner(env={"DAGSTER_HOME": dagster_home_temp})
def sqlite_instance_with_manager_disabled():
    with seven.TemporaryDirectory() as temp_dir:
        yield DagsterInstance.local_temp(
            tempdir=temp_dir, overrides={'dagit': {'execution_manager': {'disabled': True}}}
        )
def dagster_cli_runner():
    with seven.TemporaryDirectory() as dagster_home_temp:
        yield CliRunner(env={'DAGSTER_HOME': dagster_home_temp})
def test_execute_display_command():
    with seven.TemporaryDirectory() as temp_dir:
        run_store = SqliteRunStorage.from_local(temp_dir)
        event_store = ConsolidatedSqliteEventLogStorage(temp_dir)
        compute_log_manager = LocalComputeLogManager(temp_dir)
        instance = DagsterInstance(
            instance_type=InstanceType.PERSISTENT,
            local_artifact_storage=LocalArtifactStorage(temp_dir),
            run_storage=run_store,
            event_storage=event_store,
            compute_log_manager=compute_log_manager,
            run_coordinator=DefaultRunCoordinator(),
            run_launcher=DefaultRunLauncher(),
        )
        run_config = {
            "solids": {
                "create_string_1": {"config": {"input_str": "apple", "base_dir": temp_dir}},
                "create_string_2": {"config": {"input_str": "apple", "base_dir": temp_dir}},
                "take_string_1": {"config": {"input_str": "apple", "base_dir": temp_dir}},
                "take_string_2": {"config": {"input_str": "apple", "base_dir": temp_dir}},
                "take_string_two_inputs": {
                    "config": {"input_str": "apple", "base_dir": temp_dir}
                },
            },
            "intermediate_storage": {"filesystem": {"config": {"base_dir": temp_dir}}},
        }

        # write run config to temp file
        # file is temp because intermediate storage directory is temporary
        with open(os.path.join(temp_dir, "pipeline_config.yaml"), "w") as f:
            f.write(yaml.dump(run_config))

        kwargs = {
            "config": (os.path.join(temp_dir, "pipeline_config.yaml"),),
            "pipeline": "basic_pipeline",
            "python_file": file_relative_path(
                __file__, "../../core_tests/execution_tests/memoized_dev_loop_pipeline.py"
            ),
            "tags": '{"dagster/is_memoized_run": "true"}',
        }

        with Capturing() as output:
            execute_list_versions_command(kwargs=kwargs, instance=instance)

        assert output

        # execute the pipeline once so that addresses have been populated.
        result = execute_pipeline(
            basic_pipeline,
            run_config=run_config,
            mode="only_mode",
            tags={"dagster/is_memoized_run": "true"},
            instance=instance,
        )
        assert result.success

        with Capturing() as output:
            execute_list_versions_command(kwargs=kwargs, instance=instance)

        assert output
def default_instance(overrides=None):
    with seven.TemporaryDirectory() as temp_dir:
        with default_instance_tempdir(temp_dir, overrides) as instance:
            yield instance
def test_init_compute_log_with_bad_config():
    with seven.TemporaryDirectory() as tmpdir_path:
        with open(os.path.join(tmpdir_path, 'dagster.yaml'), 'w') as fd:
            yaml.dump({'compute_logs': {'garbage': 'flargh'}}, fd, default_flow_style=False)
        with pytest.raises(DagsterInvalidConfigError, match='Undefined field "garbage"'):
            DagsterInstance.from_ref(InstanceRef.from_dir(tmpdir_path))
def schedule_tempdir():
    with seven.TemporaryDirectory() as tempdir:
        yield tempdir
def test_init_compute_log_with_bad_config_override():
    with seven.TemporaryDirectory() as tmpdir_path:
        with pytest.raises(DagsterInvalidConfigError, match='Undefined field "garbage"'):
            DagsterInstance.from_ref(
                InstanceRef.from_dir(
                    tmpdir_path, overrides={'compute_logs': {'garbage': 'flargh'}}
                )
            )
def create_consolidated_sqlite_run_event_log_storage():
    with seven.TemporaryDirectory() as tmpdir_path:
        yield ConsolidatedSqliteEventLogStorage(tmpdir_path)
def tempdir_wrapper(tempdir=None):
    if tempdir:
        yield tempdir
    else:
        with seven.TemporaryDirectory() as t:
            yield t
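`tempdir_wrapper` is a plain generator function, so at its definition site it is presumably wrapped as a context manager (an assumption here, not shown in the snippet); either branch yields exactly once, creating a fresh directory only when none was supplied. A usage sketch under that assumption:

from contextlib import contextmanager

_tempdir_wrapper = contextmanager(tempdir_wrapper)  # assumed decoration

with _tempdir_wrapper() as path:
    pass  # fresh temporary directory, deleted on exit

with _tempdir_wrapper('/some/existing/dir') as path:
    pass  # passed through unchanged, and not deleted on exit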
def test_compute_log_manager(s3_bucket):
    @pipeline
    def simple():
        @solid
        def easy(context):
            context.log.info("easy")
            print(HELLO_WORLD)  # pylint: disable=print-call
            return "easy"

        easy()

    # Uses mock S3
    s3 = boto3.client("s3")
    s3.create_bucket(Bucket=s3_bucket)

    with seven.TemporaryDirectory() as temp_dir:
        run_store = SqliteRunStorage.from_local(temp_dir)
        event_store = SqliteEventLogStorage(temp_dir)
        manager = S3ComputeLogManager(bucket=s3_bucket, prefix="my_prefix", local_dir=temp_dir)
        instance = DagsterInstance(
            instance_type=InstanceType.PERSISTENT,
            local_artifact_storage=LocalArtifactStorage(temp_dir),
            run_storage=run_store,
            event_storage=event_store,
            compute_log_manager=manager,
            run_launcher=CliApiRunLauncher(),
        )
        result = execute_pipeline(simple, instance=instance)
        compute_steps = [
            event.step_key
            for event in result.step_event_list
            if event.event_type == DagsterEventType.STEP_START
        ]
        assert len(compute_steps) == 1
        step_key = compute_steps[0]

        stdout = manager.read_logs_file(result.run_id, step_key, ComputeIOType.STDOUT)
        assert stdout.data == HELLO_WORLD + SEPARATOR

        stderr = manager.read_logs_file(result.run_id, step_key, ComputeIOType.STDERR)
        for expected in EXPECTED_LOGS:
            assert expected in stderr.data

        # Check S3 directly
        s3_object = s3.get_object(
            Bucket=s3_bucket,
            Key="{prefix}/storage/{run_id}/compute_logs/easy.compute.err".format(
                prefix="my_prefix", run_id=result.run_id
            ),
        )
        stderr_s3 = six.ensure_str(s3_object["Body"].read())
        for expected in EXPECTED_LOGS:
            assert expected in stderr_s3

        # Check download behavior by deleting locally cached logs
        compute_logs_dir = os.path.join(temp_dir, result.run_id, "compute_logs")
        for filename in os.listdir(compute_logs_dir):
            os.unlink(os.path.join(compute_logs_dir, filename))

        stdout = manager.read_logs_file(result.run_id, step_key, ComputeIOType.STDOUT)
        assert stdout.data == HELLO_WORLD + SEPARATOR

        stderr = manager.read_logs_file(result.run_id, step_key, ComputeIOType.STDERR)
        for expected in EXPECTED_LOGS:
            assert expected in stderr.data
def test_compute_log_manager(
    mock_create_blob_client, mock_generate_blob_sas, storage_account, container, credential
):
    mock_generate_blob_sas.return_value = "fake-url"
    fake_client = FakeBlobServiceClient(storage_account)
    mock_create_blob_client.return_value = fake_client

    @pipeline
    def simple():
        @solid
        def easy(context):
            context.log.info("easy")
            print(HELLO_WORLD)  # pylint: disable=print-call
            return "easy"

        easy()

    with seven.TemporaryDirectory() as temp_dir:
        run_store = SqliteRunStorage.from_local(temp_dir)
        event_store = SqliteEventLogStorage(temp_dir)
        manager = AzureBlobComputeLogManager(
            storage_account=storage_account,
            container=container,
            prefix="my_prefix",
            local_dir=temp_dir,
            secret_key=credential,
        )
        instance = DagsterInstance(
            instance_type=InstanceType.PERSISTENT,
            local_artifact_storage=LocalArtifactStorage(temp_dir),
            run_storage=run_store,
            event_storage=event_store,
            compute_log_manager=manager,
            run_coordinator=DefaultRunCoordinator(),
            run_launcher=SyncInMemoryRunLauncher(),
        )
        result = execute_pipeline(simple, instance=instance)
        compute_steps = [
            event.step_key
            for event in result.step_event_list
            if event.event_type == DagsterEventType.STEP_START
        ]
        assert len(compute_steps) == 1
        step_key = compute_steps[0]

        stdout = manager.read_logs_file(result.run_id, step_key, ComputeIOType.STDOUT)
        assert stdout.data == HELLO_WORLD + SEPARATOR

        stderr = manager.read_logs_file(result.run_id, step_key, ComputeIOType.STDERR)
        for expected in EXPECTED_LOGS:
            assert expected in stderr.data

        # Check ADLS2 directly
        adls2_object = fake_client.get_blob_client(
            container=container,
            blob="{prefix}/storage/{run_id}/compute_logs/easy.compute.err".format(
                prefix="my_prefix", run_id=result.run_id
            ),
        )
        adls2_stderr = six.ensure_str(adls2_object.download_blob().readall())
        for expected in EXPECTED_LOGS:
            assert expected in adls2_stderr

        # Check download behavior by deleting locally cached logs
        compute_logs_dir = os.path.join(temp_dir, result.run_id, "compute_logs")
        for filename in os.listdir(compute_logs_dir):
            os.unlink(os.path.join(compute_logs_dir, filename))

        stdout = manager.read_logs_file(result.run_id, step_key, ComputeIOType.STDOUT)
        assert stdout.data == HELLO_WORLD + SEPARATOR

        stderr = manager.read_logs_file(result.run_id, step_key, ComputeIOType.STDERR)
        for expected in EXPECTED_LOGS:
            assert expected in stderr.data
def test_input_manager_with_retries():
    # Mutable containers so the nested managers can record state; a bare
    # `_called = True` inside a closure would only bind a new local variable
    # and leave the outer flag untouched.
    _called = {"value": False}
    _count = {"total": 0}

    @input_manager
    def should_succeed(_, _resource_config):
        if _count["total"] < 2:
            _count["total"] += 1
            raise RetryRequested(max_retries=3)
        return "foo"

    @input_manager
    def should_retry(_, _resource_config):
        raise RetryRequested(max_retries=3)

    @input_manager
    def should_not_execute(_, _resource_config):
        _called["value"] = True

    @pipeline(
        mode_defs=[
            ModeDefinition(
                resource_defs={
                    "should_succeed": should_succeed,
                    "should_not_execute": should_not_execute,
                    "should_retry": should_retry,
                }
            )
        ]
    )
    def simple():
        @solid
        def source_solid(_):
            return "foo"

        @solid(input_defs=[InputDefinition("solid_input", manager_key="should_succeed")])
        def take_input_1(_, solid_input):
            return solid_input

        @solid(input_defs=[InputDefinition("solid_input", manager_key="should_retry")])
        def take_input_2(_, solid_input):
            return solid_input

        @solid(input_defs=[InputDefinition("solid_input", manager_key="should_not_execute")])
        def take_input_3(_, solid_input):
            return solid_input

        take_input_3(take_input_2(take_input_1(source_solid())))

    with seven.TemporaryDirectory() as tmpdir_path:
        instance = DagsterInstance.from_ref(InstanceRef.from_dir(tmpdir_path))
        result = execute_pipeline(simple, instance=instance, raise_on_error=False)

        step_stats = instance.get_run_step_stats(result.run_id)
        assert len(step_stats) == 3

        step_stats_1 = instance.get_run_step_stats(result.run_id, step_keys=["take_input_1"])
        assert len(step_stats_1) == 1
        step_stat_1 = step_stats_1[0]
        assert step_stat_1.status.value == "SUCCESS"
        assert step_stat_1.attempts == 3

        step_stats_2 = instance.get_run_step_stats(result.run_id, step_keys=["take_input_2"])
        assert len(step_stats_2) == 1
        step_stat_2 = step_stats_2[0]
        assert step_stat_2.status.value == "FAILURE"
        assert step_stat_2.attempts == 4

        step_stats_3 = instance.get_run_step_stats(result.run_id, step_keys=["take_input_3"])
        assert len(step_stats_3) == 0
        assert not _called["value"]
def test_0_7_6_postgres_pre_add_pipeline_snapshot(hostname, conn_string):
    engine = create_engine(conn_string)
    engine.execute('drop schema public cascade;')
    engine.execute('create schema public;')

    env = os.environ.copy()
    env['PGPASSWORD'] = '******'
    subprocess.check_call(
        [
            'psql',
            '-h',
            hostname,
            '-p',
            '5432',
            '-U',
            'test',
            '-f',
            file_relative_path(
                __file__, 'snapshot_0_7_6_pre_add_pipeline_snapshot/postgres/pg_dump.txt'
            ),
        ],
        env=env,
    )

    run_id = 'd5f89349-7477-4fab-913e-0925cef0a959'

    with seven.TemporaryDirectory() as tempdir:
        with open(file_relative_path(__file__, 'dagster.yaml'), 'r') as template_fd:
            with open(os.path.join(tempdir, 'dagster.yaml'), 'w') as target_fd:
                template = template_fd.read().format(hostname=hostname)
                target_fd.write(template)

        instance = DagsterInstance.from_config(tempdir)

        @solid
        def noop_solid(_):
            pass

        @pipeline
        def noop_pipeline():
            noop_solid()

        with pytest.raises(
            DagsterInstanceMigrationRequired, match=_migration_regex(current_revision=None)
        ):
            execute_pipeline(noop_pipeline, instance=instance)

        # ensure migration is run
        instance.upgrade()

        runs = instance.get_runs()
        assert len(runs) == 1
        assert runs[0].run_id == run_id

        run = instance.get_run_by_id(run_id)
        assert run.run_id == run_id
        assert run.pipeline_snapshot_id is None

        result = execute_pipeline(noop_pipeline, instance=instance)
        assert result.success

        runs = instance.get_runs()
        assert len(runs) == 2

        new_run_id = result.run_id
        new_run = instance.get_run_by_id(new_run_id)
        assert new_run.pipeline_snapshot_id
def create_consolidated_sqlite_event_log_instance():
    with seven.TemporaryDirectory() as temp_dir:
        asset_storage = ConsolidatedSqliteEventLogStorage(temp_dir)
        instance = get_instance(temp_dir, asset_storage)
        yield [instance, asset_storage]
def test_0_7_6_postgres_pre_event_log_migration(hostname, conn_string):
    engine = create_engine(conn_string)
    engine.execute('drop schema public cascade;')
    engine.execute('create schema public;')

    env = os.environ.copy()
    env['PGPASSWORD'] = '******'
    subprocess.check_call(
        [
            'psql',
            '-h',
            hostname,
            '-p',
            '5432',
            '-U',
            'test',
            '-f',
            file_relative_path(
                __file__, 'snapshot_0_7_6_pre_event_log_migration/postgres/pg_dump.txt'
            ),
        ],
        env=env,
    )

    run_id = 'ca7f1e33-526d-4f75-9bc5-3e98da41ab97'

    with seven.TemporaryDirectory() as tempdir:
        with open(file_relative_path(__file__, 'dagster.yaml'), 'r') as template_fd:
            with open(os.path.join(tempdir, 'dagster.yaml'), 'w') as target_fd:
                template = template_fd.read().format(hostname=hostname)
                target_fd.write(template)

        instance = DagsterInstance.from_config(tempdir)

        # Runs will appear in DB, but event logs need migration
        runs = instance.get_runs()
        assert len(runs) == 1
        assert instance.get_run_by_id(run_id)

        # Make sure the schema is migrated
        instance.upgrade()

        assert isinstance(instance._event_storage, SqlEventLogStorage)
        events_by_id = instance._event_storage.get_logs_for_run_by_log_id(run_id)
        assert len(events_by_id) == 40

        step_key_records = []
        for record_id, _event in events_by_id.items():
            row_data = instance._event_storage.get_event_log_table_data(run_id, record_id)
            if row_data.step_key is not None:
                step_key_records.append(row_data)
        assert len(step_key_records) == 0

        # run the event_log data migration
        migrate_event_log_data(instance=instance)

        step_key_records = []
        for record_id, _event in events_by_id.items():
            row_data = instance._event_storage.get_event_log_table_data(run_id, record_id)
            if row_data.step_key is not None:
                step_key_records.append(row_data)
        assert len(step_key_records) > 0
def test_dev_loop_changing_versions():
    with seven.TemporaryDirectory() as temp_dir:
        run_store = SqliteRunStorage.from_local(temp_dir)
        event_store = ConsolidatedSqliteEventLogStorage(temp_dir)
        compute_log_manager = LocalComputeLogManager(temp_dir)
        instance = DagsterInstance(
            instance_type=InstanceType.PERSISTENT,
            local_artifact_storage=LocalArtifactStorage(temp_dir),
            run_storage=run_store,
            event_storage=event_store,
            compute_log_manager=compute_log_manager,
            run_coordinator=DefaultRunCoordinator(),
            run_launcher=DefaultRunLauncher(),
        )
        run_config = {
            "solids": {
                "create_string_1": {"config": {"input_str": "apple", "base_dir": temp_dir}},
                "create_string_2": {"config": {"input_str": "apple", "base_dir": temp_dir}},
                "take_string_1": {"config": {"input_str": "apple", "base_dir": temp_dir}},
                "take_string_2": {"config": {"input_str": "apple", "base_dir": temp_dir}},
                "take_string_two_inputs": {
                    "config": {"input_str": "apple", "base_dir": temp_dir}
                },
            },
            "intermediate_storage": {"filesystem": {"config": {"base_dir": temp_dir}}},
        }

        result = execute_pipeline(
            basic_pipeline,
            run_config=run_config,
            mode="only_mode",
            tags={"dagster/is_memoized_run": "true"},
            instance=instance,
        )
        assert result.success
        assert not get_step_keys_to_execute(instance, basic_pipeline, run_config, "only_mode")

        run_config["solids"]["take_string_1"]["config"]["input_str"] = "banana"
        assert set(
            get_step_keys_to_execute(instance, basic_pipeline, run_config, "only_mode")
        ) == set(["take_string_1.compute", "take_string_two_inputs.compute"])

        result2 = execute_pipeline(
            basic_pipeline,
            run_config=run_config,
            mode="only_mode",
            tags={"dagster/is_memoized_run": "true"},
            instance=instance,
        )
        assert result2.success
        assert not get_step_keys_to_execute(instance, basic_pipeline, run_config, "only_mode")

        run_config["solids"]["take_string_two_inputs"]["config"]["input_str"] = "banana"
        assert get_step_keys_to_execute(instance, basic_pipeline, run_config, "only_mode") == [
            "take_string_two_inputs.compute"
        ]

        result3 = execute_pipeline(
            basic_pipeline,
            run_config=run_config,
            mode="only_mode",
            tags={"dagster/is_memoized_run": "true"},
            instance=instance,
        )
        assert result3.success
        assert not get_step_keys_to_execute(instance, basic_pipeline, run_config, "only_mode")
def create_sqlite_run_storage():
    with seven.TemporaryDirectory() as tempdir:
        yield SqliteRunStorage.from_local(tempdir)
def test_output_manager_with_failure():
    # Mutable container so the nested functions can record that they ran; bare
    # assignments inside the closures would only create locals, leaving the
    # outer flags untouched.
    _called = {"input_manager": False, "solid": False}

    @output_manager
    def should_fail(_, _resource_config, _obj):
        raise Failure(
            description="Foolure",
            metadata_entries=[
                EventMetadataEntry.text(label="label", text="text", description="description")
            ],
        )

    @input_manager
    def should_not_enter(_):
        _called["input_manager"] = True

    @solid(output_defs=[OutputDefinition(manager_key="should_fail")])
    def emit_str(_):
        return "emit"

    @solid(
        input_defs=[
            InputDefinition(name="_input_str", dagster_type=str, manager_key="should_not_enter")
        ]
    )
    def should_not_call(_, _input_str):
        _called["solid"] = True

    @pipeline(
        mode_defs=[
            ModeDefinition(
                resource_defs={"should_fail": should_fail, "should_not_enter": should_not_enter}
            )
        ]
    )
    def simple():
        should_not_call(emit_str())

    with seven.TemporaryDirectory() as tmpdir_path:
        instance = DagsterInstance.from_ref(InstanceRef.from_dir(tmpdir_path))
        result = execute_pipeline(simple, instance=instance, raise_on_error=False)

        assert not result.success

        failure_data = result.result_for_solid("emit_str").failure_data
        assert failure_data.error.cls_name == "Failure"
        assert failure_data.user_failure_data.description == "Foolure"
        assert failure_data.user_failure_data.metadata_entries[0].label == "label"
        assert failure_data.user_failure_data.metadata_entries[0].entry_data.text == "text"
        assert failure_data.user_failure_data.metadata_entries[0].description == "description"

        assert not _called["input_manager"] and not _called["solid"]
def test_runs_over_time():
    with seven.TemporaryDirectory() as temp_dir:
        instance = DagsterInstance.local_temp(temp_dir)

        repo_1 = get_repo_at_time_1()

        full_evolve_run_id = execute_pipeline(
            repo_1.get_pipeline("evolving_pipeline"), instance=instance
        ).run_id
        foo_run_id = execute_pipeline(
            repo_1.get_pipeline("foo_pipeline"), instance=instance
        ).run_id
        evolve_a_run_id = execute_pipeline(
            repo_1.get_pipeline("evolving_pipeline").get_pipeline_subset_def({"solid_A"}),
            instance=instance,
        ).run_id
        evolve_b_run_id = execute_pipeline(
            repo_1.get_pipeline("evolving_pipeline").get_pipeline_subset_def({"solid_B"}),
            instance=instance,
        ).run_id

        context_at_time_1 = define_context_for_file(__file__, "get_repo_at_time_1", instance)

        result = execute_dagster_graphql(context_at_time_1, ALL_RUNS_QUERY)
        assert result.data

        t1_runs = {run["runId"]: run for run in result.data["pipelineRunsOrError"]["results"]}

        assert t1_runs[full_evolve_run_id]["pipeline"] == {
            "__typename": "PipelineSnapshot",
            "name": "evolving_pipeline",
            "solidSelection": None,
        }

        assert t1_runs[foo_run_id]["pipeline"] == {
            "__typename": "PipelineSnapshot",
            "name": "foo_pipeline",
            "solidSelection": None,
        }

        assert t1_runs[evolve_a_run_id]["pipeline"] == {
            "__typename": "PipelineSnapshot",
            "name": "evolving_pipeline",
            "solidSelection": ["solid_A"],
        }

        assert t1_runs[evolve_b_run_id]["pipeline"] == {
            "__typename": "PipelineSnapshot",
            "name": "evolving_pipeline",
            "solidSelection": ["solid_B"],
        }

        context_at_time_2 = define_context_for_file(__file__, "get_repo_at_time_2", instance)

        result = execute_dagster_graphql(context_at_time_2, ALL_RUNS_QUERY)
        assert result.data

        t2_runs = {run["runId"]: run for run in result.data["pipelineRunsOrError"]["results"]}

        assert t2_runs[full_evolve_run_id]["pipeline"] == {
            "__typename": "PipelineSnapshot",
            "name": "evolving_pipeline",
            "solidSelection": None,
        }

        assert t2_runs[evolve_a_run_id]["pipeline"] == {
            "__typename": "PipelineSnapshot",
            "name": "evolving_pipeline",
            "solidSelection": ["solid_A"],
        }

        # pipeline name changed
        assert t2_runs[foo_run_id]["pipeline"] == {
            "__typename": "PipelineSnapshot",
            "name": "foo_pipeline",
            "solidSelection": None,
        }

        # subset no longer valid - b renamed
        assert t2_runs[evolve_b_run_id]["pipeline"] == {
            "__typename": "PipelineSnapshot",
            "name": "evolving_pipeline",
            "solidSelection": ["solid_B"],
        }