def dagster_instance_with_k8s_scheduler(
    helm_namespace_for_k8s_run_launcher, run_launcher, k8s_scheduler, schedule_tempdir
):
    """Yield a DagsterInstance whose event log lives in a port-forwarded Postgres
    and whose schedules are driven by the provided k8s scheduler."""
    with local_port_forward_postgres(
        namespace=helm_namespace_for_k8s_run_launcher
    ) as local_forward_port:
        postgres_url = "postgresql://*****:*****@localhost:{local_forward_port}/test".format(
            local_forward_port=local_forward_port
        )
        print("Local Postgres forwarding URL: ", postgres_url)
        # Run/schedule storage are sqlite files under the schedule tempdir; only
        # the event log goes through the forwarded Postgres connection.
        yield DagsterInstance(
            instance_type=InstanceType.EPHEMERAL,
            local_artifact_storage=LocalArtifactStorage(schedule_tempdir),
            run_storage=SqliteRunStorage.from_local(os.path.join(schedule_tempdir, "runs")),
            event_storage=PostgresEventLogStorage(postgres_url),
            compute_log_manager=NoOpComputeLogManager(),
            run_coordinator=DefaultRunCoordinator(),
            run_launcher=run_launcher,
            schedule_storage=SqliteScheduleStorage.from_local(
                os.path.join(schedule_tempdir, "schedules")
            ),
            scheduler=k8s_scheduler,
        )
def test_fs_stores():
    """Smoke-test the filesystem-backed storages end to end with a trivial pipeline."""

    @pipeline
    def simple():
        @solid
        def easy(context):
            context.log.info('easy')
            return 'easy'

        easy()

    with seven.TemporaryDirectory() as temp_dir:
        run_store = SqliteRunStorage.from_local(temp_dir)
        event_store = SqliteEventLogStorage(temp_dir)
        instance = DagsterInstance(
            instance_type=InstanceType.PERSISTENT,
            local_artifact_storage=LocalArtifactStorage(temp_dir),
            run_storage=run_store,
            event_storage=event_store,
            compute_log_manager=LocalComputeLogManager(temp_dir),
        )

        result = execute_pipeline(simple, instance=instance)

        assert run_store.has_run(result.run_id)
        assert run_store.get_run_by_id(result.run_id).status == PipelineRunStatus.SUCCESS
        # The event log for the run must include a pipeline-success event.
        assert any(
            record.dagster_event.event_type == DagsterEventType.PIPELINE_SUCCESS
            for record in event_store.get_logs_for_run(result.run_id)
            if record.is_dagster_event
        )
        stats = event_store.get_stats_for_run(result.run_id)
        assert stats.steps_succeeded == 1
        assert stats.end_time is not None
def test_postgres_instance(multi_postgres):
    """Run a pipeline against clean Postgres run/event storages and verify persistence."""
    run_storage_conn_string, event_log_storage_conn_string = multi_postgres
    run_storage = PostgresRunStorage.create_clean_storage(run_storage_conn_string)
    event_storage = PostgresEventLogStorage.create_clean_storage(event_log_storage_conn_string)

    with seven.TemporaryDirectory() as temp_dir:
        instance = DagsterInstance(
            instance_type=InstanceType.PERSISTENT,
            local_artifact_storage=LocalArtifactStorage(temp_dir),
            run_storage=run_storage,
            event_storage=event_storage,
            compute_log_manager=LocalComputeLogManager(temp_dir),
        )

        # NOTE(review): `simple` is not defined in this view; presumably a
        # module-level pipeline defined elsewhere in the file -- confirm.
        result = execute_pipeline(simple, instance=instance)

        assert run_storage.has_run(result.run_id)
        assert run_storage.get_run_by_id(result.run_id).status == PipelineRunStatus.SUCCESS
        assert any(
            record.dagster_event.event_type == DagsterEventType.PIPELINE_SUCCESS
            for record in event_storage.get_logs_for_run(result.run_id)
            if record.is_dagster_event
        )
        stats = event_storage.get_stats_for_run(result.run_id)
        assert stats.steps_succeeded == 1
        assert stats.end_time is not None
def run_test_backfill(execution_args, expected_count=None, error_message=None):
    """Drive the backfill CLI against an in-memory instance and check the outcome.

    Args:
        execution_args: arguments translated into CLI / command invocations.
        expected_count: if given, the number of runs expected in the launcher queue.
        error_message: if given, both invocation paths are expected to fail with it.
    """
    runner = CliRunner()
    run_launcher = InMemoryRunLauncher()

    with seven.TemporaryDirectory() as temp_dir:
        instance = DagsterInstance(
            instance_type=InstanceType.EPHEMERAL,
            local_artifact_storage=LocalArtifactStorage(temp_dir),
            run_storage=InMemoryRunStorage(),
            event_storage=InMemoryEventLogStorage(),
            compute_log_manager=NoOpComputeLogManager(),
            run_launcher=run_launcher,
        )
        # Route DagsterInstance.get() to our instance for the duration of the test.
        with mock.patch('dagster.core.instance.DagsterInstance.get') as _instance:
            _instance.return_value = instance

            if error_message:
                with pytest.raises(UsageError) as error_info:
                    execute_backfill_command(backfill_execute_args(execution_args), no_print)
                assert error_info and error_message in error_info.value.message

            result = runner.invoke(
                pipeline_backfill_command, backfill_cli_runner_args(execution_args)
            )
            assert result.exit_code == (2 if error_message else 0)
            if expected_count:
                assert len(run_launcher.queue()) == expected_count
def test_run_launcher():
    """Launch a run via GraphQL, execute it from the queue, and confirm SUCCESS."""
    test_queue = InMemoryRunLauncher()

    with seven.TemporaryDirectory() as temp_dir:
        instance = DagsterInstance(
            instance_type=InstanceType.EPHEMERAL,
            local_artifact_storage=LocalArtifactStorage(temp_dir),
            run_storage=InMemoryRunStorage(),
            event_storage=InMemoryEventLogStorage(),
            compute_log_manager=NoOpComputeLogManager(temp_dir),
            run_launcher=test_queue,
        )
        context = define_context_for_repository_yaml(
            path=script_relative_path('../repository.yaml'), instance=instance
        )

        result = execute_dagster_graphql(
            context=context,
            query=LAUNCH_PIPELINE_EXECUTION_MUTATION,
            variables={
                'executionParams': {'selector': {'name': 'no_config_pipeline'}, 'mode': 'default'}
            },
        )
        launch_payload = result.data['launchPipelineExecution']
        assert launch_payload['__typename'] == 'LaunchPipelineExecutionSuccess'
        assert launch_payload['run']['status'] == 'NOT_STARTED'
        run_id = launch_payload['run']['runId']

        # Drain the single queued run, then re-query its status.
        test_queue.run_one(instance)
        result = execute_dagster_graphql(
            context=context, query=RUN_QUERY, variables={'runId': run_id}
        )
        run_payload = result.data['pipelineRunOrError']
        assert run_payload['__typename'] == 'PipelineRun'
        assert run_payload['status'] == 'SUCCESS'
def test_get_schedule():
    """Query one schedule definition through GraphQL after reconciling scheduler state."""
    with seven.TemporaryDirectory() as temp_dir:
        instance = DagsterInstance(
            instance_type=InstanceType.EPHEMERAL,
            local_artifact_storage=LocalArtifactStorage(temp_dir),
            run_storage=InMemoryRunStorage(),
            event_storage=InMemoryEventLogStorage(),
            compute_log_manager=NoOpComputeLogManager(),
            schedule_storage=SqliteScheduleStorage.from_local(temp_dir),
            scheduler=FilesystemTestScheduler(temp_dir),
            run_launcher=SyncInMemoryRunLauncher(),
        )
        context = define_test_context(instance)

        # Initialize scheduler state before querying.
        instance.reconcile_scheduler_state(
            repository=context.legacy_get_repository_definition(),
            python_path='/path/to/python',
            repository_path='/path/to/repository',
        )

        result = execute_dagster_graphql(
            context,
            GET_SCHEDULE,
            variables={'scheduleName': 'partition_based_multi_mode_decorator'},
        )

        assert result.data
        schedule_or_error = result.data['scheduleOrError']
        assert schedule_or_error['__typename'] == 'RunningSchedule'
        assert schedule_or_error['scheduleDefinition']['partitionSet']
def get_instance(temp_dir):
    """Build an ephemeral DagsterInstance with in-memory run/event storage
    rooted at ``temp_dir``."""
    artifact_storage = LocalArtifactStorage(temp_dir)
    log_manager = NoOpComputeLogManager(temp_dir)
    return DagsterInstance(
        instance_type=InstanceType.EPHEMERAL,
        local_artifact_storage=artifact_storage,
        run_storage=InMemoryRunStorage(),
        event_storage=InMemoryEventLogStorage(),
        compute_log_manager=log_manager,
    )
def test_get_all_schedules():
    """Reconcile and start schedules, then verify the scheduler GraphQL query output."""
    with seven.TemporaryDirectory() as temp_dir:
        instance = DagsterInstance(
            instance_type=InstanceType.EPHEMERAL,
            local_artifact_storage=LocalArtifactStorage(temp_dir),
            run_storage=InMemoryRunStorage(),
            event_storage=InMemoryEventLogStorage(),
            compute_log_manager=NoOpComputeLogManager(temp_dir),
            schedule_storage=SqliteScheduleStorage.from_local(temp_dir),
            scheduler=FilesystemTestScheduler(temp_dir),
            run_launcher=SyncInMemoryRunLauncher(),
        )
        context = define_context_for_repository_yaml(
            path=file_relative_path(__file__, '../repository.yaml'), instance=instance
        )

        # Initialize scheduler state, then start one schedule.
        repository = context.legacy_get_repository_definition()
        reconcile_scheduler_state(
            python_path=sys.executable,
            repository_path="",
            repository=repository,
            instance=instance,
        )
        instance.start_schedule(repository.name, "no_config_pipeline_hourly_schedule")

        # Query Scheduler + all Schedules
        scheduler_result = execute_dagster_graphql(context, GET_SCHEDULES_QUERY)

        # These schedules are defined in dagster_graphql_tests/graphql/setup_scheduler.py
        # If you add a schedule there, be sure to update the number of schedules below
        assert scheduler_result.data
        assert scheduler_result.data['scheduler']
        running = scheduler_result.data['scheduler']['runningSchedules']
        assert running
        assert len(running) == 18

        for entry in running:
            name = entry['scheduleDefinition']['name']
            if name == 'no_config_pipeline_hourly_schedule':
                assert entry['status'] == 'RUNNING'
            if name == 'environment_dict_error_schedule':
                assert entry['scheduleDefinition']['runConfigYaml'] is None
            elif name == 'invalid_config_schedule':
                assert (
                    entry['scheduleDefinition']['runConfigYaml']
                    == 'solids:\n takes_an_enum:\n config: invalid\n'
                )
            else:
                assert (
                    entry['scheduleDefinition']['runConfigYaml']
                    == 'storage:\n filesystem: {}\n'
                )
def test_execute_display_command():
    """Exercise ``execute_list_versions_command`` both before and after a memoized run."""
    with tempfile.TemporaryDirectory() as temp_dir:
        instance = DagsterInstance(
            instance_type=InstanceType.PERSISTENT,
            local_artifact_storage=LocalArtifactStorage(temp_dir),
            run_storage=SqliteRunStorage.from_local(temp_dir),
            event_storage=ConsolidatedSqliteEventLogStorage(temp_dir),
            compute_log_manager=LocalComputeLogManager(temp_dir),
            run_coordinator=DefaultRunCoordinator(),
            run_launcher=DefaultRunLauncher(),
        )
        run_config = {
            "solids": {
                "create_string_1_asset": {"config": {"input_str": "apple"}},
                "take_string_1_asset": {"config": {"input_str": "apple"}},
            },
            "resources": {"object_manager": {"config": {"base_dir": temp_dir}}},
        }

        # write run config to temp file
        # file is temp because intermediate storage directory is temporary
        config_path = os.path.join(temp_dir, "pipeline_config.yaml")
        with open(config_path, "w") as f:
            f.write(yaml.dump(run_config))

        kwargs = {
            "config": (config_path,),
            "pipeline": "asset_pipeline",
            "python_file": file_relative_path(
                __file__, "../../core_tests/execution_tests/memoized_dev_loop_pipeline.py"
            ),
            "tags": '{"dagster/is_memoized_run": "true"}',
        }

        with Capturing() as output:
            execute_list_versions_command(kwargs=kwargs, instance=instance)
        assert output

        # execute the pipeline once so that addresses have been populated.
        result = execute_pipeline(
            asset_pipeline,
            run_config=run_config,
            mode="only_mode",
            tags={"dagster/is_memoized_run": "true"},
            instance=instance,
        )
        assert result.success

        with Capturing() as output:
            execute_list_versions_command(kwargs=kwargs, instance=instance)
        assert output
def get_instance(temp_dir):
    """Ephemeral instance with in-memory run/event storage plus a filesystem
    test scheduler and sqlite schedule storage under ``temp_dir``."""
    scheduling = dict(
        scheduler=FilesystemTestScheduler(temp_dir),
        schedule_storage=SqliteScheduleStorage.from_local(temp_dir),
    )
    return DagsterInstance(
        instance_type=InstanceType.EPHEMERAL,
        local_artifact_storage=LocalArtifactStorage(temp_dir),
        run_storage=InMemoryRunStorage(),
        event_storage=InMemoryEventLogStorage(),
        compute_log_manager=NoOpComputeLogManager(temp_dir),
        **scheduling,
    )
def get_instance(temp_dir, event_log_storage):
    """Ephemeral instance built around a caller-supplied event log storage."""
    artifact_storage = LocalArtifactStorage(temp_dir)
    launcher = SyncInMemoryRunLauncher()
    return DagsterInstance(
        instance_type=InstanceType.EPHEMERAL,
        local_artifact_storage=artifact_storage,
        run_storage=InMemoryRunStorage(),
        event_storage=event_log_storage,
        compute_log_manager=NoOpComputeLogManager(),
        run_coordinator=DefaultRunCoordinator(),
        run_launcher=launcher,
    )
def define_scheduler_instance(tempdir):
    """Ephemeral instance whose schedule storage and cron scheduler share a
    ``schedules`` subdirectory of ``tempdir``."""
    # Hoist the shared path so both scheduler components agree on it.
    schedule_dir = os.path.join(tempdir, 'schedules')
    return DagsterInstance(
        instance_type=InstanceType.EPHEMERAL,
        local_artifact_storage=LocalArtifactStorage(tempdir),
        run_storage=InMemoryRunStorage(),
        event_storage=InMemoryEventLogStorage(),
        compute_log_manager=NoOpComputeLogManager(tempdir),
        schedule_storage=SqliteScheduleStorage.from_local(schedule_dir),
        scheduler=SystemCronScheduler(schedule_dir),
    )
def get_instance():
    """Yield an ephemeral instance whose run storage is sqlite-backed in a temp dir."""
    with tempfile.TemporaryDirectory() as temp_dir:
        run_storage = SqliteRunStorage.from_local(temp_dir)
        yield DagsterInstance(
            instance_type=InstanceType.EPHEMERAL,
            local_artifact_storage=LocalArtifactStorage(temp_dir),
            run_storage=run_storage,
            event_storage=InMemoryEventLogStorage(),
            compute_log_manager=NoOpComputeLogManager(),
            run_coordinator=DefaultRunCoordinator(),
            run_launcher=SyncInMemoryRunLauncher(),
        )
def define_scheduler_instance():
    """Yield an ephemeral DagsterInstance with in-memory storages and a
    filesystem test scheduler rooted in a temporary directory."""
    with seven.TemporaryDirectory() as temp_dir:
        yield DagsterInstance(
            instance_type=InstanceType.EPHEMERAL,
            local_artifact_storage=LocalArtifactStorage(temp_dir),
            run_storage=InMemoryRunStorage(),
            event_storage=InMemoryEventLogStorage(),
            schedule_storage=SqliteScheduleStorage.from_local(temp_dir),
            # NOTE(review): `FilesytemTestScheduler` is spelled without the second
            # 's', unlike `FilesystemTestScheduler` used elsewhere in this file.
            # Older dagster releases shipped the misspelled class name, so this may
            # be intentional -- confirm against the symbol actually imported here.
            scheduler=FilesytemTestScheduler(temp_dir),
            compute_log_manager=NoOpComputeLogManager(temp_dir),
        )
def _readonly_in_memory_instance():
    """Yield an in-memory instance whose launcher raises on use, for read-only tests."""
    with seven.TemporaryDirectory() as temp_dir:
        instance = DagsterInstance(
            instance_type=InstanceType.EPHEMERAL,
            local_artifact_storage=LocalArtifactStorage(temp_dir),
            run_storage=InMemoryRunStorage(),
            event_storage=InMemoryEventLogStorage(),
            compute_log_manager=LocalComputeLogManager(temp_dir),
            # Launching runs is intentionally impossible with this fixture.
            run_launcher=ExplodingRunLauncher(),
            schedule_storage=SqliteScheduleStorage.from_local(temp_dir),
        )
        yield instance
def _sqlite_asset_instance():
    """Yield an ephemeral instance with a consolidated sqlite event log for asset tests."""
    with seven.TemporaryDirectory() as temp_dir:
        yield DagsterInstance(
            instance_type=InstanceType.EPHEMERAL,
            local_artifact_storage=LocalArtifactStorage(temp_dir),
            run_storage=InMemoryRunStorage(),
            event_storage=ConsolidatedSqliteEventLogStorage(temp_dir),
            compute_log_manager=LocalComputeLogManager(temp_dir),
            run_launcher=SyncInMemoryRunLauncher(),
        )
def get_instance_with_launcher(temp_dir):
    """Ephemeral instance wired to a fresh ``InMemoryRunLauncher``.

    The launcher is reachable afterwards via ``instance.run_launcher``.
    """
    launcher = InMemoryRunLauncher()
    return DagsterInstance(
        instance_type=InstanceType.EPHEMERAL,
        local_artifact_storage=LocalArtifactStorage(temp_dir),
        run_storage=InMemoryRunStorage(),
        event_storage=InMemoryEventLogStorage(),
        schedule_storage=SqliteScheduleStorage.from_local(temp_dir),
        compute_log_manager=NoOpComputeLogManager(temp_dir),
        run_launcher=launcher,
    )
def _sqlite_asset_instance():
    """Yield an ephemeral instance with a consolidated sqlite event log, default
    run coordination, and a filesystem test scheduler."""
    with tempfile.TemporaryDirectory() as temp_dir:
        yield DagsterInstance(
            instance_type=InstanceType.EPHEMERAL,
            local_artifact_storage=LocalArtifactStorage(temp_dir),
            run_storage=InMemoryRunStorage(),
            event_storage=ConsolidatedSqliteEventLogStorage(temp_dir),
            compute_log_manager=LocalComputeLogManager(temp_dir),
            run_coordinator=DefaultRunCoordinator(),
            run_launcher=SyncInMemoryRunLauncher(),
            scheduler=FilesystemTestScheduler(temp_dir),
        )
def define_scheduler_instance(tempdir):
    """Build an instance using the deprecated ``SystemCronScheduler``, asserting
    that constructing it emits the expected deprecation warning."""
    with pytest.warns(UserWarning, match="`SystemCronScheduler` is deprecated"):
        schedule_dir = os.path.join(tempdir, "schedules")
        return DagsterInstance(
            instance_type=InstanceType.EPHEMERAL,
            local_artifact_storage=LocalArtifactStorage(tempdir),
            run_storage=InMemoryRunStorage(),
            event_storage=InMemoryEventLogStorage(),
            compute_log_manager=NoOpComputeLogManager(),
            schedule_storage=SqliteScheduleStorage.from_local(schedule_dir),
            scheduler=SystemCronScheduler(),
            run_coordinator=DefaultRunCoordinator(),
            run_launcher=SyncInMemoryRunLauncher(),
        )
def dagster_instance(helm_postgres_url, run_launcher):  # pylint: disable=redefined-outer-name
    """Yield a context-managed instance whose run/event storage is helm Postgres."""
    tempdir = DagsterInstance.temp_storage()
    instance_kwargs = dict(
        instance_type=InstanceType.EPHEMERAL,
        local_artifact_storage=LocalArtifactStorage(tempdir),
        run_storage=PostgresRunStorage(helm_postgres_url),
        event_storage=PostgresEventLogStorage(helm_postgres_url),
        compute_log_manager=NoOpComputeLogManager(),
        run_coordinator=DefaultRunCoordinator(),
        run_launcher=run_launcher,
    )
    with DagsterInstance(**instance_kwargs) as instance:
        yield instance
def _non_launchable_in_memory_instance():
    """Yield an in-memory instance with scheduling support whose launcher
    raises if anything tries to launch a run."""
    with tempfile.TemporaryDirectory() as temp_dir:
        instance = DagsterInstance(
            instance_type=InstanceType.EPHEMERAL,
            local_artifact_storage=LocalArtifactStorage(temp_dir),
            run_storage=InMemoryRunStorage(),
            event_storage=InMemoryEventLogStorage(),
            compute_log_manager=LocalComputeLogManager(temp_dir),
            run_launcher=ExplodingRunLauncher(),
            run_coordinator=DefaultRunCoordinator(),
            schedule_storage=SqliteScheduleStorage.from_local(temp_dir),
            scheduler=FilesystemTestScheduler(temp_dir),
        )
        yield instance
def mocked_instance():
    """Yield an ephemeral instance while ``DagsterInstance.get`` is patched to return it."""
    with seven.TemporaryDirectory() as temp_dir:
        instance = DagsterInstance(
            instance_type=InstanceType.EPHEMERAL,
            local_artifact_storage=LocalArtifactStorage(temp_dir),
            run_storage=InMemoryRunStorage(),
            event_storage=InMemoryEventLogStorage(),
            compute_log_manager=NoOpComputeLogManager(),
            run_launcher=InMemoryRunLauncher(),
        )
        # Configure the patched getter up front rather than mutating the mock after.
        with mock.patch('dagster.core.instance.DagsterInstance.get', return_value=instance):
            yield instance
def get_ephemeral_instance(temp_dir):
    """Build a sqlite-backed instance rooted at ``temp_dir``.

    NOTE(review): despite the function name, ``instance_type`` is PERSISTENT --
    presumably intentional (sqlite storages persist within the dir); confirm
    against callers.
    """
    return DagsterInstance(
        instance_type=InstanceType.PERSISTENT,
        local_artifact_storage=LocalArtifactStorage(temp_dir),
        run_storage=SqliteRunStorage.from_local(temp_dir),
        event_storage=ConsolidatedSqliteEventLogStorage(temp_dir),
        compute_log_manager=LocalComputeLogManager(temp_dir),
        run_launcher=DefaultRunLauncher(),
        run_coordinator=DefaultRunCoordinator(),
    )
def _postgres_instance():
    """Yield an instance with freshly-cleaned Postgres run/event/schedule storages."""
    with seven.TemporaryDirectory() as temp_dir:
        with graphql_postgres_instance() as pg_conn_string:
            instance = DagsterInstance(
                instance_type=InstanceType.EPHEMERAL,
                local_artifact_storage=LocalArtifactStorage(temp_dir),
                run_storage=TestPostgresInstance.clean_run_storage(pg_conn_string),
                event_storage=TestPostgresInstance.clean_event_log_storage(pg_conn_string),
                compute_log_manager=LocalComputeLogManager(temp_dir),
                run_launcher=SyncInMemoryRunLauncher(),
                schedule_storage=TestPostgresInstance.clean_schedule_storage(pg_conn_string),
            )
            yield instance
def broken_compute_log_manager_instance(fail_on_setup=False, fail_on_teardown=False):
    """Yield a persistent instance whose compute log manager fails on demand.

    Args:
        fail_on_setup: make the compute log manager raise during setup.
        fail_on_teardown: make the compute log manager raise during teardown.
    """
    with tempfile.TemporaryDirectory() as temp_dir:
        with environ({"DAGSTER_HOME": temp_dir}):
            broken_manager = BrokenComputeLogManager(
                fail_on_setup=fail_on_setup, fail_on_teardown=fail_on_teardown
            )
            yield DagsterInstance(
                instance_type=InstanceType.PERSISTENT,
                local_artifact_storage=LocalArtifactStorage(temp_dir),
                run_storage=SqliteRunStorage.from_local(temp_dir),
                event_storage=SqliteEventLogStorage(temp_dir),
                compute_log_manager=broken_manager,
                run_coordinator=DefaultRunCoordinator(),
                run_launcher=DefaultRunLauncher(),
                ref=InstanceRef.from_dir(temp_dir),
            )
def ephemeral(tempdir=None):
    """Return a minimal in-memory DagsterInstance.

    Args:
        tempdir: artifact/compute-log root; a fresh temp storage dir when None.
    """
    # Imports are local to avoid import cycles at module load time.
    from dagster.core.storage.event_log import InMemoryEventLogStorage
    from dagster.core.storage.root import LocalArtifactStorage
    from dagster.core.storage.runs import InMemoryRunStorage
    from dagster.core.storage.local_compute_log_manager import NoOpComputeLogManager

    root = DagsterInstance.temp_storage() if tempdir is None else tempdir
    return DagsterInstance(
        InstanceType.EPHEMERAL,
        local_artifact_storage=LocalArtifactStorage(root),
        run_storage=InMemoryRunStorage(),
        event_storage=InMemoryEventLogStorage(),
        compute_log_manager=NoOpComputeLogManager(compute_logs_directory(root)),
    )
def dagster_instance(run_launcher, network_postgres):
    """Build an instance whose run/event storage is the networked Postgres."""
    tempdir = DagsterInstance.temp_storage()
    postgres_url = 'postgresql://*****:*****@localhost:{network_postgres}/test'.format(
        network_postgres=network_postgres
    )
    return DagsterInstance(
        instance_type=InstanceType.EPHEMERAL,
        local_artifact_storage=LocalArtifactStorage(tempdir),
        run_storage=PostgresRunStorage(postgres_url),
        event_storage=PostgresEventLogStorage(postgres_url),
        compute_log_manager=NoOpComputeLogManager(compute_logs_directory(tempdir)),
        run_launcher=run_launcher,
    )
def ephemeral(tempdir=None, preload=None):
    """Return an in-memory DagsterInstance, optionally preloaded.

    Args:
        tempdir: artifact root; a fresh temp storage dir when None.
        preload: optional data handed to the in-memory run and event storages.
    """
    # Imports are local to avoid import cycles at module load time.
    from dagster.core.launcher.sync_in_memory_run_launcher import SyncInMemoryRunLauncher
    from dagster.core.storage.event_log import InMemoryEventLogStorage
    from dagster.core.storage.root import LocalArtifactStorage
    from dagster.core.storage.runs import InMemoryRunStorage
    from dagster.core.storage.noop_compute_log_manager import NoOpComputeLogManager

    root = DagsterInstance.temp_storage() if tempdir is None else tempdir
    return DagsterInstance(
        InstanceType.EPHEMERAL,
        local_artifact_storage=LocalArtifactStorage(root),
        run_storage=InMemoryRunStorage(preload=preload),
        event_storage=InMemoryEventLogStorage(preload=preload),
        compute_log_manager=NoOpComputeLogManager(),
        run_launcher=SyncInMemoryRunLauncher(),
    )
def dagster_instance(helm_postgres_url):  # pylint: disable=redefined-outer-name
    """Yield a persistent instance over helm Postgres, then verify exported runs
    after the consuming test finishes."""
    with tempfile.TemporaryDirectory() as tempdir, environ({"DAGSTER_HOME": tempdir}):
        with DagsterInstance(
            instance_type=InstanceType.PERSISTENT,
            local_artifact_storage=LocalArtifactStorage(tempdir),
            run_storage=PostgresRunStorage(helm_postgres_url),
            event_storage=PostgresEventLogStorage(helm_postgres_url),
            compute_log_manager=NoOpComputeLogManager(),
            run_coordinator=DefaultRunCoordinator(),
            run_launcher=ExplodingRunLauncher(),  # use graphql to launch any runs
            ref=InstanceRef.from_dir(tempdir),
        ) as instance:
            yield instance
            check_export_runs(instance)
def _postgres_instance_with_grpc_api_hijack():
    """Yield a Postgres-backed instance using the ephemeral gRPC run launcher,
    joining the launcher's processes on teardown."""
    with seven.TemporaryDirectory() as temp_dir:
        with graphql_postgres_instance() as pg_conn_string:
            instance = DagsterInstance(
                instance_type=InstanceType.EPHEMERAL,
                local_artifact_storage=LocalArtifactStorage(temp_dir),
                run_storage=TestPostgresInstance.clean_run_storage(pg_conn_string),
                event_storage=TestPostgresInstance.clean_event_log_storage(pg_conn_string),
                compute_log_manager=LocalComputeLogManager(temp_dir),
                run_launcher=EphemeralGrpcRunLauncher(),
                schedule_storage=TestPostgresInstance.clean_schedule_storage(pg_conn_string),
            )
            try:
                yield instance
            finally:
                # Reap spawned gRPC run processes even if the consuming test fails.
                instance.run_launcher.join()