def run_test_backfill(
    execution_args, expected_count=None, error_message=None, use_run_launcher=True
):
    """Drive the backfill command against an ephemeral instance and check the outcome.

    ``DagsterInstance.get`` is patched to return an in-memory instance rooted in
    a temporary directory while the command runs.

    Args:
        execution_args: Backfill arguments; adapted through
            ``backfill_execute_args`` and ``backfill_cli_runner_args`` before use.
        expected_count: When truthy (and no error is expected), the number of
            runs that must be in the run launcher's queue afterwards.
        error_message: When truthy, the direct command call must raise
            ``UsageError`` containing this text and the CLI must exit with 2.
        use_run_launcher: Whether the instance gets an ``InMemoryRunLauncher``.
    """
    cli_runner = CliRunner()
    launcher = None
    if use_run_launcher:
        launcher = InMemoryRunLauncher()

    with seven.TemporaryDirectory() as temp_dir:
        artifact_storage = LocalArtifactStorage(temp_dir)
        runs = InMemoryRunStorage()
        events = InMemoryEventLogStorage()
        compute_logs = NoOpComputeLogManager(temp_dir)
        instance = DagsterInstance(
            instance_type=InstanceType.EPHEMERAL,
            local_artifact_storage=artifact_storage,
            run_storage=runs,
            event_storage=events,
            compute_log_manager=compute_logs,
            run_launcher=launcher,
        )

        with mock.patch('dagster.core.instance.DagsterInstance.get') as patched_get:
            patched_get.return_value = instance

            # Direct invocation: the usage error must surface with the expected text.
            if error_message:
                with pytest.raises(UsageError) as error_info:
                    execute_backfill_command(backfill_execute_args(execution_args), no_print)
                assert error_info and error_message in error_info.value.message

            # CLI invocation: the same failure is reported through the exit code.
            cli_args = backfill_cli_runner_args(execution_args)
            result = cli_runner.invoke(pipeline_backfill_command, cli_args)

            if error_message:
                assert result.exit_code == 2
                return

            assert result.exit_code == 0
            if expected_count:
                queued_runs = launcher.queue()
                assert len(queued_runs) == expected_count
def test_run_launcher():
    """Launch ``no_config_pipeline`` via GraphQL and run it from the in-memory queue.

    The run must be reported as ``NOT_STARTED`` right after launch and as
    ``SUCCESS`` once the launcher has executed one queued run.
    """
    test_queue = InMemoryRunLauncher()

    with seven.TemporaryDirectory() as temp_dir:
        artifact_storage = LocalArtifactStorage(temp_dir)
        runs = InMemoryRunStorage()
        events = InMemoryEventLogStorage()
        compute_logs = NoOpComputeLogManager(temp_dir)
        instance = DagsterInstance(
            instance_type=InstanceType.EPHEMERAL,
            local_artifact_storage=artifact_storage,
            run_storage=runs,
            event_storage=events,
            compute_log_manager=compute_logs,
            run_launcher=test_queue,
        )

        repository_yaml = script_relative_path('../repository.yaml')
        context = define_context_for_repository_yaml(path=repository_yaml, instance=instance)

        execution_params = {'selector': {'name': 'no_config_pipeline'}, 'mode': 'default'}
        launch_result = execute_dagster_graphql(
            context=context,
            query=LAUNCH_PIPELINE_EXECUTION_MUTATION,
            variables={'executionParams': execution_params},
        )

        launch_payload = launch_result.data['launchPipelineExecution']
        assert launch_payload['__typename'] == 'LaunchPipelineExecutionSuccess'
        assert launch_payload['run']['status'] == 'NOT_STARTED'
        run_id = launch_payload['run']['runId']

        # Execute the single run that the mutation placed in the queue.
        test_queue.run_one(instance)

        run_result = execute_dagster_graphql(
            context=context, query=RUN_QUERY, variables={'runId': run_id}
        )
        run_payload = run_result.data['pipelineRunOrError']
        assert run_payload['__typename'] == 'PipelineRun'
        assert run_payload['status'] == 'SUCCESS'
def get_instance(temp_dir):
    """Return an ephemeral ``DagsterInstance`` rooted at ``temp_dir``.

    Runs and events are kept in in-memory storages; artifacts and the no-op
    compute log manager are both given ``temp_dir``.
    """
    artifact_storage = LocalArtifactStorage(temp_dir)
    runs = InMemoryRunStorage()
    events = InMemoryEventLogStorage()
    compute_logs = NoOpComputeLogManager(temp_dir)
    return DagsterInstance(
        instance_type=InstanceType.EPHEMERAL,
        local_artifact_storage=artifact_storage,
        run_storage=runs,
        event_storage=events,
        compute_log_manager=compute_logs,
    )
def test_get_all_schedules():
    """Start one schedule, then check every running schedule returned by GraphQL.

    Asserts the number of running schedules, that the started schedule reports
    ``RUNNING``, and the ``runConfigYaml`` reported for each schedule definition.
    """
    with seven.TemporaryDirectory() as temp_dir:
        artifact_storage = LocalArtifactStorage(temp_dir)
        runs = InMemoryRunStorage()
        events = InMemoryEventLogStorage()
        compute_logs = NoOpComputeLogManager(temp_dir)
        schedules = SqliteScheduleStorage.from_local(temp_dir)
        scheduler = FilesystemTestScheduler(temp_dir)
        launcher = SyncInMemoryRunLauncher()
        instance = DagsterInstance(
            instance_type=InstanceType.EPHEMERAL,
            local_artifact_storage=artifact_storage,
            run_storage=runs,
            event_storage=events,
            compute_log_manager=compute_logs,
            schedule_storage=schedules,
            scheduler=scheduler,
            run_launcher=launcher,
        )

        repository_yaml = file_relative_path(__file__, '../repository.yaml')
        context = define_context_for_repository_yaml(path=repository_yaml, instance=instance)

        # Initialize scheduler
        repository = context.legacy_get_repository_definition()
        reconcile_scheduler_state(
            python_path=sys.executable,
            repository_path='',
            repository=repository,
            instance=instance,
        )

        # Start schedule
        instance.start_schedule(repository.name, 'no_config_pipeline_hourly_schedule')

        # Query Scheduler + all Schedules
        scheduler_result = execute_dagster_graphql(context, GET_SCHEDULES_QUERY)

        # These schedules are defined in dagster_graphql_tests/graphql/setup_scheduler.py
        # If you add a schedule there, be sure to update the number of schedules below
        result_data = scheduler_result.data
        assert result_data
        scheduler_data = result_data['scheduler']
        assert scheduler_data
        running_schedules = scheduler_data['runningSchedules']
        assert running_schedules
        assert len(running_schedules) == 18

        for running in running_schedules:
            definition = running['scheduleDefinition']
            name = definition['name']

            if name == 'no_config_pipeline_hourly_schedule':
                assert running['status'] == 'RUNNING'

            if name == 'environment_dict_error_schedule':
                assert definition['runConfigYaml'] is None
            elif name == 'invalid_config_schedule':
                assert (
                    definition['runConfigYaml']
                    == 'solids:\n takes_an_enum:\n config: invalid\n'
                )
            else:
                assert definition['runConfigYaml'] == 'storage:\n filesystem: {}\n'
def define_scheduler_instance(tempdir):
    """Return an ephemeral ``DagsterInstance`` wired to a ``SystemCronScheduler``.

    Schedule storage and the scheduler are both given the ``schedules``
    subdirectory of ``tempdir``; runs and events are kept in memory.
    """
    artifact_storage = LocalArtifactStorage(tempdir)
    runs = InMemoryRunStorage()
    events = InMemoryEventLogStorage()
    compute_logs = NoOpComputeLogManager(tempdir)

    schedules_dir = os.path.join(tempdir, 'schedules')
    schedules = SqliteScheduleStorage.from_local(schedules_dir)
    cron_scheduler = SystemCronScheduler(schedules_dir)

    return DagsterInstance(
        instance_type=InstanceType.EPHEMERAL,
        local_artifact_storage=artifact_storage,
        run_storage=runs,
        event_storage=events,
        compute_log_manager=compute_logs,
        schedule_storage=schedules,
        scheduler=cron_scheduler,
    )
def get_instance(temp_dir):
    """Return an ephemeral ``DagsterInstance`` with a filesystem test scheduler.

    The scheduler, the SQLite schedule storage, the artifact storage and the
    no-op compute log manager are all given ``temp_dir``; runs and events are
    kept in memory.
    """
    artifact_storage = LocalArtifactStorage(temp_dir)
    runs = InMemoryRunStorage()
    events = InMemoryEventLogStorage()
    test_scheduler = FilesystemTestScheduler(temp_dir)
    schedules = SqliteScheduleStorage.from_local(temp_dir)
    compute_logs = NoOpComputeLogManager(temp_dir)
    return DagsterInstance(
        instance_type=InstanceType.EPHEMERAL,
        local_artifact_storage=artifact_storage,
        run_storage=runs,
        event_storage=events,
        scheduler=test_scheduler,
        schedule_storage=schedules,
        compute_log_manager=compute_logs,
    )
def define_scheduler_instance():
    """Yield an ephemeral ``DagsterInstance`` that lives inside a temporary directory.

    The temporary directory is only removed once the generator is resumed or
    closed after the yield.
    """
    with seven.TemporaryDirectory() as temp_dir:
        artifact_storage = LocalArtifactStorage(temp_dir)
        runs = InMemoryRunStorage()
        events = InMemoryEventLogStorage()
        schedules = SqliteScheduleStorage.from_local(temp_dir)
        # NOTE(review): this spelling differs from the `FilesystemTestScheduler`
        # name used elsewhere -- confirm which name is actually imported.
        test_scheduler = FilesytemTestScheduler(temp_dir)
        compute_logs = NoOpComputeLogManager(temp_dir)

        instance = DagsterInstance(
            instance_type=InstanceType.EPHEMERAL,
            local_artifact_storage=artifact_storage,
            run_storage=runs,
            event_storage=events,
            schedule_storage=schedules,
            scheduler=test_scheduler,
            compute_log_manager=compute_logs,
        )
        yield instance
def get_instance_with_launcher(temp_dir):
    """Return an ephemeral ``DagsterInstance`` that launches runs in memory.

    The instance gets a fresh ``InMemoryRunLauncher`` and SQLite schedule
    storage created from ``temp_dir``; runs and events are kept in memory.
    """
    launcher = InMemoryRunLauncher()
    artifact_storage = LocalArtifactStorage(temp_dir)
    runs = InMemoryRunStorage()
    events = InMemoryEventLogStorage()
    schedules = SqliteScheduleStorage.from_local(temp_dir)
    compute_logs = NoOpComputeLogManager(temp_dir)
    return DagsterInstance(
        instance_type=InstanceType.EPHEMERAL,
        local_artifact_storage=artifact_storage,
        run_storage=runs,
        event_storage=events,
        schedule_storage=schedules,
        compute_log_manager=compute_logs,
        run_launcher=launcher,
    )
def mocked_instance():
    """Yield an ephemeral instance while ``DagsterInstance.get`` is patched to return it.

    Both the patch and the backing temporary directory stay active until the
    generator is resumed or closed after the yield.
    """
    with seven.TemporaryDirectory() as temp_dir:
        artifact_storage = LocalArtifactStorage(temp_dir)
        runs = InMemoryRunStorage()
        events = InMemoryEventLogStorage()
        compute_logs = NoOpComputeLogManager(temp_dir)
        launcher = InMemoryRunLauncher()
        instance = DagsterInstance(
            instance_type=InstanceType.EPHEMERAL,
            local_artifact_storage=artifact_storage,
            run_storage=runs,
            event_storage=events,
            compute_log_manager=compute_logs,
            run_launcher=launcher,
        )

        with mock.patch('dagster.core.instance.DagsterInstance.get') as patched_get:
            patched_get.return_value = instance
            yield instance
def dagster_instance(run_launcher, network_postgres):
    """Return an ephemeral ``DagsterInstance`` whose runs and events live in Postgres.

    Args:
        run_launcher: Run launcher handed to the instance unchanged.
        network_postgres: Value substituted into the port position of the
            ``localhost`` Postgres URL.
    """
    tempdir = DagsterInstance.temp_storage()

    url_template = 'postgresql://*****:*****@localhost:{network_postgres}/test'
    postgres_url = url_template.format(network_postgres=network_postgres)

    artifact_storage = LocalArtifactStorage(tempdir)
    runs = PostgresRunStorage(postgres_url)
    events = PostgresEventLogStorage(postgres_url)
    compute_logs = NoOpComputeLogManager(compute_logs_directory(tempdir))

    return DagsterInstance(
        instance_type=InstanceType.EPHEMERAL,
        local_artifact_storage=artifact_storage,
        run_storage=runs,
        event_storage=events,
        compute_log_manager=compute_logs,
        run_launcher=run_launcher,
    )
def ephemeral(tempdir=None):
    """Build an ephemeral ``DagsterInstance`` backed by in-memory run and event storage.

    Args:
        tempdir: Root directory for local artifacts and compute logs. When
            ``None``, ``DagsterInstance.temp_storage()`` supplies one.
    """
    from dagster.core.storage.event_log import InMemoryEventLogStorage
    from dagster.core.storage.root import LocalArtifactStorage
    from dagster.core.storage.runs import InMemoryRunStorage
    from dagster.core.storage.local_compute_log_manager import NoOpComputeLogManager

    storage_root = DagsterInstance.temp_storage() if tempdir is None else tempdir

    artifact_storage = LocalArtifactStorage(storage_root)
    runs = InMemoryRunStorage()
    events = InMemoryEventLogStorage()
    compute_logs = NoOpComputeLogManager(compute_logs_directory(storage_root))

    return DagsterInstance(
        InstanceType.EPHEMERAL,
        local_artifact_storage=artifact_storage,
        run_storage=runs,
        event_storage=events,
        compute_log_manager=compute_logs,
    )
def ephemeral(tempdir=None):
    """Build an ephemeral ``DagsterInstance`` backed by in-memory run and event storage.

    Args:
        tempdir: Root storage directory. When ``None``,
            ``DagsterInstance.temp_storage()`` supplies one. The same directory
            is passed to ``_dagster_feature_set`` and
            ``_compute_logs_base_directory``.
    """
    from dagster.core.storage.event_log import InMemoryEventLogStorage
    from dagster.core.storage.runs import InMemoryRunStorage
    from dagster.core.storage.local_compute_log_manager import NoOpComputeLogManager

    storage_root = DagsterInstance.temp_storage() if tempdir is None else tempdir

    features = _dagster_feature_set(storage_root)
    runs = InMemoryRunStorage()
    events = InMemoryEventLogStorage()
    compute_logs = NoOpComputeLogManager(_compute_logs_base_directory(storage_root))

    return DagsterInstance(
        InstanceType.EPHEMERAL,
        root_storage_dir=storage_root,
        run_storage=runs,
        event_storage=events,
        compute_log_manager=compute_logs,
        feature_set=features,
    )
def test_get_all_schedules():
    """Start one schedule, then check every running schedule returned by GraphQL.

    Asserts the number of running schedules and that each schedule definition
    reports the filesystem-storage ``environmentConfigYaml``.
    """
    with seven.TemporaryDirectory() as temp_dir:
        artifact_storage = LocalArtifactStorage(temp_dir)
        runs = InMemoryRunStorage()
        events = InMemoryEventLogStorage()
        compute_logs = NoOpComputeLogManager(temp_dir)
        schedules = SqliteScheduleStorage.from_local(temp_dir)
        # NOTE(review): this spelling differs from the `FilesystemTestScheduler`
        # name used elsewhere -- confirm which name is actually imported.
        test_scheduler = FilesytemTestScheduler(temp_dir)
        instance = DagsterInstance(
            instance_type=InstanceType.EPHEMERAL,
            local_artifact_storage=artifact_storage,
            run_storage=runs,
            event_storage=events,
            compute_log_manager=compute_logs,
            schedule_storage=schedules,
            scheduler=test_scheduler,
        )

        repository_yaml = file_relative_path(__file__, '../repository.yaml')
        context = define_context_for_repository_yaml(path=repository_yaml, instance=instance)

        # Initialize scheduler
        repository = context.get_repository()
        context.scheduler_handle.up(
            python_path=sys.executable,
            repository_path='',
            repository=repository,
            instance=instance,
        )

        # Start schedule
        instance.start_schedule(repository, 'no_config_pipeline_hourly_schedule')

        # Query Scheduler + all Schedules
        scheduler_result = execute_dagster_graphql(context, GET_SCHEDULES_QUERY)

        result_data = scheduler_result.data
        assert result_data
        scheduler_data = result_data['scheduler']
        assert scheduler_data
        running_schedules = scheduler_data['runningSchedules']
        assert running_schedules
        assert len(running_schedules) == 11

        for running in running_schedules:
            config_yaml = running['scheduleDefinition']['environmentConfigYaml']
            assert config_yaml == 'storage:\n filesystem: {}\n'
def test_start_stop_schedule():
    """Start and then stop a schedule through GraphQL, checking the reported status."""
    schedule_variables = {'scheduleName': 'no_config_pipeline_hourly_schedule'}

    with seven.TemporaryDirectory() as temp_dir:
        artifact_storage = LocalArtifactStorage(temp_dir)
        runs = InMemoryRunStorage()
        events = InMemoryEventLogStorage()
        compute_logs = NoOpComputeLogManager(temp_dir)
        schedules = SqliteScheduleStorage.from_local(temp_dir)
        test_scheduler = FilesystemTestScheduler(temp_dir)
        launcher = SyncInMemoryRunLauncher()
        instance = DagsterInstance(
            instance_type=InstanceType.EPHEMERAL,
            local_artifact_storage=artifact_storage,
            run_storage=runs,
            event_storage=events,
            compute_log_manager=compute_logs,
            schedule_storage=schedules,
            scheduler=test_scheduler,
            run_launcher=launcher,
        )

        repository_yaml = file_relative_path(__file__, '../repository.yaml')
        context = define_context_for_repository_yaml(path=repository_yaml, instance=instance)

        # Initialize scheduler
        repository = context.legacy_get_repository_definition()
        reconcile_scheduler_state(
            python_path=sys.executable,
            repository_path='',
            repository=repository,
            instance=instance,
        )

        # Start schedule
        start_result = execute_dagster_graphql(
            context, START_SCHEDULES_QUERY, variables=schedule_variables
        )
        started = start_result.data['startSchedule']['schedule']
        assert started['status'] == 'RUNNING'

        # Stop schedule
        stop_result = execute_dagster_graphql(
            context, STOP_SCHEDULES_QUERY, variables=schedule_variables
        )
        stopped = stop_result.data['stopRunningSchedule']['schedule']
        assert stopped['status'] == 'STOPPED'
def test_basic_start_scheduled_execution_with_run_launcher():
    """Start a scheduled execution via GraphQL on an instance with a run launcher.

    The response must be a launch success carrying a UUID run id, the
    ``no_config_pipeline`` pipeline name and a ``dagster/schedule_name`` tag.
    """
    schedule_name = 'no_config_pipeline_hourly_schedule'
    test_queue = InMemoryRunLauncher()

    with seven.TemporaryDirectory() as temp_dir:
        artifact_storage = LocalArtifactStorage(temp_dir)
        runs = InMemoryRunStorage()
        events = InMemoryEventLogStorage()
        compute_logs = NoOpComputeLogManager(temp_dir)
        instance = DagsterInstance(
            instance_type=InstanceType.EPHEMERAL,
            local_artifact_storage=artifact_storage,
            run_storage=runs,
            event_storage=events,
            compute_log_manager=compute_logs,
            run_launcher=test_queue,
        )

        repository_yaml = file_relative_path(__file__, '../repository.yaml')
        context = define_context_for_repository_yaml(path=repository_yaml, instance=instance)

        context.scheduler_handle.up(
            python_path=sys.executable,
            repository_path=file_relative_path(__file__, '../'),
        )

        result = execute_dagster_graphql(
            context,
            START_SCHEDULED_EXECUTION_QUERY,
            variables={'scheduleName': schedule_name},
        )

        assert not result.errors
        assert result.data

        # just test existence
        execution = result.data['startScheduledExecution']
        assert execution['__typename'] == 'LaunchPipelineExecutionSuccess'

        launched_run = execution['run']
        assert uuid.UUID(launched_run['runId'])
        assert launched_run['pipeline']['name'] == 'no_config_pipeline'

        assert any(
            tag['key'] == 'dagster/schedule_name' and tag['value'] == schedule_name
            for tag in launched_run['tags']
        )
def run_launch(execution_args, expected_count=None):
    """Invoke the pipeline launch CLI command against an ephemeral instance.

    ``DagsterInstance.get`` is patched to return an in-memory instance with an
    ``InMemoryRunLauncher`` while the command runs.

    Args:
        execution_args: Argument list passed straight to the CLI command.
        expected_count: When truthy, the number of runs that must be in the
            run launcher's queue after the command exits with code 0.
    """
    cli_runner = CliRunner()
    launcher = InMemoryRunLauncher()

    with seven.TemporaryDirectory() as temp_dir:
        artifact_storage = LocalArtifactStorage(temp_dir)
        runs = InMemoryRunStorage()
        events = InMemoryEventLogStorage()
        compute_logs = NoOpComputeLogManager(temp_dir)
        instance = DagsterInstance(
            instance_type=InstanceType.EPHEMERAL,
            local_artifact_storage=artifact_storage,
            run_storage=runs,
            event_storage=events,
            compute_log_manager=compute_logs,
            run_launcher=launcher,
        )

        with mock.patch('dagster.core.instance.DagsterInstance.get') as patched_get:
            patched_get.return_value = instance

            result = cli_runner.invoke(pipeline_launch_command, execution_args)
            assert result.exit_code == 0

            if expected_count:
                queued_runs = launcher.queue()
                assert len(queued_runs) == expected_count
def dagster_instance(helm_namespace, run_launcher):
    """Yield a ``DagsterInstance`` backed by the Postgres pod of the ``dagster`` Helm release.

    Postgres is reached through a local ``kubectl port-forward`` that stays
    open until the generator is resumed or closed after the yield.

    Args:
        helm_namespace: Kubernetes namespace used to look up the Postgres pod
            and to run the port-forward.
        run_launcher: Run launcher handed to the instance unchanged.

    Yields:
        An ephemeral ``DagsterInstance`` with Postgres run and event-log storage.

    Raises:
        Exception: If Postgres is not reachable through the forwarded port
            within ``PG_PORT_FORWARDING_TIMEOUT`` seconds.
    """

    @contextmanager
    def local_port_forward_postgres():
        """Forward a free local port to the Postgres pod and yield that port."""
        print('Port-forwarding postgres')
        postgres_pod_name = (
            check_output(
                [
                    'kubectl',
                    'get',
                    'pods',
                    '--namespace',
                    helm_namespace,
                    '-l',
                    'app=postgresql,release=dagster',
                    '-o',
                    'jsonpath="{.items[0].metadata.name}"',
                ]
            )
            .decode('utf-8')
            .strip('"')  # the jsonpath template wraps the pod name in double quotes
        )
        forward_port = find_free_port()

        wait_for_pod(postgres_pod_name, namespace=helm_namespace)

        # Start the forwarder *before* entering the try block: if Popen itself
        # fails there is no process to terminate, and the ``finally`` clause
        # must not hide the real error behind an unbound ``p``.
        p = subprocess.Popen(
            [
                'kubectl',
                'port-forward',
                '--namespace',
                helm_namespace,
                postgres_pod_name,
                '{forward_port}:5432'.format(forward_port=forward_port),
            ]
        )

        try:
            # Validate port forwarding works
            start = time.time()

            while True:
                if time.time() - start > PG_PORT_FORWARDING_TIMEOUT:
                    raise Exception('Timed out while waiting for postgres port forwarding')

                print(
                    'Waiting for port forwarding from k8s pod %s:5432 to localhost:%d to be'
                    ' available...' % (postgres_pod_name, forward_port)
                )
                try:
                    conn = psycopg2.connect(
                        database='test',
                        user='******',
                        password='******',
                        host='localhost',
                        port=forward_port,
                    )
                    conn.close()
                    break
                # Connection failures are expected until the tunnel is up, so
                # keep retrying -- but let KeyboardInterrupt / SystemExit
                # propagate instead of swallowing them with a bare except.
                except Exception:  # pylint: disable=broad-except
                    pass
                time.sleep(1)

            yield forward_port

        finally:
            print('Terminating port-forwarding')
            p.terminate()

    tempdir = DagsterInstance.temp_storage()

    with local_port_forward_postgres() as local_forward_port:
        postgres_url = 'postgresql://*****:*****@localhost:{local_forward_port}/test'.format(
            local_forward_port=local_forward_port
        )
        print('Local Postgres forwarding URL: ', postgres_url)

        instance = DagsterInstance(
            instance_type=InstanceType.EPHEMERAL,
            local_artifact_storage=LocalArtifactStorage(tempdir),
            run_storage=PostgresRunStorage(postgres_url),
            event_storage=PostgresEventLogStorage(postgres_url),
            compute_log_manager=NoOpComputeLogManager(compute_logs_directory(tempdir)),
            run_launcher=run_launcher,
        )
        yield instance