示例#1
0
def run_test_backfill(execution_args, expected_count=None, error_message=None):
    """Exercise the backfill command against a throwaway ephemeral instance.

    ``DagsterInstance.get`` is patched so the code under test receives an
    instance whose run launcher is an ``InMemoryRunLauncher``.

    Args:
        execution_args: Arguments passed through ``backfill_execute_args`` and
            ``backfill_cli_runner_args`` to the backfill command.
        expected_count: When truthy (and no error is expected), the number of
            runs that must be in the launcher queue afterwards.
        error_message: When truthy, the direct command call must raise
            ``UsageError`` whose message contains this text, and the CLI
            invocation must exit with code 2.
    """
    cli = CliRunner()
    run_launcher = InMemoryRunLauncher()
    with seven.TemporaryDirectory() as temp_dir:
        instance_kwargs = dict(
            instance_type=InstanceType.EPHEMERAL,
            local_artifact_storage=LocalArtifactStorage(temp_dir),
            run_storage=InMemoryRunStorage(),
            event_storage=InMemoryEventLogStorage(),
            compute_log_manager=NoOpComputeLogManager(),
            run_launcher=run_launcher,
        )
        instance = DagsterInstance(**instance_kwargs)

        patch_target = 'dagster.core.instance.DagsterInstance.get'
        with mock.patch(patch_target) as patched_get:
            patched_get.return_value = instance

            # First check the programmatic entry point raises as expected.
            if error_message:
                with pytest.raises(UsageError) as error_info:
                    execute_backfill_command(
                        backfill_execute_args(execution_args), no_print)
                assert error_info and error_message in error_info.value.message

            # Then check the CLI entry point's exit code.
            cli_args = backfill_cli_runner_args(execution_args)
            result = cli.invoke(pipeline_backfill_command, cli_args)

            if error_message:
                assert result.exit_code == 2
                return

            assert result.exit_code == 0
            if expected_count:
                assert len(run_launcher.queue()) == expected_count
示例#2
0
def test_run_launcher():
    """Launch a pipeline through GraphQL and run it from the in-memory queue.

    The launch mutation must report success with the run in ``NOT_STARTED``;
    after ``run_one`` drains one entry from the launcher queue, querying the
    run must report ``SUCCESS``.
    """
    test_queue = InMemoryRunLauncher()

    # Keep the whole test inside the temporary-directory context: the instance
    # is rooted at ``temp_dir`` and is still used while launching and executing
    # the run, so the directory must outlive those steps.
    with seven.TemporaryDirectory() as temp_dir:
        instance = DagsterInstance(
            instance_type=InstanceType.EPHEMERAL,
            local_artifact_storage=LocalArtifactStorage(temp_dir),
            run_storage=InMemoryRunStorage(),
            event_storage=InMemoryEventLogStorage(),
            compute_log_manager=NoOpComputeLogManager(temp_dir),
            run_launcher=test_queue,
        )

        context = define_context_for_repository_yaml(
            path=script_relative_path('../repository.yaml'), instance=instance
        )

        result = execute_dagster_graphql(
            context=context,
            query=LAUNCH_PIPELINE_EXECUTION_MUTATION,
            variables={
                'executionParams': {'selector': {'name': 'no_config_pipeline'}, 'mode': 'default'}
            },
        )

        launch = result.data['launchPipelineExecution']
        assert launch['__typename'] == 'LaunchPipelineExecutionSuccess'
        assert launch['run']['status'] == 'NOT_STARTED'

        run_id = launch['run']['runId']

        # Execute the single queued run.
        test_queue.run_one(instance)

        result = execute_dagster_graphql(
            context=context, query=RUN_QUERY, variables={'runId': run_id}
        )
        assert result.data['pipelineRunOrError']['__typename'] == 'PipelineRun'
        assert result.data['pipelineRunOrError']['status'] == 'SUCCESS'
示例#3
0
def test_get_schedule():
    """Query a single schedule by name and check it resolves with a partition set."""
    with seven.TemporaryDirectory() as temp_dir:
        instance_kwargs = dict(
            instance_type=InstanceType.EPHEMERAL,
            local_artifact_storage=LocalArtifactStorage(temp_dir),
            run_storage=InMemoryRunStorage(),
            event_storage=InMemoryEventLogStorage(),
            compute_log_manager=NoOpComputeLogManager(),
            schedule_storage=SqliteScheduleStorage.from_local(temp_dir),
            scheduler=FilesystemTestScheduler(temp_dir),
            run_launcher=SyncInMemoryRunLauncher(),
        )
        instance = DagsterInstance(**instance_kwargs)

        context = define_test_context(instance)

        # The scheduler state has to be reconciled before schedules can be queried.
        repository = context.legacy_get_repository_definition()
        instance.reconcile_scheduler_state(
            repository=repository,
            python_path='/path/to/python',
            repository_path='/path/to/repository',
        )

        query_variables = {'scheduleName': 'partition_based_multi_mode_decorator'}
        result = execute_dagster_graphql(context, GET_SCHEDULE, variables=query_variables)

        assert result.data
        schedule_or_error = result.data['scheduleOrError']
        assert schedule_or_error['__typename'] == 'RunningSchedule'
        assert schedule_or_error['scheduleDefinition']['partitionSet']
示例#4
0
def get_instance(temp_dir):
    """Build an ephemeral ``DagsterInstance`` rooted at ``temp_dir``.

    Run and event-log storage are the in-memory implementations; the compute
    log manager is a ``NoOpComputeLogManager`` constructed with ``temp_dir``.
    """
    artifact_storage = LocalArtifactStorage(temp_dir)
    runs = InMemoryRunStorage()
    events = InMemoryEventLogStorage()
    compute_logs = NoOpComputeLogManager(temp_dir)
    return DagsterInstance(
        instance_type=InstanceType.EPHEMERAL,
        local_artifact_storage=artifact_storage,
        run_storage=runs,
        event_storage=events,
        compute_log_manager=compute_logs,
    )
示例#5
0
def test_get_all_schedules():
    """Start one schedule, list all schedules via GraphQL and verify each entry.

    Checks the total number of schedules, that the started schedule reports
    ``RUNNING``, and that every schedule exposes the expected ``runConfigYaml``.
    """
    with seven.TemporaryDirectory() as temp_dir:
        instance_kwargs = dict(
            instance_type=InstanceType.EPHEMERAL,
            local_artifact_storage=LocalArtifactStorage(temp_dir),
            run_storage=InMemoryRunStorage(),
            event_storage=InMemoryEventLogStorage(),
            compute_log_manager=NoOpComputeLogManager(temp_dir),
            schedule_storage=SqliteScheduleStorage.from_local(temp_dir),
            scheduler=FilesystemTestScheduler(temp_dir),
            run_launcher=SyncInMemoryRunLauncher(),
        )
        instance = DagsterInstance(**instance_kwargs)

        repository_yaml = file_relative_path(__file__, '../repository.yaml')
        context = define_context_for_repository_yaml(path=repository_yaml, instance=instance)

        # Initialize scheduler
        repository = context.legacy_get_repository_definition()
        reconcile_scheduler_state(
            python_path=sys.executable,
            repository_path="",
            repository=repository,
            instance=instance,
        )

        # Start schedule
        instance.start_schedule(repository.name, "no_config_pipeline_hourly_schedule")

        # Query Scheduler + all Schedules
        scheduler_result = execute_dagster_graphql(context, GET_SCHEDULES_QUERY)

        # These schedules are defined in dagster_graphql_tests/graphql/setup_scheduler.py
        # If you add a schedule there, be sure to update the number of schedules below
        assert scheduler_result.data
        assert scheduler_result.data['scheduler']
        running_schedules = scheduler_result.data['scheduler']['runningSchedules']
        assert running_schedules
        assert len(running_schedules) == 18

        for entry in running_schedules:
            definition = entry['scheduleDefinition']
            schedule_name = definition['name']

            if schedule_name == 'no_config_pipeline_hourly_schedule':
                assert entry['status'] == 'RUNNING'

            if schedule_name == 'environment_dict_error_schedule':
                assert definition['runConfigYaml'] is None
            elif schedule_name == 'invalid_config_schedule':
                assert (definition['runConfigYaml'] ==
                        'solids:\n  takes_an_enum:\n    config: invalid\n')
            else:
                assert definition['runConfigYaml'] == 'storage:\n  filesystem: {}\n'
def get_instance(temp_dir):
    """Build an ephemeral ``DagsterInstance`` with a test scheduler under ``temp_dir``.

    Uses in-memory run/event storage, a ``FilesystemTestScheduler`` and a
    SQLite schedule storage both created from ``temp_dir``.
    """
    components = dict(
        local_artifact_storage=LocalArtifactStorage(temp_dir),
        run_storage=InMemoryRunStorage(),
        event_storage=InMemoryEventLogStorage(),
        scheduler=FilesystemTestScheduler(temp_dir),
        schedule_storage=SqliteScheduleStorage.from_local(temp_dir),
        compute_log_manager=NoOpComputeLogManager(temp_dir),
    )
    return DagsterInstance(instance_type=InstanceType.EPHEMERAL, **components)
def define_scheduler_instance(tempdir):
    """Build an ephemeral ``DagsterInstance`` that uses a ``SystemCronScheduler``.

    Both the schedule storage and the scheduler are pointed at the
    ``schedules`` sub-path of ``tempdir``.
    """
    artifact_storage = LocalArtifactStorage(tempdir)
    runs = InMemoryRunStorage()
    events = InMemoryEventLogStorage()
    compute_logs = NoOpComputeLogManager(tempdir)
    schedule_storage = SqliteScheduleStorage.from_local(os.path.join(tempdir, 'schedules'))
    cron_scheduler = SystemCronScheduler(os.path.join(tempdir, 'schedules'))
    return DagsterInstance(
        instance_type=InstanceType.EPHEMERAL,
        local_artifact_storage=artifact_storage,
        run_storage=runs,
        event_storage=events,
        compute_log_manager=compute_logs,
        schedule_storage=schedule_storage,
        scheduler=cron_scheduler,
    )
示例#8
0
def define_scheduler_instance():
    """Yield an ephemeral ``DagsterInstance`` backed by a temporary directory.

    The instance is yielded from inside the temporary-directory context, so
    the directory stays alive until the generator is resumed or closed.
    """
    with seven.TemporaryDirectory() as temp_dir:
        components = dict(
            local_artifact_storage=LocalArtifactStorage(temp_dir),
            run_storage=InMemoryRunStorage(),
            event_storage=InMemoryEventLogStorage(),
            schedule_storage=SqliteScheduleStorage.from_local(temp_dir),
            scheduler=FilesytemTestScheduler(temp_dir),
            compute_log_manager=NoOpComputeLogManager(temp_dir),
        )
        instance = DagsterInstance(instance_type=InstanceType.EPHEMERAL, **components)
        yield instance
 def _readonly_in_memory_instance():
     """Yield an ephemeral instance whose run launcher is an ``ExplodingRunLauncher``.

     Compute logs are handled by a ``LocalComputeLogManager`` and schedules are
     stored in SQLite, both under a temporary directory that lives for as long
     as the generator is suspended at the ``yield``.
     """
     with seven.TemporaryDirectory() as temp_dir:
         artifact_storage = LocalArtifactStorage(temp_dir)
         runs = InMemoryRunStorage()
         events = InMemoryEventLogStorage()
         compute_logs = LocalComputeLogManager(temp_dir)
         launcher = ExplodingRunLauncher()
         schedules = SqliteScheduleStorage.from_local(temp_dir)
         yield DagsterInstance(
             instance_type=InstanceType.EPHEMERAL,
             local_artifact_storage=artifact_storage,
             run_storage=runs,
             event_storage=events,
             compute_log_manager=compute_logs,
             run_launcher=launcher,
             schedule_storage=schedules,
         )
示例#10
0
def get_instance():
    """Yield an ephemeral ``DagsterInstance`` with SQLite run storage.

    Runs are stored via ``SqliteRunStorage.from_local`` in a temporary
    directory; events stay in memory and runs are launched with a
    ``SyncInMemoryRunLauncher``.
    """
    with tempfile.TemporaryDirectory() as temp_dir:
        components = dict(
            local_artifact_storage=LocalArtifactStorage(temp_dir),
            run_storage=SqliteRunStorage.from_local(temp_dir),
            event_storage=InMemoryEventLogStorage(),
            compute_log_manager=NoOpComputeLogManager(),
            run_coordinator=DefaultRunCoordinator(),
            run_launcher=SyncInMemoryRunLauncher(),
        )
        instance = DagsterInstance(instance_type=InstanceType.EPHEMERAL, **components)
        yield instance
示例#11
0
def get_instance_with_launcher(temp_dir):
    """Build an ephemeral ``DagsterInstance`` that launches runs in memory.

    A fresh ``InMemoryRunLauncher`` is created for the instance; storage that
    needs a location (artifacts, schedules, compute logs) uses ``temp_dir``.
    """
    launcher = InMemoryRunLauncher()

    components = dict(
        local_artifact_storage=LocalArtifactStorage(temp_dir),
        run_storage=InMemoryRunStorage(),
        event_storage=InMemoryEventLogStorage(),
        schedule_storage=SqliteScheduleStorage.from_local(temp_dir),
        compute_log_manager=NoOpComputeLogManager(temp_dir),
        run_launcher=launcher,
    )
    return DagsterInstance(instance_type=InstanceType.EPHEMERAL, **components)
示例#12
0
 def _non_launchable_in_memory_instance():
     """Yield an ephemeral instance configured with an ``ExplodingRunLauncher``.

     The instance also gets a ``DefaultRunCoordinator``, SQLite schedule storage
     and a ``FilesystemTestScheduler``, all rooted at a temporary directory that
     remains available while the generator is suspended at the ``yield``.
     """
     with tempfile.TemporaryDirectory() as temp_dir:
         components = dict(
             local_artifact_storage=LocalArtifactStorage(temp_dir),
             run_storage=InMemoryRunStorage(),
             event_storage=InMemoryEventLogStorage(),
             compute_log_manager=LocalComputeLogManager(temp_dir),
             run_launcher=ExplodingRunLauncher(),
             run_coordinator=DefaultRunCoordinator(),
             schedule_storage=SqliteScheduleStorage.from_local(temp_dir),
             scheduler=FilesystemTestScheduler(temp_dir),
         )
         instance = DagsterInstance(instance_type=InstanceType.EPHEMERAL, **components)
         yield instance
示例#13
0
def mocked_instance():
    """Yield an ephemeral instance while ``DagsterInstance.get`` is patched to return it.

    The patch and the backing temporary directory both stay active for as long
    as the generator is suspended at the ``yield``.
    """
    with seven.TemporaryDirectory() as temp_dir:
        components = dict(
            local_artifact_storage=LocalArtifactStorage(temp_dir),
            run_storage=InMemoryRunStorage(),
            event_storage=InMemoryEventLogStorage(),
            compute_log_manager=NoOpComputeLogManager(),
            run_launcher=InMemoryRunLauncher(),
        )
        instance = DagsterInstance(instance_type=InstanceType.EPHEMERAL, **components)

        # Configure the replacement mock's return value at patch time.
        with mock.patch('dagster.core.instance.DagsterInstance.get', return_value=instance):
            yield instance
示例#14
0
def define_scheduler_instance(tempdir):
    """Build an ephemeral instance using ``SystemCronScheduler``.

    Construction happens inside ``pytest.warns`` so that a ``UserWarning``
    matching "`SystemCronScheduler` is deprecated" is required to be emitted
    while the instance is being built.
    """
    with pytest.warns(UserWarning, match="`SystemCronScheduler` is deprecated"):
        components = dict(
            local_artifact_storage=LocalArtifactStorage(tempdir),
            run_storage=InMemoryRunStorage(),
            event_storage=InMemoryEventLogStorage(),
            compute_log_manager=NoOpComputeLogManager(),
            schedule_storage=SqliteScheduleStorage.from_local(os.path.join(tempdir, "schedules")),
            scheduler=SystemCronScheduler(),
            run_coordinator=DefaultRunCoordinator(),
            run_launcher=SyncInMemoryRunLauncher(),
        )
        instance = DagsterInstance(instance_type=InstanceType.EPHEMERAL, **components)
        return instance
示例#15
0
def test_in_memory_event_log_storage_store_events_and_wipe():
    """Store one event for run ``'foo'`` and confirm ``wipe`` removes it again."""
    run_id = 'foo'
    event_log = InMemoryEventLogStorage()

    def stored_count():
        return len(event_log.get_logs_for_run(run_id))

    assert stored_count() == 0

    timestamp = time.time()
    engine_event = DagsterEvent(
        DagsterEventType.ENGINE_EVENT.value,
        'nonce',
        event_specific_data=EngineEventData.in_process(999),
    )
    record = DagsterEventRecord(
        None,
        'Message2',
        'debug',
        '',
        run_id,
        timestamp,
        dagster_event=engine_event,
    )
    event_log.store_event(record)
    assert stored_count() == 1

    event_log.wipe()
    assert stored_count() == 0
示例#16
0
    def ephemeral(tempdir=None):
        """Create an ephemeral ``DagsterInstance`` with in-memory run and event storage.

        Args:
            tempdir: Root directory for local artifacts and compute logs. When
                ``None``, ``DagsterInstance.temp_storage()`` supplies one.
        """
        # Imported locally, as in the rest of this module's factory helpers.
        from dagster.core.storage.event_log import InMemoryEventLogStorage
        from dagster.core.storage.root import LocalArtifactStorage
        from dagster.core.storage.runs import InMemoryRunStorage
        from dagster.core.storage.local_compute_log_manager import NoOpComputeLogManager

        root = DagsterInstance.temp_storage() if tempdir is None else tempdir

        artifact_storage = LocalArtifactStorage(root)
        runs = InMemoryRunStorage()
        events = InMemoryEventLogStorage()
        compute_logs = NoOpComputeLogManager(compute_logs_directory(root))
        return DagsterInstance(
            InstanceType.EPHEMERAL,
            local_artifact_storage=artifact_storage,
            run_storage=runs,
            event_storage=events,
            compute_log_manager=compute_logs,
        )
示例#17
0
    def ephemeral(tempdir=None):
        """Create an ephemeral ``DagsterInstance`` with in-memory run and event storage.

        Args:
            tempdir: Root storage directory. When ``None``,
                ``DagsterInstance.temp_storage()`` supplies one. The feature
                set is derived from this directory via ``_dagster_feature_set``.
        """
        from dagster.core.storage.event_log import InMemoryEventLogStorage
        from dagster.core.storage.runs import InMemoryRunStorage

        root = DagsterInstance.temp_storage() if tempdir is None else tempdir

        features = _dagster_feature_set(root)
        runs = InMemoryRunStorage()
        events = InMemoryEventLogStorage()

        return DagsterInstance(
            InstanceType.EPHEMERAL,
            root_storage_dir=root,
            run_storage=runs,
            event_storage=events,
            feature_set=features,
        )
示例#18
0
    def ephemeral(tempdir=None):
        """Create an ephemeral ``DagsterInstance`` with in-memory run and event storage.

        Args:
            tempdir: Root storage directory. When ``None``,
                ``DagsterInstance.temp_storage()`` supplies one. The feature
                set and the compute-log base directory are both derived from it.
        """
        from dagster.core.storage.event_log import InMemoryEventLogStorage
        from dagster.core.storage.runs import InMemoryRunStorage
        from dagster.core.storage.local_compute_log_manager import NoOpComputeLogManager

        root = DagsterInstance.temp_storage() if tempdir is None else tempdir

        features = _dagster_feature_set(root)
        runs = InMemoryRunStorage()
        events = InMemoryEventLogStorage()
        compute_logs = NoOpComputeLogManager(_compute_logs_base_directory(root))

        return DagsterInstance(
            InstanceType.EPHEMERAL,
            root_storage_dir=root,
            run_storage=runs,
            event_storage=events,
            compute_log_manager=compute_logs,
            feature_set=features,
        )
示例#19
0
    def ephemeral(tempdir=None, preload=None):
        """Create an ephemeral ``DagsterInstance`` that launches runs synchronously in memory.

        Args:
            tempdir: Root directory for local artifact storage. When ``None``,
                ``DagsterInstance.temp_storage()`` supplies one.
            preload: Forwarded unchanged as ``preload`` to both the in-memory
                run storage and the in-memory event-log storage.
        """
        from dagster.core.launcher.sync_in_memory_run_launcher import SyncInMemoryRunLauncher
        from dagster.core.storage.event_log import InMemoryEventLogStorage
        from dagster.core.storage.root import LocalArtifactStorage
        from dagster.core.storage.runs import InMemoryRunStorage
        from dagster.core.storage.noop_compute_log_manager import NoOpComputeLogManager

        root = DagsterInstance.temp_storage() if tempdir is None else tempdir

        artifact_storage = LocalArtifactStorage(root)
        runs = InMemoryRunStorage(preload=preload)
        events = InMemoryEventLogStorage(preload=preload)
        compute_logs = NoOpComputeLogManager()
        launcher = SyncInMemoryRunLauncher()

        return DagsterInstance(
            InstanceType.EPHEMERAL,
            local_artifact_storage=artifact_storage,
            run_storage=runs,
            event_storage=events,
            compute_log_manager=compute_logs,
            run_launcher=launcher,
        )
示例#20
0
def test_get_all_schedules():
    """Bring the scheduler up, start one schedule and list all schedules via GraphQL.

    Verifies that eleven schedules are returned and that each one reports the
    filesystem-storage ``environmentConfigYaml``.
    """
    with seven.TemporaryDirectory() as temp_dir:
        instance_kwargs = dict(
            instance_type=InstanceType.EPHEMERAL,
            local_artifact_storage=LocalArtifactStorage(temp_dir),
            run_storage=InMemoryRunStorage(),
            event_storage=InMemoryEventLogStorage(),
            compute_log_manager=NoOpComputeLogManager(temp_dir),
            schedule_storage=SqliteScheduleStorage.from_local(temp_dir),
            scheduler=FilesytemTestScheduler(temp_dir),
        )
        instance = DagsterInstance(**instance_kwargs)

        repository_yaml = file_relative_path(__file__, '../repository.yaml')
        context = define_context_for_repository_yaml(path=repository_yaml, instance=instance)

        # Initialize scheduler
        repository = context.get_repository()
        handle = context.scheduler_handle
        handle.up(
            python_path=sys.executable,
            repository_path="",
            repository=repository,
            instance=instance,
        )

        # Start schedule
        instance.start_schedule(repository, "no_config_pipeline_hourly_schedule")

        # Query Scheduler + all Schedules
        scheduler_result = execute_dagster_graphql(context, GET_SCHEDULES_QUERY)

        assert scheduler_result.data
        assert scheduler_result.data['scheduler']
        running_schedules = scheduler_result.data['scheduler']['runningSchedules']
        assert running_schedules
        assert len(running_schedules) == 11

        expected_yaml = 'storage:\n  filesystem: {}\n'
        for entry in running_schedules:
            assert entry['scheduleDefinition']['environmentConfigYaml'] == expected_yaml
def test_basic_start_scheduled_execution_with_run_launcher():
    """Start a scheduled execution through GraphQL on an instance with a run launcher.

    Verifies the mutation succeeds, returns a run with a UUID run id for
    ``no_config_pipeline``, and tags the run with the schedule name.
    """
    launcher = InMemoryRunLauncher()

    with seven.TemporaryDirectory() as temp_dir:
        instance_kwargs = dict(
            instance_type=InstanceType.EPHEMERAL,
            local_artifact_storage=LocalArtifactStorage(temp_dir),
            run_storage=InMemoryRunStorage(),
            event_storage=InMemoryEventLogStorage(),
            compute_log_manager=NoOpComputeLogManager(temp_dir),
            run_launcher=launcher,
        )
        instance = DagsterInstance(**instance_kwargs)

        repository_yaml = file_relative_path(__file__, '../repository.yaml')
        context = define_context_for_repository_yaml(path=repository_yaml, instance=instance)

        handle = context.scheduler_handle
        handle.up(
            python_path=sys.executable,
            repository_path=file_relative_path(__file__, '../'),
        )

        schedule_name = 'no_config_pipeline_hourly_schedule'
        result = execute_dagster_graphql(
            context,
            START_SCHEDULED_EXECUTION_QUERY,
            variables={'scheduleName': schedule_name},
        )

        assert not result.errors
        assert result.data

        # just test existence
        payload = result.data['startScheduledExecution']
        assert payload['__typename'] == 'LaunchPipelineExecutionSuccess'

        run = payload['run']
        assert uuid.UUID(run['runId'])
        assert run['pipeline']['name'] == 'no_config_pipeline'

        assert any(
            tag['key'] == 'dagster/schedule_name' and tag['value'] == schedule_name
            for tag in run['tags']
        )
示例#22
0
def test_start_stop_schedule():
    """Start and then stop a schedule through GraphQL, checking the reported status."""
    with seven.TemporaryDirectory() as temp_dir:
        instance_kwargs = dict(
            instance_type=InstanceType.EPHEMERAL,
            local_artifact_storage=LocalArtifactStorage(temp_dir),
            run_storage=InMemoryRunStorage(),
            event_storage=InMemoryEventLogStorage(),
            compute_log_manager=NoOpComputeLogManager(temp_dir),
            schedule_storage=SqliteScheduleStorage.from_local(temp_dir),
            scheduler=FilesystemTestScheduler(temp_dir),
            run_launcher=SyncInMemoryRunLauncher(),
        )
        instance = DagsterInstance(**instance_kwargs)

        repository_yaml = file_relative_path(__file__, '../repository.yaml')
        context = define_context_for_repository_yaml(path=repository_yaml, instance=instance)

        # Initialize scheduler
        repository = context.legacy_get_repository_definition()
        reconcile_scheduler_state(
            python_path=sys.executable,
            repository_path="",
            repository=repository,
            instance=instance,
        )

        # Start schedule
        start_result = execute_dagster_graphql(
            context,
            START_SCHEDULES_QUERY,
            variables={'scheduleName': 'no_config_pipeline_hourly_schedule'},
        )
        started = start_result.data['startSchedule']['schedule']
        assert started['status'] == 'RUNNING'

        # Stop schedule
        stop_result = execute_dagster_graphql(
            context,
            STOP_SCHEDULES_QUERY,
            variables={'scheduleName': 'no_config_pipeline_hourly_schedule'},
        )
        stopped = stop_result.data['stopRunningSchedule']['schedule']
        assert stopped['status'] == 'STOPPED'
示例#23
0
def run_launch(execution_args, expected_count=None):
    """Invoke the pipeline launch CLI command against a throwaway ephemeral instance.

    Args:
        execution_args: Argument list handed to ``pipeline_launch_command``
            through the CLI runner.
        expected_count: When truthy, the number of runs that must be in the
            in-memory launcher queue after the command exits successfully.
    """
    cli = CliRunner()
    run_launcher = InMemoryRunLauncher()
    with seven.TemporaryDirectory() as temp_dir:
        instance_kwargs = dict(
            instance_type=InstanceType.EPHEMERAL,
            local_artifact_storage=LocalArtifactStorage(temp_dir),
            run_storage=InMemoryRunStorage(),
            event_storage=InMemoryEventLogStorage(),
            compute_log_manager=NoOpComputeLogManager(),
            run_launcher=run_launcher,
        )
        instance = DagsterInstance(**instance_kwargs)

        # Make the command resolve DagsterInstance.get() to the instance above.
        with mock.patch('dagster.core.instance.DagsterInstance.get', return_value=instance):
            result = cli.invoke(pipeline_launch_command, execution_args)
            assert result.exit_code == 0, result.stdout
            if not expected_count:
                return
            assert len(run_launcher.queue()) == expected_count
示例#24
0
    def test_run_record_timestamps(self, storage):
        """Check that a run record's start and end times match the frozen clock.

        A one-op job is executed in process while ``pendulum.test`` pins the
        current time; the single resulting run record must carry that pinned
        timestamp as both ``start_time`` and ``end_time``.
        """
        assert storage

        self._skip_in_memory(storage)

        @op
        def a():
            pass

        @job
        def my_job():
            a()

        with tempfile.TemporaryDirectory() as temp_dir:
            # Reuse the instance already attached to the storage when there is one.
            if not storage._instance:  # pylint: disable=protected-access
                instance = DagsterInstance(
                    instance_type=InstanceType.EPHEMERAL,
                    local_artifact_storage=LocalArtifactStorage(temp_dir),
                    run_storage=storage,
                    event_storage=InMemoryEventLogStorage(),
                    compute_log_manager=NoOpComputeLogManager(),
                    run_coordinator=DefaultRunCoordinator(),
                    run_launcher=SyncInMemoryRunLauncher(),
                )
            else:
                instance = storage._instance  # pylint: disable=protected-access

            central_time = create_pendulum_time(2019, 11, 2, 0, 0, 0, tz="US/Central")
            freeze_datetime = to_timezone(central_time, "US/Pacific")

            with pendulum.test(freeze_datetime):
                result = my_job.execute_in_process(instance=instance)
                run_filter = PipelineRunsFilter(run_ids=[result.run_id])
                records = instance.get_run_records(filters=run_filter)
                assert len(records) == 1
                record = records[0]
                assert record.start_time == freeze_datetime.timestamp()
                assert record.end_time == freeze_datetime.timestamp()
示例#25
0
 def event_log_storage(self):  # pylint: disable=arguments-differ
     """Return a fresh ``InMemoryEventLogStorage``."""
     storage = InMemoryEventLogStorage()
     return storage
示例#26
0
 def event_log_storage(self):  # pylint: disable=arguments-differ
     """Yield a fresh ``InMemoryEventLogStorage`` and dispose of it afterwards.

     ``dispose()`` runs in a ``finally`` clause, so it is called even when the
     consumer raises while the generator is suspended.
     """
     in_memory_storage = InMemoryEventLogStorage()
     try:
         yield in_memory_storage
     finally:
         in_memory_storage.dispose()
示例#27
0
def create_in_memory_event_log_storage():
    """Yield a single, newly created ``InMemoryEventLogStorage``."""
    storage = InMemoryEventLogStorage()
    yield storage
示例#28
0
def test_event_log_storage_watch(event_storage_factory_cm_fn):
    """Exercise ``watch`` / ``end_watch`` / ``delete_events`` on an event-log storage.

    Events are stored one at a time for run ``'foo'`` and, after each step, the
    test checks both how many events the storage holds and how many have been
    delivered to the registered watcher callback.

    Args:
        event_storage_factory_cm_fn: Callable returning a context manager that
            yields an event-log storage.
    """
    def evt(name):
        # Build an engine-event record for run 'foo' whose message is `name`.
        return DagsterEventRecord(
            None,
            name,
            'debug',
            '',
            'foo',
            time.time(),
            dagster_event=DagsterEvent(
                DagsterEventType.ENGINE_EVENT.value,
                'nonce',
                event_specific_data=EngineEventData.in_process(999),
            ),
        )

    with event_storage_factory_cm_fn() as storage:
        # Every event delivered to the watcher is collected here.
        watched = []
        watcher = lambda x: watched.append(x)  # pylint: disable=unnecessary-lambda

        # NOTE(review): this rebinds `storage`, so the storage yielded by
        # event_storage_factory_cm_fn is never exercised and the test always
        # runs against InMemoryEventLogStorage. Looks unintentional -- confirm
        # before removing, since the immediate `watched` assertions below may
        # not hold for other storage implementations.
        storage = InMemoryEventLogStorage()
        assert len(storage.get_logs_for_run('foo')) == 0

        # No watcher is registered yet, so nothing is delivered.
        storage.store_event(evt('Message1'))
        assert len(storage.get_logs_for_run('foo')) == 1
        assert len(watched) == 0

        storage.watch('foo', None, watcher)
        storage.store_event(evt('Message2'))
        assert len(storage.get_logs_for_run('foo')) == 2
        assert len(watched) == 1

        # end_watch with a handler that was never registered: the test expects
        # the real watcher to keep receiving events.
        storage.end_watch('foo', lambda event: None)
        storage.store_event(evt('Message3'))
        assert len(storage.get_logs_for_run('foo')) == 3
        assert len(watched) == 2

        # end_watch for a different run id: again expected to leave the
        # watcher for 'foo' in place.
        storage.end_watch('bar', lambda event: None)
        storage.store_event(evt('Message4'))
        assert len(storage.get_logs_for_run('foo')) == 4
        assert len(watched) == 3

        time.sleep(
            0.5
        )  # this value scientifically selected from a range of attractive values
        # Ending the watch with the registered handler stops further deliveries.
        storage.end_watch('foo', watcher)
        time.sleep(0.5)
        storage.store_event(evt('Message5'))
        assert len(storage.get_logs_for_run('foo')) == 5
        assert len(watched) == 3

        # Deleting the run's events empties the storage but does not touch
        # what the watcher already collected.
        storage.delete_events('foo')
        assert len(storage.get_logs_for_run('foo')) == 0
        assert len(watched) == 3
示例#29
0
def create_in_memory_event_log_instance():
    """Yield ``[instance, storage]`` for an in-memory event-log storage.

    The instance is obtained from ``get_instance`` using a temporary directory
    that remains available while the generator is suspended at the ``yield``.
    """
    with seven.TemporaryDirectory() as temp_dir:
        event_log = InMemoryEventLogStorage()
        yield [get_instance(temp_dir, event_log), event_log]
示例#30
0
def test_in_memory_event_log_storage_run_not_found():
    """Asking a fresh in-memory storage for an unknown run must give an empty list."""
    logs = InMemoryEventLogStorage().get_logs_for_run('bar')
    assert logs == []