Пример #1
0
def execute_step_out_of_process(step_context, step):
    '''Execute ``step`` through a child-process command and yield what it emits.

    A fresh RunConfig is built for the child from the parent's run id, tags,
    step keys and mode; loggers, event callback and re-execution config are
    explicitly left unset. If the parent RunConfig carries loggers, a debug
    message listing their names is logged to say they are being ignored.

    Args:
        step_context: Context exposing ``run_config``, ``log``,
            ``environment_dict`` and ``executor_config``.
        step: The step to run; only ``step.key`` is read here.

    Yields:
        Every item produced by ``execute_child_process_command``, unchanged.
    '''
    parent_run_config = step_context.run_config

    if parent_run_config.loggers:
        quoted_names = [
            '\'{name}\''.format(name=logger.name)
            for logger in parent_run_config.loggers
        ]
        step_context.log.debug(
            'Loggers cannot be injected via RunConfig using the multiprocess executor. Define '
            'loggers on the mode instead. Ignoring loggers: [{logger_names}]'.
            format(logger_names=', '.join(quoted_names)))

    child_run_config = RunConfig(
        run_id=parent_run_config.run_id,
        tags=parent_run_config.tags,
        loggers=None,
        event_callback=None,
        reexecution_config=None,
        step_keys_to_execute=parent_run_config.step_keys_to_execute,
        mode=parent_run_config.mode,
    )

    command = InProcessExecutorChildProcessCommand(
        step_context.environment_dict,
        child_run_config,
        step_context.executor_config,
        step.key,
    )

    for emitted in execute_child_process_command(command):
        # Only DagsterEvent items are logged, and only when the parent run
        # config has an event callback set.
        callback_present = bool(step_context.run_config.event_callback)
        if callback_present and isinstance(emitted, DagsterEvent):
            log_step_event(step_context, emitted)
        yield emitted
Пример #2
0
def composite_descent(pipeline_def, solids_config, run_config=None):
    '''
    Build the dictionary of solid config, keyed by the string form of each
    solid handle, that will be passed into the EnvironmentConfig. Critically
    this is the codepath that manages config mapping, where the runtime calls
    into user-defined config mapping functions to produce config for child
    solids of composites.

    pipeline_def (PipelineDefinition): The pipeline whose solids are descended.
    solids_config (dict): Configuration for the solids in the pipeline. The "solids" entry
    of the environment_dict. Assumed to have already been validated.
    run_config (Optional[IRunConfig]): Type-checked (a default RunConfig is
    created when omitted) but not otherwise used by this function.
    '''
    check.inst_param(pipeline_def, 'pipeline_def', PipelineDefinition)
    check.dict_param(solids_config, 'solids_config')

    if run_config is None:
        run_config = RunConfig()
    else:
        run_config = check.inst_param(run_config, 'run_config', IRunConfig)

    descent = _composite_descent(
        parent_stack=DescentStack(pipeline_def, None),
        solids_config_dict=solids_config,
    )

    configs_by_handle = {}
    for handle, solid_config in descent:
        configs_by_handle[handle.to_string()] = solid_config
    return configs_by_handle
Пример #3
0
def test_papertrail_logger():
    '''Check the first record handed to ``SysLogHandler.emit`` during a run.

    ``hello_pipeline`` is executed with a console logger and a papertrail
    logger configured, with ``SysLogHandler.emit`` patched out. The first
    captured record must be an INFO ``LogRecord`` named ``hello_pipeline``
    carrying the expected formatted message.
    '''
    environment_dict = {
        'loggers': {
            'console': {
                'config': {
                    'log_level': 'INFO'
                }
            },
            'papertrail': {
                'config': {
                    'log_level': 'INFO',
                    'name': 'hello_pipeline',
                    'papertrail_address': '127.0.0.1',
                    'papertrail_port': 12345,
                }
            },
        }
    }

    # The continuation lines are right-aligned on '=' and must match exactly.
    expected_message = '''system - 123 - Hello, world!
               solid = "hello_logs"
    solid_definition = "hello_logs"
            step_key = "hello_logs.compute"'''

    with mock.patch('logging.handlers.SysLogHandler.emit') as emit:
        execute_pipeline(
            hello_pipeline,
            environment_dict,
            run_config=RunConfig(run_id='123'),
        )

    # First recorded call -> its positional args -> the record passed to emit.
    first_call = emit.call_args_list[0]
    positional_args = first_call[0]
    log_record = positional_args[0]

    assert isinstance(log_record, logging.LogRecord)
    assert log_record.name == 'hello_pipeline'
    assert log_record.levelname == 'INFO'
    assert log_record.msg == expected_message
Пример #4
0
def test_single_step_resource_event_logs():
    '''Resource log messages in a single-step plan carry that step's key.'''
    # Single-step plans are often the representation of sub-plans in a
    # multiprocessing execution environment, hence this attribution test.
    # It will most likely need to be rewritten with the refactor detailed in
    # https://github.com/dagster-io/dagster/issues/2239
    solid_message = 'I AM A SOLID'
    resource_message = 'I AM A RESOURCE'
    captured_records = []

    def capture(record):
        assert isinstance(record, EventRecord)
        captured_records.append(record)

    @solid(required_resource_keys={'a'})
    def resource_solid(context):
        context.log.info(solid_message)

    @resource
    def resource_a(context):
        context.log.info(resource_message)
        return 'A'

    mode_def = ModeDefinition(
        resource_defs={'a': resource_a},
        logger_defs={'callback': construct_event_logger(capture)},
    )
    pipeline = PipelineDefinition(
        name='resource_logging_pipeline',
        solid_defs=[resource_solid],
        mode_defs=[mode_def],
    )

    result = execute_pipeline(
        pipeline,
        environment_dict={'loggers': {'callback': {}}},
        instance=DagsterInstance.local_temp(),
        run_config=RunConfig(step_keys_to_execute=['resource_solid.compute']),
    )
    assert result.success

    # Exactly two user log messages are expected: one from the solid and one
    # from the resource.
    log_messages = []
    for record in captured_records:
        if isinstance(record, LogMessageRecord):
            log_messages.append(record)
    assert len(log_messages) == 2

    resource_log_message = next(
        message for message in log_messages
        if message.user_message == resource_message)
    assert resource_log_message.step_key == 'resource_solid.compute'
Пример #5
0
def test_execute_on_dask():
    '''Run the dask test pipeline with filesystem storage and check one output.

    This test is flaky on py27, believed to be caused by
    https://github.com/dask/distributed/issues/2446. The original note says
    the test is retried a couple of times; the retry is not visible in this
    function, so it is presumably applied outside it (TODO confirm).
    '''
    target_handle = ExecutionTargetHandle.for_pipeline_fn(
        define_dask_test_pipeline)
    filesystem_storage_config = {'storage': {'filesystem': {}}}

    result = execute_on_dask(
        target_handle,
        env_config=filesystem_storage_config,
        run_config=RunConfig(storage_mode=RunStorageMode.FILESYSTEM),
        dask_config=DaskConfig(timeout=30),
    )

    simple_result = result.result_for_solid('simple')
    assert simple_result.transformed_value() == 1
Пример #6
0
def execute_step_out_of_process(step_context, step):
    '''Execute ``step`` through a child-process command and yield what it emits.

    The child RunConfig reuses the parent's run id, tags, step keys and mode.
    The command also receives a reference to the parent's instance, obtained
    from ``step_context.instance.get_ref()``.

    Args:
        step_context: Context exposing ``run_config``, ``environment_dict``,
            ``executor_config`` and ``instance``.
        step: The step to run; only ``step.key`` is read here.

    Yields:
        Every item produced by ``execute_child_process_command``, unchanged.
    '''
    parent_run_config = step_context.run_config

    child_run_config = RunConfig(
        run_id=parent_run_config.run_id,
        tags=parent_run_config.tags,
        step_keys_to_execute=parent_run_config.step_keys_to_execute,
        mode=parent_run_config.mode,
    )

    instance_ref = step_context.instance.get_ref()
    command = InProcessExecutorChildProcessCommand(
        step_context.environment_dict,
        child_run_config,
        step_context.executor_config,
        step.key,
        instance_ref,
    )

    for emitted in execute_child_process_command(command):
        yield emitted
Пример #7
0
def evaluate_config(config_type,
                    config_value,
                    pipeline=None,
                    run_config=None,
                    seen_handles=None):
    '''Type-check the arguments, wrap them in a TraversalContext and evaluate.

    Args:
        config_type (ConfigType): The config type to evaluate against.
        config_value: The value to evaluate; passed through unchecked.
        pipeline (Optional[PipelineDefinition]): Optional pipeline.
        run_config (Optional[IRunConfig]): Defaults to a new RunConfig.
        seen_handles (Optional[list]): Optional list, checked with
            ``check.opt_list_param``.

    Returns:
        Whatever ``_evaluate_config`` returns for the constructed context.
    '''
    checked_type = check.inst_param(config_type, 'config_type', ConfigType)
    root_stack = EvaluationStack(config_type=config_type, entries=[])
    checked_pipeline = check.opt_inst_param(pipeline, 'pipeline',
                                            PipelineDefinition)
    checked_run_config = check.opt_inst_param(run_config,
                                              'run_config',
                                              IRunConfig,
                                              default=RunConfig())
    checked_handles = check.opt_list_param(seen_handles, 'seen_handles')

    context = TraversalContext(
        config_type=checked_type,
        config_value=config_value,
        stack=root_stack,
        pipeline=checked_pipeline,
        run_config=checked_run_config,
        seen_handles=checked_handles,
    )
    return _evaluate_config(context)
Пример #8
0
def execute_step_out_of_process(step_context, step):
    '''Execute ``step`` through a child-process command, watching its log sink.

    A temporary file is initialised with ``init_db`` and handed to the child
    command as its log sink. A background thread runs a
    ``JsonSqlite3LogWatcher`` over that file, given ``step_context.log``,
    while the child executes. Once iteration ends (normally, by error, or
    because the generator is closed) the watcher is signalled via ``is_done``
    and joined.

    Args:
        step_context: Context exposing ``run_config``, ``log``,
            ``environment_dict`` and ``executor_config``.
        step: The step to run; only ``step.key`` is read here.

    Yields:
        Every item produced by ``execute_child_process_command``, unchanged.
    '''
    child_run_config = RunConfig(
        run_id=step_context.run_config.run_id,
        tags=step_context.run_config.tags,
        log_sink=None,
        event_callback=None,
        reexecution_config=None,
        step_keys_to_execute=step_context.run_config.step_keys_to_execute,
        mode=step_context.run_config.mode,
    )

    with safe_tempfile_path() as log_sink_file:
        init_db(log_sink_file)
        # Although the type of is_done is threading._Event in py2, not threading.Event,
        # it is still constructed using the threading.Event() factory
        is_done = threading.Event()

        def log_watcher_thread_target():
            log_watcher = JsonSqlite3LogWatcher(log_sink_file,
                                                step_context.log, is_done)
            log_watcher.watch()

        # Build the command BEFORE starting the watcher thread. If this
        # raised after the thread had started, nothing would set is_done or
        # join the thread, leaving a non-daemon thread running.
        command = InProcessExecutorChildProcessCommand(
            step_context.environment_dict,
            child_run_config,
            step_context.executor_config,
            step.key,
            log_sink_file,
        )

        log_watcher_thread = threading.Thread(target=log_watcher_thread_target)
        log_watcher_thread.start()

        # Nothing may run between start() and the try, so the watcher is
        # always signalled and joined once it has been started.
        try:
            for step_event in execute_child_process_command(command):
                if step_context.run_config.event_callback and isinstance(
                        step_event, DagsterEvent):
                    log_step_event(step_context, step_event)
                yield step_event
        finally:
            # presumably watch() returns once is_done is set -- confirm
            # against JsonSqlite3LogWatcher
            is_done.set()
            log_watcher_thread.join()
Пример #9
0
    def build(pipeline, environment_dict=None, run_config=None):
        '''Validate ``environment_dict`` for ``pipeline`` and build the config.

        Args:
            pipeline (PipelineDefinition): Pipeline the config is for.
            environment_dict (Optional[dict]): Raw environment config. ``None``
                is treated as an empty dict.
            run_config (Optional[IRunConfig]): Supplies the mode; defaults to
                a new RunConfig. When it has no mode set, the pipeline's
                default mode name is used.

        Returns:
            The result of ``EnvironmentConfig.from_config_value`` for the
            evaluated config value.

        Raises:
            DagsterInvalidConfigError: If config evaluation does not succeed.
        '''
        check.inst_param(pipeline, 'pipeline', PipelineDefinition)
        # Keep the checked value so a missing environment_dict is normalised
        # instead of flowing downstream as None, and label the check with the
        # real parameter name so a type error points at the right argument.
        environment_dict = check.opt_dict_param(environment_dict,
                                                'environment_dict')
        run_config = check.opt_inst_param(run_config,
                                          'run_config',
                                          IRunConfig,
                                          default=RunConfig())

        mode = run_config.mode or pipeline.get_default_mode_name()
        environment_type = create_environment_type(pipeline, mode)

        result = evaluate_config(environment_type, environment_dict, pipeline,
                                 run_config)

        if not result.success:
            raise DagsterInvalidConfigError(pipeline, result.errors,
                                            environment_dict)

        return EnvironmentConfig.from_config_value(result.value,
                                                   environment_dict)
Пример #10
0
def execute_step_out_of_process(step_context, step):
    '''Execute ``step`` through a child-process command using a sqlite event sink.

    A ``SqliteEventSink`` over a temporary file is attached to the child's
    RunConfig. While the child runs, the sink's ``log_forwarding`` context
    manager is active and is given ``step_context.log``.

    Args:
        step_context: Context exposing ``run_config``, ``log``,
            ``environment_dict`` and ``executor_config``.
        step: The step to run; only ``step.key`` is read here.

    Yields:
        Every item produced by ``execute_child_process_command``, unchanged.
    '''
    with safe_tempfile_path() as sqlite_path:
        sink = SqliteEventSink(sqlite_path, raise_on_error=True)

        parent_run_config = step_context.run_config
        child_run_config = RunConfig(
            run_id=parent_run_config.run_id,
            tags=parent_run_config.tags,
            event_sink=sink,
            step_keys_to_execute=parent_run_config.step_keys_to_execute,
            mode=parent_run_config.mode,
        )

        command = InProcessExecutorChildProcessCommand(
            step_context.environment_dict,
            child_run_config,
            step_context.executor_config,
            step.key,
        )

        with sink.log_forwarding(step_context.log):
            for emitted in execute_child_process_command(command):
                yield emitted
Пример #11
0
    def get_preset(self, name):
        '''Look up a preset by name and return the execution arguments for it.

        Args:
            name (str): Key into this pipeline's preset dictionary.

        Returns:
            dict: ``pipeline`` (this pipeline, or a sub-pipeline when the
            preset defines a solid subset), ``environment_dict`` (from the
            preset, looked up with this pipeline's name) and ``run_config``
            (a RunConfig carrying the preset's mode).

        Raises:
            DagsterInvariantViolationError: If no preset has that name.
        '''
        check.str_param(name, 'name')

        if name not in self._preset_dict:
            message = (
                'Could not find preset for "{name}". Available presets '
                'for pipeline "{pipeline_name}" are {preset_names}.').format(
                    name=name,
                    preset_names=list(self._preset_dict.keys()),
                    pipeline_name=self._name)
            raise DagsterInvariantViolationError(message)

        preset = self._preset_dict[name]

        if preset.solid_subset is None:
            target_pipeline = self
        else:
            target_pipeline = self.build_sub_pipeline(preset.solid_subset)

        # The environment dict is requested with the full pipeline's name,
        # even when a sub-pipeline is returned.
        environment_dict = preset.get_environment_dict(self._name)
        run_config = RunConfig(mode=preset.mode)

        return {
            'pipeline': target_pipeline,
            'environment_dict': environment_dict,
            'run_config': run_config,
        }
Пример #12
0
    def build(pipeline, environment_dict=None, run_config=None):
        '''Process ``environment_dict`` for ``pipeline`` into an EnvironmentConfig.

        Args:
            pipeline (PipelineDefinition): Pipeline the config is for.
            environment_dict (Optional[dict]): Raw environment config, checked
                with ``check.opt_dict_param``.
            run_config (Optional[IRunConfig]): Supplies the mode; defaults to
                a new RunConfig. When it has no mode set, the pipeline's
                default mode name is used.

        Returns:
            EnvironmentConfig: Built from the processed config value, with the
            solids entry expanded by ``composite_descent``.

        Raises:
            DagsterInvalidConfigError: If config processing does not succeed.
        '''
        from dagster.config.validate import process_config
        from .composite_descent import composite_descent

        check.inst_param(pipeline, 'pipeline', PipelineDefinition)
        environment_dict = check.opt_dict_param(environment_dict,
                                                'environment_dict')
        run_config = check.opt_inst_param(run_config,
                                          'run_config',
                                          IRunConfig,
                                          default=RunConfig())

        mode = run_config.mode
        if not mode:
            mode = pipeline.get_default_mode_name()

        config_evr = process_config(create_environment_type(pipeline, mode),
                                    environment_dict)
        if not config_evr.success:
            raise DagsterInvalidConfigError(
                'Error in config for pipeline {}'.format(pipeline.name),
                config_evr.errors,
                environment_dict,
            )

        processed = config_evr.value

        solid_config_dict = composite_descent(pipeline,
                                              processed.get('solids', {}),
                                              run_config)
        execution_config = ExecutionConfig.from_dict(
            processed.get('execution'))
        storage_config = StorageConfig.from_dict(processed.get('storage'))

        return EnvironmentConfig(
            solids=solid_config_dict,
            execution=execution_config,
            storage=storage_config,
            loggers=processed.get('loggers'),
            original_config_dict=environment_dict,
            resources=processed.get('resources'),
        )