Example #1
def test_sink_logger():
    run_id = str(uuid.uuid4())
    with safe_tempfile_path() as sqlite3_db_path:
        sink = SqliteEventSink(sqlite3_db_path)
        sqlite3_log_manager = DagsterLogManager(run_id, {},
                                                [sink.get_logger()])

        for i in range(1000):
            sqlite3_log_manager.info('Testing ' + str(i))

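        # Note: sqlite3's connection context manager commits or rolls back but does
        # not close the connection, hence the explicit conn.close() below.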
        with sqlite3.connect(sqlite3_db_path) as conn:
            cursor = conn.cursor()
            count = cursor.execute('select count(1) from logs').fetchall()
            assert count[0][0] == 1000
        conn.close()
        sink.on_pipeline_teardown()
Example #2
def execute_step_out_of_process(step_context, step):

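    # Build a child RunConfig that reuses the parent run's id, tags, mode, and step
    # selection, and routes the child process's events through a SQLite-backed sink.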
    with safe_tempfile_path() as sqlite_file:
        event_sink = SqliteEventSink(sqlite_file, raise_on_error=True)

        child_run_config = RunConfig(
            run_id=step_context.run_config.run_id,
            tags=step_context.run_config.tags,
            event_sink=event_sink,
            step_keys_to_execute=step_context.run_config.step_keys_to_execute,
            mode=step_context.run_config.mode,
        )

        command = InProcessExecutorChildProcessCommand(
            step_context.environment_dict, child_run_config,
            step_context.executor_config, step.key)

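        # Forward events recorded by the child process into the parent step's log
        # while the child command executes.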
        with event_sink.log_forwarding(step_context.log):
            for event_or_none in execute_child_process_command(command):
                yield event_or_none
Example #3
def test_concurrent_multithreaded_logging():
    test_log_records = []
    run_id = str(uuid.uuid4())
    with safe_tempfile_path() as sqlite3_db_path:
        wrap_it_up = threading.Event()
        event_sink = SqliteEventSink(sqlite3_db_path)

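        # One thread writes log records through the sink while the other forwards
        # them out of the SQLite file into test_log_records until wrap_it_up is set.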
        sqlite3_thread = threading.Thread(target=write_logs,
                                          args=(event_sink, run_id))
        test_thread = threading.Thread(target=forward_logs,
                                       args=(event_sink, wrap_it_up, run_id,
                                             test_log_records))

        sqlite3_thread.start()
        test_thread.start()

        try:
            sqlite3_thread.join()
        finally:
            wrap_it_up.set()

        assert wrap_it_up.is_set()

        test_thread.join()
        assert len(test_log_records) == 1000

        with sqlite3.connect(sqlite3_db_path) as conn:
            cursor = conn.cursor()
            count = cursor.execute('select count(1) from logs').fetchall()
            assert count[0][0] == 1000

            records = cursor.execute('select * from logs').fetchall()
            for i, record in enumerate(records):
                json_record = record[1]
                assert json_record == seven.json.dumps(
                    test_log_records[i].__dict__)
        conn.close()
Example #4
def test_error_during_logging(caplog):
    run_id = str(uuid.uuid4())
    with safe_tempfile_path() as sqlite3_db_path:
        event_sink = SqliteEventSink(sqlite3_db_path)

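        # Monkeypatch the sink's connect() with a stub that always raises, so the
        # error-handling path in the logging machinery is exercised.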
        def err_conn(*args, **kwargs):
            raise Exception('Bailing!')

        event_sink.connect = err_conn

        sqlite3_log_manager = DagsterLogManager(run_id, {},
                                                [event_sink.get_logger()])

        sqlite3_log_manager.info('Testing error handling')

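        # 50 and 40 are logging.CRITICAL and logging.ERROR respectively.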
        assert caplog.record_tuples == [
            ('root', 50, 'Error during logging!'),
            ('root', 40, 'Bailing!'),
        ]
        event_sink.on_pipeline_teardown()
Example #5
def test_sink_log_forwarding():
    test_log_records = []
    run_id = str(uuid.uuid4())
    with safe_tempfile_path() as sqlite3_db_path:
        sink = SqliteEventSink(sqlite3_db_path)

        sqlite3_log_manager = DagsterLogManager(run_id, {},
                                                [sink.get_logger()])

        for i in range(1000):
            sqlite3_log_manager.info('Testing ' + str(i))

        with sqlite3.connect(sqlite3_db_path) as conn:
            cursor = conn.cursor()
            count = cursor.execute('select count(1) from logs').fetchall()
            assert count[0][0] == 1000

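            # Replay the persisted records through a fresh log manager via
            # log_forwarding and compare them against the rows stored in SQLite.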
            test_handler = LogTestHandler(test_log_records)
            test_logger_def = construct_single_handler_logger(
                'test', 'debug', test_handler)
            test_logger = test_logger_def.logger_fn(
                dummy_init_logger_context(test_logger_def, run_id))
            sqlite3_watcher_log_manager = DagsterLogManager(
                run_id, {}, [test_logger])

            with sink.log_forwarding(sqlite3_watcher_log_manager):
                pass

            assert len(test_log_records) == 1000

            records = cursor.execute('select * from logs').fetchall()
            for i, record in enumerate(records):
                json_record = record[1]
                assert json_record == seven.json.dumps(
                    test_log_records[i].__dict__)

        conn.close()
        sink.on_pipeline_teardown()
Example #6
    def reconstitute_pipeline_context(
        self,
        output_log_path=None,
        marshal_dir=None,
        environment_dict=None,
        handle_kwargs=None,
        run_config_kwargs=None,
        solid_subset=None,
        solid_handle_kwargs=None,
    ):
        '''Reconstitutes a context for dagstermill-managed execution.

        You'll see this function called to reconstruct a pipeline context within the ``injected
        parameters`` cell of a dagstermill output notebook. Users should not call this function
        interactively except when debugging output notebooks.

        Use :func:`dagstermill.get_context` in the ``parameters`` cell of your notebook to define a
        context for interactive exploration and development. This call will be replaced by one to
        :func:`dagstermill.reconstitute_pipeline_context` when the notebook is executed by
        dagstermill.
        '''
        check.opt_str_param(output_log_path, 'output_log_path')
        check.opt_str_param(marshal_dir, 'marshal_dir')
        environment_dict = check.opt_dict_param(environment_dict,
                                                'environment_dict',
                                                key_type=str)
        check.dict_param(run_config_kwargs, 'run_config_kwargs')
        check.dict_param(handle_kwargs, 'handle_kwargs')
        check.opt_list_param(solid_subset, 'solid_subset', of_type=str)
        check.dict_param(solid_handle_kwargs, 'solid_handle_kwargs')

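        # The pipeline must be reconstructible from an ExecutionTargetHandle, since
        # it is re-hydrated here in the notebook process rather than received as an
        # in-memory definition.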
        try:
            handle = load_handle.handle_for_pipeline_cli_args(
                handle_kwargs, use_default_repository_yaml=False)
        except (check.CheckError, load_handle.CliUsageError) as err:
            six.raise_from(
                DagstermillError(
                    'Cannot invoke a dagstermill solid from an in-memory pipeline that was not loaded '
                    'from an ExecutionTargetHandle. Run this pipeline using dagit, the dagster CLI, '
                    'through dagster-graphql, or in-memory after loading it through an '
                    'ExecutionTargetHandle.'),
                err,
            )

        pipeline_def = check.inst_param(
            handle.build_pipeline_definition(),
            'pipeline_def (from handle {handle_dict})'.format(
                handle_dict=handle.data._asdict()),
            PipelineDefinition,
        ).build_sub_pipeline(solid_subset)

        solid_handle = SolidHandle.from_dict(solid_handle_kwargs)
        solid_def = pipeline_def.get_solid(solid_handle)

        run_config = RunConfig(**run_config_kwargs)
        # since we are rehydrating the SqliteEventSink we will skip the db init
        run_config = run_config.with_event_sink(
            SqliteEventSink(output_log_path, skip_db_init=True))

        self.marshal_dir = marshal_dir
        self.in_pipeline = True
        self.solid_def = solid_def
        self.pipeline_def = pipeline_def

        with scoped_pipeline_context(
                self.pipeline_def,
                environment_dict,
                run_config,
                scoped_resources_builder_cm=self._setup_resources,
        ) as pipeline_context:
            self.context = DagstermillExecutionContext(pipeline_context)

        return self.context
Example #7
    def _t_fn(compute_context, inputs):
        check.inst_param(compute_context, 'compute_context', ComputeExecutionContext)
        check.param_invariant(
            isinstance(compute_context.environment_dict, dict),
            'context',
            'SystemComputeExecutionContext must have valid environment_dict',
        )

        system_compute_context = compute_context.get_system_context()

        base_dir = '/tmp/dagstermill/{run_id}/'.format(run_id=compute_context.run_id)
        output_notebook_dir = os.path.join(base_dir, 'output_notebooks/')
        mkdir_p(output_notebook_dir)

        temp_path = os.path.join(
            output_notebook_dir, '{prefix}-out.ipynb'.format(prefix=str(uuid.uuid4()))
        )

        with safe_tempfile_path() as output_log_path:
            event_sink = SqliteEventSink(output_log_path)

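            # Events emitted inside the notebook process are written to this SQLite
            # file and forwarded back into the solid's log further down.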
            # Load the source notebook and inject the papermill parameters
            nb = load_notebook_node(notebook_path)
            nb_no_parameters = replace_parameters(
                system_compute_context,
                nb,
                get_papermill_parameters(system_compute_context, inputs, output_log_path),
            )
            intermediate_path = os.path.join(
                output_notebook_dir, '{prefix}-inter.ipynb'.format(prefix=str(uuid.uuid4()))
            )
            write_ipynb(nb_no_parameters, intermediate_path)

            with user_code_error_boundary(
                DagstermillExecutionError,
                lambda: (
                    'Error occurred during the execution of Dagstermill solid '
                    '{solid_name}: {notebook_path}'.format(
                        solid_name=name, notebook_path=notebook_path
                    )
                ),
            ):
                with event_sink.log_forwarding(system_compute_context.log):
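                    # If notebook execution fails, still yield a materialization
                    # pointing at the (partial) output notebook before re-raising.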
                    try:
                        papermill_engines.register('dagstermill', DagstermillNBConvertEngine)
                        papermill.execute_notebook(
                            intermediate_path, temp_path, engine_name='dagstermill', log_output=True
                        )
                    except Exception as exc:
                        yield Materialization(
                            label='output_notebook',
                            description='Location of output notebook on the filesystem',
                            metadata_entries=[EventMetadataEntry.fspath(temp_path)],
                        )
                        raise exc

            # deferred import for perf
            import scrapbook

            output_nb = scrapbook.read_notebook(temp_path)

            system_compute_context.log.debug(
                'Notebook execution complete for {name}. Data is {data}'.format(
                    name=name, data=output_nb.scraps
                )
            )

            yield Materialization(
                label='output_notebook',
                description='Location of output notebook on the filesystem',
                metadata_entries=[EventMetadataEntry.fspath(temp_path)],
            )

            for (output_name, output_def) in system_compute_context.solid_def.output_dict.items():
                data_dict = output_nb.scraps.data_dict
                if output_name in data_dict:
                    value = read_value(output_def.runtime_type, data_dict[output_name])

                    yield Output(value, output_name)

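            # Scraps whose keys start with 'event-' point at pickled events written
            # by the notebook; unpickle and re-yield them here.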
            for key, value in output_nb.scraps.items():
                if key.startswith('event-'):
                    with open(value.data, 'rb') as fd:
                        yield pickle.loads(fd.read())