示例#1
0
def test_event_log_get_stats_for_run(event_storage_factory_cm_fn):
    """Check that run stats expose the enqueue, launch and start timestamps.

    Three pipeline-level events (PIPELINE_ENQUEUED, PIPELINE_STARTING and
    PIPELINE_START) are stored for run "foo" with known timestamps; the stats
    returned by ``get_stats_for_run("foo")`` must then report those timestamps
    as ``enqueued_time``, ``launch_time`` and ``start_time`` respectively.
    """
    import math

    def _store(storage, event_type, timestamp):
        # Store a bare record for run "foo" carrying an event of the given type.
        storage.store_event(
            DagsterEventRecord(
                None,
                "message",
                "debug",
                "",
                "foo",
                timestamp,
                dagster_event=DagsterEvent(event_type.value, "nonce"),
            )
        )

    with event_storage_factory_cm_fn() as storage:
        enqueued_time = time.time()
        launched_time = enqueued_time + 20
        start_time = launched_time + 50

        _store(storage, DagsterEventType.PIPELINE_ENQUEUED, enqueued_time)
        _store(storage, DagsterEventType.PIPELINE_STARTING, launched_time)
        _store(storage, DagsterEventType.PIPELINE_START, start_time)

        expectations = (
            ("enqueued_time", enqueued_time),
            ("launch_time", launched_time),
            ("start_time", start_time),
        )
        # The stats are fetched afresh for each field that is checked.
        for field_name, expected in expectations:
            stats = storage.get_stats_for_run("foo")
            assert math.isclose(getattr(stats, field_name), expected)
示例#2
0
    def attempt_to_launch_runs(self):
        """Launch queued runs, up to the remaining concurrency budget.

        The budget is ``self._max_concurrent_runs`` minus the number of runs
        currently in progress. For each queued run fetched within that budget,
        a PIPELINE_DEQUEUED event record is passed to the instance via
        ``handle_new_event`` and the run is then launched.
        """
        available_slots = self._max_concurrent_runs - self._count_in_progress_runs()

        # Possibly under 0 if runs were launched without queuing
        if available_slots <= 0:
            return

        for queued_run in self._get_queued_runs(limit=available_slots):
            with external_pipeline_from_run(queued_run) as external_pipeline:
                dequeued_event = DagsterEvent(
                    event_type_value=DagsterEventType.PIPELINE_DEQUEUED.value,
                    pipeline_name=queued_run.pipeline_name,
                )
                self._instance.handle_new_event(
                    DagsterEventRecord(
                        message="",
                        user_message="",
                        level=logging.INFO,
                        pipeline_name=queued_run.pipeline_name,
                        run_id=queued_run.run_id,
                        error_info=None,
                        timestamp=time.time(),
                        dagster_event=dequeued_event,
                    )
                )

                self._instance.launch_run(queued_run.run_id, external_pipeline)
示例#3
0
def build_synthetic_pipeline_error_record(run_id, error_info, pipeline_name):
    '''Build an ERROR-level PIPELINE_FAILURE record from serialized error info.

    Args:
        run_id: Id of the run the failure belongs to; checked to be a string.
        error_info: A SerializableErrorInfo whose ``message`` and ``stack``
            are embedded in the record's messages.
        pipeline_name: Name of the pipeline; checked to be a string.

    Returns:
        A DagsterEventRecord timestamped with the current time.
    '''
    check.str_param(run_id, 'run_id')
    check.str_param(pipeline_name, 'pipeline_name')
    check.inst_param(error_info, 'error_info', SerializableErrorInfo)

    # Both messages end with the error text followed by its stack trace.
    error_with_stack = (
        error_info.message + '\nStack Trace:\n' + '\n'.join(error_info.stack)
    )

    # Currently it is the user_message that is displayed to the user client side
    # in dagit even though that was not the original intent. The original
    # intent was that the user_message was the message generated by user code
    # communicated directly to the client. We need to rationalize the treatment
    # of these different error messages
    framework_notice = (
        'An exception was thrown during execution that is likely a framework error, '
        'rather than an error in user code.'
    )
    user_facing = framework_notice + '\nOriginal error message: ' + error_with_stack

    failure_event = DagsterEvent(
        DagsterEventType.PIPELINE_FAILURE.value, pipeline_name
    )
    return DagsterEventRecord(
        message=error_with_stack,
        user_message=user_facing,
        level=logging.ERROR,
        run_id=run_id,
        timestamp=time.time(),
        error_info=error_info,
        pipeline_name=pipeline_name,
        dagster_event=failure_event,
    )
    def _dequeue_run(self, run):
        """Dequeue ``run`` and launch it, unless it is no longer queued.

        The run is reloaded from the instance first. If its status is anything
        other than QUEUED, a message is logged and nothing else happens.
        Otherwise a PIPELINE_DEQUEUED event record is passed to the instance
        via ``handle_new_event`` and the run is launched.
        """
        with external_pipeline_from_run(run) as external_pipeline:
            # Double check that the run is still queued before dequeuing.
            # NOTE(review): assumes get_run_by_id never returns None here -- confirm.
            current_run = self._instance.get_run_by_id(run.run_id)

            if current_run.status != PipelineRunStatus.QUEUED:
                skip_message = (
                    "Run {run_id} is now {status} instead of QUEUED, skipping".format(
                        run_id=current_run.run_id, status=current_run.status
                    )
                )
                self._logger.info(skip_message)
                return

            self._instance.handle_new_event(
                DagsterEventRecord(
                    message="",
                    user_message="",
                    level=logging.INFO,
                    pipeline_name=run.pipeline_name,
                    run_id=run.run_id,
                    error_info=None,
                    timestamp=time.time(),
                    dagster_event=DagsterEvent(
                        event_type_value=DagsterEventType.PIPELINE_DEQUEUED.value,
                        pipeline_name=run.pipeline_name,
                    ),
                )
            )

            self._instance.launch_run(run.run_id, external_pipeline)
示例#5
0
def handle_execute_plan_result_raw(res):
    '''Convert an ``executePlan`` GraphQL response into DagsterEventRecords.

    The ``executePlan`` payload is first passed to ``handle_error_states``
    together with its ``__typename``. For an ``ExecutePlanSuccess`` payload,
    every entry of ``rawEventRecords`` is deserialized and rebuilt as a
    DagsterEventRecord.

    Args:
        res: Response mapping containing ``res['data']['executePlan']``.

    Returns:
        A list of DagsterEventRecord, one per raw event record.

    Raises:
        DagsterGraphQLClientError: If the payload type is not
            ``ExecutePlanSuccess``.
    '''
    plan_result = res['data']['executePlan']
    typename = plan_result['__typename']

    handle_error_states(typename, plan_result)

    if typename != 'ExecutePlanSuccess':
        raise DagsterGraphQLClientError('Unexpected result type')

    # Deserialize everything first, then rebuild each record field by field.
    deserialized = [
        deserialize_json_to_dagster_namedtuple(raw)
        for raw in plan_result['rawEventRecords']
    ]
    return [
        DagsterEventRecord(
            record.error_info,
            record.message,
            record.level,
            record.user_message,
            record.run_id,
            record.timestamp,
            record.step_key,
            record.pipeline_name,
            record.dagster_event,
        )
        for record in deserialized
    ]
示例#6
0
def test_correct_timezone(conn_string):
    """Check that a stored timestamp comes back unchanged, to the second.

    One PIPELINE_START record is stored for run "foo" with the current time.
    Both the timestamp of the single log read back and the ``start_time`` of
    the run stats must match the original time once truncated to whole seconds.
    """
    storage = PostgresEventLogStorage.create_clean_storage(conn_string)

    now = time.time()
    start_event = DagsterEvent(
        DagsterEventType.PIPELINE_START.value,
        "nonce",
        event_specific_data=EngineEventData.in_process(999),
    )
    record = DagsterEventRecord(
        None, "Message2", "debug", "", "foo", now, dagster_event=start_event
    )
    storage.store_event(record)

    stored_logs = storage.get_logs_for_run("foo")
    assert len(stored_logs) == 1
    stored_record = stored_logs[0]

    run_stats = storage.get_stats_for_run("foo")

    assert int(stored_record.timestamp) == int(run_stats.start_time)
    assert int(stored_record.timestamp) == int(now)
示例#7
0
def test_event_log_storage_store_with_multiple_runs(event_storage_factory_cm_fn):
    '''Check that events are stored, counted and wiped independently per run.

    One STEP_SUCCESS record is stored for each of three run ids. Every run
    must then have exactly one log and one succeeded step, and no logs at all
    once the storage has been wiped.
    '''
    with event_storage_factory_cm_fn() as storage:
        run_ids = ['foo', 'bar', 'baz']

        for run_id in run_ids:
            assert len(storage.get_logs_for_run(run_id)) == 0

            timestamp = time.time()
            step_success = DagsterEvent(
                DagsterEventType.STEP_SUCCESS.value,
                'nonce',
                event_specific_data=StepSuccessData(duration_ms=100.0),
            )
            storage.store_event(
                DagsterEventRecord(
                    None,
                    'Message2',
                    'debug',
                    '',
                    run_id,
                    timestamp,
                    dagster_event=step_success,
                )
            )

        for run_id in run_ids:
            assert len(storage.get_logs_for_run(run_id)) == 1
            assert storage.get_stats_for_run(run_id).steps_succeeded == 1

        storage.wipe()

        for run_id in run_ids:
            assert len(storage.get_logs_for_run(run_id)) == 0
示例#8
0
    def report_run_failed(self, pipeline_run):
        """Report a PIPELINE_FAILURE event for ``pipeline_run``.

        An ERROR-level event record carrying a fixed "marked as failed from
        outside the execution context" message is passed to
        ``handle_new_event``.

        Args:
            pipeline_run: The PipelineRun to mark as failed.

        Returns:
            The DagsterEvent wrapped by the reported record.
        """
        from dagster.core.events import DagsterEvent, DagsterEventType
        from dagster.core.events.log import DagsterEventRecord

        check.inst_param(pipeline_run, "pipeline_run", PipelineRun)

        failure_message = (
            "This pipeline run has been marked as failed from outside the execution context."
        )
        failure_event = DagsterEvent(
            event_type_value=DagsterEventType.PIPELINE_FAILURE.value,
            pipeline_name=pipeline_run.pipeline_name,
            message=failure_message,
        )

        self.handle_new_event(
            DagsterEventRecord(
                message=failure_message,
                user_message=failure_message,
                level=logging.ERROR,
                pipeline_name=pipeline_run.pipeline_name,
                run_id=pipeline_run.run_id,
                error_info=None,
                timestamp=time.time(),
                dagster_event=failure_event,
            )
        )
        return failure_event
示例#9
0
def _event_record(run_id, solid_name, timestamp, event_type, event_specific_data=None):
    """Build a step-level DagsterEventRecord for a solid.

    The step key is derived from a StepHandle wrapping a SolidHandle for
    ``solid_name``; the pipeline name is fixed to "pipeline_name".

    Args:
        run_id: Run id stored on the record.
        solid_name: Name used to build the solid and step handles.
        timestamp: Timestamp stored on the record.
        event_type: Object whose ``value`` is used as the event type value.
        event_specific_data: Optional payload attached to the event.
    """
    pipeline_name = "pipeline_name"
    solid_handle = SolidHandle(solid_name, None)
    step_handle = StepHandle(solid_handle)

    step_key = step_handle.to_key()
    event = DagsterEvent(
        event_type.value,
        pipeline_name,
        solid_handle=solid_handle,
        step_handle=step_handle,
        event_specific_data=event_specific_data,
    )
    return DagsterEventRecord(
        None,
        "",
        "debug",
        "",
        run_id,
        timestamp,
        step_key=step_key,
        pipeline_name=pipeline_name,
        dagster_event=event,
    )
示例#10
0
def build_process_started_event(run_id, pipeline_name, process_id):
    '''Build the INFO-level PIPELINE_PROCESS_STARTED record for a run.

    Args:
        run_id: Id of the run whose process has started.
        pipeline_name: Name of the pipeline being run.
        process_id: Process id; included in the message and the event data.

    Returns:
        A DagsterEventRecord timestamped with the current time.
    '''
    started_message = 'Started process for pipeline (pid: {process_id}).'.format(
        process_id=process_id
    )

    created_at = time.time()
    started_event = DagsterEvent(
        message=started_message,
        event_type_value=DagsterEventType.PIPELINE_PROCESS_STARTED.value,
        pipeline_name=pipeline_name,
        step_key=None,
        solid_handle=None,
        step_kind_value=None,
        logging_tags=None,
        event_specific_data=PipelineProcessStartedData(
            pipeline_name=pipeline_name,
            run_id=run_id,
            process_id=process_id,
        ),
    )
    return DagsterEventRecord(
        message=started_message,
        user_message=started_message,
        level=logging.INFO,
        run_id=run_id,
        timestamp=created_at,
        error_info=None,
        pipeline_name=pipeline_name,
        dagster_event=started_event,
    )
示例#11
0
    def report_engine_event(
        self,
        message,
        pipeline_run,
        engine_event_data=None,
        cls=None,
        step_key=None,
    ):
        '''
        Report an EngineEvent that occurred outside of a pipeline execution context.

        Args:
            message: Text of the event; checked to be a string.
            pipeline_run: The PipelineRun the event belongs to.
            engine_event_data: Optional EngineEventData, checked with
                ``check.opt_inst_param`` using ``EngineEventData([])`` as default.
            cls: When truthy, its ``__name__`` is prepended to the message as
                ``[Name] message``.
            step_key: Passed through to the event record.

        Returns:
            The DagsterEvent wrapped by the record given to ``handle_new_event``.
        '''
        from dagster.core.events import EngineEventData, DagsterEvent, DagsterEventType
        from dagster.core.events.log import DagsterEventRecord

        # NOTE(review): cls defaults to None yet is validated with
        # check.class_param -- confirm that helper accepts None.
        check.class_param(cls, 'cls')
        check.str_param(message, 'message')
        check.inst_param(pipeline_run, 'pipeline_run', PipelineRun)
        engine_event_data = check.opt_inst_param(
            engine_event_data,
            'engine_event_data',
            EngineEventData,
            EngineEventData([]),
        )

        if cls:
            message = "[{}] {}".format(cls.__name__, message)

        # Events that carry an error are reported at ERROR level.
        has_error = engine_event_data and engine_event_data.error
        severity = logging.ERROR if has_error else logging.INFO

        engine_event = DagsterEvent(
            event_type_value=DagsterEventType.ENGINE_EVENT.value,
            pipeline_name=pipeline_run.pipeline_name,
            message=message,
            event_specific_data=engine_event_data,
        )
        self.handle_new_event(
            DagsterEventRecord(
                message=message,
                user_message=message,
                level=severity,
                pipeline_name=pipeline_run.pipeline_name,
                run_id=pipeline_run.run_id,
                error_info=None,
                timestamp=time.time(),
                step_key=step_key,
                dagster_event=engine_event,
            )
        )
        return engine_event
示例#12
0
 def evt(name):
     '''Build an ENGINE_EVENT record for run 'foo' with ``name`` as its message.'''
     timestamp = time.time()
     engine_event = DagsterEvent(
         DagsterEventType.ENGINE_EVENT.value,
         'nonce',
         event_specific_data=EngineEventData.in_process(999),
     )
     return DagsterEventRecord(
         None, name, 'debug', '', 'foo', timestamp, dagster_event=engine_event
     )
示例#13
0
 def evt(name):
     """Build an ENGINE_EVENT record for run "foo" with ``name`` as its message."""
     timestamp = time.time()
     engine_event = DagsterEvent(
         DagsterEventType.ENGINE_EVENT.value,
         "nonce",
         event_specific_data=EngineEventData.in_process(999),
     )
     return DagsterEventRecord(
         None, name, "debug", "", "foo", timestamp, dagster_event=engine_event
     )
示例#14
0
def build_process_start_event(run_id, pipeline_name):
    '''Build the INFO-level PIPELINE_PROCESS_START record for a run.

    Args:
        run_id: Id of the run about to be started.
        pipeline_name: Name of the pipeline about to be run.

    Returns:
        A DagsterEventRecord timestamped with the current time.
    '''
    start_message = (
        'About to start process for pipeline {pipeline_name} run_id {run_id}'.format(
            pipeline_name=pipeline_name, run_id=run_id
        )
    )

    created_at = time.time()
    start_event = DagsterEvent(
        DagsterEventType.PIPELINE_PROCESS_START.value, pipeline_name
    )
    return DagsterEventRecord(
        message=start_message,
        user_message=start_message,
        level=logging.INFO,
        run_id=run_id,
        timestamp=created_at,
        error_info=None,
        pipeline_name=pipeline_name,
        dagster_event=start_event,
    )
示例#15
0
def _materialization_event_record(run_id, asset_key):
    """Build a STEP_MATERIALIZATION record for ``asset_key``.

    The record is timestamped 25 seconds before the current time and uses the
    fixed step key "my_step_key" and pipeline name "my_pipeline".
    """
    timestamp = time.time() - 25
    materialization = StepMaterializationData(AssetMaterialization(asset_key=asset_key))
    event = DagsterEvent(
        DagsterEventType.STEP_MATERIALIZATION.value,
        "my_pipeline",
        step_key="my_step_key",
        event_specific_data=materialization,
    )
    return DagsterEventRecord(
        None,
        "",
        "debug",
        "",
        run_id,
        timestamp,
        step_key="my_step_key",
        pipeline_name="my_pipeline",
        dagster_event=event,
    )
示例#16
0
def _event_record(run_id, step_key, timestamp, event_type, event_specific_data=None):
    '''Build a step-level DagsterEventRecord for pipeline 'pipeline_name'.

    Args:
        run_id: Run id stored on the record.
        step_key: Step key stored on both the record and the event.
        timestamp: Timestamp stored on the record.
        event_type: Object whose ``value`` is used as the event type value.
        event_specific_data: Optional payload attached to the event.
    '''
    pipeline_name = 'pipeline_name'
    event = DagsterEvent(
        event_type.value,
        pipeline_name,
        step_key=step_key,
        event_specific_data=event_specific_data,
    )
    return DagsterEventRecord(
        None,
        '',
        'debug',
        '',
        run_id,
        timestamp,
        step_key=step_key,
        pipeline_name=pipeline_name,
        dagster_event=event,
    )
    def _dequeue_run(self, run):
        """Report a PIPELINE_DEQUEUED event for ``run`` and then launch it.

        The event record is passed to the instance via ``handle_new_event``
        before ``launch_run`` is called with the run's external pipeline.
        """
        with external_pipeline_from_run(run) as external_pipeline:
            self._instance.handle_new_event(
                DagsterEventRecord(
                    message="",
                    user_message="",
                    level=logging.INFO,
                    pipeline_name=run.pipeline_name,
                    run_id=run.run_id,
                    error_info=None,
                    timestamp=time.time(),
                    dagster_event=DagsterEvent(
                        event_type_value=DagsterEventType.PIPELINE_DEQUEUED.value,
                        pipeline_name=run.pipeline_name,
                    ),
                )
            )

            self._instance.launch_run(run.run_id, external_pipeline)
示例#18
0
def test_in_memory_event_log_storage_store_events_and_wipe():
    '''Check that InMemoryEventLogStorage stores an event and forgets it on wipe.'''
    storage = InMemoryEventLogStorage()
    assert len(storage.get_logs_for_run('foo')) == 0

    timestamp = time.time()
    engine_event = DagsterEvent(
        DagsterEventType.ENGINE_EVENT.value,
        'nonce',
        event_specific_data=EngineEventData.in_process(999),
    )
    record = DagsterEventRecord(
        None, 'Message2', 'debug', '', 'foo', timestamp, dagster_event=engine_event
    )
    storage.store_event(record)
    assert len(storage.get_logs_for_run('foo')) == 1

    storage.wipe()
    assert len(storage.get_logs_for_run('foo')) == 0
示例#19
0
def test_event_log_delete(event_storage_factory_cm_fn):
    """Check that ``delete_events`` removes the stored logs of a run.

    After one ENGINE_EVENT record is stored for run "foo", the run has one log
    and truthy stats; after ``delete_events("foo")`` it has no logs.
    """
    with event_storage_factory_cm_fn() as storage:
        assert len(storage.get_logs_for_run("foo")) == 0

        timestamp = time.time()
        engine_event = DagsterEvent(
            DagsterEventType.ENGINE_EVENT.value,
            "nonce",
            event_specific_data=EngineEventData.in_process(999),
        )
        record = DagsterEventRecord(
            None, "Message2", "debug", "", "foo", timestamp, dagster_event=engine_event
        )
        storage.store_event(record)

        assert len(storage.get_logs_for_run("foo")) == 1
        assert storage.get_stats_for_run("foo")

        storage.delete_events("foo")
        assert len(storage.get_logs_for_run("foo")) == 0
示例#20
0
def test_event_log_storage_store_events_and_wipe(event_storage_factory_cm_fn):
    '''Check that ``wipe`` removes the stored logs of a run.

    After one ENGINE_EVENT record is stored for run 'foo', the run has one log
    and truthy stats; after ``wipe()`` it has no logs.
    '''
    with event_storage_factory_cm_fn() as storage:
        assert len(storage.get_logs_for_run('foo')) == 0

        timestamp = time.time()
        engine_event = DagsterEvent(
            DagsterEventType.ENGINE_EVENT.value,
            'nonce',
            event_specific_data=EngineEventData.in_process(999),
        )
        record = DagsterEventRecord(
            None, 'Message2', 'debug', '', 'foo', timestamp, dagster_event=engine_event
        )
        storage.store_event(record)

        assert len(storage.get_logs_for_run('foo')) == 1
        assert storage.get_stats_for_run('foo')

        storage.wipe()
        assert len(storage.get_logs_for_run('foo')) == 0
示例#21
0
def test_filesystem_event_log_storage_store_events_and_wipe():
    '''Check that SqliteEventLogStorage stores an event and forgets it on wipe.

    The storage is created on a temporary directory.
    '''
    with seven.TemporaryDirectory() as tmpdir_path:
        storage = SqliteEventLogStorage(tmpdir_path)
        assert len(storage.get_logs_for_run('foo')) == 0

        timestamp = time.time()
        engine_event = DagsterEvent(
            DagsterEventType.ENGINE_EVENT.value,
            'nonce',
            event_specific_data=EngineEventData.in_process(999),
        )
        record = DagsterEventRecord(
            None, 'Message2', 'debug', '', 'foo', timestamp, dagster_event=engine_event
        )
        storage.store_event(record)
        assert len(storage.get_logs_for_run('foo')) == 1

        storage.wipe()
        assert len(storage.get_logs_for_run('foo')) == 0
    def run_iteration(self):
        """Run one polling pass: launch queued runs within the concurrency limit.

        If the number of in-progress runs already meets or exceeds
        ``self._max_concurrent_runs``, this is logged and nothing is launched.
        Otherwise up to the remaining number of queued runs are fetched; for
        each one a PIPELINE_DEQUEUED event record is passed to the instance
        via ``handle_new_event`` and the run is launched.
        """
        running_count = self._count_in_progress_runs()
        available_slots = self._max_concurrent_runs - running_count

        # Possibly under 0 if runs were launched without queuing
        if available_slots <= 0:
            self._logger.info(
                "{} runs are currently in progress. Maximum is {}, won't launch more.".format(
                    running_count, self._max_concurrent_runs
                )
            )
            return

        runs_to_launch = self._get_queued_runs(limit=available_slots)

        if runs_to_launch:
            self._logger.info(
                "Retrieved {} queued runs to launch.".format(len(runs_to_launch))
            )
        else:
            self._logger.info("Poll returned no queued runs.")

        for queued_run in runs_to_launch:
            with external_pipeline_from_run(queued_run) as external_pipeline:
                dequeued_event = DagsterEvent(
                    event_type_value=DagsterEventType.PIPELINE_DEQUEUED.value,
                    pipeline_name=queued_run.pipeline_name,
                )
                self._instance.handle_new_event(
                    DagsterEventRecord(
                        message="",
                        user_message="",
                        level=logging.INFO,
                        pipeline_name=queued_run.pipeline_name,
                        run_id=queued_run.run_id,
                        error_info=None,
                        timestamp=time.time(),
                        dagster_event=dequeued_event,
                    )
                )

                self._instance.launch_run(queued_run.run_id, external_pipeline)
    def submit_run(self, pipeline_run, external_pipeline):
        """Enqueue ``pipeline_run`` by reporting a PIPELINE_ENQUEUED event.

        Args:
            pipeline_run: The PipelineRun to submit; its status must be
                NOT_STARTED.
            external_pipeline: An ExternalPipeline; type-checked but not
                otherwise used here.

        Returns:
            The run as reloaded from the instance after the event was handled.
        """
        check.inst_param(pipeline_run, "pipeline_run", PipelineRun)
        check.inst_param(external_pipeline, "external_pipeline", ExternalPipeline)
        check.invariant(pipeline_run.status == PipelineRunStatus.NOT_STARTED)

        self._instance.handle_new_event(
            DagsterEventRecord(
                message="",
                user_message="",
                level=logging.INFO,
                pipeline_name=pipeline_run.pipeline_name,
                run_id=pipeline_run.run_id,
                error_info=None,
                timestamp=time.time(),
                dagster_event=DagsterEvent(
                    event_type_value=DagsterEventType.PIPELINE_ENQUEUED.value,
                    pipeline_name=pipeline_run.pipeline_name,
                ),
            )
        )

        return self._instance.get_run_by_id(pipeline_run.run_id)
示例#24
0
def build_process_start_event(run_id, pipeline_name):
    '''Build the INFO-level PIPELINE_PROCESS_START record for a run.

    Args:
        run_id: Id of the run about to be started; checked to be a string.
        pipeline_name: Name of the pipeline; checked to be a string.

    Returns:
        A DagsterEventRecord timestamped with the current time, whose event
        carries a PipelineProcessStartData payload.
    '''
    check.str_param(pipeline_name, 'pipeline_name')
    check.str_param(run_id, 'run_id')

    start_message = (
        'About to start process for pipeline "{pipeline_name}" (run_id: {run_id}).'.format(
            pipeline_name=pipeline_name, run_id=run_id
        )
    )

    created_at = time.time()
    start_event = DagsterEvent(
        message=start_message,
        event_type_value=DagsterEventType.PIPELINE_PROCESS_START.value,
        pipeline_name=pipeline_name,
        event_specific_data=PipelineProcessStartData(pipeline_name, run_id),
    )
    return DagsterEventRecord(
        message=start_message,
        user_message=start_message,
        level=logging.INFO,
        run_id=run_id,
        timestamp=created_at,
        error_info=None,
        pipeline_name=pipeline_name,
        dagster_event=start_event,
    )
示例#25
0
def construct_step_failure_event_and_handle(pipeline_run, step_key, err, instance):
    """Report a STEP_FAILURE event for ``step_key`` and return it.

    The serialized error attached to the event is taken from the exception
    currently being handled (``sys.exc_info()``). When no exception is being
    handled and ``err`` is itself an exception, ``err`` and its own traceback
    are used instead, so the helper also works outside an ``except`` block.

    Args:
        pipeline_run: Run the failed step belongs to; supplies the pipeline
            name and run id.
        step_key: Key of the failed step.
        err: The error being reported; ``str(err)`` becomes the record's
            message and user message.
        instance: Object whose ``handle_new_event`` receives the record.

    Returns:
        The DagsterEvent wrapped by the reported record.
    """
    exc_info = sys.exc_info()
    if exc_info[0] is None and isinstance(err, BaseException):
        # No active exception: sys.exc_info() is (None, None, None), which
        # carries no error at all. Fall back to the exception we were given.
        exc_info = (type(err), err, getattr(err, "__traceback__", None))

    step_failure_event = DagsterEvent(
        event_type_value=DagsterEventType.STEP_FAILURE.value,
        pipeline_name=pipeline_run.pipeline_name,
        step_key=step_key,
        event_specific_data=StepFailureData(
            error=serializable_error_info_from_exc_info(exc_info),
            user_failure_data=UserFailureData(label="K8sError"),
        ),
    )
    event_record = DagsterEventRecord(
        message=str(err),
        user_message=str(err),
        level=logging.ERROR,
        pipeline_name=pipeline_run.pipeline_name,
        run_id=pipeline_run.run_id,
        error_info=None,
        step_key=step_key,
        timestamp=time.time(),
        dagster_event=step_failure_event,
    )
    instance.handle_new_event(event_record)
    return step_failure_event