def test_event_log_get_stats_for_run(event_storage_factory_cm_fn):
    import math

    with event_storage_factory_cm_fn() as storage:
        enqueued_time = time.time()
        launched_time = enqueued_time + 20
        start_time = launched_time + 50
        storage.store_event(
            DagsterEventRecord(
                None,
                "message",
                "debug",
                "",
                "foo",
                enqueued_time,
                dagster_event=DagsterEvent(
                    DagsterEventType.PIPELINE_ENQUEUED.value,
                    "nonce",
                ),
            )
        )
        storage.store_event(
            DagsterEventRecord(
                None,
                "message",
                "debug",
                "",
                "foo",
                launched_time,
                dagster_event=DagsterEvent(
                    DagsterEventType.PIPELINE_STARTING.value,
                    "nonce",
                ),
            )
        )
        storage.store_event(
            DagsterEventRecord(
                None,
                "message",
                "debug",
                "",
                "foo",
                start_time,
                dagster_event=DagsterEvent(
                    DagsterEventType.PIPELINE_START.value,
                    "nonce",
                ),
            )
        )
        assert math.isclose(storage.get_stats_for_run("foo").enqueued_time, enqueued_time)
        assert math.isclose(storage.get_stats_for_run("foo").launch_time, launched_time)
        assert math.isclose(storage.get_stats_for_run("foo").start_time, start_time)

def attempt_to_launch_runs(self):
    max_runs_to_launch = self._max_concurrent_runs - self._count_in_progress_runs()

    # Possibly under 0 if runs were launched without queuing
    if max_runs_to_launch <= 0:
        return

    runs = self._get_queued_runs(limit=max_runs_to_launch)

    for run in runs:
        with external_pipeline_from_run(run) as external_pipeline:
            dequeued_event = DagsterEvent(
                event_type_value=DagsterEventType.PIPELINE_DEQUEUED.value,
                pipeline_name=run.pipeline_name,
            )
            event_record = DagsterEventRecord(
                message="",
                user_message="",
                level=logging.INFO,
                pipeline_name=run.pipeline_name,
                run_id=run.run_id,
                error_info=None,
                timestamp=time.time(),
                dagster_event=dequeued_event,
            )
            self._instance.handle_new_event(event_record)
            self._instance.launch_run(run.run_id, external_pipeline)

def build_synthetic_pipeline_error_record(run_id, error_info, pipeline_name):
    check.str_param(run_id, 'run_id')
    check.str_param(pipeline_name, 'pipeline_name')
    check.inst_param(error_info, 'error_info', SerializableErrorInfo)

    return DagsterEventRecord(
        message=error_info.message + '\nStack Trace:\n' + '\n'.join(error_info.stack),
        # Currently it is the user_message that is displayed to the user client-side
        # in dagit, even though that was not the original intent. The original intent
        # was that user_message would be the message generated by user code and
        # communicated directly to the client. We need to rationalize the treatment
        # of these different error messages.
        user_message=(
            'An exception was thrown during execution that is likely a framework error, '
            'rather than an error in user code.'
        )
        + '\nOriginal error message: '
        + error_info.message
        + '\nStack Trace:\n'
        + '\n'.join(error_info.stack),
        level=logging.ERROR,
        run_id=run_id,
        timestamp=time.time(),
        error_info=error_info,
        pipeline_name=pipeline_name,
        dagster_event=DagsterEvent(DagsterEventType.PIPELINE_FAILURE.value, pipeline_name),
    )

def _dequeue_run(self, run):
    with external_pipeline_from_run(run) as external_pipeline:
        # Double-check that the run is still queued before dequeuing
        reloaded_run = self._instance.get_run_by_id(run.run_id)
        if reloaded_run.status != PipelineRunStatus.QUEUED:
            self._logger.info(
                "Run {run_id} is now {status} instead of QUEUED, skipping".format(
                    run_id=reloaded_run.run_id, status=reloaded_run.status
                )
            )
            return

        dequeued_event = DagsterEvent(
            event_type_value=DagsterEventType.PIPELINE_DEQUEUED.value,
            pipeline_name=run.pipeline_name,
        )
        event_record = DagsterEventRecord(
            message="",
            user_message="",
            level=logging.INFO,
            pipeline_name=run.pipeline_name,
            run_id=run.run_id,
            error_info=None,
            timestamp=time.time(),
            dagster_event=dequeued_event,
        )
        self._instance.handle_new_event(event_record)
        self._instance.launch_run(run.run_id, external_pipeline)

def handle_execute_plan_result_raw(res):
    res_data = res['data']['executePlan']

    res_type = res_data['__typename']

    handle_error_states(res_type, res_data)

    if res_type == 'ExecutePlanSuccess':
        raw_event_records = [
            DagsterEventRecord(
                event_record.error_info,
                event_record.message,
                event_record.level,
                event_record.user_message,
                event_record.run_id,
                event_record.timestamp,
                event_record.step_key,
                event_record.pipeline_name,
                event_record.dagster_event,
            )
            for event_record in [
                deserialize_json_to_dagster_namedtuple(e) for e in res_data['rawEventRecords']
            ]
        ]
        return raw_event_records

    raise DagsterGraphQLClientError('Unexpected result type')

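# Note on the positional signature (inferred from the call sites in this section,
# so treat it as an observation rather than the canonical definition):
# DagsterEventRecord's positional fields, in order, are error_info, message,
# level, user_message, run_id, and timestamp, with step_key, pipeline_name, and
# dagster_event typically passed by keyword.
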
def test_correct_timezone(conn_string):
    event_log_storage = PostgresEventLogStorage.create_clean_storage(conn_string)

    curr_time = time.time()

    event = DagsterEventRecord(
        None,
        "Message2",
        "debug",
        "",
        "foo",
        curr_time,
        dagster_event=DagsterEvent(
            DagsterEventType.PIPELINE_START.value,
            "nonce",
            event_specific_data=EngineEventData.in_process(999),
        ),
    )

    event_log_storage.store_event(event)

    logs = event_log_storage.get_logs_for_run("foo")

    assert len(logs) == 1

    log = logs[0]

    stats = event_log_storage.get_stats_for_run("foo")

    assert int(log.timestamp) == int(stats.start_time)
    assert int(log.timestamp) == int(curr_time)

def test_event_log_storage_store_with_multiple_runs(event_storage_factory_cm_fn):
    with event_storage_factory_cm_fn() as storage:
        runs = ['foo', 'bar', 'baz']
        for run_id in runs:
            assert len(storage.get_logs_for_run(run_id)) == 0
            storage.store_event(
                DagsterEventRecord(
                    None,
                    'Message2',
                    'debug',
                    '',
                    run_id,
                    time.time(),
                    dagster_event=DagsterEvent(
                        DagsterEventType.STEP_SUCCESS.value,
                        'nonce',
                        event_specific_data=StepSuccessData(duration_ms=100.0),
                    ),
                )
            )

        for run_id in runs:
            assert len(storage.get_logs_for_run(run_id)) == 1
            assert storage.get_stats_for_run(run_id).steps_succeeded == 1

        storage.wipe()

        for run_id in runs:
            assert len(storage.get_logs_for_run(run_id)) == 0

def report_run_failed(self, pipeline_run):
    from dagster.core.events import DagsterEvent, DagsterEventType
    from dagster.core.events.log import DagsterEventRecord

    check.inst_param(pipeline_run, "pipeline_run", PipelineRun)
    message = "This pipeline run has been marked as failed from outside the execution context."

    dagster_event = DagsterEvent(
        event_type_value=DagsterEventType.PIPELINE_FAILURE.value,
        pipeline_name=pipeline_run.pipeline_name,
        message=message,
    )
    event_record = DagsterEventRecord(
        message=message,
        user_message=message,
        level=logging.ERROR,
        pipeline_name=pipeline_run.pipeline_name,
        run_id=pipeline_run.run_id,
        error_info=None,
        timestamp=time.time(),
        dagster_event=dagster_event,
    )
    self.handle_new_event(event_record)
    return dagster_event

def _event_record(run_id, solid_name, timestamp, event_type, event_specific_data=None):
    pipeline_name = "pipeline_name"
    solid_handle = SolidHandle(solid_name, None)
    step_handle = StepHandle(solid_handle)
    return DagsterEventRecord(
        None,
        "",
        "debug",
        "",
        run_id,
        timestamp,
        step_key=step_handle.to_key(),
        pipeline_name=pipeline_name,
        dagster_event=DagsterEvent(
            event_type.value,
            pipeline_name,
            solid_handle=solid_handle,
            step_handle=step_handle,
            event_specific_data=event_specific_data,
        ),
    )

def build_process_started_event(run_id, pipeline_name, process_id):
    message = 'Started process for pipeline (pid: {process_id}).'.format(process_id=process_id)
    return DagsterEventRecord(
        message=message,
        user_message=message,
        level=logging.INFO,
        run_id=run_id,
        timestamp=time.time(),
        error_info=None,
        pipeline_name=pipeline_name,
        dagster_event=DagsterEvent(
            message=message,
            event_type_value=DagsterEventType.PIPELINE_PROCESS_STARTED.value,
            pipeline_name=pipeline_name,
            step_key=None,
            solid_handle=None,
            step_kind_value=None,
            logging_tags=None,
            event_specific_data=PipelineProcessStartedData(
                pipeline_name=pipeline_name, run_id=run_id, process_id=process_id
            ),
        ),
    )

def report_engine_event(
    self,
    message,
    pipeline_run,
    engine_event_data=None,
    cls=None,
    step_key=None,
):
    '''Report an EngineEvent that occurred outside of a pipeline execution context.'''
    from dagster.core.events import EngineEventData, DagsterEvent, DagsterEventType
    from dagster.core.events.log import DagsterEventRecord

    check.class_param(cls, 'cls')
    check.str_param(message, 'message')
    check.inst_param(pipeline_run, 'pipeline_run', PipelineRun)
    engine_event_data = check.opt_inst_param(
        engine_event_data,
        'engine_event_data',
        EngineEventData,
        EngineEventData([]),
    )

    if cls:
        message = "[{}] {}".format(cls.__name__, message)

    log_level = logging.INFO
    if engine_event_data and engine_event_data.error:
        log_level = logging.ERROR

    dagster_event = DagsterEvent(
        event_type_value=DagsterEventType.ENGINE_EVENT.value,
        pipeline_name=pipeline_run.pipeline_name,
        message=message,
        event_specific_data=engine_event_data,
    )
    event_record = DagsterEventRecord(
        message=message,
        user_message=message,
        level=log_level,
        pipeline_name=pipeline_run.pipeline_name,
        run_id=pipeline_run.run_id,
        error_info=None,
        timestamp=time.time(),
        step_key=step_key,
        dagster_event=dagster_event,
    )
    self.handle_new_event(event_record)
    return dagster_event

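# A minimal usage sketch (an assumption, not code from this repo): since
# report_engine_event is defined on the instance, a caller such as a run
# launcher might invoke it as below. `MyRunLauncher`, `instance`, and `run`
# are hypothetical placeholders.
class MyRunLauncher:
    pass


def _report_launch_started(instance, run):
    # Logs at INFO level (no error attached to the engine_event_data) and
    # prefixes the message with "[MyRunLauncher] " via the cls argument.
    return instance.report_engine_event(
        "Launcher is starting the run.",
        run,
        cls=MyRunLauncher,
    )
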
def evt(name):
    return DagsterEventRecord(
        None,
        name,
        'debug',
        '',
        'foo',
        time.time(),
        dagster_event=DagsterEvent(
            DagsterEventType.ENGINE_EVENT.value,
            'nonce',
            event_specific_data=EngineEventData.in_process(999),
        ),
    )

def evt(name): return DagsterEventRecord( None, name, "debug", "", "foo", time.time(), dagster_event=DagsterEvent( DagsterEventType.ENGINE_EVENT.value, "nonce", event_specific_data=EngineEventData.in_process(999), ), )
def build_process_start_event(run_id, pipeline_name):
    message = 'About to start process for pipeline {pipeline_name} run_id {run_id}'.format(
        pipeline_name=pipeline_name, run_id=run_id
    )
    return DagsterEventRecord(
        message=message,
        user_message=message,
        level=logging.INFO,
        run_id=run_id,
        timestamp=time.time(),
        error_info=None,
        pipeline_name=pipeline_name,
        dagster_event=DagsterEvent(DagsterEventType.PIPELINE_PROCESS_START.value, pipeline_name),
    )

def _materialization_event_record(run_id, asset_key):
    return DagsterEventRecord(
        None,
        "",
        "debug",
        "",
        run_id,
        time.time() - 25,
        step_key="my_step_key",
        pipeline_name="my_pipeline",
        dagster_event=DagsterEvent(
            DagsterEventType.STEP_MATERIALIZATION.value,
            "my_pipeline",
            step_key="my_step_key",
            event_specific_data=StepMaterializationData(AssetMaterialization(asset_key=asset_key)),
        ),
    )

def _event_record(run_id, step_key, timestamp, event_type, event_specific_data=None):
    pipeline_name = 'pipeline_name'
    return DagsterEventRecord(
        None,
        '',
        'debug',
        '',
        run_id,
        timestamp,
        step_key=step_key,
        pipeline_name=pipeline_name,
        dagster_event=DagsterEvent(
            event_type.value,
            pipeline_name,
            step_key=step_key,
            event_specific_data=event_specific_data,
        ),
    )

def _dequeue_run(self, run):
    with external_pipeline_from_run(run) as external_pipeline:
        dequeued_event = DagsterEvent(
            event_type_value=DagsterEventType.PIPELINE_DEQUEUED.value,
            pipeline_name=run.pipeline_name,
        )
        event_record = DagsterEventRecord(
            message="",
            user_message="",
            level=logging.INFO,
            pipeline_name=run.pipeline_name,
            run_id=run.run_id,
            error_info=None,
            timestamp=time.time(),
            dagster_event=dequeued_event,
        )
        self._instance.handle_new_event(event_record)
        self._instance.launch_run(run.run_id, external_pipeline)

def test_in_memory_event_log_storage_store_events_and_wipe():
    storage = InMemoryEventLogStorage()
    assert len(storage.get_logs_for_run('foo')) == 0
    storage.store_event(
        DagsterEventRecord(
            None,
            'Message2',
            'debug',
            '',
            'foo',
            time.time(),
            dagster_event=DagsterEvent(
                DagsterEventType.ENGINE_EVENT.value,
                'nonce',
                event_specific_data=EngineEventData.in_process(999),
            ),
        )
    )
    assert len(storage.get_logs_for_run('foo')) == 1
    storage.wipe()
    assert len(storage.get_logs_for_run('foo')) == 0

def test_event_log_delete(event_storage_factory_cm_fn):
    with event_storage_factory_cm_fn() as storage:
        assert len(storage.get_logs_for_run("foo")) == 0
        storage.store_event(
            DagsterEventRecord(
                None,
                "Message2",
                "debug",
                "",
                "foo",
                time.time(),
                dagster_event=DagsterEvent(
                    DagsterEventType.ENGINE_EVENT.value,
                    "nonce",
                    event_specific_data=EngineEventData.in_process(999),
                ),
            )
        )
        assert len(storage.get_logs_for_run("foo")) == 1
        assert storage.get_stats_for_run("foo")
        storage.delete_events("foo")
        assert len(storage.get_logs_for_run("foo")) == 0

def test_event_log_storage_store_events_and_wipe(event_storage_factory_cm_fn):
    with event_storage_factory_cm_fn() as storage:
        assert len(storage.get_logs_for_run('foo')) == 0
        storage.store_event(
            DagsterEventRecord(
                None,
                'Message2',
                'debug',
                '',
                'foo',
                time.time(),
                dagster_event=DagsterEvent(
                    DagsterEventType.ENGINE_EVENT.value,
                    'nonce',
                    event_specific_data=EngineEventData.in_process(999),
                ),
            )
        )
        assert len(storage.get_logs_for_run('foo')) == 1
        assert storage.get_stats_for_run('foo')
        storage.wipe()
        assert len(storage.get_logs_for_run('foo')) == 0

def test_filesystem_event_log_storage_store_events_and_wipe():
    with seven.TemporaryDirectory() as tmpdir_path:
        storage = SqliteEventLogStorage(tmpdir_path)
        assert len(storage.get_logs_for_run('foo')) == 0
        storage.store_event(
            DagsterEventRecord(
                None,
                'Message2',
                'debug',
                '',
                'foo',
                time.time(),
                dagster_event=DagsterEvent(
                    DagsterEventType.ENGINE_EVENT.value,
                    'nonce',
                    event_specific_data=EngineEventData.in_process(999),
                ),
            )
        )
        assert len(storage.get_logs_for_run('foo')) == 1
        storage.wipe()
        assert len(storage.get_logs_for_run('foo')) == 0

def run_iteration(self):
    in_progress = self._count_in_progress_runs()
    max_runs_to_launch = self._max_concurrent_runs - in_progress

    # Possibly under 0 if runs were launched without queuing
    if max_runs_to_launch <= 0:
        self._logger.info(
            "{} runs are currently in progress. Maximum is {}, won't launch more.".format(
                in_progress, self._max_concurrent_runs
            )
        )
        return

    queued_runs = self._get_queued_runs(limit=max_runs_to_launch)

    if not queued_runs:
        self._logger.info("Poll returned no queued runs.")
    else:
        self._logger.info("Retrieved {} queued runs to launch.".format(len(queued_runs)))

    for run in queued_runs:
        with external_pipeline_from_run(run) as external_pipeline:
            dequeued_event = DagsterEvent(
                event_type_value=DagsterEventType.PIPELINE_DEQUEUED.value,
                pipeline_name=run.pipeline_name,
            )
            event_record = DagsterEventRecord(
                message="",
                user_message="",
                level=logging.INFO,
                pipeline_name=run.pipeline_name,
                run_id=run.run_id,
                error_info=None,
                timestamp=time.time(),
                dagster_event=dequeued_event,
            )
            self._instance.handle_new_event(event_record)
            self._instance.launch_run(run.run_id, external_pipeline)

def submit_run(self, pipeline_run, external_pipeline):
    check.inst_param(pipeline_run, "pipeline_run", PipelineRun)
    check.inst_param(external_pipeline, "external_pipeline", ExternalPipeline)
    check.invariant(pipeline_run.status == PipelineRunStatus.NOT_STARTED)

    enqueued_event = DagsterEvent(
        event_type_value=DagsterEventType.PIPELINE_ENQUEUED.value,
        pipeline_name=pipeline_run.pipeline_name,
    )
    event_record = DagsterEventRecord(
        message="",
        user_message="",
        level=logging.INFO,
        pipeline_name=pipeline_run.pipeline_name,
        run_id=pipeline_run.run_id,
        error_info=None,
        timestamp=time.time(),
        dagster_event=enqueued_event,
    )
    self._instance.handle_new_event(event_record)

    return self._instance.get_run_by_id(pipeline_run.run_id)

def build_process_start_event(run_id, pipeline_name):
    check.str_param(pipeline_name, 'pipeline_name')
    check.str_param(run_id, 'run_id')
    message = 'About to start process for pipeline "{pipeline_name}" (run_id: {run_id}).'.format(
        pipeline_name=pipeline_name, run_id=run_id
    )
    return DagsterEventRecord(
        message=message,
        user_message=message,
        level=logging.INFO,
        run_id=run_id,
        timestamp=time.time(),
        error_info=None,
        pipeline_name=pipeline_name,
        dagster_event=DagsterEvent(
            message=message,
            event_type_value=DagsterEventType.PIPELINE_PROCESS_START.value,
            pipeline_name=pipeline_name,
            event_specific_data=PipelineProcessStartData(pipeline_name, run_id),
        ),
    )

def construct_step_failure_event_and_handle(pipeline_run, step_key, err, instance):
    step_failure_event = DagsterEvent(
        event_type_value=DagsterEventType.STEP_FAILURE.value,
        pipeline_name=pipeline_run.pipeline_name,
        step_key=step_key,
        event_specific_data=StepFailureData(
            error=serializable_error_info_from_exc_info(sys.exc_info()),
            user_failure_data=UserFailureData(label="K8sError"),
        ),
    )
    event_record = DagsterEventRecord(
        message=str(err),
        user_message=str(err),
        level=logging.ERROR,
        pipeline_name=pipeline_run.pipeline_name,
        run_id=pipeline_run.run_id,
        error_info=None,
        step_key=step_key,
        timestamp=time.time(),
        dagster_event=step_failure_event,
    )
    instance.handle_new_event(event_record)
    return step_failure_event

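# A minimal sketch of the pattern repeated in the queueing code above (an
# assumption, not an existing dagster helper): every enqueue/dequeue call site
# wraps a run-level DagsterEvent in a DagsterEventRecord with empty messages.
# A hypothetical helper like this could centralize that boilerplate.
def _build_run_event_record(run, event_type, level=logging.INFO):
    dagster_event = DagsterEvent(
        event_type_value=event_type.value,
        pipeline_name=run.pipeline_name,
    )
    return DagsterEventRecord(
        message="",
        user_message="",
        level=level,
        pipeline_name=run.pipeline_name,
        run_id=run.run_id,
        error_info=None,
        timestamp=time.time(),
        dagster_event=dagster_event,
    )
    # Example call site, mirroring _dequeue_run above:
    #     self._instance.handle_new_event(
    #         _build_run_event_record(run, DagsterEventType.PIPELINE_DEQUEUED)
    #     )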