def execute_step_out_of_process(step_context, step):
    if step_context.run_config.loggers:
        step_context.log.debug(
            'Loggers cannot be injected via RunConfig using the multiprocess executor. Define '
            'loggers on the mode instead. Ignoring loggers: [{logger_names}]'.format(
                logger_names=', '.join(
                    [
                        '\'{name}\''.format(name=logger.name)
                        for logger in step_context.run_config.loggers
                    ]
                )
            )
        )

    run_config = RunConfig(
        run_id=step_context.run_config.run_id,
        tags=step_context.run_config.tags,
        loggers=None,
        event_callback=None,
        reexecution_config=None,
        step_keys_to_execute=step_context.run_config.step_keys_to_execute,
        mode=step_context.run_config.mode,
    )

    command = InProcessExecutorChildProcessCommand(
        step_context.environment_dict, run_config, step_context.executor_config, step.key
    )

    for step_event in execute_child_process_command(command):
        # Re-log child events in the parent when an event callback is registered.
        if step_context.run_config.event_callback and isinstance(step_event, DagsterEvent):
            log_step_event(step_context, step_event)
        yield step_event
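# The function above relies on execute_child_process_command to stream events
# from a subprocess back to the parent. A minimal sketch of that pattern,
# assuming a picklable command object with an execute() generator; SketchCommand
# and run_command_in_child_process are illustrative names, not dagster's
# InProcessExecutorChildProcessCommand machinery.
import multiprocessing


class SketchCommand:
    def execute(self):
        yield 'step_start'
        yield 'step_success'


def _child_target(command, queue):
    # Child process: forward every event, then a sentinel so the parent stops reading.
    for event in command.execute():
        queue.put(event)
    queue.put(None)


def run_command_in_child_process(command):
    queue = multiprocessing.Queue()
    process = multiprocessing.Process(target=_child_target, args=(command, queue))
    process.start()
    try:
        while True:
            event = queue.get()
            if event is None:
                break
            yield event
    finally:
        process.join()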
def launch_step(self, step_context, prior_attempts_count):
    step_run_ref = step_context_to_step_run_ref(
        step_context, prior_attempts_count, self.local_pipeline_package_path
    )
    run_id = step_context.pipeline_run.run_id
    log = step_context.log

    step_key = step_run_ref.step_key
    self._upload_artifacts(log, step_run_ref, run_id, step_key)

    task = self._get_databricks_task(run_id, step_key)
    databricks_run_id = self.databricks_runner.submit_run(self.run_config, task)

    try:
        # If this is being called within a `capture_interrupts` context, allow interrupts
        # while waiting for the execution to complete, so that we can terminate slow or
        # hanging steps
        with raise_execution_interrupts():
            self.databricks_runner.wait_for_run_to_complete(log, databricks_run_id)
    finally:
        if self.wait_for_logs:
            self._log_logs_from_cluster(log, databricks_run_id)

    for event in self.get_step_events(run_id, step_key):
        log_step_event(step_context, event)
        yield event
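# The raise_execution_interrupts block above temporarily re-enables interrupts
# that an enclosing capture_interrupts context has deferred. A minimal sketch of
# that capture/raise signal pattern, assuming a single-threaded caller; it is
# illustrative only, not dagster's implementation.
import signal
from contextlib import contextmanager

_interrupt_deferred = {'received': False}


@contextmanager
def capture_interrupts_sketch():
    # Record SIGINT instead of raising KeyboardInterrupt immediately.
    def _handler(signum, frame):
        _interrupt_deferred['received'] = True

    previous = signal.signal(signal.SIGINT, _handler)
    try:
        yield
    finally:
        signal.signal(signal.SIGINT, previous)


@contextmanager
def raise_interrupts_sketch():
    # Restore the default handler so Ctrl-C raises while we block on a slow
    # remote call, and re-raise any interrupt that was deferred earlier.
    previous = signal.signal(signal.SIGINT, signal.default_int_handler)
    try:
        if _interrupt_deferred['received']:
            _interrupt_deferred['received'] = False
            raise KeyboardInterrupt()
        yield
    finally:
        signal.signal(signal.SIGINT, previous)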
def _log_new_events(self, events, plan_context, running_steps):
    # Note: this could lead to duplicated events if the returned events were already logged
    # (they shouldn't be)
    for event in events:
        log_step_event(
            plan_context.for_step(running_steps[event.step_key]),
            event,
        )
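# log_step_event is called in every snippet here but never defined. A plausible
# sketch, assuming it simply routes a DagsterEvent through the step context's
# logger at a severity derived from the event type; the real dagster helper may
# differ.
def log_step_event_sketch(step_context, step_event):
    is_failure = 'FAILURE' in step_event.event_type_value
    log_fn = step_context.log.error if is_failure else step_context.log.debug
    log_fn('Received step event: {event}'.format(event=step_event))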
def wait_for_completion_and_log(self, log, run_id, step_key, emr_step_id, step_context):
    s3 = boto3.resource("s3", region_name=self.region_name)
    try:
        for event in self.wait_for_completion(log, s3, run_id, step_key, emr_step_id):
            log_step_event(step_context, event)
            yield event
    except EmrError as emr_error:
        # Surface cluster logs before propagating the failure so the error
        # context is not lost.
        if self.wait_for_logs:
            self._log_logs_from_s3(log, emr_step_id)
        raise emr_error

    if self.wait_for_logs:
        self._log_logs_from_s3(log, emr_step_id)
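# A sketch of what a _log_logs_from_s3 helper might do with a boto3 resource
# like the one created above: fetch the step's stdout/stderr objects and echo
# them to the dagster log. The bucket name and key layout are assumptions for
# illustration; real EMR log paths depend on the cluster's LogUri.
def log_logs_from_s3_sketch(log, s3, log_bucket, emr_step_id):
    for stream in ('stdout', 'stderr'):
        key = 'logs/steps/{step_id}/{stream}'.format(step_id=emr_step_id, stream=stream)
        body = s3.Object(log_bucket, key).get()['Body'].read().decode('utf-8')
        log.info('EMR {stream} for step {step_id}:\n{body}'.format(
            stream=stream, step_id=emr_step_id, body=body
        ))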
def execute_step_out_of_process(step_context, step):
    child_run_config = RunConfig(
        run_id=step_context.run_config.run_id,
        tags=step_context.run_config.tags,
        log_sink=None,
        event_callback=None,
        reexecution_config=None,
        step_keys_to_execute=step_context.run_config.step_keys_to_execute,
        mode=step_context.run_config.mode,
    )

    with safe_tempfile_path() as log_sink_file:
        init_db(log_sink_file)
        # Although the type of is_done is threading._Event in py2, not threading.Event,
        # it is still constructed using the threading.Event() factory
        is_done = threading.Event()

        def log_watcher_thread_target():
            log_watcher = JsonSqlite3LogWatcher(log_sink_file, step_context.log, is_done)
            log_watcher.watch()

        log_watcher_thread = threading.Thread(target=log_watcher_thread_target)
        log_watcher_thread.start()

        command = InProcessExecutorChildProcessCommand(
            step_context.environment_dict,
            child_run_config,
            step_context.executor_config,
            step.key,
            log_sink_file,
        )
        try:
            for step_event in execute_child_process_command(command):
                if step_context.run_config.event_callback and isinstance(step_event, DagsterEvent):
                    log_step_event(step_context, step_event)
                yield step_event
        finally:
            # Signal the watcher so it drains any remaining log lines, then
            # wait for it to exit.
            is_done.set()
            log_watcher_thread.join()
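# The watcher thread above tails a sqlite log sink until the child process is
# done. A minimal sketch of the same watch-until-done pattern against a plain
# text file; watch_file_until_done is an illustrative name, not dagster's
# JsonSqlite3LogWatcher.
import threading
import time


def watch_file_until_done(path, emit, is_done, poll_interval=0.1):
    position = 0
    while True:
        # Check the flag before reading so lines written just before
        # is_done.set() are drained on the final pass.
        done = is_done.is_set()
        with open(path) as f:
            f.seek(position)
            for line in f:
                emit(line.rstrip('\n'))
            position = f.tell()
        if done:
            break
        time.sleep(poll_interval)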
def launch_step(self, step_context, prior_attempts_count):
    step_run_ref = step_context_to_step_run_ref(
        step_context, prior_attempts_count, self.local_pipeline_package_path
    )
    run_id = step_context.pipeline_run.run_id
    log = step_context.log

    step_key = step_run_ref.step_key
    self._post_artifacts(log, step_run_ref, run_id, step_key)

    # Submit the step to the existing cluster, then block until it reaches a
    # terminal state.
    emr_step_def = self._get_emr_step_def(run_id, step_key, step_context.solid.name)
    emr_step_id = self.emr_job_runner.add_job_flow_steps(log, self.cluster_id, [emr_step_def])[0]

    self.emr_job_runner.wait_for_emr_steps_to_complete(log, self.cluster_id, [emr_step_id])
    if self.wait_for_logs:
        self._log_logs_from_s3(log, emr_step_id)

    for event in self.get_step_events(step_context, run_id, step_key):
        log_step_event(step_context, event)
        yield event
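# A sketch of the polling loop that wait_for_emr_steps_to_complete presumably
# runs, built on the real EMR describe_step API; the terminal-state handling
# and poll interval are assumptions, not dagster's code.
import time

import boto3


def wait_for_emr_step_sketch(log, cluster_id, emr_step_id, region_name, poll_interval=30):
    emr = boto3.client('emr', region_name=region_name)
    while True:
        status = emr.describe_step(ClusterId=cluster_id, StepId=emr_step_id)
        state = status['Step']['Status']['State']
        log.debug('EMR step {} is {}'.format(emr_step_id, state))
        if state == 'COMPLETED':
            return
        if state in ('FAILED', 'CANCELLED', 'INTERRUPTED'):
            raise Exception('EMR step {} ended in state {}'.format(emr_step_id, state))
        time.sleep(poll_interval)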
def launch_step(self, step_context, prior_attempts_count):
    step_run_ref = step_context_to_step_run_ref(
        step_context, prior_attempts_count, self.local_pipeline_package_path
    )
    run_id = step_context.pipeline_run.run_id
    log = step_context.log

    step_key = step_run_ref.step_key
    self._upload_artifacts(log, step_run_ref, run_id, step_key)

    task = self._get_databricks_task(run_id, step_key)
    databricks_run_id = self.databricks_runner.submit_run(self.run_config, task)

    try:
        self.databricks_runner.wait_for_run_to_complete(log, databricks_run_id)
    finally:
        if self.wait_for_logs:
            self._log_logs_from_cluster(log, databricks_run_id)

    for event in self.get_step_events(run_id, step_key):
        log_step_event(step_context, event)
        yield event