def _dbnd_run_error(self, ex):
    """Handle a run failure: log it, mark run/task states, and build the error.

    Returns a DatabandRunError wrapping *ex*; the caller is expected to raise it.
    """
    should_log_exception = (
        # what scenario is this airflow filtering supposed to help with?
        # I had airflow put a default airflow.cfg in .dbnd causing validation
        # error in k8sExecutor which was invisible in the console (only in task log)
        (
            "airflow" not in ex.__class__.__name__.lower()
            or ex.__class__.__name__ == "AirflowConfigException"
        )
        and "Failed tasks are:" not in str(ex)
        and not isinstance(
            ex, (DatabandRunError, KeyboardInterrupt, DatabandSigTermError)
        )
    )
    if should_log_exception:
        logger.exception(ex)

    # Decide the overall run state and the state for tasks that never finished.
    if isinstance(ex, (KeyboardInterrupt, DatabandSigTermError)) or self.is_killed():
        run_state = RunState.CANCELLED
        unfinished_task_state = TaskRunState.UPSTREAM_FAILED
    elif isinstance(ex, DatabandFailFastError):
        run_state = RunState.FAILED
        unfinished_task_state = TaskRunState.UPSTREAM_FAILED
    else:
        run_state = RunState.FAILED
        unfinished_task_state = TaskRunState.FAILED

    self.set_run_state(run_state)
    self.tracker.tracking_store.set_unfinished_tasks_state(
        run_uid=self.run_uid, state=unfinished_task_state
    )

    err_banner_msg = self.describe.get_error_banner()
    logger.error(
        u"\n\n{sep}\n{banner}\n{sep}".format(
            sep=console_utils.ERROR_SEPARATOR, banner=err_banner_msg
        )
    )
    return DatabandRunError(
        "Run has failed: %s" % ex, run=self, nested_exceptions=ex
    )
def _get_dbnd_run_relative_cmd():
    """Return the part of ``sys.argv`` that follows the ``run`` subcommand.

    Raises:
        DatabandRunError: if ``run`` is not present anywhere in the command line.
    """
    argv = list(sys.argv)
    while argv:
        current = argv.pop(0)
        if current == "run":
            return argv
    # bug fix: the original passed a bare "%s" format string with no argument,
    # so the offending command line was never interpolated into the message
    raise DatabandRunError(
        "Can't calculate run command from '%s'" % sys.argv,
        help_msg="Check that it has a format of '..executable.. run ...'",
    )
def do_run(self):
    """Execute every task run of this run in topological order.

    Reused tasks are skipped; on fail_fast, downstream tasks are marked
    UPSTREAM_FAILED; a killed run marks the remaining tasks FAILED.
    Raises DatabandRunError if any task failed and errors were collected.
    """
    ordered_tasks = topological_sort([tr.task for tr in self.task_runs])
    fail_fast = self.settings.run.fail_fast

    any_failed = False
    state_updates = []  # task runs whose state changed with track=False
    for task in ordered_tasks:
        task_run = self.run.get_task_run_by_id(task.task_id)
        if task_run.is_reused:
            continue

        if fail_fast and any_failed:
            logger.info(
                "Setting %s to %s", task.task_id, TaskRunState.UPSTREAM_FAILED
            )
            task_run.set_task_run_state(TaskRunState.UPSTREAM_FAILED, track=False)
            state_updates.append(task_run)
            continue

        if self.run.is_killed():
            logger.info(
                "Databand Context is killed! Stopping %s to %s",
                task.task_id,
                TaskRunState.FAILED,
            )
            task_run.set_task_run_state(TaskRunState.FAILED, track=False)
            state_updates.append(task_run)
            continue

        logger.debug("Executing task: %s", task.task_id)
        try:
            task_run.runner.execute()
        except DatabandSigTermError as e:
            raise e
        except Exception as e:
            any_failed = True
            logger.error(
                "Failed to execute task '%s': %s" % (task.task_id, str(e))
            )

    # Push the batched (untracked) state changes in one tracker call.
    if state_updates:
        self.run.tracker.set_task_run_states(state_updates)

    if any_failed:
        err = _collect_errors(self.run.task_runs)
        if err:
            raise DatabandRunError(err)
def _execute(self, session=None):
    """
    Initializes all components required to run a dag for a specified date range
    and calls helper method to execute the tasks.

    Raises:
        DatabandSystemError: if the dag run can't be built or has no task instances.
        DatabandRunError: if the executor reported errors for the run.
    """
    # Trigger cleaning
    if self.airflow_config.clean_zombies_during_backfill:
        ClearZombieJob().run()

    ti_status = BackfillJob._DagRunTaskStatus()

    # picklin'
    pickle_id = self.dag.pickle_id
    # We don't need to pickle our dag again as it already pickled on job creation
    # also this will save it into databand table, that have no use for the airflow
    # if not self.donot_pickle and self.executor.__class__ not in (
    #     executors.LocalExecutor,
    #     executors.SequentialExecutor,
    # ):
    #     pickle_id = airflow_pickle(self.dag, session=session)

    executor = self.executor
    executor.start()
    ti_status.total_runs = 1  # total dag runs in backfill

    dag_run = None
    try:
        dag_run = self._get_dag_run(session=session)
        # bug fix: this guard must run BEFORE any attribute access on dag_run;
        # previously dag_run.conf was assigned first, so a None dag_run raised
        # AttributeError and this check was unreachable
        if dag_run is None:
            raise DatabandSystemError("Can't build dagrun")

        # Create relation DagRun <> Job
        dag_run.conf = {"job_id": self.id}
        session.merge(dag_run)
        session.commit()

        run_date = dag_run.execution_date

        tis_map = self._task_instances_for_dag_run(dag_run, session=session)
        if not tis_map:
            raise DatabandSystemError("There are no task instances to run!")
        ti_status.active_runs.append(dag_run)
        ti_status.to_run.update(tis_map or {})

        processed_dag_run_dates = self._process_dag_task_instances(
            ti_status=ti_status,
            executor=executor,
            pickle_id=pickle_id,
            session=session,
        )
        ti_status.executed_dag_run_dates.update(processed_dag_run_dates)

        err = self._collect_errors(ti_status=ti_status, session=session)
        if err:
            raise DatabandRunError("Airflow executor has failed to run the run")

        if run_date not in ti_status.executed_dag_run_dates:
            self.log.warning(
                "Dag %s is not marked as completed! \n%s not found in %s",
                self.dag_id,
                run_date,
                ti_status.executed_dag_run_dates,
            )
    finally:
        # in sequential executor a keyboard interrupt would reach here and
        # then executor.end() -> heartbeat() -> sync() will cause the queued commands
        # to be run again before exiting
        if hasattr(executor, "commands_to_run"):
            executor.commands_to_run = []
        try:
            executor.end()
        except Exception:
            logger.exception("Failed to terminate executor")
        # NOTE(review): assumes a RUNNING dag_run at this point is a zombie left
        # behind by an aborted run — confirm against _kill_dag_run_zombi semantics
        if dag_run and dag_run.state == State.RUNNING:
            _kill_dag_run_zombi(dag_run, session)
        session.commit()
    self.log.info("Run is completed. Exiting.")