def _execute(self): """ Runs a dag for a specified date range. """ session = settings.Session() start_date = self.bf_start_date end_date = self.bf_end_date # picklin' pickle_id = None if not self.donot_pickle and self.executor.__class__ not in ( executors.LocalExecutor, executors.SequentialExecutor): pickle = models.DagPickle(self.dag) session.add(pickle) session.commit() pickle_id = pickle.id executor = self.executor executor.start() executor_fails = Counter() # Build a list of all instances to run tasks_to_run = {} failed = set() succeeded = set() started = set() skipped = set() not_ready = set() deadlocked = set() for task in self.dag.tasks: if (not self.include_adhoc) and task.adhoc: continue start_date = start_date or task.start_date end_date = end_date or task.end_date or datetime.now() for dttm in self.dag.date_range(start_date, end_date=end_date): ti = models.TaskInstance(task, dttm) tasks_to_run[ti.key] = ti session.merge(ti) session.commit() # Triggering what is ready to get triggered while tasks_to_run and not deadlocked: not_ready.clear() for key, ti in list(tasks_to_run.items()): ti.refresh_from_db() ignore_depends_on_past = (self.ignore_first_depends_on_past and ti.execution_date == (start_date or ti.start_date)) # The task was already marked successful or skipped by a # different Job. Don't rerun it. if key not in started: if ti.state == State.SUCCESS: succeeded.add(key) tasks_to_run.pop(key) continue elif ti.state == State.SKIPPED: skipped.add(key) tasks_to_run.pop(key) continue # Is the task runnable? -- then run it if ti.is_queueable( include_queued=True, ignore_depends_on_past=ignore_depends_on_past, flag_upstream_failed=True): self.logger.debug('Sending {} to executor'.format(ti)) executor.queue_task_instance( ti, mark_success=self.mark_success, pickle_id=pickle_id, ignore_dependencies=self.ignore_dependencies, ignore_depends_on_past=ignore_depends_on_past, pool=self.pool) started.add(key) # Mark the task as not ready to run elif ti.state in (State.NONE, State.UPSTREAM_FAILED): not_ready.add(key) self.heartbeat() executor.heartbeat() # If the set of tasks that aren't ready ever equals the set of # tasks to run, then the backfill is deadlocked if not_ready and not_ready == set(tasks_to_run): deadlocked.update(tasks_to_run.values()) tasks_to_run.clear() # Reacting to events for key, state in list(executor.get_event_buffer().items()): dag_id, task_id, execution_date = key if key not in tasks_to_run: continue ti = tasks_to_run[key] ti.refresh_from_db() # executor reports failure if state == State.FAILED: # task reports running if ti.state == State.RUNNING: msg = ('Executor reports that task instance {} failed ' 'although the task says it is running.'.format( key)) self.logger.error(msg) ti.handle_failure(msg) tasks_to_run.pop(key) # task reports skipped elif ti.state == State.SKIPPED: self.logger.error("Skipping {} ".format(key)) skipped.add(key) tasks_to_run.pop(key) # anything else is a failure else: self.logger.error( "Task instance {} failed".format(key)) failed.add(key) tasks_to_run.pop(key) # executor reports success elif state == State.SUCCESS: # task reports success if ti.state == State.SUCCESS: self.logger.info( 'Task instance {} succeeded'.format(key)) succeeded.add(key) tasks_to_run.pop(key) # task reports failure elif ti.state == State.FAILED: self.logger.error( "Task instance {} failed".format(key)) failed.add(key) tasks_to_run.pop(key) # task reports skipped elif ti.state == State.SKIPPED: self.logger.info( "Task instance {} skipped".format(key)) skipped.add(key) tasks_to_run.pop(key) # this probably won't ever be triggered elif ti in not_ready: self.logger.info( "{} wasn't expected to run, but it did".format(ti)) # executor reports success but task does not - this is weird elif ti.state not in (State.SUCCESS, State.QUEUED, State.UP_FOR_RETRY): self.logger.error( "The airflow run command failed " "at reporting an error. This should not occur " "in normal circumstances. Task state is '{}'," "reported state is '{}'. TI is {}" "".format(ti.state, state, ti)) # if the executor fails 3 or more times, stop trying to # run the task executor_fails[key] += 1 if executor_fails[key] >= 3: msg = ( 'The airflow run command failed to report an ' 'error for task {} three or more times. The ' 'task is being marked as failed. This is very ' 'unusual and probably means that an error is ' 'taking place before the task even ' 'starts.'.format(key)) self.logger.error(msg) ti.handle_failure(msg) tasks_to_run.pop(key) msg = ' | '.join([ "[backfill progress]", "waiting: {0}", "succeeded: {1}", "kicked_off: {2}", "failed: {3}", "skipped: {4}", "deadlocked: {5}" ]).format(len(tasks_to_run), len(succeeded), len(started), len(failed), len(skipped), len(deadlocked)) self.logger.info(msg) executor.end() session.close() err = '' if failed: err += ("---------------------------------------------------\n" "Some task instances failed:\n{}\n".format(failed)) if deadlocked: err += ('---------------------------------------------------\n' 'BackfillJob is deadlocked.') deadlocked_depends_on_past = any( t.are_dependencies_met() != t.are_dependencies_met( ignore_depends_on_past=True) for t in deadlocked) if deadlocked_depends_on_past: err += ( 'Some of the deadlocked tasks were unable to run because ' 'of "depends_on_past" relationships. Try running the ' 'backfill with the option ' '"ignore_first_depends_on_past=True" or passing "-I" at ' 'the command line.') err += ' These tasks were unable to run:\n{}\n'.format(deadlocked) if err: raise AirflowException(err) self.logger.info("Backfill done. Exiting.")
def _execute(self): """ Runs a dag for a specified date range. """ session = settings.Session() start_date = self.bf_start_date end_date = self.bf_end_date # picklin' pickle_id = None if not self.donot_pickle and self.executor.__class__ not in ( executors.LocalExecutor, executors.SequentialExecutor): pickle = models.DagPickle(self.dag) session.add(pickle) session.commit() pickle_id = pickle.id executor = self.executor executor.start() # Build a list of all instances to run tasks_to_run = {} failed = [] succeeded = [] started = [] wont_run = [] for task in self.dag.tasks: if (not self.include_adhoc) and task.adhoc: continue start_date = start_date or task.start_date end_date = end_date or task.end_date or datetime.now() for dttm in self.dag.date_range(start_date, end_date=end_date): ti = models.TaskInstance(task, dttm) tasks_to_run[ti.key] = ti # Triggering what is ready to get triggered while tasks_to_run: for key, ti in list(tasks_to_run.items()): ti.refresh_from_db() if ti.state in (State.SUCCESS, State.SKIPPED) and key in tasks_to_run: succeeded.append(key) tasks_to_run.pop(key) elif ti.state in (State.RUNNING, State.QUEUED): continue elif ti.is_runnable(flag_upstream_failed=True): executor.queue_task_instance( ti, mark_success=self.mark_success, task_start_date=self.bf_start_date, pickle_id=pickle_id, ignore_dependencies=self.ignore_dependencies, pool=self.pool) ti.state = State.RUNNING if key not in started: started.append(key) self.heartbeat() executor.heartbeat() # Reacting to events for key, state in list(executor.get_event_buffer().items()): dag_id, task_id, execution_date = key if key not in tasks_to_run: continue ti = tasks_to_run[key] ti.refresh_from_db() if (ti.state in (State.FAILED, State.SKIPPED) or state == State.FAILED): if ti.state == State.FAILED or state == State.FAILED: failed.append(key) self.logger.error("Task instance " + str(key) + " failed") elif ti.state == State.SKIPPED: wont_run.append(key) self.logger.error("Skipping " + str(key) + " failed") tasks_to_run.pop(key) # Removing downstream tasks that also shouldn't run for t in self.dag.get_task(task_id).get_flat_relatives( upstream=False): key = (ti.dag_id, t.task_id, execution_date) if key in tasks_to_run: wont_run.append(key) tasks_to_run.pop(key) elif ti.state == State.SUCCESS and state == State.SUCCESS: succeeded.append(key) tasks_to_run.pop(key) elif (ti.state not in (State.SUCCESS, State.QUEUED) and state == State.SUCCESS): self.logger.error( "The airflow run command failed " "at reporting an error. This should not occur " "in normal circumstances. Task state is '{}'," "reported state is '{}'. TI is {}" "".format(ti.state, state, ti)) msg = ("[backfill progress] " "waiting: {0} | " "succeeded: {1} | " "kicked_off: {2} | " "failed: {3} | " "wont_run: {4} ").format(len(tasks_to_run), len(succeeded), len(started), len(failed), len(wont_run)) self.logger.info(msg) executor.end() session.close() if failed: msg = ("------------------------------------------\n" "Some tasks instances failed, " "here's the list:\n{}".format(failed)) raise AirflowException(msg) self.logger.info("All done. Exiting.")
def _execute(self): """ Runs a dag for a specified date range. """ session = settings.Session() start_date = self.bf_start_date end_date = self.bf_end_date # picklin' pickle_id = None if not self.donot_pickle and self.executor.__class__ not in ( executors.LocalExecutor, executors.SequentialExecutor): pickle = models.DagPickle(self.dag) session.add(pickle) session.commit() pickle_id = pickle.id executor = self.executor executor.start() # Build a list of all instances to run tasks_to_run = {} failed = [] succeeded = [] started = [] wont_run = [] for task in self.dag.tasks: if (not self.include_adhoc) and task.adhoc: continue start_date = start_date or task.start_date end_date = end_date or task.end_date or datetime.now() for dttm in utils.date_range( start_date, end_date, task.dag.schedule_interval): ti = models.TaskInstance(task, dttm) tasks_to_run[ti.key] = ti # Triggering what is ready to get triggered while tasks_to_run: for key, ti in tasks_to_run.items(): ti.refresh_from_db() if ti.state == State.SUCCESS and key in tasks_to_run: succeeded.append(key) del tasks_to_run[key] elif ti.is_runnable(): executor.queue_task_instance( ti, mark_success=self.mark_success, task_start_date=self.bf_start_date, pickle_id=pickle_id, ignore_dependencies=self.ignore_dependencies) ti.state = State.RUNNING if key not in started: started.append(key) self.heartbeat() executor.heartbeat() # Reacting to events for key, state in executor.get_event_buffer().items(): dag_id, task_id, execution_date = key if key not in tasks_to_run: continue ti = tasks_to_run[key] ti.refresh_from_db() if ti.state == State.FAILED: failed.append(key) logging.error("Task instance " + str(key) + " failed") del tasks_to_run[key] # Removing downstream tasks from the one that has failed for t in self.dag.get_task(task_id).get_flat_relatives( upstream=False): key = (ti.dag_id, t.task_id, execution_date) if key in tasks_to_run: wont_run.append(key) del tasks_to_run[key] elif ti.state == State.SUCCESS: succeeded.append(key) del tasks_to_run[key] msg = ( "[backfill progress] " "waiting: {0} | " "succeeded: {1} | " "kicked_off: {2} | " "failed: {3} | " "skipped: {4} ").format( len(tasks_to_run), len(succeeded), len(started), len(failed), len(wont_run)) logging.info(msg) executor.end() session.close() if failed: raise AirflowException( "Some tasks instances failed, here's the list:\n"+str(failed)) logging.info("All done. Exiting.")
def _execute(self): """ Runs a dag for a specified date range. """ start_date = self.bf_start_date end_date = self.bf_end_date session = settings.Session() pickle = models.DagPickle(self.dag, self) executor = self.executor executor.start() session.add(pickle) session.commit() pickle_id = pickle.id # Build a list of all intances to run tasks_to_run = {} failed = [] succeeded = [] started = [] wont_run = [] for task in self.dag.tasks: start_date = start_date or task.start_date end_date = end_date or task.end_date or datetime.now() for dttm in utils.date_range(start_date, end_date, task.dag.schedule_interval): ti = models.TaskInstance(task, dttm) tasks_to_run[ti.key] = ti # Triggering what is ready to get triggered while tasks_to_run: msg = ("Yet to run: {0} | " "Succeeded: {1} | " "Started: {2} | " "Failed: {3} | " "Won't run: {4} ").format(len(tasks_to_run), len(succeeded), len(started), len(failed), len(wont_run)) logging.info(msg) for key, ti in tasks_to_run.items(): ti.refresh_from_db() if ti.state == State.SUCCESS and key in tasks_to_run: succeeded.append(key) del tasks_to_run[key] elif ti.is_runnable(): executor.queue_command(key=ti.key, command=ti.command( mark_success=self.mark_success, pickle_id=pickle_id)) ti.state = State.RUNNING if key not in started: started.append(key) self.heartbeat() executor.heartbeat() # Reacting to events for key, state in executor.get_event_buffer().items(): dag_id, task_id, execution_date = key if key not in tasks_to_run: continue ti = tasks_to_run[key] ti.refresh_from_db() if ti.state == State.FAILED: failed.append(key) logging.error("Task instance " + str(key) + " failed") del tasks_to_run[key] # Removing downstream tasks from the one that has failed for t in self.dag.get_task(task_id).get_flat_relatives( upstream=False): key = (ti.dag_id, t.task_id, execution_date) if key in tasks_to_run: wont_run.append(key) del tasks_to_run[key] elif ti.state == State.SUCCESS: succeeded.append(key) del tasks_to_run[key] executor.end() logging.info("Run summary:") session.close()