def run(
        self, start_date=None, end_date=None, ignore_dependencies=False,
        force=False, mark_success=False):
    """
    Run a set of task instances for a date range.
    """
    start_date = start_date or self.start_date
    end_date = end_date or self.end_date or datetime.now()

    for dt in utils.date_range(start_date, end_date, self.schedule_interval):
        TaskInstance(self, dt).run(
            mark_success=mark_success,
            ignore_dependencies=ignore_dependencies,
            force=force,
        )
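# The loop above leans on utils.date_range to expand a start/end pair into
# the discrete execution dates that each get a TaskInstance. A minimal
# standalone sketch of that expansion, assuming a timedelta-style
# schedule_interval; date_range_sketch and its inclusive-end semantics are
# illustrative assumptions, not Airflow's actual implementation.
from datetime import datetime, timedelta


def date_range_sketch(start_date, end_date, interval):
    # Yield datetimes from start_date up to end_date (inclusive),
    # stepping by the schedule interval.
    current = start_date
    while current <= end_date:
        yield current
        current += interval


# A daily interval over three days yields three execution dates,
# one TaskInstance per date in run() above.
for dt in date_range_sketch(
        datetime(2015, 1, 1), datetime(2015, 1, 3), timedelta(days=1)):
    print(dt.isoformat())  # 2015-01-01T00:00:00, 2015-01-02T00:00:00, ...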
def _execute(self):
    """
    Runs a dag for a specified date range.
    """
    session = settings.Session()
    start_date = self.bf_start_date
    end_date = self.bf_end_date

    # picklin'
    pickle_id = None
    if not self.donot_pickle and self.executor.__class__ not in (
            executors.LocalExecutor, executors.SequentialExecutor):
        pickle = models.DagPickle(self.dag)
        session.add(pickle)
        session.commit()
        pickle_id = pickle.id

    executor = self.executor
    executor.start()

    # Build a list of all instances to run
    tasks_to_run = {}
    failed = []
    succeeded = []
    started = []
    wont_run = []
    for task in self.dag.tasks:
        if (not self.include_adhoc) and task.adhoc:
            continue
        start_date = start_date or task.start_date
        end_date = end_date or task.end_date or datetime.now()
        for dttm in utils.date_range(
                start_date, end_date, task.dag.schedule_interval):
            ti = models.TaskInstance(task, dttm)
            tasks_to_run[ti.key] = ti

    # Triggering what is ready to get triggered
    while tasks_to_run:
        # Copy the items since entries are deleted mid-iteration
        for key, ti in list(tasks_to_run.items()):
            ti.refresh_from_db()
            if ti.state == State.SUCCESS and key in tasks_to_run:
                succeeded.append(key)
                del tasks_to_run[key]
            elif ti.is_runnable():
                executor.queue_task_instance(
                    ti,
                    mark_success=self.mark_success,
                    task_start_date=self.bf_start_date,
                    pickle_id=pickle_id,
                    ignore_dependencies=self.ignore_dependencies)
                ti.state = State.RUNNING
                if key not in started:
                    started.append(key)
        self.heartbeat()
        executor.heartbeat()

        # Reacting to events
        for key, state in executor.get_event_buffer().items():
            dag_id, task_id, execution_date = key
            if key not in tasks_to_run:
                continue
            ti = tasks_to_run[key]
            ti.refresh_from_db()
            if ti.state == State.FAILED:
                failed.append(key)
                logging.error("Task instance " + str(key) + " failed")
                del tasks_to_run[key]
                # Removing downstream tasks from the one that has failed
                for t in self.dag.get_task(task_id).get_flat_relatives(
                        upstream=False):
                    key = (ti.dag_id, t.task_id, execution_date)
                    if key in tasks_to_run:
                        wont_run.append(key)
                        del tasks_to_run[key]
            elif ti.state == State.SUCCESS:
                succeeded.append(key)
                del tasks_to_run[key]

        msg = (
            "[backfill progress] "
            "waiting: {0} | "
            "succeeded: {1} | "
            "kicked_off: {2} | "
            "failed: {3} | "
            "skipped: {4} ").format(
                len(tasks_to_run), len(succeeded), len(started),
                len(failed), len(wont_run))
        logging.info(msg)

    executor.end()
    session.close()
    if failed:
        raise AirflowException(
            "Some task instances failed, here's the list:\n" + str(failed))
    logging.info("All done. Exiting.")
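# The "picklin'" block above serializes the DAG only when work may run on a
# worker that can't reach the DAG file through a shared filesystem. A
# standalone sketch of that decision; the executor classes and needs_pickle
# helper below are illustrative stand-ins, not imports from Airflow.

class LocalExecutor(object):
    pass


class SequentialExecutor(object):
    pass


class CeleryExecutor(object):
    pass


def needs_pickle(executor, donot_pickle=False):
    # Local-ish executors run on the scheduler's own machine, so the DAG
    # file itself is importable and no pickle round-trip is needed.
    return not donot_pickle and executor.__class__ not in (
        LocalExecutor, SequentialExecutor)


assert needs_pickle(CeleryExecutor()) is True
assert needs_pickle(LocalExecutor()) is False
assert needs_pickle(CeleryExecutor(), donot_pickle=True) is False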
def tree(self):
    dag_id = request.args.get('dag_id')
    dag = dagbag.dags[dag_id]
    session = settings.Session()

    base_date = request.args.get('base_date')
    if not base_date:
        base_date = datetime.now()
    else:
        base_date = dateutil.parser.parse(base_date)

    num_runs = request.args.get('num_runs')
    num_runs = int(num_runs) if num_runs else 25
    from_date = (base_date - (num_runs * dag.schedule_interval)).date()
    from_date = datetime.combine(from_date, datetime.min.time())

    dates = utils.date_range(from_date, base_date, dag.schedule_interval)
    task_instances = {}
    for ti in dag.get_task_instances(session, from_date):
        task_instances[(ti.task_id, ti.execution_date)] = ti

    expanded = []

    def recurse_nodes(task):
        children = [recurse_nodes(t) for t in task.upstream_list]

        # D3 tree uses children vs _children to define what is
        # expanded or not. The following block makes it such that
        # repeated nodes are collapsed by default.
        children_key = 'children'
        if task.task_id not in expanded:
            expanded.append(task.task_id)
        elif children:
            children_key = "_children"

        return {
            'name': task.task_id,
            'instances': [
                utils.alchemy_to_dict(
                    task_instances.get((task.task_id, d))) or {
                        'execution_date': d.isoformat(),
                        'task_id': task.task_id
                    }
                for d in dates],
            children_key: children,
            'num_dep': len(task.upstream_list),
            'operator': task.task_type,
            'retries': task.retries,
            'owner': task.owner,
            'start_date': task.start_date,
            'end_date': task.end_date,
            'depends_on_past': task.depends_on_past,
        }

    if len(dag.roots) > 1:
        # d3 likes a single root
        data = {
            'name': 'root',
            'instances': [],
            'children': [recurse_nodes(t) for t in dag.roots]
        }
    else:
        data = recurse_nodes(dag.roots[0])

    data = json.dumps(data, indent=4, default=utils.json_ser)
    session.commit()
    session.close()

    return self.render('airflow/tree.html', dag=dag, data=data)
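# recurse_nodes() above emits a plain nested dict for the D3 tree; a node
# seen more than once gets its subtree under "_children" so D3 renders it
# collapsed by default. A standalone sketch of that convention with toy task
# names; the DAG shape here is made up for illustration.
import json

expanded = []


def node(task_id, upstream):
    children = [node(t, u) for t, u in upstream]
    children_key = 'children'
    if task_id not in expanded:
        expanded.append(task_id)
    elif children:
        children_key = '_children'  # D3 renders this subtree collapsed
    return {'name': task_id, children_key: children}


# 'extract' (with its own upstream 'source') appears under both load tasks;
# its second occurrence lands under "_children" and starts collapsed.
tree = node('report', [
    ('load_a', [('extract', [('source', [])])]),
    ('load_b', [('extract', [('source', [])])]),
])
print(json.dumps(tree, indent=4))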
def _execute(self):
    """
    Runs a dag for a specified date range.
    """
    start_date = self.bf_start_date
    end_date = self.bf_end_date

    session = settings.Session()
    pickle = models.DagPickle(self.dag, self)
    executor = self.executor
    executor.start()
    session.add(pickle)
    session.commit()
    pickle_id = pickle.id

    # Build a list of all instances to run
    tasks_to_run = {}
    failed = []
    succeeded = []
    started = []
    wont_run = []
    for task in self.dag.tasks:
        start_date = start_date or task.start_date
        end_date = end_date or task.end_date or datetime.now()
        for dttm in utils.date_range(
                start_date, end_date, task.dag.schedule_interval):
            ti = models.TaskInstance(task, dttm)
            tasks_to_run[ti.key] = ti

    # Triggering what is ready to get triggered
    while tasks_to_run:
        msg = (
            "Yet to run: {0} | "
            "Succeeded: {1} | "
            "Started: {2} | "
            "Failed: {3} | "
            "Won't run: {4} ").format(
                len(tasks_to_run), len(succeeded), len(started),
                len(failed), len(wont_run))
        logging.info(msg)

        # Copy the items since entries are deleted mid-iteration
        for key, ti in list(tasks_to_run.items()):
            ti.refresh_from_db()
            if ti.state == State.SUCCESS and key in tasks_to_run:
                succeeded.append(key)
                del tasks_to_run[key]
            elif ti.is_runnable():
                executor.queue_command(
                    key=ti.key,
                    command=ti.command(
                        mark_success=self.mark_success,
                        pickle_id=pickle_id)
                )
                ti.state = State.RUNNING
                if key not in started:
                    started.append(key)

        self.heartbeat()
        executor.heartbeat()

        # Reacting to events
        for key, state in executor.get_event_buffer().items():
            dag_id, task_id, execution_date = key
            if key not in tasks_to_run:
                continue
            ti = tasks_to_run[key]
            ti.refresh_from_db()
            if ti.state == State.FAILED:
                failed.append(key)
                logging.error("Task instance " + str(key) + " failed")
                del tasks_to_run[key]
                # Removing downstream tasks from the one that has failed
                for t in self.dag.get_task(task_id).get_flat_relatives(
                        upstream=False):
                    key = (ti.dag_id, t.task_id, execution_date)
                    if key in tasks_to_run:
                        wont_run.append(key)
                        del tasks_to_run[key]
            elif ti.state == State.SUCCESS:
                succeeded.append(key)
                del tasks_to_run[key]

    executor.end()
    logging.info("Run summary:")
    session.close()
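# The while-loop above is a poll-and-drain pattern: queue whatever is
# runnable, heartbeat, then consume the executor's event buffer, which maps
# (dag_id, task_id, execution_date) keys to terminal states. A standalone
# sketch of that buffer contract; FakeExecutor and its instant-success
# behavior are illustrative assumptions, not Airflow's executor API.

class FakeExecutor(object):
    def __init__(self):
        self._queued = []
        self._events = {}

    def queue_command(self, key, command):
        self._queued.append((key, command))

    def heartbeat(self):
        # Pretend every queued command finishes successfully in one beat.
        for key, _ in self._queued:
            self._events[key] = 'success'
        self._queued = []

    def get_event_buffer(self):
        # Hand back accumulated events and reset, as the loop expects.
        events, self._events = self._events, {}
        return events


executor = FakeExecutor()
executor.queue_command(('dag', 'task_a', '2015-01-01'), 'airflow run ...')
executor.heartbeat()
for key, state in executor.get_event_buffer().items():
    print(key, state)               # ('dag', 'task_a', '2015-01-01') success
print(executor.get_event_buffer())  # {} -- the buffer drains once read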
def _execute(self):
    """
    Runs a dag for a specified date range.
    """
    session = settings.Session()
    start_date = self.bf_start_date
    end_date = self.bf_end_date

    # picklin'
    pickle_id = None
    if not self.donot_pickle and self.executor.__class__ not in (
            executors.LocalExecutor, executors.SequentialExecutor):
        pickle = models.DagPickle(self.dag)
        session.add(pickle)
        session.commit()
        pickle_id = pickle.id

    executor = self.executor
    executor.start()

    # Build a list of all instances to run
    tasks_to_run = {}
    failed = []
    succeeded = []
    started = []
    wont_run = []
    for task in self.dag.tasks:
        if (not self.include_adhoc) and task.adhoc:
            continue
        start_date = start_date or task.start_date
        end_date = end_date or task.end_date or datetime.now()
        for dttm in utils.date_range(
                start_date, end_date, task.dag.schedule_interval):
            ti = models.TaskInstance(task, dttm)
            tasks_to_run[ti.key] = ti

    # Triggering what is ready to get triggered
    while tasks_to_run:
        for key, ti in list(tasks_to_run.items()):
            ti.refresh_from_db()
            if (ti.state in (State.SUCCESS, State.SKIPPED) and
                    key in tasks_to_run):
                succeeded.append(key)
                tasks_to_run.pop(key)
            elif ti.state in (State.RUNNING, State.QUEUED):
                continue
            elif ti.is_runnable(flag_upstream_failed=True):
                executor.queue_task_instance(
                    ti,
                    mark_success=self.mark_success,
                    task_start_date=self.bf_start_date,
                    pickle_id=pickle_id,
                    ignore_dependencies=self.ignore_dependencies,
                    pool=self.pool)
                ti.state = State.RUNNING
                if key not in started:
                    started.append(key)
        self.heartbeat()
        executor.heartbeat()

        # Reacting to events
        for key, state in list(executor.get_event_buffer().items()):
            dag_id, task_id, execution_date = key
            if key not in tasks_to_run:
                continue
            ti = tasks_to_run[key]
            ti.refresh_from_db()
            if (ti.state in (State.FAILED, State.SKIPPED) or
                    state == State.FAILED):
                if ti.state == State.FAILED or state == State.FAILED:
                    failed.append(key)
                    logging.error("Task instance " + str(key) + " failed")
                elif ti.state == State.SKIPPED:
                    wont_run.append(key)
                    logging.error("Task instance " + str(key) + " skipped")
                tasks_to_run.pop(key)
                # Removing downstream tasks that also shouldn't run
                for t in self.dag.get_task(task_id).get_flat_relatives(
                        upstream=False):
                    key = (ti.dag_id, t.task_id, execution_date)
                    if key in tasks_to_run:
                        wont_run.append(key)
                        tasks_to_run.pop(key)
            elif ti.state == State.SUCCESS and state == State.SUCCESS:
                succeeded.append(key)
                tasks_to_run.pop(key)
            elif (ti.state not in (State.SUCCESS, State.QUEUED) and
                    state == State.SUCCESS):
                logging.error(
                    "The airflow run command failed "
                    "at reporting an error. This should not occur "
                    "in normal circumstances. State is {}".format(ti.state))

        msg = (
            "[backfill progress] "
            "waiting: {0} | "
            "succeeded: {1} | "
            "kicked_off: {2} | "
            "failed: {3} | "
            "wont_run: {4} ").format(
                len(tasks_to_run), len(succeeded), len(started),
                len(failed), len(wont_run))
        logging.info(msg)

    executor.end()
    session.close()
    if failed:
        logging.error(
            "------------------------------------------\n"
            "Some task instances failed, "
            "here's the list:\n{}".format(failed))
        sys.exit(1)
    logging.info("All done. Exiting.")
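# When a task instance fails or is skipped above, every transitive downstream
# task for the same execution_date is pruned from tasks_to_run via
# get_flat_relatives(upstream=False). A standalone sketch of that pruning
# over a toy dependency map; the DAG shape and flat_downstream helper are
# illustrative, and keys are simplified to (task_id, execution_date).

downstream = {
    'extract': ['transform'],
    'transform': ['load'],
    'load': [],
}


def flat_downstream(task_id):
    # Transitive closure of the downstream relation, like flat relatives.
    out = []
    for child in downstream[task_id]:
        out.append(child)
        out.extend(flat_downstream(child))
    return out


execution_date = '2015-01-01'
tasks_to_run = {(t, execution_date): object() for t in downstream}
wont_run = []

failed_task = 'extract'
tasks_to_run.pop((failed_task, execution_date))
for t in flat_downstream(failed_task):
    key = (t, execution_date)
    if key in tasks_to_run:
        wont_run.append(key)
        tasks_to_run.pop(key)

print(sorted(wont_run))  # [('load', '2015-01-01'), ('transform', '2015-01-01')]
print(tasks_to_run)      # {} -- nothing left to run for this date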