Exemplo n.º 1
0
 def test_09_join_pipeline_task(self):
     """Smoke test: execute the pipeline join task once with a fresh context."""
     join_task = self.dag.get_task('join_pipeline_task')
     join_instance = models.TaskInstance(
         task=join_task, execution_date=datetime.now())
     context = join_instance.get_template_context()
     join_task.execute(context)
     # Nothing to verify beyond "execute did not raise".
     assert 1 == 1
Exemplo n.º 2
0
 def test_clear_api(self):
     """
     Clear the first bash-DAG task for DEFAULT_DATE (passing upstream=True
     and downstream=True), then call are_dependents_done() on a task
     instance for that same date.
     """
     first_task = self.dag_bash.tasks[0]
     first_task.clear(
         start_date=DEFAULT_DATE,
         end_date=DEFAULT_DATE,
         upstream=True,
         downstream=True)
     instance = models.TaskInstance(
         task=first_task, execution_date=DEFAULT_DATE)
     instance.are_dependents_done()
Exemplo n.º 3
0
    def process_events(self, executor, dagbag):
        """
        React to the events currently buffered by the executor.

        Each event key is unpacked as (dag_id, task_id, execution_date).
        Events whose DAG or task cannot be found in ``dagbag`` are logged
        and skipped. For the rest, the task instance is refreshed from the
        database and, when the executor reported SUCCESS while the instance
        is still QUEUED, it is added to ``self.queued_tis``.

        :param executor: object exposing ``get_event_buffer()``
        :param dagbag: object exposing a ``dags`` mapping keyed by dag_id
        """
        buffered_events = list(executor.get_event_buffer().items())
        for event_key, reported_state in buffered_events:
            dag_id, task_id, execution_date = event_key

            if dag_id not in dagbag.dags:
                self.logger.error(
                    'Executor reported a dag_id that was not found in the '
                    'DagBag: {}'.format(dag_id))
                continue

            dag = dagbag.dags[dag_id]
            if not dag.has_task(task_id):
                self.logger.error(
                    'Executor reported a task_id that was not found in the '
                    'dag: {} in dag {}'.format(task_id, dag_id))
                continue

            instance = models.TaskInstance(
                dag.get_task(task_id), execution_date)
            instance.refresh_from_db()

            # Collect queued tasks for prioritization. Any other executor
            # state is deliberately ignored; special handling for failed
            # executions could be added here.
            if (reported_state == State.SUCCESS
                    and instance.state == State.QUEUED):
                self.queued_tis.add(instance)
Exemplo n.º 4
0
 def setUp(self):
     """Run the shared test setup, then execute the dummy start task."""
     test_setup(self, dag)
     # The start task is a dummy task with nothing to test, so it is simply
     # executed here rather than in a test of its own.
     dummy_start = self.dag.get_task('start_task')
     start_instance = models.TaskInstance(
         task=dummy_start, execution_date=datetime.now())
     dummy_start.execute(start_instance.get_template_context())
Exemplo n.º 5
0
 def test_06_weather_ingest_spark_task(self):
     """
     Confirm that the weather ingest Spark task ingests and persists data.

     Executes 'spark_ingest_weatherdata', then checks that the local ingest
     directory exists and passes ``verify_schema`` with the expected values.
     """
     local_path = get_local_path(self.weather_ingest_path)
     task = self.dag.get_task('spark_ingest_weatherdata')
     task_instance = models.TaskInstance(task=task,
                                         execution_date=datetime.now())
     task.execute(task_instance.get_template_context())
     # Rely on truthiness rather than comparing to True with ``==``.
     assert check_dir_exists(local_path)
     verify_schema(local_path, 17, ['date', 'hum_avg'])
Exemplo n.º 6
0
 def test_07_weather_transform_cleanup_task(self):
     """
     Confirm that the cleanup task removes the directory if it exists.

     The directory is created first so that the task has something to
     remove; afterwards it must no longer exist.
     """
     local_path = get_local_path(self.weather_transform_path)
     task = self.dag.get_task('cleanup_transform_weatherdata')
     task_instance = models.TaskInstance(task=task,
                                         execution_date=datetime.now())
     create_tmp_dir(local_path)
     task.execute(task_instance.get_template_context())
     # Rely on truthiness rather than comparing to False with ``==``.
     assert not check_dir_exists(local_path)
Exemplo n.º 7
0
 def test_08_weather_transform_spark_task(self):
     """
     Confirm that the weather transform Spark task transforms and persists data.

     Executes 'spark_transform_weatherdata', then checks that the local
     transform directory exists and passes ``verify_schema`` with the
     expected values.
     """
     local_path = get_local_path(self.weather_transform_path)
     task = self.dag.get_task('spark_transform_weatherdata')
     task_instance = models.TaskInstance(task=task,
                                         execution_date=datetime.now())
     task.execute(task_instance.get_template_context())
     # Rely on truthiness rather than comparing to True with ``==``.
     assert check_dir_exists(local_path)
     verify_schema(local_path, 19, ['dayofweek', 'humidity_range'])
Exemplo n.º 8
0
 def test_02_uber_ingest_spark_task(self):
     """
     Confirm that the Uber ingest Spark task ingests and persists data.

     Executes 'spark_ingest_uberdata', then checks that the local ingest
     directory exists and passes ``verify_schema`` with the expected values.
     """
     local_path = get_local_path(self.uber_ingest_path)
     task = self.dag.get_task('spark_ingest_uberdata')
     task_instance = models.TaskInstance(task=task,
                                         execution_date=datetime.now())
     task.execute(task_instance.get_template_context())
     # Rely on truthiness rather than comparing to True with ``==``.
     assert check_dir_exists(local_path)
     verify_schema(local_path, 6, ['DATE', 'TIME', 'PICK_UP_ADDRESS'])
Exemplo n.º 9
0
 def test_10_datamart_cleanup_task(self):
     """
     Confirm that the datamart cleanup task removes the directory if it exists.

     The directory is created first so that the task has something to
     remove; afterwards it must no longer exist.
     """
     local_path = get_local_path(self.app_path)
     task = self.dag.get_task(
         'cleanup_uber_rides_by_himidity_uberridesbyhumidity')
     task_instance = models.TaskInstance(task=task,
                                         execution_date=datetime.now())
     create_tmp_dir(local_path)
     task.execute(task_instance.get_template_context())
     # Rely on truthiness rather than comparing to False with ``==``.
     assert not check_dir_exists(local_path)
Exemplo n.º 10
0
    def test_scheduler_verify_pool_full(self, mock_pool_full):
        """
        Test task instances not queued when pool is full

        A pool with a single slot is created and two dag runs (hence two
        task instances) are scheduled against it. After the scheduler sends
        the SCHEDULED instances to the executor, exactly one task is
        expected in the executor's queue.

        :param mock_pool_full: mock injected by the caller; its return value
            is forced to False here.
        """
        mock_pool_full.return_value = False

        dag = DAG(
            dag_id='test_scheduler_verify_pool_full',
            start_date=DEFAULT_DATE)

        DummyOperator(
            task_id='dummy',
            dag=dag,
            owner='airflow',
            pool='test_scheduler_verify_pool_full')

        session = settings.Session()
        pool = Pool(pool='test_scheduler_verify_pool_full', slots=1)
        session.add(pool)
        orm_dag = DagModel(dag_id=dag.dag_id)
        orm_dag.is_paused = False
        session.merge(orm_dag)
        session.commit()

        scheduler = SchedulerJob()
        dag.clear()

        # Create 2 dagruns, which will create 2 task instances.
        # assertEqual is used instead of the deprecated assertEquals alias.
        dr = scheduler.create_dag_run(dag)
        self.assertIsNotNone(dr)
        self.assertEqual(dr.execution_date, DEFAULT_DATE)
        dr = scheduler.create_dag_run(dag)
        self.assertIsNotNone(dr)
        queue = []
        scheduler._process_task_instances(dag, queue=queue)
        self.assertEqual(len(queue), 2)
        dagbag = SimpleDagBag([dag])

        # Recreated part of the scheduler here, to kick off tasks -> executor
        for ti_key in queue:
            task = dag.get_task(ti_key[1])
            ti = models.TaskInstance(task, ti_key[2])
            # Task starts out in the scheduled state. All tasks in the
            # scheduled state will be sent to the executor
            ti.state = State.SCHEDULED

            # Also save this task instance to the DB.
            session.merge(ti)
            session.commit()

        scheduler._execute_task_instances(dagbag,
                                          (State.SCHEDULED,
                                           State.UP_FOR_RETRY))

        self.assertEqual(len(scheduler.executor.queued_tasks), 1)
    def run_backfill(self, dag_id, task_id):
        """
        Backfill ``dag_id`` for DEFAULT_DATE and assert ``task_id`` succeeded.

        :param dag_id: id of the DAG to fetch from ``self.dagbag``
        :param task_id: id of the task whose final state is checked
        """
        target_dag = self.dagbag.get_dag(dag_id)
        target_dag.clear()

        backfill = BackfillJob(
            dag=target_dag, start_date=DEFAULT_DATE, end_date=DEFAULT_DATE)
        backfill.run()

        instance = models.TaskInstance(
            task=target_dag.get_task(task_id), execution_date=DEFAULT_DATE)
        instance.refresh_from_db()

        self.assertEqual(instance.state, State.SUCCESS)
Exemplo n.º 12
0
 def test_run_pooling_task(self):
     """
     Check that running a task instance whose task is assigned to a pool
     leaves the instance in the QUEUED state.
     """
     pooled_dag = models.DAG(dag_id='test_run_pooling_task')
     pooled_op = DummyOperator(
         task_id='test_run_pooling_task_op',
         dag=pooled_dag,
         pool='test_run_pooling_task_pool',
         owner='airflow',
         start_date=datetime.datetime(2016, 2, 1, 0, 0, 0))
     instance = models.TaskInstance(
         task=pooled_op, execution_date=datetime.datetime.now())
     instance.run()
     assert instance.state == models.State.QUEUED
Exemplo n.º 13
0
 def test_11_datamart_transform_spark_task(self):
     """
     Confirm that the datamart Spark task joins and persists data.

     Executes the task, then checks that the output directory exists,
     contains exactly one CSV file, and that the file's header includes
     the 'humidity_range' and 'count' columns.
     """
     local_path = get_local_path(self.app_path)
     task = self.dag.get_task(
         'spark_uber_rides_by_himidity_uberridesbyhumidity')
     task_instance = models.TaskInstance(task=task,
                                         execution_date=datetime.now())
     task.execute(task_instance.get_template_context())
     # Rely on truthiness rather than comparing to True with ``==``.
     assert check_dir_exists(local_path)
     files_list = glob.glob(local_path + '/*.csv')
     assert len(files_list) == 1
     # Read the header inside a context manager so the file handle is
     # closed; ``fieldnames`` reads lazily, so it must be accessed while
     # the file is still open.
     with open(files_list[0]) as csv_file:
         schema = csv.DictReader(csv_file).fieldnames
     assert 'humidity_range' in schema
     assert 'count' in schema
Exemplo n.º 14
0
    def update_last_run(self):
        """
        Record a synthetic successful run aligned to the previous schedule.

        Takes the last dag run, computes the previous schedule of its
        execution date, and forces that datetime onto the same calendar day
        as the last run (keeping the scheduled time of day). If the result
        differs from the last run's execution date, a DagRun in SUCCESS
        state and one SUCCESS TaskInstance per task id are inserted for it.

        Does nothing when there is no last run.
        """
        last_dag_run = self.last_run()

        if last_dag_run:
            dag_task_execution_date = self.previous_schedule(
                last_dag_run.execution_date)
            # print() with a single argument behaves the same on Python 2
            # and Python 3, unlike the print statement.
            print(dag_task_execution_date)

            if dag_task_execution_date.date(
            ) != last_dag_run.execution_date.date():
                # Keep the scheduled time of day but move it onto the date
                # of the last run.
                dag_task_execution_date = datetime.combine(
                    last_dag_run.execution_date.date(),
                    dag_task_execution_date.time())

            print(dag_task_execution_date)
            print(last_dag_run.execution_date)

            if dag_task_execution_date != last_dag_run.execution_date:
                # Call the session factory to obtain a session object
                # rather than operating on the factory itself.
                session = settings.Session()

                dag_re_schedule_run = models.DagRun(
                    dag_id=self.dag_id,
                    run_id='scheduled__' + dag_task_execution_date.isoformat(),
                    execution_date=dag_task_execution_date,
                    start_date=datetime.now(),
                    end_date=datetime.now(),
                    state=State.SUCCESS,
                    external_trigger=False)
                session.add(dag_re_schedule_run)
                session.commit()

                for dag_task_id in self.task_ids:
                    task_instance = models.TaskInstance(
                        self.get_task(dag_task_id),
                        execution_date=dag_task_execution_date,
                        state=State.SUCCESS)
                    task_instance.start_date = datetime.now()
                    task_instance.end_date = datetime.now()

                    session.add(task_instance)
                    session.commit()
Exemplo n.º 15
0
        def get_rendered_template(task):
            """
            Render the templates of a task and return its templated field names.

            The task's ``sql`` and/or ``bql`` attribute (whichever exists) is
            prefixed with '/dags/' in place before rendering, so the task
            object passed in is modified.

            :param task: BigQueryOperator task that needs to be rendered
            :type task: BigQueryOperator
            :return: the ``template_fields`` attribute of the task's class
            :raises Exception: if rendering the templates fails
            """
            # Prefix the script path with the dags folder so it resolves.
            for script_attr in ('sql', 'bql'):
                if hasattr(task, script_attr):
                    setattr(task, script_attr,
                            '/dags/' + getattr(task, script_attr))

            execution_dttm = datetime.datetime(2018, 10, 21, 0, 0, 0)
            instance = af_models.TaskInstance(
                task=task, execution_date=execution_dttm)
            try:
                instance.render_templates()
            except Exception as err:
                raise Exception("Error rendering template: " + str(err))
            return task.__class__.template_fields
Exemplo n.º 16
0
    def _execute(self):
        """
        Runs a dag for a specified date range.

        Pickles the DAG, builds one TaskInstance per task and schedule date
        between the backfill start and end dates, then loops until no
        instances remain: runnable instances are queued on the executor,
        and executor events are used to move instances to the succeeded,
        failed or won't-run lists. Downstream instances of a failed
        instance are dropped without being run.
        """
        start_date = self.bf_start_date
        end_date = self.bf_end_date

        session = settings.Session()
        pickle = models.DagPickle(self.dag, self)
        executor = self.executor
        executor.start()
        session.add(pickle)
        session.commit()
        pickle_id = pickle.id

        # Build a list of all instances to run
        tasks_to_run = {}
        failed = []
        succeeded = []
        started = []
        wont_run = []
        for task in self.dag.tasks:
            # Once set, the dates are reused for every subsequent task.
            start_date = start_date or task.start_date
            end_date = end_date or task.end_date or datetime.now()
            for dttm in utils.date_range(start_date, end_date,
                                         task.dag.schedule_interval):
                ti = models.TaskInstance(task, dttm)
                tasks_to_run[ti.key] = ti

        # Triggering what is ready to get triggered
        while tasks_to_run:
            msg = ("Yet to run: {0} | "
                   "Succeeded: {1} | "
                   "Started: {2} | "
                   "Failed: {3} | "
                   "Won't run: {4} ").format(len(tasks_to_run), len(succeeded),
                                             len(started), len(failed),
                                             len(wont_run))

            logging.info(msg)
            # Iterate over a snapshot: entries are deleted from
            # tasks_to_run inside the loop, which would raise RuntimeError
            # when iterating a live dict view on Python 3.
            for key, ti in list(tasks_to_run.items()):
                ti.refresh_from_db()
                if ti.state == State.SUCCESS and key in tasks_to_run:
                    succeeded.append(key)
                    del tasks_to_run[key]
                elif ti.is_runnable():
                    executor.queue_command(key=ti.key,
                                           command=ti.command(
                                               mark_success=self.mark_success,
                                               pickle_id=pickle_id))
                    ti.state = State.RUNNING
                    if key not in started:
                        started.append(key)
            self.heartbeat()
            executor.heartbeat()

            # Reacting to events
            for key, state in list(executor.get_event_buffer().items()):
                dag_id, task_id, execution_date = key
                if key not in tasks_to_run:
                    continue
                ti = tasks_to_run[key]
                ti.refresh_from_db()
                if ti.state == State.FAILED:
                    failed.append(key)
                    logging.error("Task instance " + str(key) + " failed")
                    del tasks_to_run[key]
                    # Removing downstream tasks from the one that has failed
                    for t in self.dag.get_task(task_id).get_flat_relatives(
                            upstream=False):
                        # Separate name so the event key is not rebound.
                        downstream_key = (
                            ti.dag_id, t.task_id, execution_date)
                        if downstream_key in tasks_to_run:
                            wont_run.append(downstream_key)
                            del tasks_to_run[downstream_key]
                elif ti.state == State.SUCCESS:
                    succeeded.append(key)
                    del tasks_to_run[key]
        executor.end()
        logging.info("Run summary:")
        session.close()
Exemplo n.º 17
0
    def _execute(self):
        """
        Runs a dag for a specified date range.

        Builds one TaskInstance per (non-adhoc unless included) task and
        schedule date in the backfill range, persists them, then loops until
        nothing is left to run or a deadlock is detected. In each pass,
        queueable instances are sent to the executor, instances that cannot
        run yet are tracked in ``not_ready``, and executor events are
        reconciled with the state stored on each instance.

        :raises AirflowException: if any task instance failed or the
            backfill deadlocked.
        """
        session = settings.Session()

        start_date = self.bf_start_date
        end_date = self.bf_end_date

        # picklin'
        pickle_id = None
        if not self.donot_pickle and self.executor.__class__ not in (
                executors.LocalExecutor, executors.SequentialExecutor):
            pickle = models.DagPickle(self.dag)
            session.add(pickle)
            session.commit()
            pickle_id = pickle.id

        executor = self.executor
        executor.start()
        # Per-key count of "executor said success, task state disagrees".
        executor_fails = Counter()

        # Build a list of all instances to run
        tasks_to_run = {}
        failed = set()
        succeeded = set()
        started = set()
        skipped = set()
        not_ready = set()
        deadlocked = set()

        for task in self.dag.tasks:
            if (not self.include_adhoc) and task.adhoc:
                continue

            # Once set, the dates are reused for every subsequent task.
            start_date = start_date or task.start_date
            end_date = end_date or task.end_date or datetime.now()
            for dttm in self.dag.date_range(start_date, end_date=end_date):
                ti = models.TaskInstance(task, dttm)
                tasks_to_run[ti.key] = ti
                session.merge(ti)
        session.commit()

        # Triggering what is ready to get triggered
        while tasks_to_run and not deadlocked:
            not_ready.clear()
            # Snapshot the items because entries are popped inside the loop.
            for key, ti in list(tasks_to_run.items()):

                ti.refresh_from_db()
                ignore_depends_on_past = (self.ignore_first_depends_on_past
                                          and ti.execution_date
                                          == (start_date or ti.start_date))

                # The task was already marked successful or skipped by a
                # different Job. Don't rerun it.
                if key not in started:
                    if ti.state == State.SUCCESS:
                        succeeded.add(key)
                        tasks_to_run.pop(key)
                        continue
                    elif ti.state == State.SKIPPED:
                        skipped.add(key)
                        tasks_to_run.pop(key)
                        continue

                # Is the task runnable? -- then run it
                if ti.is_queueable(
                        include_queued=True,
                        ignore_depends_on_past=ignore_depends_on_past,
                        flag_upstream_failed=True):
                    self.logger.debug('Sending {} to executor'.format(ti))
                    executor.queue_task_instance(
                        ti,
                        mark_success=self.mark_success,
                        pickle_id=pickle_id,
                        ignore_dependencies=self.ignore_dependencies,
                        ignore_depends_on_past=ignore_depends_on_past,
                        pool=self.pool)
                    started.add(key)

                # Mark the task as not ready to run
                elif ti.state in (State.NONE, State.UPSTREAM_FAILED):
                    not_ready.add(key)

            self.heartbeat()
            executor.heartbeat()

            # If the set of tasks that aren't ready ever equals the set of
            # tasks to run, then the backfill is deadlocked
            if not_ready and not_ready == set(tasks_to_run):
                deadlocked.update(tasks_to_run.values())
                tasks_to_run.clear()

            # Reacting to events
            for key, state in list(executor.get_event_buffer().items()):
                dag_id, task_id, execution_date = key
                if key not in tasks_to_run:
                    continue
                ti = tasks_to_run[key]
                ti.refresh_from_db()

                # executor reports failure
                if state == State.FAILED:

                    # task reports running
                    if ti.state == State.RUNNING:
                        msg = ('Executor reports that task instance {} failed '
                               'although the task says it is running.'.format(
                                   key))
                        self.logger.error(msg)
                        ti.handle_failure(msg)
                        tasks_to_run.pop(key)

                    # task reports skipped
                    elif ti.state == State.SKIPPED:
                        self.logger.error("Skipping {} ".format(key))
                        skipped.add(key)
                        tasks_to_run.pop(key)

                    # anything else is a failure
                    else:
                        self.logger.error(
                            "Task instance {} failed".format(key))
                        failed.add(key)
                        tasks_to_run.pop(key)

                # executor reports success
                elif state == State.SUCCESS:

                    # task reports success
                    if ti.state == State.SUCCESS:
                        self.logger.info(
                            'Task instance {} succeeded'.format(key))
                        succeeded.add(key)
                        tasks_to_run.pop(key)

                    # task reports failure
                    elif ti.state == State.FAILED:
                        self.logger.error(
                            "Task instance {} failed".format(key))
                        failed.add(key)
                        tasks_to_run.pop(key)

                    # task reports skipped
                    elif ti.state == State.SKIPPED:
                        self.logger.info(
                            "Task instance {} skipped".format(key))
                        skipped.add(key)
                        tasks_to_run.pop(key)

                    # this probably won't ever be triggered
                    # (not_ready stores keys, so membership must be tested
                    # with the key, not the TaskInstance object)
                    elif key in not_ready:
                        self.logger.info(
                            "{} wasn't expected to run, but it did".format(ti))

                    # executor reports success but task does not - this is weird
                    elif ti.state not in (State.SUCCESS, State.QUEUED,
                                          State.UP_FOR_RETRY):
                        self.logger.error(
                            "The airflow run command failed "
                            "at reporting an error. This should not occur "
                            "in normal circumstances. Task state is '{}',"
                            "reported state is '{}'. TI is {}"
                            "".format(ti.state, state, ti))

                        # if the executor fails 3 or more times, stop trying to
                        # run the task
                        executor_fails[key] += 1
                        if executor_fails[key] >= 3:
                            msg = (
                                'The airflow run command failed to report an '
                                'error for task {} three or more times. The '
                                'task is being marked as failed. This is very '
                                'unusual and probably means that an error is '
                                'taking place before the task even '
                                'starts.'.format(key))
                            self.logger.error(msg)
                            ti.handle_failure(msg)
                            tasks_to_run.pop(key)

            msg = ' | '.join([
                "[backfill progress]", "waiting: {0}", "succeeded: {1}",
                "kicked_off: {2}", "failed: {3}", "skipped: {4}",
                "deadlocked: {5}"
            ]).format(len(tasks_to_run), len(succeeded), len(started),
                      len(failed), len(skipped), len(deadlocked))
            self.logger.info(msg)

        executor.end()
        session.close()

        # Collect every problem into one message so that failures and
        # deadlocks are reported together.
        err = ''
        if failed:
            err += ("---------------------------------------------------\n"
                    "Some task instances failed:\n{}\n".format(failed))
        if deadlocked:
            err += ('---------------------------------------------------\n'
                    'BackfillJob is deadlocked.')
            # A task whose dependency check changes when depends_on_past is
            # ignored was blocked by that relationship.
            deadlocked_depends_on_past = any(
                t.are_dependencies_met() != t.are_dependencies_met(
                    ignore_depends_on_past=True) for t in deadlocked)
            if deadlocked_depends_on_past:
                err += (
                    'Some of the deadlocked tasks were unable to run because '
                    'of "depends_on_past" relationships. Try running the '
                    'backfill with the option '
                    '"ignore_first_depends_on_past=True" or passing "-I" at '
                    'the command line.')
            err += ' These tasks were unable to run:\n{}\n'.format(deadlocked)
        if err:
            raise AirflowException(err)

        self.logger.info("Backfill done. Exiting.")
Exemplo n.º 18
0
    def _execute(self):
        """
        Runs a dag for a specified date range.

        Builds one TaskInstance per (non-adhoc unless included) task and
        schedule date in the backfill range, then loops until no instances
        remain: runnable instances are queued on the executor, and executor
        events are used to move instances to the succeeded, failed or
        won't-run lists. Downstream instances of a failed instance are
        dropped without being run.

        :raises AirflowException: if any task instance failed.
        """
        session = settings.Session()

        start_date = self.bf_start_date
        end_date = self.bf_end_date

        # picklin'
        pickle_id = None
        if not self.donot_pickle and self.executor.__class__ not in (
                executors.LocalExecutor, executors.SequentialExecutor):
            pickle = models.DagPickle(self.dag)
            session.add(pickle)
            session.commit()
            pickle_id = pickle.id

        executor = self.executor
        executor.start()

        # Build a list of all instances to run
        tasks_to_run = {}
        failed = []
        succeeded = []
        started = []
        wont_run = []
        for task in self.dag.tasks:
            if (not self.include_adhoc) and task.adhoc:
                continue

            # Once set, the dates are reused for every subsequent task.
            start_date = start_date or task.start_date
            end_date = end_date or task.end_date or datetime.now()
            for dttm in utils.date_range(
                    start_date, end_date, task.dag.schedule_interval):
                ti = models.TaskInstance(task, dttm)
                tasks_to_run[ti.key] = ti

        # Triggering what is ready to get triggered
        while tasks_to_run:
            # Iterate over a snapshot: entries are deleted from
            # tasks_to_run inside the loop, which would raise RuntimeError
            # when iterating a live dict view on Python 3.
            for key, ti in list(tasks_to_run.items()):
                ti.refresh_from_db()
                if ti.state == State.SUCCESS and key in tasks_to_run:
                    succeeded.append(key)
                    del tasks_to_run[key]
                elif ti.is_runnable():
                    executor.queue_task_instance(
                        ti,
                        mark_success=self.mark_success,
                        task_start_date=self.bf_start_date,
                        pickle_id=pickle_id,
                        ignore_dependencies=self.ignore_dependencies)
                    ti.state = State.RUNNING
                    if key not in started:
                        started.append(key)
            self.heartbeat()
            executor.heartbeat()

            # Reacting to events
            for key, state in list(executor.get_event_buffer().items()):
                dag_id, task_id, execution_date = key
                if key not in tasks_to_run:
                    continue
                ti = tasks_to_run[key]
                ti.refresh_from_db()
                if ti.state == State.FAILED:
                    failed.append(key)
                    logging.error("Task instance " + str(key) + " failed")
                    del tasks_to_run[key]
                    # Removing downstream tasks from the one that has failed
                    for t in self.dag.get_task(task_id).get_flat_relatives(
                            upstream=False):
                        # Separate name so the event key is not rebound.
                        downstream_key = (
                            ti.dag_id, t.task_id, execution_date)
                        if downstream_key in tasks_to_run:
                            wont_run.append(downstream_key)
                            del tasks_to_run[downstream_key]
                elif ti.state == State.SUCCESS:
                    succeeded.append(key)
                    del tasks_to_run[key]

            # Note: the "skipped" figure below is the size of wont_run.
            msg = (
                "[backfill progress] "
                "waiting: {0} | "
                "succeeded: {1} | "
                "kicked_off: {2} | "
                "failed: {3} | "
                "skipped: {4} ").format(
                    len(tasks_to_run),
                    len(succeeded),
                    len(started),
                    len(failed),
                    len(wont_run))
            logging.info(msg)

        executor.end()
        session.close()
        if failed:
            raise AirflowException(
                "Some tasks instances failed, here's the list:\n"+str(failed))
        logging.info("All done. Exiting.")
Exemplo n.º 19
0
    def _execute(self):
        """
        Runs a dag for a specified date range.

        Builds one TaskInstance per (non-adhoc unless included) task and
        schedule date in the backfill range, then loops until no instances
        remain. Each pass queues runnable instances on the executor and then
        reconciles the executor's reported events with the state stored on
        each instance, sorting keys into the succeeded, failed and wont_run
        lists.

        :raises AirflowException: if any task instance ended up in the
            failed list.
        """
        session = settings.Session()

        start_date = self.bf_start_date
        end_date = self.bf_end_date

        # picklin'
        # The DAG is only pickled when pickling is not disabled and the
        # executor is neither a LocalExecutor nor a SequentialExecutor.
        pickle_id = None
        if not self.donot_pickle and self.executor.__class__ not in (
                executors.LocalExecutor, executors.SequentialExecutor):
            pickle = models.DagPickle(self.dag)
            session.add(pickle)
            session.commit()
            pickle_id = pickle.id

        executor = self.executor
        executor.start()

        # Build a list of all instances to run
        tasks_to_run = {}
        failed = []
        succeeded = []
        started = []
        wont_run = []
        for task in self.dag.tasks:
            if (not self.include_adhoc) and task.adhoc:
                continue

            # Once set, the dates are reused for every subsequent task.
            start_date = start_date or task.start_date
            end_date = end_date or task.end_date or datetime.now()
            for dttm in self.dag.date_range(start_date, end_date=end_date):
                ti = models.TaskInstance(task, dttm)
                tasks_to_run[ti.key] = ti

        # Triggering what is ready to get triggered
        while tasks_to_run:
            # Snapshot the items because entries are popped inside the loop.
            for key, ti in list(tasks_to_run.items()):
                ti.refresh_from_db()
                # Instances already SUCCESS or SKIPPED are both recorded in
                # the succeeded list and dropped from the work set.
                if ti.state in (State.SUCCESS,
                                State.SKIPPED) and key in tasks_to_run:
                    succeeded.append(key)
                    tasks_to_run.pop(key)
                # Already in flight: leave it alone for this pass.
                elif ti.state in (State.RUNNING, State.QUEUED):
                    continue
                elif ti.is_runnable(flag_upstream_failed=True):
                    executor.queue_task_instance(
                        ti,
                        mark_success=self.mark_success,
                        task_start_date=self.bf_start_date,
                        pickle_id=pickle_id,
                        ignore_dependencies=self.ignore_dependencies,
                        pool=self.pool)
                    # Sets the state on the in-memory object only; the next
                    # refresh_from_db() call reloads it.
                    ti.state = State.RUNNING
                    if key not in started:
                        started.append(key)
            self.heartbeat()
            executor.heartbeat()

            # Reacting to events
            for key, state in list(executor.get_event_buffer().items()):
                dag_id, task_id, execution_date = key
                if key not in tasks_to_run:
                    continue
                ti = tasks_to_run[key]
                ti.refresh_from_db()
                # Either the instance itself is FAILED/SKIPPED or the
                # executor reported a failure.
                if (ti.state in (State.FAILED, State.SKIPPED)
                        or state == State.FAILED):
                    if ti.state == State.FAILED or state == State.FAILED:
                        failed.append(key)
                        self.logger.error("Task instance " + str(key) +
                                          " failed")
                    elif ti.state == State.SKIPPED:
                        wont_run.append(key)
                        self.logger.error("Skipping " + str(key) + " failed")
                    tasks_to_run.pop(key)
                    # Removing downstream tasks that also shouldn't run
                    for t in self.dag.get_task(task_id).get_flat_relatives(
                            upstream=False):
                        # NOTE: this rebinds the loop variable ``key``; the
                        # outer for statement assigns it again on its next
                        # iteration.
                        key = (ti.dag_id, t.task_id, execution_date)
                        if key in tasks_to_run:
                            wont_run.append(key)
                            tasks_to_run.pop(key)
                elif ti.state == State.SUCCESS and state == State.SUCCESS:
                    succeeded.append(key)
                    tasks_to_run.pop(key)
                # Executor reported success but the instance is neither
                # SUCCESS nor QUEUED: only logged, the key stays in
                # tasks_to_run.
                elif (ti.state not in (State.SUCCESS, State.QUEUED)
                      and state == State.SUCCESS):
                    self.logger.error(
                        "The airflow run command failed "
                        "at reporting an error. This should not occur "
                        "in normal circumstances. Task state is '{}',"
                        "reported state is '{}'. TI is {}"
                        "".format(ti.state, state, ti))

            msg = ("[backfill progress] "
                   "waiting: {0} | "
                   "succeeded: {1} | "
                   "kicked_off: {2} | "
                   "failed: {3} | "
                   "wont_run: {4} ").format(len(tasks_to_run), len(succeeded),
                                            len(started), len(failed),
                                            len(wont_run))
            self.logger.info(msg)

        executor.end()
        session.close()
        if failed:
            msg = ("------------------------------------------\n"
                   "Some tasks instances failed, "
                   "here's the list:\n{}".format(failed))
            raise AirflowException(msg)
        self.logger.info("All done. Exiting.")