def execute(self, context):
    # If the DAG Run is externally triggered, then return without
    # skipping downstream tasks
    if context['dag_run'].external_trigger:
        logging.info("""Externally triggered DAG_Run: allowing execution to proceed.""")
        return

    now = datetime.datetime.now()
    left_window = context['dag'].following_schedule(
        context['execution_date'])
    right_window = context['dag'].following_schedule(left_window)
    logging.info(
        'Checking latest only with left_window: %s right_window: %s '
        'now: %s', left_window, right_window, now)
    if not left_window < now <= right_window:
        logging.info('Not latest execution, skipping downstream.')
        session = settings.Session()
        for task in context['task'].downstream_list:
            ti = TaskInstance(task,
                              execution_date=context['ti'].execution_date)
            logging.info('Skipping task: %s', ti.task_id)
            ti.state = State.SKIPPED
            ti.start_date = now
            ti.end_date = now
            session.merge(ti)
        session.commit()
        session.close()
        logging.info('Done.')
    else:
        logging.info('Latest, allowing execution to proceed.')

def kill_zombies(self, zombies, session=None):
    """
    Fail given zombie tasks, which are tasks that haven't
    had a heartbeat for too long, in the current DagBag.

    :param zombies: zombie task instances to kill.
    :type zombies: SimpleTaskInstance
    :param session: DB session.
    :type session: sqlalchemy.orm.session.Session
    """
    for zombie in zombies:
        if zombie.dag_id in self.dags:
            dag = self.dags[zombie.dag_id]
            if zombie.task_id in dag.task_ids:
                task = dag.get_task(zombie.task_id)
                ti = TaskInstance(task, zombie.execution_date)
                # Get properties needed for failure handling from SimpleTaskInstance.
                ti.start_date = zombie.start_date
                ti.end_date = zombie.end_date
                ti.try_number = zombie.try_number
                ti.state = zombie.state
                ti.test_mode = configuration.getboolean(
                    'core', 'unit_test_mode')
                ti.handle_failure("{} detected as zombie".format(ti),
                                  ti.test_mode, ti.get_template_context())
                self.log.info('Marked zombie job %s as %s', ti, ti.state)
                Stats.incr('zombies_killed')
    session.commit()

def test_set_duration(self):
    task = DummyOperator(task_id='op', email='*****@*****.**')
    ti = TI(
        task=task,
        execution_date=datetime.datetime.now(),
    )
    ti.start_date = datetime.datetime(2018, 10, 1, 1)
    ti.end_date = datetime.datetime(2018, 10, 1, 2)
    ti.set_duration()
    self.assertEqual(ti.duration, 3600)

def execute(self, context):
    # If the DAG Run is externally triggered, then return without
    # skipping downstream tasks
    if context['dag_run'] and context['dag_run'].external_trigger:
        logging.info("""Externally triggered DAG_Run: allowing execution to proceed.""")
        return

    now = datetime.datetime.now()
    left_window = context['dag'].following_schedule(
        context['execution_date'])
    right_window = context['dag'].following_schedule(left_window)
    logging.info(
        'Checking latest only with left_window: %s right_window: %s '
        'now: %s', left_window, right_window, now)
    if not left_window < now <= right_window:
        logging.info('Not latest execution, skipping downstream.')
        downstream_task_ids = context['task'].downstream_task_ids
        if downstream_task_ids:
            session = settings.Session()
            TI = TaskInstance
            tis = session.query(TI).filter(
                TI.execution_date == context['ti'].execution_date,
                TI.task_id.in_(downstream_task_ids)
            ).with_for_update().all()

            for ti in tis:
                logging.info('Skipping task: %s', ti.task_id)
                ti.state = State.SKIPPED
                ti.start_date = now
                ti.end_date = now
                session.merge(ti)

            # this is defensive against dag runs that are not complete
            skipped_task_ids = {ti.task_id for ti in tis}
            for task in context['task'].downstream_list:
                if task.task_id in skipped_task_ids:
                    continue

                logging.warning("Task {} was not part of a dag run. "
                                "This should not happen."
                                .format(task))
                now = datetime.datetime.now()
                ti = TaskInstance(task,
                                  execution_date=context['ti'].execution_date)
                ti.state = State.SKIPPED
                ti.start_date = now
                ti.end_date = now
                session.merge(ti)

            session.commit()
            session.close()
            logging.info('Done.')
    else:
        logging.info('Latest, allowing execution to proceed.')

def execute(self, context):
    # If the DAG Run is externally triggered, then return without
    # skipping downstream tasks
    if context['dag_run'] and context['dag_run'].external_trigger:
        logging.info("""Externally triggered DAG_Run: allowing execution to proceed.""")
        return

    now = datetime.datetime.now()
    left_window = context['dag'].following_schedule(
        context['execution_date'])
    right_window = context['dag'].following_schedule(left_window)
    logging.info(
        'Checking latest only with left_window: %s right_window: %s '
        'now: %s', left_window, right_window, now)
    if not left_window < now <= right_window:
        logging.info('Not latest execution, skipping downstream.')
        session = settings.Session()
        TI = TaskInstance
        tis = session.query(TI).filter(
            TI.execution_date == context['ti'].execution_date,
            TI.task_id.in_(context['task'].downstream_task_ids)
        ).with_for_update().all()

        for ti in tis:
            logging.info('Skipping task: %s', ti.task_id)
            ti.state = State.SKIPPED
            ti.start_date = now
            ti.end_date = now
            session.merge(ti)

        # this is defensive against dag runs that are not complete
        skipped_task_ids = {ti.task_id for ti in tis}
        for task in context['task'].downstream_list:
            if task.task_id in skipped_task_ids:
                continue

            logging.warning("Task {} was not part of a dag run. "
                            "This should not happen."
                            .format(task))
            now = datetime.datetime.now()
            ti = TaskInstance(task,
                              execution_date=context['ti'].execution_date)
            ti.state = State.SKIPPED
            ti.start_date = now
            ti.end_date = now
            session.merge(ti)

        session.commit()
        session.close()
        logging.info('Done.')
    else:
        logging.info('Latest, allowing execution to proceed.')

def execute(self, context):
    branch = super(BranchPythonOperator, self).execute(context)
    logging.info("Following branch " + branch)
    logging.info("Marking other directly downstream tasks as skipped")
    session = settings.Session()
    for task in context['task'].downstream_list:
        if task.task_id != branch:
            ti = TaskInstance(task,
                              execution_date=context['ti'].execution_date)
            ti.state = State.SKIPPED
            ti.start_date = datetime.now()
            ti.end_date = datetime.now()
            session.merge(ti)
    session.commit()
    session.close()
    logging.info("Done.")

def execute(self, context):
    branch = super(BranchPythonOperator, self).execute(context)
    logging.info("Following branch " + branch)
    logging.info("Marking other directly downstream tasks as skipped")
    session = settings.Session()
    for task in context['task'].downstream_list:
        if task.task_id != branch:
            ti = TaskInstance(
                task, execution_date=context['ti'].execution_date)
            ti.state = State.SKIPPED
            ti.start_date = datetime.now()
            ti.end_date = datetime.now()
            session.merge(ti)
    session.commit()
    session.close()
    logging.info("Done.")

def execute(self, context):
    condition = super(JollyShortCircuitOperator, self).execute(context)
    logging.info("Condition result is {}".format(condition))
    if condition:
        logging.info('Proceeding with downstream tasks...')
        return
    else:
        logging.info('Skipping downstream tasks...')
        session = settings.Session()
        for task in context['task'].downstream_list:
            ti = TaskInstance(task,
                              execution_date=context['ti'].execution_date)
            ti.state = State.SKIPPED
            ti.start_date = datetime.now()
            ti.end_date = datetime.now()
            session.merge(ti)
        session.commit()
        session.close()
        logging.info("Done.")

def execute(self, context):
    condition = super(ShortCircuitOperator, self).execute(context)
    logging.info("Condition result is {}".format(condition))
    if condition:
        logging.info('Proceeding with downstream tasks...')
        return
    else:
        logging.info('Skipping downstream tasks...')
        session = settings.Session()
        for task in context['task'].downstream_list:
            ti = TaskInstance(
                task, execution_date=context['ti'].execution_date)
            ti.state = State.SKIPPED
            ti.start_date = datetime.now()
            ti.end_date = datetime.now()
            session.merge(ti)
        session.commit()
        session.close()
        logging.info("Done.")

def execute(self, context):
    condition = super(ShortCircuitOperator, self).execute(context)
    logging.info("Condition result is {}".format(condition))
    if condition:
        logging.info('Proceeding with downstream tasks...')
        return

    logging.info('Skipping downstream tasks...')
    session = settings.Session()

    TI = TaskInstance
    tis = session.query(TI).filter(
        TI.execution_date == context['ti'].execution_date,
        TI.task_id.in_(context['task'].downstream_task_ids),
    ).with_for_update().all()

    for ti in tis:
        logging.info('Skipping task: %s', ti.task_id)
        ti.state = State.SKIPPED
        ti.start_date = datetime.now()
        ti.end_date = datetime.now()

    # this is defensive against dag runs that are not complete
    skipped_task_ids = {ti.task_id for ti in tis}
    for task in context['task'].downstream_list:
        if task.task_id in skipped_task_ids:
            continue

        logging.warning(
            "Task {} was not part of a dag run. This should not happen."
            .format(task))
        ti = TaskInstance(task, execution_date=context['ti'].execution_date)
        ti.state = State.SKIPPED
        ti.start_date = datetime.now()
        ti.end_date = datetime.now()
        session.merge(ti)

    session.commit()
    session.close()
    logging.info("Done.")

def skip(self, dag_run, execution_date, tasks, session=None):
    """
    Sets task instances to skipped from the same dag run.

    :param dag_run: the DagRun for which to set the tasks to skipped
    :param execution_date: execution_date
    :param tasks: tasks to skip (not task_ids)
    :param session: db session to use
    """
    if not tasks:
        return

    task_ids = [d.task_id for d in tasks]
    now = timezone.utcnow()

    if dag_run:
        session.query(TaskInstance).filter(
            TaskInstance.dag_id == dag_run.dag_id,
            TaskInstance.execution_date == dag_run.execution_date,
            TaskInstance.task_id.in_(task_ids)).update(
                {
                    TaskInstance.state: State.SKIPPED,
                    TaskInstance.start_date: now,
                    TaskInstance.end_date: now
                },
                synchronize_session=False)
        session.commit()
    else:
        assert execution_date is not None, "Execution date is None and no dag run"

        self.log.warning("No DAG RUN present this should not happen")
        # this is defensive against dag runs that are not complete
        for task in tasks:
            ti = TaskInstance(task, execution_date=execution_date)
            ti.state = State.SKIPPED
            ti.start_date = now
            ti.end_date = now
            session.merge(ti)

        session.commit()

def execute(self, context):
    branch = super(BranchPythonOperator, self).execute(context)
    logging.info("Following branch " + branch)
    logging.info("Marking other directly downstream tasks as skipped")
    session = settings.Session()

    TI = TaskInstance
    tis = session.query(TI).filter(
        TI.execution_date == context['ti'].execution_date,
        TI.task_id.in_(context['task'].downstream_task_ids),
        TI.task_id != branch,
    ).with_for_update().all()

    for ti in tis:
        logging.info('Skipping task: %s', ti.task_id)
        ti.state = State.SKIPPED
        ti.start_date = datetime.now()
        ti.end_date = datetime.now()

    # this is defensive against dag runs that are not complete
    skipped_task_ids = {ti.task_id for ti in tis}
    for task in context['task'].downstream_list:
        if task.task_id in skipped_task_ids:
            continue
        if task.task_id == branch:
            continue

        logging.warning(
            "Task {} was not part of a dag run. This should not happen."
            .format(task))
        ti = TaskInstance(task, execution_date=context['ti'].execution_date)
        ti.state = State.SKIPPED
        ti.start_date = datetime.now()
        ti.end_date = datetime.now()
        session.merge(ti)

    session.commit()
    session.close()
    logging.info("Done.")

def execute(self, context):
    condition = super(ShortCircuitOperator, self).execute(context)
    logging.info("Condition result is {}".format(condition))
    if condition:
        logging.info('Proceeding with downstream tasks...')
        return

    logging.info('Skipping downstream tasks...')
    session = settings.Session()

    TI = TaskInstance
    tis = session.query(TI).filter(
        TI.execution_date == context['ti'].execution_date,
        TI.task_id.in_(context['task'].downstream_task_ids),
    ).with_for_update().all()

    for ti in tis:
        logging.info('Skipping task: %s', ti.task_id)
        ti.state = State.SKIPPED
        ti.start_date = datetime.now()
        ti.end_date = datetime.now()

    # this is defensive against dag runs that are not complete
    skipped_task_ids = {ti.task_id for ti in tis}
    for task in context['task'].downstream_list:
        if task.task_id in skipped_task_ids:
            continue

        logging.warning("Task {} was not part of a dag run. This should not happen."
                        .format(task))
        ti = TaskInstance(task, execution_date=context['ti'].execution_date)
        ti.state = State.SKIPPED
        ti.start_date = datetime.now()
        ti.end_date = datetime.now()
        session.merge(ti)

    session.commit()
    session.close()
    logging.info("Done.")

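# --- Illustrative usage sketch (not taken from the snippets above) ---
# The execute() variants above belong to Airflow's ShortCircuitOperator. The
# sketch below shows how such an operator is typically wired into a DAG so
# that a falsy python_callable result skips everything downstream. Airflow 1.x
# import paths are assumed; the dag_id, task_ids and schedule are made up.
from datetime import datetime

from airflow import DAG
from airflow.operators.dummy_operator import DummyOperator
from airflow.operators.python_operator import ShortCircuitOperator

with DAG(dag_id='short_circuit_example',
         start_date=datetime(2018, 1, 1),
         schedule_interval='@daily') as example_dag:
    check = ShortCircuitOperator(
        task_id='only_on_mondays',
        # A falsy return value causes the downstream task to be marked SKIPPED.
        python_callable=lambda execution_date, **_: execution_date.weekday() == 0,
        provide_context=True,
    )
    report = DummyOperator(task_id='weekly_report')
    check >> report
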
def execute(self, context):
    branch = super(BranchPythonOperator, self).execute(context)
    logging.info("Following branch " + branch)
    logging.info("Marking other directly downstream tasks as skipped")
    session = settings.Session()

    TI = TaskInstance
    tis = session.query(TI).filter(
        TI.execution_date == context['ti'].execution_date,
        TI.task_id.in_(context['task'].downstream_task_ids),
        TI.task_id != branch,
    ).with_for_update().all()

    for ti in tis:
        logging.info('Skipping task: %s', ti.task_id)
        ti.state = State.SKIPPED
        ti.start_date = datetime.now()
        ti.end_date = datetime.now()

    # this is defensive against dag runs that are not complete
    skipped_task_ids = {ti.task_id for ti in tis}
    for task in context['task'].downstream_list:
        if task.task_id in skipped_task_ids:
            continue
        if task.task_id == branch:
            continue

        logging.warning("Task {} was not part of a dag run. This should not happen."
                        .format(task))
        ti = TaskInstance(task, execution_date=context['ti'].execution_date)
        ti.state = State.SKIPPED
        ti.start_date = datetime.now()
        ti.end_date = datetime.now()
        session.merge(ti)

    session.commit()
    session.close()
    logging.info("Done.")

def skip(self, dag_run, execution_date, tasks, session=None):
    """
    Sets task instances to skipped from the same dag run.

    :param dag_run: the DagRun for which to set the tasks to skipped
    :param execution_date: execution_date
    :param tasks: tasks to skip (not task_ids)
    :param session: db session to use
    """
    if not tasks:
        return

    task_ids = [d.task_id for d in tasks]
    now = timezone.utcnow()

    if dag_run:
        session.query(TaskInstance).filter(
            TaskInstance.dag_id == dag_run.dag_id,
            TaskInstance.execution_date == dag_run.execution_date,
            TaskInstance.task_id.in_(task_ids)
        ).update({TaskInstance.state: State.SKIPPED,
                  TaskInstance.start_date: now,
                  TaskInstance.end_date: now},
                 synchronize_session=False)
        session.commit()
    else:
        assert execution_date is not None, "Execution date is None and no dag run"

        self.log.warning("No DAG RUN present this should not happen")
        # this is defensive against dag runs that are not complete
        for task in tasks:
            ti = TaskInstance(task, execution_date=execution_date)
            ti.state = State.SKIPPED
            ti.start_date = now
            ti.end_date = now
            session.merge(ti)

        session.commit()

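# --- Illustrative usage sketch (not taken from the snippets above) ---
# The skip() helper above (modelled on Airflow's SkipMixin) lets operators
# delegate the per-task session bookkeeping shown in the earlier execute()
# snippets. A BranchPythonOperator-style execute() can then reduce to roughly
# the following; the exact logging and structure are an assumption, not a
# verbatim quote from any one source.
def execute(self, context):
    branch = super(BranchPythonOperator, self).execute(context)
    self.log.info("Following branch %s", branch)
    self.log.info("Marking other directly downstream tasks as skipped")

    downstream_tasks = context['task'].downstream_list
    skip_tasks = [t for t in downstream_tasks if t.task_id != branch]
    if downstream_tasks:
        # One call replaces the hand-rolled TaskInstance loop above.
        self.skip(context['dag_run'], context['ti'].execution_date, skip_tasks)
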
def execute(self, context):
    now = datetime.datetime.now()
    left_window = context['dag'].following_schedule(
        context['execution_date'])
    right_window = context['dag'].following_schedule(left_window)
    _log.info(
        'Checking latest only with left_window: %s right_window: %s '
        'now: %s', left_window, right_window, now)
    if not left_window < now <= right_window:
        _log.info('Not latest execution, skipping downstream.')
        session = settings.Session()
        for task in context['task'].downstream_list:
            ti = TaskInstance(task,
                              execution_date=context['ti'].execution_date)
            _log.info('Skipping task: %s', ti.task_id)
            ti.state = State.SKIPPED
            ti.start_date = now
            ti.end_date = now
            session.merge(ti)
        session.commit()
        session.close()
        _log.info('Done.')
    else:
        _log.info('Latest, allowing execution to proceed.')

def test_extra_link_in_gantt_view(dag, viewer_client):
    exec_date = dates.days_ago(2)
    start_date = timezone.datetime(2020, 4, 10, 2, 0, 0)
    end_date = exec_date + datetime.timedelta(seconds=30)

    with create_session() as session:
        for task in dag.tasks:
            ti = TaskInstance(task=task, execution_date=exec_date, state="success")
            ti.start_date = start_date
            ti.end_date = end_date
            session.add(ti)

    url = f'gantt?dag_id={dag.dag_id}&execution_date={exec_date}'
    resp = viewer_client.get(url, follow_redirects=True)

    check_content_in_response('"extraLinks":', resp)

    extra_links_grps = re.search(r'extraLinks\": \[(\".*?\")\]',
                                 resp.get_data(as_text=True))
    extra_links = extra_links_grps.group(0)
    assert 'airflow' in extra_links
    assert 'github' in extra_links

def execute(self, context):
    now = datetime.datetime.now()
    left_window = context['dag'].following_schedule(
        context['execution_date'])
    right_window = context['dag'].following_schedule(left_window)
    logging.info(
        'Checking latest only with left_window: %s right_window: %s '
        'now: %s', left_window, right_window, now)
    if not left_window < now <= right_window:
        logging.info('Not latest execution, skipping downstream.')
        session = settings.Session()
        for task in context['task'].downstream_list:
            ti = TaskInstance(
                task, execution_date=context['ti'].execution_date)
            logging.info('Skipping task: %s', ti.task_id)
            ti.state = State.SKIPPED
            ti.start_date = now
            ti.end_date = now
            session.merge(ti)
        session.commit()
        session.close()
        logging.info('Done.')
    else:
        logging.info('Latest, allowing execution to proceed.')

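# --- Illustrative usage sketch (not taken from the snippets above) ---
# The execute() variants above implement Airflow's LatestOnlyOperator. In a
# DAG it sits upstream of work that should only run for the most recent
# schedule interval, so backfilled runs are skipped. Airflow 1.x import paths
# are assumed; dag_id and task_ids are made up.
from datetime import datetime

from airflow import DAG
from airflow.operators.dummy_operator import DummyOperator
from airflow.operators.latest_only_operator import LatestOnlyOperator

with DAG(dag_id='latest_only_example',
         start_date=datetime(2018, 1, 1),
         schedule_interval='@hourly') as example_dag:
    latest_only = LatestOnlyOperator(task_id='latest_only')
    notify = DummyOperator(task_id='notify')
    latest_only >> notify
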
def test_lineage_backend_capture_executions(mock_emit, inlets, outlets):
    DEFAULT_DATE = datetime.datetime(2020, 5, 17)
    mock_emitter = Mock()
    mock_emit.return_value = mock_emitter
    # Using autospec on xcom_pull and xcom_push methods fails on Python 3.6.
    with mock.patch.dict(
        os.environ,
        {
            "AIRFLOW__LINEAGE__BACKEND": "datahub_provider.lineage.datahub.DatahubLineageBackend",
            "AIRFLOW__LINEAGE__DATAHUB_CONN_ID": datahub_rest_connection_config.conn_id,
            "AIRFLOW__LINEAGE__DATAHUB_KWARGS": json.dumps(
                {"graceful_exceptions": False, "capture_executions": True}
            ),
        },
    ), mock.patch("airflow.models.BaseOperator.xcom_pull"), mock.patch(
        "airflow.models.BaseOperator.xcom_push"
    ), patch_airflow_connection(datahub_rest_connection_config):
        func = mock.Mock()
        func.__name__ = "foo"

        dag = DAG(dag_id="test_lineage_is_sent_to_backend",
                  start_date=DEFAULT_DATE)

        with dag:
            op1 = DummyOperator(
                task_id="task1_upstream",
                inlets=inlets,
                outlets=outlets,
            )
            op2 = DummyOperator(
                task_id="task2",
                inlets=inlets,
                outlets=outlets,
            )
            op1 >> op2

        # Airflow < 2.2 requires the execution_date parameter. Newer Airflow
        # versions do not require it, but will attempt to find the associated
        # run_id in the database if execution_date is provided. As such, we
        # must fake the run_id parameter for newer Airflow versions.
        if AIRFLOW_VERSION < packaging.version.parse("2.2.0"):
            ti = TaskInstance(task=op2, execution_date=DEFAULT_DATE)
            # Ignoring type here because DagRun state is just a string at Airflow 1
            dag_run = DagRun(
                state="success",
                run_id=f"scheduled_{DEFAULT_DATE}")  # type: ignore
            ti.dag_run = dag_run
            ti.start_date = datetime.datetime.utcnow()
            ti.execution_date = DEFAULT_DATE
        else:
            from airflow.utils.state import DagRunState

            ti = TaskInstance(task=op2, run_id=f"test_airflow-{DEFAULT_DATE}")
            dag_run = DagRun(state=DagRunState.SUCCESS,
                             run_id=f"scheduled_{DEFAULT_DATE}")
            ti.dag_run = dag_run
            ti.start_date = datetime.datetime.utcnow()
            ti.execution_date = DEFAULT_DATE

        ctx1 = {
            "dag": dag,
            "task": op2,
            "ti": ti,
            "dag_run": dag_run,
            "task_instance": ti,
            "execution_date": DEFAULT_DATE,
            "ts": "2021-04-08T00:54:25.771575+00:00",
        }

        prep = prepare_lineage(func)
        prep(op2, ctx1)
        post = apply_lineage(func)
        post(op2, ctx1)

        # Verify that the inlets and outlets are registered and recognized by
        # Airflow correctly, or that our lineage backend forces it to.
        assert len(op2.inlets) == 1
        assert len(op2.outlets) == 1
        assert all(map(lambda let: isinstance(let, Dataset), op2.inlets))
        assert all(map(lambda let: isinstance(let, Dataset), op2.outlets))

        # Check that the right things were emitted.
        assert mock_emitter.emit.call_count == 17
        # Running further checks based on python version because args only exists in python 3.7+
        if sys.version_info[:3] > (3, 7):
            assert mock_emitter.method_calls[0].args[0].aspectName == "dataFlowInfo"
            assert (
                mock_emitter.method_calls[0].args[0].entityUrn ==
                "urn:li:dataFlow:(airflow,test_lineage_is_sent_to_backend,prod)"
            )

            assert mock_emitter.method_calls[1].args[0].aspectName == "ownership"
            assert (
                mock_emitter.method_calls[1].args[0].entityUrn ==
                "urn:li:dataFlow:(airflow,test_lineage_is_sent_to_backend,prod)"
            )

            assert mock_emitter.method_calls[2].args[0].aspectName == "globalTags"
            assert (
                mock_emitter.method_calls[2].args[0].entityUrn ==
                "urn:li:dataFlow:(airflow,test_lineage_is_sent_to_backend,prod)"
            )

            assert mock_emitter.method_calls[3].args[0].aspectName == "dataJobInfo"
            assert (
                mock_emitter.method_calls[3].args[0].entityUrn ==
                "urn:li:dataJob:(urn:li:dataFlow:(airflow,test_lineage_is_sent_to_backend,prod),task2)"
            )

            assert mock_emitter.method_calls[4].args[0].aspectName == "dataJobInputOutput"
            assert (
                mock_emitter.method_calls[4].args[0].entityUrn ==
                "urn:li:dataJob:(urn:li:dataFlow:(airflow,test_lineage_is_sent_to_backend,prod),task2)"
            )
            assert (
                mock_emitter.method_calls[4].args[0].aspect.inputDatajobs[0] ==
                "urn:li:dataJob:(urn:li:dataFlow:(airflow,test_lineage_is_sent_to_backend,prod),task1_upstream)"
            )
            assert (
                mock_emitter.method_calls[4].args[0].aspect.inputDatasets[0] ==
                "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableConsumed,PROD)"
            )
            assert (
                mock_emitter.method_calls[4].args[0].aspect.outputDatasets[0] ==
                "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableProduced,PROD)"
            )

            assert mock_emitter.method_calls[5].args[0].aspectName == "status"
            assert (
                mock_emitter.method_calls[5].args[0].entityUrn ==
                "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableConsumed,PROD)"
            )

            assert mock_emitter.method_calls[6].args[0].aspectName == "status"
            assert (
                mock_emitter.method_calls[6].args[0].entityUrn ==
                "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableProduced,PROD)"
            )

            assert mock_emitter.method_calls[7].args[0].aspectName == "ownership"
            assert (
                mock_emitter.method_calls[7].args[0].entityUrn ==
                "urn:li:dataJob:(urn:li:dataFlow:(airflow,test_lineage_is_sent_to_backend,prod),task2)"
            )

            assert mock_emitter.method_calls[8].args[0].aspectName == "globalTags"
            assert (
                mock_emitter.method_calls[8].args[0].entityUrn ==
                "urn:li:dataJob:(urn:li:dataFlow:(airflow,test_lineage_is_sent_to_backend,prod),task2)"
            )

            assert mock_emitter.method_calls[9].args[0].aspectName == "dataProcessInstanceProperties"
            assert (
                mock_emitter.method_calls[9].args[0].entityUrn ==
                "urn:li:dataProcessInstance:b6375e5f5faeb543cfb5d7d8a47661fb"
            )

            assert mock_emitter.method_calls[10].args[0].aspectName == "dataProcessInstanceRelationships"
            assert (
                mock_emitter.method_calls[10].args[0].entityUrn ==
                "urn:li:dataProcessInstance:b6375e5f5faeb543cfb5d7d8a47661fb"
            )

            assert mock_emitter.method_calls[11].args[0].aspectName == "dataProcessInstanceInput"
            assert (
                mock_emitter.method_calls[11].args[0].entityUrn ==
                "urn:li:dataProcessInstance:b6375e5f5faeb543cfb5d7d8a47661fb"
            )

            assert mock_emitter.method_calls[12].args[0].aspectName == "dataProcessInstanceOutput"
            assert (
                mock_emitter.method_calls[12].args[0].entityUrn ==
                "urn:li:dataProcessInstance:b6375e5f5faeb543cfb5d7d8a47661fb"
            )

            assert mock_emitter.method_calls[13].args[0].aspectName == "status"
            assert (
                mock_emitter.method_calls[13].args[0].entityUrn ==
                "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableConsumed,PROD)"
            )

            assert mock_emitter.method_calls[14].args[0].aspectName == "status"
            assert (
                mock_emitter.method_calls[14].args[0].entityUrn ==
                "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableProduced,PROD)"
            )

            assert mock_emitter.method_calls[15].args[0].aspectName == "dataProcessInstanceRunEvent"
            assert (
                mock_emitter.method_calls[15].args[0].entityUrn ==
                "urn:li:dataProcessInstance:b6375e5f5faeb543cfb5d7d8a47661fb"
            )

            assert mock_emitter.method_calls[16].args[0].aspectName == "dataProcessInstanceRunEvent"
            assert (
                mock_emitter.method_calls[16].args[0].entityUrn ==
                "urn:li:dataProcessInstance:b6375e5f5faeb543cfb5d7d8a47661fb"
            )

def create_dagrun_from_dbnd_run(
    databand_run,
    dag,
    execution_date,
    run_id,
    state=State.RUNNING,
    external_trigger=False,
    conf=None,
    session=None,
):
    """
    Create new DagRun and all relevant TaskInstances
    """
    dagrun = (session.query(DagRun).filter(
        DagRun.dag_id == dag.dag_id,
        DagRun.execution_date == execution_date).first())
    if dagrun is None:
        dagrun = DagRun(
            run_id=run_id,
            execution_date=execution_date,
            start_date=dag.start_date,
            _state=state,
            external_trigger=external_trigger,
            dag_id=dag.dag_id,
            conf=conf,
        )
        session.add(dagrun)
    else:
        logger.warning("Running with existing airflow dag run %s", dagrun)

    dagrun.dag = dag
    dagrun.run_id = run_id
    session.commit()

    # create the associated task instances
    # state is None at the moment of creation

    # dagrun.verify_integrity(session=session)  # fetches [TaskInstance] again
    # tasks_skipped = databand_run.tasks_skipped

    # we can find a source of the completion, but also,
    # sometimes we don't know the source of the "complete"
    TI = TaskInstance
    tis = (session.query(TI).filter(
        TI.dag_id == dag.dag_id,
        TI.execution_date == execution_date).all())
    tis = {ti.task_id: ti for ti in tis}

    for af_task in dag.tasks:
        ti = tis.get(af_task.task_id)
        if ti is None:
            ti = TaskInstance(af_task, execution_date=execution_date)
            ti.start_date = timezone.utcnow()
            ti.end_date = timezone.utcnow()
            session.add(ti)
        task_run = databand_run.get_task_run_by_af_id(af_task.task_id)
        # all tasks part of the backfill are scheduled to dagrun

        # Set log file path to expected airflow log file path
        task_run.log.local_log_file.path = ti.log_filepath.replace(
            ".log", "/{0}.log".format(ti.try_number))
        if task_run.is_reused:
            # this task is completed and we don't need to run it anymore
            ti.state = State.SUCCESS

    session.commit()

    return dagrun