class PythonIdempatomicFileOperatorTest_Idempotent(unittest.TestCase):
    """Check that executing the operator twice only runs the callable once.

    The callable appends to its output file, so a second real invocation
    would show up as duplicated content in that file.
    """

    def f(self, output_path):
        """Append the text ``test`` to the file at ``output_path``."""
        with open(output_path, "a+") as handle:
            handle.write("test")

    def test_PyIdempaOp_idempotent(self):
        """Execute the operator twice and inspect the output file each time."""
        self.dag = DAG(
            TEST_DAG_ID,
            schedule_interval="@daily",
            default_args={"start_date": datetime.now()},
        )

        with TemporaryDirectory() as tempdir:
            output_path = f"{tempdir}/test_file.txt"

            def file_contents():
                # Re-read from disk every time so each check sees fresh state.
                with open(output_path, "r") as handle:
                    return handle.read()

            # Precondition: the output must not exist before the first run.
            self.assertFalse(os.path.exists(output_path))

            self.op = PythonIdempatomicFileOperator(
                dag=self.dag,
                task_id="test",
                output_pattern=output_path,
                python_callable=self.f,
            )
            self.ti = TaskInstance(task=self.op, execution_date=datetime.now())

            # First execution: the callable runs and creates the file.
            first_result = self.op.execute(self.ti.get_template_context())
            self.assertEqual(first_result, output_path)
            self.assertFalse(self.op.previously_completed)
            self.assertTrue(os.path.exists(output_path))
            self.assertEqual(file_contents(), "test")

            # Second execution: the path is still returned, but the operator
            # reports the work as previously completed.
            second_result = self.op.execute(self.ti.get_template_context())
            self.assertEqual(second_result, output_path)
            self.assertTrue(self.op.previously_completed)
            # Had the callable run again the file would read 'testtest'.
            self.assertEqual(file_contents(), "test")

            # Sanity check of the fixture itself: calling f directly a second
            # time really does append, producing 'testtest'.
            self.f(output_path)
            self.assertEqual(file_contents(), "testtest")
def kill_zombies(self, zombies, session=None):
    """
    Run failure handling for every given zombie whose DAG and task are
    present in this DagBag, then commit the session.

    Zombies whose ``dag_id`` is not in ``self.dags``, or whose ``task_id`` is
    not among that DAG's task ids, are skipped.

    :param zombies: zombie task instances to kill (iterated once).
    :type zombies: airflow.utils.dag_processing.SimpleTaskInstance
    :param session: DB session; ``commit()`` is called on it at the end.
        NOTE(review): the ``None`` default is dereferenced unconditionally, so
        presumably a session-providing decorator wraps this method -- confirm.
    :type session: sqlalchemy.orm.session.Session
    """
    from airflow.models.taskinstance import TaskInstance  # Avoid circular import

    # Properties needed for failure handling, copied from SimpleTaskInstance.
    copied_fields = ("start_date", "end_date", "try_number", "state")

    for zombie in zombies:
        if zombie.dag_id not in self.dags:
            continue
        dag = self.dags[zombie.dag_id]
        if zombie.task_id not in dag.task_ids:
            continue

        ti = TaskInstance(dag.get_task(zombie.task_id), zombie.execution_date)
        for field in copied_fields:
            setattr(ti, field, getattr(zombie, field))
        ti.test_mode = self.UNIT_TEST_MODE

        reason = "{} detected as zombie".format(ti)
        ti.handle_failure(reason, ti.test_mode, ti.get_template_context())
        self.log.info('Marked zombie job %s as %s', ti, ti.state)

    session.commit()
class PythonIdempatomicFileOperatorTest_Atomic(unittest.TestCase):
    """Check that a failing callable leaves no partially written output file."""

    def g(self, output_path):
        """Write ``test`` to ``output_path`` and then fail.

        :raises ValueError: always, after the write has been issued.
        """
        with open(output_path, "w") as fout:
            fout.write("test")
            raise ValueError("You cannot write that!")

    def test_PyIdempaOp_atomic(self):
        """Execute the operator with a failing callable and inspect the result."""
        self.dag = DAG(
            TEST_DAG_ID,
            schedule_interval="@daily",
            default_args={"start_date": datetime.now()},
        )

        with TemporaryDirectory() as tempdir:
            output_path = f"{tempdir}/test.txt"
            # ensure doesn't already exist
            self.assertFalse(os.path.exists(output_path))

            self.op = PythonIdempatomicFileOperator(
                dag=self.dag,
                task_id="test",
                output_pattern=output_path,
                python_callable=self.g,
            )
            self.ti = TaskInstance(task=self.op, execution_date=datetime.now())

            # Bind ``result`` up front: execute() is expected to raise, so the
            # assignment inside the ``with`` never completes. Without this the
            # follow-up check is either unreachable (inside the block) or an
            # unbound-name error (outside it).
            result = None
            # ensure ValueError is triggered (since task ran)
            with self.assertRaises(ValueError):
                result = self.op.execute(self.ti.get_template_context())

            # make sure no path is returned
            self.assertIsNone(result)
            # no partially written file
            self.assertFalse(os.path.exists(output_path))
def test_should_continue_with_cp(load_dag):
    """Execute the ``continue_if_data_staging.users`` branch task.

    An XCom entry keyed by the table name with ``{'has_data': True}`` is
    stored first, under the task's own id and the task instance's execution
    date. The test passes as long as nothing raises; the value returned by
    ``execute`` is not inspected.
    """
    dag_id = 'bq_to_wrench'
    table = 'staging.users'

    dag = load_dag(dag_id).get_dag(dag_id)
    task = dag.get_task(f'continue_if_data_{table}')
    assert isinstance(task, BranchPythonOperator)

    ti = TaskInstance(task=task, execution_date=datetime.now())
    XCom.set(
        key=table,
        value={'has_data': True},
        task_id=task.task_id,
        dag_id=dag.dag_id,
        execution_date=ti.execution_date,
    )

    task.execute(ti.get_template_context())
def test_sets_initial_checkpoint(load_dag, env, bigquery_helper):
    """Execute the ``get_checkpoint_staging.users`` task with no stored checkpoint.

    All checkpoint rows for the table are deleted first, so the task is
    expected to create an initial checkpoint. The test passes as long as
    nothing raises; the created checkpoint is not read back.
    """
    table = 'staging.users'
    dag_id = 'bq_to_wrench'

    # Remove all checkpoints for table
    delete_checkpoints = (
        f"DELETE FROM `{env['project']}.system.checkpoint` WHERE table = '{table}'"
    )
    bigquery_helper.query(delete_checkpoints)

    # Execute get checkpoint task. I expect it to create an initial checkpoint.
    dag = load_dag(dag_id).get_dag(dag_id)
    task = dag.get_task(f'get_checkpoint_{table}')
    assert isinstance(task, GetCheckpointOperator)

    context = TaskInstance(task=task, execution_date=datetime.now()).get_template_context()
    task.execute(context)
def _kill_zombies(self, dag, zombies, session):
    """
    Run failure handling for each zombie that belongs to ``dag``, then commit.

    Copy-pasted from airflow.models.dagbag.DagBag.kill_zombies, but working on
    a single, explicitly supplied DAG. Zombies whose ``task_id`` is not among
    ``dag.task_ids`` are skipped.

    :param dag: DAG whose tasks the zombies are matched against.
    :param zombies: iterable of zombie task instance records.
    :param session: DB session; ``commit()`` is called on it at the end.
    """
    from airflow.models.taskinstance import TaskInstance  # Avoid circular import

    for zombie in zombies:
        if zombie.task_id not in dag.task_ids:
            continue

        zombie_task = dag.get_task(zombie.task_id)
        ti = TaskInstance(zombie_task, zombie.execution_date)

        # Get properties needed for failure handling from SimpleTaskInstance.
        ti.start_date = zombie.start_date
        ti.end_date = zombie.end_date
        ti.try_number = zombie.try_number
        ti.state = zombie.state
        # Unlike the DagBag original, ti.test_mode is not overridden with
        # self.UNIT_TEST_MODE here; the value the TaskInstance already has is
        # passed through unchanged.

        failure_reason = "{} detected as zombie".format(ti)
        failure_context = ti.get_template_context()
        ti.handle_failure(failure_reason, ti.test_mode, failure_context)

        self.log.info("Marked zombie job %s as %s", ti, ti.state)

    session.commit()
def get_link(self, operator, dttm):
    """Build the URL for the external DAG referenced by ``operator``.

    The operator's template fields are rendered for the given date before
    ``external_dag_id`` is read.

    :param operator: operator exposing ``external_dag_id`` and
        ``render_template_fields``.
    :param dttm: execution date; sent in the query as ``dttm.isoformat()``.
    :return: whatever ``build_airflow_url_with_query`` returns for the
        ``dag_id`` / ``execution_date`` query.
    """
    context = TaskInstance(task=operator, execution_date=dttm).get_template_context()
    # Render first: external_dag_id is read only after the template fields
    # have been rendered (presumably because it can itself be templated).
    operator.render_template_fields(context)

    return build_airflow_url_with_query(
        {
            "dag_id": operator.external_dag_id,
            "execution_date": dttm.isoformat(),
        }
    )