class PythonIdempatomicFileOperatorTest_Idempotent(unittest.TestCase):
    def f(self, output_path):
        with open(output_path, "a+") as fout:
            fout.write("test")

    def test_PyIdempaOp_idempotent(self):
        self.dag = DAG(
            TEST_DAG_ID,
            schedule_interval="@daily",
            default_args={"start_date": datetime.now()},
        )

        with TemporaryDirectory() as tempdir:
            output_path = f"{tempdir}/test_file.txt"

            self.assertFalse(
                os.path.exists(output_path))  # ensure doesn't already exist

            self.op = PythonIdempatomicFileOperator(
                dag=self.dag,
                task_id="test",
                output_pattern=output_path,
                python_callable=self.f,
            )
            self.ti = TaskInstance(task=self.op, execution_date=datetime.now())

            result = self.op.execute(self.ti.get_template_context())
            self.assertEqual(result, output_path)
            self.assertFalse(self.op.previously_completed)
            self.assertTrue(os.path.exists(output_path))

            with open(output_path, "r") as fout:
                self.assertEqual(fout.read(), "test")

            # now run task again
            result = self.op.execute(self.ti.get_template_context())
            self.assertEqual(result,
                             output_path)  # result will still give path
            self.assertTrue(self.op.previously_completed)

            # if function had run again, it would now be 'testtest'
            with open(output_path, "r") as fout:
                self.assertEqual(fout.read(), "test")

            # run function again to ensure 'testtest' is written to file upon second call
            self.f(output_path)
            with open(output_path, "r") as fout:
                self.assertEqual(fout.read(), "testtest")
Example 2
    def kill_zombies(self, zombies, session=None):
        """
        Fail given zombie tasks, which are tasks that haven't
        had a heartbeat for too long, in the current DagBag.

        :param zombies: zombie task instances to kill.
        :type zombies: airflow.utils.dag_processing.SimpleTaskInstance
        :param session: DB session.
        :type session: sqlalchemy.orm.session.Session
        """
        from airflow.models.taskinstance import TaskInstance  # Avoid circular import

        for zombie in zombies:
            if zombie.dag_id in self.dags:
                dag = self.dags[zombie.dag_id]
                if zombie.task_id in dag.task_ids:
                    task = dag.get_task(zombie.task_id)
                    ti = TaskInstance(task, zombie.execution_date)
                    # Get properties needed for failure handling from SimpleTaskInstance.
                    ti.start_date = zombie.start_date
                    ti.end_date = zombie.end_date
                    ti.try_number = zombie.try_number
                    ti.state = zombie.state
                    ti.test_mode = self.UNIT_TEST_MODE
                    ti.handle_failure("{} detected as zombie".format(ti),
                                      ti.test_mode, ti.get_template_context())
                    self.log.info('Marked zombie job %s as %s', ti, ti.state)
        session.commit()
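
Nothing on this page calls kill_zombies directly, but the docstring above describes its contract: hand it task instances that have stopped heartbeating, plus a session to commit the failures on. A hedged usage sketch, assuming Airflow 1.10-style imports and an already-populated DagBag; the RUNNING-state filter below is a simplification, since a real scheduler also checks how stale each heartbeat is before declaring a zombie:

from airflow import settings
from airflow.models import DagBag, TaskInstance
from airflow.utils.dag_processing import SimpleTaskInstance
from airflow.utils.state import State

session = settings.Session()
dag_bag = DagBag()  # assumes the zombies' dag_ids are loadable from the DAGs folder

# SimpleTaskInstance carries the dag_id/task_id/execution_date and timing fields
# that kill_zombies copies onto the fresh TaskInstance it builds.
candidates = session.query(TaskInstance).filter(TaskInstance.state == State.RUNNING).all()
zombies = [SimpleTaskInstance(ti) for ti in candidates]
dag_bag.kill_zombies(zombies, session=session)
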
Example 3
class PythonIdempatomicFileOperatorTest_Atomic(unittest.TestCase):
    def g(self, output_path):
        with open(output_path, "w") as fout:
            fout.write("test")
            raise ValueError("You cannot write that!")

    def test_PyIdempaOp_atomic(self):
        self.dag = DAG(
            TEST_DAG_ID,
            schedule_interval="@daily",
            default_args={"start_date": datetime.now()},
        )

        with TemporaryDirectory() as tempdir:
            output_path = f"{tempdir}/test.txt"

            self.assertFalse(
                os.path.exists(output_path))  # ensure doesn't already exist

            self.op = PythonIdempatomicFileOperator(
                dag=self.dag,
                task_id="test",
                output_pattern=output_path,
                python_callable=self.g,
            )
            self.ti = TaskInstance(task=self.op, execution_date=datetime.now())

            with self.assertRaises(
                    ValueError):  # ensure ValueError is triggered (since task ran)
                result = self.op.execute(self.ti.get_template_context())
                self.assertEqual(result, None)  # make sure no path is returned

            self.assertFalse(
                os.path.exists(output_path))  # no partially written file
Example 4
def test_should_continue_with_cp(load_dag):
    dag_bag = load_dag('bq_to_wrench')
    dag = dag_bag.get_dag('bq_to_wrench')
    table = 'staging.users'
    task = dag.get_task(f'continue_if_data_{table}')
    assert isinstance(task, BranchPythonOperator)
    ti = TaskInstance(task=task, execution_date=datetime.now())
    XCom.set(key=table,
             value={'has_data': True},
             task_id=task.task_id,
             dag_id=dag.dag_id,
             execution_date=ti.execution_date)

    task.execute(ti.get_template_context())
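
The bq_to_wrench DAG itself is not shown on this page, so the branch callable behind continue_if_data_staging.users is unknown; purely as an illustration, a callable consuming the {'has_data': True} payload the test seeds could look like the sketch below. The downstream task ids are made up, and in Airflow 1.10 the operator would need provide_context=True for the context kwargs to be passed in.

TABLE = 'staging.users'


def continue_if_data(**context):
    ti = context['ti']
    # The test stores {'has_data': True} under key=TABLE for this task's own
    # task_id and execution_date, so pull it back the same way.
    payload = ti.xcom_pull(task_ids=ti.task_id, key=TABLE)
    has_data = bool(payload and payload.get('has_data'))
    # A BranchPythonOperator callable returns the task_id(s) to follow.
    return f'load_{TABLE}' if has_data else f'skip_{TABLE}'
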
Example 5
def test_sets_initial_checkpoint(load_dag, env, bigquery_helper):
    # Remove all checkpoints for table
    table = 'staging.users'
    bigquery_helper.query(
        f"DELETE FROM `{env['project']}.system.checkpoint` WHERE table = '{table}'"
    )

    # Execute get checkpoint task. I expect it to create an initial checkpoint.
    dag_bag = load_dag('bq_to_wrench')
    dag = dag_bag.get_dag('bq_to_wrench')
    task = dag.get_task(f'get_checkpoint_{table}')
    assert isinstance(task, GetCheckpointOperator)
    ti = TaskInstance(task=task, execution_date=datetime.now())
    task.execute(ti.get_template_context())
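
GetCheckpointOperator is project-specific and not shown here; per the comment above, its expected behavior is to create an initial checkpoint when none exists for the table. A loose sketch of logic its execute() might wrap, assuming a system.checkpoint schema with table and checkpoint columns (the function name and the schema are guesses, not the project's code):

from google.cloud import bigquery


def get_or_seed_checkpoint(project, table):
    """Return the stored checkpoint for `table`, seeding an initial one if missing."""
    client = bigquery.Client(project=project)
    rows = list(client.query(
        f"SELECT checkpoint FROM `{project}.system.checkpoint` WHERE table = '{table}'"
    ).result())
    if rows:
        return rows[0].checkpoint
    # No checkpoint yet: seed an epoch value so downstream loads start from scratch.
    client.query(
        f"INSERT `{project}.system.checkpoint` (table, checkpoint) "
        f"VALUES ('{table}', TIMESTAMP '1970-01-01 00:00:00 UTC')"
    ).result()
    return None
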
Example 6
    def _kill_zombies(self, dag, zombies, session):
        """
        Copied from airflow.models.dagbag.DagBag.kill_zombies.
        """
        from airflow.models.taskinstance import TaskInstance  # Avoid circular import

        for zombie in zombies:
            if zombie.task_id in dag.task_ids:
                task = dag.get_task(zombie.task_id)
                ti = TaskInstance(task, zombie.execution_date)
                # Get properties needed for failure handling from SimpleTaskInstance.
                ti.start_date = zombie.start_date
                ti.end_date = zombie.end_date
                ti.try_number = zombie.try_number
                ti.state = zombie.state
                # ti.test_mode = self.UNIT_TEST_MODE
                ti.handle_failure(
                    "{} detected as zombie".format(ti),
                    ti.test_mode,
                    ti.get_template_context(),
                )
                self.log.info("Marked zombie job %s as %s", ti, ti.state)
        session.commit()
Example 7
    def get_link(self, operator, dttm):
        ti = TaskInstance(task=operator, execution_date=dttm)
        operator.render_template_fields(ti.get_template_context())
        query = {"dag_id": operator.external_dag_id, "execution_date": dttm.isoformat()}
        return build_airflow_url_with_query(query)