    def test_file_task_handler(self):
        dag = DAG('dag_for_testing_file_task_handler', start_date=DEFAULT_DATE)
        task = DummyOperator(task_id='task_for_testing_file_log_handler', dag=dag)
        ti = TaskInstance(task=task, execution_date=DEFAULT_DATE)

        logger = logging.getLogger(TASK_LOGGER)
        file_handler = next((handler for handler in logger.handlers
                             if handler.name == FILE_TASK_HANDLER), None)
        self.assertIsNotNone(file_handler)

        file_handler.set_context(ti)
        self.assertIsNotNone(file_handler.handler)
        # We expect set_context to generate a file locally.
        log_filename = file_handler.handler.baseFilename
        self.assertTrue(os.path.isfile(log_filename))

        logger.info("test")
        ti.run()

        self.assertTrue(hasattr(file_handler, 'read'))
        # Return value of read must be a list.
        logs = file_handler.read(ti)
        self.assertTrue(isinstance(logs, list))
        self.assertEqual(len(logs), 1)

        # Remove the generated tmp log file.
        os.remove(log_filename)
Example #2
    def test_xcom_push_flag(self):
        """
        Tests the option for Operators to push XComs
        """
        value = 'hello'
        task_id = 'test_no_xcom_push'
        dag = models.DAG(dag_id='test_xcom')

        # nothing saved to XCom
        task = PythonOperator(
            task_id=task_id,
            dag=dag,
            python_callable=lambda: value,
            do_xcom_push=False,
            owner='airflow',
            start_date=datetime.datetime(2017, 1, 1)
        )
        ti = TI(task=task, execution_date=datetime.datetime(2017, 1, 1))
        ti.run()
        self.assertEqual(
            ti.xcom_pull(
                task_ids=task_id, key=models.XCOM_RETURN_KEY
            ),
            None
        )

    def test_file_transfer_no_intermediate_dir_error_get(self):
        configuration.conf.set("core", "enable_xcom_pickling", "True")
        test_remote_file_content = \
            "This is remote file content \n which is also multiline " \
            "another line here \n this is last line. EOF"

        # create a test file remotely
        create_file_task = SSHOperator(
            task_id="test_create_file",
            ssh_hook=self.hook,
            command="echo '{0}' > {1}".format(test_remote_file_content,
                                              self.test_remote_filepath),
            do_xcom_push=True,
            dag=self.dag
        )
        self.assertIsNotNone(create_file_task)
        ti1 = TaskInstance(task=create_file_task, execution_date=timezone.utcnow())
        ti1.run()

        # Try to GET test file from remote
        # This should raise an error with "No such file" as the directory
        # does not exist
        with self.assertRaises(Exception) as error:
            get_test_task = SFTPOperator(
                task_id="test_sftp",
                ssh_hook=self.hook,
                local_filepath=self.test_local_filepath_int_dir,
                remote_filepath=self.test_remote_filepath,
                operation=SFTPOperation.GET,
                dag=self.dag
            )
            self.assertIsNotNone(get_test_task)
            ti2 = TaskInstance(task=get_test_task, execution_date=timezone.utcnow())
            ti2.run()
        self.assertIn('No such file', str(error.exception))
Example #4
    def test_dag_clear(self):
        dag = DAG('test_dag_clear', start_date=DEFAULT_DATE,
                  end_date=DEFAULT_DATE + datetime.timedelta(days=10))
        task0 = DummyOperator(task_id='test_dag_clear_task_0', owner='test', dag=dag)
        ti0 = TI(task=task0, execution_date=DEFAULT_DATE)
        # Next try to run will be try 1
        self.assertEqual(ti0.try_number, 1)
        ti0.run()
        self.assertEqual(ti0.try_number, 2)
        dag.clear()
        ti0.refresh_from_db()
        self.assertEqual(ti0.try_number, 2)
        self.assertEqual(ti0.state, State.NONE)
        self.assertEqual(ti0.max_tries, 1)

        task1 = DummyOperator(task_id='test_dag_clear_task_1', owner='test',
                              dag=dag, retries=2)
        ti1 = TI(task=task1, execution_date=DEFAULT_DATE)
        self.assertEqual(ti1.max_tries, 2)
        ti1.try_number = 1
        # Next try will be 2
        ti1.run()
        self.assertEqual(ti1.try_number, 3)
        self.assertEqual(ti1.max_tries, 2)

        dag.clear()
        ti0.refresh_from_db()
        ti1.refresh_from_db()
        # after clearing the dag, ti1 should show attempt 3 of 5
        self.assertEqual(ti1.max_tries, 4)
        self.assertEqual(ti1.try_number, 3)
        # after clearing the dag, ti0 should show attempt 2 of 2
        self.assertEqual(ti0.try_number, 2)
        self.assertEqual(ti0.max_tries, 1)
Example #5
    def test_post_execute_hook(self):
        """
        Test that post_execute hook is called with the Operator's result.
        The result ('error') will cause an error to be raised and trapped.
        """

        class TestError(Exception):
            pass

        class TestOperator(PythonOperator):
            def post_execute(self, context, result):
                if result == 'error':
                    raise TestError('expected error.')

        dag = models.DAG(dag_id='test_post_execute_dag')
        task = TestOperator(
            task_id='test_operator',
            dag=dag,
            python_callable=lambda: 'error',
            owner='airflow',
            start_date=datetime.datetime(2017, 2, 1))
        ti = TI(task=task, execution_date=datetime.datetime.now())

        with self.assertRaises(TestError):
            ti.run()

    def test_file_transfer_no_intermediate_dir_error_put(self):
        configuration.conf.set("core", "enable_xcom_pickling", "True")
        test_local_file_content = \
            b"This is local file content \n which is multiline " \
            b"continuing....with other character\nanother line here \n this is last line"
        # create a test file locally
        with open(self.test_local_filepath, 'wb') as f:
            f.write(test_local_file_content)

        # Try to put test file to remote
        # This should raise an error with "No such file" as the directory
        # does not exist
        with self.assertRaises(Exception) as error:
            put_test_task = SFTPOperator(
                task_id="test_sftp",
                ssh_hook=self.hook,
                local_filepath=self.test_local_filepath,
                remote_filepath=self.test_remote_filepath_int_dir,
                operation=SFTPOperation.PUT,
                create_intermediate_dirs=False,
                dag=self.dag
            )
            self.assertIsNotNone(put_test_task)
            ti2 = TaskInstance(task=put_test_task, execution_date=timezone.utcnow())
            ti2.run()
        self.assertIn('No such file', str(error.exception))
Example #7
    def test_clear_task_instances_without_task(self):
        dag = DAG('test_clear_task_instances_without_task', start_date=DEFAULT_DATE,
                  end_date=DEFAULT_DATE + datetime.timedelta(days=10))
        task0 = DummyOperator(task_id='task0', owner='test', dag=dag)
        task1 = DummyOperator(task_id='task1', owner='test', dag=dag, retries=2)
        ti0 = TI(task=task0, execution_date=DEFAULT_DATE)
        ti1 = TI(task=task1, execution_date=DEFAULT_DATE)
        ti0.run()
        ti1.run()

        # Remove the task from dag.
        dag.task_dict = {}
        self.assertFalse(dag.has_task(task0.task_id))
        self.assertFalse(dag.has_task(task1.task_id))

        session = settings.Session()
        qry = session.query(TI).filter(
            TI.dag_id == dag.dag_id).all()
        clear_task_instances(qry, session)
        session.commit()
        # When dag is None, max_tries will be the maximum of the original max_tries and try_number.
        ti0.refresh_from_db()
        ti1.refresh_from_db()
        # Next try to run will be try 2
        self.assertEqual(ti0.try_number, 2)
        self.assertEqual(ti0.max_tries, 1)
        self.assertEqual(ti1.try_number, 2)
        self.assertEqual(ti1.max_tries, 2)
Example #8
    def test_email_alert_with_config(self, mock_send_email):
        dag = models.DAG(dag_id='test_failure_email')
        task = BashOperator(
            task_id='test_email_alert_with_config',
            dag=dag,
            bash_command='exit 1',
            start_date=DEFAULT_DATE,
            email='to')

        ti = TI(
            task=task, execution_date=datetime.datetime.now())

        configuration.set('email', 'SUBJECT_TEMPLATE', '/subject/path')
        configuration.set('email', 'HTML_CONTENT_TEMPLATE', '/html_content/path')

        opener = mock_open(read_data='template: {{ti.task_id}}')
        with patch('airflow.models.taskinstance.open', opener, create=True):
            try:
                ti.run()
            except AirflowException:
                pass

        (email, title, body), _ = mock_send_email.call_args
        self.assertEqual(email, 'to')
        self.assertEqual('template: test_email_alert_with_config', title)
        self.assertEqual('template: test_email_alert_with_config', body)
Example #9
    def test_file_transfer_put(self):
        test_local_file_content = \
            b"This is local file content \n which is multiline " \
            b"continuing....with other character\nanother line here \n this is last line"
        # create a test file locally
        with open(self.test_local_filepath, 'wb') as f:
            f.write(test_local_file_content)

        # put test file to remote
        put_test_task = SFTPOperator(
                task_id="test_sftp",
                ssh_hook=self.hook,
                local_filepath=self.test_local_filepath,
                remote_filepath=self.test_remote_filepath,
                operation=SFTPOperation.PUT,
                dag=self.dag
        )
        self.assertIsNotNone(put_test_task)
        ti2 = TaskInstance(task=put_test_task, execution_date=datetime.now())
        ti2.run()

        # check the remote file content
        check_file_task = SSHOperator(
                task_id="test_check_file",
                ssh_hook=self.hook,
                command="cat {0}".format(self.test_remote_filepath),
                do_xcom_push=True,
                dag=self.dag
        )
        self.assertIsNotNone(check_file_task)
        ti3 = TaskInstance(task=check_file_task, execution_date=datetime.now())
        ti3.run()
        self.assertEqual(
                ti3.xcom_pull(task_ids='test_check_file', key='return_value').strip(),
                test_local_file_content)

    def test_s3_to_sftp_operation(self):
        # Setting
        configuration.conf.set("core", "enable_xcom_pickling", "True")
        test_remote_file_content = \
            "This is remote file content \n which is also multiline " \
            "another line here \n this is last line. EOF"

        # Test for creation of s3 bucket
        conn = boto3.client('s3')
        conn.create_bucket(Bucket=self.s3_bucket)
        self.assertTrue((self.s3_hook.check_for_bucket(self.s3_bucket)))

        with open(LOCAL_FILE_PATH, 'w') as f:
            f.write(test_remote_file_content)
        self.s3_hook.load_file(LOCAL_FILE_PATH, self.s3_key, bucket_name=BUCKET)

        # Check if object was created in s3
        objects_in_dest_bucket = conn.list_objects(Bucket=self.s3_bucket,
                                                   Prefix=self.s3_key)
        # exactly one object should be found
        self.assertEqual(len(objects_in_dest_bucket['Contents']), 1)

        # the object found should be consistent with dest_key specified earlier
        self.assertEqual(objects_in_dest_bucket['Contents'][0]['Key'], self.s3_key)

        # get remote file to local
        run_task = S3ToSFTPOperator(
            s3_bucket=BUCKET,
            s3_key=S3_KEY,
            sftp_path=SFTP_PATH,
            sftp_conn_id=SFTP_CONN_ID,
            s3_conn_id=S3_CONN_ID,
            task_id=TASK_ID,
            dag=self.dag
        )
        self.assertIsNotNone(run_task)

        run_task.execute(None)

        # Check that the file is created remotely
        check_file_task = SSHOperator(
            task_id="test_check_file",
            ssh_hook=self.hook,
            command="cat {0}".format(self.sftp_path),
            do_xcom_push=True,
            dag=self.dag
        )
        self.assertIsNotNone(check_file_task)
        ti3 = TaskInstance(task=check_file_task, execution_date=timezone.utcnow())
        ti3.run()
        self.assertEqual(
            ti3.xcom_pull(task_ids='test_check_file', key='return_value').strip(),
            test_remote_file_content.encode('utf-8'))

        # Clean up after finishing with test
        conn.delete_object(Bucket=self.s3_bucket, Key=self.s3_key)
        conn.delete_bucket(Bucket=self.s3_bucket)
        self.assertFalse((self.s3_hook.check_for_bucket(self.s3_bucket)))
Example #11
def test(args):
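    """Run a single task instance in test mode for the given CLI args, ignoring state and dependencies."""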
    log_to_stdout()
    args.execution_date = dateutil.parser.parse(args.execution_date)
    dagbag = DagBag(args.subdir)
    if args.dag_id not in dagbag.dags:
        raise AirflowException('dag_id could not be found')
    dag = dagbag.dags[args.dag_id]
    task = dag.get_task(task_id=args.task_id)
    ti = TaskInstance(task, args.execution_date)
    ti.run(force=True, ignore_dependencies=True, test_mode=True)
Example #12
    def test_requeue_over_concurrency(self, mock_concurrency_reached):
        mock_concurrency_reached.return_value = True

        dag = DAG(dag_id='test_requeue_over_concurrency', start_date=DEFAULT_DATE,
                  max_active_runs=1, concurrency=2)
        task = DummyOperator(task_id='test_requeue_over_concurrency_op', dag=dag)

        ti = TI(task=task, execution_date=datetime.datetime.now())
        ti.run()
        self.assertEqual(ti.state, models.State.NONE)
Example #13
    def delete_remote_resource(self):
        # remove the test file from the remote host
        remove_file_task = SSHOperator(
            task_id="test_check_file",
            ssh_hook=self.hook,
            command="rm {0}".format(self.test_remote_filepath),
            do_xcom_push=True,
            dag=self.dag
        )
        self.assertIsNotNone(remove_file_task)
        ti3 = TaskInstance(task=remove_file_task, execution_date=datetime.now())
        ti3.run()

    def test_sftp_to_s3_operation(self):
        # Setting
        configuration.conf.set("core", "enable_xcom_pickling", "True")
        test_remote_file_content = \
            "This is remote file content \n which is also multiline " \
            "another line here \n this is last line. EOF"

        # create a test file remotely
        create_file_task = SSHOperator(
            task_id="test_create_file",
            ssh_hook=self.hook,
            command="echo '{0}' > {1}".format(test_remote_file_content,
                                              self.sftp_path),
            do_xcom_push=True,
            dag=self.dag
        )
        self.assertIsNotNone(create_file_task)
        ti1 = TaskInstance(task=create_file_task, execution_date=timezone.utcnow())
        ti1.run()

        # Test for creation of s3 bucket
        conn = boto3.client('s3')
        conn.create_bucket(Bucket=self.s3_bucket)
        self.assertTrue((self.s3_hook.check_for_bucket(self.s3_bucket)))

        # get remote file to local
        run_task = SFTPToS3Operator(
            s3_bucket=BUCKET,
            s3_key=S3_KEY,
            sftp_path=SFTP_PATH,
            sftp_conn_id=SFTP_CONN_ID,
            s3_conn_id=S3_CONN_ID,
            task_id='test_sftp_to_s3',
            dag=self.dag
        )
        self.assertIsNotNone(run_task)

        run_task.execute(None)

        # Check if object was created in s3
        objects_in_dest_bucket = conn.list_objects(Bucket=self.s3_bucket,
                                                   Prefix=self.s3_key)
        # exactly one object should be found
        self.assertEqual(len(objects_in_dest_bucket['Contents']), 1)

        # the object found should be consistent with dest_key specified earlier
        self.assertEqual(objects_in_dest_bucket['Contents'][0]['Key'], self.s3_key)

        # Clean up after finishing with test
        conn.delete_object(Bucket=self.s3_bucket, Key=self.s3_key)
        conn.delete_bucket(Bucket=self.s3_bucket)
        self.assertFalse((self.s3_hook.check_for_bucket(self.s3_bucket)))

    def test_file_task_handler(self):
        def task_callable(ti, **kwargs):
            ti.log.info("test")
        dag = DAG('dag_for_testing_file_task_handler', start_date=DEFAULT_DATE)
        task = PythonOperator(
            task_id='task_for_testing_file_log_handler',
            dag=dag,
            python_callable=task_callable,
            provide_context=True
        )
        ti = TaskInstance(task=task, execution_date=DEFAULT_DATE)

        logger = ti.log
        ti.log.disabled = False

        file_handler = next((handler for handler in logger.handlers
                             if handler.name == FILE_TASK_HANDLER), None)
        self.assertIsNotNone(file_handler)

        set_context(logger, ti)
        self.assertIsNotNone(file_handler.handler)
        # We expect set_context to generate a file locally.
        log_filename = file_handler.handler.baseFilename
        self.assertTrue(os.path.isfile(log_filename))
        self.assertTrue(log_filename.endswith("1.log"), log_filename)

        ti.run(ignore_ti_state=True)

        file_handler.flush()
        file_handler.close()

        self.assertTrue(hasattr(file_handler, 'read'))
        # Return value of read must be a tuple of list and list.
        logs, metadatas = file_handler.read(ti)
        self.assertTrue(isinstance(logs, list))
        self.assertTrue(isinstance(metadatas, list))
        self.assertEqual(len(logs), 1)
        self.assertEqual(len(logs), len(metadatas))
        self.assertTrue(isinstance(metadatas[0], dict))
        target_re = r'\n\[[^\]]+\] {test_log_handlers.py:\d+} INFO - test\n'

        # We should expect our log line from the callable above to appear in
        # the logs we read back
        six.assertRegex(
            self,
            logs[0],
            target_re,
            "Logs were " + str(logs)
        )

        # Remove the generated tmp log file.
        os.remove(log_filename)
Example #16
    def test_run_pooling_task(self):
        """
        Test that running a task assigned to a pool leaves the task instance
        in the QUEUED state instead of executing it directly.
        """
        dag = models.DAG(dag_id='test_run_pooling_task')
        task = DummyOperator(task_id='test_run_pooling_task_op', dag=dag,
                             pool='test_run_pooling_task_pool', owner='airflow',
                             start_date=datetime.datetime(2016, 2, 1, 0, 0, 0))
        ti = TI(
            task=task, execution_date=datetime.datetime.now())
        ti.run()
        self.assertEqual(ti.state, models.State.QUEUED)
Example #17
    def test_xcom_pull_after_success(self):
        """
        tests xcom set/clear relative to a task in a 'success' rerun scenario
        """
        key = 'xcom_key'
        value = 'xcom_value'

        dag = models.DAG(dag_id='test_xcom', schedule_interval='@monthly')
        task = DummyOperator(
            task_id='test_xcom',
            dag=dag,
            pool='test_xcom',
            owner='airflow',
            start_date=datetime.datetime(2016, 6, 2, 0, 0, 0))
        exec_date = datetime.datetime.now()
        ti = TI(
            task=task, execution_date=exec_date)
        ti.run(mark_success=True)
        ti.xcom_push(key=key, value=value)
        self.assertEqual(ti.xcom_pull(task_ids='test_xcom', key=key), value)
        ti.run()
        # The second run and assert is to handle AIRFLOW-131 (don't clear on
        # prior success)
        self.assertEqual(ti.xcom_pull(task_ids='test_xcom', key=key), value)

        # Test AIRFLOW-703: Xcom shouldn't be cleared if the task doesn't
        # execute, even if dependencies are ignored
        ti.run(ignore_all_deps=True, mark_success=True)
        self.assertEqual(ti.xcom_pull(task_ids='test_xcom', key=key), value)
        # Xcom IS finally cleared once task has executed
        ti.run(ignore_all_deps=True)
        self.assertEqual(ti.xcom_pull(task_ids='test_xcom', key=key), None)
Example #18
def test(args, dag=None):
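    """Run a single task instance in test mode; supports a dry run and task param overrides from the CLI args."""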
    dag = dag or get_dag(args)

    task = dag.get_task(task_id=args.task_id)
    # Add CLI provided task_params to task.params
    if args.task_params:
        passed_in_params = json.loads(args.task_params)
        task.params.update(passed_in_params)
    ti = TaskInstance(task, args.execution_date)

    if args.dry_run:
        ti.dry_run()
    else:
        ti.run(force=True, ignore_dependencies=True, test_mode=True)
Example #19
def test(args):
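    """Run a single task instance in test mode, expanding DAGS_FOLDER in the subdir argument before loading the DagBag."""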
    log_to_stdout()
    args.execution_date = dateutil.parser.parse(args.execution_date)
    if args.subdir:
        subdir = args.subdir.replace(
            "DAGS_FOLDER", conf.get("core", "DAGS_FOLDER"))
        subdir = os.path.expanduser(subdir)
    dagbag = DagBag(subdir)
    if args.dag_id not in dagbag.dags:
        raise AirflowException('dag_id could not be found')
    dag = dagbag.dags[args.dag_id]
    task = dag.get_task(task_id=args.task_id)
    ti = TaskInstance(task, args.execution_date)
    ti.run(force=True, ignore_dependencies=True, test_mode=True)
Example #20
    def test_run_pooling_task_with_mark_success(self):
        """
        Test that running a task with the mark_success param updates the task
        state to SUCCESS without actually running the task.
        """
        dag = models.DAG(dag_id="test_run_pooling_task_with_mark_success")
        task = DummyOperator(
            task_id="test_run_pooling_task_with_mark_success_op",
            dag=dag,
            pool="test_run_pooling_task_with_mark_success_pool",
            owner="airflow",
            start_date=datetime.datetime(2016, 2, 1, 0, 0, 0),
        )
        ti = TI(task=task, execution_date=datetime.datetime.now())
        ti.run(mark_success=True)
        self.assertEqual(ti.state, models.State.SUCCESS)
Example #21
    def test_command_execution_with_env(self):
        task = SSHOperator(
            task_id="test",
            ssh_hook=self.hook,
            command="echo -n airflow",
            do_xcom_push=True,
            dag=self.dag,
        )

        self.assertIsNotNone(task)

        ti = TaskInstance(
            task=task, execution_date=datetime.now())
        ti.run()
        self.assertIsNotNone(ti.duration)
        self.assertEqual(ti.xcom_pull(task_ids='test', key='return_value'), b'airflow')
Example #22
    def test_run_pooling_task(self, mock_pool_full):
        """
        Test that running a task whose pool is full leaves the task instance
        in the QUEUED state instead of running it.
        """
        # Mock the pool out with a full pool because the pool doesn't actually exist
        mock_pool_full.return_value = True

        dag = models.DAG(dag_id='test_run_pooling_task')
        task = DummyOperator(task_id='test_run_pooling_task_op', dag=dag,
                             pool='test_run_pooling_task_pool', owner='airflow',
                             start_date=datetime.datetime(2016, 2, 1, 0, 0, 0))
        ti = TI(
            task=task, execution_date=datetime.datetime.now())
        ti.run()
        self.assertEqual(ti.state, models.State.QUEUED)
Example #23
    def test_run_pooling_task(self, mock_pool_full):
        """
        Test that running the task updates the task state to SUCCESS even though
        the pool is full (there is no pool dependency check in ti_deps anymore).
        """
        # Mock the pool out with a full pool because the pool doesn't actually exist
        mock_pool_full.return_value = True

        dag = models.DAG(dag_id='test_run_pooling_task')
        task = DummyOperator(task_id='test_run_pooling_task_op', dag=dag,
                             pool='test_run_pooling_task_pool', owner='airflow',
                             start_date=datetime.datetime(2016, 2, 1, 0, 0, 0))
        ti = TI(
            task=task, execution_date=datetime.datetime.now())
        ti.run()
        self.assertEqual(ti.state, models.State.SUCCESS)
Example #24
    def delete_remote_resource(self):
        if os.path.exists(self.test_remote_filepath):
            # remove the test file from the remote host
            remove_file_task = SSHOperator(
                task_id="test_check_file",
                ssh_hook=self.hook,
                command="rm {0}".format(self.test_remote_filepath),
                do_xcom_push=True,
                dag=self.dag
            )
            self.assertIsNotNone(remove_file_task)
            ti3 = TaskInstance(task=remove_file_task, execution_date=timezone.utcnow())
            ti3.run()
        if os.path.exists(self.test_remote_filepath_int_dir):
            os.remove(self.test_remote_filepath_int_dir)
        if os.path.exists(self.test_remote_dir):
            os.rmdir(self.test_remote_dir)
Example #25
    def test_pickle_command_execution(self):
        configuration.set("core", "enable_xcom_pickling", "True")
        task = SSHOperator(
                task_id="test",
                ssh_hook=self.hook,
                command="echo -n airflow",
                do_xcom_push=True,
                dag=self.dag,
        )

        self.assertIsNotNone(task)

        ti = TaskInstance(
                task=task, execution_date=datetime.now())
        ti.run()
        self.assertIsNotNone(ti.duration)
        self.assertEqual(ti.xcom_pull(task_ids='test', key='return_value'), b'airflow')

    def test_no_output_command(self):
        configuration.conf.set("core", "enable_xcom_pickling", "True")
        task = SSHOperator(
            task_id="test",
            ssh_hook=self.hook,
            command="sleep 1",
            do_xcom_push=True,
            dag=self.dag,
        )

        self.assertIsNotNone(task)

        ti = TaskInstance(
            task=task, execution_date=timezone.utcnow())
        ti.run()
        self.assertIsNotNone(ti.duration)
        self.assertEqual(ti.xcom_pull(task_ids='test', key='return_value'), b'')
Example #27
def test(args):
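    """Run a single task instance in test mode, optionally merging JSON task params from the CLI args into task.params."""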
    args.execution_date = dateutil.parser.parse(args.execution_date)
    dagbag = DagBag(process_subdir(args.subdir))
    if args.dag_id not in dagbag.dags:
        raise AirflowException('dag_id could not be found')
    dag = dagbag.dags[args.dag_id]
    task = dag.get_task(task_id=args.task_id)
    # Add CLI provided task_params to task.params
    if args.task_params:
        passed_in_params = json.loads(args.task_params)
        task.params.update(passed_in_params)
    ti = TaskInstance(task, args.execution_date)

    if args.dry_run:
        ti.dry_run()
    else:
        ti.run(force=True, ignore_dependencies=True, test_mode=True)
Example #28
    def test_run_pooling_task_with_skip(self):
        """
        Test that running a task which raises AirflowSkipException ends up
        in the SKIPPED state.
        """

        def raise_skip_exception():
            raise AirflowSkipException

        dag = models.DAG(dag_id='test_run_pooling_task_with_skip')
        task = PythonOperator(
            task_id='test_run_pooling_task_with_skip',
            dag=dag,
            python_callable=raise_skip_exception,
            owner='airflow',
            start_date=datetime.datetime(2016, 2, 1, 0, 0, 0))
        ti = TI(
            task=task, execution_date=datetime.datetime.now())
        ti.run()
        self.assertTrue(ti.state == models.State.SKIPPED)
Example #29
    def test_email_alert(self, mock_send_email):
        dag = models.DAG(dag_id='test_failure_email')
        task = BashOperator(
            task_id='test_email_alert',
            dag=dag,
            bash_command='exit 1',
            start_date=DEFAULT_DATE,
            email='to')

        ti = TI(task=task, execution_date=datetime.datetime.now())

        try:
            ti.run()
        except AirflowException:
            pass

        (email, title, body), _ = mock_send_email.call_args
        self.assertEqual(email, 'to')
        self.assertIn('test_email_alert', title)
        self.assertIn('test_email_alert', body)
        self.assertIn('Try 1', body)
Example #30
    def test_clear_task_instances(self):
        dag = DAG('test_clear_task_instances', start_date=DEFAULT_DATE,
                  end_date=DEFAULT_DATE + datetime.timedelta(days=10))
        task0 = DummyOperator(task_id='0', owner='test', dag=dag)
        task1 = DummyOperator(task_id='1', owner='test', dag=dag, retries=2)
        ti0 = TI(task=task0, execution_date=DEFAULT_DATE)
        ti1 = TI(task=task1, execution_date=DEFAULT_DATE)

        ti0.run()
        ti1.run()
        session = settings.Session()
        qry = session.query(TI).filter(
            TI.dag_id == dag.dag_id).all()
        clear_task_instances(qry, session, dag=dag)
        session.commit()
        ti0.refresh_from_db()
        ti1.refresh_from_db()
        self.assertEqual(ti0.try_number, 1)
        self.assertEqual(ti0.max_tries, 1)
        self.assertEqual(ti1.try_number, 1)
        self.assertEqual(ti1.max_tries, 3)
Example #31
    def test_json_file_transfer_get(self):
        test_remote_file_content = (
            "This is remote file content \n which is also multiline "
            "another line here \n this is last line. EOF")

        # create a test file remotely
        create_file_task = SSHOperator(
            task_id="test_create_file",
            ssh_hook=self.hook,
            command="echo '{0}' > {1}".format(test_remote_file_content,
                                              self.test_remote_filepath),
            do_xcom_push=True,
            dag=self.dag,
        )
        self.assertIsNotNone(create_file_task)
        ti1 = TaskInstance(task=create_file_task,
                           execution_date=timezone.utcnow())
        ti1.run()

        # get remote file to local
        get_test_task = SFTPOperator(
            task_id="test_sftp",
            ssh_hook=self.hook,
            local_filepath=self.test_local_filepath,
            remote_filepath=self.test_remote_filepath,
            operation=SFTPOperation.GET,
            dag=self.dag,
        )
        self.assertIsNotNone(get_test_task)
        ti2 = TaskInstance(task=get_test_task,
                           execution_date=timezone.utcnow())
        ti2.run()

        # test the received content
        content_received = None
        with open(self.test_local_filepath, 'r') as file:
            content_received = file.read()
        self.assertEqual(
            content_received.strip(),
            test_remote_file_content.encode('utf-8').decode('utf-8'))
Example #32
    def test_json_file_transfer_put(self):
        test_local_file_content = (
            b"This is local file content \n which is multiline "
            b"continuing....with other character\nanother line here \n this is last line"
        )
        # create a test file locally
        with open(self.test_local_filepath, 'wb') as file:
            file.write(test_local_file_content)

        # put test file to remote
        put_test_task = SFTPOperator(
            task_id="put_test_task",
            ssh_hook=self.hook,
            local_filepath=self.test_local_filepath,
            remote_filepath=self.test_remote_filepath,
            operation=SFTPOperation.PUT,
            dag=self.dag,
        )
        self.assertIsNotNone(put_test_task)
        ti2 = TaskInstance(task=put_test_task,
                           execution_date=timezone.utcnow())
        ti2.run()

        # check the remote file content
        check_file_task = SSHOperator(
            task_id="check_file_task",
            ssh_hook=self.hook,
            command="cat {0}".format(self.test_remote_filepath),
            do_xcom_push=True,
            dag=self.dag,
        )
        self.assertIsNotNone(check_file_task)
        ti3 = TaskInstance(task=check_file_task,
                           execution_date=timezone.utcnow())
        ti3.run()
        self.assertEqual(
            ti3.xcom_pull(task_ids=check_file_task.task_id,
                          key='return_value').strip(),
            b64encode(test_local_file_content).decode('utf-8'),
        )
Example #33
    def test_xcom_pull_different_execution_date(self):
        """
        tests xcom fetch behavior with different execution dates, using
        both xcom_pull with "include_prior_dates" and without
        """
        key = 'xcom_key'
        value = 'xcom_value'

        dag = models.DAG(dag_id='test_xcom', schedule_interval='@monthly')
        task = DummyOperator(task_id='test_xcom',
                             dag=dag,
                             pool='test_xcom',
                             owner='airflow',
                             start_date=datetime.datetime(2016, 6, 2, 0, 0, 0))
        exec_date = datetime.datetime.now()
        ti = TI(task=task, execution_date=exec_date)
        ti.run(mark_success=True)
        ti.xcom_push(key=key, value=value)
        self.assertEqual(ti.xcom_pull(task_ids='test_xcom', key=key), value)
        ti.run()
        exec_date += datetime.timedelta(days=1)
        ti = TI(task=task, execution_date=exec_date)
        ti.run()
        # We have set a new execution date (and did not pass in
        # 'include_prior_dates'), which means this task should now have a
        # cleared xcom value
        self.assertEqual(ti.xcom_pull(task_ids='test_xcom', key=key), None)
        # We *should* get a value using 'include_prior_dates'
        self.assertEqual(
            ti.xcom_pull(task_ids='test_xcom',
                         key=key,
                         include_prior_dates=True), value)
Example #34
    def test_operator_clear(self):
        dag = DAG('test_operator_clear', start_date=DEFAULT_DATE,
                  end_date=DEFAULT_DATE + datetime.timedelta(days=10))
        t1 = DummyOperator(task_id='bash_op', owner='test', dag=dag)
        t2 = DummyOperator(task_id='dummy_op', owner='test', dag=dag, retries=1)

        t2.set_upstream(t1)

        ti1 = TI(task=t1, execution_date=DEFAULT_DATE)
        ti2 = TI(task=t2, execution_date=DEFAULT_DATE)
        ti2.run()
        # Dependency not met
        self.assertEqual(ti2.try_number, 1)
        self.assertEqual(ti2.max_tries, 1)

        t2.clear(upstream=True)
        ti1.run()
        ti2.run()
        self.assertEqual(ti1.try_number, 2)
        # max_tries is 0 because there is no task instance in db for ti1
        # so clear won't change the max_tries.
        self.assertEqual(ti1.max_tries, 0)
        self.assertEqual(ti2.try_number, 2)
        # try_number (0) + retries(1)
        self.assertEqual(ti2.max_tries, 1)
Example #35
    def test_clear_task_instances_without_task(self):
        dag = DAG(
            'test_clear_task_instances_without_task',
            start_date=DEFAULT_DATE,
            end_date=DEFAULT_DATE + datetime.timedelta(days=10),
        )
        task0 = DummyOperator(task_id='task0', owner='test', dag=dag)
        task1 = DummyOperator(task_id='task1',
                              owner='test',
                              dag=dag,
                              retries=2)
        ti0 = TI(task=task0, execution_date=DEFAULT_DATE)
        ti1 = TI(task=task1, execution_date=DEFAULT_DATE)

        dag.create_dagrun(
            execution_date=ti0.execution_date,
            state=State.RUNNING,
            run_type=DagRunType.SCHEDULED,
        )

        ti0.run()
        ti1.run()

        # Remove the task from dag.
        dag.task_dict = {}
        assert not dag.has_task(task0.task_id)
        assert not dag.has_task(task1.task_id)

        with create_session() as session:
            qry = session.query(TI).filter(TI.dag_id == dag.dag_id).all()
            clear_task_instances(qry, session)

        # When dag is None, max_tries will be the maximum of the original max_tries and try_number.
        ti0.refresh_from_db()
        ti1.refresh_from_db()
        # Next try to run will be try 2
        assert ti0.try_number == 2
        assert ti0.max_tries == 1
        assert ti1.try_number == 2
        assert ti1.max_tries == 2
Example #36
    def test_operator_clear(self):
        dag = DAG(
            'test_operator_clear',
            start_date=DEFAULT_DATE,
            end_date=DEFAULT_DATE + datetime.timedelta(days=10),
        )
        op1 = DummyOperator(task_id='bash_op', owner='test', dag=dag)
        op2 = DummyOperator(task_id='dummy_op', owner='test', dag=dag, retries=1)

        op2.set_upstream(op1)

        ti1 = TI(task=op1, execution_date=DEFAULT_DATE)
        ti2 = TI(task=op2, execution_date=DEFAULT_DATE)

        dag.create_dagrun(
            execution_date=ti1.execution_date,
            state=State.RUNNING,
            run_type=DagRunType.SCHEDULED,
        )

        ti2.run()
        # Dependency not met
        self.assertEqual(ti2.try_number, 1)
        self.assertEqual(ti2.max_tries, 1)

        op2.clear(upstream=True)
        ti1.run()
        ti2.run(ignore_ti_state=True)
        self.assertEqual(ti1.try_number, 2)
        # max_tries is 0 because there is no task instance in db for ti1
        # so clear won't change the max_tries.
        self.assertEqual(ti1.max_tries, 0)
        self.assertEqual(ti2.try_number, 2)
        # try_number (0) + retries(1)
        self.assertEqual(ti2.max_tries, 1)

    def test_mark_non_runnable_task_as_success(self):
        """
        Test that running a task with the mark_success param updates the task
        state to SUCCESS without running it, even though it fails dependency checks.
        """
        non_runnable_state = (
            set(State.task_states) - RUNNABLE_STATES - {State.SUCCESS}).pop()
        dag = models.DAG(dag_id='test_mark_non_runnable_task_as_success')
        task = DummyOperator(
            task_id='test_mark_non_runnable_task_as_success_op',
            dag=dag,
            pool='test_pool',
            owner='airflow',
            start_date=timezone.datetime(2016, 2, 1, 0, 0, 0))
        ti = TI(
            task=task, execution_date=timezone.utcnow(), state=non_runnable_state)
        # TI.run() will sync from DB before validating deps.
        with create_session() as session:
            session.add(ti)
            session.commit()
        ti.run(mark_success=True)
        self.assertEqual(ti.state, State.SUCCESS)
Example #38
    def test_pickle_file_transfer_put(self):
        test_local_file_content = (
            b"This is local file content \n which is multiline "
            b"continuing....with other character\nanother line here \n this is last line"
        )
        # create a test file locally
        with open(self.test_local_filepath, 'wb') as file:
            file.write(test_local_file_content)

        # put test file to remote
        put_test_task = SFTPOperator(
            task_id="put_test_task",
            ssh_hook=self.hook,
            local_filepath=self.test_local_filepath,
            remote_filepath=self.test_remote_filepath,
            operation=SFTPOperation.PUT,
            create_intermediate_dirs=True,
            dag=self.dag,
        )
        assert put_test_task is not None
        ti2 = TaskInstance(task=put_test_task,
                           execution_date=timezone.utcnow())
        ti2.run()

        # check the remote file content
        check_file_task = SSHOperator(
            task_id="check_file_task",
            ssh_hook=self.hook,
            command=f"cat {self.test_remote_filepath}",
            do_xcom_push=True,
            dag=self.dag,
        )
        assert check_file_task is not None
        ti3 = TaskInstance(task=check_file_task,
                           execution_date=timezone.utcnow())
        ti3.run()
        assert (ti3.xcom_pull(
            task_ids=check_file_task.task_id,
            key='return_value').strip() == test_local_file_content)
Example #39
    def test_xcom_pull_after_success(self):
        """
        tests xcom set/clear relative to a task in a 'success' rerun scenario
        """
        key = 'xcom_key'
        value = 'xcom_value'

        dag = models.DAG(dag_id='test_xcom', schedule_interval='@monthly')
        task = DummyOperator(task_id='test_xcom',
                             dag=dag,
                             pool='test_xcom',
                             owner='airflow',
                             start_date=datetime.datetime(2016, 6, 2, 0, 0, 0))
        exec_date = datetime.datetime.now()
        ti = TI(task=task, execution_date=exec_date)
        ti.run(mark_success=True)
        ti.xcom_push(key=key, value=value)
        self.assertEqual(ti.xcom_pull(task_ids='test_xcom', key=key), value)
        ti.run()
        # The second run and assert is to handle AIRFLOW-131 (don't clear on
        # prior success)
        self.assertEqual(ti.xcom_pull(task_ids='test_xcom', key=key), value)
Example #40
def task_test(args, dag=None):
    """Tests task for a given dag_id"""
    # We want log output from operators etc. to show up here. Normally
    # airflow.task would redirect to a file, but here we want it to propagate
    # up to the normal airflow handler.
    handlers = logging.getLogger('airflow.task').handlers
    already_has_stream_handler = False
    for handler in handlers:
        already_has_stream_handler = isinstance(handler, logging.StreamHandler)
        if already_has_stream_handler:
            break
    if not already_has_stream_handler:
        logging.getLogger('airflow.task').propagate = True

    dag = dag or get_dag(args.subdir, args.dag_id)

    task = dag.get_task(task_id=args.task_id)
    # Add CLI provided task_params to task.params
    if args.task_params:
        passed_in_params = json.loads(args.task_params)
        task.params.update(passed_in_params)
    ti = TaskInstance(task, args.execution_date)

    try:
        if args.dry_run:
            ti.dry_run()
        else:
            ti.run(ignore_task_deps=True, ignore_ti_state=True, test_mode=True)
    except Exception:  # pylint: disable=broad-except
        if args.post_mortem:
            debugger = _guess_debugger()
            debugger.post_mortem()
        else:
            raise
    finally:
        if not already_has_stream_handler:
            # Make sure to reset back to normal. When run for CLI this doesn't
            # matter, but it does for test suite
            logging.getLogger('airflow.task').propagate = False
Example #41
def test_postgres_adapter_comments():
    session = _create_postgres_session()
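    # Build the example DAG, run task_1 in test mode against Postgres, then verify
    # the column and table comments that should have been created for the view.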

    dag = viewflow.create_dag(
        "./tests/projects/postgresql/simple_dag_comments")
    task = dag.get_task("task_1")
    ti = TaskInstance(task, datetime(2020, 1, 1))

    with PostgresHook(postgres_conn_id="postgres_viewflow").get_conn() as conn:
        with conn.cursor() as cur:
            cur.execute("DROP TABLE IF EXISTS viewflow.task_1")

    ti.run(ignore_task_deps=True,
           ignore_ti_state=True,
           test_mode=True,
           session=session)

    with PostgresHook(postgres_conn_id="postgres_viewflow").get_conn() as conn:
        with conn.cursor() as cur:
            sql_table_comments = """
            SELECT cols.table_name,  
                   cols.column_name, 
                   pg_catalog.col_description(c.oid, cols.ordinal_position::int)
              FROM pg_catalog.pg_class c, information_schema.columns cols
             WHERE cols.table_catalog = 'viewflow' AND cols.table_schema = 'viewflow'
               AND cols.table_name = 'task_1'
               AND cols.table_name = c.relname;
            """
            cur.execute(sql_table_comments)
            results = cur.fetchall()
            assert ("task_1", "user_id", "User ID") in results
            assert ("task_1", "email", "Email") in results

        with conn.cursor() as cur:
            sql_table_description = (
                "select obj_description('viewflow.task_1'::regclass);")
            cur.execute(sql_table_description)
            results = cur.fetchall()
            assert ("Description", ) in results
Example #42
    def test_dag_clear(self):
        dag = DAG('test_dag_clear',
                  start_date=DEFAULT_DATE,
                  end_date=DEFAULT_DATE + datetime.timedelta(days=10))
        task0 = DummyOperator(task_id='test_dag_clear_task_0',
                              owner='test',
                              dag=dag)
        ti0 = TI(task=task0, execution_date=DEFAULT_DATE)
        # Next try to run will be try 1
        self.assertEqual(ti0.try_number, 1)
        ti0.run()
        self.assertEqual(ti0.try_number, 2)
        dag.clear()
        ti0.refresh_from_db()
        self.assertEqual(ti0.try_number, 2)
        self.assertEqual(ti0.state, State.NONE)
        self.assertEqual(ti0.max_tries, 1)

        task1 = DummyOperator(task_id='test_dag_clear_task_1',
                              owner='test',
                              dag=dag,
                              retries=2)
        ti1 = TI(task=task1, execution_date=DEFAULT_DATE)
        self.assertEqual(ti1.max_tries, 2)
        ti1.try_number = 1
        # Next try will be 2
        ti1.run()
        self.assertEqual(ti1.try_number, 3)
        self.assertEqual(ti1.max_tries, 2)

        dag.clear()
        ti0.refresh_from_db()
        ti1.refresh_from_db()
        # after clearing the dag, ti1 should show attempt 3 of 5
        self.assertEqual(ti1.max_tries, 4)
        self.assertEqual(ti1.try_number, 3)
        # after clearing the dag, ti0 should show attempt 2 of 2
        self.assertEqual(ti0.try_number, 2)
        self.assertEqual(ti0.max_tries, 1)
Example #43
    def test_clear_task_instances(self):
        dag = DAG('test_clear_task_instances',
                  start_date=DEFAULT_DATE,
                  end_date=DEFAULT_DATE + datetime.timedelta(days=10))
        task0 = DummyOperator(task_id='0', owner='test', dag=dag)
        task1 = DummyOperator(task_id='1', owner='test', dag=dag, retries=2)
        ti0 = TI(task=task0, execution_date=DEFAULT_DATE)
        ti1 = TI(task=task1, execution_date=DEFAULT_DATE)

        ti0.run()
        ti1.run()
        session = settings.Session()
        qry = session.query(TI).filter(TI.dag_id == dag.dag_id).all()
        clear_task_instances(qry, session, dag=dag)
        session.commit()
        ti0.refresh_from_db()
        ti1.refresh_from_db()
        # Next try to run will be try 2
        self.assertEqual(ti0.try_number, 2)
        self.assertEqual(ti0.max_tries, 1)
        self.assertEqual(ti1.try_number, 2)
        self.assertEqual(ti1.max_tries, 3)
Example #44
    def test_file_transfer_with_intermediate_dir_error_get(self):
        test_remote_file_content = (
            "This is remote file content \n which is also multiline "
            "another line here \n this is last line. EOF")

        # create a test file remotely
        create_file_task = SSHOperator(
            task_id="test_create_file",
            ssh_hook=self.hook,
            command=f"echo '{test_remote_file_content}' > {self.test_remote_filepath}",
            do_xcom_push=True,
            dag=self.dag,
        )
        assert create_file_task is not None
        ti1 = TaskInstance(task=create_file_task,
                           execution_date=timezone.utcnow())
        ti1.run()

        # get remote file to local
        get_test_task = SFTPOperator(
            task_id="test_sftp",
            ssh_hook=self.hook,
            local_filepath=self.test_local_filepath_int_dir,
            remote_filepath=self.test_remote_filepath,
            operation=SFTPOperation.GET,
            create_intermediate_dirs=True,
            dag=self.dag,
        )
        assert get_test_task is not None
        ti2 = TaskInstance(task=get_test_task,
                           execution_date=timezone.utcnow())
        ti2.run()

        # test the received content
        content_received = None
        with open(self.test_local_filepath_int_dir) as file:
            content_received = file.read()
        assert content_received.strip() == test_remote_file_content
Example #45
    def test_file_transfer_with_intermediate_dir_error_get(self):
        configuration.conf.set("core", "enable_xcom_pickling", "True")
        test_remote_file_content = \
            "This is remote file content \n which is also multiline " \
            "another line here \n this is last line. EOF"

        # create a test file remotely
        create_file_task = SSHOperator(task_id="test_create_file",
                                       ssh_hook=self.hook,
                                       command="echo '{0}' > {1}".format(
                                           test_remote_file_content,
                                           self.test_remote_filepath),
                                       do_xcom_push=True,
                                       dag=self.dag)
        self.assertIsNotNone(create_file_task)
        ti1 = TaskInstance(task=create_file_task,
                           execution_date=timezone.utcnow())
        ti1.run()

        # get remote file to local
        get_test_task = SFTPOperator(
            task_id="test_sftp",
            ssh_hook=self.hook,
            local_filepath=self.test_local_filepath_int_dir,
            remote_filepath=self.test_remote_filepath,
            operation=SFTPOperation.GET,
            create_intermediate_dirs=True,
            dag=self.dag)
        self.assertIsNotNone(get_test_task)
        ti2 = TaskInstance(task=get_test_task,
                           execution_date=timezone.utcnow())
        ti2.run()

        # test the received content
        content_received = None
        with open(self.test_local_filepath_int_dir, 'r') as f:
            content_received = f.read()
        self.assertEqual(content_received.strip(), test_remote_file_content)
Example #46
    def test_post_execute_hook(self):
        """
        Test that post_execute hook is called with the Operator's result.
        The result ('error') will cause an error to be raised and trapped.
        """
        class TestError(Exception):
            pass

        class TestOperator(PythonOperator):
            def post_execute(self, context, result):
                if result == 'error':
                    raise TestError('expected error.')

        dag = models.DAG(dag_id='test_post_execute_dag')
        task = TestOperator(task_id='test_operator',
                            dag=dag,
                            python_callable=lambda: 'error',
                            owner='airflow',
                            start_date=datetime.datetime(2017, 2, 1))
        ti = TI(task=task, execution_date=datetime.datetime.now())

        with self.assertRaises(TestError):
            ti.run()
Example #47
    def test_clear_task_instances_without_dag(self):
        dag = DAG('test_clear_task_instances_without_dag', start_date=DEFAULT_DATE,
                  end_date=DEFAULT_DATE + datetime.timedelta(days=10))
        task0 = DummyOperator(task_id='task_0', owner='test', dag=dag)
        task1 = DummyOperator(task_id='task_1', owner='test', dag=dag, retries=2)
        ti0 = TI(task=task0, execution_date=DEFAULT_DATE)
        ti1 = TI(task=task1, execution_date=DEFAULT_DATE)
        ti0.run()
        ti1.run()

        with create_session() as session:
            qry = session.query(TI).filter(
                TI.dag_id == dag.dag_id).all()
            clear_task_instances(qry, session)

        # When dag is None, max_tries will be the maximum of the original max_tries and try_number.
        ti0.refresh_from_db()
        ti1.refresh_from_db()
        # Next try to run will be try 2
        self.assertEqual(ti0.try_number, 2)
        self.assertEqual(ti0.max_tries, 1)
        self.assertEqual(ti1.try_number, 2)
        self.assertEqual(ti1.max_tries, 2)
Example #48
    def test_file_transfer_with_intermediate_dir_put(self):
        configuration.conf.set("core", "enable_xcom_pickling", "True")
        test_local_file_content = \
            b"This is local file content \n which is multiline " \
            b"continuing....with other character\nanother line here \n this is last line"
        # create a test file locally
        with open(self.test_local_filepath, 'wb') as f:
            f.write(test_local_file_content)

        # put test file to remote
        put_test_task = SFTPOperator(
            task_id="test_sftp",
            ssh_hook=self.hook,
            local_filepath=self.test_local_filepath,
            remote_filepath=self.test_remote_filepath_int_dir,
            operation=SFTPOperation.PUT,
            create_intermediate_dirs=True,
            dag=self.dag)
        self.assertIsNotNone(put_test_task)
        ti2 = TaskInstance(task=put_test_task,
                           execution_date=timezone.utcnow())
        ti2.run()

        # check the remote file content
        check_file_task = SSHOperator(task_id="test_check_file",
                                      ssh_hook=self.hook,
                                      command="cat {0}".format(
                                          self.test_remote_filepath_int_dir),
                                      do_xcom_push=True,
                                      dag=self.dag)
        self.assertIsNotNone(check_file_task)
        ti3 = TaskInstance(task=check_file_task,
                           execution_date=timezone.utcnow())
        ti3.run()
        self.assertEqual(
            ti3.xcom_pull(task_ids='test_check_file',
                          key='return_value').strip(), test_local_file_content)
Example #49
def task_test(args, dag=None):
    """Tests task for a given dag_id"""
    # We want log output from operators etc. to show up here. Normally
    # airflow.task would redirect to a file, but here we want it to propagate
    # up to the normal airflow handler.
    handlers = logging.getLogger('airflow.task').handlers
    already_has_stream_handler = False
    for handler in handlers:
        already_has_stream_handler = isinstance(handler, logging.StreamHandler)
        if already_has_stream_handler:
            break
    if not already_has_stream_handler:
        logging.getLogger('airflow.task').propagate = True

    dag = dag or get_dag(args)

    task = dag.get_task(task_id=args.task_id)
    # Add CLI provided task_params to task.params
    if args.task_params:
        passed_in_params = json.loads(args.task_params)
        task.params.update(passed_in_params)
    ti = TaskInstance(task, args.execution_date)

    try:
        if args.dry_run:
            ti.dry_run()
        else:
            ti.run(ignore_task_deps=True, ignore_ti_state=True, test_mode=True)
    except Exception:  # pylint: disable=broad-except
        if args.post_mortem:
            try:
                debugger = importlib.import_module("ipdb")
            except ImportError:
                debugger = importlib.import_module("pdb")
            debugger.post_mortem()
        else:
            raise

    def test_not_requeue_non_requeueable_task_instance(self):
        dag = models.DAG(dag_id='test_not_requeue_non_requeueable_task_instance')
        # Use BaseSensorOperator because sensors get one additional dep
        # via BaseSensorOperator().deps
        task = BaseSensorOperator(
            task_id='test_not_requeue_non_requeueable_task_instance_op',
            dag=dag,
            pool='test_pool',
            owner='airflow',
            start_date=timezone.datetime(2016, 2, 1, 0, 0, 0))
        ti = TI(
            task=task, execution_date=timezone.utcnow(), state=State.QUEUED)
        with create_session() as session:
            session.add(ti)
            session.commit()

        all_deps = RUNNING_DEPS | task.deps
        all_non_requeueable_deps = all_deps - REQUEUEABLE_DEPS
        patch_dict = {}
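        # Patch every non-requeueable dep to report success first; the loop below
        # then fails them one at a time and checks the TI stays QUEUED instead of
        # being re-queued or run.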
        for dep in all_non_requeueable_deps:
            class_name = dep.__class__.__name__
            dep_patch = patch('%s.%s.%s' % (dep.__module__, class_name,
                                            dep._get_dep_statuses.__name__))
            method_patch = dep_patch.start()
            method_patch.return_value = iter([TIDepStatus('mock_' + class_name, True,
                                                          'mock')])
            patch_dict[class_name] = (dep_patch, method_patch)

        for class_name, (dep_patch, method_patch) in patch_dict.items():
            method_patch.return_value = iter(
                [TIDepStatus('mock_' + class_name, False, 'mock')])
            ti.run()
            self.assertEqual(ti.state, State.QUEUED)
            dep_patch.return_value = TIDepStatus('mock_' + class_name, True, 'mock')

        for (dep_patch, method_patch) in patch_dict.values():
            dep_patch.stop()
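The patch targets above are assembled dynamically from each dep's module, class and method names; a minimal sketch of what one expanded target looks like (the concrete dep class is only an illustration, assuming Airflow's TriggerRuleDep is importable):

from unittest.mock import patch

# '%s.%s.%s' % (dep.__module__, class_name, dep._get_dep_statuses.__name__)
target = 'airflow.ti_deps.deps.trigger_rule_dep.TriggerRuleDep._get_dep_statuses'
with patch(target) as mocked:
    mocked.return_value = iter([])  # pretend this dep reports no statuses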
Example #51
    def test_track_python_operator(self):
        args = dict(start_date=days_ago(2))

        with DAG(dag_id="test_dag",
                 default_args=args,
                 schedule_interval=timedelta(minutes=1)):
            run_this = PythonOperator(
                task_id="print_the_context",
                provide_context=True,
                python_callable=_test_func,
            )
        track_task(run_this)
        #
        # env = {
        #     "AIRFLOW_CTX_DAG_ID": "test_dag",
        #     "AIRFLOW_CTX_EXECUTION_DATE": "emr_task",
        #     "AIRFLOW_CTX_TASK_ID": "1970-01-01T0000.000",
        #     "AIRFLOW_CTX_TRY_NUMBER": "1",
        #     "AIRFLOW_CTX_UID": get_airflow_instance_uid(),
        # }
        #
        # with mock.patch.dict(os.environ, env):
        ti = TaskInstance(run_this, utcnow())
        ti.run(ignore_depends_on_past=True, ignore_ti_state=True)
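If the commented-out environment block above were reinstated, it would presumably be used with mock.patch.dict around the run; a minimal sketch with a single key taken from that block:

import os
from unittest import mock

env = {"AIRFLOW_CTX_DAG_ID": "test_dag"}  # subset of the commented-out dict
with mock.patch.dict(os.environ, env):
    ti = TaskInstance(run_this, utcnow())
    ti.run(ignore_depends_on_past=True, ignore_ti_state=True)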
Example #52
    def test_file_transfer_no_intermediate_dir_error_get(self):
        test_remote_file_content = (
            "This is remote file content \n which is also multiline "
            "another line here \n this is last line. EOF")

        # create a test file remotely
        create_file_task = SSHOperator(
            task_id="test_create_file",
            ssh_hook=self.hook,
            command="echo '{0}' > {1}".format(test_remote_file_content,
                                              self.test_remote_filepath),
            do_xcom_push=True,
            dag=self.dag,
        )
        self.assertIsNotNone(create_file_task)
        ti1 = TaskInstance(task=create_file_task,
                           execution_date=timezone.utcnow())
        ti1.run()

        # Try to GET test file from remote
        # This should raise an error with "No such file" as the directory
        # does not exist
        with self.assertRaises(Exception) as error:
            get_test_task = SFTPOperator(
                task_id="test_sftp",
                ssh_hook=self.hook,
                local_filepath=self.test_local_filepath_int_dir,
                remote_filepath=self.test_remote_filepath,
                operation=SFTPOperation.GET,
                dag=self.dag,
            )
            self.assertIsNotNone(get_test_task)
            ti2 = TaskInstance(task=get_test_task,
                               execution_date=timezone.utcnow())
            ti2.run()
        self.assertIn('No such file', str(error.exception))
Example #53
def test_postgres_adapter(monkeypatch):
    session = _create_postgres_session()

    dag = viewflow.create_dag("./tests/projects/postgresql/simple_dag")
    task = dag.get_task("task_1")
    ti = TaskInstance(task, datetime(2020, 1, 1))

    with PostgresHook(postgres_conn_id="postgres_viewflow").get_conn() as conn:
        with conn.cursor() as cur:
            cur.execute("DROP TABLE IF EXISTS viewflow.task_1")

    ti.run(ignore_task_deps=True,
           ignore_ti_state=True,
           test_mode=True,
           session=session)

    with PostgresHook(postgres_conn_id="postgres_viewflow").get_conn() as conn:
        with conn.cursor() as cur:
            cur.execute("SELECT COUNT(*) FROM viewflow.task_1")
            (count, ) = cur.fetchone()
            assert count == 8
            cur.execute("SELECT __view_generated_at FROM viewflow.task_1")
            res = cur.fetchone()
            assert len(res) == 1
Example #54
    def test_ti_updates_with_task(self, session=None):
        """
        test that updating the executor_config propagates to the TaskInstance DB
        """
        dag = models.DAG(dag_id='test_run_pooling_task')
        task = DummyOperator(task_id='test_run_pooling_task_op', dag=dag, owner='airflow',
                             executor_config={'foo': 'bar'},
                             start_date=timezone.datetime(2016, 2, 1, 0, 0, 0))
        ti = TI(
            task=task, execution_date=timezone.utcnow())

        ti.run(session=session)
        tis = dag.get_task_instances()
        self.assertEqual({'foo': 'bar'}, tis[0].executor_config)

        task2 = DummyOperator(task_id='test_run_pooling_task_op', dag=dag, owner='airflow',
                              executor_config={'bar': 'baz'},
                              start_date=timezone.datetime(2016, 2, 1, 0, 0, 0))

        ti = TI(
            task=task2, execution_date=timezone.utcnow())
        ti.run(session=session)
        tis = dag.get_task_instances()
        self.assertEqual({'bar': 'baz'}, tis[1].executor_config)
Example #55
    def test_file_transfer_no_intermediate_dir_error_get(self):
        test_remote_file_content = (
            "This is remote file content \n which is also multiline "
            "another line here \n this is last line. EOF")

        # create a test file remotely
        create_file_task = SSHOperator(
            task_id="test_create_file",
            ssh_hook=self.hook,
            command=
            f"echo '{test_remote_file_content}' > {self.test_remote_filepath}",
            do_xcom_push=True,
            dag=self.dag,
        )
        assert create_file_task is not None
        ti1 = TaskInstance(task=create_file_task,
                           execution_date=timezone.utcnow())
        ti1.run()

        # Try to GET test file from remote
        # This should raise an error with "No such file" as the directory
        # does not exist
        with pytest.raises(Exception) as ctx:
            get_test_task = SFTPOperator(
                task_id="test_sftp",
                ssh_hook=self.hook,
                local_filepath=self.test_local_filepath_int_dir,
                remote_filepath=self.test_remote_filepath,
                operation=SFTPOperation.GET,
                dag=self.dag,
            )
            assert get_test_task is not None
            ti2 = TaskInstance(task=get_test_task,
                               execution_date=timezone.utcnow())
            ti2.run()
        assert 'No such file' in str(ctx.value)
Example #56
def run(args):

    utils.pessimistic_connection_handling()
    # Setting up logging
    log = os.path.expanduser(conf.get('core', 'BASE_LOG_FOLDER'))
    directory = log + "/{args.dag_id}/{args.task_id}".format(args=args)
    if not os.path.exists(directory):
        os.makedirs(directory)
    args.execution_date = dateutil.parser.parse(args.execution_date)
    iso = args.execution_date.isoformat()
    filename = "{directory}/{iso}".format(**locals())

    # store old log (to help with S3 appends)
    if os.path.exists(filename):
        with open(filename, 'r') as logfile:
            old_log = logfile.read()
    else:
        old_log = None

    subdir = None
    if args.subdir:
        subdir = args.subdir.replace(
            "DAGS_FOLDER", conf.get("core", "DAGS_FOLDER"))
        subdir = os.path.expanduser(subdir)
    logging.basicConfig(
        filename=filename,
        level=settings.LOGGING_LEVEL,
        format=settings.LOG_FORMAT)
    if not args.pickle:
        dagbag = DagBag(subdir)
        if args.dag_id not in dagbag.dags:
            msg = 'DAG [{0}] could not be found'.format(args.dag_id)
            logging.error(msg)
            raise AirflowException(msg)
        dag = dagbag.dags[args.dag_id]
        task = dag.get_task(task_id=args.task_id)
    else:
        session = settings.Session()
        logging.info('Loading pickle id {args.pickle}'.format(**locals()))
        dag_pickle = session.query(
            DagPickle).filter(DagPickle.id == args.pickle).first()
        if not dag_pickle:
            raise AirflowException("Who hid the pickle!? [missing pickle]")
        dag = dag_pickle.pickle
        task = dag.get_task(task_id=args.task_id)

    task_start_date = None
    if args.task_start_date:
        task_start_date = dateutil.parser.parse(args.task_start_date)
        task.start_date = task_start_date
    ti = TaskInstance(task, args.execution_date)

    if args.local:
        print("Logging into: " + filename)
        run_job = jobs.LocalTaskJob(
            task_instance=ti,
            mark_success=args.mark_success,
            force=args.force,
            pickle_id=args.pickle,
            task_start_date=task_start_date,
            ignore_dependencies=args.ignore_dependencies)
        run_job.run()
    elif args.raw:
        ti.run(
            mark_success=args.mark_success,
            force=args.force,
            ignore_dependencies=args.ignore_dependencies,
            job_id=args.job_id,
        )
    else:
        pickle_id = None
        if args.ship_dag:
            try:
                # Running remotely, so pickling the DAG
                session = settings.Session()
                pickle = DagPickle(dag)
                session.add(pickle)
                session.commit()
                pickle_id = pickle.id
                print((
                    'Pickled dag {dag} '
                    'as pickle_id:{pickle_id}').format(**locals()))
            except Exception as e:
                print('Could not pickle the DAG')
                print(e)
                raise e

        executor = DEFAULT_EXECUTOR
        executor.start()
        print("Sending to executor.")
        executor.queue_task_instance(
            ti,
            mark_success=args.mark_success,
            pickle_id=pickle_id,
            ignore_dependencies=args.ignore_dependencies,
            force=args.force)
        executor.heartbeat()
        executor.end()

    if conf.get('core', 'S3_LOG_FOLDER').startswith('s3:'):
        import boto
        s3_log = filename.replace(log, conf.get('core', 'S3_LOG_FOLDER'))
        bucket, key = s3_log.lstrip('s3:/').split('/', 1)
        if os.path.exists(filename):

            # get logs
            with open(filename, 'r') as logfile:
                new_log = logfile.read()

            # remove old logs (since they are already in S3)
            if old_log:
                new_log = new_log.replace(old_log, '')  # str.replace returns a new string

            try:
                s3 = boto.connect_s3()
                s3_key = boto.s3.key.Key(s3.get_bucket(bucket), key)

                # append new logs to old S3 logs, if available
                if s3_key.exists():
                    old_s3_log = s3_key.get_contents_as_string().decode()
                    new_log = old_s3_log + '\n' + new_log

                # send log to S3
                s3_key.set_contents_from_string(new_log)
            except Exception:
                print('Could not send logs to S3.')
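Note that lstrip('s3:/') above strips a character set rather than a literal prefix, so a bucket whose name starts with 's' or '3' would be mangled; a safer split, sketched with Python 3's urllib.parse (the path is a placeholder):

from urllib.parse import urlparse

parsed = urlparse('s3://my-bucket/dag_id/task_id/2020-01-01T00:00:00')
bucket, key = parsed.netloc, parsed.path.lstrip('/')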
Example #57
def run(args):

    utils.pessimistic_connection_handling()
    # Setting up logging
    log = os.path.expanduser(conf.get('core', 'BASE_LOG_FOLDER'))
    directory = log + "/{args.dag_id}/{args.task_id}".format(args=args)
    if not os.path.exists(directory):
        os.makedirs(directory)
    args.execution_date = dateutil.parser.parse(args.execution_date)
    iso = args.execution_date.isoformat()
    filename = "{directory}/{iso}".format(**locals())
    subdir = None
    if args.subdir:
        subdir = args.subdir.replace(
            "DAGS_FOLDER", conf.get("core", "DAGS_FOLDER"))
        subdir = os.path.expanduser(subdir)
    logging.basicConfig(
        filename=filename,
        level=settings.LOGGING_LEVEL,
        format=settings.LOG_FORMAT)
    if not args.pickle:
        dagbag = DagBag(subdir)
        if args.dag_id not in dagbag.dags:
            msg = 'DAG [{0}] could not be found'.format(args.dag_id)
            logging.error(msg)
            raise AirflowException(msg)
        dag = dagbag.dags[args.dag_id]
        task = dag.get_task(task_id=args.task_id)
    else:
        session = settings.Session()
        logging.info('Loading pickle id {args.pickle}'.format(**locals()))
        dag_pickle = session.query(
            DagPickle).filter(DagPickle.id == args.pickle).first()
        if not dag_pickle:
            raise AirflowException("Who hid the pickle!? [missing pickle]")
        dag = dag_pickle.pickle
        task = dag.get_task(task_id=args.task_id)

    task_start_date = None
    if args.task_start_date:
        task_start_date = dateutil.parser.parse(args.task_start_date)
        task.start_date = task_start_date
    ti = TaskInstance(task, args.execution_date)

    if args.local:
        print("Logging into: " + filename)
        run_job = jobs.LocalTaskJob(
            task_instance=ti,
            mark_success=args.mark_success,
            force=args.force,
            pickle_id=args.pickle,
            task_start_date=task_start_date,
            ignore_dependencies=args.ignore_dependencies)
        run_job.run()
    elif args.raw:
        ti.run(
            mark_success=args.mark_success,
            force=args.force,
            ignore_dependencies=args.ignore_dependencies,
            job_id=args.job_id,
        )
    else:
        pickle_id = None
        if args.ship_dag:
            try:
                # Running remotely, so pickling the DAG
                session = settings.Session()
                pickle = DagPickle(dag)
                session.add(pickle)
                session.commit()
                pickle_id = pickle.id
                print((
                    'Pickled dag {dag} '
                    'as pickle_id:{pickle_id}').format(**locals()))
            except Exception as e:
                print('Could not pickle the DAG')
                print(e)
                raise e

        executor = DEFAULT_EXECUTOR
        executor.start()
        print("Sending to executor.")
        executor.queue_task_instance(
            ti,
            mark_success=args.mark_success,
            pickle_id=pickle_id,
            ignore_dependencies=args.ignore_dependencies,
            force=args.force)
        executor.heartbeat()
        executor.end()
Example #58
    def test_xcom_push_and_pull(self, mock_conn, mock_run_query, mock_check_query_status):
        ti = TaskInstance(task=self.athena, execution_date=timezone.utcnow())
        ti.run()

        self.assertEqual(ti.xcom_pull(task_ids='test_aws_athena_operator'),
                         ATHENA_QUERY_ID)
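The mock arguments above come from a decorator stack that is not shown in this excerpt; with stacked mock.patch decorators, the bottom-most patch supplies the left-most mock argument, so mock_conn presumably corresponds to patching the Athena hook's get_conn, then run_query, then check_query_status. A generic sketch of that ordering with a stand-in class (names are placeholders):

from unittest import mock

class FakeAthenaHook:
    def get_conn(self): ...
    def run_query(self): ...
    def check_query_status(self): ...

@mock.patch.object(FakeAthenaHook, 'check_query_status')
@mock.patch.object(FakeAthenaHook, 'run_query')
@mock.patch.object(FakeAthenaHook, 'get_conn')
def demo(mock_conn, mock_run_query, mock_check_query_status):
    ...  # the first argument maps to the bottom decorator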
Example #59
def run(args, dag=None):
    db_utils.pessimistic_connection_handling()
    if dag:
        args.dag_id = dag.dag_id

    # Setting up logging
    log_base = os.path.expanduser(conf.get('core', 'BASE_LOG_FOLDER'))
    directory = log_base + "/{args.dag_id}/{args.task_id}".format(args=args)
    if not os.path.exists(directory):
        os.makedirs(directory)
    iso = args.execution_date.isoformat()
    filename = "{directory}/{iso}".format(**locals())

    logging.root.handlers = []
    logging.basicConfig(
        filename=filename,
        level=settings.LOGGING_LEVEL,
        format=settings.LOG_FORMAT)

    if not args.pickle and not dag:
        dag = get_dag(args)
    elif not dag:
        session = settings.Session()
        logging.info('Loading pickle id {args.pickle}'.format(**locals()))
        dag_pickle = session.query(
            DagPickle).filter(DagPickle.id == args.pickle).first()
        if not dag_pickle:
            raise AirflowException("Who hid the pickle!? [missing pickle]")
        dag = dag_pickle.pickle
    task = dag.get_task(task_id=args.task_id)

    ti = TaskInstance(task, args.execution_date)

    if args.local:
        print("Logging into: " + filename)
        run_job = jobs.LocalTaskJob(
            task_instance=ti,
            mark_success=args.mark_success,
            force=args.force,
            pickle_id=args.pickle,
            ignore_dependencies=args.ignore_dependencies,
            ignore_depends_on_past=args.ignore_depends_on_past,
            pool=args.pool)
        run_job.run()
    elif args.raw:
        ti.run(
            mark_success=args.mark_success,
            force=args.force,
            ignore_dependencies=args.ignore_dependencies,
            ignore_depends_on_past=args.ignore_depends_on_past,
            job_id=args.job_id,
            pool=args.pool,
        )
    else:
        pickle_id = None
        if args.ship_dag:
            try:
                # Running remotely, so pickling the DAG
                session = settings.Session()
                pickle = DagPickle(dag)
                session.add(pickle)
                session.commit()
                pickle_id = pickle.id
                print((
                    'Pickled dag {dag} '
                    'as pickle_id:{pickle_id}').format(**locals()))
            except Exception as e:
                print('Could not pickle the DAG')
                print(e)
                raise e

        executor = DEFAULT_EXECUTOR
        executor.start()
        print("Sending to executor.")
        executor.queue_task_instance(
            ti,
            mark_success=args.mark_success,
            pickle_id=pickle_id,
            ignore_dependencies=args.ignore_dependencies,
            ignore_depends_on_past=args.ignore_depends_on_past,
            force=args.force,
            pool=args.pool)
        executor.heartbeat()
        executor.end()

    # Force the log to flush, and set the handler to go back to normal so we
    # don't continue logging to the task's log file. The flush is important
    # because we subsequently read from the log to insert into S3 or Google
    # cloud storage.
    logging.root.handlers[0].flush()
    logging.root.handlers = []

    # store logs remotely
    remote_base = conf.get('core', 'REMOTE_BASE_LOG_FOLDER')

    # deprecated as of March 2016
    if not remote_base and conf.get('core', 'S3_LOG_FOLDER'):
        warnings.warn(
            'The S3_LOG_FOLDER conf key has been replaced by '
            'REMOTE_BASE_LOG_FOLDER. Your conf still works but please '
            'update airflow.cfg to ensure future compatibility.',
            DeprecationWarning)
        remote_base = conf.get('core', 'S3_LOG_FOLDER')

    if os.path.exists(filename):
        # read log and remove old logs to get just the latest additions

        with open(filename, 'r') as logfile:
            log = logfile.read()

        remote_log_location = filename.replace(log_base, remote_base)
        # S3
        if remote_base.startswith('s3:/'):
            logging_utils.S3Log().write(log, remote_log_location)
        # GCS
        elif remote_base.startswith('gs:/'):
            logging_utils.GCSLog().write(
                log,
                remote_log_location,
                append=True)
        # Other
        elif remote_base and remote_base != 'None':
            logging.error(
                'Unsupported remote log location: {}'.format(remote_base))
Example #60
    def test_xcom_push_and_pull(self, mock_conn, mock_run_query):
        ti = TaskInstance(task=self.operator, execution_date=timezone.utcnow())
        ti.run()

        assert ti.xcom_pull(task_ids=MOCK_DATA['task_id']) == str(MOCK_RESULT)