    def test_hive_dryrun(self, mock_popen, mock_temp_dir):
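        # NOTE: this listing omits the @mock.patch decorators that inject
        # mock_popen and mock_temp_dir (e.g. @mock.patch('subprocess.Popen')
        # and a patch of tempfile's random name sequence, which makes the
        # temp paths below deterministic).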
        mock_subprocess = MockSubProcess()
        mock_popen.return_value = mock_subprocess
        mock_temp_dir.return_value = "tst"

        op = HiveOperator(task_id='dry_run_basic_hql',
                          hql=self.hql,
                          dag=self.dag)
        op.dry_run()

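        # Expected invocation: one airflow.ctx.* -hiveconf pair per context
        # variable (empty values here, since dry_run() executes outside a
        # task instance), the queue settings, and the generated HQL script path.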
        hive_cmd = [
            'beeline', '-u', '"jdbc:hive2://localhost:10000/default"',
            '-hiveconf', 'airflow.ctx.dag_id=', '-hiveconf',
            'airflow.ctx.task_id=', '-hiveconf', 'airflow.ctx.execution_date=',
            '-hiveconf', 'airflow.ctx.dag_run_id=', '-hiveconf',
            'airflow.ctx.dag_owner=', '-hiveconf', 'airflow.ctx.dag_email=',
            '-hiveconf', 'mapreduce.job.queuename=airflow', '-hiveconf',
            'mapred.job.queue.name=airflow', '-hiveconf',
            'tez.queue.name=airflow', '-f', '/tmp/airflow_hiveop_tst/tmptst'
        ]
        mock_popen.assert_called_with(hive_cmd,
                                      stdout=mock_subprocess.PIPE,
                                      stderr=mock_subprocess.STDOUT,
                                      cwd="/tmp/airflow_hiveop_tst",
                                      close_fds=True)

    def test_beeline(self, mock_popen, mock_temp_dir):
        mock_subprocess = MockSubProcess()
        mock_popen.return_value = mock_subprocess
        mock_temp_dir.return_value = "tst"

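        # In a real task run the airflow.ctx.* hiveconfs carry concrete
        # values, and mapred_job_name is forwarded as mapred.job.name.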
        hive_cmd = [
            'beeline', '-u', '"jdbc:hive2://localhost:10000/default"',
            '-hiveconf', 'airflow.ctx.dag_id=test_dag_id', '-hiveconf',
            'airflow.ctx.task_id=beeline_hql', '-hiveconf',
            'airflow.ctx.execution_date=2015-01-01T00:00:00+00:00',
            '-hiveconf', 'airflow.ctx.dag_run_id=', '-hiveconf',
            'airflow.ctx.dag_owner=airflow', '-hiveconf',
            'airflow.ctx.dag_email=', '-hiveconf',
            'mapreduce.job.queuename=airflow', '-hiveconf',
            'mapred.job.queue.name=airflow', '-hiveconf',
            'tez.queue.name=airflow', '-hiveconf',
            'mapred.job.name=test_job_name', '-f',
            '/tmp/airflow_hiveop_tst/tmptst'
        ]

        op = HiveOperator(task_id='beeline_hql',
                          hive_cli_conn_id='hive_cli_default',
                          hql=self.hql,
                          dag=self.dag,
                          mapred_job_name="test_job_name")
        op.run(start_date=DEFAULT_DATE,
               end_date=DEFAULT_DATE,
               ignore_ti_state=True)
        mock_popen.assert_called_with(hive_cmd,
                                      stdout=mock_subprocess.PIPE,
                                      stderr=mock_subprocess.STDOUT,
                                      cwd="/tmp/airflow_hiveop_tst",
                                      close_fds=True)
Example #3
    def test_run_cli(self, mock_popen, mock_temp_dir):
        mock_subprocess = MockSubProcess()
        mock_popen.return_value = mock_subprocess
        mock_temp_dir.return_value = "test_run_cli"

        with mock.patch.dict('os.environ', {
            'AIRFLOW_CTX_DAG_ID': 'test_dag_id',
            'AIRFLOW_CTX_TASK_ID': 'test_task_id',
            'AIRFLOW_CTX_EXECUTION_DATE': '2015-01-01T00:00:00+00:00',
            'AIRFLOW_CTX_DAG_RUN_ID': '55',
            'AIRFLOW_CTX_DAG_OWNER': 'airflow',
            'AIRFLOW_CTX_DAG_EMAIL': '[email protected]',
        }):

            hook = MockHiveCliHook()
            hook.run_cli("SHOW DATABASES")

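        # Each AIRFLOW_CTX_* environment variable above should reappear as an
        # airflow.ctx.* -hiveconf pair on the beeline command line.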
        hive_cmd = ['beeline', '-u', '"jdbc:hive2://localhost:10000/default"', '-hiveconf',
                    'airflow.ctx.dag_id=test_dag_id', '-hiveconf', 'airflow.ctx.task_id=test_task_id',
                    '-hiveconf', 'airflow.ctx.execution_date=2015-01-01T00:00:00+00:00', '-hiveconf',
                    'airflow.ctx.dag_run_id=55', '-hiveconf', 'airflow.ctx.dag_owner=airflow',
                    '-hiveconf', '[email protected]', '-hiveconf',
                    'mapreduce.job.queuename=airflow', '-hiveconf', 'mapred.job.queue.name=airflow',
                    '-hiveconf', 'tez.queue.name=airflow', '-f',
                    '/tmp/airflow_hiveop_test_run_cli/tmptest_run_cli']

        mock_popen.assert_called_with(
            hive_cmd,
            stdout=mock_subprocess.PIPE,
            stderr=mock_subprocess.STDOUT,
            cwd="/tmp/airflow_hiveop_test_run_cli",
            close_fds=True
        )
Example #4
    def test_mysql_to_hive_tblproperties(self, mock_popen, mock_temp_dir):
        mock_subprocess = MockSubProcess()
        mock_popen.return_value = mock_subprocess
        mock_temp_dir.return_value = "test_mysql_to_hive"

        with mock.patch.dict('os.environ', self.env_vars):
            sql = "SELECT * FROM baby_names LIMIT 1000;"
            op = MySqlToHiveOperator(
                task_id='test_m2h',
                hive_cli_conn_id='hive_cli_default',
                sql=sql,
                hive_table='test_mysql_to_hive',
                recreate=True,
                delimiter=",",
                tblproperties={'test_property': 'test_value'},
                dag=self.dag)
            op.run(start_date=DEFAULT_DATE,
                   end_date=DEFAULT_DATE, ignore_ti_state=True)

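        # tblproperties only alters the generated HQL script, so the expected
        # beeline invocation matches that of a plain transfer.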
        hive_cmd = ['beeline', '-u', '"jdbc:hive2://localhost:10000/default"', '-hiveconf',
                    'airflow.ctx.dag_id=unit_test_dag', '-hiveconf', 'airflow.ctx.task_id=test_m2h',
                    '-hiveconf', 'airflow.ctx.execution_date=2015-01-01T00:00:00+00:00', '-hiveconf',
                    'airflow.ctx.dag_run_id=55', '-hiveconf', 'airflow.ctx.dag_owner=airflow',
                    '-hiveconf', '[email protected]', '-hiveconf',
                    'mapreduce.job.queuename=airflow', '-hiveconf', 'mapred.job.queue.name=airflow',
                    '-hiveconf', 'tez.queue.name=airflow',
                    '-f', '/tmp/airflow_hiveop_test_mysql_to_hive/tmptest_mysql_to_hive']

        mock_popen.assert_called_with(
            hive_cmd,
            stdout=mock_subprocess.PIPE,
            stderr=mock_subprocess.STDOUT,
            cwd="/tmp/airflow_hiveop_test_mysql_to_hive",
            close_fds=True
        )
Example #5
    def test_execute_bteq_runcmd_return_last_line(self, mock_tmpfile,
                                                  mock_tmpdir, mock_popen):
        # Given
        mock_subprocess = MockSubProcess(output=self._bteq_subprocess_output)
        mock_subprocess.returncode = 0
        mock_popen.return_value = mock_subprocess
        mock_tmpdir.return_value.__enter__.return_value = '/tmp/airflowtmp_ttu_bteq'
        mock_tmpfile.return_value.__enter__.return_value.name = 'test.bteq'

        # When
        hook = TtuHook(ttu_conn_id='ttu_default')

        # Then
        res = hook.execute_bteq(bteq="", xcom_push_flag=True)
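        # With xcom_push_flag=True, execute_bteq returns the last line of the
        # BTEQ output (the return-code line in the canned transcript).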
        self.assertEqual(
            "*** RC (return code) = 0",
            res,
        )
Example #6
    def test_execute_bteq_runcmd(self, mock_tmpfile, mock_tmpdir, mock_popen):
        # Given
        mock_subprocess = MockSubProcess()
        mock_subprocess.returncode = 0
        mock_popen.return_value = mock_subprocess
        mock_tmpdir.return_value.__enter__.return_value = '/tmp/airflowtmp_ttu_bteq'
        mock_tmpfile.return_value.__enter__.return_value.name = 'test.bteq'

        # When
        hook = TtuHook(ttu_conn_id='ttu_default')
        hook.execute_bteq(bteq="")

        # Then
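        # The BTEQ script is fed through stdin, so only the bare `bteq`
        # binary is expected on the command line.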
        mock_popen.assert_called_with(['bteq'],
                                      stdin=mock.ANY,
                                      stdout=mock_subprocess.PIPE,
                                      stderr=mock_subprocess.STDOUT,
                                      cwd='/tmp/airflowtmp_ttu_bteq',
                                      preexec_fn=mock.ANY)
Example #7
    def test_execute_bteq_runcmd_error_noraise(self, mock_tmpfile, mock_tmpdir,
                                               mock_popen):
        # Given
        mock_subprocess = MockSubProcess(
            output=self._bteq_error_no_failure_subprocess_output)
        mock_subprocess.returncode = 0
        mock_popen.return_value = mock_subprocess
        mock_tmpdir.return_value.__enter__.return_value = '/tmp/airflowtmp_ttu_bteq'
        mock_tmpfile.return_value.__enter__.return_value.name = 'test.bteq'

        # When
        hook = TtuHook(ttu_conn_id='ttu_default')

        # Then
        with self.assertLogs(level="INFO") as cm:
            hook.execute_bteq(bteq="")
        self.assertEqual(
            "INFO:airflow.providers.teradata.hooks.ttu.TtuHook:BTEQ command exited with return code 0",
            cm.output[-1],
        )
Example #8
    def test_execute_bteq_runcmd_error_raise(self, mock_tmpfile, mock_tmpdir,
                                             mock_popen):
        # Given
        mock_subprocess = MockSubProcess(
            output=self._bteq_failure_subprocess_output)
        mock_subprocess.returncode = 311
        mock_popen.return_value = mock_subprocess
        mock_tmpdir.return_value.__enter__.return_value = '/tmp/airflowtmp_ttu_bteq'
        mock_tmpfile.return_value.__enter__.return_value.name = 'test.bteq'

        # When
        hook = TtuHook(ttu_conn_id='ttu_default')

        # Then
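        # A non-zero return code together with a `*** Failure` line in the
        # output surfaces as an AirflowException carrying both details.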
        with self.assertRaises(AirflowException) as cm:
            hook.execute_bteq(bteq="")
        msg = (
            "BTEQ command exited with return code 311 because of "
            "*** Failure 3706 Syntax error: expected something between '(' and the string 'test'"
        )
        self.assertEqual(str(cm.exception), msg)

    def test_mysql_to_hive_partition(self, mock_popen, mock_temp_dir):
        mock_subprocess = MockSubProcess()
        mock_popen.return_value = mock_subprocess
        mock_temp_dir.return_value = "test_mysql_to_hive_part"

        with mock.patch.dict('os.environ', self.env_vars):
            sql = "SELECT * FROM baby_names LIMIT 1000;"
            t = MySqlToHiveTransfer(task_id='test_m2h',
                                    hive_cli_conn_id='beeline_default',
                                    sql=sql,
                                    hive_table='test_mysql_to_hive_part',
                                    partition={'ds': DEFAULT_DATE_DS},
                                    recreate=False,
                                    create=True,
                                    delimiter=",",
                                    dag=self.dag)
            t.run(start_date=DEFAULT_DATE,
                  end_date=DEFAULT_DATE,
                  ignore_ti_state=True)
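
        # Loading into a partition uses the same beeline invocation; only the
        # generated HQL (its PARTITION clause) differs.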
        hive_cmd = [
            u'beeline', u'-u', u'"jdbc:hive2://localhost:10000/default"',
            u'-hiveconf', u'[email protected]',
            u'-hiveconf', u'airflow.ctx.dag_id=test_dag_id', u'-hiveconf',
            u'airflow.ctx.dag_owner=airflow', u'-hiveconf',
            u'airflow.ctx.dag_run_id=55', u'-hiveconf',
            u'airflow.ctx.execution_date=2015-01-01T00:00:00+00:00',
            u'-hiveconf', u'airflow.ctx.task_id=test_task_id', u'-hiveconf',
            u'mapreduce.job.queuename=airflow', u'-hiveconf',
            u'mapred.job.queue.name=airflow', u'-hiveconf',
            u'tez.queue.name=airflow', u'-f',
            u'/tmp/airflow_hiveop_test_mysql_to_hive_part/tmptest_mysql_to_hive_part'
        ]

        mock_popen.assert_called_with(
            hive_cmd,
            stdout=mock_subprocess.PIPE,
            stderr=mock_subprocess.STDOUT,
            cwd=u"/tmp/airflow_hiveop_test_mysql_to_hive_part",
            close_fds=True)

    def test_run_cli_with_hive_conf(self, mock_popen):
        hql = (
            "set key;\n"
            "set airflow.ctx.dag_id;\nset airflow.ctx.dag_run_id;\n"
            "set airflow.ctx.task_id;\nset airflow.ctx.execution_date;\n"
        )

        dag_id_ctx_var_name = AIRFLOW_VAR_NAME_FORMAT_MAPPING['AIRFLOW_CONTEXT_DAG_ID']['env_var_format']
        task_id_ctx_var_name = AIRFLOW_VAR_NAME_FORMAT_MAPPING['AIRFLOW_CONTEXT_TASK_ID']['env_var_format']
        execution_date_ctx_var_name = AIRFLOW_VAR_NAME_FORMAT_MAPPING['AIRFLOW_CONTEXT_EXECUTION_DATE'][
            'env_var_format'
        ]
        dag_run_id_ctx_var_name = AIRFLOW_VAR_NAME_FORMAT_MAPPING['AIRFLOW_CONTEXT_DAG_RUN_ID'][
            'env_var_format'
        ]

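        # Canned beeline output: each `set ...;` statement echoes its hiveconf
        # value back, and run_cli() returns this text for the assertions below.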
        mock_output = [
            'Connecting to jdbc:hive2://localhost:10000/default',
            'log4j:WARN No appenders could be found for logger (org.apache.hive.jdbc.Utils).',
            'log4j:WARN Please initialize the log4j system properly.',
            'log4j:WARN See http://logging.apache.org/log4j/1.2/faq.html#noconfig for more info.',
            'Connected to: Apache Hive (version 1.2.1.2.3.2.0-2950)',
            'Driver: Hive JDBC (version 1.2.1.spark2)',
            'Transaction isolation: TRANSACTION_REPEATABLE_READ',
            '0: jdbc:hive2://localhost:10000/default> USE default;',
            'No rows affected (0.37 seconds)',
            '0: jdbc:hive2://localhost:10000/default> set key;',
            '+------------+--+',
            '|    set     |',
            '+------------+--+',
            '| key=value  |',
            '+------------+--+',
            '1 row selected (0.133 seconds)',
            '0: jdbc:hive2://localhost:10000/default> set airflow.ctx.dag_id;',
            '+---------------------------------+--+',
            '|               set               |',
            '+---------------------------------+--+',
            '| airflow.ctx.dag_id=test_dag_id  |',
            '+---------------------------------+--+',
            '1 row selected (0.008 seconds)',
            '0: jdbc:hive2://localhost:10000/default> set airflow.ctx.dag_run_id;',
            '+-----------------------------------------+--+',
            '|                   set                   |',
            '+-----------------------------------------+--+',
            '| airflow.ctx.dag_run_id=test_dag_run_id  |',
            '+-----------------------------------------+--+',
            '1 row selected (0.007 seconds)',
            '0: jdbc:hive2://localhost:10000/default> set airflow.ctx.task_id;',
            '+-----------------------------------+--+',
            '|                set                |',
            '+-----------------------------------+--+',
            '| airflow.ctx.task_id=test_task_id  |',
            '+-----------------------------------+--+',
            '1 row selected (0.009 seconds)',
            '0: jdbc:hive2://localhost:10000/default> set airflow.ctx.execution_date;',
            '+-------------------------------------------------+--+',
            '|                       set                       |',
            '+-------------------------------------------------+--+',
            '| airflow.ctx.execution_date=test_execution_date  |',
            '+-------------------------------------------------+--+',
            '1 row selected (0.006 seconds)',
            '0: jdbc:hive2://localhost:10000/default> ',
            '0: jdbc:hive2://localhost:10000/default> ',
            'Closing: 0: jdbc:hive2://localhost:10000/default',
            '',
        ]

        with mock.patch.dict(
            'os.environ',
            {
                dag_id_ctx_var_name: 'test_dag_id',
                task_id_ctx_var_name: 'test_task_id',
                execution_date_ctx_var_name: 'test_execution_date',
                dag_run_id_ctx_var_name: 'test_dag_run_id',
            },
        ):

            hook = MockHiveCliHook()
            mock_popen.return_value = MockSubProcess(output=mock_output)

            output = hook.run_cli(hql=hql, hive_conf={'key': 'value'})
            process_inputs = " ".join(mock_popen.call_args_list[0][0][0])

            self.assertIn('value', process_inputs)
            self.assertIn('test_dag_id', process_inputs)
            self.assertIn('test_task_id', process_inputs)
            self.assertIn('test_execution_date', process_inputs)
            self.assertIn('test_dag_run_id', process_inputs)

            self.assertIn('value', output)
            self.assertIn('test_dag_id', output)
            self.assertIn('test_task_id', output)
            self.assertIn('test_execution_date', output)
            self.assertIn('test_dag_run_id', output)
Example #11
    def test_mysql_to_hive_verify_loaded_values(self, mock_popen,
                                                mock_temp_dir):
        mock_subprocess = MockSubProcess()
        mock_popen.return_value = mock_subprocess
        mock_temp_dir.return_value = "test_mysql_to_hive"

        mysql_table = 'test_mysql_to_hive'
        hive_table = 'test_mysql_to_hive'

        hook = MySqlHook()

        try:
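            # Boundary values: the unsigned maxima and signed minima of the
            # TINYINT/SMALLINT/MEDIUMINT/INT/BIGINT columns created below.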
            minmax = (
                255,
                65535,
                16777215,
                4294967295,
                18446744073709551615,
                -128,
                -32768,
                -8388608,
                -2147483648,
                -9223372036854775808,
            )

            with hook.get_conn() as conn:
                conn.execute(f"DROP TABLE IF EXISTS {mysql_table}")
                conn.execute("""
                    CREATE TABLE {} (
                        c0 TINYINT   UNSIGNED,
                        c1 SMALLINT  UNSIGNED,
                        c2 MEDIUMINT UNSIGNED,
                        c3 INT       UNSIGNED,
                        c4 BIGINT    UNSIGNED,
                        c5 TINYINT,
                        c6 SMALLINT,
                        c7 MEDIUMINT,
                        c8 INT,
                        c9 BIGINT
                    )
                """.format(mysql_table))
                conn.execute("""
                    INSERT INTO {} VALUES (
                        {}, {}, {}, {}, {}, {}, {}, {}, {}, {}
                    )
                """.format(mysql_table, *minmax))

            with mock.patch.dict('os.environ', self.env_vars):
                op = MySqlToHiveOperator(
                    task_id='test_m2h',
                    hive_cli_conn_id='hive_cli_default',
                    sql=f"SELECT * FROM {mysql_table}",
                    hive_table=hive_table,
                    recreate=True,
                    delimiter=",",
                    dag=self.dag,
                )
                op.run(start_date=DEFAULT_DATE,
                       end_date=DEFAULT_DATE,
                       ignore_ti_state=True)

                mock_cursor = MockConnectionCursor()
                mock_cursor.iterable = [minmax]
                hive_hook = MockHiveServer2Hook(connection_cursor=mock_cursor)

                result = hive_hook.get_records(f"SELECT * FROM {hive_table}")
                assert result[0] == minmax

                hive_cmd = [
                    'beeline',
                    '-u',
                    '"jdbc:hive2://localhost:10000/default"',
                    '-hiveconf',
                    'airflow.ctx.dag_id=unit_test_dag',
                    '-hiveconf',
                    'airflow.ctx.task_id=test_m2h',
                    '-hiveconf',
                    'airflow.ctx.execution_date=2015-01-01T00:00:00+00:00',
                    '-hiveconf',
                    'airflow.ctx.dag_run_id=55',
                    '-hiveconf',
                    'airflow.ctx.dag_owner=airflow',
                    '-hiveconf',
                    '[email protected]',
                    '-hiveconf',
                    'mapreduce.job.queuename=airflow',
                    '-hiveconf',
                    'mapred.job.queue.name=airflow',
                    '-hiveconf',
                    'tez.queue.name=airflow',
                    '-f',
                    '/tmp/airflow_hiveop_test_mysql_to_hive/tmptest_mysql_to_hive',
                ]

                mock_popen.assert_called_with(
                    hive_cmd,
                    stdout=mock_subprocess.PIPE,
                    stderr=mock_subprocess.STDOUT,
                    cwd="/tmp/airflow_hiveop_test_mysql_to_hive",
                    close_fds=True,
                )

        finally:
            with hook.get_conn() as conn:
                conn.execute(f"DROP TABLE IF EXISTS {mysql_table}")
Example #12
    def test_mysql_to_hive_verify_csv_special_char(self, mock_popen,
                                                   mock_temp_dir):
        mock_subprocess = MockSubProcess()
        mock_popen.return_value = mock_subprocess
        mock_temp_dir.return_value = "test_mysql_to_hive"

        mysql_table = 'test_mysql_to_hive'
        hive_table = 'test_mysql_to_hive'

        hook = MySqlHook()

        try:
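            # A value containing quotes and brackets, exercising QUOTE_NONE
            # with an explicit escapechar on the CSV export path.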
            db_record = ('c0', '["true"]')
            with hook.get_conn() as conn:
                conn.execute(f"DROP TABLE IF EXISTS {mysql_table}")
                conn.execute("""
                    CREATE TABLE {} (
                        c0 VARCHAR(25),
                        c1 VARCHAR(25)
                    )
                """.format(mysql_table))
                conn.execute("""
                    INSERT INTO {} VALUES (
                        '{}', '{}'
                    )
                """.format(mysql_table, *db_record))

            with mock.patch.dict('os.environ', self.env_vars):
                import unicodecsv as csv

                op = MySqlToHiveOperator(
                    task_id='test_m2h',
                    hive_cli_conn_id='hive_cli_default',
                    sql=f"SELECT * FROM {mysql_table}",
                    hive_table=hive_table,
                    recreate=True,
                    delimiter=",",
                    quoting=csv.QUOTE_NONE,
                    quotechar='',
                    escapechar='@',
                    dag=self.dag,
                )
                op.run(start_date=DEFAULT_DATE,
                       end_date=DEFAULT_DATE,
                       ignore_ti_state=True)

                mock_cursor = MockConnectionCursor()
                mock_cursor.iterable = [('c0', '["true"]'), (2, 2)]
                hive_hook = MockHiveServer2Hook(connection_cursor=mock_cursor)

                result = hive_hook.get_records(f"SELECT * FROM {hive_table}")
            assert result[0] == db_record

            hive_cmd = [
                'beeline',
                '-u',
                '"jdbc:hive2://localhost:10000/default"',
                '-hiveconf',
                'airflow.ctx.dag_id=unit_test_dag',
                '-hiveconf',
                'airflow.ctx.task_id=test_m2h',
                '-hiveconf',
                'airflow.ctx.execution_date=2015-01-01T00:00:00+00:00',
                '-hiveconf',
                'airflow.ctx.dag_run_id=55',
                '-hiveconf',
                'airflow.ctx.dag_owner=airflow',
                '-hiveconf',
                '[email protected]',
                '-hiveconf',
                'mapreduce.job.queuename=airflow',
                '-hiveconf',
                'mapred.job.queue.name=airflow',
                '-hiveconf',
                'tez.queue.name=airflow',
                '-f',
                '/tmp/airflow_hiveop_test_mysql_to_hive/tmptest_mysql_to_hive',
            ]

            mock_popen.assert_called_with(
                hive_cmd,
                stdout=mock_subprocess.PIPE,
                stderr=mock_subprocess.STDOUT,
                cwd="/tmp/airflow_hiveop_test_mysql_to_hive",
                close_fds=True,
            )
        finally:
            with hook.get_conn() as conn:
                conn.execute(f"DROP TABLE IF EXISTS {mysql_table}")