Example #1
    def test_build_command(self):
        hook = SparkSqlHook(**self._config)

        # subprocess.Popen requires a list, but joining it on spaces makes the assertions below simpler
        cmd = ' '.join(hook._prepare_command(""))

        # Check all the parameters
        assert "--executor-cores {}".format(
            self._config['executor_cores']) in cmd
        assert "--executor-memory {}".format(
            self._config['executor_memory']) in cmd
        assert "--keytab {}".format(self._config['keytab']) in cmd
        assert "--name {}".format(self._config['name']) in cmd
        assert "--num-executors {}".format(
            self._config['num_executors']) in cmd
        sql_path = get_after('-f', hook._prepare_command(""))
        assert self._config['sql'].strip() == sql_path

        # Check if all config settings are there
        for key_value in self._config['conf'].split(","):
            k, v = key_value.split('=')
            assert "--conf {0}={1}".format(k, v) in cmd

        if self._config['verbose']:
            assert "--verbose" in cmd
Example #2
    def test_spark_process_runcmd_with_list(self, mock_popen):
        # Given
        mock_popen.return_value.wait.return_value = 0

        # When
        hook = SparkSqlHook(conn_id='spark_default', sql='SELECT 1')
        hook.run_query(['--deploy-mode', 'cluster'])

        # Then
        self.assertEqual(
            mock_popen.mock_calls[0],
            call(
                [
                    'spark-sql',
                    '-e',
                    'SELECT 1',
                    '--master',
                    'yarn',
                    '--name',
                    'default-name',
                    '--verbose',
                    '--queue',
                    'default',
                    '--deploy-mode',
                    'cluster',
                ],
                stderr=-2,
                stdout=-1,
            ),
        )
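Note the `mock_popen` argument: these tests assume `subprocess.Popen` is patched at the hook's module, along these lines (a sketch; the exact patch target depends on the provider version):

    from unittest.mock import patch, call

    @patch('airflow.providers.apache.spark.hooks.spark_sql.subprocess.Popen')
    def test_spark_process_runcmd_with_list(self, mock_popen):
        ...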
Example #3
    def test_spark_process_runcmd(self, mock_popen):
        # Given
        mock_popen.return_value.stdout = io.StringIO(
            'Spark-sql communicates using stdout')
        mock_popen.return_value.stderr = io.StringIO('stderr')
        mock_popen.return_value.wait.return_value = 0

        # When
        hook = SparkSqlHook(conn_id='spark_default', sql='SELECT 1')
        with patch.object(hook.log, 'debug') as mock_debug:
            with patch.object(hook.log, 'info') as mock_info:
                hook.run_query()
                mock_debug.assert_called_once_with('Spark-Sql cmd: %s', [
                    'spark-sql', '-e', 'SELECT 1', '--master', 'yarn',
                    '--name', 'default-name', '--verbose', '--queue', 'default'
                ])
                mock_info.assert_called_once_with(
                    'Spark-sql communicates using stdout')

        # Then
        self.assertEqual(
            mock_popen.mock_calls[0],
            call([
                'spark-sql', '-e', 'SELECT 1', '--master', 'yarn', '--name',
                'default-name', '--verbose', '--queue', 'default'
            ],
                 stderr=-2,
                 stdout=-1))
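The bare integers in the expected `Popen` calls are the `subprocess` module's constants, i.e. the hook pipes stdout and merges stderr into it:

    import subprocess

    assert subprocess.PIPE == -1    # stdout=-1: capture the child's stdout
    assert subprocess.STDOUT == -2  # stderr=-2: redirect stderr into stdout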
Example #4
    def _get_hook(self) -> SparkSqlHook:
        """Get SparkSqlHook."""
        return SparkSqlHook(sql=self._sql,
                            conf=self._conf,
                            conn_id=self._conn_id,
                            total_executor_cores=self._total_executor_cores,
                            executor_cores=self._executor_cores,
                            executor_memory=self._executor_memory,
                            keytab=self._keytab,
                            principal=self._principal,
                            name=self._name,
                            num_executors=self._num_executors,
                            master=self._master,
                            verbose=self._verbose,
                            yarn_queue=self._yarn_queue)
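`_get_hook` here is a `SparkSqlOperator` method that simply forwards the operator's constructor arguments to the hook. A minimal sketch of using that operator in a DAG (import path as in recent provider releases; the task id, query, and connection are placeholders):

    from airflow.providers.apache.spark.operators.spark_sql import SparkSqlOperator

    spark_sql_task = SparkSqlOperator(
        task_id='run_spark_sql',              # placeholder task id
        sql='SELECT COUNT(*) FROM my_table',  # placeholder query
        master='yarn',
        conn_id='spark_default',
    )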
Example #5
    def execute(self, context):
        """Call the SparkSqlHook to run the provided SQL query."""
        self._hook = SparkSqlHook(
            sql=self._sql,
            conf=self._conf,
            conn_id=self._conn_id,
            total_executor_cores=self._total_executor_cores,
            executor_cores=self._executor_cores,
            executor_memory=self._executor_memory,
            keytab=self._keytab,
            principal=self._principal,
            name=self._name,
            num_executors=self._num_executors,
            master=self._master,
            yarn_queue=self._yarn_queue)
        self._hook.run_query()
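Under the hood, `run_query` shells out to the `spark-sql` binary and streams its output into the task log, roughly as follows (a behavioral sketch consistent with the tests above, not the provider's verbatim code):

    import subprocess

    cmd = ['spark-sql', '-e', 'SELECT 1', '--master', 'yarn']
    proc = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
    for line in iter(proc.stdout.readline, b''):
        print(line.decode().rstrip())  # the hook logs each line via self.log.info
    returncode = proc.wait()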
Example #6
    def test_spark_process_runcmd_and_fail(self, mock_popen):
        # Given
        sql = 'SELECT 1'
        master = 'local'
        params = '--deploy-mode cluster'
        status = 1
        mock_popen.return_value.wait.return_value = status

        # When
        with pytest.raises(AirflowException) as ctx:
            hook = SparkSqlHook(
                conn_id='spark_default',
                sql=sql,
                master=master,
            )
            hook.run_query(params)

        # Then
        assert str(
            ctx.value
        ) == "Cannot execute '{}' on {} (additional parameters: '{}'). Process exit code: {}.".format(
            sql, master, params, status)
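The asserted message comes from the hook's exit-code check after the process finishes; the failure path is roughly (paraphrased from the assertion above, not the provider's verbatim code):

    from airflow.exceptions import AirflowException

    if returncode:
        raise AirflowException(
            "Cannot execute '{}' on {} (additional parameters: '{}'). "
            "Process exit code: {}.".format(sql, master, params, returncode)
        )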