def poke(self, context):
     """Report whether ``self.filepath`` is found by the WebHDFS hook.

     Builds a ``WebHDFSHook`` from ``self.webhdfs_conn_id``, logs the path
     being probed and returns the result of ``check_for_path`` for it.
     ``context`` is accepted but not used here.
     """
     from airflow.providers.apache.hdfs.hooks.webhdfs import WebHDFSHook

     webhdfs = WebHDFSHook(self.webhdfs_conn_id)
     self.log.info('Poking for file %s', self.filepath)
     path_found = webhdfs.check_for_path(hdfs_path=self.filepath)
     return path_found
示例#2
0
class TestWebHDFSHook(unittest.TestCase):
    """Unit tests for ``WebHDFSHook`` with clients, sockets and connections patched out."""

    def setUp(self):
        """Create a fresh hook with default arguments for every test."""
        self.webhdfs_hook = WebHDFSHook()

    @patch('airflow.providers.apache.hdfs.hooks.webhdfs.InsecureClient')
    @patch(
        'airflow.providers.apache.hdfs.hooks.webhdfs.WebHDFSHook.get_connections',
        return_value=[
            Connection(host='host_1', port=123),
            Connection(host='host_2', port=321, login='******')
        ],
    )
    @patch("airflow.providers.apache.hdfs.hooks.webhdfs.socket")
    def test_get_conn(self, socket_mock, mock_get_connections,
                      mock_insecure_client):
        """``get_conn`` moves on to the next connection when a client fails.

        The first ``InsecureClient`` construction raises ``HdfsError`` and the
        second one succeeds; the test checks both connections were attempted
        and that the working client is the one returned.
        """
        mock_insecure_client.side_effect = [
            HdfsError('Error'), mock_insecure_client.return_value
        ]
        # NOTE(review): connect_ex() == 0 presumably marks the host as
        # reachable for the hook's socket probe - confirm against the hook.
        socket_mock.socket.return_value.connect_ex.return_value = 0
        conn = self.webhdfs_hook.get_conn()

        mock_insecure_client.assert_has_calls([
            call(
                f'http://{connection.host}:{connection.port}',
                user=connection.login,
            ) for connection in mock_get_connections.return_value
        ])
        mock_insecure_client.return_value.status.assert_called_once_with('/')
        assert conn == mock_insecure_client.return_value

    @patch('airflow.providers.apache.hdfs.hooks.webhdfs.KerberosClient',
           create=True)
    @patch(
        'airflow.providers.apache.hdfs.hooks.webhdfs.WebHDFSHook.get_connections',
        return_value=[Connection(host='host_1', port=123)],
    )
    @patch(
        'airflow.providers.apache.hdfs.hooks.webhdfs._kerberos_security_mode',
        return_value=True)
    @patch("airflow.providers.apache.hdfs.hooks.webhdfs.socket")
    def test_get_conn_kerberos_security_mode(self, socket_mock,
                                             mock_kerberos_security_mode,
                                             mock_get_connections,
                                             mock_kerberos_client):
        """With ``_kerberos_security_mode`` patched to True, a ``KerberosClient`` is built and returned."""
        socket_mock.socket.return_value.connect_ex.return_value = 0
        conn = self.webhdfs_hook.get_conn()

        connection = mock_get_connections.return_value[0]
        mock_kerberos_client.assert_called_once_with(
            f'http://{connection.host}:{connection.port}')
        assert conn == mock_kerberos_client.return_value

    @patch(
        'airflow.providers.apache.hdfs.hooks.webhdfs.WebHDFSHook._find_valid_server',
        return_value=None)
    def test_get_conn_no_connection_found(self, mock_find_valid_server):
        """``get_conn`` raises ``AirflowWebHDFSHookException`` when ``_find_valid_server`` returns None."""
        with pytest.raises(AirflowWebHDFSHookException):
            self.webhdfs_hook.get_conn()

    @patch('airflow.providers.apache.hdfs.hooks.webhdfs.WebHDFSHook.get_conn')
    def test_check_for_path(self, mock_get_conn):
        """``check_for_path`` calls ``status(path, strict=False)`` and returns its truthiness."""
        hdfs_path = 'path'

        exists_path = self.webhdfs_hook.check_for_path(hdfs_path)

        mock_get_conn.assert_called_once_with()
        mock_status = mock_get_conn.return_value.status
        mock_status.assert_called_once_with(hdfs_path, strict=False)
        assert exists_path == bool(mock_status.return_value)

    @patch('airflow.providers.apache.hdfs.hooks.webhdfs.WebHDFSHook.get_conn')
    def test_load_file(self, mock_get_conn):
        """``load_file`` forwards to the client's ``upload`` with ``overwrite=True`` and ``n_threads=1``."""
        source = 'source'
        destination = 'destination'

        self.webhdfs_hook.load_file(source, destination)

        mock_get_conn.assert_called_once_with()
        mock_upload = mock_get_conn.return_value.upload
        mock_upload.assert_called_once_with(hdfs_path=destination,
                                            local_path=source,
                                            overwrite=True,
                                            n_threads=1)

    def test_simple_init(self):
        """A hook built without arguments has no proxy user."""
        hook = WebHDFSHook()
        assert hook.proxy_user is None

    def test_init_proxy_user(self):
        """The ``proxy_user`` constructor argument is exposed on the hook unchanged."""
        # The value passed in must match the one asserted below.
        hook = WebHDFSHook(proxy_user='someone')
        assert 'someone' == hook.proxy_user
示例#3
0
 def setUp(self):
     """Attach a default-constructed ``WebHDFSHook`` to the test case."""
     default_hook = WebHDFSHook()
     self.webhdfs_hook = default_hook
示例#4
0
 def test_init_proxy_user(self):
     """The ``proxy_user`` constructor argument is exposed on the hook unchanged."""
     # The value passed in must match the one asserted below.
     hook = WebHDFSHook(proxy_user='someone')
     assert 'someone' == hook.proxy_user
示例#5
0
 def test_simple_init(self):
     """A hook built without arguments has ``proxy_user`` set to None."""
     assert WebHDFSHook().proxy_user is None
示例#6
0
 def test_init_proxy_user(self):
     """The ``proxy_user`` constructor argument is exposed on the hook unchanged."""
     # The value passed in must match the one asserted below.
     hook = WebHDFSHook(proxy_user='someone')
     self.assertEqual('someone', hook.proxy_user)
示例#7
0
 def test_simple_init(self):
     """A hook built without arguments has ``proxy_user`` set to None."""
     self.assertIsNone(WebHDFSHook().proxy_user)
示例#8
0
class TestWebHDFSHook(unittest.TestCase):
    """Unit tests for ``WebHDFSHook`` with the HDFS clients and connections patched out."""

    def setUp(self):
        """Create a fresh hook with default arguments for every test."""
        self.webhdfs_hook = WebHDFSHook()

    @patch('airflow.providers.apache.hdfs.hooks.webhdfs.InsecureClient')
    @patch(
        'airflow.providers.apache.hdfs.hooks.webhdfs.WebHDFSHook.get_connections',
        return_value=[
            Connection(host='host_1', port=123),
            Connection(host='host_2', port=321, login='******')
        ])
    def test_get_conn(self, mock_get_connections, mock_insecure_client):
        """``get_conn`` moves on to the next connection when a client fails.

        The first ``InsecureClient`` construction raises ``HdfsError`` and the
        second one succeeds; the test checks both connections were attempted
        and that the working client is the one returned.
        """
        mock_insecure_client.side_effect = [
            HdfsError('Error'), mock_insecure_client.return_value
        ]
        conn = self.webhdfs_hook.get_conn()

        mock_insecure_client.assert_has_calls([
            call(f'http://{connection.host}:{connection.port}',
                 user=connection.login)
            for connection in mock_get_connections.return_value
        ])
        mock_insecure_client.return_value.status.assert_called_once_with('/')
        self.assertEqual(conn, mock_insecure_client.return_value)

    @patch('airflow.providers.apache.hdfs.hooks.webhdfs.KerberosClient',
           create=True)
    @patch(
        'airflow.providers.apache.hdfs.hooks.webhdfs.WebHDFSHook.get_connections',
        return_value=[Connection(host='host_1', port=123)])
    @patch(
        'airflow.providers.apache.hdfs.hooks.webhdfs._kerberos_security_mode',
        return_value=True)
    def test_get_conn_kerberos_security_mode(self, mock_kerberos_security_mode,
                                             mock_get_connections,
                                             mock_kerberos_client):
        """With ``_kerberos_security_mode`` patched to True, a ``KerberosClient`` is built and returned."""
        conn = self.webhdfs_hook.get_conn()

        connection = mock_get_connections.return_value[0]
        mock_kerberos_client.assert_called_once_with(
            f'http://{connection.host}:{connection.port}')
        self.assertEqual(conn, mock_kerberos_client.return_value)

    @patch(
        'airflow.providers.apache.hdfs.hooks.webhdfs.WebHDFSHook.get_connections',
        return_value=[])
    def test_get_conn_no_connection_found(self, mock_get_connections):
        """``get_conn`` raises ``AirflowWebHDFSHookException`` when ``get_connections`` returns an empty list."""
        with self.assertRaises(AirflowWebHDFSHookException):
            self.webhdfs_hook.get_conn()

    @patch('airflow.providers.apache.hdfs.hooks.webhdfs.WebHDFSHook.get_conn')
    def test_check_for_path(self, mock_get_conn):
        """``check_for_path`` calls ``status(path, strict=False)`` and returns its truthiness."""
        hdfs_path = 'path'

        exists_path = self.webhdfs_hook.check_for_path(hdfs_path)

        mock_get_conn.assert_called_once_with()
        mock_status = mock_get_conn.return_value.status
        mock_status.assert_called_once_with(hdfs_path, strict=False)
        self.assertEqual(exists_path, bool(mock_status.return_value))

    @patch('airflow.providers.apache.hdfs.hooks.webhdfs.WebHDFSHook.get_conn')
    def test_load_file(self, mock_get_conn):
        """``load_file`` forwards to the client's ``upload`` with ``overwrite=True`` and ``n_threads=1``."""
        source = 'source'
        destination = 'destination'

        self.webhdfs_hook.load_file(source, destination)

        mock_get_conn.assert_called_once_with()
        mock_upload = mock_get_conn.return_value.upload
        mock_upload.assert_called_once_with(hdfs_path=destination,
                                            local_path=source,
                                            overwrite=True,
                                            n_threads=1)

    def test_simple_init(self):
        """A hook built without arguments has no proxy user."""
        hook = WebHDFSHook()
        self.assertIsNone(hook.proxy_user)

    def test_init_proxy_user(self):
        """The ``proxy_user`` constructor argument is exposed on the hook unchanged."""
        # The value passed in must match the one asserted below.
        hook = WebHDFSHook(proxy_user='someone')
        self.assertEqual('someone', hook.proxy_user)