示例#1
0
def put_csv_to_hdfs(**kwargs):
    """Upload a local file into a per-year sub-directory through WebHDFS.

    Expected keyword arguments (all read from ``kwargs``):

    * ``ti`` -- object exposing ``xcom_pull``; the value stored under the
      XCom key ``'fa_year'`` names the sub-directory.
    * ``input_file`` -- local path of the file to upload.
    * ``target_path`` -- base remote directory the year is appended to.

    Raises:
        KeyError: if any of the keyword arguments above is missing.
    """
    task_instance = kwargs['ti']
    year = task_instance.xcom_pull(key='fa_year')
    local_file = kwargs['input_file']
    # NOTE(review): ``path`` is presumably ``os.path`` -- confirm against the
    # file's imports. The pulled value is stringified as-is before joining.
    remote_dir = path.join(kwargs['target_path'], str(year))
    WebHDFSHook(webhdfs_conn_id="webhdfs_default").load_file(local_file, remote_dir)
示例#2
0
def load_to_hdfs(
    source,
    dest,
    hdfs_conn_id='webhdfs_default',
):
    """Print the WebHDFS status of ``/user/siddhi`` between two banner lines.

    Args:
        source: Not used by this function.
        dest: Not used by this function.
        hdfs_conn_id: Connection id handed to ``WebHDFSHook``.

    NOTE(review): despite its name, nothing is uploaded here -- ``source`` and
    ``dest`` are ignored. Confirm whether this is a connectivity check only.
    """
    banner = '###############################################################################################'
    webhdfs = WebHDFSHook(hdfs_conn_id)
    print(banner)
    # The status lookup runs only after the first banner has been printed.
    print(webhdfs.get_conn().status('/user/siddhi'))
    print(banner)
示例#3
0
 def poke(self, context):
     """Report whether ``self.filepath`` is found through WebHDFS.

     Args:
         context: Accepted for the sensor interface; not used here.

     Returns:
         Whatever ``WebHDFSHook.check_for_path`` returns for ``self.filepath``.
     """
     # Imported lazily so the hook module is only loaded when poking.
     from airflow.hooks.webhdfs_hook import WebHDFSHook
     c = WebHDFSHook(self.webhdfs_conn_id)
     # Pass the path as a logging argument instead of formatting eagerly
     # through ``locals()``; the emitted text is the same.
     self.log.info('Poking for file %s', self.filepath)
     return c.check_for_path(hdfs_path=self.filepath)
示例#4
0
 def setUp(self):
     """Build a default-configured ``WebHDFSHook`` for each test."""
     hook = WebHDFSHook()
     self.webhdfs_hook = hook
示例#5
0
class TestWebHDFSHook(unittest.TestCase):
    """Unit tests for ``WebHDFSHook`` with clients and connections patched."""

    def setUp(self):
        """Build a default-configured hook shared by the tests below."""
        self.webhdfs_hook = WebHDFSHook()

    @patch('airflow.hooks.webhdfs_hook.InsecureClient')
    @patch('airflow.hooks.webhdfs_hook.WebHDFSHook.get_connections',
           return_value=[
               Connection(host='host_1', port=123),
               Connection(host='host_2', port=321, login='******')
           ])
    def test_get_conn(self, mock_get_connections, mock_insecure_client):
        """A failing first client must not stop the second from being used.

        The first ``InsecureClient`` construction raises ``HdfsError``; the
        second returns the mock client, which ``get_conn`` should return.
        """
        mock_insecure_client.side_effect = [
            HdfsError('Error'), mock_insecure_client.return_value
        ]
        conn = self.webhdfs_hook.get_conn()

        # One client construction is expected per configured connection.
        mock_insecure_client.assert_has_calls([
            call('http://{host}:{port}'.format(host=connection.host,
                                               port=connection.port),
                 user=connection.login)
            for connection in mock_get_connections.return_value
        ])
        mock_insecure_client.return_value.status.assert_called_once_with('/')
        self.assertEqual(conn, mock_insecure_client.return_value)

    @patch('airflow.hooks.webhdfs_hook.KerberosClient', create=True)
    @patch('airflow.hooks.webhdfs_hook.WebHDFSHook.get_connections',
           return_value=[Connection(host='host_1', port=123)])
    @patch('airflow.hooks.webhdfs_hook._kerberos_security_mode',
           return_value=True)
    def test_get_conn_kerberos_security_mode(self, mock_kerberos_security_mode,
                                             mock_get_connections,
                                             mock_kerberos_client):
        """With ``_kerberos_security_mode`` patched, ``KerberosClient`` is used."""
        conn = self.webhdfs_hook.get_conn()

        connection = mock_get_connections.return_value[0]
        mock_kerberos_client.assert_called_once_with(
            'http://{host}:{port}'.format(host=connection.host,
                                          port=connection.port))
        self.assertEqual(conn, mock_kerberos_client.return_value)

    @patch('airflow.hooks.webhdfs_hook.WebHDFSHook.get_connections',
           return_value=[])
    def test_get_conn_no_connection_found(self, mock_get_connection):
        """An empty connection list makes ``get_conn`` raise."""
        with self.assertRaises(AirflowWebHDFSHookException):
            self.webhdfs_hook.get_conn()

    @patch('airflow.hooks.webhdfs_hook.WebHDFSHook.get_conn')
    def test_check_for_path(self, mock_get_conn):
        """``check_for_path`` returns the truthiness of a non-strict status."""
        hdfs_path = 'path'

        exists_path = self.webhdfs_hook.check_for_path(hdfs_path)

        mock_get_conn.assert_called_once_with()
        mock_status = mock_get_conn.return_value.status
        mock_status.assert_called_once_with(hdfs_path, strict=False)
        self.assertEqual(exists_path, bool(mock_status.return_value))

    @patch('airflow.hooks.webhdfs_hook.WebHDFSHook.get_conn')
    def test_load_file(self, mock_get_conn):
        """``load_file`` forwards to ``upload`` with the expected defaults."""
        source = 'source'
        destination = 'destination'

        self.webhdfs_hook.load_file(source, destination)

        mock_get_conn.assert_called_once_with()
        mock_upload = mock_get_conn.return_value.upload
        mock_upload.assert_called_once_with(hdfs_path=destination,
                                            local_path=source,
                                            overwrite=True,
                                            n_threads=1)

    def test_simple_init(self):
        """A hook built without arguments has no proxy user."""
        c = WebHDFSHook()
        self.assertIsNone(c.proxy_user)

    def test_init_proxy_user(self):
        """The ``proxy_user`` argument is stored on the hook unchanged."""
        # The constructor argument must match the asserted value; the
        # previous masked literal could never equal 'someone'.
        c = WebHDFSHook(proxy_user='someone')
        self.assertEqual('someone', c.proxy_user)
示例#6
0
 def test_init_proxy_user(self):
     """The ``proxy_user`` argument is stored on the hook unchanged."""
     # The constructor argument must match the asserted value; the previous
     # masked literal could never equal 'someone'.
     c = WebHDFSHook(proxy_user='someone')
     self.assertEqual('someone', c.proxy_user)
示例#7
0
 def test_simple_init(self):
     """A hook built without arguments has no proxy user."""
     hook = WebHDFSHook()
     proxy_user = hook.proxy_user
     self.assertIsNone(proxy_user)
 def setUp(self):
     """Build a default-configured ``WebHDFSHook`` for each test."""
     hook = WebHDFSHook()
     self.webhdfs_hook = hook
class TestWebHDFSHook(unittest.TestCase):
    """Unit tests for ``WebHDFSHook`` with clients and connections patched."""

    def setUp(self):
        """Build a default-configured hook shared by the tests below."""
        self.webhdfs_hook = WebHDFSHook()

    @patch('airflow.hooks.webhdfs_hook.InsecureClient')
    @patch(
        'airflow.hooks.webhdfs_hook.WebHDFSHook.get_connections',
        return_value=[
            Connection(host='host_1', port=123),
            Connection(host='host_2', port=321, login='******'),
        ],
    )
    def test_get_conn(self, mock_get_connections, mock_insecure_client):
        """A failing first client must not stop the second from being used.

        The first ``InsecureClient`` construction raises ``HdfsError``; the
        second returns the mock client, which ``get_conn`` should return.
        """
        client = mock_insecure_client.return_value
        mock_insecure_client.side_effect = [HdfsError('Error'), client]

        conn = self.webhdfs_hook.get_conn()

        # One client construction is expected per configured connection.
        expected_calls = []
        for connection in mock_get_connections.return_value:
            url = 'http://{host}:{port}'.format(host=connection.host, port=connection.port)
            expected_calls.append(call(url, user=connection.login))
        mock_insecure_client.assert_has_calls(expected_calls)
        client.status.assert_called_once_with('/')
        self.assertEqual(conn, client)

    @patch('airflow.hooks.webhdfs_hook.KerberosClient', create=True)
    @patch(
        'airflow.hooks.webhdfs_hook.WebHDFSHook.get_connections',
        return_value=[Connection(host='host_1', port=123)],
    )
    @patch('airflow.hooks.webhdfs_hook._kerberos_security_mode', return_value=True)
    def test_get_conn_kerberos_security_mode(self,
                                             mock_kerberos_security_mode,
                                             mock_get_connections,
                                             mock_kerberos_client):
        """With ``_kerberos_security_mode`` patched, ``KerberosClient`` is used."""
        conn = self.webhdfs_hook.get_conn()

        connection = mock_get_connections.return_value[0]
        expected_url = 'http://{host}:{port}'.format(host=connection.host, port=connection.port)
        mock_kerberos_client.assert_called_once_with(expected_url)
        self.assertEqual(conn, mock_kerberos_client.return_value)

    @patch('airflow.hooks.webhdfs_hook.WebHDFSHook.get_connections', return_value=[])
    def test_get_conn_no_connection_found(self, mock_get_connection):
        """An empty connection list makes ``get_conn`` raise."""
        self.assertRaises(AirflowWebHDFSHookException, self.webhdfs_hook.get_conn)

    @patch('airflow.hooks.webhdfs_hook.WebHDFSHook.get_conn')
    def test_check_for_path(self, mock_get_conn):
        """``check_for_path`` returns the truthiness of a non-strict status."""
        hdfs_path = 'path'

        exists_path = self.webhdfs_hook.check_for_path(hdfs_path)

        mock_get_conn.assert_called_once_with()
        status = mock_get_conn.return_value.status
        status.assert_called_once_with(hdfs_path, strict=False)
        expected = bool(status.return_value)
        self.assertEqual(exists_path, expected)

    @patch('airflow.hooks.webhdfs_hook.WebHDFSHook.get_conn')
    def test_load_file(self, mock_get_conn):
        """``load_file`` forwards to ``upload`` with the expected defaults."""
        source, destination = 'source', 'destination'

        self.webhdfs_hook.load_file(source, destination)

        mock_get_conn.assert_called_once_with()
        expected_kwargs = {
            'hdfs_path': destination,
            'local_path': source,
            'overwrite': True,
            'n_threads': 1,
        }
        mock_get_conn.return_value.upload.assert_called_once_with(**expected_kwargs)
示例#10
0
 def poke(self, context):
     """Report whether ``self.filepath`` is found through WebHDFS.

     ``context`` is accepted for the sensor interface and not used here.
     """
     # Imported lazily so the hook module is only loaded when poking.
     from airflow.hooks.webhdfs_hook import WebHDFSHook
     hook = WebHDFSHook(self.webhdfs_conn_id)
     self.log.info('Poking for file %s', self.filepath)
     path_found = hook.check_for_path(hdfs_path=self.filepath)
     return path_found
def load_file(destination, source, conn_id='hdfs_http', overwrite=True):
    """Upload ``source`` to ``destination`` through a ``WebHDFSHook``.

    Note the parameter order: ``destination`` comes first here, while the
    hook's ``load_file`` is called with ``source`` first.

    Args:
        destination: Target path passed as the hook's second argument.
        source: Local path passed as the hook's first argument.
        conn_id: Connection id used to build the hook.
        overwrite: Forwarded positionally as the hook's third argument.
    """
    WebHDFSHook(conn_id).load_file(source, destination, overwrite)
示例#12
0
 def poke(self, context):
     """Report whether ``self.filepath`` is found through WebHDFS.

     Args:
         context: Accepted for the sensor interface; not used here.

     Returns:
         Whatever ``WebHDFSHook.check_for_path`` returns for ``self.filepath``.
     """
     # Imported lazily so the hook module is only loaded when poking.
     from airflow.hooks.webhdfs_hook import WebHDFSHook
     c = WebHDFSHook(self.webhdfs_conn_id)
     # Pass the path as a logging argument instead of formatting eagerly
     # through ``locals()``. The trailing space of the original message is
     # kept so the emitted text is unchanged.
     logging.info('Poking for file %s ', self.filepath)
     return c.check_for_path(hdfs_path=self.filepath)