def put_csv_to_hdfs(**kwargs):
    """Upload the task's input CSV into an HDFS directory partitioned by year.

    Pulls the fiscal year from XCom (key ``fa_year``), appends it to the
    configured ``target_path``, and uploads ``input_file`` through the
    default WebHDFS connection.
    """
    task_instance = kwargs['ti']
    fiscal_year = task_instance.xcom_pull(key='fa_year')
    local_file = kwargs['input_file']
    # Year-partitioned destination, e.g. <target_path>/<fa_year>
    destination = path.join(kwargs['target_path'], str(fiscal_year))
    hook = WebHDFSHook(webhdfs_conn_id="webhdfs_default")
    hook.load_file(local_file, destination)
def load_to_hdfs(
        source,
        dest,
        hdfs_conn_id='webhdfs_default',
):
    """Upload a local file to HDFS over WebHDFS.

    :param source: local filesystem path of the file to upload
    :param dest: target HDFS path
    :param hdfs_conn_id: Airflow connection id of the WebHDFS cluster
    """
    hook = WebHDFSHook(hdfs_conn_id)
    # BUG FIX: the previous body ignored `source`/`dest` entirely — it only
    # printed banner lines and the status of a hard-coded debug path
    # ('/user/siddhi'). Perform the upload the function is named for.
    hook.load_file(source, dest)
def poke(self, context):
    """Return True when ``self.filepath`` exists in HDFS (sensor check)."""
    from airflow.hooks.webhdfs_hook import WebHDFSHook
    hook = WebHDFSHook(self.webhdfs_conn_id)
    # Lazy %-style args defer formatting until the record is emitted and
    # replace the fragile '{self.filepath}'.format(**locals()) pattern.
    self.log.info('Poking for file %s', self.filepath)
    return hook.check_for_path(hdfs_path=self.filepath)
def setUp(self):
    # Fresh hook per test; no-arg construction uses the hook's default connection id.
    self.webhdfs_hook = WebHDFSHook()
class TestWebHDFSHook(unittest.TestCase):
    """Unit tests for WebHDFSHook: connection fallback, Kerberos mode,
    missing-connection error, path checks, and file upload."""

    def setUp(self):
        # Fresh hook per test; no-arg construction uses the default connection id.
        self.webhdfs_hook = WebHDFSHook()

    @patch('airflow.hooks.webhdfs_hook.InsecureClient')
    @patch('airflow.hooks.webhdfs_hook.WebHDFSHook.get_connections', return_value=[
        Connection(host='host_1', port=123),
        Connection(host='host_2', port=321, login='******')
    ])
    def test_get_conn(self, mock_get_connections, mock_insecure_client):
        # First connection fails with HdfsError; the hook must fall through
        # to the second one and return its client.
        mock_insecure_client.side_effect = [
            HdfsError('Error'),
            mock_insecure_client.return_value
        ]
        conn = self.webhdfs_hook.get_conn()

        mock_insecure_client.assert_has_calls([
            call('http://{host}:{port}'.format(host=connection.host, port=connection.port),
                 user=connection.login)
            for connection in mock_get_connections.return_value
        ])
        # A successful client is validated by probing the HDFS root.
        mock_insecure_client.return_value.status.assert_called_once_with('/')
        self.assertEqual(conn, mock_insecure_client.return_value)

    @patch('airflow.hooks.webhdfs_hook.KerberosClient', create=True)
    @patch('airflow.hooks.webhdfs_hook.WebHDFSHook.get_connections', return_value=[
        Connection(host='host_1', port=123)
    ])
    @patch('airflow.hooks.webhdfs_hook._kerberos_security_mode', return_value=True)
    def test_get_conn_kerberos_security_mode(self,
                                             mock_kerberos_security_mode,
                                             mock_get_connections,
                                             mock_kerberos_client):
        # With Kerberos security on, a KerberosClient (not InsecureClient)
        # must be built from the connection's host and port.
        conn = self.webhdfs_hook.get_conn()

        connection = mock_get_connections.return_value[0]
        mock_kerberos_client.assert_called_once_with(
            'http://{host}:{port}'.format(host=connection.host, port=connection.port))
        self.assertEqual(conn, mock_kerberos_client.return_value)

    @patch('airflow.hooks.webhdfs_hook.WebHDFSHook.get_connections', return_value=[])
    def test_get_conn_no_connection_found(self, mock_get_connection):
        # No usable connection must raise the hook-specific exception.
        with self.assertRaises(AirflowWebHDFSHookException):
            self.webhdfs_hook.get_conn()

    @patch('airflow.hooks.webhdfs_hook.WebHDFSHook.get_conn')
    def test_check_for_path(self, mock_get_conn):
        hdfs_path = 'path'

        exists_path = self.webhdfs_hook.check_for_path(hdfs_path)

        mock_get_conn.assert_called_once_with()
        mock_status = mock_get_conn.return_value.status
        # strict=False makes status() return None instead of raising.
        mock_status.assert_called_once_with(hdfs_path, strict=False)
        self.assertEqual(exists_path, bool(mock_status.return_value))

    @patch('airflow.hooks.webhdfs_hook.WebHDFSHook.get_conn')
    def test_load_file(self, mock_get_conn):
        source = 'source'
        destination = 'destination'

        self.webhdfs_hook.load_file(source, destination)

        mock_get_conn.assert_called_once_with()
        mock_upload = mock_get_conn.return_value.upload
        mock_upload.assert_called_once_with(hdfs_path=destination,
                                            local_path=source,
                                            overwrite=True,
                                            n_threads=1)

    def test_simple_init(self):
        c = WebHDFSHook()
        self.assertIsNone(c.proxy_user)

    def test_init_proxy_user(self):
        # BUG FIX: the constructor was passed a value ('******') that could
        # never equal the asserted 'someone'; pass the expected value so the
        # round-trip through the constructor is actually verified.
        c = WebHDFSHook(proxy_user='someone')
        self.assertEqual('someone', c.proxy_user)
def test_init_proxy_user(self):
    """The proxy_user passed to the constructor must be stored on the hook."""
    # BUG FIX: the constructor was passed '******' while the assertion
    # expected 'someone' — the test could never pass. Pass the asserted value.
    hook = WebHDFSHook(proxy_user='someone')
    self.assertEqual('someone', hook.proxy_user)
def test_simple_init(self):
    """A hook built with no arguments has no proxy user configured."""
    hook = WebHDFSHook()
    self.assertIsNone(hook.proxy_user)
class TestWebHDFSHook(unittest.TestCase):
    """Tests for WebHDFSHook connection handling and file operations."""

    def setUp(self):
        self.webhdfs_hook = WebHDFSHook()

    @patch('airflow.hooks.webhdfs_hook.InsecureClient')
    @patch('airflow.hooks.webhdfs_hook.WebHDFSHook.get_connections', return_value=[
        Connection(host='host_1', port=123),
        Connection(host='host_2', port=321, login='******')
    ])
    def test_get_conn(self, mock_get_connections, mock_insecure_client):
        # The first candidate raises; the hook should retry with the next
        # connection and hand back the working client.
        mock_insecure_client.side_effect = [HdfsError('Error'),
                                            mock_insecure_client.return_value]

        conn = self.webhdfs_hook.get_conn()

        expected_calls = [
            call('http://{host}:{port}'.format(host=connection.host, port=connection.port),
                 user=connection.login)
            for connection in mock_get_connections.return_value
        ]
        mock_insecure_client.assert_has_calls(expected_calls)
        # Connectivity is verified by a status() probe of the root path.
        mock_insecure_client.return_value.status.assert_called_once_with('/')
        self.assertEqual(conn, mock_insecure_client.return_value)

    @patch('airflow.hooks.webhdfs_hook.KerberosClient', create=True)
    @patch('airflow.hooks.webhdfs_hook.WebHDFSHook.get_connections', return_value=[
        Connection(host='host_1', port=123)
    ])
    @patch('airflow.hooks.webhdfs_hook._kerberos_security_mode', return_value=True)
    def test_get_conn_kerberos_security_mode(self,
                                             mock_kerberos_security_mode,
                                             mock_get_connections,
                                             mock_kerberos_client):
        conn = self.webhdfs_hook.get_conn()

        # Kerberos mode must route through KerberosClient built from the
        # connection's host/port.
        connection = mock_get_connections.return_value[0]
        expected_url = 'http://{host}:{port}'.format(host=connection.host,
                                                     port=connection.port)
        mock_kerberos_client.assert_called_once_with(expected_url)
        self.assertEqual(conn, mock_kerberos_client.return_value)

    @patch('airflow.hooks.webhdfs_hook.WebHDFSHook.get_connections', return_value=[])
    def test_get_conn_no_connection_found(self, mock_get_connection):
        # An empty connection list is a hard error for this hook.
        with self.assertRaises(AirflowWebHDFSHookException):
            self.webhdfs_hook.get_conn()

    @patch('airflow.hooks.webhdfs_hook.WebHDFSHook.get_conn')
    def test_check_for_path(self, mock_get_conn):
        hdfs_path = 'path'

        exists_path = self.webhdfs_hook.check_for_path(hdfs_path)

        mock_get_conn.assert_called_once_with()
        mock_status = mock_get_conn.return_value.status
        # strict=False: a missing path yields None rather than an exception.
        mock_status.assert_called_once_with(hdfs_path, strict=False)
        self.assertEqual(exists_path, bool(mock_status.return_value))

    @patch('airflow.hooks.webhdfs_hook.WebHDFSHook.get_conn')
    def test_load_file(self, mock_get_conn):
        source = 'source'
        destination = 'destination'

        self.webhdfs_hook.load_file(source, destination)

        mock_get_conn.assert_called_once_with()
        mock_upload = mock_get_conn.return_value.upload
        mock_upload.assert_called_once_with(
            hdfs_path=destination,
            local_path=source,
            overwrite=True,
            n_threads=1
        )
def poke(self, context):
    """Sensor check: return True once ``self.filepath`` is present in HDFS."""
    from airflow.hooks.webhdfs_hook import WebHDFSHook
    hook = WebHDFSHook(self.webhdfs_conn_id)
    self.log.info('Poking for file %s', self.filepath)
    return hook.check_for_path(hdfs_path=self.filepath)
def load_file(destination, source, conn_id='hdfs_http', overwrite=True):
    """Upload *source* from the local filesystem to *destination* in HDFS.

    :param destination: target HDFS path
    :param source: local path of the file to upload
    :param conn_id: Airflow connection id for the WebHDFS cluster
    :param overwrite: replace an existing file at *destination* when True
    """
    webhdfs = WebHDFSHook(conn_id)
    webhdfs.load_file(source, destination, overwrite)
def poke(self, context):
    """Return True when ``self.filepath`` exists in HDFS (sensor check)."""
    from airflow.hooks.webhdfs_hook import WebHDFSHook
    hook = WebHDFSHook(self.webhdfs_conn_id)
    # Lazy %-style args defer formatting until the record is emitted and
    # replace the fragile '{self.filepath} '.format(**locals()) pattern
    # (which also left a stray trailing space in the message).
    logging.info('Poking for file %s', self.filepath)
    return hook.check_for_path(hdfs_path=self.filepath)