Example No. 1
    def get_conn(self):
        '''
        Returns a snakebite HDFSClient object.
        '''
        connections = self.get_connections(self.hdfs_conn_id)

        use_sasl = False
        if configuration.get('core', 'security') == 'kerberos':
            use_sasl = True

        client = None

        # When using HAClient, proxy_user must be the same, so it is OK to always take the first.
        effective_user = self.proxy_user or connections[0].login
        if len(connections) == 1:
            autoconfig = connections[0].extra_dejson.get('autoconfig', False)
            if autoconfig:
                client = AutoConfigClient(effective_user=effective_user, use_sasl=use_sasl)
            else:
                client = Client(connections[0].host, connections[0].port,
                                effective_user=effective_user, use_sasl=use_sasl)
        elif len(connections) > 1:
            nn = [Namenode(conn.host, conn.port) for conn in connections]
            client = HAClient(nn, effective_user=effective_user, use_sasl=use_sasl)
        else:
            raise HDFSHookException("conn_id doesn't exist in the repository")
        
        return client
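This get_conn appears to come from Airflow's HDFSHook. A minimal usage sketch, assuming the Airflow 1.x module layout and a placeholder conn_id:

from airflow.hooks.hdfs_hook import HDFSHook

# 'hdfs_default' is a placeholder conn_id
hook = HDFSHook(hdfs_conn_id='hdfs_default')
client = hook.get_conn()
# snakebite's ls() takes a list of paths and yields one dict per entry
for entry in client.ls(['/tmp']):
    print(entry['path'])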
Example No. 2
    def get_conn(self):
        '''
        Returns a snakebite HDFSClient object.
        '''
        use_sasl = False
        securityConfig = None
        if securityConfig == 'kerberos':  # TODO: read this from a configuration file
            use_sasl = True

        connections = self.get_connections(self.hdfs_conn_id)
        client = None
        # When using HAClient, proxy_user must be the same, so it is OK to always take the first.
        effective_user = self.proxy_user or connections[0].login
        if len(connections) == 1:
            client = Client(connections[0].host,
                            connections[0].port,
                            use_sasl=use_sasl,
                            effective_user=effective_user)
        elif len(connections) > 1:
            nn = [Namenode(conn.host, conn.port) for conn in connections]
            client = HAClient(nn,
                              use_sasl=use_sasl,
                              effective_user=effective_user)
        else:
            raise HDFSHookException("conn_id doesn't exist in the repository")
        return client
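The TODO above wants the security mode read from a configuration file rather than hard-coded. A minimal sketch using Python's standard configparser, assuming a hypothetical security.ini with a [core] section:

from configparser import ConfigParser

# 'security.ini' and its [core]/security layout are assumptions
parser = ConfigParser()
parser.read('security.ini')
use_sasl = parser.get('core', 'security', fallback='') == 'kerberos'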
Example No. 3
 def test_ha_client_socket_timeout(self):
     e = socket.timeout
     mocked_client_cat = Mock(side_effect=e)
     ha_client = HAClient([Namenode("foo"), Namenode("bar")])
     # _ha_gen_method wraps the mock like an unbound method, so the
     # instance is passed explicitly in place of self
     ha_client.cat = HAClient._ha_gen_method(mocked_client_cat)
     cat_result_gen = ha_client.cat(ha_client, ['foobar'])
     # all() drains the generator so the mocked failure actually fires
     self.assertRaises(OutOfNNException, all, cat_result_gen)
Example No. 4
 def test_ha_client_standby_error(self):
     e = RequestError("org.apache.hadoop.ipc.StandbyException foo bar")
     mocked_client_cat = Mock(side_effect=e)
     ha_client = HAClient([Namenode("foo"), Namenode("bar")])
     ha_client.cat = HAClient._ha_gen_method(mocked_client_cat)
     cat_result_gen = ha_client.cat(ha_client, ['foobar'])
     self.assertRaises(OutOfNNException, all, cat_result_gen)
Example No. 5
 def test_ha_client_ehostunreach_socket_error(self):
     e = socket.error
     e.errno = errno.EHOSTUNREACH
     mocked_client_cat = Mock(side_effect=e)
     ha_client = HAClient([Namenode("foo"), Namenode("bar")])
     ha_client.cat = HAClient._ha_gen_method(mocked_client_cat)
     cat_result_gen = ha_client.cat(ha_client, ['foobar'])
     self.assertRaises(OutOfNNException, all, cat_result_gen)
Example No. 6
 def test_ha_client_econnrefused_socket_error(self):
     e = SocketError
     e.errno = errno.ECONNREFUSED
     mocked_client_cat = Mock(side_effect=e)
     ha_client = HAClient([Namenode("foo"), Namenode("bar")])
     ha_client.cat = HAClient._ha_gen_method(mocked_client_cat)
     cat_result_gen = ha_client.cat(ha_client, ['foobar'])
     self.assertRaises(OutOfNNException, all, cat_result_gen)
Example No. 7
 def test_ha_client_retry(self, rpc_call):
     retry_attempts = 3
     e = RequestError("org.apache.hadoop.ipc.RetriableException foo bar")
     rpc_call.side_effect = e
     nns = [Namenode("foo"), Namenode("bar")]
     ha_client = HAClient(nns, max_retries=retry_attempts)
     cat_result_gen = ha_client.cat(['foobar'])
     self.assertRaises(RequestError, all, cat_result_gen)
     self.assertEquals(rpc_call.call_count, 1 + retry_attempts)
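The extra rpc_call argument here (and in the later tests) implies a mock.patch decorator that the excerpt omits. A sketch of that setup; the patch target shown is an assumption, not something visible in the excerpt:

from mock import patch  # unittest.mock on Python 3

# The patch target is assumed; the excerpt only shows that the
# decorator injects an 'rpc_call' mock into the test method.
@patch('snakebite.service.RpcService.call')
def test_ha_client_retry(self, rpc_call):
    ...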
Example No. 8
 def test_ha_client_failover_retry_for_exception(self, rpc_call):
     failover_attempts = 3
     e = RequestError("org.apache.hadoop.ipc.StandbyException foo bar")
     rpc_call.side_effect = e
     nns = [Namenode("foo", 8020), Namenode("bar", 8020)]
     ha_client = HAClient(nns, max_failovers=failover_attempts)
     cat_result_gen = ha_client.cat(['foobar'])
     self.assertRaises(OutOfNNException, all, cat_result_gen)
     self.assertEquals(rpc_call.call_count, 1 + failover_attempts)
Example No. 9
 def setup_client(self):
     if 'skiptrash' in self.args:
         use_trash = self.args.usetrash and not self.args.skiptrash
     else:
         use_trash = self.args.usetrash
     self.client = HAClient(self.namenodes, use_trash, self.user, self.use_sasl, self.configs['hdfs_namenode_principal'],
                            self.configs['failover_max_attempts'], self.configs['client_retries'],
                            self.configs['client_sleep_base_millis'], self.configs['client_sleep_max_millis'],
                            self.configs['socket_timeout_millis'], use_datanode_hostname=self.configs['use_datanode_hostname'])
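Because setup_client passes its settings positionally, the shape of self.configs matters. A hypothetical configs dict with the exact keys this method reads (all values are illustrative only):

# Illustrative values; the keys are the ones setup_client reads.
configs = {
    'hdfs_namenode_principal': 'hdfs/_HOST@EXAMPLE.COM',
    'failover_max_attempts': 3,
    'client_retries': 5,
    'client_sleep_base_millis': 500,
    'client_sleep_max_millis': 10000,
    'socket_timeout_millis': 10000,
    'use_datanode_hostname': False,
}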
Example No. 10
 def test_response_error_no_client_retry(self, rpc_call):
     retry_attempts = 3
     e = RpcResponseError("Response read error")
     rpc_call.side_effect = e
     nns = [Namenode("foo")]
     ha_client = HAClient(nns, max_retries=retry_attempts)
     cat_result_gen = ha_client.rename(['foobar'], 'foo')
     self.assertRaises(RpcResponseError, all, cat_result_gen)
     self.assertEquals(rpc_call.call_count, 1)
Example No. 11
def monitor_db_size():
    try:
        # connect to the namenode HA service with connect and request timeout settings
        client = HAClient([n1, n2],
                          use_trash=True,
                          sock_connect_timeout=50000,
                          sock_request_timeout=50000)
    except Exception as ex:
        pass
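This fragment assumes n1 and n2 are defined elsewhere. A minimal sketch with placeholder hostnames and port:

from snakebite.namenode import Namenode

# placeholder hostnames and port for the two HA namenodes
n1 = Namenode("namenode-1.example.com", 8020)
n2 = Namenode("namenode-2.example.com", 8020)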
Example No. 12
 def test_ha_client_failover_retry_for_exception2(self, get_connection):
     failover_attempts = 2
     e = RequestError("org.apache.hadoop.ipc.StandbyException foo bar")
     get_connection.side_effect = e
     nns = [Namenode("foo"), Namenode("bar")]
     ha_client = HAClient(nns, max_failovers=failover_attempts)
     cat_result_gen = ha_client.cat(['foobar'])
     self.assertRaises(OutOfNNException, all, cat_result_gen)
     calls = [call("foo", 8020), call("bar", 8020), call("foo", 8020)]
     get_connection.assert_has_calls(calls)
Example No. 13
 def test_ha_client_failover_retry(self, rpc_call):
     failover_attempts = 3
     e = socket.timeout
     e.message = "socket.timeout"
     rpc_call.side_effect = e
     nns = [Namenode("foo"), Namenode("bar")]
     ha_client = HAClient(nns, max_failovers=failover_attempts)
     cat_result_gen = ha_client.cat(['foobar'])
     self.assertRaises(OutOfNNException, all, cat_result_gen)
     self.assertEquals(rpc_call.call_count, 1 + failover_attempts)
Example No. 14
 def test_ha_client_retry2(self, get_connection):
     retry_attempts = 2
     e = RequestError("org.apache.hadoop.ipc.RetriableException foo bar")
     get_connection.side_effect = e
     nns = [Namenode("foo", 8020), Namenode("bar", 8020)]
     ha_client = HAClient(nns, max_retries=retry_attempts)
     cat_result_gen = ha_client.cat(['foobar'])
     self.assertRaises(RequestError, all, cat_result_gen)
     calls = [call("foo", 8020), call("foo", 8020), call("foo", 8020)]
     get_connection.assert_has_calls(calls)
Example No. 15
 def test_ha_client_failover_retry2(self, get_connection):
     failover_attempts = 2
     e = socket.timeout
     e.message = "socket.timeout"
     get_connection.side_effect = e
     nns = [Namenode("foo", 8020), Namenode("bar", 8020)]
     ha_client = HAClient(nns, max_failovers=failover_attempts)
     cat_result_gen = ha_client.cat(['foobar'])
     self.assertRaises(OutOfNNException, all, cat_result_gen)
     calls = [call("foo", 8020), call("bar", 8020), call("foo", 8020)]
     get_connection.assert_has_calls(calls)
Example No. 16
def get_snakebite_hdfs_client():
    """
    Get the HDFS client from the snakebite library.
    :return: snakebite HDFS Client
    """
    n1 = Namenode("hadoop101", 9000)
    n2 = Namenode("hadoop102", 9000)
    client = HAClient([n1, n2],
                      effective_user="******",
                      sock_request_timeout=10000000000)
    return client
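A short usage sketch for the helper above; snakebite's test() is an existence check that returns a boolean:

client = get_snakebite_hdfs_client()
# test() returns True when the path exists
if client.test('/user', exists=True):
    for entry in client.ls(['/user']):
        print(entry['path'])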
Example No. 17
 def __init__(self, filepath, hdfs_conn_id='hdfs_default', *args, **kwargs):
     super(HdfsSensor, self).__init__(*args, **kwargs)
     self.filepath = filepath
     session = settings.Session()
     db = session.query(DB).filter(DB.conn_id == hdfs_conn_id).first()
     if not db:
         raise Exception("conn_id doesn't exist in the repository")
     self.host = db.host
     self.port = db.port
     NAMENODES = [Namenode(self.host, self.port)]
     self.sb = HAClient(NAMENODES)
     session.commit()
     session.close()
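The constructor above is only half of an Airflow sensor; a poke() method would complete it. A minimal sketch, assuming snakebite's test() API:

 def poke(self, context):
     # snakebite's test() returns True once the path exists
     return self.sb.test(self.filepath, exists=True)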
Example No. 18
    def get_conn(self) -> Any:
        """
        Returns a snakebite HDFSClient object.
        """
        # When using HAClient, proxy_user must be the same, so it is OK to
        # always take the first.
        effective_user = self.proxy_user
        autoconfig = self.autoconfig
        use_sasl = conf.get('core', 'security') == 'kerberos'

        try:
            connections = self.get_connections(self.hdfs_conn_id)

            if not effective_user:
                effective_user = connections[0].login
            if not autoconfig:
                autoconfig = connections[0].extra_dejson.get(
                    'autoconfig', False)
            hdfs_namenode_principal = connections[0].extra_dejson.get(
                'hdfs_namenode_principal')
        except AirflowException:
            if not autoconfig:
                raise

        if autoconfig:
            # will read config info from $HADOOP_HOME conf files
            client = AutoConfigClient(effective_user=effective_user,
                                      use_sasl=use_sasl)
        elif len(connections) == 1:
            client = Client(
                connections[0].host,
                connections[0].port,
                effective_user=effective_user,
                use_sasl=use_sasl,
                hdfs_namenode_principal=hdfs_namenode_principal,
            )
        elif len(connections) > 1:
            name_node = [
                Namenode(conn.host, conn.port) for conn in connections
            ]
            client = HAClient(
                name_node,
                effective_user=effective_user,
                use_sasl=use_sasl,
                hdfs_namenode_principal=hdfs_namenode_principal,
            )
        else:
            raise HDFSHookException("conn_id doesn't exist in the repository "
                                    "and autoconfig is not specified")

        return client
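The autoconfig flag and namenode principal above are read from the first connection's Extra field. A hypothetical parsed extra_dejson illustrating the two keys this hook inspects (values are illustrative):

# Hypothetical values; these are the two Extra keys the hook reads.
extra_dejson = {
    'autoconfig': False,
    'hdfs_namenode_principal': 'hdfs/_HOST@EXAMPLE.COM',
}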
Example No. 19
 def get_conn(self):
     '''
     Returns a snakebite HDFSClient object.
     '''
     connections = self.get_connections(self.hdfs_conn_id)
     client = None
     if len(connections) == 1:
         client = Client(connections[0].host, connections[0].port)
     elif len(connections) > 1:
         nn = [Namenode(conn.host, conn.port) for conn in connections]
         client = HAClient(nn)
     else:
         raise HDFSHookException("conn_id doesn't exist in the repository")
     return client
Example No. 20
 def __init__(self,
              namenode,
              path,
              use_trash=False,
              effective_user=None,
              use_sasl=True,
              hdfs_namenode_principal='hdfs',
              use_datanode_hostname=False):
     from snakebite.client import HAClient
     from snakebite.namenode import Namenode
     self.path = path
     namenodes = [Namenode(namenode)]
     self._client = HAClient(
         namenodes,
         use_trash=use_trash,
         effective_user=effective_user,
         use_sasl=use_sasl,
         hdfs_namenode_principal=hdfs_namenode_principal,
         use_datanode_hostname=use_datanode_hostname)
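The excerpt omits the wrapper's class name. A hypothetical instantiation (HdfsPath is a stand-in name) showing how the single namenode string becomes a one-element HAClient:

# 'HdfsPath' is a stand-in; the excerpt does not show the real class name.
target = HdfsPath("namenode.example.com", "/data/input", use_trash=True)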
Example No. 21
    def get_conn(self):
        '''
        Returns a snakebite HDFSClient object.
        '''
        use_sasl = False
        if conf.get('core', 'security') == 'kerberos':
            use_sasl = True

        connections = self.get_connections(self.hdfs_conn_id)
        client = None
        if len(connections) == 1:
            client = Client(connections[0].host,
                            connections[0].port,
                            use_sasl=use_sasl)
        elif len(connections) > 1:
            nn = [Namenode(conn.host, conn.port) for conn in connections]
            client = HAClient(nn, use_sasl=use_sasl)
        else:
            raise HDFSHookException("conn_id doesn't exist in the repository")
        return client
Example No. 22
def __create_hdfs_client__():
    try:
        namenode_conf = os.path.dirname(
            os.path.abspath(__file__)) + '/../conf/namenode.conf'
        config_dict = config_parse.config_parse(namenode_conf)
        if 'namenode' not in config_dict or 'host' not in config_dict['namenode'] or \
                'port' not in config_dict['namenode'] or 'second_namenode' not in config_dict or \
                'host' not in config_dict['second_namenode'] or 'port' not in config_dict['second_namenode']:
            logger.error('namenode config file:[%s] invalid' % namenode_conf)
            sys.exit(2)
        namenode_host = config_dict['namenode']['host']
        namenode_port = int(config_dict['namenode']['port'])
        second_namenode_host = config_dict['second_namenode']['host']
        second_namenode_port = int(config_dict['second_namenode']['port'])

        namenode = Namenode(namenode_host, namenode_port)
        second_namenode = Namenode(second_namenode_host, second_namenode_port)
        return HAClient([namenode, second_namenode], use_trash=True)
    except Exception as e:
        logger.error('create hdfs client exception:[%s]' % str(e))
        sys.exit(2)
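config_parse here is a project-local helper, so only the key layout is visible. A hypothetical config_dict matching the checks above (hostnames and ports illustrative):

# Illustrative values; keys mirror the checks in __create_hdfs_client__.
config_dict = {
    'namenode': {'host': 'nn1.example.com', 'port': '8020'},
    'second_namenode': {'host': 'nn2.example.com', 'port': '8020'},
}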
Example No. 23
def ha_test():
    n1 = Namenode("192.168.24.137", 9990)
    n2 = Namenode("192.168.24.138", 9990)
    client = HAClient([n1, n2])
    for x in client.ls(['/']):
        print(x)
Example No. 24
import time

from snakebite.client import HAClient
from snakebite.namenode import Namenode

n1 = Namenode("namenode-1", 8022)
n2 = Namenode("namenode-2", 8022)

# current timestamp
now = time.time()
# timestamp of 30 days ago
thirty_day_ago = now - 30 * 24 * 60 * 60

# convert to a millisecond timestamp
millis_new = int(round(thirty_day_ago * 1000))
# print(millis_new)

# connect to the HA pair of HDFS namenodes with connect and request timeouts
client = HAClient([n1, n2],
                  use_trash=True,
                  sock_connect_timeout=50000,
                  sock_request_timeout=50000)

# move anything not accessed in the last 30 days to the trash
for entry in client.ls(["/user/spark/applicationHistory/"]):
    file_timestamp = entry['access_time']
    file_path = entry['path']
    print(file_path)
    if file_timestamp < millis_new:
        for p in client.delete([file_path], recurse=True):
            print(p)