Example No. 1
 def test_ha_client_socket_timeout(self):
     e = socket.timeout
     mocked_client_cat = Mock(side_effect=e)
     ha_client = HAClient([Namenode("foo"), Namenode("bar")])
     ha_client.cat = HAClient._ha_gen_method(mocked_client_cat)
     cat_result_gen = ha_client.cat(ha_client, ['foobar'])
     self.assertRaises(OutOfNNException, all, cat_result_gen)
Example No. 2
 def test_ha_client_standby_error(self):
     e = RequestError("org.apache.hadoop.ipc.StandbyException foo bar")
     mocked_client_cat = Mock(side_effect=e)
     ha_client = HAClient([Namenode("foo"), Namenode("bar")])
     ha_client.cat = HAClient._ha_gen_method(mocked_client_cat)
     cat_result_gen = ha_client.cat(ha_client, ['foobar'])
     self.assertRaises(OutOfNNException, all, cat_result_gen)
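The two tests above are excerpted from a larger test module; the imports and the test-case class are not part of the excerpt. A minimal self-contained harness for this pattern could look like the sketch below (assumptions: snakebite's public modules snakebite.client, snakebite.namenode and snakebite.errors; on Python 2, Mock came from the external mock package):

import socket
import unittest
from unittest.mock import Mock  # Python 2: from mock import Mock

from snakebite.client import HAClient
from snakebite.errors import OutOfNNException
from snakebite.namenode import Namenode


class HAClientFailureTest(unittest.TestCase):
    def test_out_of_namenodes_on_timeout(self):
        # side_effect set to an exception class makes the mock raise it on every call
        mocked_client_cat = Mock(side_effect=socket.timeout)
        ha_client = HAClient([Namenode("foo"), Namenode("bar")])
        ha_client.cat = HAClient._ha_gen_method(mocked_client_cat)
        # the wrapped method returns a lazy generator; all() consumes it so the
        # failover loop actually runs and exhausts both namenodes
        cat_result_gen = ha_client.cat(ha_client, ["foobar"])
        self.assertRaises(OutOfNNException, all, cat_result_gen)


if __name__ == "__main__":
    unittest.main()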
Example No. 5
 def test_ha_client_econnrefused_socket_error(self):
     e = socket.error
     e.errno = errno.ECONNREFUSED
     mocked_client_cat = Mock(side_effect=e)
     ha_client = HAClient([Namenode("foo"), Namenode("bar")])
     ha_client.cat = HAClient._ha_gen_method(mocked_client_cat)
     cat_result_gen = ha_client.cat(ha_client, ["foobar"])
     self.assertRaises(OutOfNNException, all, cat_result_gen)
Example No. 6
 def test_ha_client_ehostunreach_socket_error(self):
     e = socket.error
     e.errno = errno.EHOSTUNREACH
     mocked_client_cat = Mock(side_effect=e)
     ha_client = HAClient([Namenode("foo"), Namenode("bar")])
     ha_client.cat = HAClient._ha_gen_method(mocked_client_cat)
     cat_result_gen = ha_client.cat(ha_client, ['foobar'])
     self.assertRaises(OutOfNNException, all, cat_result_gen)
Example No. 8
 def test_ha_client_econnrefused_socket_error(self):
     e = SocketError
     e.errno = errno.ECONNREFUSED
     mocked_client_cat = Mock(side_effect=e)
     ha_client = HAClient([Namenode("foo"), Namenode("bar")])
     ha_client.cat = HAClient._ha_gen_method(mocked_client_cat)
     cat_result_gen = ha_client.cat(ha_client, ['foobar'])
     self.assertRaises(OutOfNNException, all, cat_result_gen)
Example No. 9
 def test_ha_client_retry(self, rpc_call):
     retry_attempts = 3
     e = RequestError("org.apache.hadoop.ipc.RetriableException foo bar")
     rpc_call.side_effect = e
     nns = [Namenode("foo"), Namenode("bar")]
     ha_client = HAClient(nns, max_retries=retry_attempts)
     cat_result_gen = ha_client.cat(["foobar"])
     self.assertRaises(RequestError, all, cat_result_gen)
     self.assertEqual(rpc_call.call_count, 1 + retry_attempts)
Example No. 10
 def test_ha_client_failover_retry_for_exception(self, rpc_call):
     failover_attempts = 3
     e = RequestError("org.apache.hadoop.ipc.StandbyException foo bar")
     rpc_call.side_effect = e
     nns = [Namenode("foo", 8020), Namenode("bar", 8020)]
     ha_client = HAClient(nns, max_failovers=failover_attempts)
     cat_result_gen = ha_client.cat(["foobar"])
     self.assertRaises(OutOfNNException, all, cat_result_gen)
     self.assertEqual(rpc_call.call_count, 1 + failover_attempts)
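As the two tests above illustrate, HAClient treats the two failure modes differently: a RetriableException is retried against the same namenode (bounded by max_retries, re-raising the original RequestError when exhausted), while a StandbyException or a socket-level error triggers failover to the next namenode (bounded by max_failovers, raising OutOfNNException when all namenodes are used up). A minimal construction showing both knobs, with placeholder hostnames:

from snakebite.client import HAClient
from snakebite.namenode import Namenode

client = HAClient(
    [Namenode("nn1.example.com", 8020), Namenode("nn2.example.com", 8020)],
    max_retries=3,    # retries against the same namenode for retriable errors
    max_failovers=3,  # switches to the other namenode before giving up
)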
Example No. 12
 def test_response_error_no_client_retry(self, rpc_call):
     retry_attempts = 3
     e = RpcResponseError("Response read error")
     rpc_call.side_effect = e
     nns = [Namenode("foo")]
     ha_client = HAClient(nns, max_retries=retry_attempts)
     cat_result_gen = ha_client.rename(['foobar'], 'foo')
     self.assertRaises(RpcResponseError, all, cat_result_gen)
     self.assertEqual(rpc_call.call_count, 1)
Example No. 15
 def test_ha_client_failover_retry_for_exception2(self, get_connection):
     failover_attempts = 2
     e = RequestError("org.apache.hadoop.ipc.StandbyException foo bar")
     get_connection.side_effect = e
     nns = [Namenode("foo"), Namenode("bar")]
     ha_client = HAClient(nns, max_failovers=failover_attempts)
     cat_result_gen = ha_client.cat(["foobar"])
     self.assertRaises(OutOfNNException, all, cat_result_gen)
     calls = [call("foo", 8020), call("bar", 8020), call("foo", 8020)]
     get_connection.assert_has_calls(calls)
Example No. 16
 def test_ha_client_failover_retry(self, rpc_call):
     failover_attempts = 3
     e = socket.timeout
     e.message = "socket.timeout"
     rpc_call.side_effect = e
     nns = [Namenode("foo"), Namenode("bar")]
     ha_client = HAClient(nns, max_failovers=failover_attempts)
     cat_result_gen = ha_client.cat(['foobar'])
     self.assertRaises(OutOfNNException, all, cat_result_gen)
     self.assertEqual(rpc_call.call_count, 1 + failover_attempts)
Example No. 18
 def test_ha_client_retry2(self, get_connection):
     retry_attempts = 2
     e = RequestError("org.apache.hadoop.ipc.RetriableException foo bar")
     get_connection.side_effect = e
     nns = [Namenode("foo", 8020), Namenode("bar", 8020)]
     ha_client = HAClient(nns, max_retries=retry_attempts)
     cat_result_gen = ha_client.cat(['foobar'])
     self.assertRaises(RequestError, all, cat_result_gen)
     calls = [call("foo", 8020), call("foo", 8020), call("foo", 8020)]
     get_connection.assert_has_calls(calls)
Example No. 21
 def test_ha_client_failover_retry2(self, get_connection):
     failover_attempts = 2
     e = socket.timeout
     e.message = "socket.timeout"
     get_connection.side_effect = e
     nns = [Namenode("foo", 8020), Namenode("bar", 8020)]
     ha_client = HAClient(nns, max_failovers=failover_attempts)
     cat_result_gen = ha_client.cat(['foobar'])
     self.assertRaises(OutOfNNException, all, cat_result_gen)
     calls = [call("foo", 8020), call("bar", 8020), call("foo", 8020)]
     get_connection.assert_has_calls(calls)
Example No. 24
    def get_conn(self):
        '''
        Returns a snakebite HDFSClient object.
        '''
        connections = self.get_connections(self.hdfs_conn_id)

        use_sasl = False
        if configuration.get('core', 'security') == 'kerberos':
            use_sasl = True

        client = None

        # When using HAClient, proxy_user must be the same for all connections, so it is OK to always take the first.
        effective_user = self.proxy_user or connections[0].login
        if len(connections) == 1:
            autoconfig = connections[0].extra_dejson.get('autoconfig', False)
            if autoconfig:
                client = AutoConfigClient(effective_user=effective_user, use_sasl=use_sasl)
            else:
                client = Client(connections[0].host, connections[0].port,
                                effective_user=effective_user, use_sasl=use_sasl)
        elif len(connections) > 1:
            nn = [Namenode(conn.host, conn.port) for conn in connections]
            client = HAClient(nn, effective_user=effective_user, use_sasl=use_sasl)
        else:
            raise HDFSHookException("conn_id doesn't exist in the repository")
        
        return client
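Using a hook like this is then a one-liner per task; a sketch, assuming the classic Airflow HDFSHook with the get_conn() above and a configured connection id (the import path varies across Airflow versions):

from airflow.hooks.hdfs_hook import HDFSHook

hook = HDFSHook(hdfs_conn_id='hdfs_default')
sb_client = hook.get_conn()
for entry in sb_client.ls(['/tmp']):
    print(entry['path'])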
Example No. 25
    def get_conn(self):
        '''
        Returns a snakebite HDFSClient object.
        '''
        use_sasl = False
        securityConfig = None
        if securityConfig == 'kerberos':  # TODO: make a configuration file for this
            use_sasl = True

        connections = self.get_connections(self.hdfs_conn_id)
        client = None
        # When using HAClient, proxy_user must be the same for all connections, so it is OK to always take the first
        effective_user = self.proxy_user or connections[0].login
        if len(connections) == 1:
            client = Client(connections[0].host,
                            connections[0].port,
                            use_sasl=use_sasl,
                            effective_user=effective_user)
        elif len(connections) > 1:
            nn = [Namenode(conn.host, conn.port) for conn in connections]
            client = HAClient(nn,
                              use_sasl=use_sasl,
                              effective_user=effective_user)
        else:
            raise HDFSHookException("conn_id doesn't exist in the repository")
        return client
Example No. 26
class HdfsSensor(BaseSensorOperator):
    """
    Waits for a file or folder to land in HDFS
    """
    template_fields = ('filepath', )
    __mapper_args__ = {'polymorphic_identity': 'HdfsSensor'}

    @apply_defaults
    def __init__(self, filepath, hdfs_conn_id='hdfs_default', *args, **kwargs):
        super(HdfsSensor, self).__init__(*args, **kwargs)
        self.filepath = filepath
        session = settings.Session()
        db = session.query(DB).filter(DB.conn_id == hdfs_conn_id).first()
        if not db:
            raise Exception("conn_id doesn't exist in the repository")
        self.host = db.host
        self.port = db.port
        NAMENODES = [Namenode(self.host, self.port)]
        self.sb = HAClient(NAMENODES)
        session.commit()
        session.close()

    def poke(self):
        logging.getLogger("snakebite").setLevel(logging.WARNING)
        logging.info('Poking for file {self.filepath} '.format(**locals()))
        try:
            files = [f for f in self.sb.ls([self.filepath])]
        except Exception:
            return False
        return True
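For context, a sensor like the one above would be wired into a DAG in the usual way; a hypothetical sketch (the task_id, filepath and surrounding DAG object are assumptions, not part of the example):

wait_for_success = HdfsSensor(
    task_id='wait_for_success_marker',
    filepath='/data/input/_SUCCESS',
    hdfs_conn_id='hdfs_default',
    dag=dag,  # assumes a DAG object defined elsewhere
)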
Example No. 27
def monitor_db_size():
    try:
        # connect to the namenode HA service with connect and request timeout settings
        client = HAClient([n1, n2],
                          use_trash=True,
                          sock_connect_timeout=50000,
                          sock_request_timeout=50000)
    except Exception as ex:
        pass
Example No. 29
def get_snakebite_hdfs_client():
    """
    Get the snakebite HDFS client.
    :return: snakebite HDFS Client
    """
    n1 = Namenode("hadoop101", 9000)
    n2 = Namenode("hadoop102", 9000)
    client = HAClient([n1, n2],
                      effective_user="******",
                      sock_request_timeout=10000000000)
    return client
Example No. 30
    def get_conn(self) -> Any:
        """
        Returns a snakebite HDFSClient object.
        """
        # When using HAClient, proxy_user must be the same for all connections,
        # so it is OK to always take the first.
        effective_user = self.proxy_user
        autoconfig = self.autoconfig
        use_sasl = conf.get('core', 'security') == 'kerberos'

        try:
            connections = self.get_connections(self.hdfs_conn_id)

            if not effective_user:
                effective_user = connections[0].login
            if not autoconfig:
                autoconfig = connections[0].extra_dejson.get(
                    'autoconfig', False)
            hdfs_namenode_principal = connections[0].extra_dejson.get(
                'hdfs_namenode_principal')
        except AirflowException:
            if not autoconfig:
                raise

        if autoconfig:
            # will read config info from $HADOOP_HOME conf files
            client = AutoConfigClient(effective_user=effective_user,
                                      use_sasl=use_sasl)
        elif len(connections) == 1:
            client = Client(
                connections[0].host,
                connections[0].port,
                effective_user=effective_user,
                use_sasl=use_sasl,
                hdfs_namenode_principal=hdfs_namenode_principal,
            )
        elif len(connections) > 1:
            name_node = [
                Namenode(conn.host, conn.port) for conn in connections
            ]
            client = HAClient(
                name_node,
                effective_user=effective_user,
                use_sasl=use_sasl,
                hdfs_namenode_principal=hdfs_namenode_principal,
            )
        else:
            raise HDFSHookException("conn_id doesn't exist in the repository "
                                    "and autoconfig is not specified")

        return client
Example No. 31
 def get_conn(self):
     '''
     Returns a snakebite HDFSClient object.
     '''
     connections = self.get_connections(self.hdfs_conn_id)
     client = None
     if len(connections) == 1:
         client = Client(connections[0].host, connections[0].port)
     elif len(connections) > 1:
         nn = [Namenode(conn.host, conn.port) for conn in connections]
         client = HAClient(nn)
     else:
         raise HDFSHookException("conn_id doesn't exist in the repository")
     return client
Example No. 32
class HdfsStorage(_BaseStorage):
    def __init__(self,
                 namenode,
                 path,
                 use_trash=False,
                 effective_user=None,
                 use_sasl=True,
                 hdfs_namenode_principal='hdfs',
                 use_datanode_hostname=False):
        from snakebite.client import HAClient
        from snakebite.namenode import Namenode
        self.path = path
        namenodes = [Namenode(namenode)]
        self._client = HAClient(
            namenodes,
            use_trash=use_trash,
            effective_user=effective_user,
            use_sasl=use_sasl,
            hdfs_namenode_principal=hdfs_namenode_principal,
            use_datanode_hostname=use_datanode_hostname)

    @contextmanager
    def open(self, filename, mode='rb', **kwargs):
        path = '{0}/{1}'.format(self.path, filename)
        if mode.startswith('r'):
            stream = self._hdfs_file_stream(path)
            try:
                yield stream
            finally:
                stream.close()
        elif mode.startswith('w'):
            raise NotImplementedError
        else:
            raise ValueError('Unsupported mode {}'.format(mode))

    def _hdfs_file_stream(self, path):
        try:
            from cStringIO import StringIO
        except ImportError:
            from StringIO import StringIO
        generator = self._client.cat([path]).next()
        buf = StringIO()
        for i in generator:
            buf.write(i)
        buf.seek(0)
        return buf

    def get(self, path, **kwargs):
        buf = self._hdfs_file_stream(path)  # StringIO is not a context manager on Python 2
        try:
            return buf.getvalue()
        finally:
            buf.close()
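Note that _hdfs_file_stream above is Python 2 only: it relies on cStringIO/StringIO and on the .next() generator method. A Python 3 equivalent, assuming the snakebite-py3 fork (the original snakebite does not run on Python 3), might look like:

from io import BytesIO

def _hdfs_file_stream(self, path):
    # cat() yields one data generator per input path; take the first
    generator = next(self._client.cat([path]))
    buf = BytesIO()
    for chunk in generator:
        buf.write(chunk)
    buf.seek(0)
    return buf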
Example No. 34
    def get_conn(self):
        '''
        Returns a snakebite HDFSClient object.
        '''
        use_sasl = False
        if conf.get('core', 'security') == 'kerberos':
            use_sasl = True

        connections = self.get_connections(self.hdfs_conn_id)
        client = None
        if len(connections) == 1:
            client = Client(connections[0].host,
                            connections[0].port,
                            use_sasl=use_sasl)
        elif len(connections) > 1:
            nn = [Namenode(conn.host, conn.port) for conn in connections]
            client = HAClient(nn, use_sasl=use_sasl)
        else:
            raise HDFSHookException("conn_id doesn't exist in the repository")
        return client
Example No. 35
def __create_hdfs_client__():
    try:
        namenode_conf = os.path.dirname(
            os.path.abspath(__file__)) + '/../conf/namenode.conf'
        config_dict = config_parse.config_parse(namenode_conf)
        if 'namenode' not in config_dict or 'host' not in config_dict['namenode'] or \
                'port' not in config_dict['namenode'] or 'second_namenode' not in config_dict or \
                'host' not in config_dict['second_namenode'] or 'port' not in config_dict['second_namenode']:
            logger.error('namenode config file:[%s] invalid' % namenode_conf)
            sys.exit(2)
        namenode_host = config_dict['namenode']['host']
        namenode_port = int(config_dict['namenode']['port'])
        second_namenode_host = config_dict['second_namenode']['host']
        second_namenode_port = int(config_dict['second_namenode']['port'])

        namenode = Namenode(namenode_host, namenode_port)
        second_namenode = Namenode(second_namenode_host, second_namenode_port)
        return HAClient([namenode, second_namenode], use_trash=True)
    except Exception as e:
        logger.error('create hdfs client exception:[%s]' % str(e))
        sys.exit(2)
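For reference, the parser above expects a namenode.conf with [namenode] and [second_namenode] sections, each carrying host and port keys. A plausible file is sketched below; the hostnames and the exact dialect are assumptions, since the real format depends on config_parse.config_parse:

[namenode]
host = nn1.example.com
port = 8020

[second_namenode]
host = nn2.example.com
port = 8020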
Example No. 37
def ha_test():
    n1 = Namenode("192.168.24.137", 9990)
    n2 = Namenode("192.168.24.138", 9990)
    client = HAClient([n1, n2])
    for x in client.ls(['/']):
        print(x)
Example No. 38
class CommandLineParser(object):

    GENERIC_OPTS = {'D': {"short": '-D',
                          "long": '--debug',
                          "help": 'Show debug information',
                          "action": 'store_true'},
                    'j': {"short": '-j',
                          "long": '--json',
                          "help": 'JSON output',
                          "action": 'store_true'},
                    'n': {"short": '-n',
                          "long": '--namenode',
                          "help": 'namenode host',
                          "type": str},
                    'V': {"short": '-V',
                          "long": '--version',
                          "help": 'Hadoop protocol version (default:%d)' % Namenode.DEFAULT_VERSION,
                          "default": Namenode.DEFAULT_VERSION,
                          "type": float},
                    'p': {"short": '-p',
                          "long": '--port',
                          "help": 'namenode RPC port (default: %d)' % Namenode.DEFAULT_PORT,
                          "type": int},
                    'h': {"short": '-h',
                          "long": '--help',
                          "help": 'show help',
                          "type": int},
                    'v': {"short": '-v',
                          "long": '--ver',
                          "help": 'Display snakebite version',
                          "type": int}
                    }

    SUB_OPTS = {'R': {"short": '-R',
                      "long": '--recurse',
                      "help": 'recurse into subdirectories',
                      "action": 'store_true'},
                'd': {"short": '-d',
                      "long": '--directory',
                      "help": 'show only the path and no children / check if path is a dir',
                      "action": 'store_true'},
                's': {"short": '-s',
                      "long": '--summary',
                      "help": 'print summarized output',
                      "action": 'store_true'},
                'S': {"short": '-S',
                      "long": '--skiptrash',
                      "help": 'skip the trash (when trash is enabled)',
                      "default": False,
                      "action": 'store_true'},
                'T': {"short": '-T',
                      "long": "--usetrash",
                      "help": "enable the trash",
                      "action": 'store_true'},
                'z': {"short": '-z',
                      "long": '--zero',
                      "help": 'check for zero length',
                      "action": 'store_true'},
                'e': {"short": '-e',
                      "long": '--exists',
                      "help": 'check if file exists',
                      "action": 'store_true'},
                'checkcrc': {"short": '-checkcrc',
                             "long": "--checkcrc",
                             "help": 'check Crc',
                             "action": 'store_true'},
                'f': {"short": '-f',
                      "long": "--append",
                      "help": 'show appended data as the file grows',
                      "action": 'store_true'},
                'nl': {"short": '-nl',
                       "long": "--newline",
                       "help": 'add a newline character at the end of each file.',
                       "action": 'store_true'},
                'h': {"short": '-h',
                      "long": '--human',
                      "help": 'human readable output',
                      "action": 'store_true'}
                }

    def __init__(self):
        usage = "snakebite [general options] cmd [arguments]"
        epilog = "\ngeneral options:\n"
        epilog += "\n".join(sorted(["  %-30s %s" % ("%s %s" % (v['short'], v['long']), v['help']) for k, v in self.GENERIC_OPTS.items()]))
        epilog += "\n\ncommands:\n"
        epilog += "\n".join(sorted(["  %-30s %s" % ("%s %s" % (k, v['args']), v['descr']) for k, v in Commands.methods.items() if v['visible']]))
        epilog += "\n\nto see command-specific options use: snakebite [cmd] --help"

        self.parser = Parser(usage=usage, epilog=epilog, formatter_class=argparse.RawTextHelpFormatter, add_help=False)
        self._build_parent_parser()
        self._add_subparsers()
        self.namenodes = []
        self.user = None
        self.use_sasl = False

    def _build_parent_parser(self):
        #general options
        for opt_name, opt_data in self.GENERIC_OPTS.items():
            if 'action' in opt_data:
                self.parser.add_argument(opt_data['short'], opt_data['long'], help=opt_data['help'], action=opt_data['action'])
            else:
                if 'default' in opt_data:
                    self.parser.add_argument(opt_data['short'], opt_data['long'], help=opt_data['help'], type=opt_data['type'], default=opt_data['default'])
                else:
                    self.parser.add_argument(opt_data['short'], opt_data['long'], help=opt_data['help'], type=opt_data['type'])

    def _add_subparsers(self):
        default_dir = os.path.join("/user", get_current_username())

        #sub-options
        arg_parsers = {}
        for opt_name, opt_data in self.SUB_OPTS.items():
            arg_parsers[opt_name] = argparse.ArgumentParser(add_help=False)
            arg_parsers[opt_name].add_argument(opt_data['short'], opt_data['long'], help=opt_data['help'],
                                               action=opt_data['action'])

        subcommand_help_parser = argparse.ArgumentParser(add_help=False)
        subcommand_help_parser.add_argument('-H', '--help', action='store_true')

        # NOTE: args and dirs are logically equivalent except for default val.
        # Difference in naming gives more valuable error/help output.

        # 0 or more dirs
        positional_arg_parsers = {}
        positional_arg_parsers['[dirs]'] = argparse.ArgumentParser(add_help=False)
        positional_arg_parsers['[dirs]'].add_argument('dir', nargs='*', default=[default_dir], help="[dirs]")

        # 1 or more dirs
        positional_arg_parsers['dir [dirs]'] = argparse.ArgumentParser(add_help=False)
        positional_arg_parsers['dir [dirs]'].add_argument('dir', nargs='+', default=[default_dir], help="dir [dirs]")

        # 2 dirs
        positional_arg_parsers['src dst'] = argparse.ArgumentParser(add_help=False)
        positional_arg_parsers['src dst'].add_argument('src_dst', nargs=2, default=[default_dir], help="src dst")

        # 1 or more args
        positional_arg_parsers['[args]'] = argparse.ArgumentParser(add_help=False)
        positional_arg_parsers['[args]'].add_argument('arg', nargs='*', help="[args]")

        # 1 arg
        positional_arg_parsers['arg'] = argparse.ArgumentParser(add_help=False)
        positional_arg_parsers['arg'].add_argument('single_arg', default=default_dir, help="arg")

        # 1 (integer) arg
        positional_arg_parsers['(int) arg'] = argparse.ArgumentParser(add_help=False)
        positional_arg_parsers['(int) arg'].add_argument('single_int_arg', default='0', help="(integer) arg",
                                                         type=int)

        subparsers = self.parser.add_subparsers()
        for cmd_name, cmd_info in Commands.methods.items():
            parents = [arg_parsers[opt] for opt in cmd_info['allowed_opts'] if opt in arg_parsers]
            parents += [subcommand_help_parser]
            if 'req_args' in cmd_info and not cmd_info['req_args'] is None:
                parents += [positional_arg_parsers[arg] for arg in cmd_info['req_args']]
            command_parser = subparsers.add_parser(cmd_name, add_help=False, parents=parents)
            command_parser.set_defaults(command=cmd_name)

    def init(self):
        self.read_config()
        self._clean_args()
        self.setup_client()

    def _clean_args(self):
        for path in self.__get_all_directories():
            if path.startswith('hdfs://'):
                parse_result = urlparse(path)
                if 'dir' in self.args and path in self.args.dir:
                    self.args.dir.remove(path)
                    self.args.dir.append(parse_result.path)
                else:
                    self.args.single_arg = parse_result.path

    def __usetrash_unset(self):
        return 'usetrash' not in self.args or self.args.usetrash is False

    def __use_cl_port_first(self, alt):
        # Port provided from CL has the highest priority:
        return self.args.port if self.args.port else alt

    def read_config(self):
        self.configs = HDFSConfig.get_external_config()

        # Try to retrieve namenode config from within CL arguments
        if self._read_config_cl():
            return

        config_file = os.path.join(os.path.expanduser('~'), '.snakebiterc')

        if os.path.exists(config_file):
            #if ~/.snakebiterc exists - read config from it
            self._read_config_snakebiterc()
        elif os.path.exists('/etc/snakebiterc'):
            self._read_config_snakebiterc('/etc/snakebiterc')
        else:
            # if configs from HDFS config files exist and contain something
            if self.configs:
                for config in self.configs['namenodes']:
                    nn = Namenode(config['namenode'],
                                  self.__use_cl_port_first(config['port']))
                    self.namenodes.append(nn)
                if self.__usetrash_unset():
                    self.args.usetrash = self.configs['use_trash']
                self.use_sasl = self.configs['use_sasl']

        if len(self.namenodes):
            return
        else:
            print("No ~/.snakebiterc found, no HADOOP_HOME set and no -n and -p provided")
            print("Tried to find core-site.xml in:")
            for core_conf_path in HDFSConfig.core_try_paths:
                print(" - %s" % core_conf_path)
            print("Tried to find hdfs-site.xml in:")
            for hdfs_conf_path in HDFSConfig.hdfs_try_paths:
                print(" - %s" % hdfs_conf_path)
            print("\nYou can manually create ~/.snakebiterc with the following content:")
            print('{')
            print('  "config_version": 2,')
            print('  "use_trash": true,')
            print('  "namenodes": [')
            print('    {"host": "namenode-ha1", "port": %d, "version": %d},' % (Namenode.DEFAULT_PORT, Namenode.DEFAULT_VERSION))
            print('    {"host": "namenode-ha2", "port": %d, "version": %d}' % (Namenode.DEFAULT_PORT, Namenode.DEFAULT_VERSION))
            print('  ]')
            print('}')

            sys.exit(1)

    def _read_config_snakebiterc(self, path = os.path.join(os.path.expanduser('~'), '.snakebiterc')):
        old_version_info = "You are using snakebite %s with Trash support together with an old snakebiterc; please update or remove your %s file. By default Trash is %s." % (version(), path, 'disabled' if not self.configs['use_trash'] else 'enabled')
        with open(path) as config_file:
            configs = json.load(config_file)

        if isinstance(configs, list):
            # Version 1: List of namenodes
            # config is a list of namenode(s) - possibly HA
            for config in configs:
                nn = Namenode(config['namenode'],
                              self.__use_cl_port_first(config.get('port', Namenode.DEFAULT_PORT)),
                              config.get('version', Namenode.DEFAULT_VERSION))
                self.namenodes.append(nn)
            if self.__usetrash_unset():
                # commandline setting has higher priority
                print_info(old_version_info)
                # There's no info about Trash in version 1, use default policy:
                self.args.usetrash = self.configs['use_trash']
        elif isinstance(configs, dict):
            # Version 2: {}
            # Can be either new configuration or just one namenode
            # which was the very first configuration syntax
            if 'config_version' in configs:
                # Config version => 2
                for nn_config in configs['namenodes']:
                    nn = Namenode(nn_config['host'],
                                  self.__use_cl_port_first(nn_config.get('port', Namenode.DEFAULT_PORT)),
                                  nn_config.get('version', Namenode.DEFAULT_VERSION))
                    self.namenodes.append(nn)

                if self.__usetrash_unset():
                    # commandline setting has higher priority
                    self.args.usetrash = configs.get("use_trash", self.configs['use_trash'])

                self.user = configs.get("user")
            else:
                # config is a single namenode - no HA
                self.namenodes.append(Namenode(configs['namenode'],
                                               self.__use_cl_port_first(configs.get('port', Namenode.DEFAULT_PORT)),
                                               configs.get('version', Namenode.DEFAULT_VERSION)))
                if self.__usetrash_unset():
                    # commandline setting has higher priority
                    print_info(old_version_info)
                    self.args.usetrash = self.configs['use_trash']
        else:
            print_error_exit("Config retrieved from %s is corrupted! Remove it!" % path)

    def __get_all_directories(self):
        dirs_to_check = []

        # append single_arg for operations that use single_arg
        # as HDFS path
        if self.args.command in ('mv', 'test', 'tail'):
            if self.args.single_arg is not None:
                dirs_to_check.append(self.args.single_arg)

        # add dirs if they exist:
        if self.args and 'dir' in self.args:
            dirs_to_check += self.args.dir

        return dirs_to_check

    def _read_config_cl(self):
        ''' Check if any directory arguments contain hdfs://'''
        dirs_to_check = self.__get_all_directories()
        hosts, ports = [], []
        for path in dirs_to_check:
            if path.startswith('hdfs://'):
                parse_result = urlparse(path)
                hosts.append(parse_result.hostname)
                ports.append(parse_result.port)

        # remove duplicates and None from (hosts + self.args.namenode)
        hosts = list(filter(lambda x: x is not None, set(hosts + [self.args.namenode])))
        if len(hosts) > 1:
            print_error_exit('Conflicting namenode hosts in commandline arguments, hosts: %s' % str(hosts))

        ports = list(filter(lambda x: x is not None, set(ports + [self.args.port])))
        if len(ports) > 1:
            print_error_exit('Conflicting namenode ports in commandline arguments, ports: %s' % str(ports))

        # Store port from CL in arguments - CL port has the highest priority
        if len(ports) == 1:
            self.args.port = ports[0]

        # do we agree on one namenode?
        if len(hosts) == 1 and len(ports) <= 1:
            self.args.namenode = hosts[0]
            self.args.port = ports[0] if len(ports) == 1 else Namenode.DEFAULT_PORT
            self.namenodes.append(Namenode(self.args.namenode, self.args.port))
            # we got the info from CL -> check if use_trash is set - if not use default policy:
            if self.__usetrash_unset():
                self.args.usetrash = self.configs['use_trash']
            return True
        else:
            return False

    def parse(self, non_cli_input=None):  # Allow input for testing purposes
        if not sys.argv[1:] and not non_cli_input:
            self.parser.print_help()
            sys.exit(-1)

        try:
            args = self.parser.parse_args(non_cli_input)
        except ArgumentParserError as error:
            if "-h" in sys.argv or "--help" in sys.argv:  # non cli input?
                commands = [cmd for (cmd, description) in Commands.methods.items() if description['visible'] is True]
                command = error.prog.split()[-1]
                if command in commands:
                    self.usage_helper(command)
                else:
                    self.parser.print_help()
                self.parser.exit(2)
            elif "-v" in sys.argv or "--ver" in sys.argv:
                print(version())
                self.parser.exit(0)
            else:
                self.parser.print_usage(sys.stderr)
                self.parser.exit(2, 'error: %s. Use -h for help.\n' % (error.error_message))

        self.cmd = args.command
        self.args = args
        return self.args

    def setup_client(self):
        if 'skiptrash' in self.args:
            use_trash = self.args.usetrash and not self.args.skiptrash
        else:
            use_trash = self.args.usetrash
        self.client = HAClient(self.namenodes, use_trash, self.user, self.use_sasl, self.configs['hdfs_namenode_principal'],
                               self.configs['failover_max_attempts'], self.configs['client_retries'],
                               self.configs['client_sleep_base_millis'], self.configs['client_sleep_max_millis'],
                               self.configs['socket_timeout_millis'], use_datanode_hostname=self.configs['use_datanode_hostname'])

    def execute(self):
        if self.args.help:
            #if 'ls -H' is called, execute 'usage ls'
            self.args.arg = [self.cmd]
            return Commands.methods['usage']['method'](self)
        if not Commands.methods.get(self.cmd):
            self.parser.print_help()
            sys.exit(-1)
        try:
            return Commands.methods[self.cmd]['method'](self)
        except IOError as e:
            if e.errno != errno.EPIPE:
                exitError(sys.exc_info())
        except Exception:
            exitError(sys.exc_info())

    def command(args="", descr="", allowed_opts="", visible=True, req_args=None):
        def wrap(f):
            Commands.methods[f.__name__] = {"method": f,
                                            "args": args,
                                            "descr": descr,
                                            "allowed_opts": allowed_opts,
                                            "visible": visible,
                                            "req_args": req_args}
        return wrap

    @command(visible=False)
    def commands(self):
        print("\n".join(sorted([k for k, v in Commands.methods.items() if v['visible']])))

    @command(args="[path]", descr="Used for command line completion", visible=False, req_args=['[dirs]'])
    def complete(self):
        self.args.summary = True
        self.args.directory = False
        self.args.recurse = False
        self.args.human = False
        try:
            for line in self._listing():
                print(line.replace(" ", "\\\\ "))
        except FileNotFoundException:
            pass

    @command(args="[paths]", descr="list a path", allowed_opts=["d", "R", "s", "h"], req_args=['[dirs]'])
    def ls(self):
        for line in self._listing():
            print(line)

    def _listing(self):
        # Mimicking hadoop client behaviour
        if self.args.directory:
            include_children = False
            recurse = False
            include_toplevel = True
        else:
            include_children = True
            include_toplevel = False
            recurse = self.args.recurse

        listing = self.client.ls(self.args.dir, recurse=recurse,
                                 include_toplevel=include_toplevel,
                                 include_children=include_children)

        for line in format_listing(listing, json_output=self.args.json,
                                   human_readable=self.args.human,
                                   recursive=recurse,
                                   summary=self.args.summary):
            yield line

    @command(args="[paths]", descr="create directories", req_args=['dir [dirs]'])
    def mkdir(self):
        creations = self.client.mkdir(self.args.dir)
        for line in format_results(creations, json_output=self.args.json):
            print(line)

    @command(args="[paths]", descr="create directories and their parents", req_args=['dir [dirs]'])
    def mkdirp(self):
        creations = self.client.mkdir(self.args.dir, create_parent=True)
        for line in format_results(creations, json_output=self.args.json):
            print(line)

    @command(args="<owner:grp> [paths]", descr="change owner", allowed_opts=["R"], req_args=['arg', 'dir [dirs]'])
    def chown(self):
        owner = self.args.single_arg
        try:
            mods = self.client.chown(self.args.dir, owner, recurse=self.args.recurse)
            for line in format_results(mods, json_output=self.args.json):
                print(line)
        except FileNotFoundException:
            exitError(sys.exc_info())

    @command(args="<mode> [paths]", descr="change file mode (octal)", allowed_opts=["R"], req_args=['(int) arg', 'dir [dirs]'])
    def chmod(self):
        mode = int(str(self.args.single_int_arg), 8)
        mods = self.client.chmod(self.args.dir, mode, recurse=self.args.recurse)
        for line in format_results(mods, json_output=self.args.json):
            print(line)

    @command(args="<grp> [paths]", descr="change group", allowed_opts=["R"], req_args=['arg', 'dir [dirs]'])
    def chgrp(self):
        grp = self.args.single_arg
        mods = self.client.chgrp(self.args.dir, grp, recurse=self.args.recurse)
        for line in format_results(mods, json_output=self.args.json):
            print(line)

    @command(args="[paths]", descr="display stats for paths", allowed_opts=['h'], req_args=['[dirs]'])
    def count(self):
        counts = self.client.count(self.args.dir)
        for line in format_counts(counts, json_output=self.args.json,
                                  human_readable=self.args.human):
            print(line)

    @command(args="", descr="display fs stats", allowed_opts=['h'])
    def df(self):
        result = self.client.df()
        for line in format_fs_stats(result, json_output=self.args.json,
                                    human_readable=self.args.human):
            print(line)

    @command(args="[paths]", descr="display disk usage statistics", allowed_opts=["s", "h"], req_args=['[dirs]'])
    def du(self):
        if self.args.summary:
            include_children = False
            include_toplevel = True
        else:
            include_children = True
            include_toplevel = False
        result = self.client.du(self.args.dir, include_toplevel=include_toplevel, include_children=include_children)
        for line in format_du(result, json_output=self.args.json, human_readable=self.args.human):
            print(line)

    @command(args="[paths] dst", descr="move paths to destination", req_args=['dir [dirs]', 'arg'])
    def mv(self):
        paths = self.args.dir
        dst = self.args.single_arg
        result = self.client.rename(paths, dst)
        for line in format_results(result, json_output=self.args.json):
            print(line)

    @command(args="[paths]", descr="remove paths", allowed_opts=["R", "S", "T"], req_args=['dir [dirs]'])
    def rm(self):
        result = self.client.delete(self.args.dir, recurse=self.args.recurse)
        for line in format_results(result, json_output=self.args.json):
            print(line)

    @command(args="[paths]", descr="creates a file of zero length", req_args=['dir [dirs]'])
    def touchz(self):
        result = self.client.touchz(self.args.dir)
        for line in format_results(result, json_output=self.args.json):
            print(line)

    @command(args="", descr="show server information")
    def serverdefaults(self):
        print(self.client.serverdefaults())

    @command(args="[dirs]", descr="delete a directory", req_args=['dir [dirs]'])
    def rmdir(self):
        result = self.client.rmdir(self.args.dir)
        for line in format_results(result, json_output=self.args.json):
            print(line)

    @command(args="<rep> [paths]", descr="set replication factor", allowed_opts=['R'], req_args=['(int) arg', 'dir [dirs]'])
    def setrep(self):
        rep_factor = int(self.args.single_int_arg)
        result = self.client.setrep(self.args.dir, rep_factor, recurse=self.args.recurse)
        for line in format_results(result, json_output=self.args.json):
            print(line)

    @command(args="<cmd>", descr="show cmd usage", req_args=['[args]'])
    def usage(self):
        if 'arg' not in self.args or self.args.arg == []:
            self.parser.print_help()
            sys.exit(-1)

        for sub_cmd in self.args.arg:
            self.usage_helper(sub_cmd)

    def usage_helper(self, command):
        cmd_entry = Commands.methods.get(command)
        if not cmd_entry:
            self.parser.print_help()
            sys.exit(-1)
        cmd_args = []
        cmd_descriptions = "\ncommand options: \n"
        allowed_opts = cmd_entry.get('allowed_opts')
        if allowed_opts:
            cmd_args += ["[-%s]" % o for o in allowed_opts]
            cmd_descriptions += "\n".join(sorted([" %-30s %s" % ("%s %s" % (self.SUB_OPTS[o]['short'], self.SUB_OPTS[o]['long']), self.SUB_OPTS[o]['help']) for o in allowed_opts]))
        args = cmd_entry.get('args')
        if args:
            cmd_args.append(args)

        print("usage: snakebite [general options] %s %s" % (command, " ".join(cmd_args)))

        general_opts = "\ngeneral options:\n"
        general_opts += "\n".join(sorted(["  %-30s %s" % ("%s %s" % (v['short'], v['long']), v['help']) for k, v in self.GENERIC_OPTS.items()]))
        print(general_opts)

        if allowed_opts:
            print(cmd_descriptions)

    @command(args="[paths]", descr="stat information", req_args=['dir [dirs]'])
    def stat(self):
        print(format_stat(self.client.stat(self.args.dir), json_output=self.args.json))

    @command(args="path", descr="test a path", allowed_opts=['d', 'z', 'e'], req_args=['arg'])
    def test(self):
        path = self.args.single_arg

        try:
            result = self.client.test(path, exists=self.args.exists, directory=self.args.directory, zero_length=self.args.zero)
        except FileNotFoundException:
            result = False

        if result:
            sys.exit(0)
        else:
            sys.exit(1)

    @command(args="[paths]", descr="copy source paths to stdout", allowed_opts=['checkcrc'], req_args=['dir [dirs]'])
    def cat(self):
        for file_to_read in self.client.cat(self.args.dir, check_crc=self.args.checkcrc):
            for load in file_to_read:
                stdout.write(load)

    @command(args="path dst", descr="copy local file reference to destination", req_args=['dir [dirs]', 'arg'], visible=False)
    def copyFromLocal(self):
        src = self.args.dir
        dst = self.args.single_arg
        result = self.client.copyFromLocal(src, dst)
        for line in format_results(result, json_output=self.args.json):
            print(line)

    @command(args="[paths] dst", descr="copy paths to local file system destination", allowed_opts=['checkcrc'], req_args=['dir [dirs]', 'arg'])
    def copyToLocal(self):
        paths = self.args.dir
        dst = self.args.single_arg
        result = self.client.copyToLocal(paths, dst, check_crc=self.args.checkcrc)
        for line in format_results(result, json_output=self.args.json):
            print(line)

    @command(args="[paths] dst", descr="copy files from source to destination", allowed_opts=['checkcrc'], req_args=['dir [dirs]', 'arg'], visible=False)
    def cp(self):
        paths = self.args.dir
        dst = self.args.single_arg
        result = self.client.cp(paths, dst, checkcrc=self.args.checkcrc)
        for line in format_results(result, json_output=self.args.json):
            print(line)

    @command(args="file dst", descr="copy files to local file system destination", allowed_opts=['checkcrc'], req_args=['dir [dirs]', 'arg'])
    def get(self):
        paths = self.args.dir
        dst = self.args.single_arg
        result = self.client.copyToLocal(paths, dst, check_crc=self.args.checkcrc)
        for line in format_results(result, json_output=self.args.json):
            print(line)

    @command(args="dir dst", descr="concatenates files in source dir into destination local file", allowed_opts=['nl'], req_args=['src dst'])
    def getmerge(self):
        source = self.args.src_dst[0]
        dst = self.args.src_dst[1]
        result = self.client.getmerge(source, dst, newline=self.args.newline)
        for line in format_results(result, json_output=self.args.json):
            print(line)

    # @command(args="[paths] dst", descr="copy sources from local file system to destination", req_args=['dir [dirs]', 'arg'])
    # def put(self):
    #     paths = self.args.dir
    #     dst = self.args.single_arg
    #     result = self.client.put(paths, dst)
    #     for line in format_results(result, json_output=self.args.json):
    #         print line

    @command(args="path", descr="display last kilobyte of the file to stdout", allowed_opts=['f'], req_args=['arg'])
    def tail(self):
        path = self.args.single_arg
        result = self.client.tail(path, append=self.args.append)
        for line in result:
            print(line)

    @command(args="path [paths]", descr="output file in text format", allowed_opts=['checkcrc'], req_args=['dir [dirs]'])
    def text(self):
        paths = self.args.dir
        result = self.client.text(paths)
        for line in result:
            print(line)
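The @command decorator above only records metadata in Commands.methods; execute() later dispatches on self.cmd through that registry, and the argparse subparsers are generated from the same entries. Registering a new subcommand is therefore just a matter of decorating another method; a sketch (the 'newest' command and its body are hypothetical):

    @command(args="[paths]", descr="print the most recently modified entry", req_args=['dir [dirs]'])
    def newest(self):
        # hypothetical subcommand: pick the entry with the largest modification_time
        listing = self.client.ls(self.args.dir)
        latest = max(listing, key=lambda entry: entry['modification_time'])
        print(latest['path'])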
Example No. 39
import time

from snakebite.client import HAClient
from snakebite.namenode import Namenode

n1 = Namenode("namenode-1", 8022)
n2 = Namenode("namenode-2", 8022)

# current timestamp (seconds)
now = time.time()
# timestamp of 30 days ago (seconds)
thirty_days_ago = now - 30 * 24 * 60 * 60

# the 30-days-ago timestamp in milliseconds, matching HDFS access_time units
millis_new = int(round(thirty_days_ago * 1000))
#print millis_new

# connect to the HA pair of HDFS namenodes
client = HAClient([n1, n2],
                  use_trash=True,
                  sock_connect_timeout=50000,
                  sock_request_timeout=50000)

for entry in client.ls(["/user/spark/applicationHistory/"]):
    file_timestamp = entry['access_time']
    file_path = entry['path']
    print(file_path)
    if file_timestamp < millis_new:
        for p in client.delete([file_path], recurse=True):
            print(p)