def get_conn(self):
     """
     Returns an hdfscli client: a KerberosClient when Kerberos security is enabled, otherwise an InsecureClient.
     """
     nn_connections = self.get_connections(self.webhdfs_conn_id)
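     # Probe each configured namenode with a lightweight read; the first one that responds is used.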
     for nn in nn_connections:
         try:
             self.log.debug('Trying namenode %s', nn.host)
             connection_str = 'http://{nn.host}:{nn.port}'.format(nn=nn)
             if _kerberos_security_mode:
                 client = KerberosClient(connection_str)
             else:
                 proxy_user = self.proxy_user or nn.login
                 client = InsecureClient(connection_str, user=proxy_user)
             client.status('/')
             self.log.debug('Using namenode %s for hook', nn.host)
             return client
         except HdfsError as e:
             self.log.debug(
                 "Read operation on namenode {nn.host} failed with error: {e}"
                 .format(**locals()))
     nn_hosts = [c.host for c in nn_connections]
     no_nn_error = "Read operations failed on the namenodes below:\n{}".format(
         "\n".join(nn_hosts))
     raise AirflowWebHDFSHookException(no_nn_error)
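The get_conn() snippets above assume a few module-level names that the listing does not show. Below is a minimal sketch of that context so the method reads standalone; the security flag, exception class, and exact import wiring are stand-ins and differ between Airflow versions.

# Minimal module context assumed by the get_conn() snippets (illustrative only).
from hdfs import InsecureClient, HdfsError

try:
    # Shipped with the hdfs package's Kerberos extra (pip install hdfs[kerberos]).
    from hdfs.ext.kerberos import KerberosClient
except ImportError:
    KerberosClient = None

# In Airflow this flag is derived from the core.security setting; hard-coded here as an assumption.
_kerberos_security_mode = False


class AirflowWebHDFSHookException(Exception):
    """Raised when none of the configured namenodes answer the read probe."""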
Example #3
 def get_conn(self):
     """
     Returns an hdfscli client: a KerberosClient when Kerberos security is enabled, otherwise an InsecureClient.
     """
     nn_connections = self.get_connections(self.webhdfs_conn_id)
     for nn in nn_connections:
         try:
             logging.debug("Trying namenode {}".format(nn.host))
             connection_str = "http://{nn.host}:{nn.port}".format(nn=nn)
             if _kerberos_security_mode:
                 client = KerberosClient(connection_str)
             else:
                 proxy_user = self.proxy_user or nn.login
                 client = InsecureClient(connection_str, user=proxy_user)
             client.status("/")
             logging.debug("Using namenode {} for hook".format(nn.host))
             return client
         except HdfsError as e:
             logging.debug(
                 "Read operation on namenode {nn.host} failed with"
                 " error: {e.message}".format(**locals())
             )
     nn_hosts = [c.host for c in nn_connections]
     no_nn_error = "Read operations failed on the namenodes below:\n{}".format(
         "\n".join(nn_hosts)
     )
     raise Exception(no_nn_error)
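For context, a hook exposing either variant of get_conn() would typically be used along these lines (a hypothetical sketch; the class name and connection id follow Airflow's WebHDFSHook defaults, and the path is a placeholder):

hook = WebHDFSHook(webhdfs_conn_id='webhdfs_default')
client = hook.get_conn()   # probes the configured namenodes, returns the first healthy client
client.status('/')         # the returned object is a regular hdfscli client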
Example #4
    # check_output returns bytes under Python 3; decode before splitting into paths
    file_list = subprocess.check_output([hive_script_name, hive_db_a, hive_db_b]).decode()
    file_list_arr = file_list.split(' ')

    py_logger.info("hive execution completed")

    client = KerberosClient(hdfs_url)

    s3 = session.client('s3', use_ssl=False, verify=False)
    counter = 0

    for file_path in file_list_arr:

        file_path = source_directory + file_path

        status = client.status(file_path, strict=False)

        if bool(status):
            file_name = os.path.basename(file_path)
            key_name = s3_folder_name + file_name

            # Stream the file object from WebHDFS straight into S3; no local staging copy is needed
            with client.read(file_path) as f:
                s3.upload_fileobj(f, bucket_name, key_name)

            client.delete(file_path, recursive=False, skip_trash=True)
            counter = counter + 1
            py_logger.info("File: " + file_path + " moved to s3 bucket")
        
    py_logger.info("S3 script execution completed. No.of Files moved: " + str(counter))

    # Compresses log files that are older than 30 days
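The fragment above is taken from a larger HDFS-to-S3 transfer script; the sketch below fills in the setup it appears to rely on. Every concrete name and value here is a placeholder supplied for illustration, not taken from the original script.

# Setup assumed by the fragment above (all names and values are placeholders).
import logging
import os
import subprocess

import boto3
from hdfs.ext.kerberos import KerberosClient

py_logger = logging.getLogger("hdfs_to_s3")

hdfs_url = "http://namenode.example.com:50070"   # WebHDFS endpoint (placeholder)
hive_script_name = "./list_export_files.sh"      # prints space-separated HDFS paths (placeholder)
hive_db_a, hive_db_b = "db_a", "db_b"            # placeholder database names
source_directory = "/user/hive/warehouse/"       # prefix joined to each listed path (placeholder)
bucket_name = "example-bucket"                   # placeholder S3 bucket
s3_folder_name = "exports/"                      # placeholder S3 key prefix
session = boto3.session.Session()                # credentials resolved through boto3's normal chain

Streaming each file through client.read() into upload_fileobj() avoids writing a local copy, which keeps the transfer disk-friendly even for large files.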