# Assumed imports: logging for error reporting, the `hdfs` package for the
# WebHDFS clients, and `krbcontext` for the Kerberos ticket context.
import logging

from hdfs import Client
from hdfs.ext.kerberos import KerberosClient
from krbcontext import krbcontext


def generate_temp_files(need_certificate=NEED_CERTIFICATE):
    if need_certificate:
        # Acquire a Kerberos ticket from the keytab before talking to HDFS.
        with krbcontext(using_keytab=True,
                        keytab_file=KEYTAB_PATH,
                        principal=PRINCIPAL):
            for node in HDFS.NODES:
                try:
                    hdfs_client = KerberosClient(node)
                    hdfs_client.download(HDFS.REMOTE_PATH,
                                         HDFS.LOCAL_PATH,
                                         n_threads=HDFS.THREAD_NUM)
                except Exception as err:
                    # Log and fall through to the next node.
                    logging.info(err)
                else:
                    return
            logging.error("Failed to download remote HDFS file.")
            raise Exception("Failed to download remote HDFS file.")
    else:
        for node in HDFS.NODES:
            try:
                hdfs_client = Client(node)
                hdfs_client.download(HDFS.REMOTE_PATH,
                                     HDFS.LOCAL_PATH,
                                     n_threads=HDFS.THREAD_NUM)
            except Exception as err:
                logging.info(err)
            else:
                return
        logging.error("Failed to download remote HDFS file.")
        raise Exception("Failed to download remote HDFS file.")
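# The snippet above relies on module-level configuration that is defined
# elsewhere. A minimal sketch of what that configuration might look like;
# every name and value here is hypothetical and must be adapted to your cluster.
NEED_CERTIFICATE = True                                 # whether the cluster is Kerberized
KEYTAB_PATH = "/etc/security/keytabs/etl.keytab"        # hypothetical keytab location
PRINCIPAL = "etl@EXAMPLE.COM"                           # hypothetical Kerberos principal


class HDFS:
    """Hypothetical settings container used by generate_temp_files()."""
    NODES = ["http://namenode1:9870", "http://namenode2:9870"]  # WebHDFS URLs
    REMOTE_PATH = "/data/temp/input.csv"
    LOCAL_PATH = "/tmp/input.csv"
    THREAD_NUM = 4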
# Assumed imports: PyHive for the HiveServer2 connection, impyla's DB-API
# module for the GSSAPI variant, the hdfs Kerberos extension, and krbcontext.
from hdfs.ext.kerberos import KerberosClient
from impala import dbapi
from krbcontext import krbcontext
from pyhive import hive

# The opening of this snippet was truncated; from the keyword arguments it
# appears to enter a krbcontext ticket context built from a `config` dict.
with krbcontext(using_keytab=True,
                principal=config['kerberos_principal'],
                keytab_file=config['keytab_file'],
                ccache_file=config['kerberos_cache_file']):
    # hive.Connection()
    # host is the node running HiveServer2; port defaults to 10000 (the HS2 port).
    con = hive.connect(host='uatnd02.csdntest.com.local',
                       port=10000,
                       auth='KERBEROS',
                       kerberos_service_name="hive")
    cursor = con.cursor()
    cursor.execute('select * from dl_nccp.account limit 5')  # no trailing semicolon!
    # cursor.execute('desc dl_nccp.account')  # no trailing semicolon!
    datas = cursor.fetchall()
    print(datas)
    cursor.close()
    con.close()

    # The same query path via impyla's DB-API interface.
    conn = dbapi.connect(host='uatnd02.csdntest.com.local',
                         port=10000,
                         auth_mechanism='GSSAPI',
                         kerberos_service_name="hive")
    cursor = conn.cursor()

    # HDFS over Kerberos; hostname_override maps the IP back to the hostname
    # registered in the HDFS service principal.
    client = KerberosClient('http://hdfs_ip:50070',
                            hostname_override="<hdfs hostname>")
    # Commonly used client methods (path arguments omitted here for brevity):
    # client._list_status(), client.list(), client.delete(),
    # client.upload(), client.download()
    client.makedirs('test')
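# A convenience sketch: the DB-API cursors above pair naturally with pandas.
# pandas officially targets SQLAlchemy connectables, so passing a raw DB-API
# connection emits a warning but generally works for simple reads. The host
# and table are the same placeholders used above.
import pandas as pd
from pyhive import hive

con = hive.connect(host='uatnd02.csdntest.com.local', port=10000,
                   auth='KERBEROS', kerberos_service_name="hive")
df = pd.read_sql('select * from dl_nccp.account limit 5', con)
print(df.head())
con.close()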
def check_shapefiles(path_to_zip, zipID):
    import os
    import shutil      # was missing from the original import list; used for cleanup
    import subprocess
    import uuid        # was missing; used for the temp file IDs
    import zipfile     # was missing; used for the archive checks
    import geopandas
    import fiona
    from hdfs.ext.kerberos import KerberosClient
    from shapely.geometry import mapping, shape

    # Initialize up front so the except blocks below can safely reference it.
    ret_obj = {"error": "", "warning": ""}
    try:
        # Obtain a Kerberos ticket from the keytab (principal is redacted).
        subprocess.call([
            "kinit", "*****@*****.**", "-k", "-t",
            "/home/[email protected]/danr.keytab"
        ])
        hdfs_host = "https://m1a.geo.sciclone.wm.edu:14000"
        client_hdfs = KerberosClient(hdfs_host)

        t_id = str(uuid.uuid4())
        try:
            os.mkdir("./temp")
        except OSError:
            pass  # the directory already exists

        localDest = "./temp/" + t_id + ".zip"
        client_hdfs.download(path_to_zip + "/" + zipID, localDest, overwrite=True)
        if zipfile.is_zipfile(localDest):
            with zipfile.ZipFile(localDest) as zipObj:
                zipObj.extractall("./tempSparks/" + t_id + "/")
        else:
            ret_obj["error"] = "The zip file failed to extract."
            return [ret_obj["error"], ret_obj["warning"]]
    except Exception:
        ret_obj["error"] = ("Something went really, really wrong here. Probably "
                            "a corrupt zip, but could be lots of things... |")
        return [ret_obj["error"], ret_obj["warning"]]

    # OK! The file is now extracted and available locally at:
    # "./tempSparks/" + t_id + "/"
    # Now let's run the actual file checks, recording any errors we find.
    try:
        # Load into geopandas. Older geopandas exposes the CRS as a dict
        # with an 'init' key.
        shp = geopandas.read_file("./tempSparks/" + t_id + "/")
        if not (shp.crs["init"] == "epsg:4326"):
            ret_obj["error"] += 'Projection was incorrect. EPSG must be 4326. | '

        # Bounds must fall within valid lat/lon ranges (small tolerance).
        tol = 1e-12
        xmin = shp.bounds["minx"].values[0]
        ymin = shp.bounds["miny"].values[0]
        xmax = shp.bounds["maxx"].values[0]
        ymax = shp.bounds["maxy"].values[0]
        valid = ((xmin >= -180 - tol) and (xmax <= 180 + tol) and
                 (ymin >= -90 - tol) and (ymax <= 90 + tol))
        if not valid:
            ret_obj["error"] += ("Bounds appear to be in another castle. "
                                 "xmin: {0}, xmax: {1}, ymin: {2}, ymax: {3} | "
                                 .format(xmin, xmax, ymin, ymax))

        # Fiona checks: validate each feature's geometry, repairing minor
        # self-intersections with a zero-width buffer where possible.
        shp = fiona.open("./tempSparks/" + t_id + "/")
        valid = True
        error = None
        fixed = []
        for feature in shp:
            raw_shape = shape(feature['geometry'])
            valid = raw_shape.is_valid
            if valid:
                fixed.append(feature)
            if not valid:
                fixed_shape = raw_shape.buffer(0)
                fix_valid = fixed_shape.is_valid
                if fix_valid and error is None:
                    ret_obj["warning"] = (
                        "There is a minor issue with this boundary - i.e., a "
                        "river might cross somewhere it should not. We can fix "
                        "it automatically by using a buffer of 0 in shapely, "
                        "but this message indicates you should look carefully "
                        "at the file sometime soon.")
                    feature["geometry"] = mapping(fixed_shape)
                    fixed.append(feature)
                elif not fix_valid:
                    if error is not None:
                        ret_obj["error"] += ("An error in the geometry of the file "
                                             "exists that we could not automatically fix. | ")
                    else:
                        ret_obj["error"] += ("A really bad error in the geometry of the "
                                             "file exists that we could not automatically fix. | ")
                    break

        # Clean up temp files.
        shutil.rmtree("./tempSparks/" + t_id + "/", ignore_errors=True)
        os.remove("./temp/" + t_id + ".zip")
        return [ret_obj["error"], ret_obj["warning"]]
    except Exception:
        ret_obj["error"] = ("Something bad happened while we were trying to unpack "
                            "the shapefile. This error indicates it wasn't a specific "
                            "issue, but rather the entire shapefile seems corrupted "
                            "(or something equally bad!)")
        return [ret_obj["error"], ret_obj["warning"]]
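# A minimal usage sketch for the validator above. It assumes a reachable,
# Kerberized HDFS cluster and a zipped shapefile at <path_to_zip>/<zipID>;
# both path values below are hypothetical.
error, warning = check_shapefiles("/data/boundaries", "gadm_ALB.zip")
if error:
    print("rejected:", error)
elif warning:
    print("accepted with warning:", warning)
else:
    print("accepted")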
from hdfs.ext.kerberos import KerberosClient


class OperateHDFS:
    def __init__(self, url):
        '''
        :param url: Hostname or IP address of the HDFS namenode, prefixed with
            the protocol and followed by the namenode's WebHDFS port; multiple
            URLs may also be given, separated by semicolons, for
            high-availability support.
        '''
        # Instantiate an HDFS web client using Kerberos authentication.
        self.client = KerberosClient(url)

    def file_list(self, file_path):
        '''
        :param file_path: Remote HDFS directory path.
        :return: All files contained in the remote directory.
        '''
        file_detail = self.client.list(hdfs_path=file_path)
        return file_detail

    def file_read(self, file_path):
        '''
        Read a file from HDFS.
        :param file_path: Remote HDFS file path.
        :return: The file's lines, stripped of surrounding whitespace.
        '''
        lines = []
        # The original passed the raw string r'\n' as the delimiter; the
        # actual newline character is intended here.
        with self.client.read(hdfs_path=file_path, encoding='utf-8',
                              delimiter='\n') as reader:
            # content = file.read()
            # print(content)
            for item in reader:
                lines.append(item.strip())
        return lines

    def file_create_write(self, file_path, data_write):
        '''
        Create a new file in HDFS and write content to it.
        :param file_path: Remote HDFS file path.
        :param data_write: Data to write to the file.
        '''
        self.client.write(hdfs_path=file_path, data=data_write, encoding='utf-8')

    def file_append_write(self, file_path, data_append):
        '''
        Append content to a file that already exists in HDFS; the file must
        already exist.
        :param file_path: Remote HDFS file path.
        :param data_append: Data to append to the file.
        '''
        self.client.write(hdfs_path=file_path, data=data_append,
                          encoding='utf-8', append=True)

    def file_rename(self, src_file_path, dst_file_path):
        '''
        Rename/move a file or folder.
        :param src_file_path: Source path.
        :param dst_file_path: Destination path.
        '''
        self.client.rename(hdfs_src_path=src_file_path, hdfs_dst_path=dst_file_path)

    def mkdir(self, file_path):
        '''
        Create a remote directory in HDFS, recursively if necessary.
        :param file_path: Path (including name) of the folder to create.
        '''
        self.client.makedirs(hdfs_path=file_path)

    def upload_files(self, file_path, local_path):
        '''
        Upload a file or directory to HDFS.
        :param file_path: Target HDFS path. If it already exists and is a
            directory, the files will be uploaded into it.
        :param local_path: Local path of the file or folder. If a folder, all
            files inside it are uploaded (note this means a folder without
            files will not be created remotely).
        :return: hdfs_path_return: on success, the remote upload path.
        '''
        hdfs_path_return = self.client.upload(hdfs_path=file_path, local_path=local_path)
        return hdfs_path_return

    def download_files(self, file_path, local_path):
        '''
        Download a file or folder from HDFS and save it locally.
        :param file_path: Path of the file or folder to download from HDFS.
            If a folder, all files under it are downloaded.
        :param local_path: Local path. If it already exists and is a
            directory, the files will be downloaded into it.
        :return: local_path_return: on success, the local download path.
        '''
        local_path_return = self.client.download(hdfs_path=file_path, local_path=local_path)
        return local_path_return

    def delete_files(self, file_path):
        '''
        Delete a file or directory from HDFS.
        :param file_path: Path of the file or directory to delete in HDFS.
        :return: True if the deletion succeeded; False if no file or directory
            previously existed at `hdfs_path`.
        '''
        # recursive: delete files and directories recursively. By default this
        #   method raises an HdfsError when attempting to delete a non-empty directory.
        # skip_trash: when set to False, the deleted path is moved to the
        #   corresponding trash folder instead of being removed. This requires
        #   Hadoop 2.9+ and trash to be enabled on the cluster.
        return self.client.delete(hdfs_path=file_path, recursive=False, skip_trash=True)

    def set_files_permission(self, file_path):
        '''
        Change a file's permissions.
        :param file_path: Path of the file whose permissions should change.
        '''
        # permission: the file's new octal permission string.
        self.client.set_permission(hdfs_path=file_path, permission=None)
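# A minimal usage sketch for the wrapper above, assuming a Kerberized WebHDFS
# endpoint and an active ticket (e.g. obtained via kinit). The URL and paths
# below are hypothetical.
hdfs_ops = OperateHDFS("http://namenode.example.com:9870")

hdfs_ops.mkdir("/user/demo/reports")
hdfs_ops.file_create_write("/user/demo/reports/summary.txt", "hello hdfs\n")
hdfs_ops.file_append_write("/user/demo/reports/summary.txt", "second line\n")
print(hdfs_ops.file_list("/user/demo/reports"))
print(hdfs_ops.file_read("/user/demo/reports/summary.txt"))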
""" ******************* *Copyright 2017, MapleLabs, All Rights Reserved. * ******************** """ import sys from hdfs.ext.kerberos import KerberosClient from hdfs.client import InsecureClient from requests import Session from requests_kerberos import HTTPKerberosAuth, DISABLED session = Session() session.verify = False kerberos_auth = HTTPKerberosAuth(mutual_authentication=DISABLED, force_preemptive=True, principal='') session.auth = kerberos_auth client = KerberosClient("", session=session) #client = InsecureClient("", session=session) file = sys.argv[1] destfile = sys.argv[2] print client.list('/mr-history/done') client.download(file, destfile, overwrite=True)