def get_pd_DF(
    cli: "InsecureClient", file_path, header="infer"
) -> "pd.DataFrame":
    """Read a CSV file stored on HDFS and return it as a pandas DataFrame.

    :param cli: hdfs ``InsecureClient`` used to open the file.
    :param file_path: path of the CSV file on HDFS, relative to the root
        path configured on the client.
    :param header: forwarded unchanged to ``pandas.read_csv(header=...)``.
        Defaults to ``"infer"``, which is also pandas' own default.
    :return: the DataFrame parsed from the file.
    """
    # The annotations are string literals on purpose: they are not evaluated
    # when the function is defined, so the definition does not depend on
    # ``InsecureClient`` already being imported at that point.
    with cli.read(file_path) as reader:
        # read_csv consumes the whole stream before returning, so the frame
        # is complete by the time the reader is closed on exit.
        return pd.read_csv(reader, header=header)
import json

import redis
from hdfs.client import Client, InsecureClient

# URL handed to the HDFS client and the JSON file read through it.
HDFS_URL = "http://192.168.1.176:50070"
RECIPE_PATH = "/recipe/recipe1018_V8.json"

# Redis connection, currently disabled:
# r = redis.StrictRedis(host='192.168.1.176', port=6379,decode_responses=True)

# Paths given to the client do not take an hdfs:// prefix.
# Example: client.list("/") -> ['recipe', 'tmp', 'user']
client = InsecureClient(HDFS_URL, user='******')

# Parse the remote file as JSON straight from the read stream.
with client.read(RECIPE_PATH) as reader:
    data = json.load(reader)

# Print the first ten elements of the loaded data.
print(data[:10])