示例#1
0
def get_pd_DF(cli: InsecureClient, file_path, header):
    """
    Load a CSV file stored on HDFS into a pandas DataFrame.

    :param cli: hdfs ``InsecureClient`` used to open the file
    :param file_path: path of the CSV file on HDFS, relative to the root
        path configured on the ``InsecureClient``
    :param header: forwarded unchanged to ``pandas.read_csv`` as ``header``
    :return: the DataFrame parsed from the file
    """
    # Parse while the HDFS stream is still open; the context manager
    # releases it as soon as the function returns.
    with cli.read(file_path) as csv_stream:
        return pd.read_csv(csv_stream, header=header)
示例#2
0
from hdfs.client import Client, InsecureClient
import redis
import json
# r = redis.StrictRedis(host='192.168.1.176', port=6379,decode_responses=True)

# HDFS client pointed at the HTTP endpoint below.
# Paths handed to the client do not take an "hdfs://" prefix,
# e.g. client.list("/") -> ['recipe', 'tmp', 'user']
client = InsecureClient("http://192.168.1.176:50070", user='******')

# Read the whole recipe file from HDFS and parse it as JSON.
with client.read("/recipe/recipe1018_V8.json") as reader:
    data = json.loads(reader.read())

# Print the first ten items as a quick check
# (slicing presumes the top-level JSON value is a list — confirm).
print(data[:10])