def convert_to_parquet(pJson):
    """Convert a local JSON file to Parquet, upload it to HDFS, and clean up.

    Args:
        pJson: path to the local JSON file to convert.

    Side effects:
        Writes a uniquely-named Parquet file under ../output/, uploads it to
        /dpi-aggregate/ on the cluster, and deletes both local files only
        after a successful upload.
    """
    # Read and parse the source JSON; the context manager closes the file
    # (the original also called f.close() redundantly inside the `with`).
    with open(pJson, 'r') as f:
        data = json.load(f)

    # Flatten (possibly nested) JSON into a tabular DataFrame.
    df = pd.json_normalize(data)

    # uuid1 hex gives a unique name so concurrent runs never collide.
    parq_hex = uuid.uuid1().hex
    path_parquet = "../output/jsonParquet-" + parq_hex + ".parquet"
    df.to_parquet(path=path_parquet, compression='GZIP')
    print(pd.read_parquet(path_parquet))

    try:
        # Upload to the cluster, then remove the local copies only once the
        # upload has succeeded.
        webHDFS = webhdfspy.WebHDFSClient("mercury.tritronik.com", 50070, "hdfs")
        pathHdfs = '/dpi-aggregate/' + parq_hex + ".parquet"
        webHDFS.copyfromlocal(path_parquet, pathHdfs)
        os.remove(pJson)
        os.remove(path_parquet)
    except Exception:
        # FIX: the original used a bare `except:` (which also swallows
        # SystemExit/KeyboardInterrupt) and `logging.error(traceback)`,
        # which logs the traceback *module object*, not the stack trace.
        # logging.exception records the actual traceback.
        logging.exception("failed to upload parquet file %s to HDFS", path_parquet)
def setUp(self):
    """Create a fresh WebHDFS client against the local test NameNode."""
    # Default WebHDFS HTTP port (50070) on localhost, acting as user 'fabio'.
    client = webhdfspy.WebHDFSClient('localhost', 50070, 'fabio')
    self.webHDFS = client
#!/usr/bin/env python
"""Smoke-test script exercising the basic WebHDFS client operations.

FIX: the original used Python 2 `print` statements, which are syntax errors
on Python 3 (Python 2 is end-of-life); converted to print() calls with
behavior otherwise unchanged.
"""
import time
import json

import webhdfspy

c = webhdfspy.WebHDFSClient('1.1.1.1', 8443, 'USER', 'PASS')

print('\n## list dir ##')
print(json.dumps(c.listdir('/tmp'), indent=4))
time.sleep(1)

print('\n## mkdir: /tmp/test_webhdfs ##')
c.mkdir('/tmp/test_webhdfs')
time.sleep(1)

print('\n## create file: /tmp/test_webhdfs/text ##')
c.create('/tmp/test_webhdfs/text', 'text', True)
print(json.dumps(c.listdir('/tmp/test_webhdfs')))
time.sleep(1)

print('\n## copyfromlocal: /etc/hosts to /tmp/test_webhdfs/test_hosts ##')
c.copyfromlocal('/etc/hosts', '/tmp/test_webhdfs/test_hosts', True)
time.sleep(1)

print('\n## rename to /tmp/test_webhdfs/test_hosts_rename ##')
c.rename('/tmp/test_webhdfs/test_hosts', '/tmp/test_webhdfs/test_hosts_rename')
time.sleep(1)

print('\n## open: /tmp/test_webhdfs/test_hosts_rename ##')
print(c.open('/tmp/test_webhdfs/test_hosts_rename'))
time.sleep(1)
def setUp(self):
    """Open a WebHDFS connection and make sure the test directory exists."""
    client = webhdfspy.WebHDFSClient('localhost', 50070, 'fabio')
    self.webHDFS = client
    # Create the working directory used by the tests before each test runs.
    client.mkdir(TEST_DIR_PATH)
""" 날짜 : 2020/07/22 이름 : 김철학 내용 : 파이썬 Hadoop 실습하기 """ from pywebhdfs.webhdfs import PyWebHdfsClient as hadoop import webhdfspy hdfs = webhdfspy.WebHDFSClient('192.168.100.101', 50070, 'root') #print(hdfs.listdir('/')) #hdfs.mkdir('/test1') hdfs.copyfromlocal(local_path='/home/bigdata/naver', hdfs_path='/naver', overwrite=True) print('완료') #Hadoop 접속 #Local의 /home/bigdata/naver/naver-20-xx-xx를 하둡 /naver/ 복사 #Local의 /home/bigdata/naver/naver-20-xx-xx를 삭제 #프로그램 종료
import webhdfspy
import pandas as pd

# Connect to the cluster's WebHDFS endpoint (username redacted in source).
webHDFS = webhdfspy.WebHDFSClient("host6.cloud.sinocbd.com", 50070, username='******')

# List the HDFS root and load the directory entries into a DataFrame.
data = pd.DataFrame(webHDFS.listdir('/'))
print(data)

pathlist = data['pathSuffix']
for i in pathlist:
    # FIX: the original computed `"/" + pathlist`, concatenating "/" with the
    # whole Series on every iteration (the loop variable `i` was never used),
    # so `path` was a Series rather than the current entry's path string.
    path = "/" + i
    # print(path)
    # print(webHDFS.listdir(path))
""" 날짜 : 2020/07/22 이름 : 김동욱 내용 : 파이썬 Hadoop 실습하기 """ #from pywebhdfs.webhdfs import PyWebHdfsClient as hadoop import webhdfspy #hadoop 접속 hdfs = webhdfspy.WebHDFSClient(host='192.168.100.101', port=50070, username='******') #HDFS 디렉토리 생성 hdfs.mkdir('/sample') #HDFS 파일 생성 text = 'Hello Hadoop! 반갑습니다.' hdfs.create('/sample/test.txt', text.encode('UTF-8'), overwrite=True) print('프로그램 종료...')
def setUp(self):
    """Open a WebHDFS client to the local NameNode for each test."""
    # HADOOP_USERNAME is expected to be defined at module level.
    host, port = 'localhost', 50070
    self.webHDFS = webhdfspy.WebHDFSClient(host, port, HADOOP_USERNAME)