import datetime

from hdfs import InsecureClient
from hdfs.util import HdfsError


def save_to_hdfs(self, key, url, title, content):
    # hdfs_web, hdfs_dir, and hdfs_user are module-level configuration
    # defined elsewhere (see the sketch below).
    current_date = datetime.datetime.now().strftime("%Y%m%d")
    hdfs_path = hdfs_dir + current_date
    data = "\n" + key + "\n" + url + "\n"
    if title:
        data += title + "\n"
    if content:
        data += content + "\n"
    client = InsecureClient(hdfs_web, user=hdfs_user)
    try:
        # append to the current day's file if it already exists...
        client.write(hdfs_path=hdfs_path, data=data, append=True)
    except HdfsError:
        # ...otherwise the append fails and we create the file instead
        client.write(hdfs_path=hdfs_path, data=data)
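save_to_hdfs reads three names that are not defined in the snippet itself. A hedged sketch of the module-level configuration it presumably relies on; the values are placeholders, not from the original source:

# Hypothetical module-level configuration assumed by save_to_hdfs above;
# endpoint, directory, and user are illustrative placeholders.
hdfs_web = "http://namenode:9870"   # WebHDFS endpoint
hdfs_dir = "/data/pages/"           # daily files are written under this prefix
hdfs_user = "hdfs"                  # HDFS user for the InsecureClient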
import pandas as pd
from hdfs import InsecureClient


def save_pd_DF(df_pd: pd.DataFrame, cli: InsecureClient, file_path):
    """
    Write a pandas DataFrame to HDFS as CSV.

    :param df_pd: the pandas DataFrame to write
    :param cli: an hdfs InsecureClient
    :param file_path: HDFS file path, relative to the root set on the InsecureClient
    """
    with cli.write(hdfs_path=file_path, encoding='utf-8', overwrite=True) as writer:
        df_pd.to_csv(writer)
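A minimal usage sketch for the function above, assuming a WebHDFS endpoint on localhost:9870; the endpoint, user, and target path are illustrative, not from the source:

# Usage sketch; endpoint, user, and path are assumptions.
df = pd.DataFrame({"name": ["alice", "bob"], "score": [1, 2]})
cli = InsecureClient("http://localhost:9870", user="hdfs")  # hypothetical endpoint/user
save_pd_DF(df, cli, "/tmp/scores.csv")

# Read the file back to verify the round trip.
with cli.read("/tmp/scores.csv", encoding="utf-8") as reader:
    print(reader.read())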
log("Loading types for function %s" % symbol) # skip the first argument, which is FunctionContext* arg_types = tuple([llvm2impala[arg.pointee.name] for arg in function.type.pointee.args[1:]]) functions.append((symbol, arg_types)) except (AttributeError, KeyError): # this process could fail for non-UDF helper functions...just ignore # them, because we're not going to be registering them anyway log("Had trouble with function %s; moving on..." % symbol) pass # transfer the LLVM module to HDFS url = 'http://{nn_host}:{webhdfs_port}'.format( nn_host=args.nn_host, webhdfs_port=args.webhdfs_port) hdfs_client = InsecureClient(url, user=args.user) hdfs_client.write(args.hdfs_path, bc, overwrite=args.force) log("Transferred LLVM IR to HDFS at %s" % args.hdfs_path) # register the functions with impala conn = impala.dbapi.connect(host=args.impala_host, port=args.impala_port) cursor = conn.cursor(user=args.user) log("Connected to impalad: %s" % args.impala_host) if args.db: cursor.execute('USE %s' % args.db) cursor.execute("SHOW FUNCTIONS") registered_functions = cursor.fetchall() for (udf_name, return_type) in zip(args.name, args.return_type): log("Registering function %s" % udf_name) # find matching LLVM symbols to the current UDF name matches = [pair for pair in functions if udf_name in pair[0]] if len(matches) == 0:
from hdfs import InsecureClient

# Excerpt: the fragment below is the tail of the aggregation pipeline inside count_hr().
                'localField': '_id.playerID',
                'foreignField': 'playerID',
                'as': 'playersinfo'
            }
        },
        {
            '$lookup': {
                'from': 'teamsfranchises',
                'localField': 'teams._id.franchID',
                'foreignField': 'franchID',
                'as': 'teamsinfo'
            }
        }
    ])
    return result

results = count_hr()
output_path = "/users/mongo/Wang/output"

# create connection with hdfs cluster
hdfs_client = InsecureClient("http://localhost:9870/", user="******")

# create a file
hdfs_client.write(output_path, "", overwrite=False, append=False)

# append data into the output, one line per result row
for row in results:
    line = (row['teamsinfo'][0]['franchName'] + " " +
            row['playersinfo'][0]['nameFirst'] + " " +
            row['playersinfo'][0]['nameLast'] + " " +
            str(row['_id']['yearID']))
    hdfs_client.write(output_path, line, overwrite=False, append=True)
    hdfs_client.write(output_path, "\n", overwrite=False, append=True)
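The loop above issues two WebHDFS append requests per row. A hedged alternative sketch that buffers the rows and performs a single write; it calls count_hr() again on the assumption that the earlier loop has already consumed the cursor:

# Single-write alternative; joins all rows in memory before writing.
lines = ("%s %s %s %s" % (row['teamsinfo'][0]['franchName'],
                          row['playersinfo'][0]['nameFirst'],
                          row['playersinfo'][0]['nameLast'],
                          row['_id']['yearID'])
         for row in count_hr())
hdfs_client.write(output_path, "\n".join(lines) + "\n", overwrite=True)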