# Score the test set with the saved XGBoost model and write the predictions
# out as a two-column submission CSV (userid,orderType).
import numpy as np
import xgboost as xgb

from tools import local_file_util

# Load the previously trained booster from disk.
bst = xgb.Booster({'nthread': 4})
bst.load_model('xgb_model/xgb_v2.model')

# Column 0 of the TSV is passed to DMatrix as the label; every remaining
# column is a feature.
data = np.loadtxt('data/test_data.tsv', delimiter='\t')
test_x = data[:, 1:]
test_y = data[:, 0]
dtest = xgb.DMatrix(test_x, label=test_y)

# Named `predictions` (not `eval`) so the builtin is not shadowed.
predictions = bst.predict(dtest)

# First comma-separated field of every row after the first one
# (presumably the first row is the CSV header -- confirm against the data).
# Named `user_ids` (not `file`) for the same no-shadowing reason.
user_ids = [
    line.split(',')[0]
    for line in local_file_util.readFile(
        'bigdata/huangbaoche/huangbaoche_unzip/test/orderFuture_test.csv')[1:]
]

# Ids and predictions are paired purely by position; zip() stops at the
# shorter of the two sequences.
res = ['userid,orderType']
res.extend(
    user_id + ',' + str(score)
    for user_id, score in zip(user_ids, predictions))
local_file_util.writeFile('data/submit.csv', res)
# Left-join the test-set order history with the test-set user comments on
# (field 0, field 1) and write the merged rows out as CSV.

# Comment rows: keep only the text before the first double quote of each
# line, then split that prefix on commas. The first row is skipped.
comment_rows = []
for raw_line in local_file_util.readFile(
        'bigdata/huangbaoche/huangbaoche_unzip/test/userComment_test.csv')[1:]:
    unquoted_prefix = raw_line.split('\"')[0]
    comment_rows.append(unquoted_prefix.split(','))

# Order-history rows: plain comma split, first row skipped.
order_rows = [
    raw_line.split(',')
    for raw_line in local_file_util.readFile(
        'bigdata/huangbaoche/huangbaoche_unzip/test/orderHistory_test.csv')[1:]
]

# (field 0, field 1) -> [field 2, field 3]; a later row with the same key
# replaces an earlier one.
comment_lookup = {
    (fields[0], fields[1]): [fields[2], fields[3]]
    for fields in comment_rows
}

# Every order row is kept; rows without a matching comment get two empty
# trailing fields so all output rows have the same number of added columns.
no_comment = ['', '']
merge_res = [
    order_row + comment_lookup.get((order_row[0], order_row[1]), no_comment)
    for order_row in order_rows
]

local_file_util.writeFile('data/merge_orderHistory_userComment_test.csv',
                          [','.join(fields) for fields in merge_res])
# -*- coding: utf-8 -*-
# Group the training-set action log by user and write one TSV line per user,
# users with the most actions first.
# Output line: userId <TAB> actionCount <TAB> field2:field1 <TAB> field2:field1 ...
# (the original note describes each entry as time:actionType).
from tools import local_file_util

# Materialised as a list so the rows are not a single-use iterator.
action_rows = [
    line.split(',')
    for line in local_file_util.readFile(
        'bigdata/huangbaoche/huangbaoche_unzip/trainingset/action_train.csv')
    [1:]
]

# userId -> list of "field2:field1" strings, in input order.
userId_actionTypeList_dict = {}
for row in action_rows:
    # Append in place; rebuilding the list on every row (old_list + [item])
    # made the grouping quadratic in the number of actions per user.
    userId_actionTypeList_dict.setdefault(row[0], []).append(
        row[2] + ':' + row[1])

# Sort on the real list length instead of re-parsing the count from the
# formatted line. sorted() is stable, so users with equal counts keep the
# dict's iteration order.
ranked_users = sorted(
    userId_actionTypeList_dict.items(),
    key=lambda item: len(item[1]),
    reverse=True)

save_str = [
    '\t'.join([user_id, str(len(actions))] + actions)
    for user_id, actions in ranked_users
]
local_file_util.writeFile('data/userId_actionTypeNum.tsv', save_str)
# -*- coding: utf-8 -*-
# Histogram of the second column of data/user_orderNum.tsv: for each distinct
# value (the per-user order count written by the user_orderNum script), count
# how many users have it. Output lines are "orderNum <TAB> userCount", sorted
# by userCount descending.
from tools import local_file_util

# Materialised as a list so the rows are not a single-use iterator.
user_rows = [
    line.split('\t')
    for line in local_file_util.readFile('data/user_orderNum.tsv')
]

# orderNum (kept as the string read from the file) -> list of userIds.
orderNum_userId_dic = {}
for row in user_rows:
    # Append in place; rebuilding the list on every row (old_list + [item])
    # copied the whole list each time.
    orderNum_userId_dic.setdefault(row[1], []).append(row[0])

# sorted() is stable, so order counts shared by the same number of users keep
# the dict's iteration order.
orderNum_userIdNum = sorted(
    ((order_num, len(user_ids))
     for order_num, user_ids in orderNum_userId_dic.items()),
    key=lambda pair: pair[1],
    reverse=True)

# A real list (not a lazy map object) is handed to the writer.
save_str = [
    order_num + '\t' + str(user_count)
    for order_num, user_count in orderNum_userIdNum
]
local_file_util.writeFile('data/orderNum_userIdNum.tsv', save_str)
# -*- coding: utf-8 -*-
# Count the number of orders for each user.
# Output lines are "userId <TAB> orderCount", sorted by orderCount descending.
from tools import local_file_util

# Rows of the training order history; the first row is skipped.
# Materialised as a list so the rows are not a single-use iterator.
order_rows = [
    line.split(',')
    for line in local_file_util.readFile(
        'bigdata/huangbaoche/huangbaoche_unzip/trainingset/orderHistory_train.csv'
    )[1:]
]
userId_orderId_list = [(row[0], row[1]) for row in order_rows]

# userId -> [orderId1, orderId2, ...]
dic = {}
for user_id, order_id in userId_orderId_list:
    # Append in place; rebuilding the list on every row (old_list + [item])
    # copied the whole list each time.
    dic.setdefault(user_id, []).append(order_id)

# sorted() is stable, so users with equal counts keep the dict's iteration
# order.
userId_oderNum = sorted(
    ((user_id, len(order_ids)) for user_id, order_ids in dic.items()),
    key=lambda pair: pair[1],
    reverse=True)

# A real list (not a lazy map object) is handed to the writer.
res_save_str = [
    user_id + '\t' + str(order_count)
    for user_id, order_count in userId_oderNum
]
local_file_util.writeFile('data/user_orderNum.tsv', res_save_str)
str(num) for num in [ action_time_list.mean(), action_time_list.max(), action_time_list.min(), action_time_list.std(), action_time_list.size ] ] origin_train_line = origin_train_line + action_count_feat + action_type_count_feat + action_time_feat yield origin_train_line res = list(make_origin_train()) local_file_util.writeFile('data/orgin_train_data.tsv', ['\t'.join(line) for line in res]) #################################### origin_train_data = [ l.split('\t') for l in local_file_util.readFile('data/orgin_train_data.tsv') ] check = [ '\t'.join([str(i) + ':' + l[i] for i in range(l.__len__())]) for l in origin_train_data ] city_info = dict([[l[0], l[1]] for l in [ line.split('\t')