Пример #1
0
import numpy as np
import xgboost as xgb

from tools import local_file_util

# Score the test set with a previously trained booster and write the
# result as a two-column CSV (userid, orderType).

# Booster is created with nthread=4 and then populated from the saved model.
bst = xgb.Booster({'nthread': 4})
bst.load_model('xgb_model/xgb_v2.model')

# Tab-separated numeric matrix: column 0 is passed as the label, every
# remaining column as a feature.
data = np.loadtxt('data/test_data.tsv', delimiter='\t')
test_x = data[:, 1:]
test_y = data[:, 0]
dtest = xgb.DMatrix(test_x, label=test_y)

# Deliberately not called `eval`: that name would shadow the builtin eval()
# for everything that runs after this point in the module.
predictions = bst.predict(dtest)

# First comma-separated field of every line after the header line.
user_ids = [
    line.split(',')[0] for line in local_file_util.readFile(
        'bigdata/huangbaoche/huangbaoche_unzip/test/orderFuture_test.csv')[1:]
]

# Ids and predictions are paired purely by position; zip() stops at the
# shorter of the two, so both inputs must describe the same rows in the
# same order.
res = ['userid,orderType']
res.extend(user_id + ',' + str(score)
           for user_id, score in zip(user_ids, predictions))

local_file_util.writeFile('data/submit.csv', res)
# Left-join order history with user comments on (userId, orderId) and write
# the merged rows as CSV.
# NOTE: the variables are named *_train, but the files read here are the
# ones under the test/ directory.

# For each comment line (header skipped) keep only the text before the first
# double quote, then split that prefix on commas.
userComment_train = [
    raw.split('"')[0].split(',')
    for raw in local_file_util.readFile(
        'bigdata/huangbaoche/huangbaoche_unzip/test/userComment_test.csv')[1:]
]

# Order-history lines (header skipped) split on commas.
orderHistory_train = [
    raw.split(',')
    for raw in local_file_util.readFile(
        'bigdata/huangbaoche/huangbaoche_unzip/test/orderHistory_test.csv')[1:]
]

# (field 0, field 1) -> [field 2, field 3]; a later duplicate key replaces
# an earlier one.
userComment_train_dict = {
    (fields[0], fields[1]): [fields[2], fields[3]]
    for fields in userComment_train
}

merge_res = []
for order_fields in orderHistory_train:
    join_key = (order_fields[0], order_fields[1])
    # Orders without a matching comment get two empty columns instead.
    comment_fields = userComment_train_dict.get(join_key, ['', ''])
    merge_res.append(order_fields + comment_fields)

merged_lines = list(map(','.join, merge_res))
local_file_util.writeFile('data/merge_orderHistory_userComment_test.csv',
                          merged_lines)
# -*- coding: utf-8 -*

from tools import local_file_util

# Group the action log by user and write one line per user:
#   userId <TAB> number of actions <TAB> "field2:field1" entries...
# Users with the most actions come first.

# Action-log lines (header skipped) split on commas, consumed lazily.
action_rows = (
    line.split(',') for line in local_file_util.readFile(
        'bigdata/huangbaoche/huangbaoche_unzip/trainingset/action_train.csv')
    [1:]
)

# userId -> list of "field2:field1" strings in file order.
# append() keeps this linear; rebuilding the list with `old + [new]` on every
# row copies the whole list each time.
userId_actionTypeList_dict = {}
for fields in action_rows:
    userId_actionTypeList_dict.setdefault(fields[0], []).append(
        fields[2] + ':' + fields[1])

# Sort on the real list length instead of re-parsing the count out of the
# formatted line. sorted() is stable, so users with equal counts keep the
# dictionary's iteration order.
users_by_action_count = sorted(userId_actionTypeList_dict.items(),
                               key=lambda item: len(item[1]),
                               reverse=True)

save_str = [
    user_id + '\t' + str(len(actions)) + '\t' + '\t'.join(actions)
    for user_id, actions in users_by_action_count
]

local_file_util.writeFile('data/userId_actionTypeNum.tsv', save_str)

# Output row format: userId  actionNum (rows sorted by this, descending)  time:actiontype  time2:actiontype ...
Пример #4
0
# -*- coding: utf-8 -*

from tools import local_file_util

# Histogram over data/user_orderNum.tsv: for every distinct value in the
# second column, count how many rows carry it, and write
#   value <TAB> count
# with the most frequent value first.

# Input lines split on tabs; judging by the variable names, field 0 is a
# user id and field 1 that user's order count.
user_order_rows = (
    line.split('\t')
    for line in local_file_util.readFile('data/user_orderNum.tsv')
)

# field 1 -> list of field-0 values that share it.
# append() avoids copying the accumulated list on every row.
orderNum_userId_dic = {}
for fields in user_order_rows:
    orderNum_userId_dic.setdefault(fields[1], []).append(fields[0])

# (value, number of rows with that value), largest group first; sorted() is
# stable, so equal counts keep dictionary iteration order.
orderNum_userIdNum = sorted(
    ((order_num, len(user_ids))
     for order_num, user_ids in orderNum_userId_dic.items()),
    key=lambda pair: pair[1],
    reverse=True)

# A real list, not a lazy map object, so the writer receives the same kind
# of argument regardless of Python version.
save_str = [
    order_num + '\t' + str(user_count)
    for order_num, user_count in orderNum_userIdNum
]

local_file_util.writeFile('data/orderNum_userIdNum.tsv', save_str)
Пример #5
0
# -*- coding: utf-8 -*
#统计每个用户的订单数量
from tools import local_file_util

# Count the orders of every user in the training order history and write
#   userId <TAB> order count
# with the users that have the most orders first.

# Order-history lines (header skipped) split on commas, consumed lazily.
order_rows = (
    line.split(',') for line in local_file_util.readFile(
        'bigdata/huangbaoche/huangbaoche_unzip/trainingset/orderHistory_train.csv'
    )[1:]
)

# (userId, orderId) pairs taken from the first two fields of each row.
userId_orderId_list = [(fields[0], fields[1]) for fields in order_rows]

# userId -> [orderId1, orderId2, ...]
# append() avoids copying the accumulated list on every row.
dic = {}
for user_id, order_id in userId_orderId_list:
    dic.setdefault(user_id, []).append(order_id)

# (userId, number of orders), largest first; sorted() is stable, so users
# with equal counts keep dictionary iteration order.
userId_oderNum = sorted(
    ((user_id, len(order_ids)) for user_id, order_ids in dic.items()),
    key=lambda pair: pair[1],
    reverse=True)

# A real list, not a lazy map object, so the writer receives the same kind
# of argument regardless of Python version.
res_save_str = [
    user_id + '\t' + str(order_count)
    for user_id, order_count in userId_oderNum
]

local_file_util.writeFile('data/user_orderNum.tsv', res_save_str)
Пример #6
0
                str(num) for num in [
                    action_time_list.mean(),
                    action_time_list.max(),
                    action_time_list.min(),
                    action_time_list.std(), action_time_list.size
                ]
            ]

        origin_train_line = origin_train_line + action_count_feat + action_type_count_feat + action_time_feat

        yield origin_train_line


# Drain the generator once so every produced row is held in memory.
res = list(make_origin_train())

# Persist the rows, one tab-joined line per row.
origin_train_lines = list(map('\t'.join, res))
local_file_util.writeFile('data/orgin_train_data.tsv', origin_train_lines)

####################################

# Read the file back and split every line into its tab-separated fields.
origin_train_data = [
    raw_line.split('\t')
    for raw_line in local_file_util.readFile('data/orgin_train_data.tsv')
]

# Debug view of the same rows: every field is prefixed with its column
# index ("0:value<TAB>1:value ...").
check = []
for fields in origin_train_data:
    indexed_fields = [
        str(position) + ':' + value for position, value in enumerate(fields)
    ]
    check.append('\t'.join(indexed_fields))

city_info = dict([[l[0], l[1]] for l in [
    line.split('\t')