Exemplo n.º 1
0
from tools import local_file_util

# Read the raw training TSV and break every line into its tab-separated
# fields.
file = []
for raw_line in local_file_util.readFile('data/orgin_train_data.tsv'):
    file.append(raw_line.split('\t'))

# Distinct values found in the fifth column (index 4) across all rows.
s = {fields[4] for fields in file}
# -*- coding: utf-8 -*

from tools import local_file_util

# NOTE: the *_train names are kept unchanged, but both inputs read below are
# the test-split CSV files.

# For every comment line after the header row, keep only the text that
# precedes the first double quote and split that prefix on commas.
comment_lines = local_file_util.readFile(
    'bigdata/huangbaoche/huangbaoche_unzip/test/userComment_test.csv')[1:]
userComment_train = []
for comment_line in comment_lines:
    unquoted_prefix = comment_line.split('\"')[0]
    userComment_train.append(unquoted_prefix.split(','))

# Order-history rows: header dropped, each line split on commas.
order_lines = local_file_util.readFile(
    'bigdata/huangbaoche/huangbaoche_unzip/test/orderHistory_test.csv')[1:]
orderHistory_train = [order_line.split(',') for order_line in order_lines]

# (field 0, field 1) of a comment row -> [field 2, field 3] of that row.
# A later row with the same key replaces an earlier one.
userComment_train_dict = {
    (fields[0], fields[1]): [fields[2], fields[3]]
    for fields in userComment_train
}

# Left join: every order row is kept and extended with the two comment
# fields for the same (field 0, field 1) key, or with two empty strings
# when no comment row matches.
merge_res = []
no_comment = ['', '']
for order_fields in orderHistory_train:
    join_key = (order_fields[0], order_fields[1])
    comment_fields = userComment_train_dict.get(join_key, no_comment)
    merge_res.append(order_fields + comment_fields)
Exemplo n.º 3
0
import xgboost as xgb

# Create a booster configured with 4 threads and load the saved model.
bst = xgb.Booster({'nthread': 4})
bst.load_model('xgb_model/xgb_v2.model')

import numpy as np

# Column 0 of the test TSV is passed as the label; all remaining columns
# are the feature matrix.
data = np.loadtxt('data/test_data.tsv', delimiter='\t')
test_y = data[:, 0]
test_x = data[:, 1:]
dtest = xgb.DMatrix(test_x, label=test_y)

# NOTE: `eval` shadows the builtin of the same name; the name is kept so
# that this rewrite stays a drop-in replacement.
eval = bst.predict(dtest)

from tools import local_file_util

# First comma-separated field of every row after the header.
id_lines = local_file_util.readFile(
    'bigdata/huangbaoche/huangbaoche_unzip/test/orderFuture_test.csv')[1:]
file = [id_line.split(',')[0] for id_line in id_lines]

# Header row followed by one "<id>,<prediction>" line per id; zip() pairs
# ids and predictions positionally.
res = ['userid,orderType']
for user_id, score in zip(file, list(eval)):
    res.append(user_id + ',' + str(score))

local_file_util.writeFile('data/submit.csv', res)
# -*- coding: utf-8 -*

from tools import local_file_util

# Parse the action log: drop the header row and split each line on commas.
# A list (rather than map()) is used so the rows are materialized on both
# Python 2 and Python 3.
file = [
    line.split(',') for line in local_file_util.readFile(
        'bigdata/huangbaoche/huangbaoche_unzip/trainingset/action_train.csv')
    [1:]
]

# userId -> list of "<field 2>:<field 1>" strings, in file order.
# Appending in place avoids rebuilding the whole list for every row, which
# made the original accumulation quadratic in the number of rows per user.
userId_actionTypeList_dict = {}
for line in file:
    userId_actionTypeList_dict.setdefault(line[0], []).append(
        line[2] + ':' + line[1])

# Users with the most actions first. sorted() is stable, so users with the
# same count keep the dict's iteration order, exactly as before.
users_by_action_count = sorted(
    userId_actionTypeList_dict,
    key=lambda user_id: len(userId_actionTypeList_dict[user_id]),
    reverse=True)

# Output row format (tab-separated):
#   userId  actionNum(sorted, descending)  time:actionType  time2:actionType ...
save_str = []
for user_id in users_by_action_count:
    actions = userId_actionTypeList_dict[user_id]
    save_str.append('\t'.join([user_id, str(len(actions))] + actions))

local_file_util.writeFile('data/userId_actionTypeNum.tsv', save_str)
Exemplo n.º 5
0
# -*- coding: utf-8 -*

from tools import local_file_util

# Each input row is split on tabs; field 0 is used as the user id and
# field 1 as that user's order count (kept as the string read from file).
# A list is built so the rows are materialized on both Python 2 and 3.
file = [
    line.split('\t')
    for line in local_file_util.readFile('data/user_orderNum.tsv')
]

# order count -> list of user ids that have exactly that count.
# Appending in place replaces the original `old_list + [item]` copy, which
# was quadratic in the size of each group.
orderNum_userId_dic = {}
for userId_orderNum in file:
    orderNum_userId_dic.setdefault(userId_orderNum[1],
                                   []).append(userId_orderNum[0])

# (order count, number of users) pairs, most common order count first.
# sorted() is stable, so ties keep the dict's iteration order.
orderNum_userIdNum = sorted(
    ((order_num, len(user_ids))
     for order_num, user_ids in orderNum_userId_dic.items()),
    key=lambda pair: pair[1],
    reverse=True)

# One "<order count>\t<number of users>" line per pair. Built eagerly so
# writeFile receives a real list rather than a one-shot iterator.
save_str = [
    order_num + '\t' + str(user_count)
    for order_num, user_count in orderNum_userIdNum
]

local_file_util.writeFile('data/orderNum_userIdNum.tsv', save_str)
Exemplo n.º 6
0
# -*- coding: utf-8 -*
# Count the number of orders for each user
from tools import local_file_util

# Parse the order history: drop the header row and split each line on
# commas. A list is built so the rows are materialized on Python 2 and 3.
file = [
    line.split(',') for line in local_file_util.readFile(
        'bigdata/huangbaoche/huangbaoche_unzip/trainingset/orderHistory_train.csv'
    )[1:]
]

# (userId, orderId) pairs taken from the first two fields of every row.
userId_orderId_list = [(line[0], line[1]) for line in file]

# userId -> [orderId1, orderId2, ...]
# Appending in place replaces the original `old_list + [item]` copy, which
# was quadratic in the number of orders per user.
dic = {}
for user_id, order_id in userId_orderId_list:
    dic.setdefault(user_id, []).append(order_id)

# (userId, order count) pairs, users with the most orders first.
# sorted() is stable, so ties keep the dict's iteration order.
userId_oderNum = sorted(
    ((user_id, len(order_ids)) for user_id, order_ids in dic.items()),
    key=lambda pair: pair[1],
    reverse=True)

# One "<userId>\t<order count>" line per user. Built eagerly so writeFile
# receives a real list rather than a one-shot iterator.
res_save_str = [
    user_id + '\t' + str(order_num) for user_id, order_num in userId_oderNum
]

local_file_util.writeFile('data/user_orderNum.tsv', res_save_str)
Exemplo n.º 7
0
# -*- coding: utf-8 -*

# How to build the test set: 1) set origin_train_line = ['-1', user_id]; 2) switch the trainingset inputs to test; 3) change the output file.
from tools import local_file_util
from itertools import groupby
import numpy as np

def _read_csv_rows(path, skip_header=True):
    """Read *path* via local_file_util and split every line on commas.

    When *skip_header* is true the first line returned by readFile is
    dropped before splitting.
    """
    lines = local_file_util.readFile(path)
    if skip_header:
        lines = lines[1:]
    return [line.split(',') for line in lines]


# NOTE: the *_train names are kept unchanged, but every path below points
# at test-split data.
orderFuture_train = _read_csv_rows(
    'bigdata/huangbaoche/huangbaoche_unzip/test/orderFuture_test.csv')

action_train = _read_csv_rows(
    'bigdata/huangbaoche/huangbaoche_unzip/test/action_test.csv')

# This file is read in full: no header row is skipped.
orderHistory_comment_train = _read_csv_rows(
    'data/merge_orderHistory_userComment_test.csv', skip_header=False)

userProfile_train = _read_csv_rows(
    'bigdata/huangbaoche/huangbaoche_unzip/test/userProfile_test.csv')

continent_rate_dict = {
    '大洋洲': 1.4 / 0.33,
    '欧洲': 19.1 / 7.25,
    '非洲': 2.2 / 8.69,
Exemplo n.º 8
0
# -*- coding: utf-8 -*
# Count the number of orders for each user
from tools import local_file_util

# Parse the order history: drop the header row and split each line on
# commas. A list is built so the rows are materialized on Python 2 and 3.
file = [
    line.split(',') for line in local_file_util.readFile(
        'bigdata/皇包车比赛/皇包车比赛数据-非压缩包/trainingset/orderHistory_train.csv'
    )[1:]
]

# (userId, orderId) pairs taken from the first two fields of every row.
userId_orderId_list = [(line[0], line[1]) for line in file]

# userId -> [orderId1, orderId2, ...]
# Appending in place replaces the original `old_list + [item]` copy, which
# was quadratic in the number of orders per user.
dic = {}
for user_id, order_id in userId_orderId_list:
    dic.setdefault(user_id, []).append(order_id)

# (userId, order count) pairs, users with the most orders first.
# sorted() is stable, so ties keep the dict's iteration order.
userId_oderNum = sorted(
    ((user_id, len(order_ids)) for user_id, order_ids in dic.items()),
    key=lambda pair: pair[1],
    reverse=True)

# One "<userId>\t<order count>" line per user. Built eagerly so writeFile
# receives a real list rather than a one-shot iterator.
res_save_str = [
    user_id + '\t' + str(order_num) for user_id, order_num in userId_oderNum
]

local_file_util.writeFile('data/user_orderNum.tsv', res_save_str)