Exemplo n.º 1
0
def main(k=100, user_sim=None, data_file=r'./data/movielens_data.pkl'):
    '''
    Run the UserCF pipeline and score how well it predicts item trends.

    Args:
        k: forwarded to ``UserCF`` as its ``k`` keyword argument.
        user_sim: optional precomputed user similarity. When ``None`` it is
            computed with ``cf.similarity()``; otherwise the given value is
            assigned to ``cf.user_sim`` and no similarity is computed.
        data_file: path of the pickle holding a ``(train, test)`` pair; the
            same path is also handed to ``utils.deal_train``.

    Returns:
        A ``(corr_score, recommend_score, user_sim)`` tuple, so a caller can
        feed ``user_sim`` back into a later call.
    '''
    # Open read-only inside a context manager: the handle is closed
    # deterministically and no write permission is needed on the data file.
    # NOTE: pickle can execute arbitrary code while loading; only point
    # data_file at trusted files.
    with open(data_file, 'rb') as f:
        train, test = pickle.load(f)
    # NOTE(review): these two results are not used below; the calls are kept
    # in case stat_train_test_item has side effects - confirm and remove.
    train_itemset = stat_train_test_item(train)
    test_itemset = stat_train_test_item(test)
    # `test` is rebound here to the value produced by utils.deal_train.
    trainset, test, item_len = utils.deal_train(data_file)
    cf = UserCF(trainset, test, item_len, k=k)
    degreedistrev = degree_item_map(cf)
    Ndegree_items, Nitemset = getNdegree_items(degreedistrev, N=10)

    start = time.time()
    print('calculate similarity.')
    if user_sim is None:
        user_sim = cf.similarity()
    else:
        # Reuse the caller-supplied similarity instead of recomputing it.
        cf.user_sim = user_sim
    print('calculate similarity finished. cost {:.2f}s'.format(
        time.time() - start))

    start = time.time()
    recommend_score = cf.get_score(Nitemset)
    print('calculate recommender score. cost {:.2f}s'.format(
        time.time() - start))

    user_degree = cf.cal_user_degree()
    item_score = get_item_score(user_degree, recommend_score, item_len)
    test_item_degree = get_test_degree(test)
    print('start trend predict.')
    corr_score = trend_predict(item_score,
                               Ndegree_items,
                               test_item_degree,
                               method='pearson')
    print(corr_score)
    return corr_score, recommend_score, user_sim
Exemplo n.º 2
0
def main(our_lambda=1, data_file=r'./data/movielens_data.pkl',
         recommend_score_file=r'./temp/cf_score.pkl', recommend_score=None):
    '''
    Run the UserCF trend-prediction pipeline with an on-disk score cache.

    Args:
        our_lambda: forwarded to ``get_item_score`` as ``our_lambda``.
        data_file: path of the pickle holding a ``(train, test)`` pair; the
            same path is also handed to ``utils.deal_train``.
        recommend_score_file: pickle file used to cache the result of
            ``cf.cf_train()`` between runs.
        recommend_score: optional precomputed recommendation scores. When
            given, they are used as-is and the cache file is neither read
            nor written. When ``None``, the scores are loaded from
            ``recommend_score_file`` if it exists, otherwise trained and
            written there.

    Returns:
        A ``(corr_score, recommend_score)`` tuple.
    '''
    # Open read-only inside a context manager: the handle is closed
    # deterministically and no write permission is needed on the data file.
    # NOTE: pickle can execute arbitrary code while loading; only point
    # data_file / recommend_score_file at trusted files.
    with open(data_file, 'rb') as f:
        train, test = pickle.load(f)
    train_itemset = stat_train_test_item(train)
    test_itemset = stat_train_test_item(test)
    # `test` is rebound here to the value produced by utils.deal_train.
    trainset, test, item_len = utils.deal_train(data_file)
    cf = UserCF(trainset, test, item_len)
    degreedistrev = degree_item_map(cf)
    print('start cf train.')
    if recommend_score is None:
        if os.path.exists(recommend_score_file):
            # A cached score file means cf was already trained; reuse it.
            with open(recommend_score_file, 'rb') as f:
                recommend_score = pickle.load(f)
        else:
            recommend_score = cf.cf_train()
            # Create the cache directory first so a fresh training result
            # is not lost to a missing folder.
            cache_dir = os.path.dirname(recommend_score_file)
            if cache_dir:
                os.makedirs(cache_dir, exist_ok=True)
            with open(recommend_score_file, 'wb') as f:
                pickle.dump(recommend_score, f)

    user_degree = cf.cal_user_degree()
    item_score = get_item_score(user_degree, recommend_score, item_len,
                                our_lambda=our_lambda)
    Ndegree_items = getNdegree_items(degreedistrev, N=10)
    test_item_degree = get_test_degree(test)
    print('start trend predict.')
    corr_score = trend_predict(item_score, Ndegree_items, test_item_degree,
                               train_itemset, test_itemset, method='pearson')
    print(corr_score)
    return corr_score, recommend_score
Exemplo n.º 3
0
def degree_item_map(cf):
    '''
    Build a degree -> items inverted index, used to gather the degree
    information of the items in the training set.

    Reads ``cf.cal_item_degree()`` (a mapping of item id to degree) and
    returns a dict mapping each degree to the list of item ids having it,
    in the order the items were iterated.
    '''
    items_by_degree = {}
    for item_id, deg in cf.cal_item_degree().items():
        # setdefault creates the bucket the first time a degree is seen.
        items_by_degree.setdefault(deg, []).append(item_id)
    return items_by_degree


if __name__ == "__main__":
    # Script entry point: load the data through utils.deal_train, build a
    # UserCF model, then compute its scores and the degree -> items index.

    train_set, test, item_len = utils.deal_train(r'./data/movielens_data.pkl')
    # NOTE(review): other call sites in this file build UserCF as
    # (trainset, test, item_len); confirm this two-argument form matches the
    # UserCF signature in use.
    cf = UserCF(train_set, item_len)
    # Return values are discarded here; presumably these calls populate
    # state on cf that later steps rely on - TODO confirm.
    cf.cal_user_degree()
    cf.similarity()

    # Define a floating-point stepper (a range() analogue for floats).


    def frange(x, y, jump):
        """Yield x, x + jump, x + 2 * jump, ... while the value is below y.

        The value is advanced by repeated addition, so float rounding error
        accumulates. If jump is not positive and x < y, this never ends.
        """
        while x < y:
            yield x
            x += jump

    rec_score = cf.get_item_score()

    # Group item ids by their degree (see degree_item_map above).
    degreedistrev = degree_item_map(cf)