Exemplo n.º 1
0
def main(lineRate, alpha, topN, testN):
    """Run the recommender on at most ``testN`` test users and show the evaluation.

    Args:
        lineRate: Passed to ``tool.splitData`` and forwarded to the evaluator
            as ``dataScale``.
        alpha: Forwarded unchanged to ``recommend``.
        topN: Forwarded to ``recommend`` and to the evaluator as ``topN``.
        testN: Maximum number of test users to process; iteration stops as
            soon as this many users have been handled.
    """
    print("加载数据...")
    train, test = tool.splitData(lineRate)
    # train, test = tool.splitDataMock()
    M, i2rI, r2iU, iStart = initData(train)
    rankList = dict()
    print("进行推荐...")
    tN = 0
    # Only the users actually processed are evaluated, so collect them here.
    testNew = dict()
    for user in test:
        if tN == testN:
            break
        tN += 1
        # "\r" rewrites the same console line to act as a progress counter.
        print("\r%d/%d" % (tN, testN), end='')
        testNew[user] = test[user]
        if user not in train:
            # No training history for this user: record an empty ranking and
            # skip the lookup below, which would otherwise raise KeyError on
            # train[user].
            rankList[user] = []
            continue
        # NOTE(review): assumes every user in train also has an entry in
        # r2iU (built by initData from train) — confirm.
        rankList[user] = recommend(r2iU[user], M, alpha, iStart, i2rI, topN,
                                   train[user])
    # Terminate the progress line.
    print()
    eva = tool.Evaluator(train,
                         testNew,
                         rankList,
                         dataScale=lineRate,
                         topN=topN)
    eva.show()
Exemplo n.º 2
0
def splitDataNumpy(M, k, seed=1):
    """Split the data and also return the training set as a 0/1 matrix.

    The split itself is delegated to ``tool.splitData(1, M, k, seed)``.
    Every (user, item) pair found in the training split sets the cell
    ``[user - 1, item - 1]`` of the matrix to 1.

    Returns:
        A tuple ``(trainM, train, test)``: the matrix plus the two splits
        exactly as returned by ``tool.splitData``.
    """
    train, test = tool.splitData(1, M, k, seed)
    # Fixed matrix shape: rows are users, columns are items.
    # NOTE(review): presumably the MovieLens-1M user/movie counts — confirm.
    userCount, itemCount = 6040, 3952
    # Keep the default float dtype; it is preferred for the matrix maths.
    trainM = mat(zeros((userCount, itemCount)))
    for user in train:
        row = user - 1  # ids are shifted by one to get the matrix index
        for item in train[user]:
            trainM[row, item - 1] = 1
    return trainM, train, test
Exemplo n.º 3
0
def itemCF(K, topItem, dataScale):
    """Run the item-based collaborative-filtering pipeline and show results.

    Args:
        K: Forwarded to ``recommendation`` and to the evaluator as ``topN``.
        topItem: Forwarded to ``recommendation``.
        dataScale: Passed to ``tool.splitData`` and to the evaluator.
    """
    print("加载数据...")
    train, test = tool.splitData(dataScale, 8, 0)
    W = itemSimilartiy(train)
    print("进行推荐...")
    # Build one ranking per user of the test split.
    rankList = {
        user: recommendation(train, user, W, K, topItem)
        for user in test.keys()
    }
    print("进行评估...")
    # NOTE(review): the evaluator receives topN=K rather than topItem —
    # confirm this is intended.
    tool.Evaluator(train, test, rankList, topN=K, dataScale=dataScale).show()
Exemplo n.º 4
0
def userCF(userN, itemN, dataScale):
    """Run the user-based collaborative-filtering pipeline and show results.

    Args:
        userN: Forwarded to ``recommend`` and, positionally, to the evaluator.
        itemN: Forwarded to ``recommend``.
        dataScale: Passed to ``tool.splitData`` and, positionally, to the
            evaluator.
    """
    print("加载数据...")
    train, test = tool.splitData(dataScale, 8, 0)
    W = userSimilarity(train)
    print("进行推荐...")
    # Build one ranking per user of the test split.
    rankList = {
        user: recommend(user, train, W, userN, itemN)
        for user in test.keys()
    }
    print("进行评估...")
    # NOTE(review): userN and dataScale are passed positionally; what they
    # bind to depends on Evaluator's signature — confirm.
    tool.Evaluator(train, test, rankList, userN, dataScale).show()
Exemplo n.º 5
0
def lfmRecommend(ratio, N, lineRate, topN=10):
    """Train the latent factor model, recommend for test users, and evaluate.

    Args:
        ratio: Forwarded to ``latentFactorModel`` and to the evaluator.
        N: Forwarded to ``latentFactorModel`` and to the evaluator as
            ``loopN``.
        lineRate: Passed to ``splitData`` and to the evaluator as
            ``dataScale``.
        topN: Forwarded to ``recommend``.
    """
    print("加载数据...")
    train, test = splitData(lineRate)
    print("生成参数...")
    P, Q = latentFactorModel(train, ratio, N)
    print("进行推荐...")
    rankList = {}
    for user in test.keys():
        if user in train:
            rankList[user] = recommend(user, P, Q, topN, train)
        else:
            # Users without training data get an empty result.
            rankList[user] = {}
    print("进行评估...")
    evaluator = Evaluator(train, test, rankList,
                          ratio=ratio, dataScale=lineRate, loopN=N)
    evaluator.show()
def lfmRecommend(N, alpha, rate, topN=10):
    """Train the latent factor model, recommend for test users, and evaluate.

    For each test user that also appears in the training split, the
    ranking is the first ``topN`` entries of ``sortedUserItem[user]`` that
    are not in the user's training items but do occur somewhere in the
    train or test data. Each entry is paired with a constant score of 1.

    Args:
        N: Forwarded to ``latentFactorModel`` and to the evaluator as
            ``loopN``.
        alpha: Forwarded to ``latentFactorModel``.
        rate: Forwarded to ``latentFactorModel``.
        topN: Maximum number of recommendations kept per user.
    """
    print("加载数据...")
    train, test = splitData(1)
    print("生成参数...")
    sortedUserItem = latentFactorModel(train, N, alpha, rate)
    print("进行推荐...")
    # Every item that occurs in either split.
    knownItems = set()
    for split in (train, test):
        for items in split.values():
            knownItems.update(items)
    rankList = {}
    for user in test.keys():
        if user not in train:
            # Users without training data get an empty result.
            rankList[user] = {}
            continue
        candidates = []
        for item in sortedUserItem[user]:
            if item in train[user] or item not in knownItems:
                continue
            candidates.append((item, 1))
        rankList[user] = candidates[:topN]
    print("进行评估...")
    evaluator = Evaluator(train, test, rankList, loopN=N)
    evaluator.show()
Exemplo n.º 7
0
    def RFsDP(self, hX, hy, privacy, num_learner, max_height, n_rows):
        """Train one user's local random forest (RFsDP) and score it.

        Args:
            hX: This user's feature data; ``hX.shape[0]`` is taken as the
                user's row count.
            hy: This user's labels.
            privacy: Forwarded to ``MyRandomForest`` as ``privacy_p``.
            num_learner: Forwarded to ``MyRandomForest`` as ``num_learners``.
            max_height: Forwarded to ``MyRandomForest`` as ``max_depth``.
            n_rows: Total row count N used to compute the user's share.

        Returns:
            A tuple ``(lamb_acc, rfs_p)``: the weight
            ``exp(lambda_p) * accuracy`` and the trained forest.
        """
        # lambda_p = N_p / N for user p. Convert to float BEFORE dividing so
        # the ratio is not truncated to 0 under integer division.
        lamb = float(hX.shape[0]) / n_rows
        # Training and validation sets of user p.
        hX_train, hy_train, hX_val, hy_val = splitData(hX, hy)

        rfs_p = MyRandomForest(X=hX_train,
                               y=hy_train,
                               privacy_p=privacy,
                               num_learners=num_learner,
                               max_depth=max_height)
        predRF_p = rfs_p.predict(hX_val)

        # Classifier accuracy of user p on the validation set.
        acc = accuracy_score(hy_val, predRF_p, normalize=True)
        # Value of the w function for user p.
        lamb_acc = math.exp(lamb) * acc
        return lamb_acc, rfs_p
Exemplo n.º 8
0
    def AdaBoostDP(self, hX, hy, privacy, num_learner, height, n_rows):
        """Train one user's local AdaBoost classifier (AdaboostDP) and score it.

        Args:
            hX: This user's feature data; ``hX.shape[0]`` is taken as the
                user's row count.
            hy: This user's labels.
            privacy: Forwarded to the classifier as ``privacy_p``.
            num_learner: Forwarded to the classifier as ``num_learners``.
            height: Forwarded to the classifier as ``max_depth``.
            n_rows: Total row count N used to compute the user's share.

        Returns:
            A tuple ``(lamb_acc, ada_p)``: the weight
            ``exp(lambda_p) * accuracy`` and the fitted classifier.
        """
        # lambda_p = N_p / N for user p. Convert to float BEFORE dividing so
        # the ratio is not truncated to 0 under integer division.
        lamb = float(hX.shape[0]) / n_rows
        # Training and validation sets of user p.
        hX_train, hy_train, hX_val, hy_val = splitData(hX, hy)

        ada_p = AdaboostDTBinaryClassifier(privacy_p=privacy,
                                           num_learners=num_learner,
                                           max_depth=height)
        ada_p.fit(hX_train, hy_train)
        # For comparison with reference paper [13] (2016):
        #       alpha_p.append(ada_p.learner_weight_)
        #       Ada_set_p.append(ada_p.learners_)

        # Predict on the validation set.
        predAda_p = ada_p.predict(hX_val)
        # Classifier accuracy of user p on the validation set.
        acc = accuracy_score(hy_val, predAda_p, normalize=True)
        # Value of the w function for user p.
        lamb_acc = math.exp(lamb) * acc
        return lamb_acc, ada_p