Example #1
0
def trainAutorec(testData, trainData, trainMask, unratedItemsMask, numOfRatings, unrated_items, GroundTruth,
                 itemCount, topN, m, h, r, o, l, p, s=False):
    """Train an AutoRec model (TF1 graph/session API) and log top-N ranking metrics.

    Parameters (as actually used below; several are opaque single letters):
        testData / trainData: per-user rating vectors indexed by user id.
        trainMask: per-user mask over items (nonzero where the user rated).
        unratedItemsMask: per-user mask of unrated items, used so ranking only
            considers items the user has not seen in training.
        numOfRatings: per-user rating counts (drives negative-sample size).
        unrated_items: per-user list of unrated item ids (negative-sampling pool).
        GroundTruth: held-out relevant items per user, passed to evaluation.
        itemCount: total number of items (model input/output width).
        topN: cutoff(s) for precision/recall/NDCG/MRR.
        o: optimizer selector (1 = gradient descent, 2 = RMSProp, otherwise Adam).
        l: learning rate handed to the chosen optimizer.
        h, r: forwarded to models.Autorec — presumably hidden size and a
            regularization weight; TODO confirm against models.Autorec.
        m, p: only forwarded to result printing / the results file.
        s: when True, augments each user's training mask with random negative
            samples (one negative per observed rating).

    NOTE(review): relies on module-level globals `epochCount` and `batchSize`,
    plus imports (tf, np, random, math, models, dataprocess, evaluation)
    defined elsewhere in the file.
    """
    # prepare training
    # Per-cutoff metric accumulators: "best*" track the best epoch seen so far,
    # "last*" the most recent epoch (exact semantics live in initializeEvaluations).
    bestPrecision, bestRecall, bestNDCG, bestMRR, bestGlobalMRR, bestCost, bestEpochCount, bestMeanForMeasures, lastRecall, \
    lastPrecision, lastNDCG, lastMRR, lastGlobalMRR = initializeEvaluations(topN)
    userIdList_Train, allTrainData, allTestData, unratedTrainMask = dataprocess.prepareTrainAndTest(trainData, unratedItemsMask, testData)
    
    with tf.Graph().as_default():
        # Build the AutoRec graph: input placeholder, mask placeholder,
        # reconstruction output y, and the training loss.
        data, mask, y, cost = models.Autorec(itemCount, h, r)

        # define optimizer
        if o == 1:
            optimizer = tf.train.GradientDescentOptimizer(l).minimize(cost)
        elif o == 2:
            optimizer = tf.train.RMSPropOptimizer(l).minimize(cost)
        else:
            optimizer = tf.train.AdamOptimizer(l).minimize(cost)

        # Presumably a "keep reporting" flag: evaluation_topN returns it each
        # epoch; while True the running best metrics are printed, otherwise a
        # done-message is printed instead. TODO confirm in evaluation module.
        printTrigger = True
        # Start training
        sess = tf.InteractiveSession()
        sess.run(tf.global_variables_initializer())
        for epoch in range(epochCount):
            # Reshuffle users each epoch so mini-batches differ between epochs.
            random.shuffle(userIdList_Train)

            # Integer division drops the final partial batch.
            for batchId in range(int(len(userIdList_Train) / batchSize)):
                start = batchId * batchSize
                end = start + batchSize

                batchData = []
                batchMask = []
                
                for i in range(start, end):
                    userId = userIdList_Train[i]
                    batchData.append(trainData[userId])
                    if s:
                        # random negative sampling
                        # Sample as many unrated items as the user has ratings
                        # (1:1 positives to negatives) and switch them on in
                        # the loss mask alongside the observed ratings.
                        unrated = np.zeros(itemCount)
                        tmp = np.random.choice(unrated_items[userId], numOfRatings[userId], replace=False)

                        for j in tmp:
                            unrated[j] = 1
                        batchMask.append(unrated + trainMask[userId])
                    else:
                        batchMask.append(trainMask[userId])
                batchData = np.array(batchData)
                batchMask = np.array(batchMask)

                # One optimization step; c is the batch loss of this step.
                c, _ = sess.run([cost, optimizer], feed_dict={data: batchData, mask: batchMask})

            # c here is the loss of the LAST mini-batch of the epoch.
            print("[epoch %d/%d]\tcost : %.4f" % (epoch + 1, epochCount, c))

            # NOTE(review): userID_list is built but never used below.
            userID_list = []
            for userId in testData:
                userID_list.append(userId)

            # calculate accuracy measures
            # Rank ALL items per user; multiplying by the unrated-items mask
            # zeroes scores of already-rated items so they sink in the ranking.
            # NOTE(review): "preidictedValues" is a typo for "predictedValues"
            # (the values are unused, only the indices matter).
            preidictedValues, predictedIndices = sess.run(tf.nn.top_k(y * mask, itemCount),
                                                             feed_dict={data: allTrainData,
                                                                        mask: unratedTrainMask})


            # Update best/last metrics for this epoch (returns updated state).
            printTrigger, bestPrecision, bestRecall, bestNDCG, bestMRR, bestGlobalMRR, bestCost, bestEpochCount, \
            bestMeanForMeasures, lastRecall, lastPrecision, lastNDCG, lastMRR, lastGlobalMRR = \
                evaluation.evaluation_topN (GroundTruth, predictedIndices, topN, bestPrecision, bestRecall, bestNDCG,
                                            bestMRR, bestGlobalMRR, bestCost, bestEpochCount, bestMeanForMeasures,
                                            lastRecall, lastPrecision, lastNDCG, lastMRR, lastGlobalMRR, epoch, c)

            if printTrigger == True:
                print("%.4f   %.4f   %.4f   %.4f   %.4f" % (bestPrecision[0], bestRecall[0], bestNDCG[0], bestMRR[0], bestGlobalMRR))
            else:
                # NOTE(review): message string is missing a closing ']' after p.
                print("[" + p + " / sampling:" + str(s) + " Done...")
    
    sess.close()
    
    # Hard-coded experiment metadata for the results file (dataset/split/flags
    # not applicable to this model are fixed here).
    be="ML100k"
    d="d1"
    st=False
    de=False
    ran=0
    a=0
    b=0
    # be d m p s st de r h o l ran a b precision recall ndcg mrr globalmrr cost epoch LAprecision LArecall LAndcg LAmrr LAglobalmrr
    printResultsToFile(topN, bestPrecision, bestRecall, bestNDCG, bestMRR, bestGlobalMRR, bestCost, bestEpochCount,
                       bestMeanForMeasures, lastRecall, lastPrecision, lastNDCG, lastMRR, lastGlobalMRR, be, d, m, p,
                       s, st, de, r, h, o, l, ran, a, b)
Example #2
0
def trainDAS(testData, trainData, trainData_i, trainMask, trainMask_i, unratedItemsMask, positiveMask, numOfRatings, unrated_items, GroundTruth, itemCount, topN, m, h, r, o, l, ran, a, b):
    """Train the dual-input DualAPR_sigmoid model and log top-N ranking metrics.

    Mirrors trainAutorec but feeds two parallel user representations
    (data/mask and data_i/mask_i) plus a positive-item mask and per-user
    positive counts into models.DualAPR_sigmoid.

    Parameters (as used below):
        trainData / trainMask: primary per-user input vectors and masks.
        trainData_i / trainMask_i: second ("_i") per-user input vectors and
            masks — presumably an inverted/implicit view; TODO confirm against
            models.DualAPR_sigmoid.
        positiveMask: per-user mask of positive items fed to the loss.
        numOfRatings: per-user positive counts (fed as length_positive).
        o: optimizer selector (1 = gradient descent, 2 = RMSProp, else Adam).
        l: learning rate.
        h, r, ran, a, b: forwarded to models.DualAPR_sigmoid (h/r presumably
            hidden size / regularization; ran/a/b model hyperparameters —
            TODO confirm).
        unrated_items: accepted but not used in this function.

    NOTE(review): relies on module-level globals `epochCount` and `batchSize`.
    """

    # Best/last metric accumulators (see initializeEvaluations).
    bestPrecision, bestRecall, bestNDCG, bestMRR, bestGlobalMRR, bestCost, bestEpochCount, bestMeanForMeasures, lastRecall, lastPrecision, lastNDCG, lastMRR, lastGlobalMRR = initializeEvaluations(topN)
    
    with tf.Graph().as_default():
        # Build the dual-input graph: two data/mask placeholder pairs, the
        # positive mask, positive counts, prediction y, and the loss.
        data, mask, data_i, mask_i, _positiveMask, length_positive, y, cost = models.DualAPR_sigmoid (itemCount, h, r, ran, a, b)
        
        if o == 1:
            optimizer = tf.train.GradientDescentOptimizer(l).minimize(cost)
        elif o == 2:
            optimizer = tf.train.RMSPropOptimizer(l).minimize(cost)
        else:
            optimizer = tf.train.AdamOptimizer(l).minimize(cost)

        # prepare training
        userIdList_Train, allTrainData, allTestData, unratedTrainMask = dataprocess.prepareTrainAndTest(trainData, unratedItemsMask, testData)
        # Presumably a "keep reporting" flag returned by evaluation_topN:
        # True prints running best metrics, False prints the done-message.
        printTrigger = True
            
        # Start training
        sess = tf.InteractiveSession()
        sess.run(tf.global_variables_initializer()) #initialize_all_variables
        for epoch in range(epochCount):
            # Reshuffle users each epoch; partial final batch is dropped below.
            random.shuffle(userIdList_Train)

            for batchId in range(int(len(userIdList_Train) / batchSize)):
                start = batchId * batchSize
                end = start + batchSize

                batchData = []
                batchMask = []
                batchData_i = []
                batchMask_i = []
                batchPositiveMask = []
                len_positive = []
                
                # Gather both input views plus positive info for each user.
                for i in range(start, end):
                    userId = userIdList_Train[i]
                    batchData.append(trainData[userId])
                    batchMask.append(trainMask[userId])
                    batchData_i.append(trainData_i[userId])
                    batchMask_i.append(trainMask_i[userId])
                    batchPositiveMask.append(positiveMask[userId])
                    len_positive.append(numOfRatings[userId])

                batchData = np.array(batchData)
                batchMask = np.array(batchMask)
                batchData_i = np.array(batchData_i)
                batchMask_i = np.array(batchMask_i)
                batchPositiveMask = np.array(batchPositiveMask)
                len_positive = np.array(len_positive)
                
                # One optimization step; c is this mini-batch's loss.
                c, _ = sess.run([cost, optimizer], feed_dict={data: batchData, mask: batchMask, data_i: batchData_i, mask_i: batchMask_i, _positiveMask: batchPositiveMask, length_positive: len_positive})

            # c is the loss of the LAST mini-batch of the epoch.
            print("[epoch %d/%d]\tcost : %.4f" % (epoch + 1, epochCount, c))
         
            # calculate accuracy measures
            # Rank all items per user, zeroing already-rated items via the
            # unrated-items mask. NOTE(review): "preidictedValues" is a typo
            # for "predictedValues" (unused; only indices matter).
            preidictedValues, predictedIndices = sess.run(tf.nn.top_k(y * mask, itemCount),
                                                             feed_dict={data: allTrainData,
                                                                        mask: unratedTrainMask})

            # Update best/last metrics for this epoch.
            printTrigger, bestPrecision, bestRecall, bestNDCG, bestMRR, bestGlobalMRR, bestCost, bestEpochCount, bestMeanForMeasures, lastRecall, lastPrecision, lastNDCG, lastMRR, lastGlobalMRR = evaluation.evaluation_topN (GroundTruth, predictedIndices, topN, bestPrecision, bestRecall, bestNDCG, bestMRR, bestGlobalMRR, bestCost, bestEpochCount, bestMeanForMeasures, lastRecall, lastPrecision, lastNDCG, lastMRR, lastGlobalMRR, epoch, c)

            if printTrigger == True:
                print("%.4f   %.4f   %.4f   %.4f   %.4f" % (bestPrecision[0], bestRecall[0], bestNDCG[0], bestMRR[0], bestGlobalMRR))
            else:
                print("[a: %d, b: %.2f, rankingparam: %.2f] Done..." % (a, b, ran))
    
    sess.close()
    # be d m p s st de r h o l ran a b precision recall ndcg mrr globalmrr cost epoch LAprecision LArecall LAndcg LAmrr LAglobalmrr
    # Hard-coded experiment metadata for the results file.
    be="ML100k"
    d="d1"
    p="None"
    s=False
    st=False
    de=False
    printResultsToFile(topN, bestPrecision, bestRecall, bestNDCG, bestMRR, bestGlobalMRR, bestCost, bestEpochCount, bestMeanForMeasures, lastRecall, lastPrecision, lastNDCG, lastMRR, lastGlobalMRR, be, d, m, p, s, st, de, r, h, o, l, ran, a, b)
Example #3
0
def trainSVDpp(testData, trainData, trainMask, unratedItemsMask, numOfRatings, unrated_items,
             GroundTruth, itemCount, topN, m, h, r, o, l, p, userCount, s = False):
    """Train an SVD++ model (TF1) and log top-N ranking metrics.

    Unlike the autoencoder trainers, this expands each user's ratings into
    (user, item, rating) triples and pre-computes the SVD++ implicit-feedback
    term (sum of item embeddings over rated items, scaled by 1/sqrt(|ratings|))
    before each optimization step.

    Parameters (as used below):
        userCount / itemCount: embedding-table sizes for models.SVDpp.
        numOfRatings: per-user rating counts (normalizes the implicit term).
        o: optimizer selector (1 = gradient descent, 2 = RMSProp, else Adam).
        l: learning rate.
        r: forwarded to models.SVDpp — presumably regularization; TODO confirm.
        h: accepted but not used in this function.
        m, p, s: only forwarded to printing / the results file.

    NOTE(review): relies on module-level globals `epochCount`, `batchSize`,
    and `DIM` (embedding dimension).
    NOTE(review): here prepareTrainAndTest is unpacked into FIVE values
    (including test_ulist), whereas trainAutorec/trainDAS unpack four —
    verify the helper's actual return arity.
    """
    bestPrecision, bestRecall, bestNDCG, bestMRR, bestGlobalMRR, bestCost, bestEpochCount, bestMeanForMeasures, \
    lastRecall, lastPrecision, lastNDCG, lastMRR, lastGlobalMRR = initializeEvaluations(topN)
    userIdList_Train, allTrainData, allTestData, unratedTrainMask, test_ulist = dataprocess.prepareTrainAndTest(
        trainData,unratedItemsMask,testData)
    # print(userCount, itemCount)

    with tf.Graph().as_default():
        # Build the SVD++ graph: id/rating placeholders, embd_y (per-item
        # implicit embeddings for a rated-item list), prediction infer, loss.
        user_batch, item_batch, rate_batch, rating_list_batch, userImplicit_batch,\
        embd_y, infer, cost = models.SVDpp(user_num=userCount, item_num=itemCount, r=r,
                                           dim=DIM)

        if o == 1:
            optimizer = tf.train.GradientDescentOptimizer(l).minimize(cost)
        elif o == 2:
            optimizer = tf.train.RMSPropOptimizer(l).minimize(cost)
        else:
            optimizer = tf.train.AdamOptimizer(l).minimize(cost)

        sess = tf.InteractiveSession()
        sess.run(tf.global_variables_initializer())

        for epoch in range(epochCount):
            random.shuffle(userIdList_Train)
            # Partial final batch of users is dropped (integer division).
            for batchId in range(int(len(userIdList_Train) / batchSize)):
                start = batchId * batchSize
                end = start + batchSize
                batchUserId = []
                batchItemId = []
                batchRate = []
                rated_itemsIndex = []
                numOfRatings_list = []
                userImplicit_list = []
                # Expand each user in the batch into one row per rated item.
                for i in range(start, end):
                    userId = userIdList_Train[i]
                    for j in range(itemCount):
                        if (trainMask[userId][j] != 0):
                            batchUserId.append(userId)
                            batchItemId.append(j)
                            batchRate.append(trainData[userId][j])
                            # Indices of ALL items this user rated (np.nonzero
                            # returns a tuple of index arrays).
                            rated_itemsIndex.append(np.nonzero(trainData[userId]))
                            numOfRatings_list.append(numOfRatings[userId])

                # SVD++ implicit term per row: sum the implicit embeddings of
                # the user's rated items, then scale by 1/sqrt(#ratings).
                # NOTE(review): this runs one session call per (user, item)
                # row, recomputing the same per-user sum many times.
                for j in range(len(batchUserId)):
                    userImplicit = sess.run(embd_y, feed_dict={rating_list_batch:
                                                              rated_itemsIndex[j]})
                    # Collapse the leading singleton axis from the nonzero()
                    # tuple: (1, k, DIM) -> (k, DIM).
                    userImplicit = np.reshape(userImplicit, (len(userImplicit[0]),
                                                                 len(userImplicit[0][0])))
                    userImplicit = np.sum(userImplicit, axis=0)
                    for k in range(DIM):
                        userImplicit[k] = (userImplicit[k]/math.sqrt(numOfRatings_list[j]))

                    userImplicit_list.append(userImplicit)

                # One optimization step; c is this batch's loss.
                c, _, predict = sess.run([cost, optimizer, infer], feed_dict={user_batch: batchUserId,
                                                              item_batch: batchItemId,
                                                              rate_batch: batchRate,
                                                              userImplicit_batch: userImplicit_list})


            # c is the loss of the LAST batch of the epoch.
            print("[epoch %d/%d]\tcost : %.4f" % (epoch + 1, epochCount, c))

            # Predict a full score vector over all items for each test user.
            predictRate = []
            for u in test_ulist:
                predictNumOfRate = numOfRatings[u]
                # NOTE(review): the implicit term here is built from the
                # user's TEST ratings (testData[u]), not the training ratings
                # used above — confirm this is intentional.
                predictRateitemId = np.nonzero(testData[u])
                predictBatchUserId = []
                predictBatchItemId = []
                predictBatchUserImplicit = []
                for i in range(itemCount):
                    predictBatchUserId.append(u)
                    predictBatchItemId.append(i)

                # Same implicit-term computation as in training, done once per
                # user and broadcast to every candidate item row.
                tmp = sess.run(embd_y, feed_dict={rating_list_batch: predictRateitemId})
                tmp = np.reshape(tmp, (len(tmp[0]), len(tmp[0][0])))
                tmp = np.sum(tmp, axis=0)
                for k in range(DIM):
                    tmp[k] = (tmp[k] / math.sqrt(predictNumOfRate))
                for j in range(len(predictBatchUserId)):
                        predictBatchUserImplicit.append(tmp)

                rate = sess.run([infer], feed_dict={user_batch: predictBatchUserId,
                                                    item_batch: predictBatchItemId,
                                                    userImplicit_batch: predictBatchUserImplicit
                                                    })
                predictRate.append(list(rate[0]))

            predictRate = np.asarray(predictRate)
            # Rank all items per user; the unrated mask zeroes out scores of
            # items already rated in training so they sink in the ranking.
            predictedValues, predictedIndices = sess.run(tf.nn.top_k(predictRate * unratedTrainMask, itemCount))
            # Update best/last metrics for this epoch.
            printTrigger, bestPrecision, bestRecall, bestNDCG, bestMRR, bestGlobalMRR, bestCost, bestEpochCount, \
            bestMeanForMeasures, lastRecall, lastPrecision, lastNDCG, lastMRR, lastGlobalMRR = \
                evaluation.evaluation_topN(GroundTruth, predictedIndices, topN, bestPrecision, bestRecall, bestNDCG,
                                           bestMRR, bestGlobalMRR, bestCost, bestEpochCount, bestMeanForMeasures,
                                           lastRecall, lastPrecision, lastNDCG, lastMRR, lastGlobalMRR, epoch, c)

            if printTrigger == True:
                print("%.4f   %.4f   %.4f   %.4f   %.4f" % (
                    bestPrecision[0], bestRecall[0], bestNDCG[0], bestMRR[0], bestGlobalMRR))
            else:
                # NOTE(review): message string is missing a closing ']' after p.
                print("[" + p + " / sampling:" + str(s) + " Done...")

    sess.close()

    # Hard-coded experiment metadata for the results file.
    be = "ML100k"
    d = "d1"
    st = False
    de = False
    ran = 0
    a = 0
    b = 0
    # be d m p s st de r h o l ran a b precision recall ndcg mrr globalmrr cost epoch LAprecision LArecall LAndcg LAmrr LAglobalmrr
    printResultsToFile(topN, bestPrecision, bestRecall, bestNDCG, bestMRR, bestGlobalMRR, bestCost, bestEpochCount,
                       bestMeanForMeasures, lastRecall, lastPrecision, lastNDCG, lastMRR, lastGlobalMRR, be, d, m,
                       p, s, st, de, r, h, o, l, ran, a, b)