Example #1
0
def trainAutorec(testData, trainData, trainMask, unratedItemsMask, numOfRatings, unrated_items, GroundTruth,
                 itemCount, topN, m, h, r, o, l, p, s=False):
    """Train an AutoRec model (TF1 graph/session API) and log top-N ranking metrics.

    Parameters (as actually used below; several are opaque single letters):
        testData / trainData: per-user rating vectors indexed by user id.
        trainMask: per-user mask over items (nonzero where the user rated).
        unratedItemsMask: per-user mask of unrated items, used so ranking only
            considers items the user has not seen in training.
        numOfRatings: per-user rating counts (drives negative-sample size).
        unrated_items: per-user list of unrated item ids (negative-sampling pool).
        GroundTruth: held-out relevant items per user, passed to evaluation.
        itemCount: total number of items (model input/output width).
        topN: cutoff(s) for precision/recall/NDCG/MRR.
        o: optimizer selector (1 = gradient descent, 2 = RMSProp, otherwise Adam).
        l: learning rate handed to the chosen optimizer.
        h, r: forwarded to models.Autorec — presumably hidden size and a
            regularization weight; TODO confirm against models.Autorec.
        m, p: only forwarded to result printing / the results file.
        s: when True, augments each user's training mask with random negative
            samples (one negative per observed rating).

    NOTE(review): relies on module-level globals `epochCount` and `batchSize`,
    plus imports (tf, np, random, math, models, dataprocess, evaluation)
    defined elsewhere in the file.
    """
    # prepare training
    # Per-cutoff metric accumulators: "best*" track the best epoch seen so far,
    # "last*" the most recent epoch (exact semantics live in initializeEvaluations).
    bestPrecision, bestRecall, bestNDCG, bestMRR, bestGlobalMRR, bestCost, bestEpochCount, bestMeanForMeasures, lastRecall, \
    lastPrecision, lastNDCG, lastMRR, lastGlobalMRR = initializeEvaluations(topN)
    userIdList_Train, allTrainData, allTestData, unratedTrainMask = dataprocess.prepareTrainAndTest(trainData, unratedItemsMask, testData)
    
    with tf.Graph().as_default():
        # Build the AutoRec graph: input placeholder, mask placeholder,
        # reconstruction output y, and the training loss.
        data, mask, y, cost = models.Autorec(itemCount, h, r)

        # define optimizer
        if o == 1:
            optimizer = tf.train.GradientDescentOptimizer(l).minimize(cost)
        elif o == 2:
            optimizer = tf.train.RMSPropOptimizer(l).minimize(cost)
        else:
            optimizer = tf.train.AdamOptimizer(l).minimize(cost)

        # Presumably a "keep reporting" flag: evaluation_topN returns it each
        # epoch; while True the running best metrics are printed, otherwise a
        # done-message is printed instead. TODO confirm in evaluation module.
        printTrigger = True
        # Start training
        sess = tf.InteractiveSession()
        sess.run(tf.global_variables_initializer())
        for epoch in range(epochCount):
            # Reshuffle users each epoch so mini-batches differ between epochs.
            random.shuffle(userIdList_Train)

            # Integer division drops the final partial batch.
            for batchId in range(int(len(userIdList_Train) / batchSize)):
                start = batchId * batchSize
                end = start + batchSize

                batchData = []
                batchMask = []
                
                for i in range(start, end):
                    userId = userIdList_Train[i]
                    batchData.append(trainData[userId])
                    if s:
                        # random negative sampling
                        # Sample as many unrated items as the user has ratings
                        # (1:1 positives to negatives) and switch them on in
                        # the loss mask alongside the observed ratings.
                        unrated = np.zeros(itemCount)
                        tmp = np.random.choice(unrated_items[userId], numOfRatings[userId], replace=False)

                        for j in tmp:
                            unrated[j] = 1
                        batchMask.append(unrated + trainMask[userId])
                    else:
                        batchMask.append(trainMask[userId])
                batchData = np.array(batchData)
                batchMask = np.array(batchMask)

                # One optimization step; c is the batch loss of this step.
                c, _ = sess.run([cost, optimizer], feed_dict={data: batchData, mask: batchMask})

            # c here is the loss of the LAST mini-batch of the epoch.
            print("[epoch %d/%d]\tcost : %.4f" % (epoch + 1, epochCount, c))

            # NOTE(review): userID_list is built but never used below.
            userID_list = []
            for userId in testData:
                userID_list.append(userId)

            # calculate accuracy measures
            # Rank ALL items per user; multiplying by the unrated-items mask
            # zeroes scores of already-rated items so they sink in the ranking.
            # NOTE(review): "preidictedValues" is a typo for "predictedValues"
            # (the values are unused, only the indices matter).
            preidictedValues, predictedIndices = sess.run(tf.nn.top_k(y * mask, itemCount),
                                                             feed_dict={data: allTrainData,
                                                                        mask: unratedTrainMask})


            # Update best/last metrics for this epoch (returns updated state).
            printTrigger, bestPrecision, bestRecall, bestNDCG, bestMRR, bestGlobalMRR, bestCost, bestEpochCount, \
            bestMeanForMeasures, lastRecall, lastPrecision, lastNDCG, lastMRR, lastGlobalMRR = \
                evaluation.evaluation_topN (GroundTruth, predictedIndices, topN, bestPrecision, bestRecall, bestNDCG,
                                            bestMRR, bestGlobalMRR, bestCost, bestEpochCount, bestMeanForMeasures,
                                            lastRecall, lastPrecision, lastNDCG, lastMRR, lastGlobalMRR, epoch, c)

            if printTrigger == True:
                print("%.4f   %.4f   %.4f   %.4f   %.4f" % (bestPrecision[0], bestRecall[0], bestNDCG[0], bestMRR[0], bestGlobalMRR))
            else:
                # NOTE(review): message string is missing a closing ']' after p.
                print("[" + p + " / sampling:" + str(s) + " Done...")
    
    sess.close()
    
    # Hard-coded experiment metadata for the results file (dataset/split/flags
    # not applicable to this model are fixed here).
    be="ML100k"
    d="d1"
    st=False
    de=False
    ran=0
    a=0
    b=0
    # be d m p s st de r h o l ran a b precision recall ndcg mrr globalmrr cost epoch LAprecision LArecall LAndcg LAmrr LAglobalmrr
    printResultsToFile(topN, bestPrecision, bestRecall, bestNDCG, bestMRR, bestGlobalMRR, bestCost, bestEpochCount,
                       bestMeanForMeasures, lastRecall, lastPrecision, lastNDCG, lastMRR, lastGlobalMRR, be, d, m, p,
                       s, st, de, r, h, o, l, ran, a, b)
Example #2
0
def trainDAS(testData, trainData, trainData_i, trainMask, trainMask_i, unratedItemsMask, positiveMask, numOfRatings, unrated_items, GroundTruth, itemCount, topN, m, h, r, o, l, ran, a, b):
    """Train the dual-input DualAPR_sigmoid model and log top-N ranking metrics.

    Mirrors trainAutorec but feeds two parallel user representations
    (data/mask and data_i/mask_i) plus a positive-item mask and per-user
    positive counts into models.DualAPR_sigmoid.

    Parameters (as used below):
        trainData / trainMask: primary per-user input vectors and masks.
        trainData_i / trainMask_i: second ("_i") per-user input vectors and
            masks — presumably an inverted/implicit view; TODO confirm against
            models.DualAPR_sigmoid.
        positiveMask: per-user mask of positive items fed to the loss.
        numOfRatings: per-user positive counts (fed as length_positive).
        o: optimizer selector (1 = gradient descent, 2 = RMSProp, else Adam).
        l: learning rate.
        h, r, ran, a, b: forwarded to models.DualAPR_sigmoid (h/r presumably
            hidden size / regularization; ran/a/b model hyperparameters —
            TODO confirm).
        unrated_items: accepted but not used in this function.

    NOTE(review): relies on module-level globals `epochCount` and `batchSize`.
    """

    # Best/last metric accumulators (see initializeEvaluations).
    bestPrecision, bestRecall, bestNDCG, bestMRR, bestGlobalMRR, bestCost, bestEpochCount, bestMeanForMeasures, lastRecall, lastPrecision, lastNDCG, lastMRR, lastGlobalMRR = initializeEvaluations(topN)
    
    with tf.Graph().as_default():
        # Build the dual-input graph: two data/mask placeholder pairs, the
        # positive mask, positive counts, prediction y, and the loss.
        data, mask, data_i, mask_i, _positiveMask, length_positive, y, cost = models.DualAPR_sigmoid (itemCount, h, r, ran, a, b)
        
        if o == 1:
            optimizer = tf.train.GradientDescentOptimizer(l).minimize(cost)
        elif o == 2:
            optimizer = tf.train.RMSPropOptimizer(l).minimize(cost)
        else:
            optimizer = tf.train.AdamOptimizer(l).minimize(cost)

        # prepare training
        userIdList_Train, allTrainData, allTestData, unratedTrainMask = dataprocess.prepareTrainAndTest(trainData, unratedItemsMask, testData)
        # Presumably a "keep reporting" flag returned by evaluation_topN:
        # True prints running best metrics, False prints the done-message.
        printTrigger = True
            
        # Start training
        sess = tf.InteractiveSession()
        sess.run(tf.global_variables_initializer()) #initialize_all_variables
        for epoch in range(epochCount):
            # Reshuffle users each epoch; partial final batch is dropped below.
            random.shuffle(userIdList_Train)

            for batchId in range(int(len(userIdList_Train) / batchSize)):
                start = batchId * batchSize
                end = start + batchSize

                batchData = []
                batchMask = []
                batchData_i = []
                batchMask_i = []
                batchPositiveMask = []
                len_positive = []
                
                # Gather both input views plus positive info for each user.
                for i in range(start, end):
                    userId = userIdList_Train[i]
                    batchData.append(trainData[userId])
                    batchMask.append(trainMask[userId])
                    batchData_i.append(trainData_i[userId])
                    batchMask_i.append(trainMask_i[userId])
                    batchPositiveMask.append(positiveMask[userId])
                    len_positive.append(numOfRatings[userId])

                batchData = np.array(batchData)
                batchMask = np.array(batchMask)
                batchData_i = np.array(batchData_i)
                batchMask_i = np.array(batchMask_i)
                batchPositiveMask = np.array(batchPositiveMask)
                len_positive = np.array(len_positive)
                
                # One optimization step; c is this mini-batch's loss.
                c, _ = sess.run([cost, optimizer], feed_dict={data: batchData, mask: batchMask, data_i: batchData_i, mask_i: batchMask_i, _positiveMask: batchPositiveMask, length_positive: len_positive})

            # c is the loss of the LAST mini-batch of the epoch.
            print("[epoch %d/%d]\tcost : %.4f" % (epoch + 1, epochCount, c))
         
            # calculate accuracy measures
            # Rank all items per user, zeroing already-rated items via the
            # unrated-items mask. NOTE(review): "preidictedValues" is a typo
            # for "predictedValues" (unused; only indices matter).
            preidictedValues, predictedIndices = sess.run(tf.nn.top_k(y * mask, itemCount),
                                                             feed_dict={data: allTrainData,
                                                                        mask: unratedTrainMask})

            # Update best/last metrics for this epoch.
            printTrigger, bestPrecision, bestRecall, bestNDCG, bestMRR, bestGlobalMRR, bestCost, bestEpochCount, bestMeanForMeasures, lastRecall, lastPrecision, lastNDCG, lastMRR, lastGlobalMRR = evaluation.evaluation_topN (GroundTruth, predictedIndices, topN, bestPrecision, bestRecall, bestNDCG, bestMRR, bestGlobalMRR, bestCost, bestEpochCount, bestMeanForMeasures, lastRecall, lastPrecision, lastNDCG, lastMRR, lastGlobalMRR, epoch, c)

            if printTrigger == True:
                print("%.4f   %.4f   %.4f   %.4f   %.4f" % (bestPrecision[0], bestRecall[0], bestNDCG[0], bestMRR[0], bestGlobalMRR))
            else:
                print("[a: %d, b: %.2f, rankingparam: %.2f] Done..." % (a, b, ran))
    
    sess.close()
    # be d m p s st de r h o l ran a b precision recall ndcg mrr globalmrr cost epoch LAprecision LArecall LAndcg LAmrr LAglobalmrr
    # Hard-coded experiment metadata for the results file.
    be="ML100k"
    d="d1"
    p="None"
    s=False
    st=False
    de=False
    printResultsToFile(topN, bestPrecision, bestRecall, bestNDCG, bestMRR, bestGlobalMRR, bestCost, bestEpochCount, bestMeanForMeasures, lastRecall, lastPrecision, lastNDCG, lastMRR, lastGlobalMRR, be, d, m, p, s, st, de, r, h, o, l, ran, a, b)
Example #3
0
def trainSVDpp(testData, trainData, trainMask, unratedItemsMask, numOfRatings, unrated_items,
             GroundTruth, itemCount, topN, m, h, r, o, l, p, userCount, s = False):
    """Train an SVD++ model (TF1) and log top-N ranking metrics.

    Unlike the autoencoder trainers, this expands each user's ratings into
    (user, item, rating) triples and pre-computes the SVD++ implicit-feedback
    term (sum of item embeddings over rated items, scaled by 1/sqrt(|ratings|))
    before each optimization step.

    Parameters (as used below):
        userCount / itemCount: embedding-table sizes for models.SVDpp.
        numOfRatings: per-user rating counts (normalizes the implicit term).
        o: optimizer selector (1 = gradient descent, 2 = RMSProp, else Adam).
        l: learning rate.
        r: forwarded to models.SVDpp — presumably regularization; TODO confirm.
        h: accepted but not used in this function.
        m, p, s: only forwarded to printing / the results file.

    NOTE(review): relies on module-level globals `epochCount`, `batchSize`,
    and `DIM` (embedding dimension).
    NOTE(review): here prepareTrainAndTest is unpacked into FIVE values
    (including test_ulist), whereas trainAutorec/trainDAS unpack four —
    verify the helper's actual return arity.
    """
    bestPrecision, bestRecall, bestNDCG, bestMRR, bestGlobalMRR, bestCost, bestEpochCount, bestMeanForMeasures, \
    lastRecall, lastPrecision, lastNDCG, lastMRR, lastGlobalMRR = initializeEvaluations(topN)
    userIdList_Train, allTrainData, allTestData, unratedTrainMask, test_ulist = dataprocess.prepareTrainAndTest(
        trainData,unratedItemsMask,testData)
    # print(userCount, itemCount)

    with tf.Graph().as_default():
        # Build the SVD++ graph: id/rating placeholders, embd_y (per-item
        # implicit embeddings for a rated-item list), prediction infer, loss.
        user_batch, item_batch, rate_batch, rating_list_batch, userImplicit_batch,\
        embd_y, infer, cost = models.SVDpp(user_num=userCount, item_num=itemCount, r=r,
                                           dim=DIM)

        if o == 1:
            optimizer = tf.train.GradientDescentOptimizer(l).minimize(cost)
        elif o == 2:
            optimizer = tf.train.RMSPropOptimizer(l).minimize(cost)
        else:
            optimizer = tf.train.AdamOptimizer(l).minimize(cost)

        sess = tf.InteractiveSession()
        sess.run(tf.global_variables_initializer())

        for epoch in range(epochCount):
            random.shuffle(userIdList_Train)
            # Partial final batch of users is dropped (integer division).
            for batchId in range(int(len(userIdList_Train) / batchSize)):
                start = batchId * batchSize
                end = start + batchSize
                batchUserId = []
                batchItemId = []
                batchRate = []
                rated_itemsIndex = []
                numOfRatings_list = []
                userImplicit_list = []
                # Expand each user in the batch into one row per rated item.
                for i in range(start, end):
                    userId = userIdList_Train[i]
                    for j in range(itemCount):
                        if (trainMask[userId][j] != 0):
                            batchUserId.append(userId)
                            batchItemId.append(j)
                            batchRate.append(trainData[userId][j])
                            # Indices of ALL items this user rated (np.nonzero
                            # returns a tuple of index arrays).
                            rated_itemsIndex.append(np.nonzero(trainData[userId]))
                            numOfRatings_list.append(numOfRatings[userId])

                # SVD++ implicit term per row: sum the implicit embeddings of
                # the user's rated items, then scale by 1/sqrt(#ratings).
                # NOTE(review): this runs one session call per (user, item)
                # row, recomputing the same per-user sum many times.
                for j in range(len(batchUserId)):
                    userImplicit = sess.run(embd_y, feed_dict={rating_list_batch:
                                                              rated_itemsIndex[j]})
                    # Collapse the leading singleton axis from the nonzero()
                    # tuple: (1, k, DIM) -> (k, DIM).
                    userImplicit = np.reshape(userImplicit, (len(userImplicit[0]),
                                                                 len(userImplicit[0][0])))
                    userImplicit = np.sum(userImplicit, axis=0)
                    for k in range(DIM):
                        userImplicit[k] = (userImplicit[k]/math.sqrt(numOfRatings_list[j]))

                    userImplicit_list.append(userImplicit)

                # One optimization step; c is this batch's loss.
                c, _, predict = sess.run([cost, optimizer, infer], feed_dict={user_batch: batchUserId,
                                                              item_batch: batchItemId,
                                                              rate_batch: batchRate,
                                                              userImplicit_batch: userImplicit_list})


            # c is the loss of the LAST batch of the epoch.
            print("[epoch %d/%d]\tcost : %.4f" % (epoch + 1, epochCount, c))

            # Predict a full score vector over all items for each test user.
            predictRate = []
            for u in test_ulist:
                predictNumOfRate = numOfRatings[u]
                # NOTE(review): the implicit term here is built from the
                # user's TEST ratings (testData[u]), not the training ratings
                # used above — confirm this is intentional.
                predictRateitemId = np.nonzero(testData[u])
                predictBatchUserId = []
                predictBatchItemId = []
                predictBatchUserImplicit = []
                for i in range(itemCount):
                    predictBatchUserId.append(u)
                    predictBatchItemId.append(i)

                # Same implicit-term computation as in training, done once per
                # user and broadcast to every candidate item row.
                tmp = sess.run(embd_y, feed_dict={rating_list_batch: predictRateitemId})
                tmp = np.reshape(tmp, (len(tmp[0]), len(tmp[0][0])))
                tmp = np.sum(tmp, axis=0)
                for k in range(DIM):
                    tmp[k] = (tmp[k] / math.sqrt(predictNumOfRate))
                for j in range(len(predictBatchUserId)):
                        predictBatchUserImplicit.append(tmp)

                rate = sess.run([infer], feed_dict={user_batch: predictBatchUserId,
                                                    item_batch: predictBatchItemId,
                                                    userImplicit_batch: predictBatchUserImplicit
                                                    })
                predictRate.append(list(rate[0]))

            predictRate = np.asarray(predictRate)
            # Rank all items per user; the unrated mask zeroes out scores of
            # items already rated in training so they sink in the ranking.
            predictedValues, predictedIndices = sess.run(tf.nn.top_k(predictRate * unratedTrainMask, itemCount))
            # Update best/last metrics for this epoch.
            printTrigger, bestPrecision, bestRecall, bestNDCG, bestMRR, bestGlobalMRR, bestCost, bestEpochCount, \
            bestMeanForMeasures, lastRecall, lastPrecision, lastNDCG, lastMRR, lastGlobalMRR = \
                evaluation.evaluation_topN(GroundTruth, predictedIndices, topN, bestPrecision, bestRecall, bestNDCG,
                                           bestMRR, bestGlobalMRR, bestCost, bestEpochCount, bestMeanForMeasures,
                                           lastRecall, lastPrecision, lastNDCG, lastMRR, lastGlobalMRR, epoch, c)

            if printTrigger == True:
                print("%.4f   %.4f   %.4f   %.4f   %.4f" % (
                    bestPrecision[0], bestRecall[0], bestNDCG[0], bestMRR[0], bestGlobalMRR))
            else:
                # NOTE(review): message string is missing a closing ']' after p.
                print("[" + p + " / sampling:" + str(s) + " Done...")

    sess.close()

    # Hard-coded experiment metadata for the results file.
    be = "ML100k"
    d = "d1"
    st = False
    de = False
    ran = 0
    a = 0
    b = 0
    # be d m p s st de r h o l ran a b precision recall ndcg mrr globalmrr cost epoch LAprecision LArecall LAndcg LAmrr LAglobalmrr
    printResultsToFile(topN, bestPrecision, bestRecall, bestNDCG, bestMRR, bestGlobalMRR, bestCost, bestEpochCount,
                       bestMeanForMeasures, lastRecall, lastPrecision, lastNDCG, lastMRR, lastGlobalMRR, be, d, m,
                       p, s, st, de, r, h, o, l, ran, a, b)