Python Review.replaceIDwithNum примеры использования

Язык программирования: Python

Пространство имен/Пакет: reviewMR

Класс/Тип: Review

Метод/Функция: replaceIDwithNum

Примеров на hotexamples.com: 2

Python Review.replaceIDwithNum - 2 примера найдено. Это лучшие примеры Python кода для reviewMR.Review.replaceIDwithNum, полученные из open source проектов. Вы можете ставить оценку каждому примеру, чтобы помочь нам улучшить качество примеров.

Основные методы

Показать Скрыть

is_res(3)

replaceIDwithNum(2)

normalizeStar(1)

subtractAvg(1)

Пример #1

Показать файл

Файл: alsRecommend.py Проект: keyrrae/w16paraucsb

def parseReviewToTrainingSet(reviewRDD, testReviewRDD):
    """Build the normalized training RDD and the matching test RDD.

    Relies on names defined outside this function: ``sc``, ``Review``,
    ``restaurantListBC``, ``assignNum`` and ``parseUserBusiLst``.

    Args:
        reviewRDD: RDD of raw training reviews; every element is passed
            through ``Review.toString`` and kept only if ``Review.is_res``
            accepts it.
        testReviewRDD: RDD of raw test reviews; every element is passed
            through ``Review.toString`` and ``Review.getUsrResStar``.

    Returns:
        A ``(userReviewRestaNormRDD, testReviewRestRDD)`` tuple. The first
        is the parallelized output of ``parseUserBusiLst``; the second is
        the test RDD restricted to entries whose first two fields occur in
        the collected training user/restaurant lists, then mapped through
        ``Review.normalizeStar`` and ``Review.replaceIDwithNum``.
    """
    reviewRestaRDD = reviewRDD.map(Review.toString).filter(lambda line: Review.is_res(line, restaurantListBC))
    userList = reviewRestaRDD.map(Review.getuserid).sortByKey().collect()
    restList = reviewRestaRDD.map(Review.getbusiid).sortByKey().collect()
    userListBC = sc.broadcast(userList)
    restListBC = sc.broadcast(restList)

    # Debug sample. Guarded so a training set with fewer than 11 entries
    # no longer aborts the whole job with an IndexError.
    if len(userList) > 10:
        print(userList[10])

    # ID -> number dictionaries for users and restaurants of the training
    # set. The reverse (number -> ID) dictionaries returned by assignNum are
    # not needed here, so they are neither kept nor broadcast.
    userIdToNumDict, _ = assignNum(userList)
    restIdToNumDict, _ = assignNum(restList)
    userIdToNumDictBC = sc.broadcast(userIdToNumDict)
    restIdToNumDictBC = sc.broadcast(restIdToNumDict)

    userReviewRestaRDD = reviewRestaRDD.map(Review.mapper).reduceByKey(Review.reducer).map(Review.reshape)
    # Review.normalize: per the original author's note, subtracts average values.
    userReviewRestaNormLst = userReviewRestaRDD.map(Review.normalize).collect()
    # Derive the lookup from the rows already collected above instead of
    # running the (uncached) RDD lineage a second time. Must happen before
    # parseUserBusiLst in case that helper modifies the list.
    userAvgDict = {row[0]: row[1] for row in userReviewRestaNormLst}

    userReviewRestaLst = parseUserBusiLst(userReviewRestaNormLst, userIdToNumDict, restIdToNumDict)
    userReviewRestaNormRDD = sc.parallelize(userReviewRestaLst)
    print(userReviewRestaNormRDD.take(10))

    # NOTE(review): the membership tests below scan the broadcast lists
    # linearly for every test record; a set would be faster if the list
    # elements are hashable and directly comparable to x[0]/x[1] - confirm
    # against Review.getuserid / Review.getbusiid before changing.
    testReviewRestRDD = (
        testReviewRDD.map(Review.toString)
        .map(Review.getUsrResStar)
        .filter(lambda x: x[0] in userListBC.value and x[1] in restListBC.value)
        .map(lambda x: Review.normalizeStar(userAvgDict, x))
        .map(lambda x: Review.replaceIDwithNum(x, userIdToNumDictBC, restIdToNumDictBC))
    )
    print(testReviewRestRDD.take(10))

    return userReviewRestaNormRDD, testReviewRestRDD

Пример #2

Показать файл

Файл: alsRecommend.py Проект: keyrrae/w16paraucsb

    numRestaurants = ratings.values().map(lambda r: r[1]).distinct().count()

    print("Got %d ratings from %d users on %d restaurants." % (numRatings, numUsers, numRestaurants))

    userList = ratings.values().map(lambda r: r[0]).distinct().collect()
    restList = ratings.values().map(lambda r: r[1]).distinct().collect()

    getUserIndex, getUserID = assignNum(userList)
    getRestIndex, getRestID = assignNum(restList)

    getUserIDBC = sc.broadcast(getUserID)
    getUserIndexBC = sc.broadcast(getUserIndex)
    getRestIndexBC = sc.broadcast(getRestIndex)
    getRestIDBC = sc.broadcast(getRestID)

    ratings = ratings.map(lambda (x, y): (x, Review.replaceIDwithNum(y, getUserIndexBC, getRestIndexBC)))
    usrRatingAvg = ratings.values().map(lambda x: (x[0], x[2])).reduceByKey(Review.reducer).map(Review.reshape)\
                    .filter(lambda x: len(x[1]) >= 3).map(Review.reshapeList)\
                    .map(lambda x: (x[0], sum(x[1])/float(len(x[1]))))
    usrRatingAvgBC = sc.broadcast(dict(usrRatingAvg.collect()))
    ratings = ratings.filter(lambda x: x[1][0] in usrRatingAvgBC.value).map(lambda (x, y): (x, Review.subtractAvg(y, usrRatingAvgBC)))

    numOfPartitions = 4

    trainingVal = ratings.filter(lambda x: x[0] <= 6) \
        .values()

    trainingMean = trainingVal.map(lambda x: x[2]).mean()

    training = trainingVal.repartition(numOfPartitions).cache()