Exemplo n.º 1
0
def userExperiment1():

    distProdId = getdistinctProduct(isTrain)  # has the distinct product id's
    n = len(distProdId)

    distUserId = getdistinctUser(isTrain)  # has the distinct product id's
    userLen = len(distUserId)

    print "Distinct Products Count :", n
    print "Distinct users count :", userLen

    # function calculates the pearson coeff
    pairwiseUserSimilarity(30)  # 30 Common Products Per user

    #function calcualtes the recommendation success
    userBasedCollabFiltering()
Exemplo n.º 2
0
def userExperiment2():

    # Function below removes the data where the sentiment analysis prediction doesnt match with the actual rating
    resetIgnore()

    distProdId = getdistinctProduct(isTrain)
    n = len(distProdId)

    distUserId = getdistinctUser(isTrain)
    userLen = len(distUserId)

    print "Distinct Products Count :", n
    print "Distinct users count :", userLen

    # function calculates the pearson coeff
    pairwiseUserSimilarity(30)  # 30 Common Products Per User

    #function calcualtes the recommendation success
    userBasedCollabFiltering()
def experiment1():

	distProdId = getdistinctProduct(isTrain) # has the distinct product id's
	n = len(distProdId)

	distUserId = getdistinctUser(isTrain) # has the distinct product id's
	userLen = len(distUserId)

	print "Distinct Products Count :", n
	print "Distinct users count :", userLen
	

	# funtion deletes existing similarity already if inserted
	deleteSimilarityFromDB()
	
	# function calculates the pearson coeff
	pairwiseProdSimilarity(30) # 30 Common users Per Product

	#function calculates the recommendation success
	productBasedCollabFiltering()
	comparisonPercentageRecVSTest()
def experiment2():

	# Function below removes the data where the sentiment analysis prediction doesnt match with the actual rating 
	resetIgnore()

	distProdId = getdistinctProduct(isTrain)
	n = len(distProdId)

	distUserId = getdistinctUser(isTrain)
	userLen = len(distUserId)

	print "Distinct Products Count :", n
	print "Distinct users count :", userLen

	
	# funtion deletes existing similarity already if inserted
	deleteSimilarityFromDB()
	
	# function calculates the pearson coeff
	pairwiseProdSimilarity(30) # 30 Common users Per Product

	#function calcualtes the recommendation success
	productBasedCollabFiltering()
	comparisonPercentageRecVSTest()
from queryData import resetIgnore
from queryData import insertRecomendationData
from queryData import getRecommendationData
from queryData import deleteRecommendationData
import math

from sets import Set
import math
import operator



# Module-level settings and lookups read by the functions in this file.
# NOTE(review): the two lookups below run when the module is imported;
# presumably they hit the database -- confirm before importing this module
# from tests or tools.
isTrain = True #flag which specifies to fetch the data from the train table
# Experiment selector: userBasedCollabFiltering only tallies recommendations
# and hits when this equals 1.
expId = 1

distProdId = getdistinctProduct(isTrain) # has the distinct product id's
n = len(distProdId) # number of distinct products


distUserId = getdistinctUser(isTrain) # has the distinct user id's
userLen = len(distUserId) # number of distinct users


#function to calculate item-based CF for different length of common users
def pairwiseProdSimilarity(commonUserInterLen):
	# NOTE(review): the body visible here ends after the initialisations
	# below; the similarity computation itself is not present in this view --
	# confirm against the complete source before relying on this function.
	# commonUserInterLen: presumably the number of users two products must
	# share before they are compared (callers in this file pass 30) -- TODO
	# confirm.
	userResult1 = {}  
	userResult2 = {}
	productToUsers = {}
	productToUserResult = {}

	#gets the users and rating for each distinct product from the train 
Exemplo n.º 6
0
def pairwiseUserSimilarity(commonProdInterLen):
    deleteUserSimilarityFromDB()
    distProdId = getdistinctProduct(isTrain)  # has the distinct product id's
    n = len(distProdId)

    distUserId = getdistinctUser(isTrain)  # has the distinct product id's
    userLen = len(distUserId)

    print userLen

    print "distinct Users Cnt ", userLen

    productResult1 = {}
    productResult2 = {}
    userToProducts = {}
    userToProductResult = {}

    for u in range(0, userLen):
        productResult = getProductOfUsers(distUserId[u], isTrain)
        userToProductResult[u] = productResult
        productSet = set(productResult.keys())
        userToProducts[u] = productSet

    #print productResult

    #print userToProductResult

    #print userToProducts

    for u in range(0, userLen):

        if u % 1000 == 0:
            print "done  ", u

        productResult1 = userToProducts[u]

        for v in range(u + 1, userLen):

            productResult2 = userToProducts[v]
            commonProducts = productResult1.intersection(productResult2)
            commonProductsLen = len(commonProducts)
            # if commonProductsLen > 0:
            # 	print  commonProductsLen

            if commonProductsLen > commonProdInterLen:
                #print
                r_pu_sum = 0
                r_pv_sum = 0

                for p in commonProducts:
                    try:
                        r_pu_sum = r_pu_sum + userToProductResult[u][p]
                        r_pv_sum = r_pv_sum + userToProductResult[v][p]
                    except Exception, e:
                        continue

                numerator = 0
                factor1 = 0
                factor2 = 0
                denominator1 = 0
                denominator2 = 0

                ru_mean = float(r_pu_sum) / commonProductsLen
                rv_mean = float(r_pv_sum) / commonProductsLen

                for p in commonProducts:
                    try:
                        r_pu = userToProductResult[u][p]
                        r_pv = userToProductResult[v][p]

                        factor1 = r_pu - ru_mean
                        factor2 = r_pv - rv_mean

                        numerator = numerator + (factor1 * factor2)

                        denominator1 = denominator1 + (factor1 * factor1)
                        denominator2 = denominator2 + (factor2 * factor2)

                    except Exception, e:
                        continue

                if denominator1 == 0 or denominator2 == 0:
                    sim_u_v = 1
                else:
                    sim_u_v = numerator / (math.sqrt(denominator1) *
                                           math.sqrt(denominator2))

                # if distUserId[u] == 'A10FL3MC9QFZ7P' and distUserId[v] == 'A1H5CRR7Z4SV9F':
                # 	print r_pu_sum,r_pv_sum,commonProducts
                # 	print distUserId[u],distUserId[v],commonProductsLen,ru_mean,rv_mean,denominator1,denominator2,numerator
                # 	print "Similarity: ",u,v,sim_u_v

                insertUserSimilarity(distUserId[u], distUserId[v], sim_u_v)
Exemplo n.º 7
0
def userBasedCollabFiltering():

    #print "distinct users",userLen

    distProdId = getdistinctProduct(isTrain)  # has the distinct product id's
    n = len(distProdId)

    distUserId = getdistinctUser(isTrain)  # has the distinct product id's
    userLen = len(distUserId)

    result = {}
    productsToUser = {}
    productToUserResult = {}

    totalRecommendations = 0
    totalHits = 0

    for i in range(0, n):
        userResult = getUsersOfProduct(distProdId[i], isTrain)
        productToUserResult[i] = userResult
        userSet = set(userResult.keys())
        productsToUser[i] = userSet

    similarUserMap = {}
    for j in range(0, userLen):
        similarUserResult = getSimilarUser(distUserId[j])
        similarUserMap[j] = similarUserResult

    # for all products
    for i in range(0, n):

        usersRatedInTest = getUsersOfProduct(distProdId[i], False)
        userSetTest = set(usersRatedInTest.keys())

        numRecommendations = 0
        numHits = 0
        if i % 10000 == 0:
            print "done", i

        recommendation = {}
        usersRated = productsToUser[i]
        usersRatedResult = productToUserResult[i]

        # for all users
        for u in range(0, userLen):
            # already rated
            if u in usersRated:
                continue

            similarToUser_u = similarUserMap[u]
            commonUserIds = usersRated.intersection(similarToUser_u.keys())

            if len(commonUserIds) > 0:

                numerator = float(0)
                denominator = float(0)

                for uId in commonUserIds:
                    simScore = similarToUser_u[uId]

                    if simScore == 0:
                        continue
                    try:
                        numerator += simScore * usersRatedResult[uId]
                        denominator += math.fabs(simScore)
                    except:
                        continue

                # no new product to recommend
                if denominator == 0:
                    continue

                recommendationScore = numerator / denominator
                recommendation[u] = recommendationScore

        if expId == 1:
            #print recommendation
            for j in recommendation.keys():

                if recommendation[j] > 0:
                    numRecommendations += 1
                    if distUserId[j] in userSetTest:
                        #print "Recommnding ",j
                        numHits += 1

        totalRecommendations += numRecommendations
        totalHits += numHits

    try:
        rec_success = 0
        if totalRecommendations == 0:
            rec_success = 0
        else:
            rec_success = float(totalHits) / float(totalRecommendations)

        print "Total Rec : ", totalRecommendations, " Total Hits", totalHits, " Recomendation Success : ", rec_success
    except:
        print " "