示例#1
0
def getRevisedRanksPPR(semMatrix, seeds, rel, irr, objList, usrMovies):
    """Re-rank objects with personalized PageRank after relevance feedback.

    Every semantic (column of ``semMatrix``) gets a feedback weight derived
    from how often it is non-zero among the relevant rows, the irrelevant
    rows and the first ``len(objList)`` rows. The columns are scaled by these
    weights, an object-object matrix is formed as ``W @ W.T`` and handed to
    the ``ppr`` helpers.

    Args:
        semMatrix: 2-D object-by-semantic matrix (nested lists or an array).
            It is read only; the weighting is applied to a private float
            copy, so repeated calls do not compound the weights and
            integer-typed inputs do not truncate them.
        seeds: Seed objects, forwarded to ``ppr.formSeed``.
        rel: Row indices of ``semMatrix`` marked relevant.
        irr: Row indices of ``semMatrix`` marked irrelevant.
        objList: Object list; its length is used as the number of objects
            and it is forwarded to the ``ppr`` helpers.
        usrMovies: Forwarded to ``ppr.rankedList``.

    Returns:
        Whatever ``ppr.rankedList(pprOut, objList, usrMovies, N)`` returns.

    Raises:
        IndexError: If ``semMatrix`` is empty, has fewer rows than
            ``objList`` has entries, or ``rel``/``irr`` hold out-of-range
            row indices.
    """
    N = len(objList)  # Number of objects (movies)
    numSemantics = len(semMatrix[0])
    R = len(rel)
    Q = len(irr)

    # Private float copy: keeps the caller's matrix intact and guarantees
    # that fractional weights survive even if the input holds integers.
    weighted = np.array(semMatrix, dtype=float)
    if N > weighted.shape[0]:
        raise IndexError("objList has more entries than semMatrix has rows")
    present = weighted != 0

    # r / q: per-semantic count of relevant / irrelevant rows containing it.
    r = np.zeros(numSemantics)
    for i in rel:
        r += present[i]
    q = np.zeros(numSemantics)
    for i in irr:
        q += present[i]
    # n: per-semantic count over the first N rows.
    n = np.count_nonzero(present[:N], axis=0).astype(float)

    # Smoothed estimates, then normalised so each vector sums to 1
    # (left untouched when the sum is 0 to avoid dividing by zero).
    p = (r + n / N) / (R + 1)
    u = (q + n / N) / (Q + 1)
    pSum = np.sum(p)
    uSum = np.sum(u)
    if pSum == 0:
        pSum = 1
    if uSum == 0:
        uSum = 1
    p = p / pSum
    u = u / uSum
    print("\np =", p)
    print("\nu =", u)

    # Log-odds weight per semantic. A neutral weight of 1 is used when the
    # ratio is undefined (zero numerator/denominator) or would give log(1)=0,
    # which would otherwise wipe out the column.
    weights = np.ones(numSemantics)
    for sem in range(numSemantics):
        nr = p[sem] * (1 - u[sem])
        dr = u[sem] * (1 - p[sem])
        if nr != 0 and dr != 0 and nr != dr:
            weights[sem] = math.log(nr / dr)

    # NOTE(review): the product below multiplies each weight by itself, so
    # the sign of a negative log weight does not survive - confirm intended.
    weighted[:N] *= weights
    matrix = np.matmul(weighted, np.transpose(weighted))

    seedMat = ppr.formSeed(seeds, objList)
    # 0.15 is passed straight through; presumably the restart/damping
    # parameter - confirm against ppr.personalizedPageRank.
    pprOut = ppr.personalizedPageRank(matrix, seedMat, 0.15)
    rankedRes = ppr.rankedList(pprOut, objList, usrMovies, N)
    return rankedRes
    #print(UnqTags,movieId,Taglist)
    tfvect = {}
    for tag in UnqTags:
        tffact = 0
        for t in Taglist:
            if (t[0] == tag):
                tffact += 1
        tfvect[tag[0]] = tffact / len(Taglist)
    tfmovies[movieId[0]] = tfvect
# Build the movie x tag term-frequency matrix from the per-movie tf
# dictionaries collected in `tfmovies` (defined outside this view).
tagids = db.getAllTags()
movietf = np.zeros((len(tfmovies), len(tagids)))
for row in range(len(tfmovies)):
    for col, tagRow in enumerate(tagids):
        tagWeights = tfmovies[movies[row][0]]
        tagId = tagRow[0]
        if tagId in tagWeights:
            movietf[row][col] = tagWeights[tagId]

# Movie-movie similarity: product of the tf matrix with its transpose.
matrix = np.matmul(movietf, np.transpose(movietf))

# The user's rated movies act as seeds; the first field of each row is kept.
seedList = db.getUserMoviesRates(userId)
seeds = [seedRow[0] for seedRow in seedList]

# Names of the seed movies, looked up by position in `movies`.
seedNames = [
    movieNames[idx][0]
    for idx, movieRow in enumerate(movies)
    if movieRow[0] in seeds
]

seedMat = ppr.formSeed(seeds, movies)
pprOut = ppr.personalizedPageRank(matrix, seedMat, 0.15)
result = ppr.rankedList(pprOut, movieNames, seedNames, 5)
print("\n5 Recomended Movies:\n\n", result)
def pprRes(matrix, seeds):
    """Run personalized PageRank over ``matrix`` from ``seeds``.

    Relies on the module-level names ``movies`` and ``movieLen``, which are
    defined outside this view (``movieLen`` is presumably the number of
    results to return - confirm against ``ppr.rankedList``).

    Returns:
        The value produced by ``ppr.rankedList``.
    """
    seedVector = ppr.formSeed(seeds, movies)
    scores = ppr.personalizedPageRank(matrix, seedVector, 0.15)
    ranking = ppr.rankedList(scores, movies, seeds, movieLen)
    return ranking
示例#4
0
import numpy as np
import utils
import persPageRank as ppr
import dbInfo as db
import similarity
#np.set_printoptions(threshold=np.nan)

# Actor-actor similarity: product of the actor-tag matrix with its transpose.
actTags = similarity.getActorTagMatrix()
sim = np.matmul(actTags, np.transpose(actTags))
print("\n\nActor-Actor similarity matrix:\n", sim, "\n\nsize of matrix :",
      sim.shape)

# Seeds are entered as a comma-separated list. Surrounding whitespace is
# stripped so that "1, 2" selects the same actors as "1,2".
# NOTE(review): seeds stay strings; if db.getAllActors() yields non-string
# ids the membership test below never matches - confirm the id type.
seeds = [seed.strip() for seed in input("\nGive Seed Actors: ").split(",")]
actorNames = db.getAllActorNames()
actorIds = db.getAllActors()

# Names of the seed actors, looked up by position in `actorIds`.
seedNames = [
    actorNames[idx][0]
    for idx, actorRow in enumerate(actorIds)
    if actorRow[0] in seeds
]

seedMat = ppr.formSeed(seeds, actorIds)
pprOut = ppr.personalizedPageRank(sim, seedMat, 0.85)
print("\n\n10 most related actors:\n")
for act in ppr.rankedList(pprOut, actorNames, seedNames, 10):
    print(act)