def getRevisedRanksPPR(semMatrix, seeds, rel, irr, objList, usrMovies):
    """Re-weight the semantics from relevance feedback and re-run PPR.

    For every semantic (column of ``semMatrix``) the function counts in how
    many relevant rows, irrelevant rows and rows overall the value is
    non-zero, turns those counts into smoothed, sum-normalised estimates
    ``p`` and ``u``, and scales the column by
    ``log(p * (1 - u) / (u * (1 - p)))``.  The object-object matrix
    ``semMatrix . semMatrix^T`` built from the scaled values is then handed
    to the ``ppr`` helpers.

    Side effects:
        * ``semMatrix`` is rescaled IN PLACE, so the caller's matrix changes
          and repeated calls compound the weights.
        * The normalised ``p`` and ``u`` vectors are printed to stdout.

    Args:
        semMatrix: 2-D object-by-semantic matrix, indexable as
            ``semMatrix[i][sem]`` (modified in place, see above).
        seeds: Forwarded unchanged to ``ppr.formSeed``.
        rel: Sized iterable of row indices of ``semMatrix`` marked relevant.
        irr: Sized iterable of row indices marked irrelevant.
        objList: Its length is taken as the number of objects; it is also
            forwarded to ``ppr.formSeed`` and ``ppr.rankedList``.
        usrMovies: Forwarded unchanged to ``ppr.rankedList``.

    Returns:
        Whatever ``ppr.rankedList`` returns for the recomputed scores.
    """
    N = len(objList)  # Number of movies
    numSemantics = len(semMatrix[0])
    R = len(rel)
    Q = len(irr)

    # True wherever an object carries a semantic.  Counting on this boolean
    # view replaces three nested Python loops with the same integer counts.
    nonZero = np.asarray(semMatrix) != 0
    relRows = np.asarray(list(rel), dtype=np.intp)
    irrRows = np.asarray(list(irr), dtype=np.intp)
    r = nonZero[relRows].sum(axis=0)  # ri: relevant rows having the semantic
    q = nonZero[irrRows].sum(axis=0)  # qi: irrelevant rows having it
    n = nonZero[:N].sum(axis=0)       # ni: all rows having it

    # pi / ui: smoothed estimates, then normalised so each vector sums to 1
    # (an all-zero vector is left untouched by dividing by 1 instead).
    p = (r + n / N) / (R + 1)
    u = (q + n / N) / (Q + 1)
    pSum = np.sum(p)
    uSum = np.sum(u)
    if pSum == 0:
        pSum = 1
    if uSum == 0:
        uSum = 1
    p = p / pSum
    u = u / uSum
    print("\np =", p)
    print("\nu =", u)

    for sem in range(numSemantics):
        nr = p[sem] * (1 - u[sem])
        dr = u[sem] * (1 - p[sem])
        if nr == 0 or dr == 0 or nr == dr:
            # Degenerate ratio (log undefined or exactly 0): the weight
            # falls back to 1, so the column is simply left as it is.
            continue
        weight = math.log(nr / dr)
        for i in range(N):
            semMatrix[i][sem] = semMatrix[i][sem] * weight

    matrix = np.matmul(semMatrix, np.transpose(semMatrix))
    seedMat = ppr.formSeed(seeds, objList)
    pprOut = ppr.personalizedPageRank(matrix, seedMat, 0.15)
    rankedRes = ppr.rankedList(pprOut, objList, usrMovies, N)
    return rankedRes
# NOTE(review): this fragment starts part-way through a larger script.  The
# statements down to ``tfmovies[movieId[0]] = tfvect`` rely on ``UnqTags``,
# ``Taglist``, ``movieId`` and ``tfmovies`` defined outside this view and
# look like the body of a per-movie loop -- confirm the enclosing structure
# before relying on the indentation used here.

# Term frequency of each unique tag for the current movie:
# (number of matching entries in Taglist) / (total entries in Taglist).
tfvect = {}
for tag in UnqTags:
    # NOTE(review): ``tag`` is compared whole against ``t[0]`` here but is
    # indexed as ``tag[0]`` for the key below -- verify the row shapes.
    tffact = sum(1 for t in Taglist if t[0] == tag)
    tfvect[tag[0]] = tffact / len(Taglist)
tfmovies[movieId[0]] = tfvect

tagids = db.getAllTags()

# Dense movie-by-tag matrix; cells stay 0 for tags a movie does not have.
movietf = np.zeros((len(tfmovies), len(tagids)))
for i in range(len(tfmovies)):
    for j, tagRow in enumerate(tagids):
        movieTf = tfmovies[movies[i][0]]
        tagId = tagRow[0]
        if tagId in movieTf:
            movietf[i][j] = movieTf[tagId]

# Movie-movie matrix: dot products between the rows of ``movietf``.
matrix = np.matmul(movietf, np.transpose(movietf))

# The first field of each row returned for the user is used as a seed.
seedList = db.getUserMoviesRates(userId)
seeds = [seed[0] for seed in seedList]

# Names of the seed movies, picked by position from ``movieNames``.
seedNames = [
    movieNames[i][0]
    for i in range(len(movies))
    if movies[i][0] in seeds
]

seedMat = ppr.formSeed(seeds, movies)
pprOut = ppr.personalizedPageRank(matrix, seedMat, 0.15)
result = ppr.rankedList(pprOut, movieNames, seedNames, 5)
print("\n5 Recomended Movies:\n\n", result)
def pprRes(matrix, seeds):
    """Run personalized PageRank on ``matrix`` and return the ranked list.

    The seed structure is built by ``ppr.formSeed`` from ``seeds`` and the
    module-level ``movies``; ``ppr.personalizedPageRank`` is called with the
    fixed third argument 0.15, and its output is passed to
    ``ppr.rankedList`` together with ``movies``, ``seeds`` and the
    module-level ``movieLen`` (presumably the number of results to keep --
    confirm against ``ppr.rankedList``).
    """
    seed_vector = ppr.formSeed(seeds, movies)
    return ppr.rankedList(
        ppr.personalizedPageRank(matrix, seed_vector, 0.15),
        movies,
        seeds,
        movieLen,
    )
import numpy as np

import utils
import persPageRank as ppr
import dbInfo as db
import similarity

#np.set_printoptions(threshold=np.nan)

# Actor-actor similarity: dot products between rows of the actor-tag matrix.
actTags = similarity.getActorTagMatrix()
sim = np.matmul(actTags, np.transpose(actTags))
print("\n\nActor-Actor similarity matrix:\n", sim, "\n\nsize of matrix :", sim.shape)

# Seeds are typed as a comma-separated list.  Surrounding whitespace is
# stripped so that "1, 2" yields "2" rather than " 2", which could never
# match an ID.
# NOTE(review): the seeds stay strings; if db.getAllActors() returns
# non-string IDs the membership test below will not match -- confirm.
seeds = [token.strip() for token in input("\nGive Seed Actors: ").split(",")]

actorNames = db.getAllActorNames()
actorIds = db.getAllActors()

# actorIds and actorNames are walked in parallel: the name at the same
# position as a seed ID is taken as that seed's display name.
seedNames = [
    nameRow[0]
    for idRow, nameRow in zip(actorIds, actorNames)
    if idRow[0] in seeds
]

seedMat = ppr.formSeed(seeds, actorIds)
pprOut = ppr.personalizedPageRank(sim, seedMat, 0.85)

print("\n\n10 most related actors:\n")
for act in ppr.rankedList(pprOut, actorNames, seedNames, 10):
    print(act)