def tfUserTag(userId):
    """Build the tag-frequency vector for one user.

    Every movie returned by ``di.getUserMovies(userId)`` is loaded with its
    tags and attached to a ``User`` object.  For each unique tag of the user,
    the score is the sum over the user's movies of
    (summed ``getTimeWeight()`` of the matching tags) / (number of tags on
    that movie).

    Args:
        userId: identifier handed to ``User`` and ``di.getUserMovies``.

    Returns:
        The result of ``utils.normalizeVector(utils.sortByValue(vector))``
        where ``vector`` maps tag id -> score.
    """
    profile = User(userId)
    for movieRow in di.getUserMovies(userId):
        # The actor rank is not needed for users, so it is fixed at 0.
        watched = Movie(movieRow[0], 0)
        for tagRow in di.getMovieTags(movieRow[0]):
            # tagRow[0] is the tag id, tagRow[1] its timestamp.
            watched.addTag(tagRow[0], tagRow[1])
        profile.addMovie(watched)

    profile.setUnqTags()
    scores = {}
    for tagId in profile.getUnqTags():
        score = 0
        for film in profile.getMovies():
            matchedWeight = 0
            tagCount = 0
            # NOTE(review): the counter is assumed to advance for every tag
            # of the movie (not only matching ones) -- confirm against the
            # original indentation.
            for tag in film.getTags():
                tagCount += 1
                if tag.getId() == tagId:
                    matchedWeight += tag.getTimeWeight()
            if tagCount == 0:
                # Movie without tags contributes nothing.
                continue
            score += matchedWeight / tagCount
        scores[tagId] = score

    return utils.normalizeVector(utils.sortByValue(scores))
def movieTagSpace(movieId):
    """Rank actors outside a movie's cast by distance in the movie's tag space.

    The tag space has one column per row returned by
    ``di.getMovieTags(movieId)``.  Each actor is represented by the tf-idf
    weights (``idf.tfIdfActorTag``) of those tags, 0 where the actor has no
    weight for a tag.  The vectors of the movie's own actors are summed into a
    single reference vector, and every other actor is compared to it with
    ``distance.euclidean``.

    Args:
        movieId: identifier passed to ``di.getMovieTags`` and
            ``di.getMovieActorIds``.

    Returns:
        The last ten entries of ``utils.sortByValue(dist)``, where ``dist``
        maps actor name -> distance.
    """
    tagIds = di.getMovieTags(movieId)
    tagLen = len(tagIds)
    actorlist = di.getAllActors()
    actorNames = di.getAllActorNames()
    idfActVector = idf.idfActorTag()
    movieActors = [row[0] for row in di.getMovieActorIds(movieId)]

    def tagSpaceRow(actorId):
        # One tf-idf weight per movie tag column; 0 when the actor lacks the tag.
        weights = idf.tfIdfActorTag(actorId, idfActVector)
        known = weights.keys()
        return [weights[tagRow[0]] if tagRow[0] in known else 0
                for tagRow in tagIds]

    # Column-wise sum of the cast's rows, accumulated actor by actor.
    actVect = [0] * tagLen
    for actorId in movieActors:
        for col, weight in enumerate(tagSpaceRow(actorId)):
            actVect[col] = actVect[col] + weight

    # actorNames is indexed in parallel with actorlist.
    dist = {}
    for pos, actorRow in enumerate(actorlist):
        if actorRow[0] in movieActors:
            continue
        dist[actorNames[pos][0]] = distance.euclidean(
            tagSpaceRow(actorRow[0]), actVect)

    return utils.sortByValue(dist)[-10:]
def ldaInputTags(genre):
    """Collect the tag ids of every tagged movie in a genre.

    Args:
        genre: value passed to ``di.getGenreMovies``.

    Returns:
        A list with one inner list per movie that has at least one tag; each
        inner list holds the first column of the movie's
        ``di.getMovieTags`` rows, in the order returned.
    """
    documents = []
    for movieRow in di.getGenreMovies(genre):
        tagRows = di.getMovieTags(movieRow[0])
        tagIdsOfMovie = [tagRow[0] for tagRow in tagRows]
        if len(tagRows) == 0:
            # Untagged movies are left out entirely.
            continue
        documents.append(tagIdsOfMovie)
    return documents
def tfMovTag(movieId):
    """Return the tag array computed by ``utils.getGenreMovieTags`` for a movie.

    A ``Movie`` is created for ``movieId``, every row of
    ``di.getMovieTags(movieId)`` is added to it as (tag id, timestamp), and
    the populated object is handed to ``utils.getGenreMovieTags``.

    Args:
        movieId: identifier of the movie.

    Returns:
        Whatever ``utils.getGenreMovieTags`` returns for the populated movie.
    """
    # The actor rank is not needed here, so it is fixed at 0.
    film = Movie(movieId, 0)
    for tagRow in di.getMovieTags(movieId):
        film.addTag(tagRow[0], tagRow[1])
    return utils.getGenreMovieTags(film)
def tfActorTag(actorId):
    """Build the rank-weighted tag-frequency vector for one actor.

    Every row of ``di.getActorMovies(actorId)`` is (movie id, actor rank).
    Each movie is loaded with its tags and attached to an ``Actor`` object.
    For each unique tag of the actor, the score is the sum over the actor's
    movies of
    ``getRWeight() * (summed getTimeWeight() of matching tags) / (tag count)``.

    Args:
        actorId: identifier handed to ``Actor`` and ``di.getActorMovies``.

    Returns:
        The result of ``utils.normalizeVector(utils.sortByValue(vector))``
        where ``vector`` maps tag id -> score.
    """
    appearances = di.getActorMovies(actorId)
    actor = Actor(actorId)
    for appearance in appearances:
        # appearance[0] is the movie id, appearance[1] the actor's rank in it.
        film = Movie(appearance[0], appearance[1])
        for tagRow in di.getMovieTags(appearance[0]):
            # tagRow[0] is the tag id, tagRow[1] its timestamp.
            film.addTag(tagRow[0], tagRow[1])
        actor.addMovie(film)

    actor.setUnqTags()
    scores = {}
    for tagId in actor.getUnqTags():
        score = 0
        for film in actor.getMovies():
            filmTags = film.getTags()
            rankWeight = film.getRWeight()
            matchedWeight = 0
            tagCount = 0
            # NOTE(review): the counter is assumed to advance for every tag
            # of the movie (not only matching ones) -- confirm against the
            # original indentation.
            for tag in filmTags:
                if tag.getId() == tagId:
                    matchedWeight += tag.getTimeWeight()
                tagCount += 1
            # There are movies with no tags; skip them to avoid dividing by 0.
            if tagCount != 0:
                score += (rankWeight * matchedWeight) / tagCount
        scores[tagId] = score

    return utils.normalizeVector(utils.sortByValue(scores))
def formLdaMat1(numSemantics):
    """Fit an LDA model on the movie x tag count matrix.

    Reads the module-level ``movies``, ``tagIds`` and ``allTagLen``.  Each
    movie becomes one row of length ``allTagLen``; the column of a tag is the
    position of its first occurrence in ``tagIds`` and the cell counts how
    many of the movie's ``di.getMovieTags`` rows carry that tag id.

    Args:
        numSemantics: number of topics passed to ``lda.LDA`` as ``n_topics``.

    Returns:
        ``model.doc_topic_`` of the fitted model.
    """
    movies1 = tuple(movies)
    tags1 = tuple(tagIds)

    # Resolve each tag's column once instead of calling tags1.index() for
    # every match of every movie.  index() keeps the "first occurrence wins"
    # behaviour should tagIds contain repeated rows.
    columns = [tags1.index(tag) for tag in tags1]

    movTag = []
    for movie in movies1:
        # One pass over the movie's tags: tag id -> number of occurrences.
        hits = {}
        for tg in di.getMovieTags(movie[0]):
            hits[tg[0]] = hits.get(tg[0], 0) + 1

        arr = [0] * allTagLen
        for tag, col in zip(tags1, columns):
            count = hits.get(tag[0], 0)
            if count:
                arr[col] += count
        movTag.append(arr)

    movTag = np.array(movTag)
    # Fixed iteration count and seed, as in the original configuration.
    model = lda.LDA(n_topics=numSemantics, n_iter=100, random_state=1)
    model.fit(movTag)
    return model.doc_topic_
def createGenObj(genre):
    """Create a ``Genre`` object populated with its movies and their tags.

    Args:
        genre: value passed to ``Genre`` and ``di.getGenreMovies``.

    Returns:
        The ``Genre`` object after every movie has been added and
        ``setUnqTags()`` has been called on it.
    """
    genObj = Genre(genre)
    for movieRow in di.getGenreMovies(genre):
        # The actor rank is not needed for genres, so it is fixed at 0.
        film = Movie(movieRow[0], 0)
        for tagRow in di.getMovieTags(movieRow[0]):
            # tagRow[0] is the tag id, tagRow[1] its timestamp.
            film.addTag(tagRow[0], tagRow[1])
        genObj.addMovie(film)
    genObj.setUnqTags()
    return genObj
import dbInfo as db
import numpy as np
import persPageRank as ppr

# Builds a movie x tag term-frequency matrix and the movie-movie similarity
# matrix (movietf . movietf^T), then loads the rated movies of the user whose
# id is read from stdin.

userId = input("\nGive User Id: ")
movies = db.getAllMovies()
movieNames = db.getAllMovieNames()

# tfmovies: movie id -> {tag id -> occurrences of the tag / tags on the movie}
tfmovies = {}
for movieId in movies:
    Taglist = db.getMovieTags(movieId[0])
    tfvect = {}
    # A movie without tags keeps an empty vector; computing a frequency for
    # it would divide by zero.
    if len(Taglist) != 0:
        # The first cell of getMovieTagIds holds a comma-separated id string.
        UnqTags = db.getMovieTagIds(movieId[0])[0][0].split(",")
        for tag in UnqTags:
            tffact = 0
            for t in Taglist:
                if t[0] == tag:
                    tffact += 1
            # Key by the full tag id.  `tag` is a string, so the former
            # `tag[0]` kept only its first character and never matched the
            # ids looked up from `tagids` below.
            tfvect[tag] = tffact / len(Taglist)
    tfmovies[movieId[0]] = tfvect

tagids = db.getAllTags()

# Rows follow the order of `movies`, columns the order of `tagids`.
movietf = np.zeros((len(tfmovies), len(tagids)))
for i in range(len(tfmovies)):
    movie_tf = tfmovies[movies[i][0]]
    for j, tag_row in enumerate(tagids):
        if tag_row[0] in movie_tf:
            movietf[i][j] = movie_tf[tag_row[0]]

matrix = np.matmul(movietf, np.transpose(movietf))
seedList = db.getUserMoviesRates(userId)
seeds = []
import sys

import dbInfo as di
import numpy as np
import lda

# Builds and prints the full movie x tag count matrix (the input intended for
# the LDA model that is currently disabled at the bottom).

# Print arrays without truncation.  NumPy rejects threshold=np.nan and its
# error message recommends sys.maxsize for untruncated output.
np.set_printoptions(threshold=sys.maxsize)

movies = di.getAllMovies()
movies1 = tuple(movies)
tags = di.getAllTags()
tags1 = tuple(tags)
n1 = len(movies1)
n2 = len(tags)

# Resolve each tag's column once instead of calling tags1.index() on every
# match; index() keeps the "first occurrence wins" behaviour.
tagColumns = [tags1.index(tag) for tag in tags1]

movTag = []
for movie in movies1:
    arr = [0] * n2
    tgs = tuple(di.getMovieTags(movie[0]))
    for tag, col in zip(tags1, tagColumns):
        for tg in tgs:
            if tag[0] == tg[0]:
                arr[col] += 1
                # NOTE(review): debug trace assumed to fire once per match --
                # confirm against the original indentation.
                print("i = ", col)
    movTag.append(arr)

movTag = np.array(movTag)
print(movTag)
#model = lda.LDA(n_topics = 500, n_iter = 100, random_state = 1)
#model.fit(movTag)
#doc2topic = model.doc_topic_
with open('../testdata/movie-actor.csv', encoding='utf8') as movie_actor: entries = csv.DictReader(movie_actor) for entry in entries: #print(entry) myDbCurr.execute("insert into movie_actor values(?,?,?)",[entry['movieid'],entry['actorid'],entry['actor_movie_rank']]) myDbConn.commit() print("6") # Add tags to actors actors = di.getAllActors() for actor in actors: movies = di.getActorMovies(actor[0]) tags = '' for movie in movies: movieTags = di.getMovieTags(movie[0]) for movieTag in movieTags: if(tags == ''): tags = movieTag[0] elif(movieTag[0] not in tags): tags = tags + ',' + movieTag[0] myDbCurr.execute("update imdb_actor_info set tags = ? where actor_id = ?",(tags, actor[0])) myDbConn.commit() print("5") # Add tags to Genres myDbCurr.execute("select distinct genre from mlmovies") genres = myDbCurr.fetchall() for genre in genres: movies = di.getGenreMovies(genre[0]) tags = '' for movie in movies: