Python getAllTags示例，dbInfo.getAllTags Python示例

示例#1

0

显示文件

def loadDataset(filename, trainingSet=None, testSet=None):
    """Build TF-IDF feature rows for labelled (CSV) and unlabelled movies.

    Column 0 of every CSV row is used as a movie id and column 1 as its
    label. Every movie returned by ``di.getAllMovies()`` whose id (compared
    via ``int``) is not in the CSV becomes a test row with a randomly chosen
    placeholder label of ``'0'`` or ``'1'``.

    Args:
        filename: Path of the CSV file holding the labelled movies.
        trainingSet: Optional list that the training rows are appended to.
            A new list is created when omitted.
        testSet: Optional list that the test rows are appended to.
            A new list is created when omitted.

    Returns:
        A tuple ``(test, trainingSet, testSet)`` where ``test`` lists the
        movie ids of the test rows in the same order as ``testSet``. Each row
        has ``len(di.getAllTags()) + 1`` entries; the last one is the label.
    """
    # Fresh lists per call: the former ``[]`` defaults were created once and
    # shared, so rows from earlier calls leaked into later results.
    if trainingSet is None:
        trainingSet = []
    if testSet is None:
        testSet = []

    with open(filename, 'r', newline='') as csvfile:
        # Blank lines come back from csv.reader as empty rows; skip them
        # instead of failing on row[0] below.
        dataset = [row for row in csv.reader(csvfile) if row]

    movies = di.getAllMovies()
    allTagLen = len(di.getAllTags())
    idfMovArr = idf.idfMovieTag()

    def _buildRow(movieId, label):
        # One feature row: TF-IDF weights, '' padding, label in the last slot.
        row = [''] * (allTagLen + 1)
        for j, weight in enumerate(idf.tfIdfMovieTag(movieId, idfMovArr)):
            row[j] = weight
        row[allTagLen] = label
        return row

    # Rows are built per record rather than written into a buffer sized from
    # len(movies), which overflowed when the CSV had more rows than movies.
    for record in dataset:
        trainingSet.append(_buildRow(record[0], record[1]))

    # A set gives O(1) membership tests in the loop below.
    trainIds = {int(record[0]) for record in dataset}

    test = []
    labels = ['0', '1']
    for movie in movies:
        if int(movie[0]) in trainIds:
            continue
        test.append(movie[0])
        # The real genre is unknown here, so a random label fills the slot.
        testSet.append(_buildRow(movie[0], random.choice(labels)))
    return test, trainingSet, testSet

示例#2

0

显示文件

文件： tensorDecomp.py 项目： giriraj34/MovieRecommendationSystem

def vectActMovTag():
    """Build the nested actor -> movie -> tag dictionary.

    For every (actor, movie, tag) combination the entry is the movie's year
    (looked up from ``di.getAllMovieYrs()``) when the movie id is among the
    actor's movie ids and the tag id is among the movie's tag ids, and ``0``
    otherwise.

    Returns:
        A tuple ``(vect, actors, movies, years)`` where ``vect`` is indexed
        as ``vect[actor_id][movie_id][tag_id]`` and the remaining items are
        the raw results of the corresponding ``di.getAll*`` calls.
    """
    actors = di.getAllActors()
    tags = di.getAllTags()
    movies = di.getAllMovies()
    years = di.getAllYears()

    # movie id -> year
    movYears = {}
    for arr in di.getAllMovieYrs():
        movYears[arr[0]] = arr[1]

    # actor id -> list of movie ids, fetched once per actor
    actMoviesDb = {}
    for act in actors:
        actMoviesDb[act[0]] = [mov[0] for mov in di.getActorMovieIds(act[0])]

    vect = defaultdict(lambda: defaultdict(dict))
    for mov in movies:
        movTags = di.getMovieTagIds(mov[0])[0][0].split(",")
        for act in actors:
            # Does not depend on the tag, so evaluate it once per pair.
            actedIn = mov[0] in actMoviesDb[act[0]]
            for tag in tags:
                # Both branches use the same [actor][movie][tag] key order.
                # Previously the zero default was written under
                # [movie][actor][tag], so it never initialised the entries
                # that the non-zero branch fills.
                if actedIn and (tag[0] in movTags):
                    # The stored value is the movie's year, not 1.
                    vect[act[0]][mov[0]][tag[0]] = movYears[mov[0]]
                else:
                    vect[act[0]][mov[0]][tag[0]] = 0
    return (vect, actors, movies, years)

示例#3

0

显示文件

文件： similarity.py 项目： giriraj34/MovieRecommendationSystem

def getActorTagMatrix():
    """Return an actors x tags matrix of per-actor tag weights.

    Row order follows ``di.getAllActors()`` and column order follows
    ``di.getAllTags()``. A cell holds the weight that
    ``idf.tfIdfActorTag`` reports for that tag id, or stays 0 when the
    actor's vector has no entry for it.
    """
    allTagRows = di.getAllTags()
    numTags = len(allTagRows)
    # Result is unused here; the call is retained from the original code.
    actorNames = di.getAllActorNames()
    actorRows = di.getAllActors()
    matrix = np.zeros((len(actorRows), numTags))
    idfWeights = idf.idfActorTag()
    for row, actor in enumerate(actorRows):
        actorWeights = idf.tfIdfActorTag(actor[0], idfWeights)
        for col, tagRow in enumerate(allTagRows):
            tagId = tagRow[0]
            if tagId in actorWeights.keys():
                matrix[row][col] = actorWeights[tagId]
    return matrix

示例#4

0

显示文件

def idfActorTag():
    """Compute an IDF weight per tag over the set of actors.

    For each tag id the weight is ``log(actor_count / n)`` where ``n`` is
    the number of actors whose ``di.getActorTags(actor_id)[0]`` contains the
    tag id; tags matched by no actor get ``0``.

    Returns:
        dict mapping tag id -> IDF weight.
    """
    idfActVect = {}
    allTags = di.getAllTags()
    allActors = di.getAllActors()
    if len(allTags) == 0:
        # Nothing to weigh; also avoids the per-actor lookups below.
        return idfActVect
    actorCount = len(allActors)

    # The per-actor tags do not depend on the tag being counted, so fetch
    # them once instead of once per (tag, actor) pair.
    tagsPerActor = [di.getActorTags(actor[0])[0] for actor in allActors]

    for tag in allTags:
        tagCount = sum(1 for actorTags in tagsPerActor if tag[0] in actorTags)
        if tagCount != 0:
            idfActVect[tag[0]] = math.log(actorCount / tagCount)
        else:
            idfActVect[tag[0]] = 0
    return idfActVect

示例#5

0

显示文件

def idfUserTag():
    """Compute an IDF weight per tag over the set of users.

    For each tag id the weight is ``log(user_count / n)`` where ``n`` is the
    number of users whose ``di.getUserTags(user_id)[0]`` contains the tag
    id; tags matched by no user get ``0``.

    Returns:
        dict mapping tag id -> IDF weight.
    """
    idfUserVect = {}
    allTags = di.getAllTags()
    allUsers = di.getAllUsers()
    if len(allTags) == 0:
        # Nothing to weigh; also avoids the per-user lookups below.
        return idfUserVect
    userCount = len(allUsers)

    # The per-user tags do not depend on the tag being counted, so fetch
    # them once instead of once per (tag, user) pair.
    tagsPerUser = [di.getUserTags(user[0])[0] for user in allUsers]

    for tag in allTags:
        tagCount = sum(1 for userTags in tagsPerUser if tag[0] in userTags)
        if tagCount != 0:
            idfUserVect[tag[0]] = math.log(userCount / tagCount)
        else:
            idfUserVect[tag[0]] = 0
    return idfUserVect

示例#6

0

显示文件

def idfGenreTag():
    """Compute an IDF weight per tag over the set of genres.

    For each tag id the weight is ``log(genre_count / n)`` where ``n`` is
    the number of genres whose ``di.getGenreTags(genre_id)[0]`` contains the
    tag id; tags matched by no genre get ``0``.

    Returns:
        dict mapping tag id -> IDF weight.
    """
    idfGenVect = {}
    allTags = di.getAllTags()
    allGenres = di.getAllGenres()
    if len(allTags) == 0:
        # Nothing to weigh; also avoids the per-genre lookups below.
        return idfGenVect
    genreCount = len(allGenres)

    # The per-genre tags do not depend on the tag being counted, so fetch
    # them once instead of once per (tag, genre) pair.
    tagsPerGenre = [di.getGenreTags(genre[0])[0] for genre in allGenres]

    for tag in allTags:
        tagCount = sum(1 for genreTags in tagsPerGenre if tag[0] in genreTags)
        if tagCount != 0:
            idfGenVect[tag[0]] = math.log(genreCount / tagCount)
        else:
            idfGenVect[tag[0]] = 0
    return idfGenVect

示例#7

0

显示文件

文件： svm-1.py 项目： giriraj34/MovieRecommendationSystem

def loadDataset(filename, trainingSet=None, testSet=None):
    """Build TF-IDF feature rows and label lists for the classifier.

    Column 0 of every CSV row is used as a movie id and column 1 as its
    label. Every movie returned by ``di.getAllMovies()`` whose id (compared
    via ``int``) is not in the CSV becomes a test row with a randomly chosen
    placeholder target of ``'0'`` or ``'1'``.

    Args:
        filename: Path of the CSV file holding the labelled movies.
        trainingSet: Optional list that the training rows are appended to.
            A new list is created when omitted.
        testSet: Optional list that the test rows are appended to.
            A new list is created when omitted.

    Returns:
        A tuple ``(trainingSet, testSet, labels, target, test)``:
        ``labels`` holds one label per training row; ``target`` has one slot
        per movie, of which the first ``len(test)`` are filled and the rest
        stay ``''``; ``test`` lists the movie ids of the test rows.
    """
    # Fresh lists per call: the former ``[]`` defaults were created once and
    # shared, so rows from earlier calls leaked into later results.
    if trainingSet is None:
        trainingSet = []
    if testSet is None:
        testSet = []

    with open(filename, 'r', newline='') as csvfile:
        # Blank lines come back from csv.reader as empty rows; skip them
        # instead of failing on row[0] below.
        dataset = [row for row in csv.reader(csvfile) if row]

    movies = di.getAllMovies()
    allTagLen = len(di.getAllTags())
    idfMovArr = idf.idfMovieTag()

    def _featureRow(movieId):
        # TF-IDF weights for one movie, padded with '' up to allTagLen.
        row = [''] * allTagLen
        for j, weight in enumerate(idf.tfIdfMovieTag(movieId, idfMovArr)):
            row[j] = weight
        return row

    labels = []
    for record in dataset:
        trainingSet.append(_featureRow(record[0]))
        labels.append(record[1])

    # A set gives O(1) membership tests in the loop below.
    trainIds = {int(record[0]) for record in dataset}

    target = [''] * len(movies)
    test = []
    label = ['0', '1']
    k = 0
    for movie in movies:
        if int(movie[0]) in trainIds:
            continue
        test.append(movie[0])
        row = _featureRow(movie[0])
        # The real genre is unknown here, so a random target fills the slot.
        target[k] = random.choice(label)
        testSet.append(row)
        k += 1
    return trainingSet, testSet, labels, target, test

示例#8

0

显示文件

文件： utils.py 项目： giriraj34/MovieRecommendationSystem

def getGenreMovieTags(movie):
    """Return the movie's time-weighted tag frequencies as a flat list.

    The list has one slot per row of ``di.getAllTags()``, in that order.
    For each id in ``movie.getUnqTags()`` the value is the sum of
    ``getTimeWeight()`` over the movie's tags with that id, divided by the
    total number of tags on the movie. Slots for other tag ids are 0.
    """
    allTagRows = di.getAllTags()
    uniqueIds = movie.getUnqTags()
    movieTags = movie.getTags()
    tagTotal = len(movieTags)

    weightById = {}
    for uniqueId in uniqueIds:
        weighted = sum(
            t.getTimeWeight() for t in movieTags if t.getId() == uniqueId
        )
        weightById[uniqueId] = weighted / tagTotal

    return [weightById.get(tagRow[0], 0) for tagRow in allTagRows]

示例#9

0

显示文件

def idfMovieTag():
    """Compute an IDF weight per tag over the set of movies.

    Returns a NumPy array aligned with ``di.getAllTags()``: element ``i`` is
    ``log(movie_count / n)`` where ``n`` is the number of movies whose
    comma-separated tag-id string contains tag ``i``, or 0 when no movie
    carries that tag.
    """
    tagRows = di.getAllTags()
    movieRows = di.getAllMovies()
    totalMovies = len(movieRows)
    weights = np.zeros(len(tagRows))

    # Each movie's tag ids are fetched and split once, up front.
    tagsPerMovie = [
        di.getMovieTagIds(movie[0])[0][0].split(",") for movie in movieRows
    ]

    for pos, tagRow in enumerate(tagRows):
        hits = sum(1 for movieTags in tagsPerMovie if tagRow[0] in movieTags)
        weights[pos] = math.log(totalMovies / hits) if hits != 0 else 0
    return weights

示例#10

0

显示文件

文件： tensorDecomp.py 项目： giriraj34/MovieRecommendationSystem

def vectTagMovRat():
    """Build the nested tag -> movie -> rating indicator dictionary.

    ``vect[tag_id][movie_id][rating]`` is 1 when the tag id appears in the
    movie's comma-separated tag-id string and the rating is less than or
    equal to the movie's value from ``di.getAllMovieRtngs()``; otherwise 0.

    Returns:
        A tuple ``(vect, tags, movies, ratings)``.
    """
    tags = di.getAllTags()
    movies = di.getAllMovies()
    ratings = di.getAllRatings()
    avgRatings = {row[0]: row[1] for row in di.getAllMovieRtngs()}

    vect = defaultdict(lambda: defaultdict(dict))
    for movie in movies:
        movieId = movie[0]
        movieTags = di.getMovieTagIds(movieId)[0][0].split(",")
        for tagRow in tags:
            tagId = tagRow[0]
            for ratingRow in ratings:
                rating = ratingRow[0]
                satisfied = (tagId in movieTags) and (rating <= avgRatings[movieId])
                vect[tagId][movieId][rating] = 1 if satisfied else 0
    return (vect, tags, movies, ratings)

示例#11

0

显示文件

文件： task4.py 项目： giriraj34/MovieRecommendationSystem

# Per-movie term frequencies: tfmovies[movie_id][tag_id] is the share of the
# movie's tag assignments that carry that tag id.
movies = db.getAllMovies()
movieNames = db.getAllMovieNames()
tfmovies = {}
for movieId in movies:
    Taglist = db.getMovieTags(movieId[0])
    UnqTags = db.getMovieTagIds(movieId[0])[0][0].split(",")
    tfvect = {}
    for tag in UnqTags:
        tffact = 0
        for t in Taglist:
            if (t[0] == tag):
                tffact += 1
        # Key by the whole tag id. ``tag`` is a string produced by split(),
        # so the former ``tag[0]`` kept only its first character and made
        # different ids collide or miss the lookup below.
        tfvect[tag] = tffact / len(Taglist)
    tfmovies[movieId[0]] = tfvect

# Dense movies x tags TF matrix; column order follows db.getAllTags().
tagids = db.getAllTags()
movietf = np.zeros((len(tfmovies), len(tagids)))
for i in range(len(tfmovies)):
    for j in range(len(tagids)):
        if (tagids[j][0] in tfmovies[movies[i][0]].keys()):
            movietf[i][j] = tfmovies[movies[i][0]][tagids[j][0]]

# Movie-movie similarity as the product of the TF matrix with its transpose.
matrix = np.matmul(movietf, np.transpose(movietf))

# NOTE(review): userId is not defined in this excerpt; it must be set
# earlier in the script.
seedList = db.getUserMoviesRates(userId)
seeds = []
for seed in seedList:
    seeds.append(seed[0])

# Names of the seed movies; movieNames is indexed in parallel with movies.
seedNames = []
for i in range(len(movies)):
    if (movies[i][0] in seeds):
        seedNames.append(movieNames[i][0])

示例#12

0

显示文件

    di.delRows("mltags", "movie_id", mov)
    di.delRows("movie_actor", "movie_id", mov)
    di.delRows("movie_info", "movie_id", mov)

# Collect the ids of all users whose numeric id is at most 71550.
allUsers = di.getAllUsers()
delUsers = []
for usr in allUsers:
    if (int(usr[0]) <= 71550):
        delUsers.append(usr[0])
print("delUsers", len(delUsers))
# For each collected id, delete its rows from the three tables keyed on
# user_id: mlratings, mltags and mlusers.
for usr in delUsers:
    di.delRows("mlratings", "user_id", usr)
    di.delRows("mltags", "user_id", usr)
    di.delRows("mlusers", "user_id", usr)
    print("usr ="******"actor = ", act[0])
        di.delRows("imdb_actor_info", "actor_id", act[0])
for tag in allTags:
    if (tag[0] not in mlTg):

示例#13

0

显示文件

import dbInfo as db
import numpy as np
import utils
import tfCalc as tf
import warnings
warnings.filterwarnings("ignore")

# Tag rows are fetched once when the module is loaded; lenTags is the row
# width genSVDMatrix uses when pre-allocating its matrix.
allTags = db.getAllTags()
lenTags = len(allTags)


def genSVDMatrix(genrelist):
    """Generate the movies x tags matrix used as input to SVD.

    One row is produced per movie of the genre object created from
    ``genrelist``; each row is whatever ``utils.getGenreMovieTags`` returns
    for that movie.
    """
    genreMovies = tf.createGenObj(genrelist).getMovies()
    rows = [[0] * lenTags for _ in range(len(genreMovies))]
    for idx, movie in enumerate(genreMovies):
        rows[idx] = utils.getGenreMovieTags(movie)
    return rows


def svdCalc(mat, numSem):
    """Return the first ``numSem`` rows of the third SVD factor of ``mat``.

    The decomposition is the reduced one (``full_matrices=False``). The
    result is a new float array of shape ``(numSem, columns of that factor)``.
    """
    _, _, vt = np.linalg.svd(mat, full_matrices=False)
    semantics = np.zeros((numSem, len(vt[0])))
    for row in range(numSem):
        # Copy one whole row at a time.
        semantics[row, :] = vt[row]
    return semantics

示例#14

0

显示文件

文件： movieRecomm.py 项目： giriraj34/MovieRecommendationSystem

import dbInfo as di
import utils
import lda
import sys
from operator import itemgetter
import tensorDecomp as td
import persPageRank as ppr
import tfCalc as tf
import tfIdfCalc as idf
import numpy as np
from scipy.stats import mode

# Movie and tag rows are fetched once when the module is loaded; the two
# lengths give the matrix dimensions used by formSvdMat.
movies = di.getAllMovies()
tagIds = di.getAllTags()
allTagLen = len(tagIds)
movieLen = len(movies)

def formSvdMat(numSemantics):
    """Return each movie's coordinates in the top ``numSemantics`` SVD factors.

    A movies x tags matrix is filled row by row from ``idf.tfIdfMovieTag``,
    decomposed with a reduced SVD, and the first ``numSemantics`` columns of
    the first factor are returned as a ``(movieLen, numSemantics)`` array.
    Prints a message and exits the process when either matrix dimension is
    smaller than ``numSemantics``.
    """
    mat = np.zeros((movieLen, allTagLen))
    tooFewRows = len(mat) < numSemantics
    if tooFewRows or len(mat[0]) < numSemantics:
        print("cant report top semantics")
        sys.exit()

    idfWeights = idf.idfMovieTag()
    for row in range(movieLen):
        mat[row] = idf.tfIdfMovieTag(movies[row][0], idfWeights)

    leftFactors, _, _ = np.linalg.svd(mat, full_matrices=False)
    movieFacts = np.zeros((movieLen, numSemantics))
    # Copy the leading columns in one slice assignment.
    movieFacts[:, :] = leftFactors[:, :numSemantics]
    return movieFacts