def vectActMovTag():
    """Build a nested actor -> movie -> tag mapping from the database.

    Every (actor, movie, tag) combination gets an entry. The entry is the
    value ``di.getAllMovieYrs()`` associates with the movie (presumably its
    year -- confirm against ``dbInfo``) when the actor appears in the movie
    and the movie carries the tag; otherwise it is 0.

    The original code zero-filled ``vect[movie][actor][tag]`` but wrote the
    non-zero values to ``vect[actor][movie][tag]``, mixing two key orders in
    one structure. Both writes now use actor -> movie -> tag.

    Returns:
        A tuple ``(vect, actors, movies, years)`` where ``vect`` is a
        ``defaultdict`` of ``defaultdict`` of ``dict`` and the remaining
        items are the raw rows returned by ``di``.
    """
    actors = di.getAllActors()
    tags = di.getAllTags()
    movies = di.getAllMovies()
    years = di.getAllYears()
    movYears = {row[0]: row[1] for row in di.getAllMovieYrs()}

    # Cache each actor's movie ids once so the triple loop below does not
    # hit the database repeatedly.
    actMoviesDb = {
        act[0]: [mov[0] for mov in di.getActorMovieIds(act[0])]
        for act in actors
    }

    vect = defaultdict(lambda: defaultdict(dict))
    for mov in movies:
        movId = mov[0]
        # The first column of the first row holds a comma-separated id list.
        movTags = set(di.getMovieTagIds(movId)[0][0].split(","))
        for act in actors:
            actId = act[0]
            actedIn = movId in actMoviesDb[actId]
            for tag in tags:
                tagId = tag[0]
                if actedIn and tagId in movTags:
                    vect[actId][movId][tagId] = movYears[movId]
                else:
                    vect[actId][movId][tagId] = 0
    return (vect, actors, movies, years)
Пример #2
0
def loadDataset(filename, trainingSet=None, testSet=None):
    """Load labelled movies from a CSV file and build train/test rows.

    Each CSV row is read as ``[movie_id, label, ...]``. Every movie returned
    by ``di.getAllMovies()`` whose id is not in the CSV becomes a test row
    with a randomly chosen label of ``'0'`` or ``'1'``.

    A row is a list of ``len(di.getAllTags()) + 1`` items: the values of
    ``idf.tfIdfMovieTag(movie_id, ...)`` followed by the label in the last
    slot. Unfilled slots keep the placeholder ``''``.

    Args:
        filename: Path of the CSV file to read.
        trainingSet: Optional list to append training rows to. A fresh list
            is created when omitted (the previous ``[]`` default was shared
            between calls and accumulated rows).
        testSet: Optional list to append test rows to; same default rule.

    Returns:
        A tuple ``(test, trainingSet, testSet)`` where ``test`` holds the
        movie ids of the test rows, in the same order as ``testSet``.
    """
    if trainingSet is None:
        trainingSet = []
    if testSet is None:
        testSet = []

    with open(filename, 'r') as csvfile:
        # Blank lines come back from csv.reader as empty lists; drop them.
        dataset = [record for record in csv.reader(csvfile) if record]

    movies = di.getAllMovies()
    allTagLen = len(di.getAllTags())
    idfMovArr = idf.idfMovieTag()

    for record in dataset:
        row = [''] * (allTagLen + 1)
        for j, weight in enumerate(idf.tfIdfMovieTag(record[0], idfMovArr)):
            row[j] = weight
        row[allTagLen] = record[1]
        trainingSet.append(row)

    # Set membership keeps the train/test split linear in the movie count.
    trainIds = {int(record[0]) for record in dataset}

    test = []
    labels = ['0', '1']
    for mov in movies:
        movId = mov[0]
        if int(movId) in trainIds:
            continue
        test.append(movId)
        row = [''] * (allTagLen + 1)
        for j, weight in enumerate(idf.tfIdfMovieTag(movId, idfMovArr)):
            row[j] = weight
        # No ground truth is available for unseen movies; use a placeholder.
        row[allTagLen] = random.choice(labels)
        testSet.append(row)
    return test, trainingSet, testSet
Пример #3
0
def loadDataset(filename, trainingSet=None, testSet=None):
    """Load labelled movies from a CSV file and build train/test features.

    Each CSV row is read as ``[movie_id, label, ...]``. Feature rows contain
    the values of ``idf.tfIdfMovieTag(movie_id, ...)`` padded with ``''`` up
    to ``len(di.getAllTags())`` items. Labels are returned separately.

    Args:
        filename: Path of the CSV file to read.
        trainingSet: Optional list to append training rows to. A fresh list
            is created when omitted (the previous ``[]`` default was shared
            between calls and accumulated rows).
        testSet: Optional list to append test rows to; same default rule.

    Returns:
        A tuple ``(trainingSet, testSet, labels, target, test)``:
        ``labels`` are the CSV labels in file order; ``target`` has one slot
        per movie from ``di.getAllMovies()``, where the leading slots hold a
        random ``'0'``/``'1'`` for each test movie and the rest stay ``''``;
        ``test`` lists the movie ids of the test rows.
    """
    if trainingSet is None:
        trainingSet = []
    if testSet is None:
        testSet = []

    with open(filename, 'r') as csvfile:
        # Blank lines come back from csv.reader as empty lists; drop them.
        dataset = [record for record in csv.reader(csvfile) if record]

    movies = di.getAllMovies()
    allTagLen = len(di.getAllTags())
    idfMovArr = idf.idfMovieTag()

    labels = []
    for record in dataset:
        row = [''] * allTagLen
        for j, weight in enumerate(idf.tfIdfMovieTag(record[0], idfMovArr)):
            row[j] = weight
        labels.append(record[1])
        trainingSet.append(row)

    # Set membership keeps the train/test split linear in the movie count.
    trainIds = {int(record[0]) for record in dataset}

    # target keeps its historical length of len(movies); only the first
    # len(test) slots are ever filled.
    target = [''] * len(movies)
    test = []
    label = ['0', '1']
    for mov in movies:
        movId = mov[0]
        if int(movId) in trainIds:
            continue
        row = [''] * allTagLen
        for j, weight in enumerate(idf.tfIdfMovieTag(movId, idfMovArr)):
            row[j] = weight
        # No ground truth is available for unseen movies; use a placeholder.
        target[len(test)] = random.choice(label)
        test.append(movId)
        testSet.append(row)
    return trainingSet, testSet, labels, target, test
Пример #4
0
def idfMovieTag():
    """Compute an inverse-document-frequency value for every tag.

    Movies play the role of documents: for each tag row returned by
    ``di.getAllTags()`` the result holds ``log(movie_count / n)`` where ``n``
    is the number of movies whose tag-id list contains the tag, or 0 when no
    movie carries it.

    Returns:
        A 1-D NumPy array with one value per tag, in ``di.getAllTags()``
        order.
    """
    tagRows = di.getAllTags()
    movieRows = di.getAllMovies()
    totalMovies = len(movieRows)
    idfValues = np.zeros(len(tagRows))

    # One tag-id list per movie, parsed from the comma-separated string in
    # the first column of the first row.
    tagsPerMovie = [
        di.getMovieTagIds(row[0])[0][0].split(",") for row in movieRows
    ]

    for pos, tagRow in enumerate(tagRows):
        hits = sum(1 for movieTags in tagsPerMovie if tagRow[0] in movieTags)
        if hits:
            idfValues[pos] = math.log(totalMovies / hits)
        else:
            idfValues[pos] = 0
    return idfValues
def vectTagMovRat():
    """Build a nested tag -> movie -> rating indicator mapping.

    An entry is 1 when the movie carries the tag and the rating value is
    less than or equal to the value ``di.getAllMovieRtngs()`` associates
    with the movie (presumably its average rating -- confirm against
    ``dbInfo``); otherwise it is 0.

    Returns:
        A tuple ``(vect, tags, movies, ratings)`` where ``vect`` is a
        ``defaultdict`` of ``defaultdict`` of ``dict`` and the remaining
        items are the raw rows returned by ``di``.
    """
    tagRows = di.getAllTags()
    movieRows = di.getAllMovies()
    ratingRows = di.getAllRatings()
    movieRating = dict((row[0], row[1]) for row in di.getAllMovieRtngs())

    cube = defaultdict(lambda: defaultdict(dict))
    for movieRow in movieRows:
        movieId = movieRow[0]
        tagsOfMovie = di.getMovieTagIds(movieId)[0][0].split(",")
        for tagRow in tagRows:
            tagId = tagRow[0]
            tagged = tagId in tagsOfMovie
            for ratingRow in ratingRows:
                level = ratingRow[0]
                # The rating lookup only happens for tagged movies.
                if tagged and level <= movieRating[movieId]:
                    cube[tagId][movieId][level] = 1
                else:
                    cube[tagId][movieId][level] = 0
    return (cube, tagRows, movieRows, ratingRows)
def vectActMovYr():
    """Build a nested actor -> movie -> year indicator mapping.

    Only actors with at least one movie among ``di.getAllMovies()`` are
    included. An entry is 1 when the actor appears in the movie and the
    value ``di.getAllMovieYrs()`` gives for that movie equals the year;
    otherwise it is 0.

    Returns:
        A tuple ``(vect, actors, movies, years)`` where ``vect`` is a
        ``defaultdict`` of ``defaultdict`` of ``dict`` and the remaining
        items are the raw rows returned by ``di``.
    """
    actors = di.getAllActors()
    movies = di.getAllMovies()
    years = di.getAllYears()
    yearOfMovie = {row[0]: row[1] for row in di.getAllMovieYrs()}
    knownMovieIds = [row[0] for row in movies]

    # Keep only actors who appear in at least one known movie.
    filmography = {}
    for actorRow in actors:
        actorId = actorRow[0]
        actedIn = [row[0] for row in di.getActorMovieIds(actorId)]
        if any(movieId in knownMovieIds for movieId in actedIn):
            filmography[actorId] = actedIn
    print("\ngot the actor movies\n")

    cube = defaultdict(lambda: defaultdict(dict))
    for actorId, actedIn in filmography.items():
        for movieId in knownMovieIds:
            for yearRow in years:
                year = yearRow[0]
                # The year lookup only happens for movies the actor is in.
                matched = movieId in actedIn and yearOfMovie[movieId] == year
                cube[actorId][movieId][year] = 1 if matched else 0
    return (cube, actors, movies, years)
Пример #7
0
import dbInfo as di
import probFeedback as pf
import movieRecomm as mr
import numpy as np

# Recommend movies to a user: the movies the user has already seen are used
# as seeds for mr.pprRes (presumably personalised PageRank -- see
# movieRecomm) and the top-ranked results are printed.
userId = input("\nGive User Id: ")
numMovies = 5

movies = di.getAllMovies()
movieNames = di.getAllMovieNames()
moviesArr = [mov[0] for mov in movies]

# Movie-movie similarity obtained by multiplying the matrix by its transpose.
mat = mr.formPPRMatrix()
matrix = np.matmul(mat, np.transpose(mat))

usrSeenMovies = di.getusrMovTime(userId)
if len(usrSeenMovies) == 0:
    print("user has not watched any movies to give suggestions")
    # Equivalent to sys.exit(); `sys` is not imported by this script, so the
    # original call raised NameError instead of exiting cleanly.
    raise SystemExit
usrMovies = [mov[0] for mov in usrSeenMovies]
print("\nseeds =", usrMovies)
rankedRes = mr.pprRes(matrix, usrMovies)
print("\nRank\tMovie Id\tMovie Name\n")
# Never index past the end when fewer than numMovies results come back.
for i in range(min(numMovies, len(rankedRes))):
    movIdx = moviesArr.index(rankedRes[i])
    print(i+1,":\t",rankedRes[i],"\t\t",movieNames[movIdx][0])
Пример #8
0
import dbInfo as db
import numpy as np
import persPageRank as ppr

userId = input("\nGive User Id: ")

movies = db.getAllMovies()
movieNames = db.getAllMovieNames()

# Per-movie term-frequency vectors: tag id -> (occurrences of the tag among
# the movie's tag rows) / (number of tag rows for the movie).
tfmovies = {}
for movieId in movies:
    Taglist = db.getMovieTags(movieId[0])
    UnqTags = db.getMovieTagIds(movieId[0])[0][0].split(",")
    tagTotal = len(Taglist)
    tfvect = {}
    for tag in UnqTags:
        tffact = sum(1 for t in Taglist if t[0] == tag)
        # Key by the whole tag id. The original used tag[0], i.e. only the
        # first character of the id string, so multi-character ids collided
        # and never matched the ids looked up below.
        tfvect[tag] = tffact / tagTotal if tagTotal else 0
    tfmovies[movieId[0]] = tfvect

tagids = db.getAllTags()
# Dense movie x tag matrix; tags a movie does not carry stay at 0.
movietf = np.zeros((len(tfmovies), len(tagids)))
for i in range(len(tfmovies)):
    movieTfVect = tfmovies[movies[i][0]]
    for j, tagRow in enumerate(tagids):
        if tagRow[0] in movieTfVect:
            movietf[i][j] = movieTfVect[tagRow[0]]
# Movie-movie similarity as the product of the TF matrix with its transpose.
matrix = np.matmul(movietf, np.transpose(movietf))
seedList = db.getUserMoviesRates(userId)
seeds = []