Пример #1
0
def loadMoviesFromServer(genre=None):
    """Load every rating from the database, grouped by user.

    Ratings are read from ``reviewsdata`` and ``user_reviews`` (combined
    with UNION) and joined to ``movies``.

    Args:
        genre: Optional name of a column on the joined rows. When given,
            only rows where that column equals 1 are loaded.

    Returns:
        dict mapping ``userid`` to ``{movieid: rating}`` with each rating
        converted to ``float``.
    """
    conn = projSql.get_sql_connection()
    data = conn.cursor()
    sql = "SELECT m.id,r.userid,r.rating FROM movies AS m JOIN ((SELECT * FROM reviewsdata) UNION (SELECT * FROM user_reviews)) AS r ON m.id = r.movieid"
    if genre is not None:
        # NOTE(review): genre is spliced in as a column name, which cannot be
        # sent as a bound parameter. Callers must only pass trusted,
        # known column names here.
        sql += " WHERE " + genre + " = 1"
    prefs = {}
    try:
        # Previously the query was built but never run and nothing was
        # returned; execute it and collect the rows like the other copies
        # of this function do.
        data.execute(sql)
        for movieid, userid, rating in data:
            prefs.setdefault(userid, {})[movieid] = float(rating)
    finally:
        # Release the cursor even if the query or the conversion fails.
        data.close()
    return prefs
def getRecommendedItems(user, genre=None):
    """Recommend movies the user has not rated yet, best match first.

    For every movie the user rated, the stored similarities returned by
    ``getMovieSimilarities`` are used to build a similarity-weighted
    average rating for each unrated movie.

    Args:
        user: Id of the user, matched against ``userid`` in the combined
            ``reviewsdata`` / ``user_reviews`` rows.
        genre: Optional genre column name, passed through unchanged to
            ``getMovieSimilarities``.

    Returns:
        list of movie ids as strings, ordered from highest to lowest
        predicted rating. Candidates whose weighted score is 0 are omitted.
    """
    conn = projSql.get_sql_connection()
    data = conn.cursor()
    # The user id is sent as a bound parameter instead of being pasted into
    # the SQL text.
    # NOTE(review): assumes the driver behind projSql uses the "%s"
    # placeholder style (as MySQL drivers do) -- confirm.
    sql = "SELECT r.movieid, r.rating FROM ((SELECT * FROM reviewsdata) UNION (SELECT * FROM user_reviews)) AS r WHERE r.userid = %s"
    userRatings = {}
    try:
        data.execute(sql, (user,))
        # Items rated by this user
        for movie, rating in data:
            userRatings[movie] = rating
    finally:
        data.close()

    scores = {}
    totalSim = {}
    for movie, rating in userRatings.items():
        # Loop over items similar to this one
        for movie2, similarity in getMovieSimilarities(movie, genre).items():
            # Ignore if this user has already rated this item
            if movie2 in userRatings:
                continue
            # Weighted sum of rating times similarity
            scores[movie2] = scores.get(movie2, 0) + similarity * rating
            # Sum of all the similarities
            totalSim[movie2] = totalSim.get(movie2, 0) + similarity

    # Divide each total score by total weighting to get an average.
    # A zero total weight is skipped so the division cannot fail.
    rankings = [(score / totalSim[movie], movie)
                for movie, score in scores.items()
                if score != 0 and totalSim[movie] != 0]
    # Highest predicted rating first
    rankings.sort(reverse=True)
    return [str(movie) for (_, movie) in rankings]
def getRecommendedItems(user, genre=None):
    """Return movie ids recommended for ``user``, strongest first.

    The user's existing ratings are loaded from the combined
    ``reviewsdata`` / ``user_reviews`` rows. Each unrated movie that is
    similar to a rated one (per ``getMovieSimilarities``) receives a score
    equal to the similarity-weighted average of the user's ratings.

    Args:
        user: Id of the user whose ratings drive the recommendation.
        genre: Optional genre column name forwarded to
            ``getMovieSimilarities``.

    Returns:
        list of movie ids converted to ``str``, sorted by descending
        score. Movies with a weighted score of 0 are left out.
    """
    conn = projSql.get_sql_connection()
    data = conn.cursor()
    # Bind the user id as a parameter rather than concatenating it into
    # the statement.
    # NOTE(review): "%s" placeholders assume a MySQL-style DB-API driver
    # behind projSql -- confirm.
    sql = "SELECT r.movieid, r.rating FROM ((SELECT * FROM reviewsdata) UNION (SELECT * FROM user_reviews)) AS r WHERE r.userid = %s"
    try:
        data.execute(sql, (user,))
        # movie id -> rating given by this user
        userRatings = dict((movie, rating) for (movie, rating) in data)
    finally:
        data.close()

    scores = {}
    totalSim = {}
    for movie, rating in userRatings.items():
        movie_sim = getMovieSimilarities(movie, genre)
        for movie2, similarity in movie_sim.items():
            # Only recommend movies the user has not rated yet
            if movie2 in userRatings:
                continue
            # Accumulate rating * similarity and the similarity total
            scores[movie2] = scores.get(movie2, 0) + similarity * rating
            totalSim[movie2] = totalSim.get(movie2, 0) + similarity

    # Normalise by the total similarity; entries whose total similarity is
    # zero are dropped instead of raising ZeroDivisionError.
    rankings = [(score / totalSim[movie], movie)
                for movie, score in scores.items()
                if score != 0 and totalSim[movie] != 0]
    rankings.sort(reverse=True)
    return [str(movie) for (_, movie) in rankings]
def getMovieSimilarities(movieid, genre=None):
    """Fetch the stored similarities for one movie.

    Args:
        movieid: Value matched against ``movie_id1`` in
            ``movie_similarities``.
        genre: Optional column name on ``movies``. When given, only
            similar movies whose row has that column equal to 1 are
            returned.

    Returns:
        dict mapping ``movie_id2`` to its ``similarity`` value.
    """
    conn = projSql.get_sql_connection()
    data = conn.cursor()
    # The movie id is bound as a parameter instead of being concatenated.
    # NOTE(review): "%s" placeholders assume a MySQL-style DB-API driver
    # behind projSql -- confirm.
    if genre is None:
        sql = "SELECT movie_id2, similarity FROM movie_similarities WHERE movie_id1 = %s"
    else:
        # NOTE(review): genre is a column name and cannot be bound as a
        # parameter; callers must only pass trusted column names.
        sql = "SELECT movie_id2, similarity FROM movie_similarities as s JOIN movies as m on s.movie_id2 = m.id WHERE movie_id1 = %s AND " + genre + "=1"
    movie_sim = {}
    try:
        data.execute(sql, (movieid,))
        for movie_id2, similarity in data:
            movie_sim[movie_id2] = similarity
    finally:
        # Release the cursor even when the query fails.
        data.close()
    return movie_sim
Пример #5
0
def loadMoviesFromServer(genre=None):
    """Load every rating from the database, grouped by user.

    Ratings are read from ``reviewsdata`` and ``user_reviews`` (combined
    with UNION) and joined to ``movies``.

    Args:
        genre: Optional name of a column on the joined rows. When given,
            only rows where that column equals 1 are loaded.

    Returns:
        dict mapping ``userid`` to ``{movieid: rating}`` with each rating
        converted to ``float``.
    """
    conn = projSql.get_sql_connection()
    data = conn.cursor()
    # Shared base query; the genre filter is appended only when requested.
    sql = "SELECT m.id,r.userid,r.rating FROM movies AS m JOIN ((SELECT * FROM reviewsdata) UNION (SELECT * FROM user_reviews)) AS r ON m.id = r.movieid"
    if genre is not None:
        # NOTE(review): genre is spliced in as a column name, which cannot be
        # sent as a bound parameter. Callers must only pass trusted,
        # known column names here.
        sql += " WHERE " + genre + " = 1"
    prefs = {}
    try:
        data.execute(sql)
        for movieid, userid, rating in data:
            prefs.setdefault(userid, {})[movieid] = float(rating)
    finally:
        # Release the cursor even if the query or the conversion fails.
        data.close()
    return prefs
def loadMoviesFromServer(genre=None):
    """Read all ratings and return them as a nested user -> movie mapping.

    The query joins ``movies`` with the UNION of ``reviewsdata`` and
    ``user_reviews``.

    Args:
        genre: Optional column name; when supplied the query keeps only
            rows where that column equals 1.

    Returns:
        dict of the form ``{userid: {movieid: float(rating)}}``.
    """
    conn = projSql.get_sql_connection()
    data = conn.cursor()
    sql = "SELECT m.id,r.userid,r.rating FROM movies AS m JOIN ((SELECT * FROM reviewsdata) UNION (SELECT * FROM user_reviews)) AS r ON m.id = r.movieid"
    if genre is not None:
        # NOTE(review): a column name cannot be passed as a bound
        # parameter, so genre is concatenated. Only trusted column names
        # may be passed in.
        sql += " WHERE " + genre + " = 1"
    prefs = {}
    try:
        data.execute(sql)
        for movieid, userid, rating in data:
            user_prefs = prefs.setdefault(userid, {})
            user_prefs[movieid] = float(rating)
    finally:
        # Always give the cursor back, including on errors.
        data.close()
    return prefs
def getMovieSimilarities(movieid, genre=None):
    """Look up the movies recorded as similar to ``movieid``.

    Args:
        movieid: Value compared with ``movie_id1`` in
            ``movie_similarities``.
        genre: Optional column name on ``movies``; when given, the result
            is limited to similar movies whose row has that column set
            to 1.

    Returns:
        dict of ``{movie_id2: similarity}``.
    """
    conn = projSql.get_sql_connection()
    data = conn.cursor()
    # Pass the movie id as a bound parameter rather than via str().
    # NOTE(review): "%s" placeholders assume a MySQL-style DB-API driver
    # behind projSql -- confirm.
    sql = "SELECT movie_id2, similarity FROM movie_similarities WHERE movie_id1 = %s"
    if genre is not None:
        # NOTE(review): genre is a column name and cannot be bound as a
        # parameter; callers must only pass trusted column names.
        sql = "SELECT movie_id2, similarity FROM movie_similarities as s JOIN movies as m on s.movie_id2 = m.id WHERE movie_id1 = %s AND " + genre + "=1"
    try:
        data.execute(sql, (movieid,))
        return dict((movie_id2, similarity) for (movie_id2, similarity) in data)
    finally:
        # Runs after the dict is built, or when the query fails.
        data.close()
Пример #8
0
def calculateSimilarItems(prefs):
    """Compute the most similar items for every item and store them.

    For each key of ``prefs`` the matches returned by ``topMatches`` (using
    ``sim_distance``) are written to ``movie_similarities`` with
    ``REPLACE INTO`` and collected in the returned dictionary.

    Args:
        prefs: Mapping whose keys are used as ``movie_id1``. It is passed
            unchanged to ``topMatches``.
            NOTE(review): presumably an item-centred preference dict --
            confirm against the caller.

    Returns:
        dict mapping each key of ``prefs`` to the list returned by
        ``topMatches`` for it. Previously the dict was built but dropped.
    """
    result = {}
    conn = projSql.get_sql_connection()
    data = conn.cursor()
    # Values are bound as parameters instead of being pasted into the SQL.
    # NOTE(review): "%s" placeholders assume a MySQL-style DB-API driver
    # behind projSql -- confirm.
    sql = "REPLACE INTO movie_similarities(movie_id1,movie_id2,similarity) VALUES (%s,%s,%s)"
    total = len(prefs)
    try:
        for c, movie_id1 in enumerate(prefs, 1):
            # Status updates for large datasets. The call form of print
            # works on both Python 2 and Python 3.
            if c % 100 == 0:
                print("%d / %d" % (c, total))
            # Find the most similar items to this one
            scores = topMatches(prefs, movie_id1, similarity=sim_distance)
            result[movie_id1] = scores
            for item in scores:
                # Each entry holds the similarity first, then the other id.
                similarity = item[0]
                movie_id2 = item[1]
                data.execute(sql, (movie_id1, movie_id2, similarity))
        # DB-API connections start with autocommit off, so without this
        # the REPLACE statements may be discarded.
        conn.commit()
    finally:
        data.close()
    return result
Пример #9
0
def calculateSimilarItems(prefs):
    """Build and persist the item-to-item similarity table.

    Every key of ``prefs`` is compared to the others through
    ``topMatches(prefs, key, similarity=sim_distance)``. Each resulting
    pair is saved into ``movie_similarities`` via ``REPLACE INTO``.

    Args:
        prefs: Mapping handed to ``topMatches``; its keys become
            ``movie_id1`` in the stored rows.
            NOTE(review): looks like an item-keyed preference dict --
            verify with the caller.

    Returns:
        dict of ``{movie_id1: scores}`` where ``scores`` is whatever
        ``topMatches`` returned for that key. The original built this
        dict without returning it.
    """
    result = {}
    conn = projSql.get_sql_connection()
    data = conn.cursor()
    # Parameter binding replaces the str() concatenation of the values.
    # NOTE(review): "%s" placeholders assume a MySQL-style DB-API driver
    # behind projSql -- confirm.
    sql = "REPLACE INTO movie_similarities(movie_id1,movie_id2,similarity) VALUES (%s,%s,%s)"
    total = len(prefs)
    c = 0
    try:
        for movie_id1 in prefs:
            # Progress output every 100 items; print(...) with a single
            # argument is valid on both Python 2 and Python 3.
            c += 1
            if c % 100 == 0:
                print("%d / %d" % (c, total))
            scores = topMatches(prefs, movie_id1, similarity=sim_distance)
            result[movie_id1] = scores
            for item in scores:
                # item[0] is the similarity, item[1] the matched movie id
                data.execute(sql, (movie_id1, item[1], item[0]))
        # Make the writes durable; DB-API connections are not required to
        # autocommit.
        conn.commit()
    finally:
        data.close()
    return result
Пример #10
0
import json
import projSql
import time

# Dump every user's ratings to user_ratings.json and report the run time.
# time.clock() was removed in Python 3.8 and measured CPU time on Unix, which
# under-reports a script that mostly waits on the database; time.time() gives
# wall-clock seconds on every Python version.
start_time = time.time()
conn = projSql.get_sql_connection()
data = conn.cursor()
sql = "SELECT m.id,r.userid,r.rating FROM movies AS m JOIN ((SELECT * FROM reviewsdata) UNION (SELECT * FROM user_reviews)) AS r ON m.id = r.movieid"
data.execute(sql)
# prefs maps userid -> {movieid: rating as float}
prefs = {}
for (movieid, userid, rating) in data:
    prefs.setdefault(userid, {})[movieid] = float(rating)
# All rows are consumed; release the cursor before writing the file.
data.close()
with open('user_ratings.json', 'w') as outfile:
    json.dump(prefs, outfile)

print("Time taken: " + str(time.time() - start_time) + " seconds.\n")
Пример #11
0
import json
import projSql
import time

# Export all ratings as JSON (user_ratings.json) and print the elapsed time.
# Wall-clock time.time() replaces time.clock(), which no longer exists on
# Python 3.8+ and only counted CPU time on Unix.
start_time = time.time()
conn = projSql.get_sql_connection()
data = conn.cursor()
sql = "SELECT m.id,r.userid,r.rating FROM movies AS m JOIN ((SELECT * FROM reviewsdata) UNION (SELECT * FROM user_reviews)) AS r ON m.id = r.movieid"
data.execute(sql)
# Group the rows as {userid: {movieid: float(rating)}}
prefs = {}
for (movieid, userid, rating) in data:
    user_prefs = prefs.setdefault(userid, {})
    user_prefs[movieid] = float(rating)
# The cursor is no longer needed once every row has been read.
data.close()
with open('user_ratings.json', 'w') as outfile:
    json.dump(prefs, outfile)

print("Time taken: " + str(time.time() - start_time) + " seconds.\n")