def load_svd():
    # Load the pickled full matrix and keep only a rank-_k truncated SVD of it.
    _k = 100
    # matrix = np.loadtxt('content/full_matrix_for_svd.pkl')
    matrix = load_features('content/full_matrix_for_svd.pkl')
    np_matrix = matrix.as_matrix()
    u, s, v = np.linalg.svd(np_matrix)
    reduced_u = u[:, :_k]   # 3112 x _k
    reduced_s = s[:_k]      # _k x 1
    reduced_v = v[:_k, :]   # _k x 3473
    return reduced_u, reduced_s, reduced_v
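
# Usage sketch (illustrative, not part of the original script): the truncated
# factors can be recombined into a rank-_k approximation of the original
# matrix, from which individual cells are read back as predictions.
# reduced_u, reduced_s, reduced_v = load_svd()
# approx = np.dot(reduced_u * reduced_s, reduced_v)   # (3112 x _k) . (_k x 3473)
# value = approx[user_index, item_index]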
# exit()
conn = sqlite3.connect(
    '/home/ralph/Dev/content-based-recsys/content/database.db')
_all_ratings = pd.read_sql(
    'select distinct t.id '
    'from movielens_rating r '
    'join movielens_movie m on m.movielensid = r.movielensid '
    'join trailers t on t.imdbid = m.imdbidtt '
    'where t.best_file = 1 '
    # 'and userid < 5000 '
    'order by t.id', conn)

# index_to_trailer_id = {}
tfidf_array = load_features('movies_tfidf_synopsis_array.pkl')

# print _all_ratings.iloc[1]
# exit()

count = 0
_safe_exit = 2
trailer_tfidf_similarities = dict()

# Map each row of the TF-IDF array back to its trailer id and prepare an
# empty similarity container for that trailer.
for i in range(0, len(tfidf_array)):
    # print sum(tfidf_array[i])
    trailer_id = _all_ratings.iloc[i]
    print trailer_id
    trailer_tfidf_similarities[trailer_id[0]] = {}
    # trailer_tfidf_similarities[trailer_id[0]] = []
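
# A minimal sketch of how the pairwise similarities could be filled in
# (assumption: cosine similarity between the TF-IDF rows, keyed by trailer id,
# and saved with save_obj from utils.opening_feat).
# from sklearn.metrics.pairwise import cosine_similarity
# sims = cosine_similarity(tfidf_array)
# ids = _all_ratings['id'].values
# for i in range(len(ids)):
#     for j in range(len(ids)):
#         if i != j:
#             trailer_tfidf_similarities[ids[i]][ids[j]] = sims[i][j]
# save_obj(trailer_tfidf_similarities, 'trailer_tfidf_similarities')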
# coding=utf-8
from utils.opening_feat import load_features, save_obj
import numpy as np
from matplotlib import pyplot as plt

# results = load_features('results_3112_users.pkl')
# results_low_level = load_features('results_3112_users_low_level_features.pkl')
#
# for i in range(2, 16):
#     results[i]['low-level'] = results_low_level[i]['low-level']
#
# print results
# save_obj(results, 'full_results_3112_users')
# exit()

# Aggregate the per-top-N evaluation metrics for each approach and plot them.
results = load_features('../results_3112_users.pkl')

# collaborative, DeepRecVis (deep), user-centroid, user-centroid-relevant-movies, mixing-weighted-hybrid, weighted-weighted-hybrid
listing = []

# user_collaborative, item_collaborative, deep, weighted-hybrid, low_level = \
#     {'precision': [], 'recall': [], 'diversity': []}, {'precision': [], 'recall': [], 'diversity': []}, \
#     {'precision': [], 'recall': [], 'diversity': []}, {'precision': [], 'recall': [], 'diversity': []}, \
#     {'precision': [], 'recall': [], 'diversity': []}
# user_collaborative, item_collaborative, deep, weighted_hybrid, low_level, weighted_hybrid_collaborative, \
#     weighted_hybrid_item_content, switching_hybrid, tfidf, synopsis = \
deep, low_level, tfidf, synopsis = \
    {'precision': [], 'recall': [], 'diversity': [], 'rankscore': [], 'f1': []}, \
    {'precision': [], 'recall': [], 'diversity': [], 'rankscore': [], 'f1': []}, \
    {'precision': [], 'recall': [], 'diversity': [], 'rankscore': [], 'f1': []}, \
    {'precision': [], 'recall': [], 'diversity': [], 'rankscore': [], 'f1': []}
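
# A minimal sketch of how the per-N metrics could be collected and plotted
# (assumption: each results[N] maps an approach key such as 'deep' or
# 'low-level' to a dict of averaged metrics for a top-N list of size N).
# top_n = sorted(results.keys())
# for n in top_n:
#     deep['precision'].append(results[n]['deep']['precision'])
#     low_level['precision'].append(results[n]['low-level']['precision'])
# plt.plot(top_n, deep['precision'], label='deep')
# plt.plot(top_n, low_level['precision'], label='low-level')
# plt.xlabel('top-N')
# plt.ylabel('precision')
# plt.legend()
# plt.show()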
from utils.opening_feat import load_features
import operator

# Inspect the TF-IDF similarity entries for one trailer and rank its
# neighbours by similarity, most similar first.
_trailers_tfidf_sims_matrix = load_features(
    '/home/ralph/Dev/content-based-recsys/content/trailer_tfidf_similarities.pkl')

print _trailers_tfidf_sims_matrix[4484]
print type(_trailers_tfidf_sims_matrix[4484])
# print _trailers_tfidf_sims_matrix[4484]

sorted_x = sorted(_trailers_tfidf_sims_matrix[4484].items(),
                  key=operator.itemgetter(1), reverse=True)
print sorted_x
import time

import recommender
import evaluation
from utils.opening_feat import load_features, save_obj

start = time.time()

# 85040 is the full set size (4252 is 20 iterations)
# users = select_random_users(conn, 100 * batch, 100)

_item_item_collaborative_matrix = load_features(
    'content/item_item_collaborative_similarities.pkl')

# print _item_item_collaborative_matrix[4484]
# x = [k for k, v in _item_item_collaborative_matrix[4484] if v == (4485, 23.988368963108908)]
# print x
# print _item_item_collaborative_matrix[4484].index((4485, 23.988368963108908))
# print _item_item_collaborative_matrix[4484].index((4486, -40.004855289600997))
# exit()

print "loading user profiles..."
user_profiles = load_features('content/user_profiles_dataframe_3112_users.pkl')
# user_profiles = load_features('content/user_profiles_dataframe_all_users.pkl')
print "user profiles loaded in", time.time() - start, "seconds."
# user_profiles = load_features('content/user_profiles_dataframe_with_user_centroid.pkl')
# user_profiles = user_profiles[:20]

# print "AVG", user_profiles.iloc[7]['avg'], "."

# DEEP_FEATURES_BOF = extract_features('content/bof_128.bin')

# Map the similarity between every pair of movies
import sqlite3
import pandas as pd
import math
from sklearn.metrics.pairwise import cosine_similarity
from utils.utils import sort_desc
from utils.opening_feat import save_obj, load_features

# df = load_features('/home/ralph/Dev/content-based-recsys/item_item_collaborative_similarities.pkl')
# print df
# exit()

# Build item-item collaborative similarities from the ratings of users with id < 5000.
user_profiles = load_features(
    '../content/user_profiles_dataframe_all_users.pkl')

# print user_profiles.index.values
# print user_profiles.loc[3858]['avg']
# exit()

conn = sqlite3.connect(
    '/home/ralph/Dev/content-based-recsys/content/database.db')

_all_ratings = pd.read_sql(
    'select userID, t.id, rating from movielens_rating r '
    'join movielens_movie m on m.movielensid = r.movielensid '
    'join trailers t on t.imdbid = m.imdbidtt '
    'where t.best_file = 1 '
    'and userid < 5000 '
    'order by t.id', conn)

movies = _all_ratings['id'].unique()
movie_similarity = {}
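
# A minimal sketch of one way movie_similarity could be filled (assumption:
# for each pair of movies, combine the ratings of users who rated both after
# subtracting each user's average rating, taken from user_profiles['avg']).
# ratings_by_movie = {m: _all_ratings[_all_ratings['id'] == m].set_index('userID')['rating']
#                     for m in movies}
# for m1 in movies:
#     movie_similarity[m1] = []
#     for m2 in movies:
#         if m1 == m2:
#             continue
#         common = ratings_by_movie[m1].index.intersection(ratings_by_movie[m2].index)
#         if len(common) == 0:
#             continue
#         centred1 = ratings_by_movie[m1].loc[common] - user_profiles.loc[common]['avg']
#         centred2 = ratings_by_movie[m2].loc[common] - user_profiles.loc[common]['avg']
#         movie_similarity[m1].append((m2, float(centred1.dot(centred2))))
#     movie_similarity[m1] = sort_desc(movie_similarity[m1])
# save_obj(movie_similarity, 'item_item_collaborative_similarities')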
# import pandas as pd
from utils.opening_feat import load_features

user_profiles_with_predictions = load_features(
    'content/profiles_with_predictions.pkl')

# df = pd.DataFrame.from_dict(user_profiles_with_predictions)

for index, profile in user_profiles_with_predictions.iteritems():
    print "index", index
    print profile
from utils.opening_feat import load_features, save_obj
from hausdorff import hausdorff
from utils.utils import sort_desc
import numpy as np

# Compute user-user similarities by taking the Hausdorff distance between
# every pair of users' bag-of-features.
_users_bof = load_features('content/3112_users_bof.pkl')

# test_user_1 = np.array(_users_bof[1])
# test_user_3 = np.array(_users_bof[7])
# print test_user_1
# print hausdorff(test_user_1, test_user_3)

users_bof_similarities = {}

for key, user_bof in _users_bof.iteritems():
    users_bof_similarities[key] = []
    print "current user", key
    for neighbor, neighbor_bof in _users_bof.iteritems():
        if neighbor == key:
            continue
        sim = hausdorff(np.array(user_bof), np.array(neighbor_bof))
        users_bof_similarities[key].append((neighbor, sim))
    # Hausdorff is a distance, so sort ascending to put the closest neighbours first.
    users_bof_similarities[key] = sort_desc(users_bof_similarities[key], desc=False)
    # print users_bof_similarities[key]
    # break

save_obj(users_bof_similarities, '3112_user_user_bof_similarities')
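
# Usage sketch (illustrative, not part of the original script): since each list
# is sorted ascending by Hausdorff distance, the k nearest neighbours of a user
# are simply its first k entries.
# k = 10
# nearest = users_bof_similarities[1][:k]   # [(neighbor_id, distance), ...]
# print nearest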
import sqlite3
import time
import pandas as pd
import numpy as np
from utils.opening_feat import load_features, save_obj

# Debug check: inspect the pickled matrix and stop before the queries below run.
matrix = load_features(
    '/home/ralph/Dev/content-based-recsys/content/full_matrix_for_svd.pkl')
print type(matrix.as_matrix())
exit()

_movies_sql = 'select DISTINCT t.id from trailers t ' \
              'join movielens_movie m on t.imdbid = m.imdbidtt ' \
              'join movielens_rating r on m.movielensid = r.movielensid ' \
              'where userid < 5000 ' \
              'order by t.id'

start = time.time()

conn = sqlite3.connect(
    '/home/ralph/Dev/content-based-recsys/content/database.db')

_3112_user_ratings = pd.read_sql(
    'select userID, t.id, rating from movielens_rating r '
    'join movielens_movie m on m.movielensid = r.movielensid '
    'join trailers t on t.imdbid = m.imdbidtt '
    'where userid < 5000 '
    'order by userid, t.id', conn)

c = conn.cursor()
_movies = c.execute(_movies_sql)
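
# A minimal sketch of how the full matrix for SVD could be assembled from the
# ratings (assumption: a users x movies pivot with unrated cells filled with 0).
# full_matrix = _3112_user_ratings.pivot_table(
#     index='userID', columns='id', values='rating', fill_value=0)
# save_obj(full_matrix, 'full_matrix_for_svd')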
import sqlite3
import pandas as pd
import numpy as np
import sys
import math
from sklearn.metrics.pairwise import cosine_similarity
from utils.utils import sort_desc
from scipy.stats import pearsonr
from utils.opening_feat import load_features, save_obj

# Prepare the full ratings set for computing user-user similarities.
user_profiles = load_features(
    '/home/ralph/Dev/content-based-recsys/content/user_profiles_dataframe_all_users.pkl')

# print user_profiles.loc[3113]
# exit()
# print user_profiles.columns
# exit()

conn = sqlite3.connect('content/database.db')

_all_ratings = pd.read_sql(
    'select userID, t.id, rating from movielens_rating r '
    'join movielens_movie m on m.movielensid = r.movielensid '
    'join trailers t on t.imdbid = m.imdbidtt '
    # 'where userid < 5000 '
    'order by userid', conn)

conn.close()

users = _all_ratings['userID'].unique()
movies = _all_ratings['id'].unique()
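
# A minimal sketch of a user-user similarity step (assumption: Pearson
# correlation over the movies two users have both rated).
# ratings_by_user = {u: _all_ratings[_all_ratings['userID'] == u].set_index('id')['rating']
#                    for u in users}
# def user_similarity(u1, u2):
#     common = ratings_by_user[u1].index.intersection(ratings_by_user[u2].index)
#     if len(common) < 2:
#         return 0.0
#     corr, _ = pearsonr(ratings_by_user[u1].loc[common], ratings_by_user[u2].loc[common])
#     return corr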
from utils.opening_feat import load_features, save_obj
import numpy as np
from matplotlib import pyplot as plt

# results = load_features('results_3112_users.pkl')
# results_low_level = load_features('results_3112_users_low_level_features.pkl')
#
# for i in range(2, 16):
#     results[i]['low-level'] = results_low_level[i]['low-level']
#
# print results
# save_obj(results, 'full_results_3112_users')
# exit()

results = load_features('results_500_users.pkl')

# collaborative, DeepRecVis (deep), user-centroid, user-centroid-relevant-movies, mixing-weighted-hybrid, weighted-weighted-hybrid
listing = []

# user_collaborative, item_collaborative, deep, weighted-hybrid, low_level = \
#     {'precision': [], 'recall': [], 'diversity': []}, {'precision': [], 'recall': [], 'diversity': []}, \
#     {'precision': [], 'recall': [], 'diversity': []}, {'precision': [], 'recall': [], 'diversity': []}, \
#     {'precision': [], 'recall': [], 'diversity': []}
# user_collaborative, item_collaborative, deep, weighted_hybrid, low_level, weighted_hybrid_collaborative, \
#     weighted_hybrid_item_content, switching_hybrid, tfidf, synopsis = \
user_collaborative, item_collaborative, deep, weighted_hybrid, low_level, switching_hybrid, tfidf, synopsis = \
    {'precision': [], 'recall': [], 'diversity': [], 'rankscore': [], 'f1': []}, \
    {'precision': [], 'recall': [], 'diversity': [], 'rankscore': [], 'f1': []}, \
    {'precision': [], 'recall': [], 'diversity': [], 'rankscore': [], 'f1': []}, \
    {'precision': [], 'recall': [], 'diversity': [], 'rankscore': [], 'f1': []}, \
    {'precision': [], 'recall': [], 'diversity': [], 'rankscore': [], 'f1': []}, \
    {'precision': [], 'recall': [], 'diversity': [], 'rankscore': [], 'f1': []}, \