def run_model(personality):
    """Fit an SVD model on the stored ratings plus one new user's rows.

    The existing ratings come from ``json_to_pandas()``; the rows for the new
    user are produced by ``user_to_dfrows(len(df.index), personality)`` and
    concatenated onto them.  The combined frame must expose the columns
    ``user``, ``trait`` and ``percentile`` (ratings on a 0.0-1.0 scale).

    Args:
        personality: forwarded unchanged to ``user_to_dfrows``.
            # presumably the new user's trait scores; verify against caller

    Returns:
        A list of at most three ``(estimate, item_id)`` tuples, highest
        estimate first, taken from the predictions on the anti-testset.
    """
    import heapq

    reader = Reader(sep=',', skip_lines=0, rating_scale=(0.0, 1.0))

    df = pd.DataFrame(json_to_pandas())
    new = pd.DataFrame(user_to_dfrows(len(df.index), personality))
    # DataFrame.append was removed in pandas 2.0; concat with default
    # arguments yields the same frame (original indices are kept).
    df = pd.concat([df, new])

    data = Dataset.load_from_df(df[['user', 'trait', 'percentile']],
                                reader=reader)
    trainset = data.build_full_trainset()

    algo = SVD()
    algo.fit(trainset)

    # The anti-testset holds every (user, item) pair absent from the
    # training data.
    # NOTE(review): this covers all users, not only the one just added, so
    # the top three may belong to other users - confirm this is intended.
    testset = trainset.build_anti_testset()
    predictions = algo.test(testset)

    # nlargest accepts any iterable; no intermediate list or heapify needed.
    return heapq.nlargest(3, ((p.est, p.iid) for p in predictions))
def movie_rater(movie_df, num=5, genre=None):
    """Handle a cold start: collect ratings from a new user and recommend.

    Movies are sampled one at a time from the module-level
    ``popular_movies_df`` (optionally restricted to ``genre``) and the user is
    asked to rate each one.  The collected ratings are added to the
    module-level ``ratings_df``, an SVD model is fit on the combined data and
    the five movies with the highest predicted rating for the new user are
    printed.

    Args:
        movie_df (dataframe): frame with ``movieId`` and ``title`` columns,
            used to look up the titles of the recommended movies.
        num (integer): number of ratings to collect before recommending.
            Movies answered with ``n`` (not seen) and invalid answers do not
            count.  The default value is 5.
        genre (string): if given, only movies whose ``genres`` column
            contains this pattern are offered for rating.  The default is
            None.

    Returns:
        None.  The recommendations are printed, not returned.
    """
    # Local import so the block does not rely on a module-level ``pd``.
    import pandas as pd

    userID = 1000
    prompt = (
        'How do you rate this movie on a scale of (low)1-5(high). Press n if you have not seen this movie: \n'
    )

    # The candidate pool is the same for every prompt, so build it once.
    if genre:
        candidates = popular_movies_df[
            popular_movies_df['genres'].str.contains(genre)]
    else:
        candidates = popular_movies_df

    rating_list = []
    while num > 0:
        movie = candidates.sample(1)
        print(movie['title'])

        # No blanket try/except here: swallowing EOFError/KeyboardInterrupt
        # from input() would turn this loop into one that cannot be aborted.
        answer = input(prompt).strip()
        if answer == 'n':
            continue

        try:
            rating = float(answer)
        except ValueError:
            rating = None
        # The range test also rejects NaN, for which both comparisons fail.
        if rating is None or not 1 <= rating <= 5:
            print('Please enter a number from 1 to 5, or n to skip.')
            continue

        rating_list.append({
            'userId': userID,
            'movieId': movie['movieId'].values[0],
            'rating': rating,
        })
        num -= 1

    # DataFrame.append was removed in pandas 2.0; use concat instead.
    if rating_list:
        new_ratings_df = pd.concat(
            [ratings_df, pd.DataFrame(rating_list)], ignore_index=True)
    else:
        new_ratings_df = ratings_df
    new_data = Dataset.load_from_df(new_ratings_df, reader)

    svd_ = SVD(n_factors=100, n_epochs=30, lr_all=0.01, reg_all=0.1)
    svd_.fit(new_data.build_full_trainset())

    # Predict the new user's rating for every movie in the original ratings.
    list_of_movies = [(m_id, svd_.predict(userID, m_id)[3])
                      for m_id in ratings_df['movieId'].unique()]
    ranked_movies = sorted(list_of_movies, key=lambda x: x[1], reverse=True)

    for idx, rec in enumerate(ranked_movies[:5]):
        title = movie_df.loc[movie_df['movieId'] == int(rec[0])]['title']
        print('------------------------------------------------')
        print('Recommendation # ', idx + 1, ': ', title, '\n')
    return
def initialize_and_fit_model(data):
    """Create the SVD model used by the program and fit it on all of *data*.

    The model is trained on the full trainset built from ``data``, so any
    ratings the caller has already merged in (e.g. a new user's) are part of
    the training data.

    Args:
        data: object exposing ``build_full_trainset()``.
            # presumably a surprise Dataset; verify against caller

    Returns:
        Whatever ``SVD.fit`` returns for the fitted model.
    """
    from surprise.prediction_algorithms import SVD

    model = SVD(n_factors=50, reg_all=0.05)
    full_trainset = data.build_full_trainset()
    return model.fit(full_trainset)
def singular_value_decomposition(self, n_factors, reg_all):
    """Rank URLs for the current user with an SVD model.

    An SVD model is fit on the ``User``/``URL``/``Rating`` columns of the
    current utility matrix (ratings on a 1-5 scale).  Every distinct row that
    remains after dropping ``User`` and ``Rating`` is then scored for
    ``self.current_user`` and the URLs are ordered by that score, highest
    first.

    Args:
        n_factors: forwarded to ``SVD(n_factors=...)``.
        reg_all: forwarded to ``SVD(reg_all=...)``.

    Returns:
        The result of ``self.append_new_recommendation`` called with the
        ranked URLs and the label ``'Singular Value Decomposition'``.
    """
    utility = self.current_utility_matrix()

    # Fit on every known rating.
    rating_reader = Reader(rating_scale=(1, 5))
    ratings = Dataset.load_from_df(utility[['User', 'URL', 'Rating']],
                                   rating_reader)
    full_trainset = ratings.build_full_trainset()
    model = SVD(n_factors=n_factors, reg_all=reg_all)
    model.fit(full_trainset)

    def estimate_for_current_user(url):
        # Index 3 of the prediction is the estimated rating.
        return model.predict(self.current_user, url)[3]

    # Score each candidate for the local user and order by that score.
    candidates = utility.drop(['User', 'Rating'], axis=1).drop_duplicates()
    candidates['SVD'] = candidates['URL'].apply(estimate_for_current_user)
    ranked_urls = candidates.sort_values(by='SVD', ascending=False)['URL']

    return self.append_new_recommendation(ranked_urls,
                                          'Singular Value Decomposition')
# Mean absolute error. mae = accuracy.mae(predictions) df_predicted = pd.DataFrame(columns=["uid", "iid", "predicted", "actual"]) for prediction in predictions: df_predicted = df_predicted.append( { "uid": prediction.uid, "iid": prediction.iid, "predicted": prediction.est, "actual": df_swipes[prediction.uid].loc[prediction.iid] }, ignore_index=True ) acc_dict = {"algname": algname, "n_train": n_train, "n_users": n_users, "acc": mae} print(acc_dict) return acc_dict if __name__ == "__main__": # Save accuracy data for all swipe values and user values. df_acc = pd.DataFrame(columns=["algname", "n_train", "n_users", "acc"]) max_swipes = 210 for alg, algname in tqdm(zip([SVD(), NMF(), KNNWithMeans()], ["SVD", "NMF", "KNNWithMeans"])): for n_users in range(2, len(users)): for n_train in range(10, max_swipes, 10): df_acc = df_acc.append(acc(df_swipes, alg, algname, n_train, n_users), ignore_index=True) df_acc.to_csv("acc_organic.csv", index=False)
"iid": prediction.iid, "predicted": prediction.est, "actual": df_swipes[prediction.uid].loc[prediction.iid] }, ignore_index=True) acc_dict = { "algname": algname, "n_train": n_train, "n_users": n_users, "acc": mae } print(acc_dict) return acc_dict if __name__ == "__main__": # Save accuracy data for all swipe values and user values. df_acc = pd.DataFrame(columns=["algname", "n_train", "n_users", "acc"]) max_swipes = 210 for alg, algname in tqdm( zip([SVD(), NMF(), KNNWithMeans()], ["SVD", "NMF", "KNNWithMeans"])): for n_users in [5, 10, 25, 50, 100, 250, 500, 1000]: for n_train in range(10, max_swipes, 10): df_acc = df_acc.append(acc(df_swipes, alg, algname, n_train, n_users), ignore_index=True) df_acc.to_csv("acc.csv", index=False)
def test_SVD_parameters():
    """Ensure that all parameters are taken into account.

    A baseline ``SVD(n_factors=1, n_epochs=1)`` is evaluated first.  Each
    parameter is then changed on its own, in the order listed below, and the
    resulting RMSE must differ from the baseline.
    """

    def rmse_with(**overrides):
        # Baseline configuration with one parameter (at most) overridden.
        params = dict(n_factors=1, n_epochs=1)
        params.update(overrides)
        return evaluate(SVD(**params), data, measures=['rmse'])['rmse']

    # The baseline against which to compare.
    rmse_default = rmse_with()

    # (parameter, value) pairs; the order matches the original test so the
    # sequence of evaluate() calls is unchanged.
    variants = [
        ('n_factors', 2),
        ('n_epochs', 2),
        ('lr_all', 5),
        ('reg_all', 5),
        ('lr_bu', 5),
        ('lr_bi', 5),
        ('lr_pu', 5),
        ('lr_qi', 5),
        ('reg_bu', 5),
        ('reg_bi', 5),
        ('reg_pu', 5),
        ('reg_qi', 5),
    ]
    for name, value in variants:
        rmse_variant = rmse_with(**{name: value})
        # Name the parameter so a failure is attributable without a debugger.
        assert rmse_default != rmse_variant, (
            'changing %s had no effect on RMSE' % name)
"""使用surprise SVD系列算法推荐""" from surprise import Dataset from surprise import Reader from surprise.prediction_algorithms import SVD ,SVDpp from surprise import accuracy from surprise.model_selection import KFold import pandas as pd import os reader = Reader(line_format='user item rating', sep=',', skip_lines=1) data = Dataset.load_from_file( "./ratings.csv" , reader = reader) from surprise.model_selection import train_test_split x_train , x_test = train_test_split( data ,test_size = 0.2 ,random_state = 10000 ) svd = SVD(biased= False) svd.fit(x_train) prediction = svd.test(x_test) accuracy.rmse( predictions=prediction ) # RMSE: 0.8548 # 0.8547798833361556 import pandas as pd import numpy as np datas = pd.read_csv("ratings.csv" ,delimiter="," , skiprows=1 , names =["user" ,"item" ,"rating"] ,usecols= [0,1,2] ) datas["user"] = datas["user"].astype(np.int32) datas["item"] = datas["item"].astype(np.int32) datas["rating"] = datas["rating"].astype(np.int32) print(datas.dtypes )
from surprise.model_selection import cross_validate, train_test_split from surprise.prediction_algorithms import SVD from surprise import accuracy popular_movies_df = pd.read_csv('popular_movies.csv') ratings_df = pd.read_csv('ratings_limited_users.csv', usecols=['userId', 'movieId', 'rating']) movies_df = pd.read_csv('movies.csv') # Initializing a reader and data class reader = Reader() data = Dataset.load_from_df(ratings_df, reader) # Splitting the data into train and test sets trainset, testset = train_test_split(data, test_size=.25) # Using the tuned parameters for the SVD model svd = SVD(n_factors=100, n_epochs=30, lr_all=0.01, reg_all=0.1) svd.fit(trainset) svd_preds = svd.test(testset) # Function to get new users preferences on any movie or a particular genre def movie_rater(movie_df, num=5, genre=None): """ This function is to handle a cold start with a new user. It takes in a number of ratings from a new user and gives the output of 5 movie recommendations. Args: movie_df(dataframe): the dataframe of movies that you will use to recommend movies num(integer): the number of ratings you want the user to input before giving a recommendation. The default value is 5. genre(string): The genre of movies that you wish to pull from for your user to rate. The default is None. Returns:
rating_one_movie = { 'userId': 1000, 'movieId': rating_movie['movieId'].values[0], 'rating': float(rating) / 2 } rating_list.append(rating_one_movie) n -= 1 # Make Predictions reader = Reader() new_ratings = ratings.append(rating_list, ignore_index=True) data = Dataset.load_from_df(new_ratings, reader).build_full_trainset() #Model print('\n working.... \n') svd = SVD(n_factors=100, n_epochs=35, lr_all=0.007, reg_all=0.07) svd.fit(data) # Gather and sort recommendations recommendation_list = [] for m_id in movies['movieId']: recommendation_list.append((m_id, 2 * svd.predict(1000, m_id)[3])) ranked_recommendations = sorted(recommendation_list, key=lambda x: x[1], reverse=True) # Deliver Results print('\n', 'Success!', '\n') X = int(input('How many movie recommendations would you like to see? '))
reader = surprise.Reader(rating_scale=(0, 1)) data = surprise.Dataset.load_from_df(sparse, reader) ''' for alg in [SVD(), NMF(), KNNWithMeans()]: output = alg.fit(data.build_full_trainset()) preds={} for name in names: preds[name] = sorted([(i, alg.predict(uid=name, iid=str(i)).est) for i in complement_ids[name]], key=lambda x: x[1], reverse=True) print(preds) ''' cutoff = .5 trainset, testset = train_test_split(data, test_size=0.25) for alg in [SVD(), NMF(), KNNWithMeans()]: alg.fit(trainset) predictions = alg.test(testset) # Change predictions to binary choice of left or right. Prediction class derives from NamedTuple. predictions = [ Prediction(prediction.uid, prediction.iid, prediction.r_ui, int(prediction.est < cutoff), prediction.details) for prediction in predictions ] # print(predictions) accuracy.mae(predictions) df_predicted = pd.DataFrame(columns=["uid", "iid", "predicted", "actual"]) for prediction in predictions: df_predicted = df_predicted.append(