# Baseline experiment: measure the cross-validated RMSE of a predictor that
# guesses every rating at random.
import numpy as np

from math260 import data_prep, recommend, score

GAMES_FILE = "data/games.csv"
REVIEWS_FILE = "data/reviews.csv"


if __name__ == "__main__":
    # Load the raw data and turn it into the rating / boolean matrices.
    games, users = data_prep.parse_data(
        GAMES_FILE, REVIEWS_FILE, verbose=True
    )
    games_map, users_map, rating_matrix, bool_matrix = (
        data_prep.create_review_matrix(
            games, users, sparse=False, verbose=True
        )
    )

    # The predictor is handed a zero-argument callable producing one random
    # integer per call.  ``np.random.randint(0, 10)`` excludes the upper
    # bound, so the guesses fall in 0..9.
    # NOTE(review): confirm 0..9 matches the rating scale of the data set.
    predictor = recommend.RandomPredictor(lambda: np.random.randint(0, 10))

    # 0.1 is rmsecv's first argument -- presumably the share of each user's
    # ratings that is held out and predicted; verify against score.rmsecv.
    rmse, errors = score.rmsecv(
        0.1, rating_matrix, bool_matrix, predictor.predict
    )
    print('RMSE:\t\t{}'.format(rmse))
def cosine_similarity(u1, u2, rating_matrix, bool_matrix):
    """Return the cosine similarity between the rating rows of two users.

    Args:
        u1: Index of the first row in ``rating_matrix``.
        u2: Index of the second row in ``rating_matrix``.
        rating_matrix: Indexable collection of rating rows.
        bool_matrix: Not used by this measure; presumably accepted so the
            signature matches the other similarity callables handed to
            ``recommend.SimilarityPredictor`` -- confirm.

    Returns:
        ``1 - dist.cosine(rating_matrix[u1], rating_matrix[u2])``.
    """
    # ``dist`` is expected to be provided elsewhere in this file
    # (presumably ``scipy.spatial.distance`` -- confirm).
    return 1 - dist.cosine(rating_matrix[u1], rating_matrix[u2])


if __name__ == "__main__":
    # Load the raw data and turn it into the rating / boolean matrices.
    games, users = data_prep.parse_data(GAMES_FILE, REVIEWS_FILE, verbose=True)
    games_map, users_map, rating_matrix, bool_matrix = (
        data_prep.create_review_matrix(games, users, sparse=False, verbose=True)
    )

    # Sweep the neighbour count over 20, 40, ..., 500 (25 settings) and
    # record the RMSE of the cosine and MSD similarity predictors for each.
    results = []
    for k in range(20, 501, 20):
        print(f'using {k} neighbors')
        cosine_predictor = recommend.SimilarityPredictor(cosine_similarity, k)
        msd_predictor = recommend.SimilarityPredictor(msd_similarity, k)

        cosine_rmse, _ = score.rmsecv(0.1, rating_matrix, bool_matrix,
                                      cosine_predictor.predict,
                                      users=range(0, 1000))
        sim_rmse, _ = score.rmsecv(0.1, rating_matrix, bool_matrix,
                                   msd_predictor.predict,
                                   users=range(0, 1000))

        result = {'k': k, 'cosine': cosine_rmse, 'sim': sim_rmse}
        results.append(result)
        print(result)

    # Use a context manager so the output file is flushed and closed
    # deterministically instead of leaking the handle returned by open().
    # ``json`` and ``output`` are expected to be defined elsewhere in this file.
    with open(output, mode='w') as out_file:
        json.dump(results, out_file)
# --- Predictors under comparison -------------------------------------------
# Every similarity-based predictor receives 120 as its second argument.
combo_predictor = recommend.SimilarityPredictor(combo_sim, 120)
adjusted_cosine_predictor = recommend.SimilarityPredictor(
    adjusted_cosine_similarity, 120
)
cosine_predictor = recommend.SimilarityPredictor(cosine_similarity, 120)
msd_predictor = recommend.SimilarityPredictor(msd_similarity, 120)
jaccard_predictor = recommend.SimilarityPredictor(jaccard_similarity, 120)

# Average-based baselines, all built from the same two matrices.
avg_predictor = recommend.AveragePredictor(rating_matrix, bool_matrix)
# NOTE(review): uavg_predictor is constructed exactly like avg_predictor;
# confirm whether a different predictor class was intended here.
uavg_predictor = recommend.AveragePredictor(rating_matrix, bool_matrix)
gavg_predictor = recommend.GlobalAveragePredictor(rating_matrix, bool_matrix)
tavg_predictor = recommend.TwoWayAveragePredictor(rating_matrix, bool_matrix)


def _holdout_rmse(predictor):
    """Return only the RMSE that ``score.rmsecv`` reports for ``predictor``.

    All evaluations share the same settings: 0.1 as the first argument, the
    module's rating/bool matrices, and ``users=range(0, 1000)``.
    """
    rmse_value, _ = score.rmsecv(
        0.1,
        rating_matrix,
        bool_matrix,
        predictor.predict,
        users=range(0, 1000),
    )
    return rmse_value


# --- Evaluation ------------------------------------------------------------
# Currently disabled evaluations:
# adjusted_cosine_rmse = _holdout_rmse(adjusted_cosine_predictor)
# cosine_rmse = _holdout_rmse(cosine_predictor)
combo_rmse = _holdout_rmse(combo_predictor)
jaccard_rmse = _holdout_rmse(jaccard_predictor)
sim_rmse = _holdout_rmse(msd_predictor)
avg_rmse = _holdout_rmse(avg_predictor)
uavg_rmse = _holdout_rmse(uavg_predictor)
gavg_rmse = _holdout_rmse(gavg_predictor)
tavg_rmse = _holdout_rmse(tavg_predictor)

# print(f"Adjusted cosine predictor RMSE: {adjusted_cosine_rmse}")
# print(f"Cosine predictor RMSE: {cosine_rmse}")