Exemplo n.º 1
0
import os

import matplotlib.pyplot as plt
import numpy as np
from tqdm import tqdm

from math260 import data_prep, recommend, score

# choose your datasets
GAMES_DATA_FILE = "data/games.csv"
REVIEWS_FILE = "data/reviews.csv"

# where the finished bar plot is written
FIGURE_FILE = 'figures/rating-dist.png'

# one bin per integer rating index 0..10; used for both the counts array and
# the x positions of the bars so the two always agree
NUM_RATING_BINS = 11

games, users = data_prep.parse_data(GAMES_DATA_FILE, REVIEWS_FILE, True)
# NOTE(review): none of these four results is used by the plot below. The call
# is kept so the script's console output and any side effects are unchanged --
# confirm whether it can be dropped.
games_map, users_map, rating_matrix, bool_matrix = \
    data_prep.create_review_matrix(games, users, sparse=False, verbose=True)

# creates a bar plot of the frequency of each rating

# score_counts[r] accumulates the number of reviews whose rating maps to bin r
score_counts = np.zeros(NUM_RATING_BINS)

game_names = list(games.keys())

print('Iterating over all reviews')
for name in tqdm(game_names):
    for review in games[name]['reviews']:
        # int() drops any fractional part when the rating is a float, so the
        # rating is binned by its integer part
        score_counts[int(review['rating'])] += 1

plt.bar(np.arange(NUM_RATING_BINS), score_counts)
plt.xlabel('Rating')
plt.ylabel('Number of Reviews')
plt.title('Distribution of Ratings')

# savefig does not create missing directories; make sure the target directory
# exists so the script does not fail after all the work above is done
figure_dir = os.path.dirname(FIGURE_FILE)
if figure_dir:
    os.makedirs(figure_dir, exist_ok=True)
plt.savefig(FIGURE_FILE)
Exemplo n.º 2
0
from math260 import data_prep, recommend, score
import numpy as np

# Input datasets: game metadata and user reviews.
GAMES_FILE = "data/games.csv"
REVIEWS_FILE = "data/reviews.csv"

# Script entry point: load the data, build the review matrices, hold out a
# fraction of the ratings, and score average-based predictors by RMSE.
if __name__ == "__main__":
    # Parse both CSV files, then turn the result into dense (sparse=False)
    # rating / boolean matrices plus the two index maps.
    games, users = data_prep.parse_data(GAMES_FILE, REVIEWS_FILE, verbose=True)
    games_map, users_map, rating_matrix, bool_matrix  \
        = data_prep.create_review_matrix(games, users, sparse=False, verbose=True)

    # The game-average predictor receives copies of the matrices taken BEFORE
    # remove_fraction is called.
    # NOTE(review): presumably remove_fraction modifies rating_matrix and
    # bool_matrix in place (the matrices are reused below and only `removed`
    # is returned) -- TODO confirm. If so, this predictor is built from data
    # that still contains the held-out ratings, unlike the predictors created
    # after the removal; verify that this is intended.
    game_predictor = recommend.AveragePredictor(np.copy(rating_matrix),
                                                np.copy(bool_matrix))
    removed = score.remove_fraction(0.1, rating_matrix, bool_matrix)

    # testing removing 10% from each user and predicting using average score
    global_predictor = recommend.GlobalAveragePredictor(
        rating_matrix, bool_matrix)
    # NOTE(review): constructed without the matrices, unlike the other
    # predictors here -- confirm against UserAveragePredictor's signature.
    user_predictor = recommend.UserAveragePredictor()
    tw_predictor = recommend.TwoWayAveragePredictor(rating_matrix, bool_matrix)

    # RMSE of each predictor's `predict` callable, given the removed ratings
    # and the current matrices, restricted to user indices 0..999.
    glob_avg_rmse = score.rmse(removed,
                               rating_matrix,
                               bool_matrix,
                               global_predictor.predict,
                               users=range(0, 1000))
    game_avg_rmse = score.rmse(removed,
                               rating_matrix,
                               bool_matrix,
                               game_predictor.predict,
                               users=range(0, 1000))