Exemplo n.º 1
0
def baseline_global_mean(train, test, training=False):
    """Predict the global mean training rating for every row of ``test``.

    Args:
        train: DataFrame with a numeric ``Rating`` column.
        test: DataFrame of rows to predict. It is copied, never modified.
        training: When true, score the predictions against ``test`` with
            ``compute_rmse`` and print the result.

    Returns:
        A copy of ``test`` whose ``Rating`` column holds the mean of
        ``train.Rating`` on every row.
    """
    mean = train.Rating.mean()
    pred_test = test.copy()
    # Item assignment (rather than ``pred_test.Rating = mean``) so the column
    # is created when ``test`` has no ``Rating`` column yet; attribute
    # assignment would only set a Python attribute in that case.
    pred_test['Rating'] = mean

    if training:
        rmse = compute_rmse(pred_test, test)
        print("baseline_global_mean: {}".format(rmse))
    return pred_test
Exemplo n.º 2
0
def movie_mean_user_standardize(train, test, training=False):
    """Run the movie-mean baseline on user-standardized training ratings.

    The training frame is passed through ``user_standardize``, the result is
    fed to ``baseline_movie_mean`` together with ``test``, and the predictions
    are mapped back with ``user_standardize_recover`` using the original
    ``train``. (All three helpers are defined elsewhere; presumably they
    standardize and un-standardize ratings per user -- confirm there.)

    Args:
        train: Training ratings frame.
        test: Frame of rows to predict.
        training: When true, print the RMSE of the predictions against
            ``test`` as computed by ``compute_rmse``.

    Returns:
        The recovered predictions for ``test``.
    """
    predictions = user_standardize_recover(
        train,
        baseline_movie_mean(user_standardize(train), test),
    )

    # Nothing to report unless the caller asked for a training score.
    if not training:
        return predictions

    print("movie_mean_user_std: {}".format(compute_rmse(predictions, test)))
    return predictions
Exemplo n.º 3
0
def baseline_movie_median(train, test, training=False):
    """Predict each test rating as the median training rating of its movie.

    Args:
        train: DataFrame with ``Movie`` and ``Rating`` columns.
        test: DataFrame with a ``Movie`` column. It is copied, never
            modified, and its column order does not matter.
        training: When true, score the predictions against ``test`` with
            ``compute_rmse`` and print the result.

    Returns:
        A copy of ``test`` (same index and row order) whose ``Rating`` column
        holds the per-movie median of ``train``.

    Raises:
        KeyError: If ``test`` contains a movie that never appears in
            ``train``.
    """
    # Aggregate only the column that is needed instead of every column.
    median_per_movie = train.groupby('Movie')['Rating'].median()

    pred_test = test.copy()

    # Fail loudly on movies without training data rather than emitting NaN
    # predictions that would silently poison any downstream score.
    unseen = ~pred_test['Movie'].isin(median_per_movie.index)
    if unseen.any():
        raise KeyError(
            "no training rating for movie(s): {}".format(
                sorted(pred_test.loc[unseen, 'Movie'].unique().tolist())
            )
        )

    # A label-based lookup keeps the original index/row order and does not
    # depend on the position of the ``Movie`` column or on how
    # ``groupby.apply`` treats grouping columns.
    pred_test['Rating'] = pred_test['Movie'].map(median_per_movie)

    if training:
        rmse = compute_rmse(pred_test, test)
        print("baseline_movie_median: {}".format(rmse))

    return pred_test
Exemplo n.º 4
0
def movie_median_user_habit_standardize(train, test, training=False):
    """Run the movie-median baseline on habit-standardized training ratings.

    ``train`` is transformed by ``user_habit_standardize``, the transformed
    frame and ``test`` go through ``baseline_movie_median``, and the result is
    mapped back with ``user_habit_standardize_recover`` using the original
    ``train``. (The standardize/recover helpers are defined elsewhere;
    presumably they remove and restore a per-user rating offset -- confirm
    there.)

    Args:
        train: Training ratings frame.
        test: Frame of rows to predict.
        training: When true, print the RMSE of the predictions against
            ``test`` as computed by ``compute_rmse``.

    Returns:
        The recovered predictions for ``test``.
    """
    standardized_train = user_habit_standardize(train)
    standardized_predictions = baseline_movie_median(standardized_train, test)
    predictions = user_habit_standardize_recover(train, standardized_predictions)

    # Nothing to report unless the caller asked for a training score.
    if not training:
        return predictions

    score = compute_rmse(predictions, test)
    print("movie_median_user_habit_std: {}".format(score))
    return predictions
Exemplo n.º 5
0
def movie_mean_user_habit_standardize(train, test, training=False):
    """Run the movie-mean baseline on habit-standardized training ratings.

    ``train`` is transformed by ``user_habit_standardize``, the transformed
    frame and ``test`` go through ``baseline_movie_mean``, and the result is
    mapped back with ``user_habit_standardize_recover`` using the original
    ``train``. (These helpers are defined elsewhere; presumably they remove
    and restore a per-user rating offset -- confirm there.)

    Args:
        train: Training ratings frame.
        test: Frame of rows to predict.
        training: When true, print the RMSE of the predictions against
            ``test`` as computed by ``compute_rmse``.

    Returns:
        The recovered predictions for ``test``.
    """
    # NOTE: an earlier version built a float-cast copy of ``test`` here, but
    # that copy was overwritten below before ever being read, so it is gone.

    # Standardize the ratings according to per-user habit.
    stand_train = user_habit_standardize(train)

    # Predict the standardized test ratings.
    stand_pred_test = baseline_movie_mean(stand_train, test)

    # Recover real ratings from the standardized predictions.
    pred_test = user_habit_standardize_recover(train, stand_pred_test)

    # Report the RMSE when requested.
    if training:
        rmse = compute_rmse(pred_test, test)
        print("movie_mean_user_habit_std: {}".format(rmse))

    return pred_test
Exemplo n.º 6
0
def movie_median_user_habit(train, test, training=False):
    """Predict ratings as the movie's median rating plus the user's habit.

    Args:
        train: DataFrame with ``Movie`` and ``Rating`` columns; also passed
            to ``user_habit`` (defined elsewhere), whose result is indexed by
            user id below.
        test: DataFrame with ``User``, ``Movie`` and ``Rating`` columns. It
            is copied, never modified.
        training: When true, print the RMSE of the predictions against
            ``test`` as computed by ``compute_rmse``.

    Returns:
        A copy of ``test`` whose ``Rating`` is
        ``median(train ratings of the movie) + habit[user]`` and whose
        ``User`` and ``Movie`` values are cast to ``int``.
    """
    user_offsets = user_habit(train)
    movie_medians = train.groupby('Movie').median().Rating

    predictions = test.copy()
    predictions['Rating'] = predictions['Rating'].apply(float)

    def fill_rating(row):
        # Look up both components by the ids carried on this row.
        row['Rating'] = movie_medians[row['Movie']] + user_offsets[row['User']]
        return row

    predictions = predictions.apply(fill_rating, axis=1)

    # Cast the id columns back to int after the row-wise pass (presumably
    # because it hands the ids back as floats -- confirm).
    for id_column in ('User', 'Movie'):
        predictions[id_column] = predictions[id_column].apply(int)

    if not training:
        return predictions

    score = compute_rmse(predictions, test)
    print("movie_median_user_habit: {}".format(score))
    return predictions