Пример #1
0
def main():
    """Factorize the training ratings and print training/testing RMSE.

    Reads the training and testing rating CSV files from the working
    directory, pivots each into a table indexed by ``user_id`` with one
    column per ``movie_id``, factorizes the training table, and prints the
    RMSE of the reconstructed ratings against both tables.
    """
    # Read the flat rating records for both data sets.
    training_records = pd.read_csv('movie_ratings_data_set_training.csv')
    testing_records = pd.read_csv('movie_ratings_data_set_testing.csv')

    # Both record lists are pivoted with the same settings; duplicate
    # (user, movie) entries are collapsed by taking the maximum.
    pivot_options = dict(index='user_id', columns='movie_id', aggfunc=np.max)
    training_table = pd.pivot_table(training_records, **pivot_options)
    testing_table = pd.pivot_table(testing_records, **pivot_options)

    # Factorize the training table into two latent-feature matrices.
    factorize = matrix_factorization_utilities.low_rank_matrix_factorization
    user_features, movie_features = factorize(
        training_table.values, num_features=11, regularization_amount=1.1)

    # The product of the two factors gives a prediction for every cell.
    reconstructed = np.matmul(user_features, movie_features)

    # Score the same reconstruction against each data set.
    compute_rmse = matrix_factorization_utilities.RMSE
    training_score = compute_rmse(training_table.values, reconstructed)
    testing_score = compute_rmse(testing_table.values, reconstructed)

    for template, score in (("Training RMSE: {}", training_score),
                            ("Testing RMSE: {}", testing_score)):
        print(template.format(score))
Пример #2
0
    def stratLearn(self):
        """
        Learn from other users' purchases using matrix factorization.

        First builds a pivot table of users, products and normalized
        purchase quantities, then factorizes it into two matrices:
        U - user features
        P - product features
        The predicted purchases are obtained by multiplying U and P.

        Side effects: stores the normalized frame on
        ``self.user_product_purch_df_normalized`` and pickles U, the
        transposed P, and the predicted purchases to ``user_features.dat``,
        ``product_features.dat`` and ``predicted_purchases.dat`` in the
        current working directory.
        """
        self.setDataFrames()

        # Normalize quantity
        self.user_product_purch_df_normalized = self.normalize_quantity()

        purchases = pd.pivot_table(self.user_product_purch_df_normalized,
                                   index='user_num',
                                   columns='product_num',
                                   aggfunc=np.max)

        # DataFrame.as_matrix() was removed in pandas 1.0; .values returns
        # the same underlying array and works on old and new versions.
        U, P = matrix_factorization_utilities.low_rank_matrix_factorization(
            purchases.values, num_features=15, regularization_amount=3.6)
        predicted_purchases = np.matmul(U, P)

        # Product features are stored transposed relative to how they are
        # used in the multiplication above.
        P = np.transpose(P)

        # Use context managers so each file is flushed and closed
        # deterministically instead of relying on garbage collection.
        with open("user_features.dat", "wb") as user_features_file:
            pickle.dump(U, user_features_file)
        with open("product_features.dat", "wb") as product_features_file:
            pickle.dump(P, product_features_file)
        with open("predicted_purchases.dat", "wb") as predictions_file:
            pickle.dump(predicted_purchases, predictions_file)

        # NOTE(review): rmse is not used in the visible part of this method;
        # kept because the remainder of the method may rely on it.
        rmse = matrix_factorization_utilities.RMSE(purchases.values,
                                                   predicted_purchases)
Пример #3
0
# Fit a low-rank factorization on the training ratings and report the RMSE of
# the reconstructed ratings against both the training and testing data.

# Load user ratings
raw_training_dataset_df = pd.read_csv(
    "data/movie_ratings_data_set_training.csv")
raw_testing_dataset_df = pd.read_csv("data/movie_ratings_data_set_testing.csv")

# Convert the running list of user ratings into a matrix
# (rows indexed by user_id, columns by movie_id; duplicates collapse to max)
ratings_training_df = pd.pivot_table(raw_training_dataset_df,
                                     index='user_id',
                                     columns='movie_id',
                                     aggfunc=np.max)
ratings_testing_df = pd.pivot_table(raw_testing_dataset_df,
                                    index='user_id',
                                    columns='movie_id',
                                    aggfunc=np.max)

# Apply matrix factorization to find latent features.
# DataFrame.as_matrix() was removed in pandas 1.0; .values returns the same
# array and works on both old and new pandas versions.
U, M = mfu.low_rank_matrix_factorization(ratings_training_df.values,
                                         num_features=11,
                                         regularization_amount=1.1)

# Find all predicted ratings by multiplying U and M
predicted_ratings = np.matmul(U, M)

# Measure RMSE
# NOTE(review): predicted_ratings is derived from the training pivot only;
# scoring it against the testing matrix assumes both pivots share the same
# shape and user/movie ordering - confirm against the data files.
rmse_training = mfu.RMSE(ratings_training_df.values, predicted_ratings)
rmse_testing = mfu.RMSE(ratings_testing_df.values, predicted_ratings)

print("Training RMSE: {}".format(rmse_training))
print("Testing RMSE: {}".format(rmse_testing))
import numpy as np
import pandas as pd
import matrix_factorization_utilities

# Fit a low-rank factorization on the training ratings and report the RMSE of
# the reconstructed ratings against both the training and testing data.

# Load user ratings
raw_training_dataset_df = pd.read_csv('movie_ratings_data_set_training.csv')
raw_testing_dataset_df = pd.read_csv('movie_ratings_data_set_testing.csv')

# Convert the running list of user ratings into a matrix
# (rows indexed by user_id, columns by movie_id; duplicates collapse to max)
ratings_training_df = pd.pivot_table(raw_training_dataset_df, index='user_id', columns='movie_id', aggfunc=np.max)
ratings_testing_df = pd.pivot_table(raw_testing_dataset_df, index='user_id', columns='movie_id', aggfunc=np.max)

# Apply matrix factorization to find the latent features.
# DataFrame.as_matrix() was removed in pandas 1.0; .values returns the same
# array and works on both old and new pandas versions.
U, M = matrix_factorization_utilities.low_rank_matrix_factorization(ratings_training_df.values,
                                                                    num_features=11,
                                                                    regularization_amount=1.0)

# Find all predicted ratings by multiplying U and M
predicted_ratings = np.matmul(U, M)

# Measure RMSE
# NOTE(review): predicted_ratings is derived from the training pivot only;
# scoring it against the testing matrix assumes both pivots share the same
# shape and user/movie ordering - confirm against the data files.
rmse_training = matrix_factorization_utilities.RMSE(ratings_training_df.values, predicted_ratings)
rmse_testing = matrix_factorization_utilities.RMSE(ratings_testing_df.values, predicted_ratings)

print("Training RMSE: {}".format(rmse_training))
print("Testing RMSE: {}".format(rmse_testing))
    'movie_ratings_data_set_training.csv')
# Load the testing ratings (the training ratings are loaded just above).
users_movie_ratings_testing = pandas.read_csv(
    'movie_ratings_data_set_testing.csv')

# Pivot each list of ratings into a table indexed by user_id with one column
# per movie_id; duplicate (user, movie) entries collapse to their maximum.
users_movie_ratings_training_pivot_table = pandas.pivot_table(
    users_movie_ratings_training,
    index='user_id',
    columns='movie_id',
    aggfunc=numpy.max)
users_movie_ratings_testing_pivot_table = pandas.pivot_table(
    users_movie_ratings_testing,
    index='user_id',
    columns='movie_id',
    aggfunc=numpy.max)

# Factorize the training table into two latent-feature matrices.
# DataFrame.as_matrix() was removed in pandas 1.0; .values returns the same
# array and works on both old and new pandas versions.
U, M = matrix_factorization_utilities.low_rank_matrix_factorization(
    users_movie_ratings_training_pivot_table.values,
    num_features=11,
    regularization_amount=1.1)

# Predicted ratings for every cell are the product of the two factors.
predicted_ratings = numpy.matmul(U, M)

# Measure RMSE
# NOTE(review): predicted_ratings is derived from the training pivot only;
# scoring it against the testing matrix assumes both pivots share the same
# shape and user/movie ordering - confirm against the data files.
rmse_training = matrix_factorization_utilities.RMSE(
    users_movie_ratings_training_pivot_table.values, predicted_ratings)
rmse_testing = matrix_factorization_utilities.RMSE(
    users_movie_ratings_testing_pivot_table.values, predicted_ratings)

print('Training RMSE: {}'.format(rmse_training))
print('Testing RMSE: {}'.format(rmse_testing))