def test_sparse_to_df(test_specs, python_dataset):
    """Round-trip check: ``map_back_sparse`` must invert ``gen_affinity_matrix``,
    reproducing the input dataframe's users, items and ratings in userID order.
    """
    header = {
        "col_user": DEFAULT_USER_COL,
        "col_item": DEFAULT_ITEM_COL,
        "col_rating": DEFAULT_RATING_COL,
    }

    # Instantiate the affinity matrix.
    am = AffinityMatrix(DF=python_dataset, **header)

    # Generate the sparse matrix representation.
    X, _, _ = am.gen_affinity_matrix()

    # Inverse function: pandas df from the sparse matrix, ordered by userID.
    DF = am.map_back_sparse(X, kind="ratings")

    # BUG FIX: the original compared `a.all() == b.all()`, which collapses each
    # side to a single truthiness scalar (True == True) and passes for almost
    # any data. Compare the arrays element-wise, as the test intends
    # ("same elements in the same positions").
    reference = python_dataset.sort_values(by=["userID"])
    assert (DF.userID.values == reference.userID.values).all()
    assert (DF.itemID.values == reference.itemID.values).all()
    assert (DF.rating.values == reference.rating.values).all()
def test_sparse_to_df(test_specs, python_dataset):
    """Round-trip check: ``map_back_sparse`` must invert ``gen_affinity_matrix``,
    reproducing the input dataframe's users, items and ratings in userID order.
    """
    header = {
        "col_user": DEFAULT_USER_COL,
        "col_item": DEFAULT_ITEM_COL,
        "col_rating": DEFAULT_RATING_COL,
    }

    # Instantiate the affinity matrix.
    am = AffinityMatrix(DF=python_dataset, **header)

    # Generate the sparse matrix representation.
    # NOTE(review): this variant expects a single return value from
    # gen_affinity_matrix; a sibling test unpacks a 3-tuple — confirm which
    # API version is in use.
    X = am.gen_affinity_matrix()

    # Inverse function: pandas df from the sparse matrix, ordered by userID.
    DF = am.map_back_sparse(X, kind="ratings")

    # BUG FIX: the original compared `a.all() == b.all()`, which collapses each
    # side to a single truthiness scalar (True == True) and passes for almost
    # any data. Compare the arrays element-wise, as the test intends
    # ("same elements in the same positions").
    reference = python_dataset.sort_values(by=["userID"])
    assert (DF.userID.values == reference.userID.values).all()
    assert (DF.itemID.values == reference.itemID.values).all()
    assert (DF.rating.values == reference.rating.values).all()
def test_df_to_sparse(test_specs, python_dataset):
    """The affinity matrix must have one row per unique user and one column
    per unique item of the input dataframe.
    """
    header = {
        "col_user": DEFAULT_USER_COL,
        "col_item": DEFAULT_ITEM_COL,
        "col_rating": DEFAULT_RATING_COL,
    }

    # Instantiate the affinity matrix.
    am = AffinityMatrix(DF=python_dataset, **header)

    # Obtain the sparse matrix representation of the input dataframe.
    X, _, _ = am.gen_affinity_matrix()

    # IDIOM FIX: the original joined both conditions with bitwise `&` in a
    # single assert; separate asserts are idiomatic and pinpoint which
    # dimension is wrong on failure.
    assert X.shape[0] == python_dataset.userID.unique().shape[0]
    assert X.shape[1] == python_dataset.itemID.unique().shape[0]
def test_df_to_sparse(test_specs, python_dataset):
    """The affinity matrix must have one row per unique user and one column
    per unique item of the input dataframe.
    """
    header = {
        "col_user": DEFAULT_USER_COL,
        "col_item": DEFAULT_ITEM_COL,
        "col_rating": DEFAULT_RATING_COL,
    }

    # Instantiate the affinity matrix.
    am = AffinityMatrix(DF=python_dataset, **header)

    # Obtain the sparse matrix representation of the input dataframe.
    # NOTE(review): this variant expects a single return value from
    # gen_affinity_matrix; a sibling test unpacks a 3-tuple — confirm which
    # API version is in use.
    X = am.gen_affinity_matrix()

    # IDIOM FIX: the original joined both conditions with bitwise `&` in a
    # single assert; separate asserts are idiomatic and pinpoint which
    # dimension is wrong on failure.
    assert X.shape[0] == python_dataset.userID.unique().shape[0]
    assert X.shape[1] == python_dataset.itemID.unique().shape[0]
def RBMtrain():
    """Train an RBM recommender on the snacks ratings CSV and persist the
    top-k recommendations dataframe with joblib.

    Side effects:
        Reads ``SnacksData100.csv`` from the working directory and writes the
        recommendations dataframe to the file ``testdata``.
    """
    data = pd.read_csv("SnacksData100.csv")

    # Map the CSV columns onto the names AffinityMatrix expects.
    # NOTE(review): "******" looks like a redacted user-id column name —
    # confirm against the actual CSV header before running.
    header = {
        "col_user": "******",
        "col_item": "Product_Id",
        "col_rating": "Ratings",
    }
    am = AffinityMatrix(DF=data, **header)

    # Sparse user x item matrix, then a stratified train/test split.
    X = am.gen_affinity_matrix()
    Xtr, Xtst = numpy_stratified_split(X)

    model = RBM(
        hidden_units=600,
        training_epoch=30,
        minibatch_size=60,
        keep_prob=0.9,
        with_metrics=True,
    )
    model.fit(Xtr, Xtst)

    # recommend_k_items also returns the prediction time, which is unused here.
    # (Removed the original's dead `test_df = am.map_back_sparse(Xtst, ...)`
    # assignment: its result was never used or persisted.)
    top_k, _ = model.recommend_k_items(Xtst)
    top_k_df = am.map_back_sparse(top_k, kind='prediction')

    joblib.dump(top_k_df, 'testdata')
smpl = pd.merge(data, titles, on="MovieID").sample(SMPLS) smpl['MovieTitle'] = smpl['MovieTitle'].str[:TITLEN] smpl['Rating'] = pd.to_numeric(smpl['Rating'], downcast='integer') del smpl['Timestamp'] # Drop the column from printing. print(smpl.to_string()) header = { "col_user": "******", "col_item": "MovieID", "col_rating": "Rating", } # Use a sparse matrix representation rather than a pandas data frame # for significant performance gain. am = AffinityMatrix(DF=data, **header) X = am.gen_affinity_matrix() # Contstruct the training and test datasets. Xtr, Xtst = numpy_stratified_split(X) print('\nTraining matrix size (users, movies) is:', Xtr.shape) print('Testing matrix size is: ', Xtst.shape) # Initialize the model class. Note that through random variation we # can get a much better performing model with seed=1! model = RBM( hidden_units=600, training_epoch=30,