Пример #1
0
def test_sparse_to_df(test_specs, python_dataset):
    """Round-trip a ratings dataframe through AffinityMatrix and back.

    The input dataframe is converted to its affinity-matrix form, mapped back
    to a dataframe with ``map_back_sparse`` and the recovered users, items and
    ratings are compared, value by value, against the input.
    """
    # column names AffinityMatrix should read from the input dataframe
    header = {
        "col_user": DEFAULT_USER_COL,
        "col_item": DEFAULT_ITEM_COL,
        "col_rating": DEFAULT_RATING_COL,
    }

    # instantiate the affinity matrix
    am = AffinityMatrix(DF=python_dataset, **header)

    # generate the matrix representation; the two extra return values are not used here
    X, _, _ = am.gen_affinity_matrix()

    # use the inverse function to generate a pandas df from the matrix
    DF = am.map_back_sparse(X, kind="ratings")

    # Bring both frames into the same canonical (user, item) row order so the
    # comparison does not depend on how ties within a user happen to be ordered.
    sort_key = ["userID", "itemID"]
    expected = python_dataset.sort_values(by=sort_key)
    actual = DF.sort_values(by=sort_key)

    # Compare the full column contents. The earlier form
    # `a.values.all() == b.values.all()` collapsed each column to a single
    # boolean first, so it could not detect wrong, missing or reordered rows.
    assert actual.userID.tolist() == expected.userID.tolist()
    assert actual.itemID.tolist() == expected.itemID.tolist()
    assert actual.rating.tolist() == expected.rating.tolist()
Пример #2
0
def test_sparse_to_df(test_specs, python_dataset):
    """Round-trip a ratings dataframe through AffinityMatrix and back.

    The input dataframe is converted to its affinity-matrix form, mapped back
    to a dataframe with ``map_back_sparse`` and the recovered users, items and
    ratings are compared, value by value, against the input.
    """
    # column names AffinityMatrix should read from the input dataframe
    header = {
        "col_user": DEFAULT_USER_COL,
        "col_item": DEFAULT_ITEM_COL,
        "col_rating": DEFAULT_RATING_COL,
    }

    # instantiate the affinity matrix
    am = AffinityMatrix(DF=python_dataset, **header)

    # generate the matrix representation
    X = am.gen_affinity_matrix()

    # use the inverse function to generate a pandas df from the matrix
    DF = am.map_back_sparse(X, kind="ratings")

    # Bring both frames into the same canonical (user, item) row order so the
    # comparison does not depend on how ties within a user happen to be ordered.
    sort_key = ["userID", "itemID"]
    expected = python_dataset.sort_values(by=sort_key)
    actual = DF.sort_values(by=sort_key)

    # Compare the full column contents. The earlier form
    # `a.values.all() == b.values.all()` collapsed each column to a single
    # boolean first, so it could not detect wrong, missing or reordered rows.
    assert actual.userID.tolist() == expected.userID.tolist()
    assert actual.itemID.tolist() == expected.itemID.tolist()
    assert actual.rating.tolist() == expected.rating.tolist()
Пример #3
0
def test_df_to_sparse(test_specs, python_dataset):
    """Check that the affinity matrix has one row per user and one column per item."""
    # column names AffinityMatrix should read from the input dataframe
    column_map = dict(
        col_user=DEFAULT_USER_COL,
        col_item=DEFAULT_ITEM_COL,
        col_rating=DEFAULT_RATING_COL,
    )

    # build the matrix; the two extra return values are not needed for a shape check
    affinity = AffinityMatrix(DF=python_dataset, **column_map)
    matrix, _, _ = affinity.gen_affinity_matrix()

    # expected dimensions: number of distinct users x number of distinct items
    rows_match = matrix.shape[0] == len(python_dataset.userID.unique())
    cols_match = matrix.shape[1] == len(python_dataset.itemID.unique())

    assert rows_match and cols_match
Пример #4
0
def test_df_to_sparse(test_specs, python_dataset):
    """Check that the affinity matrix has one row per user and one column per item."""
    # column names AffinityMatrix should read from the input dataframe
    column_map = dict(
        col_user=DEFAULT_USER_COL,
        col_item=DEFAULT_ITEM_COL,
        col_rating=DEFAULT_RATING_COL,
    )

    # build the matrix representation of the input dataframe
    affinity = AffinityMatrix(DF=python_dataset, **column_map)
    matrix = affinity.gen_affinity_matrix()

    # expected dimensions: number of distinct users x number of distinct items
    rows_match = matrix.shape[0] == len(python_dataset.userID.unique())
    cols_match = matrix.shape[1] == len(python_dataset.itemID.unique())

    assert rows_match and cols_match
def RBMtrain(data_path="SnacksData100.csv", output_path="testdata"):
    """Train an RBM recommender on a ratings CSV and dump its top-k recommendations.

    Args:
        data_path: CSV file holding the ratings. It must contain the columns
            named in ``header`` below. Defaults to the previously hard-coded
            "SnacksData100.csv".
        output_path: File the recommendation dataframe is written to with
            ``joblib.dump``. Defaults to the previously hard-coded "testdata".

    Returns:
        None. The result is only persisted to ``output_path``.
    """
    data = pd.read_csv(data_path)

    # NOTE(review): "******" looks like a redacted column name -- confirm the
    # real user-id column of the CSV before running.
    header = {
        "col_user": "******",
        "col_item": "Product_Id",
        "col_rating": "Ratings",
    }

    # user/item matrix built from the dataframe, then split into train and test parts
    am = AffinityMatrix(DF=data, **header)
    X = am.gen_affinity_matrix()
    Xtr, Xtst = numpy_stratified_split(X)

    model = RBM(
        hidden_units=600,
        training_epoch=30,
        minibatch_size=60,
        keep_prob=0.9,
        with_metrics=True,
    )
    model.fit(Xtr, Xtst)

    # only the recommendations are used; the second return value is discarded
    top_k, _ = model.recommend_k_items(Xtst)

    # map matrix indices back to the original user/item identifiers
    top_k_df = am.map_back_sparse(top_k, kind='prediction')
    joblib.dump(top_k_df, output_path)
Пример #6
0
# Preview: join the ratings with the titles on MovieID and draw SMPLS random
# rows to print.
smpl = pd.merge(data, titles, on="MovieID").sample(SMPLS)
# Keep only the first TITLEN characters of each title.
smpl['MovieTitle'] = smpl['MovieTitle'].str[:TITLEN]
# Convert ratings to numeric, downcast to an integer dtype where possible.
smpl['Rating'] = pd.to_numeric(smpl['Rating'], downcast='integer')
del smpl['Timestamp']  # Drop the column so it is not printed.
print(smpl.to_string())

# Column names AffinityMatrix should read from `data`.
# NOTE(review): "******" looks like a redacted column name -- confirm the
# real user-id column.
header = {
    "col_user": "******",
    "col_item": "MovieID",
    "col_rating": "Rating",
}

# Use a sparse matrix representation rather than a pandas data frame
# for significant performance gain.

am = AffinityMatrix(DF=data, **header)
X = am.gen_affinity_matrix()

# Construct the training and test datasets.

Xtr, Xtst = numpy_stratified_split(X)

# Report the shapes of the two splits.
print('\nTraining matrix size (users, movies) is:', Xtr.shape)
print('Testing matrix size is: ', Xtst.shape)

# Initialize the model class. Note that through random variation we
# can get a much better performing model with seed=1!

model = RBM(
    hidden_units=600,
    training_epoch=30,