def test_load_pandas_df(
    size,
    num_samples,
    num_movies,
    movie_example,
    title_example,
    genres_example,
    year_example,
    tmp,
):
    """Verify that the MovieLens loader returns the expected pd.DataFrame.

    Three scenarios are exercised in order: a ratings-only load with a short
    custom header, a load that also requests title / genres / year columns
    with an over-long header, and a load relying purely on default arguments.

    Args:
        size: MovieLens dataset size identifier passed to the loader.
        num_samples: Expected number of rows in the loaded frame.
        num_movies: Supplied by the parametrization; not used by this test.
        movie_example: Item id used to pick rows for the feature checks.
        title_example: Expected title for ``movie_example``.
        genres_example: Expected genres string for ``movie_example``.
        year_example: Expected release year for ``movie_example``.
        tmp: Directory used as the loader's local cache path.
    """
    # Scenario 1: ratings only, with a three-column custom header.
    short_header = ["a", "b", "c"]
    ratings = load_pandas_df(size=size, local_cache_path=tmp, header=short_header)
    assert len(ratings) == num_samples
    assert len(ratings.columns) == len(short_header)

    # The raw zip file, the rating file and the item file should all be cached.
    cached_entries = os.listdir(tmp)
    assert len(cached_entries) == 3

    # Scenario 2: also load title, genres and release year. The header has
    # one name too many, and the test requires a warning to be emitted.
    long_header = ["a", "b", "c", "d", "e"]
    with pytest.warns(Warning):
        enriched = load_pandas_df(
            size=size,
            header=long_header,
            local_cache_path=tmp,
            title_col="Title",
            genres_col="Genres",
            year_col="Year",
        )
    assert len(enriched) == num_samples
    # 4 header columns (user, item, rating, timestamp) and 3 feature columns.
    assert len(enriched.columns) == 7
    # Only the first 4 header names are used, so "e" must be absent.
    assert "e" not in enriched.columns

    # Take two records of the same item: their item features must agree with
    # each other and with the expected example values.
    same_movie = enriched.loc[enriched["b"] == movie_example][:2]
    expected_features = (
        ("Title", title_example),
        ("Genres", genres_example),
        ("Year", year_example),
    )
    for column, expected in expected_features:
        values = same_movie[column].values
        assert values[0] == values[1]
        assert values[0] == expected

    # Scenario 3: default arguments give user, item, rating and timestamp.
    defaults = load_pandas_df(size)
    assert len(defaults) == num_samples
    assert len(defaults.columns) == 4
def test_load_pandas_df_mock_100__with_custom_param__succeed():
    """Load the "mock100" dataset with title and genre columns requested.

    Checks that both requested columns come back as pandas Series, that the
    frame has 100 rows, that the first genre entry contains the "|"
    separator, and that the first title is "foo".
    """
    df = load_pandas_df(
        "mock100", title_col=DEFAULT_TITLE_COL, genres_col=DEFAULT_GENRE_COL
    )
    # isinstance (not exact type equality) so Series subclasses are accepted.
    assert isinstance(df[DEFAULT_TITLE_COL], Series)
    assert isinstance(df[DEFAULT_GENRE_COL], Series)
    assert len(df) == 100
    assert "|" in df.loc[0, DEFAULT_GENRE_COL]
    assert df.loc[0, DEFAULT_TITLE_COL] == "foo"
def test_model_lightgcn(deeprec_resource_path, deeprec_config_path):
    """Train LightGCN for one epoch on MovieLens 100k and export embeddings.

    Runs evaluation, fitting, top-k recommendation and embedding export, and
    checks that each step yields a result and that both exported files are
    non-empty.

    Args:
        deeprec_resource_path: Base directory; embeddings are written under
            its "dkn" sub-directory.
        deeprec_config_path: Directory containing "lightgcn.yaml".
    """
    # Resolve the config file and the two embedding output files.
    config_file = os.path.join(deeprec_config_path, "lightgcn.yaml")
    output_dir = os.path.join(deeprec_resource_path, "dkn")
    user_file = os.path.join(output_dir, "user_embeddings.csv")
    item_file = os.path.join(output_dir, "item_embeddings.csv")

    # Build the train/test interaction data.
    ratings = movielens.load_pandas_df(size="100k")
    train, test = python_stratified_split(ratings, ratio=0.75)
    data = ImplicitCF(train=train, test=test)

    # A single epoch keeps the test short.
    hparams = prepare_hparams(config_file, epochs=1)
    model = LightGCN(hparams, data)

    eval_result = model.run_eval()
    assert eval_result is not None

    model.fit()

    recommendations = model.recommend_k_items(test)
    assert recommendations is not None

    # Exported embedding files must exist and contain data.
    model.infer_embedding(user_file, item_file)
    for exported in (user_file, item_file):
        assert os.path.getsize(exported) != 0
def test_lightgcn_component_definition(deeprec_config_path):
    """Check that a freshly built LightGCN model defines its components.

    Builds the model on MovieLens 100k with an embedding size of 64, then
    checks that the user/item embedding shapes match the data dimensions and
    that the remaining model attributes are set.

    Args:
        deeprec_config_path: Directory containing "lightgcn.yaml".
    """
    config_file = os.path.join(deeprec_config_path, "lightgcn.yaml")

    ratings = movielens.load_pandas_df(size="100k")
    train, test = python_stratified_split(ratings, ratio=0.75)
    data = ImplicitCF(train=train, test=test)

    embed_size = 64
    hparams = prepare_hparams(config_file, embed_size=embed_size)
    model = LightGCN(hparams, data)

    assert model.norm_adj is not None

    # Embedding tables: one row per user / item, one column per dimension.
    expected_user_shape = [data.n_users, embed_size]
    expected_item_shape = [data.n_items, embed_size]
    assert model.ua_embeddings.shape == expected_user_shape
    assert model.ia_embeddings.shape == expected_item_shape

    # The remaining components only need to be defined.
    required_attributes = (
        "u_g_embeddings",
        "pos_i_g_embeddings",
        "neg_i_g_embeddings",
        "batch_ratings",
        "loss",
        "opt",
    )
    for attribute in required_attributes:
        assert getattr(model, attribute) is not None
def test_load_pandas_df_mock_100__with_default_param__succeed():
    """Load the "mock100" dataset with default arguments.

    Checks that the result is a pandas DataFrame with 100 rows and that no
    (user, item) pair appears more than once.
    """
    df = load_pandas_df("mock100")
    # isinstance (not exact type equality) so DataFrame subclasses are accepted.
    assert isinstance(df, pandas.DataFrame)
    assert len(df) == 100
    assert not df[[DEFAULT_USER_COL, DEFAULT_ITEM_COL]].duplicated().any()