def test_e2e(spark, pandas_dummy_dataset, header):
    """End-to-end check that the fast and slow top-k recommenders agree.

    Fits a SARPlus model on the dummy dataset, asks both recommender paths
    for the top 3 items for user 3, and asserts identical (user, item)
    pairs with scores equal to within a relative tolerance of 1e-3.
    """
    model = SARPlus(spark, **header)
    model.fit(spark.createDataFrame(pandas_dummy_dataset))

    query_df = spark.createDataFrame(
        pd.DataFrame({header["col_user"]: [3], header["col_item"]: [2]})
    )

    def tidy(spark_df):
        # Normalize row order so the two result frames are directly comparable.
        return (
            spark_df.toPandas()
            .sort_values([header["col_user"], header["col_item"]])
            .reset_index(drop=True)
        )

    slow = tidy(model.recommend_k_items_slow(query_df, top_k=3, remove_seen=False))
    fast = tidy(
        model.recommend_k_items(
            query_df,
            "tests/test_e2e_cache",
            top_k=3,
            n_user_prediction_partitions=2,
            remove_seen=False,
        )
    )

    # First two columns are the (user, item) keys; they must match exactly.
    assert (slow.iloc[:, :2] == fast.iloc[:, :2]).all().all()
    # Scores may differ by floating-point noise; 1e-3 is the relative tolerance.
    assert np.allclose(slow.score.values, fast.score.values, 1e-3)
def test_e2e(spark, pandas_dummy_dataset, header):
    """End-to-end test: cached recommend_k_items must match the slow reference.

    NOTE(review): this re-defines ``test_e2e`` from earlier in this file, so
    only this later definition is collected by pytest and the earlier one is
    silently dead — delete one copy or rename this one.

    Fits SARPlus on the dummy dataset, requests the top 3 items for user 3
    via both the slow reference path and the cached fast path, and asserts
    they produce the same (user, item) pairs with scores within rtol=1e-3.
    """
    sar = SARPlus(spark, **header)
    df = spark.createDataFrame(pandas_dummy_dataset)
    sar.fit(df)

    test_df = spark.createDataFrame(
        pd.DataFrame({header["col_user"]: [3], header["col_item"]: [2]})
    )

    r1 = (
        sar.recommend_k_items_slow(test_df, top_k=3, remove_seen=False)
        .toPandas()
        .sort_values([header["col_user"], header["col_item"]])
        .reset_index(drop=True)
    )
    r2 = (
        sar.recommend_k_items(
            test_df,
            "tests/test_e2e_cache",
            top_k=3,
            n_user_prediction_partitions=2,
            remove_seen=False,
        )
        .toPandas()
        .sort_values([header["col_user"], header["col_item"]])
        .reset_index(drop=True)
    )

    # First two columns are the (user, item) keys; they must match exactly.
    assert (r1.iloc[:, :2] == r2.iloc[:, :2]).all().all()
    # Third positional argument of np.allclose is rtol.
    assert np.allclose(r1.score.values, r2.score.values, 1e-3)