def test_popularity(self):
    """
    Test the popularity okapi algorithm

    Fits a ``Popularity(normalize=False)`` model and the model stored in
    ``self.popularity`` on the same training split, then checks that both
    return exactly the same score for every (user, item) row of the
    testing split.
    """
    splitter = RandomSplitter()
    training, testing = splitter.split(self.df, 0.20)

    pop = Popularity(normalize=False)
    pop.fit(training)
    self.popularity.fit(training)

    # ``value in series`` tests the Series *index labels*, not its values, so
    # the membership check must run against the actual user ids. Build the
    # set once instead of scanning the column for every testing row.
    known_users = set(training["user"])

    for _, row in testing.iterrows():
        assert row["user"] in known_users, \
            "User %s from the testing split is missing in the training split" % (row["user"],)
        python_score = pop.getScore(row["user"], row["item"])
        okapi_score = self.popularity.getScore(row["user"], row["item"])
        assert okapi_score == python_score, \
            "Okapi popularity(%f) don't give the same score as his python implementation(%f)" % (okapi_score, python_score)
def test_popularity_score_against_testfm(self):
    """
    Test popularity scores with test.fm benchmark

    Fits ``Popularity`` and ``TFMPopularity`` on the same training split,
    evaluates both with ``Evaluator.evaluate_model`` on the testing split and
    requires the first element of the two results to differ by less than 0.1.
    """
    scorer = Evaluator()
    training, testing = testfm.split.holdoutByRandom(self.df, 0.9)
    catalogue = training.item.unique()

    # NOTE(review): the other visible call sites build the model as
    # Popularity(normalize=False); confirm that the first positional
    # parameter is really meant to receive the number of items.
    candidate = Popularity(len(catalogue))
    benchmark = TFMPopularity()

    # Fit both models first, then score them, candidate before benchmark.
    for model in (candidate, benchmark):
        model.fit(training)

    candidate_score, benchmark_score = [
        scorer.evaluate_model(model, testing, all_items=catalogue)[0]
        for model in (candidate, benchmark)
    ]

    gap = abs(candidate_score - benchmark_score)
    assert gap < .1, \
        "Popularity score is not close enough to testfm benchmark (%.3f != %.3f)" % (candidate_score, benchmark_score)
# specific environments.
#
# Thank you.
#
#########################################

# Settings shared by every Okapi-backed model in the list below.
#
# Commented-out alternative settings, kept for reference:
#   host='54.72.18.118', user='******',
#   okapi_jar_dir='/Users/linas/devel/okapi/target/',
#   okapi_jar_base_name='okapi-0.3.2-SNAPSHOT-jar-with-dependencies.jar',
#   public_key_path='/Users/linas/.ssh/hack-okapi.pem'
okapi_settings = dict(
    hadoop_source="/data/b.ajf/hadoop1_env.sh",
    host="igraph-01",
    okapi_jar_dir="okapi/jar/",
)

# Tell me what models we want to evaluate
models = [
    RandomModel(),
    PopularityOkapi(**okapi_settings),
    Popularity(normalize=False),
    BPROkapi(**okapi_settings),
]

# Setup the environment
evaluator = Evaluator()

for m in models:
    m.fit(df)