def test_python_recall(rating_true, rating_pred, rating_nohit):
    assert recall_at_k(
        rating_true=rating_true,
        rating_pred=rating_true,
        col_prediction=DEFAULT_RATING_COL,
        k=10,
    ) == pytest.approx(1, TOL)
    assert recall_at_k(rating_true, rating_nohit, k=10) == 0.0
    assert recall_at_k(rating_true, rating_pred, k=10) == pytest.approx(0.37777, TOL)
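
# Reference for the assertions above (a sketch of the metric, not from the test
# itself): for each user, recall@k = |relevant items in the top-k predictions| /
# |relevant items|, averaged over users. Scoring the ground truth against itself
# puts every relevant item in the top-k list, hence recall == 1; `rating_nohit`
# shares no items with the ground truth, hence 0.0; the mixed `rating_pred`
# fixture yields an average per-user recall of about 0.378 at k=10.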
def ranking_metrics_python(test, predictions, k=DEFAULT_K):
    return {
        "MAP": map_at_k(test, predictions, k=k, **COL_DICT),
        "nDCG@k": ndcg_at_k(test, predictions, k=k, **COL_DICT),
        "Precision@k": precision_at_k(test, predictions, k=k, **COL_DICT),
        "Recall@k": recall_at_k(test, predictions, k=k, **COL_DICT),
    }
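
# Minimal usage sketch for `ranking_metrics_python` (illustrative only; assumes
# COL_DICT maps to the "userID"/"itemID"/"rating"/"prediction" column names used
# by the fixtures in this file; the toy DataFrames are not part of the suite):
#
#   test = pd.DataFrame(
#       {"userID": [1, 1, 2], "itemID": [1, 2, 1], "rating": [5, 4, 3]}
#   )
#   predictions = pd.DataFrame(
#       {"userID": [1, 1, 2], "itemID": [1, 3, 1], "prediction": [0.9, 0.6, 0.8]}
#   )
#   metrics = ranking_metrics_python(test, predictions, k=10)
#   # -> {"MAP": ..., "nDCG@k": ..., "Precision@k": ..., "Recall@k": ...}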
def test_python_errors(rating_true, rating_pred):
    with pytest.raises(ValueError):
        rmse(rating_true, rating_true, col_user="not_user")
    with pytest.raises(ValueError):
        mae(
            rating_pred,
            rating_pred,
            col_rating=DEFAULT_PREDICTION_COL,
            col_user="not_user",
        )
    with pytest.raises(ValueError):
        rsquared(rating_true, rating_pred, col_item="not_item")
    with pytest.raises(ValueError):
        exp_var(
            rating_pred,
            rating_pred,
            col_rating=DEFAULT_PREDICTION_COL,
            col_item="not_item",
        )
    with pytest.raises(ValueError):
        precision_at_k(rating_true, rating_pred, col_rating="not_rating")
    with pytest.raises(ValueError):
        recall_at_k(rating_true, rating_pred, col_prediction="not_prediction")
    with pytest.raises(ValueError):
        ndcg_at_k(rating_true, rating_true, col_user="not_user")
    with pytest.raises(ValueError):
        map_at_k(
            rating_pred,
            rating_pred,
            col_rating=DEFAULT_PREDICTION_COL,
            col_user="not_user",
        )
def test_recommend_k_items(rating_true):
    train_set = cornac.data.Dataset.from_uir(
        rating_true.itertuples(index=False), seed=42
    )
    bpr = cornac.models.BPR(k=100, max_iter=10000, seed=42).fit(train_set)
    preds = predict_ranking(bpr, rating_true, remove_seen=False)

    n_users = len(rating_true["userID"].unique())
    n_items = len(rating_true["itemID"].unique())
    assert preds.shape[0] == n_users * n_items

    assert set(preds.columns) == {"userID", "itemID", "prediction"}
    assert preds["userID"].dtypes == rating_true["userID"].dtypes
    assert preds["itemID"].dtypes == rating_true["itemID"].dtypes

    # perfect ranking achieved
    assert 1e-10 > 1 - ndcg_at_k(rating_true, preds)
    assert 1e-10 > 1 - recall_at_k(rating_true, preds)
def run_eval(self):
    """Run evaluation on self.data.test.

    Returns:
        list: Results of all metrics in `self.metrics`.
    """
    topk_scores = self.recommend_k_items(
        self.data.test, top_k=self.top_k, use_id=True
    )
    ret = []
    for metric in self.metrics:
        if metric == "map":
            ret.append(
                map_at_k(
                    self.data.test, topk_scores, relevancy_method=None, k=self.top_k
                )
            )
        elif metric == "ndcg":
            ret.append(
                ndcg_at_k(
                    self.data.test, topk_scores, relevancy_method=None, k=self.top_k
                )
            )
        elif metric == "precision":
            ret.append(
                precision_at_k(
                    self.data.test, topk_scores, relevancy_method=None, k=self.top_k
                )
            )
        elif metric == "recall":
            ret.append(
                recall_at_k(
                    self.data.test, topk_scores, relevancy_method=None, k=self.top_k
                )
            )
    return ret
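
# Hypothetical driver for `run_eval` (a sketch, assuming the enclosing model class
# exposes `data`, `metrics`, and `top_k` as used above; names are illustrative):
#
#   model.metrics = ["map", "ndcg", "precision", "recall"]
#   model.top_k = 10
#   map_score, ndcg_score, precision, recall = model.run_eval()
#   # Values come back in the order of `self.metrics`.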
def test_spark_python_match(python_data, spark):
    # Test on the original data with k = 10.
    df_true, df_pred = python_data
    dfs_true = spark.createDataFrame(df_true)
    dfs_pred = spark.createDataFrame(df_pred)
    eval_spark1 = SparkRankingEvaluation(dfs_true, dfs_pred, k=10)

    assert recall_at_k(df_true, df_pred, k=10) == pytest.approx(eval_spark1.recall_at_k(), TOL)
    assert precision_at_k(df_true, df_pred, k=10) == pytest.approx(eval_spark1.precision_at_k(), TOL)
    assert ndcg_at_k(df_true, df_pred, k=10) == pytest.approx(eval_spark1.ndcg_at_k(), TOL)
    assert map_at_k(df_true, df_pred, k=10) == pytest.approx(eval_spark1.map_at_k(), TOL)

    # Test on the original data with k = 3.
    dfs_true = spark.createDataFrame(df_true)
    dfs_pred = spark.createDataFrame(df_pred)
    eval_spark2 = SparkRankingEvaluation(dfs_true, dfs_pred, k=3)

    assert recall_at_k(df_true, df_pred, k=3) == pytest.approx(eval_spark2.recall_at_k(), TOL)
    assert precision_at_k(df_true, df_pred, k=3) == pytest.approx(eval_spark2.precision_at_k(), TOL)
    assert ndcg_at_k(df_true, df_pred, k=3) == pytest.approx(eval_spark2.ndcg_at_k(), TOL)
    assert map_at_k(df_true, df_pred, k=3) == pytest.approx(eval_spark2.map_at_k(), TOL)

    # Remove the first and last rows from the predictions.
    df_pred = df_pred[1:-1]
    dfs_true = spark.createDataFrame(df_true)
    dfs_pred = spark.createDataFrame(df_pred)
    eval_spark3 = SparkRankingEvaluation(dfs_true, dfs_pred, k=10)

    assert recall_at_k(df_true, df_pred, k=10) == pytest.approx(eval_spark3.recall_at_k(), TOL)
    assert precision_at_k(df_true, df_pred, k=10) == pytest.approx(eval_spark3.precision_at_k(), TOL)
    assert ndcg_at_k(df_true, df_pred, k=10) == pytest.approx(eval_spark3.ndcg_at_k(), TOL)
    assert map_at_k(df_true, df_pred, k=10) == pytest.approx(eval_spark3.map_at_k(), TOL)

    # Test with a single user.
    df_pred = df_pred.loc[df_pred["userID"] == 3]
    df_true = df_true.loc[df_true["userID"] == 3]
    dfs_true = spark.createDataFrame(df_true)
    dfs_pred = spark.createDataFrame(df_pred)
    eval_spark4 = SparkRankingEvaluation(dfs_true, dfs_pred, k=10)

    assert recall_at_k(df_true, df_pred, k=10) == pytest.approx(eval_spark4.recall_at_k(), TOL)
    assert precision_at_k(df_true, df_pred, k=10) == pytest.approx(eval_spark4.precision_at_k(), TOL)
    assert ndcg_at_k(df_true, df_pred, k=10) == pytest.approx(eval_spark4.ndcg_at_k(), TOL)
    assert map_at_k(df_true, df_pred, k=10) == pytest.approx(eval_spark4.map_at_k(), TOL)
logger.debug(f"Prediction: {col_prediction}") logger.debug(f"Relevancy: {relevancy_method}") logger.debug(f"K: {k}") logger.debug(f"Threshold: {threshold}") logger.debug(f"Rating True path: {args.rating_true}") logger.debug(f"Shape of loaded DataFrame: {rating_true.shape}") logger.debug(f"Rating Pred path: {args.rating_pred}") logger.debug(f"Shape of loaded DataFrame: {rating_pred.shape}") eval_recall = recall_at_k( rating_true, rating_pred, col_user=col_user, col_item=col_item, col_rating=col_rating, col_prediction=col_prediction, relevancy_method=relevancy_method, k=k, threshold=threshold, ) logger.debug(f"Score: {eval_recall}") # Log to AzureML dashboard run = Run.get_context() run.parent.log("Recall at {}".format(k), eval_recall) score_result = pd.DataFrame({"recall_at_k": [eval_recall]}) save_data_frame_to_directory( args.score_result,