def test_python_errors(rating_true, rating_pred):
    """Each metric must raise ValueError when given a nonexistent column name."""
    # (metric, positional args, bad-column kwargs) — order preserved from the original.
    failing_calls = [
        (rmse, (rating_true, rating_true), {"col_user": "******"}),
        (mae, (rating_pred, rating_pred), {"col_rating": DEFAULT_PREDICTION_COL, "col_user": "******"}),
        (rsquared, (rating_true, rating_pred), {"col_item": "not_item"}),
        (exp_var, (rating_pred, rating_pred), {"col_rating": DEFAULT_PREDICTION_COL, "col_item": "not_item"}),
        (precision_at_k, (rating_true, rating_pred), {"col_rating": "not_rating"}),
        (recall_at_k, (rating_true, rating_pred), {"col_prediction": "not_prediction"}),
        (ndcg_at_k, (rating_true, rating_true), {"col_user": "******"}),
        (map_at_k, (rating_pred, rating_pred), {"col_rating": DEFAULT_PREDICTION_COL, "col_user": "******"}),
    ]
    for metric, args, kwargs in failing_calls:
        with pytest.raises(ValueError):
            metric(*args, **kwargs)
def test_python_errors(rating_true, rating_pred):
    """Each metric must raise ValueError when pointed at a missing column."""

    def expect_value_error(metric, *args, **kwargs):
        # Helper: the call is only correct if it raises ValueError.
        with pytest.raises(ValueError):
            metric(*args, **kwargs)

    expect_value_error(rmse, rating_true, rating_true, col_user="******")
    expect_value_error(mae, rating_pred, rating_pred, col_rating=DEFAULT_PREDICTION_COL, col_user="******")
    expect_value_error(rsquared, rating_true, rating_pred, col_item="not_item")
    expect_value_error(exp_var, rating_pred, rating_pred, col_rating=DEFAULT_PREDICTION_COL, col_item="not_item")
    expect_value_error(precision_at_k, rating_true, rating_pred, col_rating="not_rating")
    expect_value_error(recall_at_k, rating_true, rating_pred, col_prediction="not_prediction")
    expect_value_error(ndcg_at_k, rating_true, rating_true, col_user="******")
    expect_value_error(map_at_k, rating_pred, rating_pred, col_rating=DEFAULT_PREDICTION_COL, col_user="******")
def test_python_errors(python_data):
    """Each metric must raise ValueError on a wrong column name."""
    rating_true, rating_pred, _ = python_data(binary_rating=False)
    # Deferred calls, each expected to fail; order preserved from the original.
    checks = (
        lambda: rmse(rating_true, rating_true, col_user="******"),
        lambda: mae(rating_pred, rating_pred, col_rating=PREDICTION_COL, col_user="******"),
        lambda: rsquared(rating_true, rating_pred, col_item="not_item"),
        lambda: exp_var(rating_pred, rating_pred, col_rating=PREDICTION_COL, col_item="not_item"),
        lambda: precision_at_k(rating_true, rating_pred, col_rating="not_rating"),
        lambda: recall_at_k(rating_true, rating_pred, col_prediction="not_prediction"),
        lambda: ndcg_at_k(rating_true, rating_true, col_user="******"),
        lambda: map_at_k(rating_pred, rating_pred, col_rating=PREDICTION_COL, col_user="******"),
    )
    for check in checks:
        with pytest.raises(ValueError):
            check()
def test_python_map_at_k(python_data, target_metrics):
    """MAP@10 is 1 when truth is scored against itself, and matches the reference."""
    rating_true, rating_pred = python_data
    # Perfect self-ranking: every relevant item is ranked by its own rating.
    self_score = map_at_k(
        k=10,
        rating_true=rating_true,
        rating_pred=rating_true,
        col_prediction="rating",
    )
    assert self_score == 1
    assert map_at_k(rating_true, rating_pred, k=10) == target_metrics["map"]
def test_python_map_at_k(rating_true, rating_pred, rating_nohit):
    """MAP@10: perfect ranking scores 1, no hits scores 0, reference value otherwise."""
    perfect = map_at_k(
        rating_true=rating_true,
        rating_pred=rating_true,
        col_prediction=DEFAULT_RATING_COL,
        k=10,
    )
    assert perfect == 1
    no_hits = map_at_k(rating_true, rating_nohit, k=10)
    assert no_hits == 0.0
    actual = map_at_k(rating_true, rating_pred, k=10)
    assert actual == pytest.approx(0.23613, TOL)
def test_python_map_at_k(python_data, target_metrics):
    """MAP@10: 1 against itself, 0 with no hits, reference value on predictions."""
    rating_true, rating_pred, rating_nohit = python_data(binary_rating=False)
    # Scoring the ground truth against itself must be a perfect ranking.
    assert (
        map_at_k(
            k=10,
            rating_true=rating_true,
            rating_pred=rating_true,
            col_prediction=DEFAULT_RATING_COL,
        )
        == 1
    )
    # Predictions that never hit a relevant item score zero.
    assert map_at_k(rating_true, rating_nohit, k=10) == 0.0
    # Real predictions must reproduce the precomputed target metric.
    assert map_at_k(rating_true, rating_pred, k=10) == target_metrics["map"]
def test_python_map_at_k(rating_true, rating_pred, rating_nohit):
    """MAP@10 sanity checks: perfect, no-hit, and reference scores."""
    # Ranking the truth by its own rating column is a perfect prediction.
    perfect_args = dict(
        rating_true=rating_true,
        rating_pred=rating_true,
        col_prediction=DEFAULT_RATING_COL,
        k=10,
    )
    assert map_at_k(**perfect_args) == 1
    assert map_at_k(rating_true, rating_nohit, k=10) == 0.0
    assert map_at_k(rating_true, rating_pred, k=10) == pytest.approx(0.23613, TOL)
def ranking_metrics_python(test, predictions, k=DEFAULT_K):
    """Compute the four Python ranking metrics at cutoff k.

    Args:
        test: ground-truth interactions.
        predictions: model predictions to evaluate.
        k: ranking cutoff (defaults to DEFAULT_K).

    Returns:
        dict: MAP, nDCG@k, Precision@k and Recall@k keyed by display name.
    """
    metrics = {}
    metrics["MAP"] = map_at_k(test, predictions, k=k, **COL_DICT)
    metrics["nDCG@k"] = ndcg_at_k(test, predictions, k=k, **COL_DICT)
    metrics["Precision@k"] = precision_at_k(test, predictions, k=k, **COL_DICT)
    metrics["Recall@k"] = recall_at_k(test, predictions, k=k, **COL_DICT)
    return metrics
def ranking_metrics_python(test, predictions, k=DEFAULT_K):
    """Evaluate predictions against test data with the four ranking metrics at k.

    Returns:
        dict: metric display name -> score.
    """
    # Insertion order of this mapping fixes the key order of the result dict.
    metric_fns = {
        "MAP": map_at_k,
        "nDCG@k": ndcg_at_k,
        "Precision@k": precision_at_k,
        "Recall@k": recall_at_k,
    }
    return {name: fn(test, predictions, k=k, **COL_DICT) for name, fn in metric_fns.items()}
def ranking_metrics(data_size, data_true, data_pred, time_train, time_test, K):
    """Compute ranking metrics at cutoff K and collect them into a one-row DataFrame.

    Args:
        data_size: label for the dataset (e.g. "100k").
        data_true: ground-truth ratings DataFrame.
        data_pred: predicted ratings DataFrame.
        time_train: training time in seconds.
        time_test: prediction time in seconds.
        K: ranking cutoff.

    Returns:
        pd.DataFrame: single-row frame with the dataset label, K, the four
        ranking metrics, and the train/test timings.
    """
    # All four metrics share the same column mapping and cutoff.
    common_kwargs = dict(
        col_user="******",
        col_item="MovieID",
        col_rating="Rating",
        col_prediction="prediction",
        relevancy_method="top_k",
        k=K,
    )
    eval_map = map_at_k(data_true, data_pred, **common_kwargs)
    eval_ndcg = ndcg_at_k(data_true, data_pred, **common_kwargs)
    eval_precision = precision_at_k(data_true, data_pred, **common_kwargs)
    eval_recall = recall_at_k(data_true, data_pred, **common_kwargs)
    df_result = pd.DataFrame(
        {
            "Dataset": data_size,
            # Fix: the original reported the module-level TOPK global here,
            # which is wrong whenever the caller passes a different K.
            "K": K,
            "MAP": eval_map,
            "nDCG@k": eval_ndcg,
            "Precision@k": eval_precision,
            "Recall@k": eval_recall,
            "Train time (s)": time_train,
            "Test time (s)": time_test,
        },
        index=[0],
    )
    return df_result
def run_eval(self):
    """Run evaluation on self.data.test.

    Returns:
        dict: Results of all metrics in self.metrics.
    """
    topk_scores = self.recommend_k_items(self.data.test, top_k=self.top_k, use_id=True)
    # Dispatch table replaces the if/elif chain; unknown metric names are
    # silently skipped, matching the original behavior.
    metric_fns = {
        "map": map_at_k,
        "ndcg": ndcg_at_k,
        "precision": precision_at_k,
        "recall": recall_at_k,
    }
    results = []
    for metric in self.metrics:
        fn = metric_fns.get(metric)
        if fn is not None:
            results.append(fn(self.data.test, topk_scores, relevancy_method=None, k=self.top_k))
    return results
# Time the top-k recommendation step and log it to the AzureML run.
start_time = time.time()
top_k = model.recommend_k_items(test)
test_time = time.time() - start_time
run.log(name="Prediction time", value=test_time)
# TODO: remove this call when the model returns same type as input
top_k['UserId'] = pd.to_numeric(top_k['UserId'])
top_k['MovieId'] = pd.to_numeric(top_k['MovieId'])
# evaluate
eval_map = map_at_k(test, top_k, col_user="******", col_item="MovieId", col_rating="Rating", col_prediction="prediction", relevancy_method="top_k", k=args.top_k)
eval_ndcg = ndcg_at_k(test, top_k, col_user="******", col_item="MovieId", col_rating="Rating", col_prediction="prediction", relevancy_method="top_k", k=args.top_k)
# NOTE(review): this statement is truncated in the visible chunk — the call
# continues past the end of this snippet.
eval_precision = precision_at_k(test, top_k, col_user="******", col_item="MovieId",
def test_spark_python_match(python_data, spark):
    """Python and Spark ranking evaluators must agree on all four metrics."""
    df_true, df_pred = python_data

    def metrics_match(truth, preds, k):
        # Build fresh Spark frames and compare every Python metric to its
        # SparkRankingEvaluation counterpart within TOL.
        evaluation = SparkRankingEvaluation(
            spark.createDataFrame(truth), spark.createDataFrame(preds), k=k
        )
        return [
            recall_at_k(truth, preds, k=k) == pytest.approx(evaluation.recall_at_k(), TOL),
            precision_at_k(truth, preds, k=k) == pytest.approx(evaluation.precision_at_k(), TOL),
            ndcg_at_k(truth, preds, k=k) == pytest.approx(evaluation.ndcg_at_k(), TOL),
            map_at_k(truth, preds, k=k) == pytest.approx(evaluation.map_at_k(), TOL),
        ]

    # Original data at k = 10 and k = 3.
    assert all(metrics_match(df_true, df_pred, 10))
    assert all(metrics_match(df_true, df_pred, 3))

    # Drop the first and last prediction rows (the original comment said
    # "first row", but the slice removes both ends) and re-check at k = 10.
    df_pred = df_pred[1:-1]
    assert all(metrics_match(df_true, df_pred, 10))

    # Restrict both frames to a single user and re-check at k = 10.
    df_pred = df_pred[df_pred["userID"] == 3]
    df_true = df_true[df_true["userID"] == 3]
    assert all(metrics_match(df_true, df_pred, 10))
# Dump the effective evaluation parameters and the shapes of the loaded
# DataFrames before scoring.
logger.debug(f"Prediction: {col_prediction}")
logger.debug(f"Relevancy: {relevancy_method}")
logger.debug(f"K: {k}")
logger.debug(f"Threshold: {threshold}")
logger.debug(f"Rating True path: {args.rating_true}")
logger.debug(f"Shape of loaded DataFrame: {rating_true.shape}")
logger.debug(f"Rating Pred path: {args.rating_pred}")
logger.debug(f"Shape of loaded DataFrame: {rating_pred.shape}")
# Compute MAP@k with the column mapping supplied on the command line.
eval_map = map_at_k(
    rating_true,
    rating_pred,
    col_user=col_user,
    col_item=col_item,
    col_rating=col_rating,
    col_prediction=col_prediction,
    relevancy_method=relevancy_method,
    k=k,
    threshold=threshold,
)
logger.debug(f"Score: {eval_map}")
# Log to AzureML dashboard
run = Run.get_context()
run.parent.log("MAP at {}".format(k), eval_map)
# Persist the score as a one-row DataFrame for the downstream pipeline step.
score_result = pd.DataFrame({"map_at_k": [eval_map]})
# NOTE(review): this call is truncated in the visible chunk — the remaining
# arguments continue past the end of this snippet.
save_data_frame_to_directory(
    args.score_result,
with Timer() as train_time: model.fit(train) # print("Took {} seconds for training.".format(train_time.interval)) with Timer() as test_time: top_k = model.recommend_k_items(test, remove_seen=True) # print("Took {} seconds for prediction.".format(test_time.interval)) # top_k.head() eval_map = map_at_k(test, top_k, col_user='******', col_item='itemID', col_rating='rating', k=TOP_K) eval_ndcg = ndcg_at_k(test, top_k, col_user='******', col_item='itemID', col_rating='rating', k=TOP_K) eval_precision = precision_at_k(test, top_k, col_user='******', col_item='itemID', col_rating='rating', k=TOP_K) eval_recall = recall_at_k(test,
def test_spark_python_match(python_data, spark):
    """The Python and Spark ranking evaluators must produce matching scores."""
    df_true, df_pred = python_data

    def compare_metrics(truth, preds, k):
        # One SparkRankingEvaluation per comparison round; each pair couples a
        # Python metric function with its Spark counterpart method.
        spark_eval = SparkRankingEvaluation(
            spark.createDataFrame(truth), spark.createDataFrame(preds), k=k
        )
        pairs = (
            (recall_at_k, spark_eval.recall_at_k),
            (precision_at_k, spark_eval.precision_at_k),
            (ndcg_at_k, spark_eval.ndcg_at_k),
            (map_at_k, spark_eval.map_at_k),
        )
        return [py_fn(truth, preds, k=k) == pytest.approx(sp_fn(), TOL) for py_fn, sp_fn in pairs]

    # Original data with k = 10, then k = 3.
    assert all(compare_metrics(df_true, df_pred, 10))
    assert all(compare_metrics(df_true, df_pred, 3))

    # Trim the first and last prediction rows and re-check at k = 10.
    df_pred = df_pred[1:-1]
    assert all(compare_metrics(df_true, df_pred, 10))

    # Restrict both frames to user 3 and re-check at k = 10.
    df_pred = df_pred.loc[df_pred["userID"] == 3]
    df_true = df_true.loc[df_true["userID"] == 3]
    assert all(compare_metrics(df_true, df_pred, 10))
test_time = time.time() - start_time # st.write("Test_Time:",test_time) print("Took {} seconds for {} predictions.".format(test_time, len(training_removed))) top_k_scores[USER] = top_k_scores['UserId'] top_k_scores[ITEM] = top_k_scores['SnackId'] top_k_scores[PREDICTION] = top_k_scores['Prediction'] top_k_scores.head() # st.write("Top k Scores:",top_k_scores) data_test.head() # st.write("Data_Test:",data_test) eval_map = map_at_k(data_test, top_k_scores, col_user=USER, col_item=ITEM, col_rating=RATING, col_prediction=PREDICTION, relevancy_method="top_k", k=TOP_K) st.write("MAP:",eval_map) eval_ndcg = ndcg_at_k(data_test, top_k_scores, col_user=USER, col_item=ITEM, col_rating=RATING, col_prediction=PREDICTION, relevancy_method="top_k", k=TOP_K) st.write("NDCG:",eval_ndcg) eval_precision = precision_at_k(data_test, top_k_scores, col_user=USER, col_item=ITEM, col_rating=RATING, col_prediction=PREDICTION, relevancy_method="top_k", k=TOP_K) st.write("Precision:",eval_precision) eval_recall = recall_at_k(data_test, top_k_scores, col_user=USER, col_item=ITEM, col_rating=RATING, col_prediction=PREDICTION, relevancy_method="top_k", k=TOP_K) st.write("Recall:",eval_recall) print("Model:\t" + learn.__class__.__name__,
# Fit the model and generate top-k recommendations (seen items removed).
model.fit(train)
top_k = model.recommend_k_items(test, remove_seen=True)

# Attach movie titles to the recommendations, best predictions first per user.
titles = data[['MovieId', 'Title']].drop_duplicates().set_index('MovieId')
top_k_with_titles = top_k.join(titles, on='MovieId', how='inner').sort_values(
    by=['UserId', 'Prediction'], ascending=False
)

# Shared arguments for all four ranking metrics.
args = [test, top_k]
kwargs = {
    'col_user': '******',
    'col_item': 'MovieId',
    'col_rating': 'Rating',
    'col_prediction': 'Prediction',
    'relevancy_method': 'top_k',
    'k': TOP_K,
}
eval_map = map_at_k(*args, **kwargs)
eval_ndcg = ndcg_at_k(*args, **kwargs)
eval_precision = precision_at_k(*args, **kwargs)
eval_recall = recall_at_k(*args, **kwargs)

# One metric per line in the report.
report_lines = (
    f"Model:",
    f"Top K:\t\t {TOP_K}",
    f"MAP:\t\t {eval_map:f}",
    f"NDCG:\t\t {eval_ndcg:f}",
    f"Precision@K:\t {eval_precision:f}",
    f"Recall@K:\t {eval_recall:f}",
)
print(*report_lines, sep='\n')