def test_python_errors(python_data):
    rating_true, rating_pred, _ = python_data(binary_rating=False)

    with pytest.raises(ValueError):
        rmse(rating_true, rating_true, col_user="******")

    with pytest.raises(ValueError):
        mae(rating_pred, rating_pred, col_rating=PREDICTION_COL, col_user="******")

    with pytest.raises(ValueError):
        rsquared(rating_true, rating_pred, col_item="not_item")

    with pytest.raises(ValueError):
        exp_var(rating_pred, rating_pred, col_rating=PREDICTION_COL, col_item="not_item")

    with pytest.raises(ValueError):
        precision_at_k(rating_true, rating_pred, col_rating="not_rating")

    with pytest.raises(ValueError):
        recall_at_k(rating_true, rating_pred, col_prediction="not_prediction")

    with pytest.raises(ValueError):
        ndcg_at_k(rating_true, rating_true, col_user="******")

    with pytest.raises(ValueError):
        map_at_k(rating_pred, rating_pred, col_rating=PREDICTION_COL, col_user="******")

def test_python_errors(rating_true, rating_pred):
    with pytest.raises(ValueError):
        rmse(rating_true, rating_true, col_user="******")

    with pytest.raises(ValueError):
        mae(rating_pred, rating_pred, col_rating=DEFAULT_PREDICTION_COL, col_user="******")

    with pytest.raises(ValueError):
        rsquared(rating_true, rating_pred, col_item="not_item")

    with pytest.raises(ValueError):
        exp_var(
            rating_pred, rating_pred, col_rating=DEFAULT_PREDICTION_COL, col_item="not_item"
        )

    with pytest.raises(ValueError):
        precision_at_k(rating_true, rating_pred, col_rating="not_rating")

    with pytest.raises(ValueError):
        recall_at_k(rating_true, rating_pred, col_prediction="not_prediction")

    with pytest.raises(ValueError):
        ndcg_at_k(rating_true, rating_true, col_user="******")

    with pytest.raises(ValueError):
        map_at_k(
            rating_pred, rating_pred, col_rating=DEFAULT_PREDICTION_COL, col_user="******"
        )

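# The error tests above appear to exercise the metric functions from the Microsoft
# Recommenders package (recommenders.evaluation.python_evaluation; older releases
# expose the same functions under reco_utils.evaluation.python_evaluation). For
# contrast, a minimal sketch of a valid ndcg_at_k call on hand-built DataFrames,
# assuming the library's default column names ("userID", "itemID", "rating",
# "prediction"):
import pandas as pd

from recommenders.evaluation.python_evaluation import ndcg_at_k

rating_true = pd.DataFrame(
    {"userID": [1, 1, 2], "itemID": [10, 11, 10], "rating": [5.0, 4.0, 3.0]}
)
rating_pred = pd.DataFrame(
    {"userID": [1, 1, 2], "itemID": [10, 12, 10], "prediction": [4.5, 4.0, 3.5]}
)

# NDCG@10 over the top-k predicted items per user; the ValueError cases above arise
# only when a col_* argument names a column that is missing from the DataFrames.
print(ndcg_at_k(rating_true, rating_pred, k=10))
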
def test_python_ndcg_at_k(python_data, target_metrics):
    rating_true, rating_pred = python_data

    assert (
        ndcg_at_k(
            k=10,
            rating_true=rating_true,
            rating_pred=rating_true,
            col_prediction="rating",
        )
        == 1
    )
    assert ndcg_at_k(rating_true, rating_pred, k=10) == target_metrics["ndcg"]

def test_python_ndcg_at_k(rating_true, rating_pred, rating_nohit):
    assert (
        ndcg_at_k(
            rating_true=rating_true,
            rating_pred=rating_true,
            col_prediction=DEFAULT_RATING_COL,
            k=10,
        )
        == 1
    )
    assert ndcg_at_k(rating_true, rating_nohit, k=10) == 0.0
    assert ndcg_at_k(rating_true, rating_pred, k=10) == pytest.approx(0.38172, TOL)

def test_python_ndcg_at_k(python_data, target_metrics):
    rating_true, rating_pred, rating_nohit = python_data(binary_rating=False)

    assert (
        ndcg_at_k(
            k=10,
            rating_true=rating_true,
            rating_pred=rating_true,
            col_prediction=DEFAULT_RATING_COL,
        )
        == 1
    )
    assert ndcg_at_k(rating_true, rating_nohit, k=10) == 0.0
    assert ndcg_at_k(rating_true, rating_pred, k=10) == target_metrics["ndcg"]

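# The target_metrics fixture referenced above is not shown in these snippets. A
# plausible sketch, assuming it simply maps metric names to the expected values that
# the other test variant hard-codes (NDCG@10 of roughly 0.38172 within TOL):
import pytest

TOL = 0.0001  # assumed tolerance; the value in the original conftest may differ


@pytest.fixture
def target_metrics():
    return {"ndcg": pytest.approx(0.38172, TOL)}
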
def on_epoch_end(self, batch, logs={}):
    """At the end of each epoch calculate NDCG@k of the validation set.

    If the model performance is improved, the model weights are saved.
    Update the list of validation NDCG@k by adding the obtained value.
    """
    # recommend top k items based on the training part of the validation set
    top_k = self.recommend_k_items(x=self.val_tr, k=self.k, remove_seen=True)

    # convert recommendations from sparse matrix to dataframe
    top_k_df = self.mapper.map_back_sparse(top_k, kind='prediction')
    test_df = self.mapper.map_back_sparse(self.val_te, kind='ratings')

    # calculate NDCG@k
    NDCG = ndcg_at_k(test_df, top_k_df, col_prediction='prediction', k=self.k)

    # check if there is an improvement in NDCG; if so, update the saved model
    if NDCG > self.best_ndcg:
        self.best_ndcg = NDCG
        # save the weights of the optimal model
        if self.save_path is not None:
            self.model.save(self.save_path)

    self._data.append(NDCG)

def ranking_metrics_python(test, predictions, k=DEFAULT_K):
    return {
        "MAP": map_at_k(test, predictions, k=k, **COL_DICT),
        "nDCG@k": ndcg_at_k(test, predictions, k=k, **COL_DICT),
        "Precision@k": precision_at_k(test, predictions, k=k, **COL_DICT),
        "Recall@k": recall_at_k(test, predictions, k=k, **COL_DICT),
    }

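# COL_DICT is not defined in the helper above. A plausible sketch, assuming it maps
# the metric keyword arguments to the dataset's column names (the values shown are
# the recommenders defaults and may differ from the original benchmark code):
COL_DICT = {
    "col_user": "userID",
    "col_item": "itemID",
    "col_rating": "rating",
    "col_prediction": "prediction",
}
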
def ranking_metrics(data_size, data_true, data_pred, time_train, time_test, K):
    eval_map = map_at_k(data_true, data_pred, col_user="******", col_item="MovieID",
                        col_rating="Rating", col_prediction="prediction",
                        relevancy_method="top_k", k=K)
    eval_ndcg = ndcg_at_k(data_true, data_pred, col_user="******", col_item="MovieID",
                          col_rating="Rating", col_prediction="prediction",
                          relevancy_method="top_k", k=K)
    eval_precision = precision_at_k(data_true, data_pred, col_user="******", col_item="MovieID",
                                    col_rating="Rating", col_prediction="prediction",
                                    relevancy_method="top_k", k=K)
    eval_recall = recall_at_k(data_true, data_pred, col_user="******", col_item="MovieID",
                              col_rating="Rating", col_prediction="prediction",
                              relevancy_method="top_k", k=K)

    df_result = pd.DataFrame(
        {
            "Dataset": data_size,
            "K": TOPK,
            "MAP": eval_map,
            "nDCG@k": eval_ndcg,
            "Precision@k": eval_precision,
            "Recall@k": eval_recall,
            "Train time (s)": time_train,
            "Test time (s)": time_test,
        },
        index=[0],
    )

    return df_result

def test_predict_ranking(rating_true):
    train_set = cornac.data.Dataset.from_uir(rating_true.itertuples(index=False), seed=42)
    bpr = cornac.models.BPR(k=100, max_iter=10000, seed=42).fit(train_set)

    preds = predict_ranking(bpr, rating_true, remove_seen=False)

    n_users = len(rating_true["userID"].unique())
    n_items = len(rating_true["itemID"].unique())

    assert preds.shape[0] == n_users * n_items
    assert set(preds.columns) == {"userID", "itemID", "prediction"}
    assert preds["userID"].dtypes == rating_true["userID"].dtypes
    assert preds["itemID"].dtypes == rating_true["itemID"].dtypes

    # perfect ranking achieved
    assert 1e-10 > 1 - ndcg_at_k(rating_true, preds)
    assert 1e-10 > 1 - recall_at_k(rating_true, preds)

def run_eval(self):
    """Run evaluation on self.data.test.

    Returns:
        list: Results of all metrics in self.metrics.
    """
    topk_scores = self.recommend_k_items(self.data.test, top_k=self.top_k, use_id=True)
    ret = []
    for metric in self.metrics:
        if metric == "map":
            ret.append(
                map_at_k(self.data.test, topk_scores, relevancy_method=None, k=self.top_k)
            )
        elif metric == "ndcg":
            ret.append(
                ndcg_at_k(self.data.test, topk_scores, relevancy_method=None, k=self.top_k)
            )
        elif metric == "precision":
            ret.append(
                precision_at_k(self.data.test, topk_scores, relevancy_method=None, k=self.top_k)
            )
        elif metric == "recall":
            ret.append(
                recall_at_k(self.data.test, topk_scores, relevancy_method=None, k=self.top_k)
            )
    return ret

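# In run_eval above, relevancy_method=None is used because topk_scores already holds
# the recommended items; with this setting the Python ranking metrics rank the supplied
# predictions as-is instead of re-deriving a top-k cut by score. A minimal, hedged
# sketch of that call pattern on made-up data with the assumed default column names:
import pandas as pd

from recommenders.evaluation.python_evaluation import ndcg_at_k

test_df = pd.DataFrame({"userID": [1, 1], "itemID": [10, 11], "rating": [5.0, 4.0]})
reco_df = pd.DataFrame({"userID": [1, 1], "itemID": [10, 12], "prediction": [0.9, 0.8]})

print(ndcg_at_k(test_df, reco_df, relevancy_method=None, k=2))
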
top_k['UserId'] = pd.to_numeric(top_k['UserId'])
top_k['MovieId'] = pd.to_numeric(top_k['MovieId'])

# evaluate
eval_map = map_at_k(test, top_k, col_user="******", col_item="MovieId",
                    col_rating="Rating", col_prediction="prediction",
                    relevancy_method="top_k", k=args.top_k)
eval_ndcg = ndcg_at_k(test, top_k, col_user="******", col_item="MovieId",
                      col_rating="Rating", col_prediction="prediction",
                      relevancy_method="top_k", k=args.top_k)
eval_precision = precision_at_k(test, top_k, col_user="******", col_item="MovieId",
                                col_rating="Rating", col_prediction="prediction",
                                relevancy_method="top_k", k=args.top_k)
eval_recall = recall_at_k(test, top_k, col_user="******", col_item="MovieId",
                          col_rating="Rating", col_prediction="prediction",
                          relevancy_method="top_k", k=args.top_k)

def test_spark_python_match(python_data, spark):
    # Test on the original data with k = 10.
    df_true, df_pred = python_data

    dfs_true = spark.createDataFrame(df_true)
    dfs_pred = spark.createDataFrame(df_pred)

    eval_spark1 = SparkRankingEvaluation(dfs_true, dfs_pred, k=10)

    match1 = [
        recall_at_k(df_true, df_pred, k=10)
        == pytest.approx(eval_spark1.recall_at_k(), TOL),
        precision_at_k(df_true, df_pred, k=10)
        == pytest.approx(eval_spark1.precision_at_k(), TOL),
        ndcg_at_k(df_true, df_pred, k=10)
        == pytest.approx(eval_spark1.ndcg_at_k(), TOL),
        map_at_k(df_true, df_pred, k=10)
        == pytest.approx(eval_spark1.map_at_k(), TOL),
    ]
    assert all(match1)

    # Test on the original data with k = 3.
    dfs_true = spark.createDataFrame(df_true)
    dfs_pred = spark.createDataFrame(df_pred)

    eval_spark2 = SparkRankingEvaluation(dfs_true, dfs_pred, k=3)

    match2 = [
        recall_at_k(df_true, df_pred, k=3)
        == pytest.approx(eval_spark2.recall_at_k(), TOL),
        precision_at_k(df_true, df_pred, k=3)
        == pytest.approx(eval_spark2.precision_at_k(), TOL),
        ndcg_at_k(df_true, df_pred, k=3)
        == pytest.approx(eval_spark2.ndcg_at_k(), TOL),
        map_at_k(df_true, df_pred, k=3)
        == pytest.approx(eval_spark2.map_at_k(), TOL),
    ]
    assert all(match2)

    # Remove the first row from the original data.
    df_pred = df_pred[1:-1]

    dfs_true = spark.createDataFrame(df_true)
    dfs_pred = spark.createDataFrame(df_pred)

    eval_spark3 = SparkRankingEvaluation(dfs_true, dfs_pred, k=10)

    match3 = [
        recall_at_k(df_true, df_pred, k=10)
        == pytest.approx(eval_spark3.recall_at_k(), TOL),
        precision_at_k(df_true, df_pred, k=10)
        == pytest.approx(eval_spark3.precision_at_k(), TOL),
        ndcg_at_k(df_true, df_pred, k=10)
        == pytest.approx(eval_spark3.ndcg_at_k(), TOL),
        map_at_k(df_true, df_pred, k=10)
        == pytest.approx(eval_spark3.map_at_k(), TOL),
    ]
    assert all(match3)

    # Test with one user
    df_pred = df_pred[df_pred["userID"] == 3]
    df_true = df_true[df_true["userID"] == 3]

    dfs_true = spark.createDataFrame(df_true)
    dfs_pred = spark.createDataFrame(df_pred)

    eval_spark4 = SparkRankingEvaluation(dfs_true, dfs_pred, k=10)

    match4 = [
        recall_at_k(df_true, df_pred, k=10)
        == pytest.approx(eval_spark4.recall_at_k(), TOL),
        precision_at_k(df_true, df_pred, k=10)
        == pytest.approx(eval_spark4.precision_at_k(), TOL),
        ndcg_at_k(df_true, df_pred, k=10)
        == pytest.approx(eval_spark4.ndcg_at_k(), TOL),
        map_at_k(df_true, df_pred, k=10)
        == pytest.approx(eval_spark4.map_at_k(), TOL),
    ]
    assert all(match4)

logger.debug(f"Prediction: {col_prediction}") logger.debug(f"Relevancy: {relevancy_method}") logger.debug(f"K: {k}") logger.debug(f"Threshold: {threshold}") logger.debug(f"Rating True path: {args.rating_true}") logger.debug(f"Shape of loaded DataFrame: {rating_true.shape}") logger.debug(f"Rating Pred path: {args.rating_pred}") logger.debug(f"Shape of loaded DataFrame: {rating_pred.shape}") eval_ndcg = ndcg_at_k( rating_true, rating_pred, col_user=col_user, col_item=col_item, col_rating=col_rating, col_prediction=col_prediction, relevancy_method=relevancy_method, k=k, threshold=threshold, ) logger.debug(f"Score: {eval_ndcg}") # Log to AzureML dashboard run = Run.get_context() run.parent.log("nDCG at {}".format(k), eval_ndcg) score_result = pd.DataFrame({"ndcg_at_k": [eval_ndcg]}) save_data_frame_to_directory( args.score_result,
start = batch_idx * BATCH_SIZE
end = min((batch_idx + 1) * BATCH_SIZE, n_users)
batch_users = all_users[start:end]

batch_predictions = all_predictions[all_predictions["userID"].isin(batch_users)]
batch_train = train[train["userID"].isin(batch_users)]
batch_merged = pd.merge(batch_train, batch_predictions, on=["userID", "itemID"], how="outer")
batch_predictions = batch_merged[batch_merged.rating.isnull()].drop('rating', axis=1)

batch_test = test[test["userID"].isin(batch_users)]

# eval_map = map_at_k(batch_test, batch_predictions, col_prediction='prediction', k=TOP_K)
eval_ndcg = ndcg_at_k(batch_test, batch_predictions, col_prediction='prediction', k=TOP_K)
eval_precision = precision_at_k(batch_test, batch_predictions, col_prediction='prediction', k=TOP_K)
eval_recall = recall_at_k(batch_test, batch_predictions, col_prediction='prediction', k=TOP_K)

ndcg.append(eval_ndcg)
hr.append(eval_precision)
recall.append(eval_recall)

del batch_train
del batch_predictions
del batch_merged

with Timer() as test_time:
    top_k = model.recommend_k_items(test, remove_seen=True)
# print("Took {} seconds for prediction.".format(test_time.interval))

# top_k.head()

eval_map = map_at_k(test, top_k, col_user='******', col_item='itemID',
                    col_rating='rating', k=TOP_K)
eval_ndcg = ndcg_at_k(test, top_k, col_user='******', col_item='itemID',
                      col_rating='rating', k=TOP_K)
eval_precision = precision_at_k(test, top_k, col_user='******', col_item='itemID',
                                col_rating='rating', k=TOP_K)
eval_recall = recall_at_k(test, top_k, col_user='******', col_item='itemID',
                          col_rating='rating', k=TOP_K)
eval_rmse = rmse(test,

top_k_scores[USER] = top_k_scores['UserId']
top_k_scores[ITEM] = top_k_scores['SnackId']
top_k_scores[PREDICTION] = top_k_scores['Prediction']
top_k_scores.head()
# st.write("Top k Scores:", top_k_scores)
data_test.head()
# st.write("Data_Test:", data_test)

eval_map = map_at_k(data_test, top_k_scores, col_user=USER, col_item=ITEM,
                    col_rating=RATING, col_prediction=PREDICTION,
                    relevancy_method="top_k", k=TOP_K)
st.write("MAP:", eval_map)

eval_ndcg = ndcg_at_k(data_test, top_k_scores, col_user=USER, col_item=ITEM,
                      col_rating=RATING, col_prediction=PREDICTION,
                      relevancy_method="top_k", k=TOP_K)
st.write("NDCG:", eval_ndcg)

eval_precision = precision_at_k(data_test, top_k_scores, col_user=USER, col_item=ITEM,
                                col_rating=RATING, col_prediction=PREDICTION,
                                relevancy_method="top_k", k=TOP_K)
st.write("Precision:", eval_precision)

eval_recall = recall_at_k(data_test, top_k_scores, col_user=USER, col_item=ITEM,
                          col_rating=RATING, col_prediction=PREDICTION,
                          relevancy_method="top_k", k=TOP_K)
st.write("Recall:", eval_recall)

print("Model:\t" + learn.__class__.__name__,
      "Top K:\t%d" % TOP_K,
      "MAP:\t%f" % eval_map,
      "NDCG:\t%f" % eval_ndcg,
      "Precision@K:\t%f" % eval_precision,

model.fit(train)

top_k = model.recommend_k_items(test, remove_seen=True)

top_k_with_titles = (
    top_k.join(data[['MovieId', 'Title']].drop_duplicates().set_index('MovieId'),
               on='MovieId', how='inner')
    .sort_values(by=['UserId', 'Prediction'], ascending=False)
)

args = [test, top_k]
kwargs = dict(col_user='******', col_item='MovieId', col_rating='Rating',
              col_prediction='Prediction', relevancy_method='top_k', k=TOP_K)

eval_map = map_at_k(*args, **kwargs)
eval_ndcg = ndcg_at_k(*args, **kwargs)
eval_precision = precision_at_k(*args, **kwargs)
eval_recall = recall_at_k(*args, **kwargs)

print(f"Model:",
      f"Top K:\t\t {TOP_K}",
      f"MAP:\t\t {eval_map:f}",
      f"NDCG:\t\t {eval_ndcg:f}",
      f"Precision@K:\t {eval_precision:f}",
      f"Recall@K:\t {eval_recall:f}",
      sep='\n')