import heapq

import numpy as np
import torch

# `device`, `get_test_instances_with_random_samples`, `getHitRatio`, and
# `getNDCG` are assumed to be defined elsewhere in this repo.


def evaluate_fine_tune(model, df_val, top_K, random_samples, num_items):
    """Leave-one-out evaluation: rank each user's ground-truth item against
    `random_samples` random negatives, recording HR@k / NDCG@k for k = 1..top_K."""
    model.eval()
    avg_HR = np.zeros((len(df_val), top_K))
    avg_NDCG = np.zeros((len(df_val), top_K))

    for i in range(len(df_val)):
        # One positive instance plus random negatives; the ground-truth
        # item is the first entry of `test_item_input`.
        test_user_input, test_item_input = get_test_instances_with_random_samples(
            df_val[i], random_samples, num_items, device)
        y_hat = model(test_user_input, test_item_input)
        y_hat = y_hat.cpu().detach().numpy().reshape((-1,))
        test_item_input = test_item_input.cpu().detach().numpy().reshape((-1,))

        # Map each candidate item to its predicted score.
        map_item_score = {}
        for j in range(len(y_hat)):
            map_item_score[test_item_input[j]] = y_hat[j]

        gtItem = test_item_input[0]
        for k in range(top_K):
            # Evaluate the top-(k + 1) rank list; `k + 1` rather than `k`,
            # since k = 0 would otherwise produce an empty list (HR@0).
            ranklist = heapq.nlargest(k + 1, map_item_score,
                                      key=map_item_score.get)
            avg_HR[i, k] = getHitRatio(ranklist, gtItem)
            avg_NDCG[i, k] = getNDCG(ranklist, gtItem)

    avg_HR = np.mean(avg_HR, axis=0)
    avg_NDCG = np.mean(avg_NDCG, axis=0)
    return avg_HR, avg_NDCG
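# `getHitRatio` and `getNDCG` live elsewhere in the repo; for reference, a
# minimal sketch following the standard leave-one-out definitions used in
# NCF-style evaluation. This is an assumption about what the helpers
# compute, not the repo's actual code (hence the `_sketch` suffix).
import math


def getHitRatio_sketch(ranklist, gtItem):
    # 1 if the ground-truth item appears anywhere in the top-k list, else 0.
    return 1 if gtItem in ranklist else 0


def getNDCG_sketch(ranklist, gtItem):
    # A single relevant item at 0-based rank i contributes
    # log(2) / log(i + 2); the ideal DCG is 1, so this is also the NDCG.
    for i, item in enumerate(ranklist):
        if item == gtItem:
            return math.log(2) / math.log(i + 2)
    return 0.0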
#%% evaluate
# `testData`, `test_features`, `test_careers`, `clf_model`, and `top_K` are
# assumed to come from earlier cells. `Variable` is deprecated; plain
# tensors behave identically under `torch.no_grad()`.
testData = testData.float()
with torch.no_grad():
    avg_HR = np.zeros((len(test_features), top_K))
    avg_NDCG = np.zeros((len(test_features), top_K))
    for i in range(len(test_features)):
        y_hat = clf_model(testData[i])
        # _, predicted = torch.max(y_hat.data, 0)
        for ki in range(top_K):
            # Evaluate the top-(ki + 1) rank list; `ki + 1` rather than
            # `ki`, since ki = 0 would otherwise produce an empty list.
            idx = torch.topk(y_hat.data, k=ki + 1, dim=0)[1]
            ranklist = idx.tolist()
            gtItem = test_careers['like_id'][i]
            avg_HR[i, ki] = getHitRatio(ranklist, gtItem)
            avg_NDCG[i, ki] = getNDCG(ranklist, gtItem)

avg_HR = np.mean(avg_HR, axis=0)
avg_NDCG = np.mean(avg_NDCG, axis=0)
np.savetxt('results/avg_HR_CLF.txt', avg_HR)
np.savetxt('results/avg_NDCG_CLF.txt', avg_NDCG)

#%% evaluate fairness
import sys

# Redirect all subsequent prints to a log file for the fairness report.
sys.stdout = open("dnnClf_output.txt", "w")

with torch.no_grad():
    y_hat = clf_model(testData)

device = torch.device("cpu")
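# The fairness cell above is truncated at this point. For orientation, a
# minimal sketch of the kind of group-wise check such a cell typically
# performs: comparing a per-user metric between two demographic groups.
# The `gender` column, group codes, and per-user metric array below are
# hypothetical names for illustration, not identifiers from this repo.
def group_mean_gap(per_user_metric, groups, group_a, group_b):
    # Absolute gap between the two groups' mean metric values; a larger
    # gap suggests the recommender serves one group better than the other.
    gap = (per_user_metric[groups == group_a].mean()
           - per_user_metric[groups == group_b].mean())
    return abs(gap)

# Hypothetical usage, assuming the per-user HR@10 column was kept before
# averaging over users:
# genders = test_careers['gender'].to_numpy()
# print('HR@10 gap across genders:',
#       group_mean_gap(hr10_per_user, genders, 0, 1))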