def get_metric(score_label):
    """Compute per-user and global ranking metrics from (uid, score, label) rows.

    Users with fewer than 10 scored items are skipped. Per-user
    Precision@1..20, NDCG@1..20 and AUC are averaged over the kept users;
    global Precision@k% (k = 1..20), AUC, MAE and MRSE are computed over
    all kept (score, label) pairs pooled together.

    Args:
        score_label: iterable of (uid, score, label) rows accepted by
            pd.DataFrame.

    Returns:
        (Precision, NDCG, AUC, GPrecision, GAUC, MAE, MRSE) where the first
        three are per-user averages and the rest are global metrics.

    Raises:
        ZeroDivisionError: if no user has >= 10 items (num stays 0).
    """
    Precision = np.zeros(20)
    NDCG = np.zeros(20)
    AUC = 0.
    score_df = pd.DataFrame(score_label, columns=['uid', 'score', 'label'])
    num = 0
    score_label_all = []
    for uid, hist in score_df.groupby('uid'):
        # Too few interactions make per-user ranking metrics meaningless.
        if hist.shape[0] < 10:
            continue
        score = hist['score'].tolist()
        label = hist['label'].tolist()
        score_label_u = []
        for s, l in zip(score, label):
            score_label_u.append([s, l])
            score_label_all.append([s, l])
        precision, ndcg, auc, mae, mrse = calc_metric(score_label_u)
        Precision += precision
        NDCG += ndcg
        AUC += auc
        num += 1
    score_label_all = sorted(score_label_all, key=lambda d: d[0], reverse=True)
    # BUGFIX: use integer division so the top-k cutoff is an int on Python 3
    # (the original `/` produced a float index under Python 3).
    GPrecision = np.array([
        eval.precision_k(score_label_all, k * len(score_label_all) // 100)
        for k in range(1, 21)
    ])
    GAUC = eval.auc(score_label_all)
    MAE = eval.mae(score_label_all)
    MRSE = eval.mrse(score_label_all)
    return Precision / num, NDCG / num, AUC / num, GPrecision, GAUC, MAE, MRSE
def _eval(sess, model, test_set_list):
    """Evaluate `model` on a list of per-user test sets.

    For each user: builds the positive examples plus 2 random negatives per
    positive, constructs a padded user-history matrix, scores everything with
    `model.eval`, and accumulates per-user metrics from `calc_metric`.
    Relies on module-level `u_his_list` (per-user item histories) and
    `item_count` (negative-sampling range) — TODO confirm they are defined
    at module scope.

    Args:
        sess: TF session passed through to `model.eval`.
        model: model exposing `eval(sess, datainput, u_hisinput, u_his_l)`.
        test_set_list: list of per-user lists of (uid, iid, label) triples.

    Returns:
        (mean loss, mean Precision, mean Recall, mean F1, mean AUC,
         mean NDCG, global Precision@30%, global AUC).
    """
    # NOTE(review): the original reused the loop variable `i` at four nesting
    # levels; inner loops now use distinct names to avoid accidental clobbering.
    loss_sum = 0.
    Precision = 0.
    Recall = 0.
    F1 = 0.
    AUC = 0.
    NDCG = 0.
    num = 0
    score_label_all = []
    for test_set_list_u in test_set_list:
        uid = test_set_list_u[0][0]
        u_his_all = u_his_list[uid]
        uid_list, iid_list, label_list = [], [], []
        u_his, u_his_l = [], []
        for s in test_set_list_u:
            uid_list.append(uid)
            iid_list.append(s[1])
            label_list.append(s[2])
            # History = items that occur strictly before the target item s[1].
            u_his_u = []
            for hist_item in u_his_all:
                if hist_item == s[1]:
                    break
                u_his_u.append(hist_item)
            u_his_l_u = len(u_his_u)
            if u_his_l_u <= 0:
                u_his_u = [0]  # pad with a dummy item when there is no history
            u_his.append(u_his_u)
            u_his_l.append(u_his_l_u)
            # Sample 2 negatives per positive; resample until neg != s[1].
            for _ in range(2):
                neg = s[1]
                while neg == s[1]:
                    neg = np.random.randint(0, item_count)
                uid_list.append(uid)
                iid_list.append(neg)
                label_list.append(0)
                u_his.append(u_his_u)
                u_his_l.append(u_his_l_u)
        u_his_maxlength = max(max(u_his_l), 1)
        u_hisinput = np.zeros([len(uid_list), u_his_maxlength], dtype=np.int32)
        for row, ru in enumerate(u_his):
            u_hisinput[row, :len(ru)] = ru  # remainder of the row stays 0-padded
        datainput = (uid_list, iid_list, label_list)
        score, loss = model.eval(sess, datainput, u_hisinput, u_his_l)
        score_label_u = []
        for sc, lb in zip(score, label_list):
            score_label_u.append([sc, lb])
            score_label_all.append([sc, lb])
        precision, recall, f1, auc, ndcg = calc_metric(score_label_u)
        loss_sum += loss
        Precision += precision
        Recall += recall
        F1 += f1
        AUC += auc
        NDCG += ndcg
        num += 1
    score_label_all = sorted(score_label_all, key=lambda d: d[0], reverse=True)
    GP = eval.precision_k(score_label_all, 0.3 * len(score_label_all))
    GAUC = eval.auc(score_label_all)
    return (loss_sum / num, Precision / num, Recall / num, F1 / num,
            AUC / num, NDCG / num, GP, GAUC)
def calc_metric(score_label_u):
    """Per-user ranking metrics over one user's [score, label] pairs.

    Sorts by score descending, then computes Precision@k and NDCG@k for
    k = 1..20, plus AUC, MAE and MRSE via the project `eval` module.

    Returns:
        (precision[20], ndcg[20], auc, mae, mrse)
    """
    ranked = sorted(score_label_u, key=lambda pair: pair[0], reverse=True)
    ks = range(1, 21)
    precision = np.array([eval.precision_k(ranked, k) for k in ks])
    ndcg = np.array([eval.ndcg_k(ranked, k) for k in ks])
    return (precision, ndcg,
            eval.auc(ranked), eval.mae(ranked), eval.mrse(ranked))
def calc_metric(score_label_u):
    """Per-user top-3 metrics over one user's [score, label] pairs.

    Sorts by score descending, then computes Precision@3, Recall@3, F1,
    AUC and NDCG@3 via the project `eval` module.

    NOTE(review): this redefines `calc_metric` with a different return
    signature than the 5-tuple variant elsewhere — confirm only one is
    meant to live in this module.

    Returns:
        (precision, recall, f1, auc, ndcg)
    """
    score_label_u = sorted(score_label_u, key=lambda d: d[0], reverse=True)
    precision = eval.precision_k(score_label_u, 3)
    recall = eval.recall_k(score_label_u, 3)
    # BUGFIX: the original bare `except:` swallowed every exception just to
    # guard division by zero; test the denominator explicitly instead.
    if precision + recall > 0:
        f1 = 2 * precision * recall / (precision + recall)
    else:
        f1 = 0
    auc = eval.auc(score_label_u)
    ndcg = eval.ndcg_k(score_label_u, 3)
    return precision, recall, f1, auc, ndcg
# Sweep cut thresholds, plot the precision-recall curve, and report its AUC.
eps = 0.2
print("Precision-Recall curve")
thresholds = np.arange(0.01, 0.99, 0.01)
all_files = np.array(sorted(os.listdir(frames_path)))
recalls, precisions = [], []
for t in thresholds:
    cuts = scores_to_cuts(threshold=t, scores=scores, all_files=all_files,
                          frame_rate=frame_rate)
    r, p = eval.eval_performances(cuts=cuts, gt=gt, eps=eps)
    recalls.append(r)
    precisions.append(p)
recall_tot = np.array(recalls)
precision_tot = np.array(precisions)
plt.plot(recall_tot, precision_tot)
plt.xlabel('Recall')
plt.ylabel('Precision')
plt.show()
# AUC integration expects recall in ascending order.
order = np.argsort(recall_tot)
recall_tot = recall_tot[order]
precision_tot = precision_tot[order]
auc = eval.auc(x=recall_tot, y=precision_tot)
print("AUC:\t{}".format(auc))
# Label-encode items, tag sessions that ended in a purchase, split the
# dataset 160k/20k/rest, then train Rec2 for 10 epochs reporting AUC.
label_encoder = LabelEncoder()
clicks_dataset['item_id'] = label_encoder.fit_transform(clicks_dataset.item_id)
# A session is positive if it also appears in the buy log.
clicks_dataset['label'] = clicks_dataset.session_id.isin(buy_dataset.session_id)
positive_rate = clicks_dataset.drop_duplicates('session_id')['label'].mean()
print(positive_rate)
clicks_dataset.to_csv('temp.csv', index=False)
dataset = YCBDataset(root='../', dataset=clicks_dataset)
dataset = dataset.shuffle()
train_end, val_end = 160000, 180000
train_dataset = dataset[:train_end]
val_dataset = dataset[train_end:val_end]
test_dataset = dataset[val_end:]
batch_size = 256
train_data = DataLoader(train_dataset, batch_size=batch_size)
test_data = DataLoader(test_dataset, batch_size=batch_size)
val_data = DataLoader(val_dataset, batch_size=batch_size)
print("Finished Splitting Dataset")
model = Rec2().to('cpu')
print("Starting Run of the Network")
for epoch in range(10):
    loss = train(model, train_data)
    train_auc = auc(model, train_data)
    val_auc = auc(model, val_data)
    test_auc = auc(model, test_data)
    print(f"Epoch: {epoch + 1}; Loss: {loss}; Train Acc: {train_auc}, Val Acc: {val_auc}, "
          f"Test Acc: {test_auc}")