def test(model_save_path, root_dir, p_r_root, day, test_log_dir, args, layer, dim,
         device="cuda:1"):
    """Evaluate a saved GCN checkpoint on the 24 hourly samples of one day.

    For every hour the model output is scored with ``evaluate`` and the
    normalised sigmoid scores are written, together with the labels, to a
    per-hour CSV inside ``p_r_root``. After all hours are processed, one CSV
    holding the per-hour metrics is written to ``test_log_dir``.

    Args:
        model_save_path: Path of the checkpoint passed to ``torch.load``.
        root_dir: Directory that contains one sub-directory per day.
        p_r_root: Directory receiving the per-hour score CSV files.
        day: Name of the day sub-directory; the part after the last ``-``
            is used in the per-hour file names.
        test_log_dir: Destination path (a file path, despite the name) of
            the metrics CSV.
        args: Passed through to the ``GCN`` constructor.
        layer: Passed to ``GCN`` and used in log messages / file names.
        dim: Passed to ``GCN`` and used in log messages / file names.
        device: Device the model and tensors are moved to. Defaults to
            ``"cuda:1"``.
    """
    # load test dataset
    dataset = DataSet(os.path.join(root_dir, day))  # TODO: test on different data

    # load model
    running_context = torch.device(device)
    model = GCN(args, running_context, layer, dim).to(running_context)
    # map_location makes loading independent of the device the checkpoint
    # was saved from.
    state_dict = torch.load(model_save_path, map_location=running_context)
    model.load_state_dict(state_dict)
    model.eval()

    test_log_cols = ["acc", "white_p", "white_r", "white_incorrect",
                     "black_p", "black_r", "black_incorrect",
                     "macro_f1", "micro_f1"]
    p_r_cols = ["neg_pro", "pos_pro", "label"]
    test_log = ("hour:{:3d}, acc: {:.4f}, "
                "TN={:6d}, FP={:6d}, FN={:6d}, TP={:6d}, "
                "white_p={:.4f}, white_r={:.4f}, "
                "black_p={:.4f}, black_r={:.4f}, "
                "macro_f1: {:.4f}, micro_f1: {:.4f}")
    day_suffix = day.split("-")[-1]
    test_log_rows = []  # one row of metrics per hour, in test_log_cols order

    print("\n" + "+" * 20
          + " Test on day {} layer {} dim {} ".format(day, layer, dim)
          + "+" * 20)

    # Pure inference: no autograd graph is needed, so skip building one.
    with torch.no_grad():
        for hour in range(24):
            data = dataset[hour]
            edge_index = data.edge_index.to(running_context)
            mask = data.mask.to(running_context)
            logit = model(data.inputs, edge_index)
            label = data.label.to(running_context, non_blocking=True)

            # NOTE(review): only the logits are masked here; this assumes
            # data.label already lines up with the masked nodes - confirm.
            (acc, tn, fp, fn, tp,
             white_p, white_r, white_incorrect,
             black_p, black_r, black_incorrect,
             macro_f1, micro_f1) = evaluate(logit[mask].max(1)[1], label)

            test_log_rows.append([
                float(value) for value in (
                    acc, white_p, white_r, white_incorrect,
                    black_p, black_r, black_incorrect,
                    macro_f1, micro_f1)
            ])
            print(test_log.format(hour, acc, tn, fp, fn, tp,
                                  white_p, white_r, black_p, black_r,
                                  macro_f1, micro_f1))

            # Per-hour scores plus label; the three column names require the
            # normalised scores to have exactly two columns.
            scores = np.array(
                norm(torch.sigmoid(logit[mask]).detach().cpu().numpy()))
            label_col = label.detach().cpu().numpy().reshape(-1, 1)
            p_r = np.concatenate((scores, label_col), axis=1)
            p_r_df = pd.DataFrame(p_r, columns=p_r_cols)
            p_r_path = os.path.join(
                p_r_root,
                "test_{}_hour_{}_layer_{}_dim_{}_pr.csv".format(
                    day_suffix, hour, layer, dim))
            p_r_df.to_csv(p_r_path, index=False, columns=p_r_cols)

    # save test logs to csv file
    print("Start to save test log of {}.".format(day))
    test_log_df = pd.DataFrame(test_log_rows, columns=test_log_cols)
    test_log_df.to_csv(test_log_dir, float_format="%.4f", index=False,
                       columns=test_log_cols)
    print("Save test log of day {} layer {} dim {} successfully.".format(
        day, layer, dim))
    torch.cuda.empty_cache()
class GCNTrainer(Trainer):
    """Trainer that fits a ``GCN`` model with SGD and cross-entropy loss.

    ``update`` performs one optimisation step on a batch and ``predict``
    returns the predicted class indices of a batch, reordered by the
    original indices stored in the last element of the batch.
    """

    # Hyper-parameters used by this trainer.
    LEARNING_RATE = 0.5        # SGD learning rate
    POOLING_L2_WEIGHT = 0.003  # weight of the L2 penalty on the pooling output
    MAX_GRAD_NORM = 5          # max_norm passed to gradient clipping

    def __init__(self, parameter, emb_matrix=None):
        """Build the model, the loss and the optimiser.

        Args:
            parameter: Configuration object forwarded to ``GCN``.
            emb_matrix: Optional embedding matrix forwarded to ``GCN``.
        """
        # NOTE(review): Trainer.__init__ is not called here - confirm the
        # base class needs no setup of its own.
        self.parameter = parameter
        self.emb_matrix = emb_matrix
        self.model = GCN(parameter, emb_matrix=emb_matrix)
        self.criterion = nn.CrossEntropyLoss()
        # Only trainable tensors are handed to the optimiser.
        self.parameters = [
            p for p in self.model.parameters() if p.requires_grad
        ]
        self.optimizer = torch.optim.SGD(
            self.parameters, lr=self.LEARNING_RATE, weight_decay=0)

    def update(self, batch):
        """Run one training step on ``batch`` and return the loss as a float."""
        inputs, labels = unpack_batch(batch)
        self.model.train()  # enable dropout
        self.optimizer.zero_grad()

        logits, pooling_output = self.model(inputs)
        loss = self.criterion(logits, labels)
        # regularisation term: L2 penalty on the pooling output
        loss += self.POOLING_L2_WEIGHT * (pooling_output ** 2).sum(1).mean()
        loss_val = loss.item()

        # back-propagation
        loss.backward()
        torch.nn.utils.clip_grad_norm_(
            self.model.parameters(), self.MAX_GRAD_NORM)
        self.optimizer.step()
        return loss_val

    def predict(self, batch):
        """Return predicted class indices for ``batch`` in original order.

        The last element of ``batch`` holds the original index of every
        example; predictions are sorted by it before being returned. An
        empty batch yields an empty list.
        """
        inputs, _ = unpack_batch(batch)
        self.model.eval()  # disable dropout
        orig_idx = batch[-1]

        # Inference only: no gradients are needed.
        with torch.no_grad():
            logits, _ = self.model(inputs)

        predictions = np.argmax(
            logits.detach().cpu().numpy(), axis=1).tolist()
        # Restore the original example order.
        return [pred for _, pred in sorted(zip(orig_idx, predictions))]