Example #1
File: main.py  Project: ifkid/xiaohongshu
import os

import numpy as np
import pandas as pd
import torch

# DataSet, GCN, evaluate and norm are project-level helpers from ifkid/xiaohongshu.

def test(model_save_path, root_dir, p_r_root, day, test_log_dir, args, layer, dim):
    # load test dataset
    dataset = DataSet(os.path.join(root_dir, day))  # TODO: test with different data
    # load model
    running_context = torch.device("cuda:1")
    model = GCN(args, running_context, layer, dim).to(running_context)
    model.load_state_dict(torch.load(model_save_path, map_location=running_context))
    model.eval()
    # per-hour test metrics: one row per hour of the day
    test_log_list = [[] for _ in range(24)]
    print("\n" + "+" * 20 + " Test on day {} layer {} dim {} ".format(day, layer, dim) + "+" * 20)
    for hour in range(0, 24):
        data = dataset[hour]
        edge_index = data.edge_index.to(running_context)
        mask = data.mask.to(running_context)
        # inference only: move the inputs to the same device and skip gradient tracking
        with torch.no_grad():
            logit = model(data.inputs.to(running_context), edge_index)
        label = data.label.to(running_context, non_blocking=True)
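        # evaluate() returns accuracy, confusion-matrix counts (TN/FP/FN/TP), per-class
        # precision/recall/error rates, and macro/micro F1, as unpacked on the next line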
        acc, tn, fp, fn, tp, white_p, white_r, white_incorrect, black_p, black_r, black_incorrect, macro_f1, micro_f1 = evaluate(logit[mask].max(1)[1], label)

        test_log_list[hour].extend([
            float(acc), float(white_p), float(white_r), float(white_incorrect),
            float(black_p), float(black_r), float(black_incorrect),
            float(macro_f1), float(micro_f1),
        ])
        test_log = "hour:{:3d}, acc: {:.4f}" \
                   "TN={:6d}, FP={:6d}, FN={:6d}, TP={:6d}, " \
                   "white_p={:.4f}, white_r={:.4f}, " \
                   "black_p={:.4f}, black_r={:.4f}, " \
                   "macro_f1: {:.4f}, micro_f1: {:.4f}"
        print(test_log.format(hour, acc, tn, fp, fn, tp, white_p, white_r, black_p, black_r, macro_f1, micro_f1))

        # per-sample class probabilities and true labels for precision-recall analysis
        logit = np.array(norm(torch.sigmoid(logit[mask]).cpu().detach().numpy()))
        label = label.cpu().detach().numpy().reshape(-1, 1)
        p_r = np.concatenate((logit, label), axis=1)
        p_r_cols = ["neg_pro", "pos_pro", "label"]
        p_r_df = pd.DataFrame(p_r, columns=p_r_cols)
        p_r_path = os.path.join(p_r_root, "test_{}_hour_{}_layer_{}_dim_{}_pr.csv"
                                .format(day.split("-")[-1], hour, layer, dim))

        p_r_df.to_csv(p_r_path, index=False, columns=p_r_cols)
    # save test logs to csv file
    print("Start to save test log of {}.".format(day))
    test_log_cols = ["acc", "white_p", "white_r", "white_incorrect", "black_p", "black_r", "black_incorrect", "macro_f1", "micro_f1"]
    test_log_df = pd.DataFrame(test_log_list, columns=test_log_cols)
    test_log_df.to_csv(test_log_dir, float_format="%.4f", index=False, columns=test_log_cols)
    print("Save test log of day {} layer {} dim {} successfully.".format(day, layer, dim))
    torch.cuda.empty_cache()
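
For context, a minimal sketch of how this function might be invoked; all paths, the day string, and the layer/dim values below are illustrative assumptions, not values taken from the project.

# Hypothetical invocation; every literal below is an assumption.
if __name__ == "__main__":
    args = parse_args()  # assumed project-level CLI parser
    test(
        model_save_path="checkpoints/gcn_layer_2_dim_64.pt",
        root_dir="data/test",
        p_r_root="logs/pr",
        day="2020-01-01",
        test_log_dir="logs/test_log_day_01.csv",
        args=args,
        layer=2,
        dim=64,
    )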
Example #2
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F

# Trainer, GCN and unpack_batch are project-level definitions assumed to be in scope.

class GCNTrainer(Trainer):
    def __init__(self, parameter, emb_matrix=None):
        self.parameter = parameter
        self.emb_matrix = emb_matrix
        self.model = GCN(parameter, emb_matrix=emb_matrix)
        self.criterion = nn.CrossEntropyLoss()
        self.parameters = [
            p for p in self.model.parameters() if p.requires_grad
        ]
        self.optimizer = torch.optim.SGD(self.parameters,
                                         lr=0.5,
                                         weight_decay=0)

    def update(self, batch):
        inputs, labels = unpack_batch(batch)
        self.model.train()  # training mode: enables dropout
        self.optimizer.zero_grad()
        logits, pooling_output = self.model(inputs)
        loss = self.criterion(logits, labels)
        # L2 regularization on the pooled representation
        loss += 0.003 * (pooling_output**2).sum(1).mean()
        loss_val = loss.item()
        # backpropagation
        loss.backward()
        torch.nn.utils.clip_grad_norm_(self.model.parameters(), 5)
        self.optimizer.step()
        return loss_val

    def predict(self, batch):
        inputs, labels = unpack_batch(batch)
        self.model.eval()  # evaluation mode: disables dropout
        orig_idx = batch[-1]
        logits, _ = self.model(inputs)
        probs = F.softmax(logits, dim=1).detach().cpu().numpy().tolist()
        predictions = np.argmax(logits.detach().cpu().numpy(), axis=1).tolist()
        # sort by orig_idx to restore the original (pre-shuffle) example order
        _, predictions, probs = [
            list(t) for t in zip(*sorted(zip(orig_idx, predictions, probs)))
        ]
        return predictions
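
For context, a hedged sketch of a driver loop for this trainer; parameter, emb_matrix, train_batches and dev_batches are assumed to be built elsewhere in the project, and the "num_epoch" key is illustrative.

# Hypothetical training loop; data loading and the "num_epoch" key are assumptions.
trainer = GCNTrainer(parameter, emb_matrix=emb_matrix)
for epoch in range(parameter["num_epoch"]):
    epoch_loss = 0.0
    for batch in train_batches:  # each batch must match unpack_batch's expected layout
        epoch_loss += trainer.update(batch)
    print("epoch {}: avg train loss {:.4f}".format(epoch, epoch_loss / len(train_batches)))
    dev_preds = [p for batch in dev_batches for p in trainer.predict(batch)]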