Example #1
    def pretrain_reid(self):
        """Training reid, and may validate on val set."""

        start_ep = cfg.pre_reid_resume_ep if cfg.pre_reid_resume else 0
        for ep in range(start_ep, cfg.pre_reid_num_epochs):

            # Force all BN layers to use global mean and variance
            utils.may_set_mode(self.modules_optims, 'eval')
            # Enable dropout
            #utils.may_set_mode(self.reidTop.dropout, 'train')

            epoch_done = False
            ep_losses = []
            ep_st = time.time()
            step = 0
            while not epoch_done:

                step += 1
                step_st = time.time()

                ims, im_names, labels, ims_mirrored, epoch_done = \
                    self.train_set.next_batch()
                ims = TVT(Variable(torch.from_numpy(ims).float()))
                labels = TVT(Variable(torch.LongTensor(labels)))
                logits = self.googlenet(ims)

                loss = self.reid_criterion(logits, labels)
                self.optimReID.zero_grad()
                loss.backward()
                self.optimReID.step()

                ep_losses.append(utils.to_scalar(loss))

                # Step logs
                if step % cfg.pre_reid_log_steps == 0:
                    print('[Step {}/Ep {}], [{:.2f}s], [loss: {}]'.format(
                        step, ep + 1,
                        time.time() - step_st, utils.to_scalar(loss)))

            # Epoch logs
            print('===========> [Epoch {}], [{:.2f}s], [ep_avg_loss: {}]'.format(
                ep + 1,
                time.time() - ep_st, np.mean(ep_losses)))

            # validation

            if cfg.train_val_part == 'train':
                self.val_set.eval_single_query(True)
                self.val_set.eval_multi_query(False)

            # epoch saving
            if (ep + 1) % cfg.pre_reid_epochs_per_saving_ckpt == 0 \
                    or ep + 1 == cfg.pre_reid_num_epochs:
                utils.may_save_modules_optims_state_dicts(
                    self.modules_optims,
                    cfg.pre_reid_ckpt_saving_tmpl.format(ep + 1))

        self.train_set.stop_prefetching_threads()
        if cfg.train_val_part == 'train':
            self.val_set.stop_prefetching_threads()
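Every example in this collection depends on a small to_scalar helper from its project's utils module, called either on a single loss tensor (as above) or on a list of losses (as in the VQ-VAE snippets further down). The exact implementation varies by repository; a minimal sketch consistent with these call sites, assuming the helper only needs to turn tensors (or lists of tensors) into plain Python floats, could look like this:

import torch

def to_scalar(v):
    # Sketch only: convert a tensor, or a list/tuple of tensors, to Python floats.
    if isinstance(v, (list, tuple)):
        return [to_scalar(x) for x in v]
    if torch.is_tensor(v):
        return v.detach().cpu().item()
    return float(v)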
Example #2
def train(epoch,train_loader,DEVICE):
    print("starting epoch {}".format(epoch))
    train_loss = []
    kl_weight = 1
    for batch_idx, (data, _) in enumerate(train_loader):
        start_time = time.time()
        x = Variable(data, requires_grad=False).to(DEVICE)
        opt.zero_grad()
        x_di = vmodel(x)
        # use cuda?
        dmll_loss = discretized_mix_logistic_loss(x_di, 2*x-1, nr_mix=nr_mix,use_cuda=args.cuda)
        kl_loss = kl_weight*latent_loss(vmodel.z_mean, vmodel.z_sigma)
        loss = dmll_loss+kl_loss
        loss.backward()
        opt.step()
        train_loss.append(to_scalar([kl_loss, dmll_loss]))

        if not batch_idx%10:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {} Time: {}'.format(
                epoch, batch_idx * len(data), len(train_loader.dataset),
                100. * batch_idx / float(len(train_loader)),
                np.asarray(train_loss).mean(0),
                time.time() - start_time
            ))

    return np.asarray(train_loss).mean(0)
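The kl_loss term above calls latent_loss(vmodel.z_mean, vmodel.z_sigma), whose body is not shown in the snippet. Assuming z_mean and z_sigma parameterize a diagonal Gaussian posterior, the standard VAE KL term against a unit Gaussian looks roughly like the sketch below; whether the original sums or averages over elements is an assumption.

import torch

def latent_loss(z_mean, z_sigma):
    # Sketch of KL(N(mu, sigma^2) || N(0, 1)) for a diagonal Gaussian posterior.
    return 0.5 * torch.mean(z_mean ** 2 + z_sigma ** 2 - torch.log(z_sigma ** 2) - 1.0)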
Example #3
def train_epoch(data_train, model, optimizer, criterion, args):
    model.train()
    epoch_loss = 0
    for doc_train in tqdm.tqdm(data_train):
        for tokens, pos_tags, labels in zip(doc_train.sents, doc_train.pos_tags, doc_train.token_labels):
            assert len(tokens) == len(labels)
            # pdb.set_trace()
            tokens = [args.word2idx[i] for i in tokens]
            pos_tags = [args.pos2idx[i] for i in pos_tags]
            labels = [args.label2idx[i] for i in labels]
            y_true = labels

            # pdb.set_trace()
            tokens = Variable(torch.LongTensor(np.array([tokens]).transpose()))
            pos_tags = Variable(torch.LongTensor(np.array([pos_tags]).transpose()))
            labels = Variable(torch.LongTensor(np.array(labels).transpose()))     # labels have to be one-dim for NLL loss
            if args.cuda:
                tokens, pos_tags, labels = [tokens.cuda(), pos_tags.cuda(), labels.cuda()]
            scores = model(tokens, pos_tags)
            # print(scores)
            # print(labels)
            loss = criterion(scores, labels)
            optimizer.zero_grad()  # clear accumulated gradients before backprop
            loss.backward()
            optimizer.step()
            epoch_loss += utils.to_scalar(loss)
    print('epoch loss:{}'.format(epoch_loss))
Example #4
def train(model, params, optimizer, q_a_data, q_target_data, answer_data):
    N = int(math.floor(len(q_a_data) / params.batch_size))

    shuffle_index = np.random.permutation(q_a_data.shape[0])
    q_a_data = q_a_data[shuffle_index]
    q_target_data = q_target_data[shuffle_index]
    answer_data = answer_data[shuffle_index]

    pred_list = []
    target_list = []
    epoch_loss = 0
    model.train()

    for idx in range(N):
        q_a_seq = q_a_data[idx * params.batch_size:(idx + 1) *
                           params.batch_size, :]
        q_target_seq = q_target_data[idx * params.batch_size:(idx + 1) *
                                     params.batch_size, :]
        answer_seq = answer_data[idx * params.batch_size:(idx + 1) *
                                 params.batch_size, :]

        target = (answer_seq - 1) / params.n_question
        target = np.floor(target)
        input_q_target = utils.variable(torch.LongTensor(q_target_seq),
                                        params.gpu)
        input_x = utils.variable(torch.LongTensor(q_a_seq), params.gpu)
        target = utils.variable(torch.FloatTensor(target), params.gpu)
        target_to_1d = torch.chunk(target, params.batch_size, 0)
        target_1d = torch.cat(
            [target_to_1d[i] for i in range(params.batch_size)], 1)
        target_1d = target_1d.permute(1, 0)
        input_q_target_to_1d = torch.chunk(input_q_target, params.batch_size,
                                           0)
        input_q_target_1d = torch.cat(
            [input_q_target_to_1d[i] for i in range(params.batch_size)], 1)
        input_q_target_1d = input_q_target_1d.permute(1, 0)

        model.zero_grad()
        loss, filtered_pred, filtered_target = model(input_x,
                                                     input_q_target_1d,
                                                     target_1d)
        loss.backward()
        nn.utils.clip_grad_norm_(model.parameters(), params.max_grad_norm)
        optimizer.step()
        epoch_loss += utils.to_scalar(loss)

        right_target = np.asarray(filtered_target.data.tolist())
        right_pred = np.asarray(filtered_pred.data.tolist())
        pred_list.append(right_pred)
        target_list.append(right_target)

    all_pred = np.concatenate(pred_list, axis=0)
    all_target = np.concatenate(target_list, axis=0)

    auc = metrics.roc_auc_score(all_target, all_pred)
    all_pred[all_pred >= 0.5] = 1.0
    all_pred[all_pred < 0.5] = 0.0
    accuracy = metrics.accuracy_score(all_target, all_pred)

    return epoch_loss / N, accuracy, auc
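The target construction above, np.floor((answer_seq - 1) / params.n_question), recovers the correctness label from the combined question-answer encoding. Assuming the usual DKVMN-style encoding qa = q + a * n_question with a in {0, 1}, q in [1, n_question], and qa = 0 on padded positions (which is what a comment in Example #16 also states), the floor yields 1 for correct answers, 0 for incorrect ones, and -1 for padding:

import numpy as np

n_question = 100
q = np.array([1, 37, 100, 0])               # 0 marks a padded position
a = np.array([1, 0, 1, 0])                  # 1 = correct, 0 = incorrect
qa = np.where(q > 0, q + a * n_question, 0)

target = np.floor((qa - 1) / n_question)
print(target)                               # [ 1.  0.  1. -1.]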
Example #5
File: run.py  Project: tianlinyang/DKVMN
def train(epoch_num, model, params, optimizer, q_data, qa_data):
    N = int(math.floor(len(q_data) / params.batch_size))

    # shuffle_index = np.random.permutation(q_data.shape[0])
    # q_data_shuffled = q_data[shuffle_index]
    # qa_data_shuffled = qa_data[shuffle_index]

    pred_list = []
    target_list = []
    epoch_loss = 0
    model.train()

    # init_memory_value = np.random.normal(0.0, params.init_std, ())
    for idx in range(N):
        q_one_seq = q_data[idx * params.batch_size:(idx + 1) * params.batch_size, :]
        qa_batch_seq = qa_data[idx * params.batch_size:(idx + 1) * params.batch_size, :]
        target = qa_data[idx * params.batch_size:(idx + 1) * params.batch_size, :]

        target = (target - 1) / params.n_question
        target = np.floor(target)
        input_q = utils.varible(torch.LongTensor(q_one_seq), params.gpu)
        input_qa = utils.varible(torch.LongTensor(qa_batch_seq), params.gpu)
        target = utils.varible(torch.FloatTensor(target), params.gpu)
        target_to_1d = torch.chunk(target, params.batch_size, 0)
        target_1d = torch.cat([target_to_1d[i] for i in range(params.batch_size)], 1)
        target_1d = target_1d.permute(1, 0)

        model.zero_grad()
        loss, filtered_pred, filtered_target = model.forward(input_q, input_qa, target_1d)
        loss.backward()
        nn.utils.clip_grad_norm(model.parameters(), params.maxgradnorm)
        optimizer.step()
        epoch_loss += utils.to_scalar(loss)

        right_target = np.asarray(filtered_target.data.tolist())
        right_pred = np.asarray(filtered_pred.data.tolist())
        # print(right_pred)
        # print(right_target)
        # right_index = np.flatnonzero(right_target != -1.).tolist()
        pred_list.append(right_pred)
        target_list.append(right_target)

    all_pred = np.concatenate(pred_list, axis=0)
    all_target = np.concatenate(target_list, axis=0)
    # if (epoch_num + 1) % params.decay_epoch == 0:
    #     utils.adjust_learning_rate(optimizer, params.init_lr * params.lr_decay)
    # print('lr: ', params.init_lr / (1 + 0.75))
    # utils.adjust_learning_rate(optimizer, params.init_lr / (1 + 0.75))
    # print("all_target", all_target)
    # print("all_pred", all_pred)
    auc = metrics.roc_auc_score(all_target, all_pred)
    all_pred[all_pred >= 0.5] = 1.0
    all_pred[all_pred < 0.5] = 0.0
    accuracy = metrics.accuracy_score(all_target, all_pred)
    # f1 = metrics.f1_score(all_target, all_pred)

    return epoch_loss/N, accuracy, auc
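Examples #5 and #6 call utils.varible (that spelling comes from the DKVMN repository), while most of the other knowledge-tracing snippets call utils.variable. Both appear to do the same job: wrap a tensor for autograd and move it to the GPU when one is requested. A minimal sketch under that assumption, treating params.gpu as a truthy flag (the real repositories may use a device index instead):

import torch
from torch.autograd import Variable

def variable(tensor, gpu):
    # Sketch only: wrap for autograd and move to the GPU if one was requested.
    if gpu and torch.cuda.is_available():
        return Variable(tensor).cuda()
    return Variable(tensor)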
Example #6
def test(model, params, optimizer, q_data, qa_data):
    N = int(math.floor(len(q_data) / params.batch_size))

    pred_list = []
    target_list = []
    epoch_loss = 0
    model.eval()

    # init_memory_value = np.random.normal(0.0, params.init_std, ())
    for idx in range(N):
        q_one_seq = q_data[idx * params.batch_size:(idx + 1) *
                           params.batch_size, :]
        qa_batch_seq = qa_data[idx * params.batch_size:(idx + 1) *
                               params.batch_size, :]
        target = qa_data[idx * params.batch_size:(idx + 1) *
                         params.batch_size, :]

        target = (target - 1) / params.n_question
        target = np.floor(target)

        input_q = utils.varible(torch.LongTensor(q_one_seq),
                                params.gpu)  # shape 32,200
        input_qa = utils.varible(torch.LongTensor(qa_batch_seq),
                                 params.gpu)  # shape 32,200
        target = utils.varible(torch.FloatTensor(target),
                               params.gpu)  # shape 32,200

        target_to_1d = torch.chunk(target, params.batch_size, 0)
        target_1d = torch.cat(
            [target_to_1d[i] for i in range(params.batch_size)], 1)
        target_1d = target_1d.permute(1, 0)

        loss, filtered_pred, filtered_target, _ = model.forward(
            input_q, input_qa, target_1d)

        right_target = np.asarray(filtered_target.data.tolist())
        right_pred = np.asarray(filtered_pred.data.tolist())
        pred_list.append(right_pred)
        target_list.append(right_target)
        epoch_loss += utils.to_scalar(loss)

    all_pred = np.concatenate(pred_list, axis=0)
    all_target = np.concatenate(target_list, axis=0)

    # print("all_target", all_target)
    # print("all_pred", all_pred)
    auc = metrics.roc_auc_score(all_target, all_pred)
    all_pred[all_pred >= 0.5] = 1.0
    all_pred[all_pred < 0.5] = 0.0
    accuracy = metrics.accuracy_score(all_target, all_pred)
    f1 = metrics.f1_score(all_target, all_pred)

    return epoch_loss / N, accuracy, auc, f1
Example #7
def test(model, params, optimizer, q_data, qa_data, a_data):
    N = int(math.floor(len(q_data) / params.batch_size))

    pred_list = []
    target_list = []
    epoch_loss = 0
    model.eval()

    for idx in range(N):

        q_one_seq = q_data[idx * params.batch_size:(idx + 1) *
                           params.batch_size, :]
        qa_batch_seq = qa_data[idx * params.batch_size:(idx + 1) *
                               params.batch_size, :]
        a_batch_seq = a_data[idx * params.batch_size:(idx + 1) *
                             params.batch_size, :]
        target = qa_data[idx * params.batch_size:(idx + 1) *
                         params.batch_size, :]

        target = (target - 1) / params.n_question
        target = np.floor(target)

        input_q = utils.variable(torch.LongTensor(q_one_seq), params.gpu)
        input_qa = utils.variable(torch.LongTensor(qa_batch_seq), params.gpu)
        input_a = utils.variable(torch.LongTensor(a_batch_seq), params.gpu)
        target = utils.variable(torch.FloatTensor(target), params.gpu)

        target_to_1d = torch.chunk(target, params.batch_size, 0)
        target_1d = torch.cat(
            [target_to_1d[i] for i in range(params.batch_size)], 1)
        target_1d = target_1d.permute(1, 0)

        loss, filtered_pred, filtered_target = model.forward(
            input_q, input_qa, input_a, target_1d)

        right_target = np.asarray(filtered_target.data.tolist())
        right_pred = np.asarray(filtered_pred.data.tolist())
        pred_list.append(right_pred)
        target_list.append(right_target)
        epoch_loss += utils.to_scalar(loss)

        # print("testing : batch " + str(idx) + " finished!")

    all_pred = np.concatenate(pred_list, axis=0)
    all_target = np.concatenate(target_list, axis=0)

    auc = metrics.roc_auc_score(all_target, all_pred)
    all_pred[all_pred >= 0.5] = 1.0
    all_pred[all_pred < 0.5] = 0.0
    accuracy = metrics.accuracy_score(all_target, all_pred)

    return epoch_loss / N, accuracy, auc
Example #8
File: run.py  Project: XiangrongXu/ML
def train(num_epochs, model, params, optimizer, q_data, qa_data):
    N = len(q_data) // params.batch_size

    pred_list = []
    target_list = []
    epoch_loss = 0

    # switch the model to training mode
    model.train()

    for idx in range(N):
        q_one_seq = q_data[idx * params.batch_size:(idx + 1) *
                           params.batch_size, :]
        qa_batch_seq = qa_data[idx * params.batch_size:(idx + 1) *
                               params.batch_size, :]
        target = qa_data[idx * params.batch_size:(idx + 1) *
                         params.batch_size, :]

        target = (target - 1) / params.n_question
        target = np.floor(target)
        input_q = utils.variable(torch.LongTensor(q_one_seq), params.gpu)
        input_qa = utils.variable(torch.LongTensor(qa_batch_seq), params.gpu)
        target = utils.variable(torch.FloatTensor(target), params.gpu)
        target_to_1d = torch.chunk(target, params.batch_size, 0)
        target_1d = torch.cat(
            [target_to_1d[i] for i in range(params.batch_size)], 1)
        target_1d = target_1d.permute(1, 0)

        model.zero_grad()
        loss, filtered_pred, filtered_target = model.forward(
            input_q, input_qa, target_1d)
        loss.backward()
        nn.utils.clip_grad_norm(model.parameters(), params.maxgradnorm)
        optimizer.step()
        epoch_loss += utils.to_scalar(loss)

        right_target = np.asarray(filtered_target.data.tolist())
        right_pred = np.asarray(filtered_pred.data.tolist())

        pred_list.append(right_pred)
        target_list.append(right_target)

    all_pred = np.concatenate(pred_list, axis=0)
    all_target = np.concatenate(target_list, axis=0)

    auc = metrics.roc_auc_score(all_target, all_pred)
    all_pred[all_pred >= 0.5] = 1.0
    all_pred[all_pred < 0.5] = 0.0
    accuracy = metrics.accuracy_score(all_target, all_pred)

    return epoch_loss / N, accuracy, auc
Example #9
    def training_epoch_end(self, outputs, prefix='train'):
        losses = torch.stack([output['loss'] for output in outputs])
        sizes = torch.tensor([output['size'] for output in outputs], device=losses.device)
        loss_mean = (losses * sizes).sum() / sizes.sum()
        outs = [output['out'] for output in outputs]
        targets = [output['target'] for output in outputs]
        metrics = self.dataset.metrics_epoch(outs, targets)
        metrics = {f'{prefix}_{k}': v for k, v in metrics.items()}
        results = {f'{prefix}_loss': loss_mean, **metrics}
        results_scalar = {k: to_scalar(v) for k, v in results.items()}  # PL prefers torch.Tensor while we prefer float
        setattr(self, f'_{prefix}_results', results_scalar)
        if getattr(self.train_args, 'verbose', False):
            print(f'{prefix} set results:', results_scalar)
        return {f'{prefix}_loss': loss_mean, 'log': results}
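Because the hook above takes a prefix argument, a validation counterpart can reuse the same aggregation. A minimal sketch, not taken from the original project:

    def validation_epoch_end(self, outputs):
        # Hypothetical companion hook: reuse the training aggregation with a 'val' prefix.
        return self.training_epoch_end(outputs, prefix='val')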
Example #10
def test(model, params, optimizer, q_a_data, q_target_data, answer_data, repeated_time_gap, past_trail_counts, seq_time_gap):
    N = int(math.floor(len(q_a_data) / params.batch_size))

    pred_list = []
    target_list = []
    epoch_loss = 0
    model.eval()

    for idx in range(N):
        q_a_seq = q_a_data[idx * params.batch_size:(idx + 1) * params.batch_size, :]
        q_target_seq = q_target_data[idx * params.batch_size:(idx + 1) * params.batch_size, :]
        answer_seq = answer_data[idx * params.batch_size:(idx + 1) * params.batch_size, :]
        repeated_time_gap_seq = repeated_time_gap[idx * params.batch_size:(idx + 1) * params.batch_size, :]
        past_trail_counts_seq = past_trail_counts[idx * params.batch_size:(idx + 1) * params.batch_size, :]
        seq_time_gap_seq = seq_time_gap[idx * params.batch_size:(idx + 1) * params.batch_size, :]

        input_repeated_time_gap = utils.variable(torch.FloatTensor(repeated_time_gap_seq), params.gpu)
        input_past_trail_counts = utils.variable(torch.FloatTensor(past_trail_counts_seq), params.gpu)
        input_seq_time_gap = utils.variable(torch.FloatTensor(seq_time_gap_seq), params.gpu)

        target = (answer_seq - 1) / params.n_question
        target = np.floor(target)
        input_q_target = utils.variable(torch.LongTensor(q_target_seq), params.gpu)
        input_x = utils.variable(torch.LongTensor(q_a_seq), params.gpu)
        target = utils.variable(torch.FloatTensor(target), params.gpu)
        target_to_1d = torch.chunk(target, params.batch_size, 0)
        target_1d = torch.cat([target_to_1d[i] for i in range(params.batch_size)], 1)
        target_1d = target_1d.permute(1, 0)
        input_q_target_to_1d = torch.chunk(input_q_target, params.batch_size, 0)
        input_q_target_1d = torch.cat([input_q_target_to_1d[i] for i in range(params.batch_size)], 1)
        input_q_target_1d = input_q_target_1d.permute(1, 0)

        loss, filtered_pred, filtered_target = model.forward(input_x, input_q_target_1d, target_1d, input_repeated_time_gap, input_past_trail_counts, input_seq_time_gap)

        right_target = np.asarray(filtered_target.data.tolist())
        right_pred = np.asarray(filtered_pred.data.tolist())
        pred_list.append(right_pred)
        target_list.append(right_target)
        epoch_loss += utils.to_scalar(loss)

    all_pred = np.concatenate(pred_list, axis=0)
    all_target = np.concatenate(target_list, axis=0)

    auc = metrics.roc_auc_score(all_target, all_pred)
    all_pred[all_pred >= 0.5] = 1.0
    all_pred[all_pred < 0.5] = 0.0
    accuracy = metrics.accuracy_score(all_target, all_pred)

    return epoch_loss / N, accuracy, auc
Example #11
def test(epoch, test_loader, do_use_cuda, save_img_path=None):
    test_loss = []
    for batch_idx, (data, _) in enumerate(test_loader):
        start_time = time.time()
        if do_use_cuda:
            x = Variable(data, requires_grad=False).cuda()
        else:
            x = Variable(data, requires_grad=False)

        x_d, z_e_x, z_q_x, latents = vmodel(x)
        loss_1 = discretized_mix_logistic_loss(x_d,
                                               2 * x - 1,
                                               use_cuda=do_use_cuda)
        loss_2 = F.mse_loss(z_q_x, z_e_x.detach())
        loss_3 = .25 * F.mse_loss(z_e_x, z_q_x.detach())
        test_loss.append(to_scalar([loss_1, loss_2, loss_3]))
    test_loss_mean = np.asarray(test_loss).mean(0)
    if save_img_path is not None:
        x_tilde = sample_from_discretized_mix_logistic(x_d, nr_logistic_mix)
        idx = 0
        x_cat = torch.cat([x[idx], x_tilde[idx]], 0)
        images = x_cat.cpu().data
        pred = (((np.array(x_tilde.cpu().data)[0, 0] + 1.0) / 2.0) *
                float(max_pixel - min_pixel)) + min_pixel
        # input x is between 0 and 1
        real = (np.array(x.cpu().data)[0, 0] *
                float(max_pixel - min_pixel)) + min_pixel
        f, ax = plt.subplots(1, 3, figsize=(10, 3))
        ax[0].imshow(real, vmin=0, vmax=max_pixel)
        ax[0].set_title("original")
        ax[1].imshow(pred, vmin=0, vmax=max_pixel)
        ax[1].set_title("pred epoch %s test loss %s" %
                        (epoch, np.mean(test_loss_mean)))
        ax[2].imshow((pred - real)**2, cmap='gray')
        ax[2].set_title("error")
        f.tight_layout()
        plt.savefig(save_img_path)
        plt.close()
        print("saving example image")
        print("rsync -avhp [email protected]://%s" %
              os.path.abspath(save_img_path))

    return test_loss_mean
Example #12
def train(epoch, train_loader, do_use_cuda):
    print("starting epoch {}".format(epoch))
    train_loss = []
    for batch_idx, (data, _) in enumerate(train_loader):
        start_time = time.time()
        if do_use_cuda:
            x = Variable(data, requires_grad=False).cuda()
        else:
            x = Variable(data, requires_grad=False)
        opt.zero_grad()
        x_d, z_e_x, z_q_x, latents = vmodel(x)
        # with bigger model - latents is 64, 6, 6
        z_q_x.retain_grad()
        #loss_1 = F.binary_cross_entropy(x_d, x)
        # going into dml - x should be bt 0 and 1
        loss_1 = discretized_mix_logistic_loss(x_d,
                                               2 * x - 1,
                                               use_cuda=do_use_cuda)
        loss_1.backward(retain_graph=True)
        vmodel.embedding.zero_grad()
        z_e_x.backward(z_q_x.grad, retain_graph=True)

        loss_2 = F.mse_loss(z_q_x, z_e_x.detach())
        loss_2.backward(retain_graph=True)
        loss_3 = .25 * F.mse_loss(z_e_x, z_q_x.detach())
        loss_3.backward()
        opt.step()
        train_loss.append(to_scalar([loss_1, loss_2, loss_3]))
        if not batch_idx % 100:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {} Time: {}'.format(
                epoch, batch_idx * len(data), len(train_loader.dataset),
                100. * batch_idx / float(len(train_loader)),
                np.asarray(train_loss).mean(0),
                time.time() - start_time))

    return np.asarray(train_loss).mean(0)
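The manual gradient plumbing above (retaining z_q_x.grad and calling z_e_x.backward(z_q_x.grad)) copies the reconstruction gradient across the quantization step, i.e. the VQ-VAE straight-through estimator written out by hand. A more common way to get the same effect, sketched here with a hypothetical decoder call rather than this repository's vmodel, builds the straight-through code once and backpropagates a single combined loss:

import torch.nn.functional as F

# Straight-through trick: the decoder sees the quantized code z_q_x,
# but gradients flow back to the encoder output z_e_x unchanged.
z_q_x_st = z_e_x + (z_q_x - z_e_x).detach()
x_d = decoder(z_q_x_st)                                   # hypothetical decoder call

recon_loss = F.mse_loss(x_d, x)                           # stand-in for the DML loss above
codebook_loss = F.mse_loss(z_q_x, z_e_x.detach())         # pulls codebook entries toward encodings
commit_loss = 0.25 * F.mse_loss(z_e_x, z_q_x.detach())    # commitment term

loss = recon_loss + codebook_loss + commit_loss
opt.zero_grad()
loss.backward()
opt.step()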
Example #13
def test(epoch,test_loader,DEVICE,save_img_path=None):
    test_loss = []
    kl_weight = 1
    for batch_idx, (data, _) in enumerate(test_loader):
        start_time = time.time()
        x = Variable(data, requires_grad=False).to(DEVICE)
        x_di = vmodel(x)
        # use cuda?
        dmll_loss = discretized_mix_logistic_loss(x_di, 2*x-1, nr_mix=nr_mix,use_cuda=args.cuda)
        kl_loss = kl_weight*latent_loss(vmodel.z_mean, vmodel.z_sigma)
        loss = dmll_loss+kl_loss
        test_loss.append(to_scalar([kl_loss, dmll_loss]))

    test_loss_mean = np.asarray(test_loss).mean(0)
    #if save_img_path is not None:
    #    x_tilde = sample_from_discretized_mix_logistic(x_d, nr_logistic_mix)
    #    idx = 0
    #    x_cat = torch.cat([x[idx], x_tilde[idx]], 0)
    #    images = x_cat.cpu().data
    #    pred = (((np.array(x_tilde.cpu().data)[0,0]+1.0)/2.0)*float(max_pixel-min_pixel)) + min_pixel
    #    # input x is between 0 and 1
    #    real = (np.array(x.cpu().data)[0,0]*float(max_pixel-min_pixel))+min_pixel
    #    f, ax = plt.subplots(1,3, figsize=(10,3))
    #    ax[0].imshow(real, vmin=0, vmax=max_pixel)
    #    ax[0].set_title("original")
    #    ax[1].imshow(pred, vmin=0, vmax=max_pixel)
    #    ax[1].set_title("pred epoch %s test loss %s" %(epoch,np.mean(test_loss_mean)))
    #    ax[2].imshow((pred-real)**2, cmap='gray')
    #    ax[2].set_title("error")
    #    f.tight_layout()
    #    plt.savefig(save_img_path)
    #    plt.close()
    #    print("saving example image")
    #    print("rsync -avhp [email protected]://%s" %os.path.abspath(save_img_path))

    return test_loss_mean
Example #14
def train_epoch(data_train, model, optimizer, criterion_t, criterion_i, args):

    model.train()
    epoch_loss = 0
    # data_train = remove_neg_data(data_train)
    # pdb.set_trace()
    # all_data: corpus_ids, corpus_tokens, corpus_pos_tags, corpus_trigger_labels, corpus_interaction_idxs, corpus_interaction_labels
    for d in tqdm.tqdm(data_train):

        model.zero_grad()

        tokens = d[1]
        pos_tags = d[2]
        trigger_labels = d[3]
        assert len(tokens) == len(trigger_labels)

        tokens = [args.word2idx[i] for i in tokens]
        pos_tags = [args.pos2idx[i] for i in pos_tags]
        trigger_labels = [args.triggerlabel2idx[i] for i in trigger_labels]

        tokens = Variable(torch.LongTensor(np.array([tokens]).transpose()))
        pos_tags = Variable(torch.LongTensor(np.array([pos_tags]).transpose()))
        trigger_labels = Variable(
            torch.LongTensor(np.array(trigger_labels).transpose())
        )  # labels have to be one-dim for NLL loss

        if args.cuda:
            tokens, pos_tags, trigger_labels = [
                tokens.cuda(),
                pos_tags.cuda(),
                trigger_labels.cuda()
            ]

        # first predict for triggers
        scores_trigger = model(tokens,
                               pos_tags,
                               pair_idxs=None,
                               task='trigger')
        loss_trigger = criterion_t(scores_trigger, trigger_labels)

        # second predict edges, there are two cases
        if args.pred_edge_with_gold:
            # in this case, just use the gold pairs and predict the edge
            pair_idxs = d[4]
            interaction_labels = d[5]
            assert len(pair_idxs) == len(interaction_labels)
            # only select Theme and Cause edges
            # this is to exclude the Site ... args
            # pair_idxs = [pair_idxs[i] for i in range(len(pair_idxs)) if interaction_labels[i] not in interaction_ignore_types]
            # interaction_labels = [interaction_labels[i] for i in range(len(interaction_labels)) if interaction_labels[i] not in interaction_ignore_types]

            # we construct the pairs using gold trigger labels
            # note that there can be None pairs
            pair_idxs, interaction_labels = construct_pairs(
                y_preds=[args.triggerlabel2idx[i] for i in d[3]],
                gold_pair_idxs=d[4],
                gold_int_labels=d[5],
                gold_trigger_labels=d[3],
                args=args,
                test=False)

        elif args.pred_edge_with_pred:
            # in this case, first construct the pairs with predicted triggers, pairs: (T, E), (T, T)
            # the returned pair_idxs and interaction_labels can be empty

            y_preds = scores_trigger.max(dim=1, keepdim=False)[1].tolist()
            # we construct the pairs using predicted triggers
            pair_idxs, interaction_labels = construct_pairs(
                y_preds=y_preds,
                gold_pair_idxs=d[4],
                gold_int_labels=d[5],
                gold_trigger_labels=d[3],
                args=args,
                test=False)
        assert len(pair_idxs) == len(interaction_labels)
        assert set(interaction_labels).intersection(
            set(interaction_ignore_types)) == set(
                []), pdb.set_trace()  #print(interaction_labels)

        interaction_labels = [
            args.interactionlabel2idx[i] for i in interaction_labels
        ]
        interaction_labels = Variable(
            torch.LongTensor(np.array(interaction_labels).transpose()))
        if args.cuda:
            interaction_labels = interaction_labels.cuda()

        loss_interaction = 0
        if len(pair_idxs) > 0:
            # Only compute loss for those sentences which have interactions
            scores_interaction = model(tokens,
                                       pos_tags,
                                       pair_idxs,
                                       task='interaction')
            loss_interaction = criterion_i(scores_interaction,
                                           interaction_labels)

        loss = args.trigger_w * loss_trigger + args.interaction_w * loss_interaction
        loss.backward()
        optimizer.step()
        epoch_loss += utils.to_scalar(loss)
    print('epoch loss:{}'.format(epoch_loss))
Example #15
def test(model, params, optimizer, q_a_data, q_target_data, answer_data):
    N = int(math.floor(len(q_a_data) / params.batch_size))

    pred_list = []
    target_list = []
    epoch_loss = 0
    model.eval()

    for idx in range(N):
        q_a_seq = q_a_data[idx * params.batch_size:(idx + 1) * params.batch_size]
        q_target_seq = q_target_data[idx * params.batch_size:(idx + 1) * params.batch_size]
        answer_seq = answer_data[idx * params.batch_size:(idx + 1) * params.batch_size]

        max_len = max(len(q_a_seq[i]) for i in range(params.batch_size))

        q_a_dataArray = np.zeros((params.batch_size, max_len))
        q_target_dataArray = np.zeros((params.batch_size, max_len))
        answer_dataArray = np.zeros((params.batch_size, max_len))
        for j in range(params.batch_size):
            dat = q_a_seq[j]
            q_a_dataArray[j, :len(dat)] = dat

            q_target_dat = q_target_seq[j]
            q_target_dataArray[j, :len(q_target_dat)] = q_target_dat

            answer_dat = answer_seq[j]
            answer_dataArray[j, :len(answer_dat)] = answer_dat

        # q_a_dataArray = q_a_data[idx]
        # q_target_dataArray = q_target_data[idx]
        # answer_dataArray = answer_data[idx]

        target = (answer_dataArray - 1) / params.n_question
        target = np.floor(target)
        input_q_target = utils.variable(torch.LongTensor(q_target_dataArray), params.gpu)
        input_x = utils.variable(torch.LongTensor(q_a_dataArray), params.gpu)
        target = utils.variable(torch.FloatTensor(target), params.gpu)
        target_to_1d = torch.chunk(target, params.batch_size, 0)
        target_1d = torch.cat([target_to_1d[i] for i in range(params.batch_size)], 1)
        target_1d = target_1d.permute(1, 0)
        input_q_target_to_1d = torch.chunk(input_q_target, params.batch_size, 0)
        input_q_target_1d = torch.cat([input_q_target_to_1d[i] for i in range(params.batch_size)], 1)
        input_q_target_1d = input_q_target_1d.permute(1, 0)

        loss, filtered_pred, filtered_target = model.forward(input_x, input_q_target_1d, target_1d)

        right_target = np.asarray(filtered_target.data.tolist())
        right_pred = np.asarray(filtered_pred.data.tolist())
        pred_list.append(right_pred)
        target_list.append(right_target)
        epoch_loss += utils.to_scalar(loss)

    all_pred = np.concatenate(pred_list, axis=0)
    all_target = np.concatenate(target_list, axis=0)

    auc = metrics.roc_auc_score(all_target, all_pred)
    all_pred[all_pred >= 0.5] = 1.0
    all_pred[all_pred < 0.5] = 0.0
    accuracy = metrics.accuracy_score(all_target, all_pred)

    return epoch_loss / N, accuracy, auc
Example #16
def train(model, epoch, params, optimizer, q_data, q_target_data, qa_data):
    N = int(math.floor(len(q_data) / params.batch_size))

    # shuffle data
    shuffle_index = np.random.permutation(q_data.shape[0])
    q_data = q_data[shuffle_index]  # (question, answer) pairs used as input
    q_target_data = q_target_data[shuffle_index]  # next question IDs
    qa_data = qa_data[shuffle_index]  # next (question, answer) pairs

    pred_list = []
    target_list = []
    epoch_loss = 0
    model.train()

    for idx in range(N):
        q_target_seq = q_target_data[idx * params.batch_size:(idx + 1) *
                                     params.batch_size, :]  # next question IDs
        qa_batch_seq = q_data[idx * params.batch_size:(idx + 1) *
                              params.batch_size, :]  # used as input
        target = qa_data[idx * params.batch_size:(idx + 1) *
                         params.batch_size, :]  # next answers

        target = (target - 1) / params.n_question
        target = np.floor(target)  # 1 means correct, 0 means incorrect, -1 marks zero-padded positions
        input_q_target = utils.variable(torch.LongTensor(q_target_seq),
                                        params.gpu)
        input_x = utils.variable(torch.LongTensor(qa_batch_seq), params.gpu)
        target = utils.variable(torch.FloatTensor(target), params.gpu)
        target_to_1d = torch.chunk(target, params.batch_size, 0)
        target_1d = torch.cat(
            [target_to_1d[i] for i in range(params.batch_size)], 1)
        target_1d = target_1d.permute(1, 0)
        input_q_target_to_1d = torch.chunk(input_q_target, params.batch_size,
                                           0)
        input_q_target_1d = torch.cat(
            [input_q_target_to_1d[i] for i in range(params.batch_size)], 1)
        input_q_target_1d = input_q_target_1d.permute(1, 0)

        model.zero_grad()
        loss, filtered_pred, filtered_target = model(
            input_x, input_q_target_1d,
            target_1d)  # encoded answers 10x800, question IDs 8000x1, answers 8000x1
        loss.backward()
        nn.utils.clip_grad_norm_(model.parameters(), params.max_grad_norm)
        optimizer.step()
        epoch_loss += utils.to_scalar(loss)

        right_target = np.asarray(filtered_target.data.tolist())
        right_pred = np.asarray(filtered_pred.data.tolist())
        pred_list.append(right_pred)
        target_list.append(right_target)

    all_pred = np.concatenate(pred_list, axis=0)
    all_target = np.concatenate(target_list, axis=0)
    if (epoch + 1) % params.decay_epoch == 0:
        new_lr = params.init_lr * params.lr_decay
        if new_lr < params.final_lr:
            new_lr = params.final_lr
        utils.adjust_learning_rate(optimizer, new_lr)
    auc = metrics.roc_auc_score(all_target, all_pred)
    all_pred[all_pred >= 0.5] = 1.0
    all_pred[all_pred < 0.5] = 0.0
    accuracy = metrics.accuracy_score(all_target, all_pred)
    # f1 = metrics.f1_score(all_target, all_pred)

    return epoch_loss / N, accuracy, auc
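The epoch-level decay above relies on utils.adjust_learning_rate, whose body is not shown in any of these snippets. The usual implementation simply rewrites the learning rate on every optimizer parameter group; a sketch under that assumption:

def adjust_learning_rate(optimizer, new_lr):
    # Sketch only: set every parameter group's learning rate to new_lr.
    for param_group in optimizer.param_groups:
        param_group['lr'] = new_lr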
Example #17
        ner_model.train()
        for f_f, f_p, b_f, b_p, w_f, tg_v, mask_v, len_v in tqdm(
                itertools.chain.from_iterable(dataset_loader),
                mininterval=2,
                desc=' - Tot it %d (epoch %d)' %
            (tot_length, args.start_epoch),
                leave=False,
                file=sys.stdout):
            # What are f_f, f_p, b_f, b_p, w_f, tg_v, mask_v, len_v?
            # Ex. for i in tqdm(range(1000)):
            f_f, f_p, b_f, b_p, w_f, tg_v, mask_v = packer.repack_vb(
                f_f, f_p, b_f, b_p, w_f, tg_v, mask_v, len_v)
            ner_model.zero_grad()
            scores = ner_model(f_f, f_p, b_f, b_p, w_f)
            loss = crit_ner(scores, tg_v, mask_v)
            epoch_loss += utils.to_scalar(loss)
            if args.co_train:
                cf_p = f_p[0:-1, :].contiguous()
                cb_p = b_p[1:, :].contiguous()
                cf_y = w_f[1:, :].contiguous()
                cb_y = w_f[0:-1, :].contiguous()
                cfs, _ = ner_model.word_pre_train_forward(f_f, cf_p)
                loss = loss + args.lambda0 * crit_lm(cfs, cf_y.view(-1))
                cbs, _ = ner_model.word_pre_train_backward(b_f, cb_p)
                loss = loss + args.lambda0 * crit_lm(cbs, cb_y.view(-1))
            loss.backward()
            nn.utils.clip_grad_norm(ner_model.parameters(), args.clip_grad)
            optimizer.step()
        epoch_loss /= tot_length

        # update lr
Example #18
def train(model, params, optimizer, q_data, qa_data, a_data):
    N = int(math.floor(len(q_data) / params.batch_size))  # number of batches

    # shuffle data
    shuffle_index = np.random.permutation(q_data.shape[0])
    q_data = q_data[shuffle_index]
    qa_data = qa_data[shuffle_index]
    a_data = a_data[shuffle_index]

    pred_list = []
    target_list = []
    epoch_loss = 0
    model.train()

    start = time.time()

    for idx in range(N):
        q_one_seq = q_data[idx * params.batch_size:(idx + 1) *
                           params.batch_size, :]
        qa_batch_seq = qa_data[idx * params.batch_size:(idx + 1) *
                               params.batch_size, :]
        a_batch_seq = a_data[idx * params.batch_size:(idx + 1) *
                             params.batch_size, :]
        target = qa_data[idx * params.batch_size:(idx + 1) *
                         params.batch_size, :]

        target = (target - 1) / params.n_question
        target = np.floor(target)  # round down
        input_q = utils.variable(torch.LongTensor(q_one_seq), params.gpu)
        input_qa = utils.variable(torch.LongTensor(qa_batch_seq), params.gpu)
        input_a = utils.variable(torch.LongTensor(a_batch_seq), params.gpu)
        target = utils.variable(torch.FloatTensor(target), params.gpu)
        target_to_1d = torch.chunk(target, params.batch_size, 0)
        target_1d = torch.cat(
            [target_to_1d[i] for i in range(params.batch_size)], 1)
        target_1d = target_1d.permute(1, 0)  # swap dimensions

        model.zero_grad()
        loss, filtered_pred, filtered_target = model(input_q, input_qa,
                                                     input_a, target_1d)
        loss.backward()  # backpropagate once per batch
        nn.utils.clip_grad_norm_(model.parameters(), params.max_grad_norm)
        optimizer.step()
        epoch_loss += utils.to_scalar(loss)

        # print("training : batch " + str(idx) + " finished!")

        right_target = np.asarray(filtered_target.data.tolist())
        right_pred = np.asarray(filtered_pred.data.tolist())
        pred_list.append(right_pred)
        target_list.append(right_target)

    all_pred = np.concatenate(pred_list, axis=0)
    all_target = np.concatenate(target_list, axis=0)
    # if (idx + 1) % params.decay_epoch == 0:
    #     utils.adjust_learning_rate(optimizer, params.init_lr * params.lr_decay)
    # print('lr: ', params.init_lr / (1 + 0.75))
    auc = metrics.roc_auc_score(all_target, all_pred)
    all_pred[all_pred >= 0.5] = 1.0
    all_pred[all_pred < 0.5] = 0.0
    accuracy = metrics.accuracy_score(all_target, all_pred)
    # f1 = metrics.f1_score(all_target, all_pred)

    end = time.time()
    print("epoch time:" + str(end - start))

    return epoch_loss / N, accuracy, auc
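This train function and the test in Example #7 share the signature (model, params, optimizer, q_data, qa_data, a_data) and both return (loss, accuracy, auc), so a driver loop can pair them directly. A minimal sketch; num_epochs and the *_test arrays are illustrative names, not from any of the repositories:

import torch

num_epochs = 50                     # illustrative, not a repository setting
best_auc = 0.0
for epoch in range(num_epochs):
    train_loss, train_acc, train_auc = train(model, params, optimizer,
                                             q_data, qa_data, a_data)
    test_loss, test_acc, test_auc = test(model, params, optimizer,
                                         q_data_test, qa_data_test, a_data_test)
    print('epoch %d  train auc %.4f  test auc %.4f' % (epoch + 1, train_auc, test_auc))
    if test_auc > best_auc:
        best_auc = test_auc
        torch.save(model.state_dict(), 'best_model.pt')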