Example #1
    def train_max_epochs(self, args, train0, train1, dev0, dev1, vocab, no_of_epochs, writer, time, save_epochs_flag=False, 
        save_batch_flag=False, save_batch=5):
        print("No of epochs: ", no_of_epochs)
        self.train()
        self.enc_optim = optim.AdamW(self.encoder.parameters(), lr=args.learning_rate, betas=(self.beta1, self.beta2))
        self.gen_optim = optim.AdamW(self.generator.parameters(), lr=args.learning_rate, betas=(self.beta1, self.beta2))
        self.discrim1_optim = optim.AdamW(self.discriminator1.parameters(), lr=args.learning_rate, betas=(self.beta1, self.beta2))
        self.discrim2_optim = optim.AdamW(self.discriminator2.parameters(), lr=args.learning_rate, betas=(self.beta1, self.beta2))
        
        Path(args.saves_path).mkdir(parents=True, exist_ok=True)        
        saves_path = os.path.join(args.saves_path, utils.get_filename(args, time, "model"))
        Path(saves_path).mkdir(parents=True, exist_ok=True)
        flag = True
        with autograd.detect_anomaly():
            for epoch in range(no_of_epochs):
                random.shuffle(train0)
                random.shuffle(train1)
                batches0, batches1, _1, _2 = utils.get_batches(train0, train1, vocab.word2id, args.batch_size, noisy=True)
                dev_batches0 = []
                dev_batches1 = []
                if self.args.dev:
                    dev_batches0, dev_batches1, _, _ = utils.get_batches(dev0, dev1, vocab.word2id, args.batch_size, noisy=True)
                # batches0, batches1, _1, _2 = utils.get_batches_bpe(train0, train1, vocab.word2id,
                # args.batch_size, noisy=True)
                
                random.shuffle(batches0)
                random.shuffle(batches1)
                print("Epoch: ", epoch)
                self.logger.info("Epoch: "+str(epoch))

                train_flag = self(args, batches0, batches1, dev_batches0, dev_batches1, vocab, no_of_epochs, epoch, writer, time, save_epochs_flag=False, 
                    save_batch_flag=False, save_batch=5)
                if train_flag:
                    break
Example #2
    def loss_function(self, forward_ret, labels=None):
        x_gen, x_real = forward_ret
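        # Enable autograd anomaly detection for the discriminator forward passes
        # only in debug mode; the check adds noticeable overhead otherwise.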
        if self.debug:
            debug_context = autograd.detect_anomaly()
        else:
            debug_context = contextlib.nullcontext()
        with debug_context:
            d_p = self.disc(x_real)
            d_q = self.disc(x_gen)

        if self.train_disc():
            if self.flags.gan_loss == 'bce':
                loss = F.binary_cross_entropy_with_logits(d_p, torch.ones_like(d_p)) + \
                    F.binary_cross_entropy_with_logits(d_q, torch.zeros_like(d_q))
            elif self.flags.gan_loss == 'wgan':
                grad_penalty = self.gradient_penalty(x_real,
                                                     x_gen,
                                                     context=debug_context)
                loss = -d_p.mean() + d_q.mean() + (
                    10.0 * grad_penalty) + 1e-3 * (d_p**2).mean()
            self.d_loss = loss.item()
        else:
            if self.flags.gan_loss == 'bce':
                loss = F.binary_cross_entropy_with_logits(d_p, torch.zeros_like(d_p)) + \
                    F.binary_cross_entropy_with_logits(d_q, torch.ones_like(d_q))
            elif self.flags.gan_loss == 'wgan':
                loss = d_p.mean() - d_q.mean()
            self.g_loss = loss.item()

        return loss, self.g_loss, self.d_loss
Example #3
 def train(self):
     train_loss = np.array([])
     valid_loss = np.array([])
     print("start train")
     for epoch in range(self.epoch_num):
         with detect_anomaly():
             print('epoch{0}'.format(epoch))
             start = time.time()
             self.model.train()
             tmp_train_loss = self._run(self.model, self.criterion, self.train_data_loader, self.train_batch_size, mode='train')
             train_loss = np.append(train_loss, tmp_train_loss.clone().numpy())
             # validation
             self.model.eval()
             with torch.no_grad():
                 tmp_valid_loss = self._run(self.model, self.criterion, self.valid_data_loader, self.valid_batch_size, mode='validation')
                 valid_loss = np.append(valid_loss, tmp_valid_loss.clone().numpy())
                  
             if (epoch + 1) % 10 == 0:
                 torch.save(self.model.state_dict(), self.save_path + 'wave_u_net{0}.ckpt'.format(epoch + 1))
             
             end = time.time()
             print('----execute time: {0}'.format(end - start))
         plt.plot(train_loss)
         print(train_loss)
         plt.show()
Example #4
    def loss_function(self, input, output, mean, logvar):
        """
        Given input, output, mean and std, compute and return loss
        """

        # to prevent log of 0
        epsilon = 1e-6

        with autograd.detect_anomaly():

            L_recon = -1 * torch.sum(input * torch.log(output + epsilon) +
                                     (1 - input) * torch.log(1 - output + epsilon),
                                     dim=1)
            # L_reg = -0.5 * torch.sum(1 + std - mean.pow(2) - std.exp())

            # L_recon = torch.nn.functional.binary_cross_entropy(output, input)

            L_reg = -0.5 * torch.sum(1 + logvar - mean.pow(2) - logvar.exp())

            # L_reg = torch.sum(torch.sum(-1*torch.log(std) + ((std.pow(2) + mean.pow(2))-1)*0.5, dim=1, dim=0)

            # Normalise by same number of elements as in reconstruction if we average recon
            L_reg /= input.size(dim=0)

            # * self.image_dim  #####CHECK IF THIS IS NEEDED AGAIN!!!!!!

        # get total loss
        total_loss = torch.mean(
            L_recon + L_reg, dim=0
        )  # may need to be the sum###############################################

        return total_loss
Example #5
def train(epoch, batch_logger, train_loader):
    model.train()

    if epoch == 120:
        for param_group in optimizer.param_groups:
            param_group['lr'] = 0.0001

    if epoch == 150:
        for param_group in optimizer.param_groups:
            param_group['lr'] = 0.00001

    for i, data in enumerate(train_loader):
        with autograd.detect_anomaly():
            data = data.to(device)
            #print(data.y)
            optimizer.zero_grad()
            end_point = model(data)
            
            loss = F.nll_loss(end_point, data.y)
            pred = end_point.max(1)[1]
            acc = (pred.eq(data.y).sum().item())/len(data.y)
            
            loss.backward()
            optimizer.step()
            
            if i % 10 == 0:
                batch_logger.log({'epoch': epoch,'batch': i + 1,'loss': loss.item(),'acc': acc})
Example #6
    def train(self, xs, ys):
        with autograd.detect_anomaly():
            xs = [self._t_cuda(x) for x in xs]
            ys = [self._t_cuda(y) for y in ys]

            # zero the gradients, i.e. set the derivative of the loss w.r.t. the weights to 0
            self.optimizer.zero_grad()
            # calling the model automatically invokes forward under the hood to compute the results
            loss, focal_loss, pull_loss, push_loss, off_loss = self.network(
                xs, ys)
            # compute the loss
            loss = loss.mean()

            #added
            focal_loss = focal_loss.mean()
            pull_loss = pull_loss.mean()
            push_loss = push_loss.mean()
            off_loss = off_loss.mean()

            # backpropagate to compute the gradients
            loss.backward()
            # update the parameters, i.e. w = w - alpha * grad
            self.optimizer.step()

            return loss, focal_loss, pull_loss, push_loss, off_loss
Example #7
def train(model, loader, optimizer, n_iter):
    model.train()
    err = 0.0
    i = 0
    pbar = tqdm(total=len(loader), desc='records loaded')
    for i, (seq, prof, _, dmat, pdb, *_) in enumerate(batch_generator(loader, prepare_xu_batch)):
        optimizer.zero_grad()

        cmap_hat = predict(model, seq, prof)

        if n_iter % UPLOAD_IMAGE_EVERY == 0:
            dmat_hat = cmap_to_dmat(cmap_hat)
            upload_images(dmat, dmat_hat, pdb, n_iter, '%s/%s' % (model.name, 'train'))

        loss = get_loss(cmap_hat, dmat)
        err += loss.item()
        e = err / (i + 1.)

        writer.add_scalars('%s/Loss' % model.name, {"train": loss.item()}, n_iter)

        try:
            with autograd.detect_anomaly():
                loss.backward()
        except RuntimeError as e:
            raise e

        optimizer.step_and_update_lr(loss.item())
        lr = optimizer.lr

        pbar.set_description("Training Loss:%.6f, LR: %.6f (L=%d)" % (e, lr, seq.size(1)))
        pbar.update(seq.size(0))
        n_iter += 1

    pbar.close()
    return n_iter
Example #8
    def train_epoch(self, epoch):
        """
        Evaluate the model on the train set.
        """
        t1 = time()
        output = {
            'tp': [],
            'fp': [],
            'fn': [],
            'tn': [],
            'loss': [],
            'preds': []
        }
        train_info = []

        self.model = self.model.train()
        train_iter = self.iterator(self.data['train'],
                                   batch_size=self.params['batch'],
                                   shuffle_=self.params['shuffle_data'])
        self.optimizer.zero_grad()
        for batch_idx, batch in enumerate(train_iter):
            batch = self.convert_batch(batch)

            with autograd.detect_anomaly():
                loss, stats, predictions, select = self.model(batch)
                loss.backward()  # backward computation

            output['loss'] += [loss.item()]
            output['tp'] += [stats['tp'].to('cpu').data.numpy()]
            output['fp'] += [stats['fp'].to('cpu').data.numpy()]
            output['fn'] += [stats['fn'].to('cpu').data.numpy()]
            output['tn'] += [stats['tn'].to('cpu').data.numpy()]
            output['preds'] += [predictions.to('cpu').data.numpy()]
            train_info += [
                batch['info'][select[0].to('cpu').data.numpy(),
                              select[1].to('cpu').data.numpy(),
                              select[2].to('cpu').data.numpy()]
            ]
            # Accumulate gradients (by Yuwei Xu)
            if (batch_idx + 1) % self.accumulation_steps == 0:
                nn.utils.clip_grad_norm_(self.model.parameters(),
                                         self.gc)  # gradient clipping
                self.optimizer.step()
                self.optimizer.zero_grad()
        t2 = time()
        if self.window:
            total_loss, scores = self.subdocs_performance(
                output['loss'], output['preds'], train_info)
        else:
            total_loss, scores = self.performance(output)

        self.train_res['loss'] += [total_loss]
        self.train_res['score'] += [scores[self.primary_metric]]
        print('Epoch: {:02d} | TRAIN | LOSS = {:.05f}, '.format(
            epoch, total_loss),
              end="")
        print_results(scores, [], self.show_class, t2 - t1)
Example #9
    def train(
        self,
        epochs: int,
        val_frequency: int,
        print_frequency: int = 20,
        log_frequency: int = 5,
        start_epoch: int = 0,
    ):
        self.model.train()
        with autograd.detect_anomaly():
            for epoch in range(start_epoch, epochs):
                self.model.train()
                data_load_start_time = time.time()

                # Iterate over the samples in the training set
                for batch, labels, fnames in self.train_loader:
                    batch = batch.to(self.device)
                    labels = labels.to(self.device)
                    data_load_end_time = time.time()

                    batch = batch.float()
                    labels = labels.float()

                    # Forward pass of the CNN
                    logits = self.model.forward(batch)

                    labels = torch.unsqueeze(labels, dim=1)

                    # Calculate the loss
                    loss = self.criterion(logits, labels)

                    # Backpropagate error
                    loss.backward()

                    # Update the optimiser
                    self.optimizer.step()
                    self.optimizer.zero_grad()

                    # Log times and loss
                    data_load_time = data_load_end_time - data_load_start_time
                    step_time = time.time() - data_load_end_time
                    if ((self.step + 1) % log_frequency) == 0:
                        self.log_metrics(epoch, loss, data_load_time,
                                         step_time)

                    if ((self.step + 1) % print_frequency) == 0:
                        self.print_metrics(epoch, loss, data_load_time,
                                           step_time)
                    self.step += 1
                    data_load_start_time = time.time()

                self.summary_writer.add_scalar("epoch", epoch, self.step)
                if ((epoch + 1) % val_frequency) == 0:
                    self.validate(epoch)
                    # self.validate() will put the model in validation mode,
                    # so we have to switch back to train mode afterwards
                    self.model.train()
Example #10
    def UCE_loss(self, alpha, soft_output):
        with autograd.detect_anomaly():
            alpha_0 = alpha.sum(1).unsqueeze(-1).repeat(1, self.output_dim)
            entropy_reg = Dirichlet(alpha).entropy()
            UCE_loss = torch.sum(
                soft_output *
                (torch.digamma(alpha_0) -
                 torch.digamma(alpha))) - self.regr * torch.sum(entropy_reg)

            return UCE_loss
Example #11
 def test_g1(self):
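     # Build a small graph from a slice of a leaf tensor and run backward under
     # detect_anomaly, which reports the forward op that produced a NaN/Inf gradient.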
     with autograd.detect_anomaly():
         inp = torch.rand(5, 5, requires_grad=True)
         x = inp[1:3, 1:3]
         x = x**2
         d = x.sum()
         print(d.grad)
         d.backward(retain_graph=True)
         print(d.grad)
         print(inp.grad)
Example #12
 def _fwd(self, X):
     with torch.no_grad():
         with autograd.detect_anomaly():
             try:
                 X = X.to(self.devices[0])
                 y = self.tcn(X).detach().cpu().numpy()
             except Exception as e:
                 os.system('clear')
                 Tools.pyout(e, force=True)
                 sys.exit(0)
     return y
Example #13
    def train_epoch(self, epoch):
        """
        Train model on the training set for 1 epoch, estimate performance and average loss.
        """
        t1 = time.time()
        output_tr = {
            'tp': [],
            'fp': [],
            'fn': [],
            'tn': [],
            'loss': [],
            'preds': [],
            'truth': [],
            'probs': []
        }

        self.model = self.model.train()
        train_iter = self.iterator(self.data['train'],
                                   batch_size=self.params['batch'],
                                   shuffle_=True)

        for batch in train_iter:
            batch = self.convert_batch(batch)

            with autograd.detect_anomaly():
                self.optimizer.zero_grad()

                loss, stats, probs, preds, truth, att_scores = self.model(
                    batch)
                output_tr['preds'] += preds.to('cpu').data.tolist()
                output_tr['probs'] += probs.to('cpu').data.tolist()
                output_tr['truth'] += truth.to('cpu').data.tolist()
                output_tr['loss'] += [loss.item()]
                output_tr['tp'] += [stats['tp'].to('cpu').data.numpy()]
                output_tr['fp'] += [stats['fp'].to('cpu').data.numpy()]
                output_tr['fn'] += [stats['fn'].to('cpu').data.numpy()]
                output_tr['tn'] += [stats['tn'].to('cpu').data.numpy()]

            loss.backward()  # backward computation
            nn.utils.clip_grad_norm_(self.model.parameters(),
                                     self.params['gc'])  # gradient clipping
            self.optimizer.step()  # update

        t2 = time.time()

        # estimate performance
        total_loss, scores = self.performance(output_tr)
        self.train_res['loss'] += [total_loss]
        self.train_res['score'] += [scores[self.primary_metric]]

        print('Epoch: {:02d} | TRAIN | LOSS = {:.04f}'.format(
            epoch, total_loss),
              end="")
        print_results(scores, self.show_class, t2 - t1)
Example #14
 def test_g2(self):
     with autograd.detect_anomaly():
         x = torch.zeros(5, 5, requires_grad=True)
         inp = torch.tensor([1, 3, 1, 3], requires_grad=True)
         x[inp[0]:inp[1], inp[2]:inp[3]] = 1
         x = x**2
         d = x.sum()
         print(d.grad)
         d.backward(retain_graph=True)
         print(d.grad)
         print(inp.grad)
Example #15
File: m3.py Project: yotamfr/gomut
def train(model, loader, optimizer, n_iter):
    model.train()
    err = 0.0
    i = 0
    pbar = tqdm(total=len(loader), desc='records loaded')
    for i, (seq, beta, prof, dmat, dssp, pdb,
            *_) in enumerate(batch_generator(loader, prepare_pdb_batch)):
        optimizer.zero_grad()

        cmap_hat, dssp_hat = predict(model, seq, beta, prof)
        cmap = get_contact_map(dmat)

        losses = get_loss(cmap_hat, cmap, dssp_hat, dssp)
        loss = sum(losses)
        err += loss.item()
        e = err / (i + 1.)

        writer.add_scalars('M3/Loss', {"train": loss.item()}, n_iter)
        writer.add_scalars('M3/CMAP_BCE', {"train": losses[0].item()}, n_iter)
        writer.add_scalars('M3/SYM_L1', {"train": losses[1].item()}, n_iter)
        if len(losses) == 3:
            writer.add_scalars('M3/SS_CE', {"train": losses[2].item()}, n_iter)

        try:
            with autograd.detect_anomaly():
                loss.backward()
        except RuntimeError as e:
            raise e

        if n_iter % UPLOAD_IMAGE_EVERY == 0:
            for cm1, cm2, dm, pdb_id in zip(cmap_hat.data.cpu().numpy(),
                                            cmap.float().data.cpu().numpy(),
                                            dmat.data.cpu().numpy(), pdb):
                writer.add_image('M3/%s/cmap_pred' % pdb_id,
                                 to_colormap_image(cm1),
                                 n_iter,
                                 dataformats='HWC')
                writer.add_image('M3/%s/cmap_true' % pdb_id,
                                 to_colormap_image(cm2),
                                 n_iter,
                                 dataformats='HWC')
                # writer.add_image('M3/%s/dmap_true' % pdb_id, to_colormap_image(dm), n_iter, dataformats='HWC')

        optimizer.step_and_update_lr(loss.item())
        lr = optimizer.lr

        pbar.set_description("Training Loss:%.6f, LR: %.6f (L=%d)" %
                             (e, lr, seq.size(1)))
        pbar.update(seq.size(0))
        n_iter += 1

    pbar.close()
    return n_iter
Example #16
def train(train_data):
    # Turn on training mode which enables dropout.
    model.train()
    total_loss = 0.
    total_aux_loss = 0.
    start_time = time.time()
    ntokens = len(corpus.dictionary)
    for batch, i in enumerate(range(0, train_data.size(1) - 1, BPTT)):
        with autograd.detect_anomaly():
            data, targets = get_batch(train_data,
                                      i)  # data is [35, 20], targets is [700]
            trg_mask = create_mask(data).to(device)
            optimizer.zero_grad()
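            # Attach nan_hook to every submodule so activations are checked for
            # NaNs during the forward pass.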
            for submodule in model.modules():
                submodule.register_forward_hook(nan_hook)
            output, aux_loss = model(src=None,
                                     trg=data,
                                     src_mask=None,
                                     trg_mask=trg_mask,
                                     is_lm=True)
            output = output.view(-1, ntokens)
            loss = criterion(output, targets)
            final_loss = loss + aux_loss
            final_loss.backward()

            torch.nn.utils.clip_grad_norm_(model.parameters(), CLIP)
            optimizer.step_and_update_lr()

            model_has_nan = check_for_nans(model)
            if model_has_nan:
                print("Nans have been identified")

            total_loss += loss.item()
            total_aux_loss += aux_loss.item()

            if batch == 0:
                print("Running without errors")

            if batch % LOG_INTERVAL == 0 and batch > 0:
                cur_loss = total_loss / LOG_INTERVAL  # curr loss is independent of the aux loss
                curr_aux_loss = total_aux_loss / LOG_INTERVAL

                elapsed = time.time() - start_time
                print(
                    '| epoch {:3d} | {:5d}/{:5d} batches | lr {:02.2f} | ms/batch {:5.2f} | '
                    'loss {:5.2f} | aux_loss {:5.2f} | ppl {:8.2f}'.format(
                        epoch, batch,
                        train_data.size(1) // BPTT, LR,
                        elapsed * 1000 / LOG_INTERVAL, cur_loss, curr_aux_loss,
                        math.exp(cur_loss)))
                total_loss = 0.
                total_aux_loss = 0.
                start_time = time.time()
Example #17
def run_experiment(dataset_folder, ratio_0, ratio_p1, use_cce, 
                   batch_size, lr, epochs, early_stopping_patience,
                   emb_dim, lstm_hidden_dim, emb_dropout_p, lstm_dropout_p, n_classes,
                   checkpoint_file, device):
    tb_writer = SummaryWriter(comment=f"_r0_{ratio_0}_rp1_{ratio_p1}_cce_{use_cce}")
    vocab_size, train_it, val_it, test_it = read_data(dataset_folder, ratio_0, ratio_p1, batch_size, device)
    model = BiLstmClassifier(vocab_size, emb_dim, lstm_hidden_dim, emb_dropout_p, lstm_dropout_p, n_classes).to(device)
    opt = optim.Adam(model.parameters(), lr)
    criterion = complement_cross_entropy_loss if use_cce else one_hot_cross_entropy_loss
    with autograd.detect_anomaly():
        model = train_model(model, n_classes, criterion, opt, train_it, val_it, epochs, early_stopping_patience, checkpoint_file, tb_writer)
    score = evaluate_model(model, n_classes, test_it, tb_writer)
    return score
Example #18
    def train(self, states, critic):
        with autograd.detect_anomaly():
            # transform to torch tensors
            states = torch.from_numpy(states).float().to(self._device)

            self._optimizer.zero_grad()
            # compute actions taken in these states by the actor
            _actionsPred = self._backbone([states])
            # compose the critic over the actor outputs (sandwich), which effectively does g(f(x))
            _lossActor = -critic(states, _actionsPred).mean()
            _lossActor.backward()
            # take a step with the optimizer
            self._optimizer.step()
Example #19
 def train(self, loss, clip_grad_norm=None):
     assert self.is_training()
     self.optimizer.zero_grad()
     if self.debug:
         debug_context = autograd.detect_anomaly()
     else:
         debug_context = contextlib.nullcontext()
     with debug_context:
         loss.backward()
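     # Optionally clip the global gradient norm after backward and before the optimizer step.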
     if clip_grad_norm is not None:
         nn.utils.clip_grad_norm_(self.model.parameters(), clip_grad_norm)
     self.optimizer.step()
     self.increment_train_steps()
Example #20
 def _forward_pass_with_anomaly_detection(
         self,
         patches: torch.Tensor,
         mask: torch.Tensor = None,
         labels: torch.Tensor = None) -> SegmentationForwardPass.Result:
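     # Run the forward pass under autograd anomaly detection when enabled, and
     # fail fast if the resulting loss is NaN or infinite.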
     if self.detect_anomaly:
         with autograd.detect_anomaly():
             result = self._forward_pass(patches, mask, labels)
         if result.loss is not None and (math.isnan(result.loss)
                                         or math.isinf(result.loss)):
             raise RuntimeError(
                 f"The loss computation returned {result.loss}")
         return result
     return self._forward_pass(patches, mask, labels)
Example #21
def predict_test(testset):
    # Create dataset loader
    test_loader = torch.utils.data.DataLoader(testset,
                                              batch_size=args.batch_size,
                                              shuffle=False,
                                              drop_last=False)

    # restore checkpoint
    restore_checkpoint(args)

    # Set loss function
    args.criterion = torch.nn.MSELoss()

    # Predict all test elements and measure
    run_loss = 0
    for batch_idx, batch in enumerate(test_loader, 1):
        # Unpack batch
        inputs, targets = batch

        # Send to device
        inputs = inputs.to(args.device)
        targets = targets.to(args.device)

        # Calculate gradients and update
        with autograd.detect_anomaly():
            # forward
            outputs = args.net(inputs)

            # get maximum from each layer
            print(outputs.shape)
            idx_inpt = get_max(inputs, dim=(2, 3))
            idx_otpt = get_max(outputs, dim=(2, 3))
            print(idx_inpt.shape)
            print(idx_inpt)
            print(idx_otpt.shape)
            print(idx_otpt)
            input()

            # calculate loss
            loss = args.criterion(outputs, targets)
            run_loss += loss.item()

        if batch_idx < 10:
            # Plot predictions
            # img = imshow_bboxes(inputs, targets, args, t_outputs)
            # args.writer.add_image('Test/predicted', img, batch_idx)
            pass
        else:
            break
Example #22
    def test_LinearOperator_radon_gradcheck(self):
        # Set image size.
        image_size = (5, 4)

        # Define angles.
        nangles = 180
        angles = np.linspace(0, np.pi, nangles, False)

        # Create operators.
        R, Radj, ndet = radon.radon2d(*image_size, angles)
        data_size = (nangles, ndet)

        # Create instances for use with torch.
        K = radon.RadonTransform(R, Radj, data_size)
        Kadj = radon.BackProjection(R, Radj, image_size)

        # Apply to dummy input.
        x = torch.randn((1, 1, *image_size),
                        requires_grad=True,
                        dtype=torch.double)
        f = K(x)

        # Check for simple loss.
        loss = f.sum()
        loss.backward()
        torch.allclose(x.grad, Kadj(x.new_ones(1, 1, *data_size)))

        def op_fun(x):
            out = LinearOperator.apply(x, K, Kadj)
            return out.sum()

        # Check for anomalies.
        with tag.detect_anomaly():
            x = torch.randn(1,
                            1,
                            *image_size,
                            requires_grad=True,
                            dtype=torch.double)
            out = op_fun(x)
            out.backward()

        # Check numerical gradient up to certain tolerance.
        # Due to inaccuracy of adjoint this check fails.
        x = torch.randn(1,
                        1,
                        *image_size,
                        requires_grad=True,
                        dtype=torch.double)
        tag.gradcheck(lambda t: K(t), x)
Example #23
def train(args, model, device, train_loader, optimizer, epoch, is_display):
    model.train()
    total_loss = 0
    for batch_idx, (imgs, labels, gt) in enumerate(train_loader):
        with autograd.detect_anomaly():
            if args.using_contrastive_loss:
                target = gt.type(torch.FloatTensor).view(gt.shape[0], 1)
                target = target.view(-1)
                imgs[0], imgs[1], imgs[2], imgs[3], target = imgs[0].to(
                    device), imgs[1].to(device), imgs[2].to(
                        device), imgs[3].to(device), target.to(device)
                optimizer.zero_grad()
                _, A, B, C, D = model(imgs[0], imgs[1], imgs[2], imgs[3])
                embs = [A, B, C, D]
                loss = criteria(embs, target)
                total_loss += loss.item()
                sum_loss = loss
                if sub_criteria is not None:
                    sub_loss = sub_criteria(embs, target)
                    total_loss += sub_loss.item()
                    sum_loss += sub_loss
                if sub_criteria_2 is not None:
                    sub_loss = sub_criteria_2(embs, target)
                    total_loss += sub_loss.item()
                    sum_loss += sub_loss
            else:
                target = gt.type(torch.FloatTensor).view(gt.shape[0], 1)
                imgs[0], imgs[1], imgs[2], imgs[3], target = imgs[0].to(
                    device), imgs[1].to(device), imgs[2].to(
                        device), imgs[3].to(device), target.to(device)
                optimizer.zero_grad()
                output, emb_a, emb_b, emb_c, emb_d = model(
                    imgs[0], imgs[1], imgs[2], imgs[3])
                loss = criteria(output, target)
                total_loss += loss.item()
                sum_loss = loss
            sum_loss.backward()
            optimizer.step()
            if batch_idx % args.log_interval == 0:
                logging.info(
                    '[Epoch {}/{}] [Batch {}/{}] [loss: {:.6f}]'.format(
                        epoch, args.epoch, batch_idx, len(train_loader),
                        sum_loss.item()))
    total_loss /= len(train_loader)
    logging.info('[Epoch {}/{}] [loss: {:.6f}]'.format(epoch, args.epoch,
                                                       total_loss))
    return total_loss
Example #24
    def train(self):
        LOGGER.addHandler(
            logging.FileHandler(
                os.path.join(self.args.checkpoint_dir, 'logger_train.log')))
        writer = SummaryWriter(log_dir=self.args.checkpoint_dir)
        start_time = time()

        # setup dataset/data loader
        loader = {k: self.__setup_loader(k) for k in ['train', 'valid']}
        LOGGER.info('data_loader: %s' % str(list(loader.keys())))

        # start training
        LOGGER.info('*** start training from step %i, epoch %i ***' %
                    (self.__step, self.__epoch))
        try:
            with detect_anomaly():
                while True:

                    data_loader, _ = loader['train']
                    if_training_finish = self.__epoch_train(data_loader,
                                                            writer=writer)
                    self.release_cache()

                    data_loader, info_loader = loader['valid']
                    self.__epoch_valid(data_loader,
                                       info_loader=info_loader,
                                       writer=writer,
                                       prefix='valid',
                                       sample_n=SAMPLE_N)
                    self.release_cache()

                    if if_training_finish:
                        break

                    self.__epoch += 1
        except RuntimeError:
            LOGGER.exception(
                '*** RuntimeError (NaN found, see above log in detail) ***')

        except KeyboardInterrupt:
            LOGGER.info('*** KeyboardInterrupt ***')

        self.__save()
        LOGGER.info('[training completed, %0.2f sec in total]' %
                    (time() - start_time))
        writer.close()
        LOGGER.info('ckpt saved at %s' % self.args.checkpoint_dir)
Example #25
    def test_LinearOperator_radon_cuda(self):
        # Set image size.
        image_size = 5, 4

        # Define angles.
        nangles = 180
        angles = np.linspace(0, np.pi, nangles, False)

        # Check if GPU is available.
        cuda = torch.cuda.is_available()
        device = torch.device('cuda' if cuda else 'cpu')

        # Create operators.
        R, Radj, ndet = radon.radon2d(*image_size, angles, cuda)
        data_size = (nangles, ndet)

        # Create instances for use with torch.
        K = radon.RadonTransform(R, Radj, data_size)
        Kadj = radon.BackProjection(R, Radj, image_size)

        # Apply to dummy input.
        x = torch.randn((1, 1, *image_size),
                        requires_grad=True,
                        dtype=torch.double,
                        device=device)
        f = K(x)

        # Check for simple loss.
        loss = f.sum()
        loss.backward()
        torch.allclose(x.grad, Kadj(x.new_ones(1, 1, *data_size)))

        def op_fun(x):
            out = LinearOperator.apply(x, K, Kadj)
            return out.sum()

        # Check for anomalies.
        with tag.detect_anomaly():
            x = torch.randn(1,
                            1,
                            *image_size,
                            requires_grad=True,
                            dtype=torch.double,
                            device=device)
            out = op_fun(x)
            out.backward()
Example #26
def train():
    ds = GlazeCompositionDataset()
    train_loader, val_loader = get_data_loaders(ds)
    out_D = len(ds.compounds)
    print('Out Dimension is %i' % out_D)
    model = Net(out_D)
    loss = torch.nn.MSELoss()
    optimizer = optim.SGD(model.parameters(), lr=0.2, momentum=0.5)
    trainer = create_supervised_trainer(model, optimizer, loss)
    metrics = {'MSE': ignite.metrics.RootMeanSquaredError()}
    evaluator = create_supervised_evaluator(model, metrics)
    saver = ignite.handlers.ModelCheckpoint('./checkpoints/models',
                                            'chkpoint',
                                            save_interval=2,
                                            n_saved=4,
                                            create_dir=True,
                                            require_empty=False)
    trainer.add_event_handler(Events.EPOCH_COMPLETED, saver,
                              {'glaze_net_3': model})
    print(model.state_dict().keys())

    @trainer.on(Events.ITERATION_COMPLETED)
    def log_training_loss(trainer):
        iter = (trainer.state.iteration - 1) % len(train_loader) + 1
        if iter % 10 == 0:
            print("Epoch[{}] Iteration[{}/{}] Loss: {:.10f}".format(
                trainer.state.epoch, iter, len(train_loader),
                trainer.state.output))

    @trainer.on(Events.EPOCH_COMPLETED)
    def log_training_results(trainer):
        evaluator.run(train_loader)
        metrics = evaluator.state.metrics
        print("Training Results - Epoch: {}  MSE: {:.2f}".format(
            trainer.state.epoch, metrics['MSE']))

    @trainer.on(Events.EPOCH_COMPLETED)
    def log_validation_results(trainer):
        evaluator.run(val_loader)
        metrics = evaluator.state.metrics
        print("Validation Results - Epoch: {}  MSE: {:.2f}".format(
            trainer.state.epoch, metrics['MSE']))

    with autograd.detect_anomaly():
        trainer.run(train_loader, max_epochs=100)
    return model
Example #27
    def run(self):
        try:
            # display configuration
            self.logger.debug('>>> configuration: \n' + conf().dump().strip())

            # load pre-trained model
            current_epoch = self.load_model()

            # train
            with autograd.detect_anomaly():
                self.train(current_epoch)

            # evaluate
            self.evaluate()
        except Exception as e:
            self.logger.error(e, exc_info=True)
            raise e
Example #28
 def run_batch(self, data, visualize=False):
     """If visualize is True, a visualize method of the model module is called."""
     if not isinstance(data, list) and not isinstance(data, tuple):
         data = [data]
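     # Disable gradient tracking outside of training; enable anomaly detection
     # only when debugging.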
     if self.is_training():
         context = contextlib.nullcontext()
     else:
         context = torch.no_grad()
     if self.debug:
         debug_context = autograd.detect_anomaly()
     else:
         debug_context = contextlib.nullcontext()
     with context, debug_context:
         if not visualize:
             return self.model(*data)
         else:
             return self.model.visualize(*data)
Example #29
File: m1.py Project: yotamfr/gomut
def train(model, loader, optimizer, n_iter):
    model.train()
    err = 0.0
    i = 0
    pbar = tqdm(total=len(loader), desc='pairs loaded')
    for i, (s1, s2, b1, b2, p1, p2, m1, m2, idx, pdb1, pdb2,
            *_) in enumerate(batch_generator(loader, prepare_pairs_batch)):
        optimizer.zero_grad()

        assert s1.shape == s2.shape
        assert m1.shape == m2.shape
        assert p1.shape == p2.shape

        ddm_hat, ddm = predict_2ways(model, m1, m2, s1, s2, b1, b2, p1, p2,
                                     idx)
        loss = get_loss(ddm_hat, ddm)
        err += loss.item()
        e = err / (i + 1.)

        writer.add_scalars('M1/Loss', {"train": e}, n_iter)

        try:
            with autograd.detect_anomaly():
                loss.backward()
        except RuntimeError as e:
            raise e

        if n_iter % UPLOAD_IMAGE_EVERY == 0:
            write_true_pred_pairs("M1", n_iter, pdb1, pdb2,
                                  ddm.data.cpu().numpy(),
                                  ddm_hat.data.cpu().numpy())
            write_dist_mats_pairs("M1", n_iter, pdb1, pdb2,
                                  m1.data.cpu().numpy(),
                                  m2.data.cpu().numpy())

        optimizer.step_and_update_lr(loss.item())
        lr = optimizer.lr

        pbar.set_description("Training Loss:%.6f, LR: %.6f (L=%d)" %
                             (e, lr, s1.size(1)))
        pbar.update(len(idx))
        n_iter += 1

    pbar.close()
    return n_iter
Example #30
File: baseali.py Project: ankitkv/ALI
    def loss_function(self, forward_ret, labels=None):
        if self.is_training():
            forward_batch, backward_batch = self.get_disc_batches(forward_ret)
            if self.debug:
                debug_context = autograd.detect_anomaly()
            else:
                debug_context = nullcontext()
            with debug_context:
                d_ps = self.disc(*forward_batch)
                d_qs = self.disc(*backward_batch)

            if self.train_disc():
                if self.flags.loss == 'wasserstein':
                    if self.flags.gp:
                        grad_penalty = self.gradient_penalty(
                            backward_batch,
                            forward_batch,
                            context=debug_context)
                    else:
                        grad_penalty = 0
                    loss = -d_ps.mean() + d_qs.mean() + (10.0 * grad_penalty) + self.flags.wasserstein_nodrift * \
                        ((d_ps + d_qs)**2).mean()

                else:
                    loss = (F.binary_cross_entropy_with_logits(
                        d_ps, torch.ones_like(d_ps)) +
                            F.binary_cross_entropy_with_logits(
                                d_qs, torch.zeros_like(d_qs)))
                self.d_loss = loss.item()
            else:
                if self.flags.loss == 'wasserstein':
                    loss = d_ps.mean() - d_qs.mean()
                else:
                    loss = (F.binary_cross_entropy_with_logits(
                        d_ps, torch.zeros_like(d_ps)) +
                            F.binary_cross_entropy_with_logits(
                                d_qs, torch.ones_like(d_qs)))
                self.g_loss = loss.item()

            g_loss = self.g_loss
            d_loss = self.d_loss
        else:
            loss, g_loss, d_loss = 0.0, 0.0, 0.0

        return loss, g_loss, d_loss