Example #1
    def train_val(self, lstm, dual_target=None):
        """
        Function that does the training and validation
        :param lstm: whether the model is an LSTM
        :return:
        """
        #print("Dual target in train_val: %r"%dual_target)
        for epoch in range(self.start_epoch, self.config['n_epochs']):
            # VALIDATION
            if self.config['do_val'] and (
                (epoch % self.config['val_freq'] == 0) or
                (epoch == self.config['n_epochs'] - 1)):
                val_batch_time = Logger.AverageMeter()
                val_loss = Logger.AverageMeter()
                if self.extra_criterion:
                    val_extra_loss = Logger.AverageMeter()
                self.model.eval()
                end = time.time()
                val_data_time = Logger.AverageMeter()
                for batch_idx, (data, target) in enumerate(self.val_loader):
                    val_data_time.update(time.time() - end)

                    kwargs = dict(target=target,
                                  criterion=self.train_criterion,
                                  optim=self.optimizer,
                                  train=False)
                    if lstm:
                        loss, output = step_lstm(data, self.model,
                                                 self.config['cuda'], **kwargs)
                    else:
                        loss, output = step_feedfwd(data, self.model,
                                                    self.config['cuda'],
                                                    **kwargs)

                    val_loss.update(loss)
                    val_batch_time.update(time.time() - end)

                    if self.extra_criterion:
                        # the loader may yield a single target or a tuple of targets
                        dual_target = isinstance(target, (list, tuple))
                        with torch.set_grad_enabled(False):
                            if self.config['cuda']:
                                # non_blocking replaces the old async kwarg, which is
                                # invalid syntax in Python 3.7+ (async is reserved)
                                if dual_target:
                                    target = tuple(
                                        single_target.cuda(non_blocking=True)
                                        for single_target in target)
                                else:
                                    target = target.cuda(non_blocking=True)

                            if dual_target:
                                target_var = tuple(
                                    Variable(t, requires_grad=False)
                                    for t in target)
                            else:
                                target_var = Variable(target,
                                                      requires_grad=False)

                            extra_loss = self.extra_criterion(
                                output, target_var)
                            extra_loss = extra_loss.item()
                            val_extra_loss.update(extra_loss)

                    if batch_idx % self.config['print_freq'] == 0:
                        print_string = 'Val {:s}: Epoch {:d}\t' \
                              'Batch {:d}/{:d}\t' \
                              'Data Time {:.4f} ({:.4f})\t' \
                              'Batch Time {:.4f} ({:.4f})\t' \
                              'Loss {:f}\t' \
                            .format(self.experiment, epoch, batch_idx, len(self.val_loader) - 1,
                                   val_data_time.val, val_data_time.avg, val_batch_time.val,
                                   val_batch_time.avg, loss)

                        if self.extra_criterion:
                            print_string += 'Loss Extra Scale {:f}\t'.format(
                                extra_loss)

                        print(print_string)

                        if self.config['log_visdom']:
                            self.vis.save(envs=[self.vis_env])

                    end = time.time()

                print_string = 'Val {:s}: Epoch {:d}, val_loss {:f}' \
                    .format(self.experiment, epoch, val_loss.avg)

                if self.extra_criterion:
                    print_string += ' val_extra_loss {:f}\t'.format(
                        val_extra_loss.avg)

                print(print_string)

                if self.config['log_visdom']:
                    val_loss_avg = val_loss.avg
                    self.vis.line(X=np.asarray([epoch]),
                                  Y=np.asarray([val_loss_avg]),
                                  win=self.loss_win,
                                  name='val_loss',
                                  update='append',
                                  env=self.vis_env)

                    if self.extra_criterion:
                        val_extra_loss_avg = val_extra_loss.avg
                        self.vis.line(X=np.asarray([epoch]),
                                      Y=np.asarray([val_extra_loss_avg]),
                                      win=self.extra_loss_win,
                                      name='val_extra_loss',
                                      update='append',
                                      env=self.vis_env)

                    self.vis.save(envs=[self.vis_env])

            # SAVE CHECKPOINT
            if epoch % self.config['snapshot'] == 0:
                self.save_checkpoint(epoch)
                print('Epoch {:d} checkpoint saved for {:s}'.format(
                    epoch, self.experiment))

            # ADJUST LR
            lr = self.optimizer.adjust_lr(epoch)
            if self.config['log_visdom']:
                self.vis.line(X=np.asarray([epoch]),
                              Y=np.asarray([np.log10(lr)]),
                              win=self.lr_win,
                              name='learning_rate',
                              update='append',
                              env=self.vis_env)

            # TRAIN
            self.model.train()
            train_data_time = Logger.AverageMeter()
            train_batch_time = Logger.AverageMeter()
            end = time.time()

            for batch_idx, (data, target) in enumerate(self.train_loader):
                train_data_time.update(time.time() - end)

                kwargs = dict(target=target,
                              criterion=self.train_criterion,
                              optim=self.optimizer,
                              train=True,
                              max_grad_norm=self.config['max_grad_norm'])
                if lstm:
                    loss, output = step_lstm(data, self.model,
                                             self.config['cuda'], **kwargs)
                else:
                    loss, output = step_feedfwd(data, self.model,
                                                self.config['cuda'], **kwargs)

                if self.extra_criterion:
                    dual_target = isinstance(target, (list, tuple))
                    with torch.set_grad_enabled(False):
                        if self.config['cuda']:
                            # non_blocking replaces the invalid async kwarg
                            if dual_target:
                                target = tuple(
                                    single_target.cuda(non_blocking=True)
                                    for single_target in target)
                            else:
                                target = target.cuda(non_blocking=True)

                        if dual_target:
                            target_var = tuple(
                                Variable(t, requires_grad=False)
                                for t in target)
                        else:
                            target_var = Variable(target, requires_grad=False)

                        extra_loss = self.extra_criterion(output, target_var)
                        extra_loss = extra_loss.item()

                train_batch_time.update(time.time() - end)

                if batch_idx % self.config['print_freq'] == 0:
                    n_iter = epoch * len(self.train_loader) + batch_idx
                    epoch_count = float(n_iter) / len(self.train_loader)

                    print_string = 'Train {:s}: Epoch {:d}\t' \
                          'Batch {:d}/{:d}\t' \
                          'Data Time {:.4f} ({:.4f})\t' \
                          'Batch Time {:.4f} ({:.4f})\t' \
                          'Loss {:f}\t' \
                        .format(self.experiment, epoch, batch_idx, len(self.train_loader) - 1,
                               train_data_time.val, train_data_time.avg, train_batch_time.val,
                               train_batch_time.avg, loss)

                    if self.extra_criterion:
                        print_string += 'Loss Extra Scale {:f}\t'.format(
                            extra_loss)

                    print_string += 'lr: {:f}'.format(lr)
                    print(print_string)

                    end = time.time()

                    if self.config['log_visdom']:
                        self.vis.line(X=np.asarray([epoch_count]),
                                      Y=np.asarray([loss]),
                                      win=self.loss_win,
                                      name='train_loss',
                                      update='append',
                                      env=self.vis_env)
                        if self.extra_criterion:
                            self.vis.line(X=np.asarray([epoch_count]),
                                          Y=np.asarray([extra_loss]),
                                          win=self.extra_loss_win,
                                          name='train_extra_loss',
                                          update='append',
                                          env=self.vis_env)

                        if self.n_criterion_params:
                            for name, v in self.train_criterion.named_parameters():
                                v = v.item()
                                self.vis.line(X=np.asarray([epoch_count]),
                                              Y=np.asarray([v]),
                                              win=self.criterion_param_win,
                                              name=name,
                                              update='append',
                                              env=self.vis_env)
                        self.vis.save(envs=[self.vis_env])

                end = time.time()

        # Save final checkpoint
        epoch = self.config['n_epochs']
        self.save_checkpoint(epoch)
        print('Epoch {:d} checkpoint saved'.format(epoch))
        if self.config['log_visdom']:
            self.vis.save(envs=[self.vis_env])
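
Both examples lean on Logger.AverageMeter to track timings and losses, but only its update() method and .val / .avg attributes appear above. Below is a minimal sketch of such a meter, assuming the conventional count-and-sum implementation; the project's actual Logger module may differ:

class AverageMeter:
    """Minimal running-average tracker: keeps the last value and the mean."""

    def __init__(self):
        self.val = 0.0    # most recent value passed to update()
        self.sum = 0.0    # running sum of all values
        self.count = 0    # number of samples seen so far
        self.avg = 0.0    # running mean = sum / count

    def update(self, val, n=1):
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count

With this, val_batch_time.update(time.time() - end) records the latest batch time in .val and keeps the epoch mean in .avg, matching the '{:.4f} ({:.4f})' pairs in the log lines.
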
Example #2
    def train_val(self, lstm):
        """
    Function that does the training and validation
    :param lstm: whether the model is an LSTM
    :return: 
    """
        for epoch in range(self.start_epoch, self.config['n_epochs']):
            # VALIDATION
            if self.config['do_val'] and (
                (epoch % self.config['val_freq'] == 0) or
                (epoch == self.config['n_epochs'] - 1)):
                val_batch_time = Logger.AverageMeter()
                val_loss = Logger.AverageMeter()
                self.model.eval()
                end = time.time()
                val_data_time = Logger.AverageMeter()
                for batch_idx, (data, target) in enumerate(self.val_loader):
                    val_data_time.update(time.time() - end)

                    kwargs = dict(target=target,
                                  criterion=self.val_criterion,
                                  optim=self.optimizer,
                                  train=False)
                    if lstm:
                        loss, _ = step_lstm(data, self.model,
                                            self.config['cuda'], **kwargs)
                    else:
                        loss, _ = step_feedfwd(data, self.model,
                                               self.config['cuda'], **kwargs)

                    val_loss.update(loss)
                    val_batch_time.update(time.time() - end)

                    if batch_idx % self.config['print_freq'] == 0:
                        print('Val {:s}: Epoch {:d}\t' \
                              'Batch {:d}/{:d}\t' \
                              'Data time {:.4f} ({:.4f})\t' \
                              'Batch time {:.4f} ({:.4f})\t' \
                              'Loss {:f}' \
                          .format(self.experiment, epoch, batch_idx, len(self.val_loader)-1,
                          val_data_time.val, val_data_time.avg, val_batch_time.val,
                          val_batch_time.avg, loss))
                        if self.config['log_visdom']:
                            self.vis.save(envs=[self.vis_env])

                    end = time.time()

                print('Val {:s}: Epoch {:d}, val_loss {:f}'.format(
                    self.experiment, epoch, val_loss.avg))

                if self.config['log_visdom']:
                    self.vis.updateTrace(X=np.asarray([epoch]),
                                         Y=np.asarray([val_loss.avg]),
                                         win=self.loss_win,
                                         name='val_loss',
                                         append=True,
                                         env=self.vis_env)
                    self.vis.save(envs=[self.vis_env])

            # SAVE CHECKPOINT
            if epoch % self.config['snapshot'] == 0:
                self.save_checkpoint(epoch)
                print('Epoch {:d} checkpoint saved for {:s}'.format(
                    epoch, self.experiment))

            # ADJUST LR
            lr = self.optimizer.adjust_lr(epoch)
            if self.config['log_visdom']:
                self.vis.updateTrace(X=np.asarray([epoch]),
                                     Y=np.asarray([np.log10(lr)]),
                                     win=self.lr_win,
                                     name='learning_rate',
                                     append=True,
                                     env=self.vis_env)

            # TRAIN
            self.model.train()
            train_data_time = Logger.AverageMeter()
            train_batch_time = Logger.AverageMeter()
            end = time.time()
            for batch_idx, (data, target) in enumerate(self.train_loader):
                train_data_time.update(time.time() - end)

                kwargs = dict(target=target,
                              criterion=self.train_criterion,
                              optim=self.optimizer,
                              train=True,
                              max_grad_norm=self.config['max_grad_norm'])
                if lstm:
                    loss, _ = step_lstm(data, self.model, self.config['cuda'],
                                        **kwargs)
                else:
                    loss, _ = step_feedfwd(data, self.model,
                                           self.config['cuda'], **kwargs)

                train_batch_time.update(time.time() - end)

                if batch_idx % self.config['print_freq'] == 0:
                    n_iter = epoch * len(self.train_loader) + batch_idx
                    epoch_count = float(n_iter) / len(self.train_loader)
                    print('Train {:s}: Epoch {:d}\t' \
                          'Batch {:d}/{:d}\t' \
                          'Data Time {:.4f} ({:.4f})\t' \
                          'Batch Time {:.4f} ({:.4f})\t' \
                          'Loss {:f}\t' \
                          'lr: {:f}'.\
                      format(self.experiment, epoch, batch_idx, len(self.train_loader)-1,
                      train_data_time.val, train_data_time.avg, train_batch_time.val,
                      train_batch_time.avg, loss, lr))
                    if self.config['log_visdom']:
                        self.vis.updateTrace(X=np.asarray([epoch_count]),
                                             Y=np.asarray([loss]),
                                             win=self.loss_win,
                                             name='train_loss',
                                             append=True,
                                             env=self.vis_env)
                        if self.n_criterion_params:
                            for name, v in self.train_criterion.named_parameters():
                                v = v.data.cpu().numpy()[0]
                                self.vis.updateTrace(
                                    X=np.asarray([epoch_count]),
                                    Y=np.asarray([v]),
                                    win=self.criterion_param_win,
                                    name=name,
                                    append=True,
                                    env=self.vis_env)
                        self.vis.save(envs=[self.vis_env])

                end = time.time()

        # Save final checkpoint
        epoch = self.config['n_epochs']
        self.save_checkpoint(epoch)
        print('Epoch {:d} checkpoint saved'.format(epoch))
        if self.config['log_visdom']:
            self.vis.save(envs=[self.vis_env])
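
Both variants delegate the actual forward/backward pass to the step_lstm / step_feedfwd helpers, whose bodies are not shown here. The sketch below is a hypothetical step_feedfwd reconstructed only from the call sites above (the target, criterion, optim, train, and max_grad_norm keywords, and the (loss, output) return), not the project's actual implementation:

import torch
import torch.nn as nn

def step_feedfwd(data, model, cuda, target=None, criterion=None,
                 optim=None, train=True, max_grad_norm=None):
    # Hypothetical helper matching the call sites above. A single-tensor
    # target is assumed for brevity; the callers above also handle tuple
    # targets.
    if cuda:
        data = data.cuda(non_blocking=True)
        if target is not None:
            target = target.cuda(non_blocking=True)

    # enable autograd only when training; validation runs under no_grad
    with torch.set_grad_enabled(train):
        output = model(data)
        loss = criterion(output, target)

    if train:
        optim.zero_grad()  # assumes the optimizer wrapper exposes zero_grad/step
        loss.backward()
        if max_grad_norm is not None:
            nn.utils.clip_grad_norm_(model.parameters(), max_grad_norm)
        optim.step()

    return loss.item(), output

Note also the plotting API difference between the two versions: Example #2 calls the old vis.updateTrace(..., append=True), which newer visdom releases removed in favour of the vis.line(..., update='append') form used in Example #1.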