Пример #1
0
def test(i, epoch, model, data_loader, args):
    """Evaluate sub-model `i` of a pooled model on the test set.

    :param i: index of the pooled sub-model to evaluate (also the positive class)
    :param epoch: current epoch number (used for logging only)
    :param model: pooled torch model exposing `loss_function` and `which_model`
    :param data_loader: wrapper object holding `.test_loader`
    :param args: argparse namespace (reads `.cuda`)
    :returns: tuple of (mean loss, mean accuracy) over all test batches
    """
    model.eval()
    loss = []
    correct = []

    for data, target in data_loader.test_loader:
        if args.cuda:
            data, target = data.cuda(), target.cuda()

        with torch.no_grad():
            data, target = Variable(data), Variable(target)
            if len(list(target.size())) > 1:  #XXX: hax
                target = torch.squeeze(target)

            # binarize labels: 1 where the label equals this pool index, else 0
            target = target == i
            target = target.type(long_type(args.cuda))

            output = model(data, which_model=i)
            loss_t = model.loss_function(output, target)
            correct_t = softmax_accuracy(output, target)

            # .item() replaces the deprecated `.data[0]`, which raises
            # IndexError on 0-dim loss tensors in modern PyTorch
            loss.append(loss_t.detach().cpu().item())
            correct.append(correct_t)

    loss = np.mean(loss)
    acc = np.mean(correct)
    print('\n[POOL_{} | {} samples]Test Epoch: {}\tAverage loss: {:.4f}\tAverage Accuracy: {:.4f}\n'.format(
        i, num_samples_in_loader(data_loader.test_loader), epoch, loss, acc))
    return loss, acc
Пример #2
0
def train(i, epoch, model, optimizer, data_loader, args):
    """Train sub-model `i` of a pooled model for one epoch.

    :param i: index of the pooled sub-model to train (also the positive class)
    :param epoch: current epoch number (used for logging only)
    :param model: pooled torch model exposing `loss_function` and `which_model`
    :param optimizer: torch optimizer for the sub-model's parameters
    :param data_loader: wrapper object holding `.train_loader`
    :param args: argparse namespace (reads `.cuda` and `.log_interval`)
    :returns: None
    """
    model.train()
    for batch_idx, (data, target) in enumerate(data_loader.train_loader):
        if args.cuda:
            data, target = data.cuda(), target.cuda()

        data, target = Variable(data), Variable(target)
        if len(list(target.size())) > 1:  #XXX: hax
            target = torch.squeeze(target)

        # binarize labels: 1 where the label equals this pool index, else 0
        target = target == i
        target = target.type(long_type(args.cuda))

        optimizer.zero_grad()

        # project to the output dimension
        output = model(data, which_model=i)
        loss = model.loss_function(output, target)
        correct = softmax_accuracy(output, target)

        # compute loss
        loss.backward()
        optimizer.step()

        # log every nth interval
        if batch_idx % args.log_interval == 0:
            # the total number of samples is different
            # if we have filtered using the class_sampler
            if hasattr(data_loader.train_loader, "sampler") \
               and hasattr(data_loader.train_loader.sampler, "num_samples"):
                num_samples = data_loader.train_loader.sampler.num_samples
            else:
                num_samples = len(data_loader.train_loader.dataset)

            # .item() replaces the deprecated `.data[0]`, which raises
            # IndexError on 0-dim loss tensors in modern PyTorch
            print('[POOL_{}]Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}\tAccuracy: {:.4f}'.format(
                i, epoch, batch_idx * len(data), num_samples,
                100. * batch_idx * len(data) / num_samples,
                loss.item(), correct))
Пример #3
0
def execute_graph(epoch,
                  model,
                  loader,
                  grapher,
                  optimizer=None,
                  prefix='test'):
    """ execute the graph; when 'train' is in the name the model runs the optimizer

    :param epoch: the current epoch number
    :param model: the torch model
    :param loader: the train or **TEST** loader
    :param grapher: the graph writing helper (eg: visdom / tf wrapper)
    :param optimizer: the optimizer
    :param prefix: 'train', 'test' or 'valid'
    :returns: dictionary with scalars
    :rtype: dict

    """
    start_time = time.time()
    model.eval() if prefix == 'test' else model.train()
    # train/valid require an optimizer; test must not receive one
    assert optimizer is not None if 'train' in prefix or 'valid' in prefix else optimizer is None
    loss_map, num_samples, print_once = {}, 0, False

    # iterate over train and valid data
    for minibatch, labels in loader:
        minibatch = minibatch.cuda() if args.cuda else minibatch
        labels = labels.cuda() if args.cuda else labels
        if args.half:
            minibatch = minibatch.half()

        if 'train' in prefix:
            optimizer.zero_grad()  # zero gradients on optimizer

        with torch.no_grad() if prefix == 'test' else dummy_context():
            pred_logits = model(minibatch)  # get normal predictions
            loss_t = {
                'loss_mean':
                F.cross_entropy(
                    input=pred_logits,
                    target=labels),  # change to F.mse_loss for regression
                'accuracy_mean':
                softmax_accuracy(preds=F.softmax(pred_logits, -1),
                                 targets=labels)
            }
            loss_map = _add_loss_map(loss_map, loss_t)
            num_samples += minibatch.size(0)

        if 'train' in prefix:  # compute bp and optimize
            loss_t['loss_mean'].backward()
            optimizer.step()

        if args.debug_step:  # for testing purposes
            break

    # compute the mean of the map
    loss_map = _mean_map(loss_map)  # reduce the map to get actual means
    print(
        '{}[Epoch {}][{} samples][{:.2f} sec]: Loss: {:.4f}\tAccuracy: {:.4f}'.
        format(prefix, epoch, num_samples,
               time.time() - start_time, loss_map['loss_mean'].item(),
               loss_map['accuracy_mean'].item() * 100.0))

    # plot the test accuracy, loss and images
    register_plots({**loss_map},
                   grapher,
                   epoch=epoch,
                   prefix='linear' + prefix)
    # F.interpolate replaces the deprecated F.upsample (same default
    # mode='nearest', identical output); uses the last minibatch seen
    register_images({'input_imgs': F.interpolate(minibatch, size=(100, 100))},
                    grapher,
                    prefix=prefix)

    # return this for early stopping
    loss_val = loss_map['loss_mean'].detach().item()
    loss_map.clear()
    return loss_val
Пример #4
0
def execute_graph(epoch,
                  model,
                  data_loader,
                  grapher,
                  optimizer=None,
                  prefix='test',
                  plot_mem=False):
    ''' execute the graph; when 'train' is in the name the model runs the optimizer

    :param epoch: the current epoch number
    :param model: the torch model
    :param data_loader: iterable yielding items unpacked by _unpack_data_and_labels
    :param grapher: the graph writing helper (eg: visdom / tf wrapper)
    :param optimizer: the optimizer (required iff 'train' is in prefix)
    :param prefix: 'train', 'test' or 'valid'
    :param plot_mem: if True also log CPU / CUDA memory usage scalars
    :returns: dict with 'loss_mean' and 'acc_mean' scalars (for early stopping)
    '''
    start_time = time.time()
    model.train() if 'train' in prefix else model.eval()
    assert optimizer is not None if 'train' in prefix else optimizer is None
    loss_map, num_samples = {}, 0
    x_original, x_related = None, None

    for item in data_loader:
        # first destructure the data, cuda-ize and wrap in vars
        x_original, x_related, labels = _unpack_data_and_labels(item)
        x_related, labels = cudaize(x_related,
                                    is_data_tensor=True), cudaize(labels)

        if 'train' in prefix:  # zero gradients on optimizer
            optimizer.zero_grad()

        with torch.no_grad() if 'train' not in prefix else dummy_context():
            with torch.autograd.detect_anomaly(
            ) if args.detect_anomalies else dummy_context():
                x_original, x_related = generate_related(
                    x_related, x_original, args)
                #x_original = cudaize(x_original, is_data_tensor=True)

                # run the model and gather the loss map
                data_to_infer = x_original if args.use_full_resolution else x_related
                loss_logits_t = model(data_to_infer)
                loss_t = {
                    'loss_mean':
                    F.cross_entropy(input=loss_logits_t, target=labels)
                }

                # compute accuracy and aggregate into map
                loss_t['accuracy_mean'] = softmax_accuracy(F.softmax(
                    loss_logits_t, -1),
                                                           labels,
                                                           size_average=True)

                loss_map = _add_loss_map(loss_map, loss_t)
                num_samples += x_related.size(0)

        if 'train' in prefix:  # compute bp and optimize
            if args.half is True:
                # fp16 path: the (apex-style) optimizer owns the backward pass
                optimizer.backward(loss_t['loss_mean'])
            else:
                loss_t['loss_mean'].backward()

            if args.clip > 0:
                # TODO: clip by value or norm? torch.nn.utils.clip_grad_value_
                if args.half is True:
                    # fp16 path clips the fp32 master copy of the grads
                    optimizer.clip_master_grads(args.clip)
                else:
                    torch.nn.utils.clip_grad_norm_(model.parameters(), args.clip)

            optimizer.step()
            del loss_t

    loss_map = _mean_map(loss_map)  # reduce the map to get actual means
    correct_percent = 100.0 * loss_map['accuracy_mean']

    print(
        '''{}[Epoch {}][{} samples][{:.2f} sec]:Average loss: {:.4f}\tAcc: {:.4f}'''
        .format(prefix, epoch, num_samples,
                time.time() - start_time, loss_map['loss_mean'].item(),
                correct_percent))

    # add memory tracking
    if plot_mem:
        process = psutil.Process(os.getpid())
        loss_map['cpumem_scalar'] = process.memory_info().rss * 1e-6
        loss_map['cudamem_scalar'] = torch.cuda.memory_allocated() * 1e-6

    # plot all the scalar / mean values
    register_plots(loss_map, grapher, epoch=epoch, prefix=prefix)

    # plot images, crops, inlays and all relevant images
    def resize_4d_or_5d(img):
        # resize NCHW directly; for 5d (N, crops, C, H, W) resize each crop
        # slice and concatenate along the batch dimension
        if len(img.shape) == 4:
            return F.interpolate(img, (32, 32),
                                 mode='bilinear',
                                 align_corners=True)
        elif len(img.shape) == 5:
            return torch.cat([
                F.interpolate(img[:, i, :, :, :], (32, 32),
                              mode='bilinear',
                              align_corners=True) for i in range(img.shape[1])
            ], 0)
        else:
            raise Exception("only 4d or 5d images supported")

    input_imgs_map = {
        'related_imgs': resize_4d_or_5d(x_related),
        'original_imgs': resize_4d_or_5d(x_original)
    }
    register_images(input_imgs_map, grapher, prefix=prefix)
    grapher.show()

    # return this for early stopping
    loss_val = {
        'loss_mean': loss_map['loss_mean'].clone().detach().item(),
        'acc_mean': correct_percent
    }

    # delete the data instances, see https://tinyurl.com/ycjre67m
    loss_map.clear()
    input_imgs_map.clear()
    del loss_map
    del input_imgs_map
    del x_related
    del x_original
    del labels
    gc.collect()

    # return loss and accuracy
    return loss_val
Пример #5
0
    def validate(self, epoch):
        """
        Evaluate the model on the validation set.

        Runs the recurrent attention model with M Monte-Carlo samples per
        image (duplicating the batch), averages the predictions, and logs
        loss/accuracy to tensorboard and/or visdom.

        :param epoch: current epoch number (used for logging only)
        :returns: tuple of (average loss, average accuracy)
        """
        losses = AverageMeter()
        accs = AverageMeter()

        softmax_acc = 0

        for i, (x, y) in enumerate(self.valid_loader):
            if self.use_gpu:
                x, y = x.cuda(), y.cuda()
            x, y = Variable(x), Variable(y)

            # duplicate M times for Monte-Carlo sampling
            x = x.repeat(self.M, 1, 1, 1)

            # initialize location vector and hidden state
            self.batch_size = x.shape[0]
            h_t, l_t = self.reset()

            # extract the glimpses
            log_pi = []
            baselines = []
            for _ in range(self.num_glimpses - 1):
                # forward pass through model
                _, h_t, l_t, b_t, p = self.model(x, l_t, h_t)

                # store
                baselines.append(b_t)
                log_pi.append(p)

            # last iteration
            _, h_t, l_t, b_t, log_probas, p = self.model(
                x, l_t, h_t, last=True
            )
            log_pi.append(p)
            baselines.append(b_t)

            # convert list to tensors and reshape
            baselines = torch.stack(baselines).transpose(1, 0)
            log_pi = torch.stack(log_pi).transpose(1, 0)

            # average over the M Monte-Carlo duplicates
            log_probas = log_probas.view(
                self.M, -1, log_probas.shape[-1]
            )
            log_probas = torch.mean(log_probas, dim=0)

            baselines = baselines.contiguous().view(
                self.M, -1, baselines.shape[-1]
            )
            baselines = torch.mean(baselines, dim=0)

            log_pi = log_pi.contiguous().view(
                self.M, -1, log_pi.shape[-1]
            )
            log_pi = torch.mean(log_pi, dim=0)

            # NOTE(review): this might be averaged wrong over the repetition in x
            softmax_acc += softmax_accuracy(log_probas, y)

            # calculate reward
            predicted = torch.max(log_probas, 1)[1]
            R = (predicted.detach() == y).float()
            R = R.unsqueeze(1).repeat(1, self.num_glimpses)

            # compute losses for differentiable modules
            loss_action = F.nll_loss(log_probas, y)
            loss_baseline = F.mse_loss(baselines, R)

            # compute reinforce loss
            adjusted_reward = R - baselines.detach()
            loss_reinforce = torch.sum(-log_pi*adjusted_reward, dim=1)
            loss_reinforce = torch.mean(loss_reinforce, dim=0)

            # sum up into a hybrid loss
            loss = loss_action + loss_baseline + loss_reinforce

            # compute accuracy
            correct = (predicted == y).float()
            acc = 100 * (correct.sum() / len(y))

            # store
            losses.update(loss.item(), x.size()[0])
            accs.update(acc.item(), x.size()[0])

        # Average over the number of batches: enumerate starts at 0, so the
        # batch count is i + 1 (dividing by i was an off-by-one and crashed
        # with ZeroDivisionError when there was exactly one batch)
        softmax_acc /= (i + 1)

        # log to tensorboard per epoch instead of per iteration
        if self.use_tensorboard:
            # iteration = epoch*len(self.valid_loader) + i
            log_value('valid_loss', losses.avg, epoch)
            log_value('valid_acc', accs.avg, epoch)

        if self.use_visdom:
            # Do visdom train acc and train loss
            register_plots({'mean': np.array(losses.avg)}, self.grapher, epoch, prefix='validation loss')
            register_plots({'mean': np.array(accs.avg)}, self.grapher, epoch, prefix='validation accuracy')
            register_plots({'mean': np.array(softmax_acc)}, self.grapher, epoch, prefix='softmax validation accuracy')
            self.grapher.show()

        return losses.avg, accs.avg
Пример #6
0
    def train_one_epoch(self, epoch):
        """
        Train the model for 1 epoch of the training set.

        An epoch corresponds to one full pass through the entire
        training set in successive mini-batches.

        This is used by train() and should not be called manually.

        :param epoch: current epoch number (used for logging/plotting)
        :returns: tuple of (average loss, average accuracy)
        """
        batch_time = AverageMeter()
        losses = AverageMeter()
        accs = AverageMeter()

        softmax_acc = 0

        tic = time.time()
        with tqdm(total=self.num_train) as pbar:
            for i, (x, y) in enumerate(self.train_loader):
                if self.use_gpu:
                    x, y = x.cuda(), y.cuda()
                x, y = Variable(x), Variable(y)

                plot = False
                if (epoch % self.plot_freq == 0) and (i == 0):
                    plot = True

                # initialize location vector and hidden state
                self.batch_size = x.shape[0]
                h_t, l_t = self.reset()

                # extract the glimpses
                locs = []
                log_pi = []
                baselines = []
                glimpses = []
                for t in range(self.num_glimpses - 1):
                    # forward pass through model
                    phi, h_t, l_t, b_t, p = self.model(x, l_t, h_t)

                    # store, to look into
                    glimpses.append(phi)
                    baselines.append(b_t)
                    log_pi.append(p)

                # last iteration
                phi, h_t, l_t, b_t, log_probas, p = self.model(
                    x, l_t, h_t, last=True
                )

                glimpses.append(phi)
                log_pi.append(p)
                baselines.append(b_t)
                locs.append(l_t[0:9])

                # convert list to tensors and reshape
                baselines = torch.stack(baselines).transpose(1, 0)
                log_pi = torch.stack(log_pi).transpose(1, 0)

                # calculate reward
                predicted = torch.max(log_probas, 1)[1]

                R = (predicted.detach() == y).float()
                R = R.unsqueeze(1).repeat(1, self.num_glimpses)

                # compute losses for differentiable modules
                loss_action = F.nll_loss(log_probas, y)
                loss_baseline = F.mse_loss(baselines, R)

                # compute reinforce loss
                # summed over timesteps and averaged across batch
                adjusted_reward = R - baselines.detach()
                loss_reinforce = torch.sum(-log_pi*adjusted_reward, dim=1)
                loss_reinforce = torch.mean(loss_reinforce, dim=0)

                # sum up into a hybrid loss
                loss = loss_action + loss_baseline + loss_reinforce

                # compute accuracy
                correct = (predicted == y).float()
                acc = 100 * (correct.sum() / len(y))

                # softmax accuracy
                softmax_acc += softmax_accuracy(log_probas, y)

                # store
                losses.update(loss.item(), x.size()[0])
                accs.update(acc.item(), x.size()[0])

                # compute gradients and update SGD
                self.optimizer.zero_grad()
                loss.backward()
                self.optimizer.step()

                # measure elapsed time
                toc = time.time()
                batch_time.update(toc-tic)

                pbar.set_description(
                    (
                        "{:.1f}s - loss: {:.3f} - acc: {:.3f}".format(
                            (toc-tic), loss.item(), acc.item()
                        )
                    )
                )
                pbar.update(self.batch_size)

        # Div by the number of batches: enumerate starts at 0, so the batch
        # count is i + 1 (dividing by i was an off-by-one and crashed with
        # ZeroDivisionError when there was exactly one batch)
        softmax_acc /= (i + 1)

        # Only per epoch to tensorboard
        if self.use_tensorboard:
            # iteration = epoch*len(self.train_loader) + i
            log_value('train_loss', losses.avg, epoch)
            log_value('train_acc', accs.avg, epoch)

        # Per epoch to visdom
        if self.use_visdom:
            # Do visdom train acc and train loss
            register_plots({'mean': np.array(losses.avg)}, self.grapher, epoch, prefix='train loss')
            register_plots({'mean': np.array(accs.avg)}, self.grapher, epoch, prefix='train accuracy')
            register_plots({'mean': np.array(softmax_acc)}, self.grapher, epoch, prefix='softmax train accuracy')
            self.grapher.show()

        # Todo: code glimse development over time, or location over image
        if self.use_visdom and self.visdom_images:
            phi_tensors = []
            for j, phi in enumerate(glimpses):
                # stack all phi images from the glimpse list
                phi_row = phi.cpu().data.detach().view((-1, self.num_patches, self.patch_size, self.patch_size))
                phi_tensors.append(phi_row.squeeze())
                register_images(phi_row, 'train glimpse', self.grapher, prefix='train_' + str(epoch) + '_g_' + str(j))
                self.grapher.show()

            image_grid_tensor = torch.stack(phi_tensors).view(self.num_glimpses * self.batch_size, 1, self.patch_size, self.patch_size)
            register_images(image_grid_tensor, 'train glimpse', self.grapher, prefix='train_' + str(epoch))
            self.grapher.show()

        return losses.avg, accs.avg