Example No. 1
def step(args, split, epoch, loader, model, optimizer = None, M = None, f = None, tag = None):
  losses, mpjpe, mpjpe_r = AverageMeter(), AverageMeter(), AverageMeter()
  viewLosses, shapeLosses, supLosses = AverageMeter(), AverageMeter(), AverageMeter()
  
  if split == 'train':
    model.train()
  else:
    model.eval()
  bar = Bar('{}'.format(ref.category), max=len(loader))
  
  nViews = loader.dataset.nViews
  for i, (input, target, meta) in enumerate(loader):
    input_var = torch.autograd.Variable(input)
    target_var = torch.autograd.Variable(target)
    output = model(input_var)
    loss = ShapeConsistencyCriterion(nViews, supWeight = 1, unSupWeight = args.shapeWeight, M = M)(output, target_var, torch.autograd.Variable(meta))

    if split == 'test':
      for j in range(input.numpy().shape[0]):
        img = (input.numpy()[j] * 255).transpose(1, 2, 0).astype(np.uint8)
        cv2.imwrite('{}/img_{}/{}.png'.format(args.save_path, tag, i * input.numpy().shape[0] + j), img)
        gt = target.cpu().numpy()[j]
        pred = (output.data).cpu().numpy()[j]
        vis = meta.cpu().numpy()[j][5:]
        for t in range(ref.J):
          f.write('{} {} {} '.format(pred[t * 3], pred[t * 3 + 1], pred[t * 3 + 2]))
        f.write('\n')
        for t in range(ref.J):
          f.write('{} {} {} '.format(gt[t, 0], gt[t, 1], gt[t, 2]))
        f.write('\n')
        if args.saveVis:
          for t in range(ref.J):
            f.write('{} 0 0 '.format(vis[t]))
          f.write('\n')

    mpjpe_this = accuracy(output.data, target, meta)
    mpjpe_r_this = accuracy_dis(output.data, target, meta)
    shapeLoss = shapeConsistency(output.data, meta, nViews, M, split = split)

    losses.update(loss.data[0], input.size(0))
    shapeLosses.update(shapeLoss, input.size(0))
    mpjpe.update(mpjpe_this, input.size(0))
    mpjpe_r.update(mpjpe_r_this, input.size(0))
    
    if split == 'train':
      optimizer.zero_grad()
      loss.backward()
      optimizer.step()
    
    Bar.suffix = '{split:10}: [{0:2}][{1:3}/{2:3}] | Total: {total:} | ETA: {eta:} | Loss {loss.avg:.6f} | shapeLoss {shapeLoss.avg:.6f} | AE {mpjpe.avg:.6f} | ShapeDis {mpjpe_r.avg:.6f}'.format(epoch, i, len(loader), total=bar.elapsed_td, eta=bar.eta_td, loss=losses, mpjpe=mpjpe, split = split, shapeLoss = shapeLosses, mpjpe_r = mpjpe_r)
    bar.next()
      
  bar.finish()
  return mpjpe.avg, losses.avg, shapeLosses.avg
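Every example on this page assumes an AverageMeter helper for tracking running statistics. Its exact interface varies between repositories (some expose avg as an attribute, others as an average() or get_avg() method), but a minimal sketch of the attribute-style version used in Example No. 1, written here as an assumption rather than taken from any of the original repositories, looks like this:

class AverageMeter(object):
    """Tracks the latest value, running sum, count, and average of a metric."""

    def __init__(self):
        self.reset()

    def reset(self):
        self.val = 0.0
        self.sum = 0.0
        self.count = 0
        self.avg = 0.0

    def update(self, val, n=1):
        # val is the batch-level value; n is how many samples it covers
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count

With this interface, meter.update(loss.item(), batch_size) accumulates a sample-weighted running mean, and meter.avg is what the meters above and below read out for logging.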
Example No. 2
    def train(self):
        criterion = nn.CrossEntropyLoss().to(self.device)
        optimizer = torch.optim.Adam(self.net.parameters(),
                                     lr=self.learning_rate)
        lr_scheduler = torch.optim.lr_scheduler.LambdaLR(
            optimizer,
            LambdaLR(self.num_epoch, self.epoch, self.decay_epoch).step)

        total_step = len(self.train_loader)
        losses = AverageMeter()
        accuracy = AverageMeter()
        accuracy_set, loss_set, lr_set, epoch_set = self.read_loss_info()

        loss_window = self.visdom.line(Y=[1])
        lr_window = self.visdom.line(Y=[1])
        accuracy_window = self.visdom.line(Y=[1])

        for epoch in range(self.epoch, self.num_epoch):
            losses.reset()
            for step, (images, labels) in enumerate(self.train_loader):
                images = images.to(self.device)
                labels = labels.to(self.device)

                outputs = self.net(images)
                loss = criterion(outputs, labels)

                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

                _, predicted = torch.max(outputs.data, 1)
                predicted = (predicted == labels).sum().item()

                losses.update(loss.item(), self.batch_size)
                accuracy.update(predicted / self.batch_size, self.batch_size)

                if step % 10 == 0:
                    print(
                        f'Epoch [{epoch}/{self.num_epoch}], Step [{step}/{total_step}], Loss: {losses.avg:.4f}, '
                        f'Accuracy: {accuracy.avg:.4f}')

            accuracy_set += [accuracy.avg]
            loss_set += [losses.avg]
            lr_set += [optimizer.param_groups[0]['lr']]
            epoch_set += [epoch]
            loss_window = self.visdom.line(Y=loss_set,
                                           X=epoch_set,
                                           win=loss_window,
                                           update='replace')
            lr_window = self.visdom.line(Y=lr_set,
                                         X=epoch_set,
                                         win=lr_window,
                                         update='replace')
            accuracy_window = self.visdom.line(Y=accuracy_set,
                                               X=epoch_set,
                                               win=accuracy_window,
                                               update='replace')

            self.save_loss_info(accuracy_set, loss_set, lr_set, epoch_set)
            torch.save(self.net.state_dict(),
                       '%s/vgg16-%d.pth' % (self.checkpoint_dir, epoch))
            lr_scheduler.step()
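Example No. 2 passes the step method of a separate LambdaLR helper into torch.optim.lr_scheduler.LambdaLR. That helper is not shown on this page; a minimal sketch of the linear-decay variant commonly paired with this call (the class name matches the example, but the constructor attributes and decay rule below are assumptions) might look like:

class LambdaLR:
    def __init__(self, num_epochs, start_epoch, decay_epoch):
        self.num_epochs = num_epochs    # total number of training epochs
        self.start_epoch = start_epoch  # epoch the run (re)starts from
        self.decay_epoch = decay_epoch  # epoch at which linear decay begins

    def step(self, epoch):
        # Multiplicative LR factor: 1.0 until decay_epoch, then a linear
        # ramp down to 0.0 at the final epoch.
        return 1.0 - max(0, epoch + self.start_epoch - self.decay_epoch) / (
            self.num_epochs - self.decay_epoch)

torch.optim.lr_scheduler.LambdaLR then multiplies the optimizer's base learning rate by whatever step returns for the current epoch.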
Example No. 3
def train(args, train_loader, model, optimizer, ce_criterion, device,
          epoch, curr_lr, model_writer, global_step):
    batch_time = AverageMeter()
    data_time = AverageMeter()
    total_losses = AverageMeter()

    # switch to train mode
    model.train()

    end = time.time()
    for i, (mask, pos_vec, pose3d_discrete_seq, pose3d_discrete_gt_seq, \
        mfcc_data, beat_data) in enumerate(train_loader):
        # BS X T X 48, BS X T X 48, BS X 1 X T, BS X 1 X T, BS X T X 48
        bs = pose3d_discrete_seq.size()[0]
        timesteps = pose3d_discrete_seq.size()[1]

        # measure data loading time
        data_time.update(time.time() - end)

        # Send to device
        pose3d_discrete_seq = pose3d_discrete_seq.to(device)
        pose3d_discrete_gt_seq = pose3d_discrete_gt_seq.to(device)

        mask = mask.to(device)
        pos_vec = pos_vec.to(device)

        if args.add_mfcc and args.add_beat:
            mfcc_data_input = mfcc_data.to(device)
            beat_data_input = beat_data.to(device).long()
            pred_out = model(pose3d_discrete_seq, mask, pos_vec, \
            mfcc_feats=mfcc_data_input, beat_feats=beat_data_input) # BS X T X 48 X N_cls
        elif args.add_mfcc:
            mfcc_data_input = mfcc_data.to(device)
            pred_out = model(pose3d_discrete_seq, mask, pos_vec, mfcc_feats=mfcc_data_input) # BS X T X 48 X N_cls
        elif args.add_beat:
            beat_data_input = beat_data.to(device).long()
            pred_out = model(pose3d_discrete_seq, mask, pos_vec, beat_feats=beat_data_input) # BS X T X 48 X N_cls
        else:
            pred_out = model(pose3d_discrete_seq, mask, pos_vec) # BS X T X 48 X N_cls
       	
        r_loss = ce_criterion(pred_out, pose3d_discrete_gt_seq, mask.squeeze(1).unsqueeze(2))

        total_loss = r_loss

        model_writer.add_scalar("Loss", np.array(total_loss.item()), global_step)

        total_losses.update(total_loss.item(), 1)
        
        optimizer.zero_grad()
        total_loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), args.clip)
        optimizer.step()

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()
        if (i % args.print_freq == 0):
            print("\n\n")
            print('Epoch: [{0}][{1}/{2}]\t'
                  'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                  'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'
                  'Total Loss {total_loss.val:.4f} ({total_loss.avg:.4f})\n'
                  'lr {learning_rate:.6f}\t'
                  .format(epoch, i, len(train_loader), batch_time=batch_time,
                          data_time=data_time, 
                          total_loss=total_losses,
                          learning_rate=curr_lr))

        global_step += 1

    return global_step, total_losses.avg
Example No. 4
    def run_epoch(self, phase, epoch, data_loader):
        model_with_loss = self.model_with_loss
        if phase == 'train':
            model_with_loss.train()

        else:
            if len(self.opt.gpus) > 1:
                model_with_loss = self.model_with_loss.module
            model_with_loss.eval()
            torch.cuda.empty_cache()

        opt = self.opt
        results = {}
        data_time, batch_time = AverageMeter(), AverageMeter()
        avg_loss_stats = {l: AverageMeter() for l in self.loss_stats}
        num_iters = len(data_loader) if opt.num_iters < 0 else opt.num_iters
        bar = Bar('{}/{}'.format(opt.task, opt.exp_id), max=num_iters)
        end = time.time()
        for iter_id, batch in enumerate(data_loader):
            if iter_id >= num_iters:
                break
            data_time.update(time.time() - end)

            for k in batch:
                if k != 'meta':
                    batch[k] = batch[k].to(device=opt.device,
                                           non_blocking=True)
            output, loss, loss_stats = model_with_loss(batch)
            loss = loss.mean()
            if phase == 'train':
                self.optimizer.zero_grad()
                loss.backward()
                self.optimizer.step()
            batch_time.update(time.time() - end)
            end = time.time()

            Bar.suffix = '{phase}: [{0}][{1}/{2}]|Tot: {total:} |ETA: {eta:} '.format(
                epoch,
                iter_id,
                num_iters,
                phase=phase,
                total=bar.elapsed_td,
                eta=bar.eta_td)
            for l in avg_loss_stats:
                avg_loss_stats[l].update(loss_stats[l].mean().item(),
                                         batch['input'].size(0))
                Bar.suffix = Bar.suffix + '|{} {:.4f} '.format(
                    l, avg_loss_stats[l].avg)
            if not opt.hide_data_time:
                Bar.suffix = Bar.suffix + '|Data {dt.val:.3f}s({dt.avg:.3f}s) ' \
                  '|Net {bt.avg:.3f}s'.format(dt=data_time, bt=batch_time)
            if opt.print_iter > 0:
                if iter_id % opt.print_iter == 0:
                    print('{}/{}| {}'.format(opt.task, opt.exp_id, Bar.suffix))
            else:
                bar.next()

            if opt.debug > 0:
                self.debug(batch, output, iter_id)

            if opt.test:
                self.save_result(output, batch, results)
            del output, loss, loss_stats

        bar.finish()
        ret = {k: v.avg for k, v in avg_loss_stats.items()}
        ret['time'] = bar.elapsed_td.total_seconds() / 60.
        return ret, results
Example No. 5
    def test(self):
        self.feature_extractor.eval()
        self.classifier.eval()
        prec1_fs = AverageMeter()
        prec1_ft = AverageMeter()
        counter_all_fs = torch.FloatTensor(
            self.opt.DATASET.NUM_CLASSES).fill_(0)
        counter_all_ft = torch.FloatTensor(
            self.opt.DATASET.NUM_CLASSES).fill_(0)
        counter_acc_fs = torch.FloatTensor(
            self.opt.DATASET.NUM_CLASSES).fill_(0)
        counter_acc_ft = torch.FloatTensor(
            self.opt.DATASET.NUM_CLASSES).fill_(0)
        class_weight = torch.zeros(self.num_classes)
        class_weight = class_weight.cuda()
        count = 0

        for i, (input, target) in enumerate(self.test_data['loader']):
            input, target = to_cuda(input), to_cuda(target)
            with torch.no_grad():
                feature_test = self.feature_extractor(input)
                output_test = self.classifier(feature_test)
                prob = F.softmax(output_test[:, self.num_classes:], dim=1)
                class_weight = class_weight + prob.data.sum(0)
                count = count + input.size(0)

            if self.opt.EVAL_METRIC == 'accu':
                prec1_fs_iter = accuracy(output_test[:, :self.num_classes],
                                         target)
                prec1_ft_iter = accuracy(output_test[:, self.num_classes:],
                                         target)
                prec1_fs.update(prec1_fs_iter, input.size(0))
                prec1_ft.update(prec1_ft_iter, input.size(0))
                if i % self.opt.PRINT_STEP == 0:
                    print("  Test:epoch: %d:[%d/%d], AccFs: %3f, AccFt: %3f" % \
                          (self.epoch, i, len(self.test_data['loader']), prec1_fs.avg, prec1_ft.avg))
            elif self.opt.EVAL_METRIC == 'accu_mean':
                prec1_ft_iter = accuracy(output_test[:, self.num_classes:],
                                         target)
                prec1_ft.update(prec1_ft_iter, input.size(0))
                counter_all_fs, counter_acc_fs = accuracy_for_each_class(
                    output_test[:, :self.num_classes], target, counter_all_fs,
                    counter_acc_fs)
                counter_all_ft, counter_acc_ft = accuracy_for_each_class(
                    output_test[:, self.num_classes:], target, counter_all_ft,
                    counter_acc_ft)
                if i % self.opt.PRINT_STEP == 0:
                    print("  Test:epoch: %d:[%d/%d], Task: %3f" % \
                          (self.epoch, i, len(self.test_data['loader']), prec1_ft.avg))
            else:
                raise NotImplementedError
        acc_for_each_class_fs = counter_acc_fs / counter_all_fs
        acc_for_each_class_ft = counter_acc_ft / counter_all_ft
        log = open(os.path.join(self.opt.SAVE_DIR, 'log.txt'), 'a')
        log.write("\n")
        class_weight = class_weight / count
        class_weight = class_weight / max(class_weight)
        if self.opt.EVAL_METRIC == 'accu':
            log.write(
                "                                                          Test:epoch: %d, AccFs: %3f, AccFt: %3f" % \
                (self.epoch, prec1_fs.avg, prec1_ft.avg))
            log.close()
            return class_weight, max(prec1_fs.avg, prec1_ft.avg)
        elif self.opt.EVAL_METRIC == 'accu_mean':
            log.write(
                "                                            Test:epoch: %d, AccFs: %3f, AccFt: %3f" % \
                (self.epoch,acc_for_each_class_fs.mean(), acc_for_each_class_ft.mean()))
            log.write(
                "\nClass-wise Acc of Ft:")  ## based on the task classifier.
            for i in range(self.opt.DATASET.NUM_CLASSES):
                if i == 0:
                    log.write("%dst: %3f" % (i + 1, acc_for_each_class_ft[i]))
                elif i == 1:
                    log.write(",  %dnd: %3f" %
                              (i + 1, acc_for_each_class_ft[i]))
                elif i == 2:
                    log.write(", %drd: %3f" %
                              (i + 1, acc_for_each_class_ft[i]))
                else:
                    log.write(", %dth: %3f" %
                              (i + 1, acc_for_each_class_ft[i]))
            log.close()
            return class_weight, max(acc_for_each_class_ft.mean(),
                                     acc_for_each_class_fs.mean())
Example No. 6
def train(epoch, writer):

    # define meters
    loss_meter = AverageMeter()

    # put model into training mode
    model.train()

    # set this only when it is finetuning
    # for module in model.modules():
    #     if isinstance(module, torch.nn.modules.BatchNorm1d):
    #         module.eval()
    #     if isinstance(module, torch.nn.modules.BatchNorm2d):
    #         module.eval()
    #     if isinstance(module, torch.nn.modules.BatchNorm3d):
    #         module.eval()

    for param_group in optimizer.param_groups:
        print('learning rate: {}'.format(param_group['lr']))

    for i, sample in enumerate(tqdm(train_dataset_it)):

        im = sample['image']
        instances = sample['instance'].squeeze()
        class_labels = sample['label'].squeeze()

        output = model(im)
        loss = criterion(output, instances, class_labels, **args['loss_w'])
        loss = loss.mean()

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        #output.detach().cpu()
        #torch.cuda.empty_cache()

        if args['display'] and i % args['display_it'] == 0:
            with torch.no_grad():
                visualizer.display(im[0], 'image')

                predictions = cluster.cluster_with_gt(
                    output[0],
                    instances[0],
                    n_sigma=args['loss_opts']['n_sigma'])
                visualizer.display([predictions.cpu(), instances[0].cpu()],
                                   'pred')

                sigma = output[0][2].cpu()
                sigma = (sigma - sigma.min()) / (sigma.max() - sigma.min())
                sigma[instances[0] == 0] = 0
                visualizer.display(sigma, 'sigma')

                seed = torch.sigmoid(output[0][3]).cpu()
                visualizer.display(seed, 'seed')

        loss_meter.update(loss.item())

    if args['tensorboard']:
        with torch.no_grad():
            color_map = draw_flow(torch.tanh(output[0][0:2]))
            seed = torch.sigmoid(output[0][3:11]).cpu()
            sigma = output[0][2].cpu()
            sigma = (sigma - sigma.min()) / (sigma.max() - sigma.min())
            sigma[instances[0] == 0] = 0

            #predictions = cluster.cluster_with_gt(output[0], instances[0], n_sigma=args['loss_opts']['n_sigma'])

            color_map = color_map.transpose(2, 0, 1)

            seed_visual = seed.unsqueeze(1)

            seed_show = vutils.make_grid(seed_visual,
                                         nrow=8,
                                         normalize=True,
                                         scale_each=True)

            writer.add_image('Input', im[0], epoch)
            writer.add_image('InstanceGT',
                             instances[0].unsqueeze(0).cpu().numpy(), epoch)
            writer.add_image('ColorMap', color_map, epoch)
            writer.add_image('SeedMap', seed_show, epoch)
            writer.add_image('SigmaMap',
                             sigma.unsqueeze(0).cpu().numpy(), epoch)
            #writer.add_image('Prediction', predictions.unsqueeze(0).cpu().numpy(), epoch)

    return loss_meter.avg
Example No. 7
  def run_epoch(self, phase, epoch, data_loader, logger=None):
    model_with_loss = self.model_with_loss
    if phase == 'train':
      model_with_loss.train()
    else:
      if len(self.opt.gpus) > 1:
        model_with_loss = self.model_with_loss.module
      model_with_loss.eval()
      torch.cuda.empty_cache()

    opt = self.opt
    results = {}
    data_time, batch_time = AverageMeter(), AverageMeter()
    avg_loss_stats = {l: AverageMeter() for l in self.loss_stats}
    num_iters = len(data_loader) if opt.num_iters < 0 else opt.num_iters
    bar = Bar('{}/{}'.format(opt.task, opt.exp_id), max=num_iters)
    end = time.time()
    for iter_id, batch in enumerate(data_loader):
      if iter_id >= num_iters:
        break
      data_time.update(time.time() - end)

      for k in batch:
        if k != 'meta':
          batch[k] = batch[k].to(device=opt.device, non_blocking=True)    
      output, loss, loss_stats = model_with_loss(batch)
      if self.reconstruct_img:
        file_path = '/data/mry/code/CenterNet/debug_conflict_bt_class_recon'
        self.save_tensor_to_img(output['reconstruct_img'], batch['meta']['file_name'], file_path)
      loss = loss.mean()
      if phase == 'train':
        self.optimizer.zero_grad()
        loss.backward()
        self.optimizer.step()
      batch_time.update(time.time() - end)
      end = time.time()

      Bar.suffix = '{phase}: [{0}][{1}/{2}]|Tot: {total:} |ETA: {eta:} '.format(
        epoch, iter_id, num_iters, phase=phase,
        total=bar.elapsed_td, eta=bar.eta_td)
      for l in avg_loss_stats:
        if l == 'KL_loss':
          if loss_stats[l] is not None:
            avg_loss_stats[l].update(loss_stats[l].mean().item(), batch['input'].size(0))
            Bar.suffix = Bar.suffix + '|{} {:.4f} '.format(l, avg_loss_stats[l].avg)
          else:
            avg_loss_stats[l].update(0, batch['input'].size(0))
          continue
        avg_loss_stats[l].update(
          loss_stats[l].mean().item(), batch['input'].size(0))
        Bar.suffix = Bar.suffix + '|{} {:.4f} '.format(l, avg_loss_stats[l].avg)
      if not opt.hide_data_time:
        Bar.suffix = Bar.suffix + '|Data {dt.val:.3f}s({dt.avg:.3f}s) ' \
          '|Net {bt.avg:.3f}s'.format(dt=data_time, bt=batch_time)


      if logger and iter_id % opt.logger_iteration == 0:
        logger.write_iteration(
          '{phase}: [{0}][{1}/{2}]|Tot: {total:} |ETA: {eta:} '.format(
        epoch, iter_id, num_iters, phase=phase,
        total=bar.elapsed_td, eta=bar.eta_td))
        for l in avg_loss_stats:
          if loss_stats[l] is None:
            continue
          avg_loss_stats[l].update(loss_stats[l].mean().item(), batch['input'].size(0))
          logger.write_iteration('|{} {:.4f} '.format(l, avg_loss_stats[l].avg))
          logger.scalar_summary('train_iteration_{}'.format(l), avg_loss_stats[l].avg, (epoch-1)*num_iters+iter_id)
        logger.write_iteration('\n')

      if opt.print_iter > 0:
        if iter_id % opt.print_iter == 0:
          print('{}/{}| {}'.format(opt.task, opt.exp_id, Bar.suffix)) 
      else:
        bar.next()
      
      if opt.debug > 0:
        self.debug(batch, output, iter_id)
      
      if opt.test:
        self.save_result(output, batch, results)
      del output, loss, loss_stats
    
    bar.finish()
    ret = {k: v.avg for k, v in avg_loss_stats.items()}
    ret['time'] = bar.elapsed_td.total_seconds() / 60.
    return ret, results
Example No. 8
    def train_learner(self, x_train, y_train):
        self.before_train(x_train, y_train)
        # set up loader
        train_dataset = dataset_transform(
            x_train, y_train, transform=transforms_match[self.data])
        train_loader = data.DataLoader(train_dataset,
                                       batch_size=self.batch,
                                       shuffle=True,
                                       num_workers=0,
                                       drop_last=True)
        # setup tracker
        losses_batch = AverageMeter()
        acc_batch = AverageMeter()

        # set up model
        self.model.train()

        for ep in range(self.epoch):
            for i, batch_data in enumerate(train_loader):
                # batch update
                batch_x, batch_y = batch_data
                batch_x = maybe_cuda(batch_x, self.cuda)
                batch_y = maybe_cuda(batch_y, self.cuda)

                # update the running fisher
                if (ep * len(train_loader) + i +
                        1) % self.fisher_update_after == 0:
                    self.update_running_fisher()

                out = self.forward(batch_x)
                loss = self.total_loss(out, batch_y)
                if self.params.trick['kd_trick']:
                    loss = 1 / (self.task_seen + 1) * loss + (1 - 1 / (self.task_seen + 1)) * \
                                   self.kd_manager.get_kd_loss(out, batch_x)
                if self.params.trick['kd_trick_star']:
                    loss = 1 / ((self.task_seen + 1) ** 0.5) * loss + \
                           (1 - 1 / ((self.task_seen + 1) ** 0.5)) * self.kd_manager.get_kd_loss(out, batch_x)
                # update tracker
                losses_batch.update(loss.item(), batch_y.size(0))
                _, pred_label = torch.max(out, 1)
                acc = (pred_label == batch_y).sum().item() / batch_y.size(0)
                acc_batch.update(acc, batch_y.size(0))
                # backward
                self.opt.zero_grad()
                loss.backward()

                # accumulate the fisher of current batch
                self.accum_fisher()
                self.opt.step()

                if i % 100 == 1 and self.verbose:
                    print('==>>> it: {}, avg. loss: {:.6f}, '
                          'running train acc: {:.3f}'.format(
                              i, losses_batch.avg(), acc_batch.avg()))

        # save params for current task
        for n, p in self.weights.items():
            self.prev_params[n] = p.clone().detach()

        # update normalized fisher of current task
        max_fisher = max([torch.max(m) for m in self.running_fisher.values()])
        min_fisher = min([torch.min(m) for m in self.running_fisher.values()])
        for n, p in self.running_fisher.items():
            self.normalized_fisher[n] = (p - min_fisher) / (max_fisher -
                                                            min_fisher + 1e-32)
        self.after_train()
Example No. 9
        model.train()
        for data in train_loader:
            origin_x, origin_y = data[0].cuda(), data[1].cuda()
            positive_x, positive_y = data[2].cuda(), data[3].cuda()
            negative_x, negative_y = data[4].cuda(), data[5].cuda()

            origin_x_feat, origin_y_hat = model(origin_x)
            positive_x_feat, positive_y_hat = model(positive_x)
            negative_x_feat, negative_y_hat = model(negative_x)
            # loss_tp = torch.FloatTensor([0.]).cuda()
            loss_tp = criterion_TP (origin_x_feat, positive_x_feat, negative_x_feat)
            loss_ce = (criterion_CE (origin_y_hat, origin_y) + criterion_CE (positive_y_hat, origin_y) + criterion_CE (
                negative_y_hat, negative_y)) / 3

            acc_1, acc_3 = accuracy(origin_y_hat, origin_y, topk=(1, 3))
            avg_acc.update(acc_1.item(), data[0].shape[0])
            avg_triplet.update(loss_tp.item(), data[0].shape[0])
            avg_ce.update(loss_ce.item(), data[0].shape[0])

            loss = loss_tp + loss_ce

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        scheduler.step()

        y_hat_list = []
        y_list = []
        model.eval()
        with torch.no_grad():
Example No. 10
def inference():

    model = ACNet_models_V1.ACNet(num_class=40, pretrained=False)
    load_ckpt(model, None, args.last_ckpt, device)
    model.eval()
    model.to(device)

    val_data = ACNet_data.SUNRGBD(transform=torchvision.transforms.Compose(
        [scaleNorm(), ToTensor(), Normalize()]),
                                  phase_train=False,
                                  data_dir=args.data_dir)
    val_loader = DataLoader(val_data,
                            batch_size=1,
                            shuffle=False,
                            num_workers=1,
                            pin_memory=True)

    acc_meter = AverageMeter()
    intersection_meter = AverageMeter()
    union_meter = AverageMeter()
    a_meter = AverageMeter()
    b_meter = AverageMeter()
    with torch.no_grad():
        for batch_idx, sample in enumerate(val_loader):
            # TODO: batch=1; inspect the size of sample here to decide how to pack image/depth/label (probably needs a for loop)
            origin_image = sample['origin_image'].numpy()
            origin_depth = sample['origin_depth'].numpy()
            image = sample['image'].to(device)
            depth = sample['depth'].to(device)
            label = sample['label'].numpy()

            with torch.no_grad():
                pred = model(image, depth)

            output = torch.max(pred, 1)[1] + 1
            output = output.squeeze(0).cpu().numpy()

            acc, pix = accuracy(output, label)
            intersection, union = intersectionAndUnion(output, label,
                                                       args.num_class)
            acc_meter.update(acc, pix)
            a_m, b_m = macc(output, label, args.num_class)
            intersection_meter.update(intersection)
            union_meter.update(union)
            a_meter.update(a_m)
            b_meter.update(b_m)
            print('[{}] iter {}, accuracy: {}'.format(
                datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
                batch_idx, acc))

            # img = image.cpu().numpy()
            # print('origin image: ', type(origin_image))
            if args.visualize:
                visualize_result(origin_image, origin_depth, label - 1,
                                 output - 1, batch_idx, args)

    iou = intersection_meter.sum / (union_meter.sum + 1e-10)
    for i, _iou in enumerate(iou):
        print('class [{}], IoU: {}'.format(i, _iou))

    mAcc = (a_meter.average() / (b_meter.average() + 1e-10))
    print(mAcc.mean())
    print('[Eval Summary]:')
    print('Mean IoU: {:.4}, Accuracy: {:.2f}%'.format(
        iou.mean(),
        acc_meter.average() * 100))
Example No. 11
    def train(self):

        #load data loader
        train_loader, valid_loader = self.dataset.get_train_validation_data_loaders(
        )

        # define optimizer
        optimizer = torch.optim.AdamW(filter(lambda p: p.requires_grad,
                                             self.model.parameters()),
                                      self.train_config['lr'],
                                      weight_decay=eval(
                                          self.train_config['weight_decay']))
        n_steps = self.train_config["epochs"] * len(train_loader)

        # learning rate scheduler
        scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer,
                                                               T_max=n_steps)
        warmup_scheduler = warmup.UntunedLinearWarmup(optimizer)

        if apex_support and self.config['fp16_precision']:
            self.model, optimizer = amp.initialize(self.model,
                                                   optimizer,
                                                   opt_level='O2',
                                                   keep_batchnorm_fp32=True)

        model_checkpoints_folder = os.path.join(self.writer.log_dir,
                                                'checkpoints')

        # save config file
        save_config_file(model_checkpoints_folder)
        logger.info("***** Running training *****")
        logger.info("  Total optimization steps = %d", n_steps)

        n_iter = 0
        valid_n_iter = 0
        best_valid_loss = np.inf
        losses = AverageMeter()

        for epoch_counter in range(self.train_config['epochs']):
            self.model.train()
            # self.model.apply(set_bn_eval)
            epoch_iterator = tqdm(train_loader,
                                  desc="Training (X / X Steps) (loss=X.X)",
                                  bar_format="{l_bar}{r_bar}",
                                  dynamic_ncols=True)

            for [xis, xjs], labels in epoch_iterator:
                optimizer.zero_grad()

                loss = self._step(xis, xjs, labels)
                losses.update(loss.item(), self.config["batch_size"])

                if n_iter % self.train_config['log_every_n_steps'] == 0:
                    self.writer.add_scalar('train_loss',
                                           loss,
                                           global_step=n_iter)

                if apex_support and self.train_config['fp16_precision']:
                    with amp.scale_loss(loss, optimizer) as scaled_loss:
                        scaled_loss.backward()
                else:
                    loss.backward()

                optimizer.step()
                n_iter += 1

                epoch_iterator.set_description(
                    "Training (%d / %d Epochs) (loss=%2.5f)" %
                    (epoch_counter, self.train_config['epochs'], losses.val))

                # warmup for the first 10 epochs
                scheduler.step(scheduler.last_epoch + 1)
                warmup_scheduler.dampen()

            # validate the model if requested
            if epoch_counter % self.train_config['eval_every_n_epochs'] == 0:
                valid_loss = self._validate(valid_loader)
                if valid_loss < best_valid_loss:
                    # save the model weights
                    best_valid_loss = valid_loss
                    torch.save(
                        self.model.state_dict(),
                        os.path.join(model_checkpoints_folder, 'model.pth'))

                self.writer.add_scalar('validation_loss',
                                       valid_loss,
                                       global_step=valid_n_iter)
                valid_n_iter += 1

            self.writer.add_scalar('cosine_lr_decay',
                                   scheduler.get_lr()[0],
                                   global_step=n_iter)
Example No. 12
def predict(args, predict_data_loader, model, result_path):
    model.eval()
    batch_time = AverageMeter()
    end = time.time()

    for iter, (image, person_name, picname,
               imt) in enumerate(predict_data_loader):
        # batchsize = 1 ,so squeeze dim 1
        image = image.squeeze()
        person_name = person_name[0]
        #print(image_name)

        with torch.no_grad():
            # batch test for memory reduce
            batch = 1
            pred_seg = torch.zeros(image.shape[0], image.shape[2],
                                   image.shape[3])
            #pred_cls = torch.zeros(image.shape[0], 3)
            for i in range(0, image.shape[0], batch):
                start_id = i
                end_id = i + batch
                if end_id > image.shape[0]:
                    end_id = image.shape[0]
                image_batch = image[start_id:end_id, :, :, :]
                image_var = Variable(image_batch).cuda()
                # model forward
                output_seg = model(image_var)
                _, pred_batch = torch.max(output_seg, 1)
                pred_seg[start_id:end_id, :, :] = pred_batch.cpu().data
                #pred_cls[start_id:end_id, :] = output_cls.cpu().data

            pred_seg = pred_seg.numpy().astype('uint8')  # predict label
            #pred_det = pred_cls.numpy().astype('float32')

            if args.vis:
                imt = (imt.squeeze().numpy()).astype('uint8')
                #ant = label.numpy().astype('uint8')
                save_dir = osp.join(result_path, 'vis', person_name)
                if not exists(save_dir):
                    os.makedirs(save_dir)
                vis_predict(imt, pred_seg, pred_seg, save_dir, picname)
                print('save vis, finished!')

            batch_time.update(time.time() - end)
        # save seg result
        if args.seg:
            save_dir = osp.join(result_path, 'segment')
            if not exists(save_dir):
                os.makedirs(save_dir)
            np.save(osp.join(save_dir, image_name + '_labelMark_volumes'),
                    pred_seg)
            print('save segment result, finished!')
        # save cls result
        #if args.det:
        #    save_dir = osp.join(result_path, 'segment')
        #   if not exists(save_dir):
        #      os.makedirs(save_dir)
        #  np.save(osp.join(save_dir, image_name + '_labelMark_detections'), pred_det)
        #  print('save detection result, finished!')

        end = time.time()
        logger_vis.info(
            'Eval: [{0}/{1}]\t'
            'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'.format(
                iter, len(predict_data_loader), batch_time=batch_time))
Example No. 13
def evaluate(models, val_loader, interp, criterion, history, epoch, args):
    print('***Evaluating at {} epoch ...'.format(epoch))
    loss_meter = AverageMeter()
    acc_meter = AverageMeter()

    models.eval()

    for i, batch_data in enumerate(val_loader):
        torch.cuda.synchronize()
        # forward pass
        images, labels, _ = batch_data

        images = images.to(device)
        labels = labels.to(device)

        pred_seg = models(images)
        pred_seg = interp(pred_seg)
        # pred_seg = F.softmax(pred_seg)

        loss = criterion(pred_seg, labels)
        loss_meter.update(loss.data.item())
        print('[Eval] iter {}, loss: {}'.format(i, loss.data.item()))

        #acc = pixel_acc(pred_seg, labels)
        #acc_meter.update(acc.data.item())

        labels = as_numpy(labels)
        _, pred = torch.max(pred_seg, dim=1)
        pred = as_numpy(pred.squeeze(0).cpu())
        acc, pix = accuracy(pred, labels)
        acc_meter.update(acc, pix)

        if args.visualize:
            visualize_result(batch_data, pred_seg, args)

    history['val']['epoch'].append(epoch)
    history['val']['loss'].append(loss_meter.average())
    history['val']['acc'].append(acc_meter.average())
    print('[Eval Summary] Epoch: {}, Loss: {}, Accuracy: {:4.2f}%'.format(
        epoch, loss_meter.average(),
        acc_meter.average() * 100))

    # Plot figure
    if epoch > 0:
        print('Plotting loss figure...')
        fig = plt.figure()
        plt.plot(np.asarray(history['train']['epoch']),
                 np.log(np.asarray(history['train']['loss'])),
                 color='b',
                 label='training')
        plt.plot(np.asarray(history['val']['epoch']),
                 np.log(np.asarray(history['val']['loss'])),
                 color='c',
                 label='validation')
        plt.legend()
        plt.xlabel('Epoch')
        plt.ylabel('Log(loss)')
        fig.savefig('{}/loss.png'.format(args.checkpoints_dir), dpi=225)
        plt.close('all')

        fig = plt.figure()
        plt.plot(history['train']['epoch'],
                 history['train']['acc'],
                 color='b',
                 label='training')
        plt.plot(history['val']['epoch'],
                 history['val']['acc'],
                 color='c',
                 label='validation')
        plt.legend()
        plt.xlabel('Epoch')
        plt.ylabel('Accuracy')
        fig.savefig('{}/accuracy.png'.format(args.checkpoints_dir), dpi=225)
        plt.close('all')
Example No. 14
def train(models, train_loader, interp, optimizers, criterion, history, epoch,
          args):
    batch_time = AverageMeter()
    data_time = AverageMeter()

    # loss_value = 0

    # Switch to train mode
    models.train()

    # main loop
    tic = time.time()
    for i_iter, batch_data in enumerate(train_loader):
        cur_iter = i_iter + (epoch - 1) * args.epoch_iters
        # measure data loading time
        torch.cuda.synchronize()
        data_time.update(time.time() - tic)

        # optimizers.zero_grad()
        # cur_iter = i_iter + (epoch - 1) * args.epoch_iters
        # adjust_learning_rate(optimizers, cur_iter, args)

        # forward pass
        images, labels, _ = batch_data
        # print(images.type())

        # feed input data
        # self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        images = images.to(device)
        labels = labels.to(device)
        #print(labels.shape)
        #print(labels.size())
        #print(labels)

        optimizers.zero_grad()
        adjust_learning_rate(optimizers, cur_iter, args)

        pred_seg = models(images)
        #print(pred_seg)

        pred_seg = interp(pred_seg)
        # pred_seg = F.softmax(pred_seg)

        loss = criterion(pred_seg, labels)
        # loss_value += loss.item()
        #print(loss)
        # acc = pixel_acc(pred_seg, labels)
        # acc, _ = accuracy(pred_seg, labels)

        # loss = loss.mean()
        # acc = acc.mean()

        # Backward / compute gradient and do SGD step
        # optimizers.zero_grad()
        loss.backward()
        # optimizers.step()
        #ave_total_loss.update(loss.data.item())
        #ave_acc.update(acc.data.item() * 100)

        # loss_value += loss.data.cpu().numpy().item()
        # loss_value += loss.data.item()
        # loss_value += loss.item()
        # loss_value += loss.data.cpu().numpy()[0]
        # loss_value += loss.cpu().numpy()[0]

        optimizers.step()

        #loss_value += loss.item()
        # loss_value += loss.data.cpu().numpy().item()
        # loss_value = loss.data.item()

        # Measure elapsed time
        batch_time.update(time.time() - tic)
        tic = time.time()

        #loss_value += loss.data.cpu().numpy().item()

        # Update average loss and acc
        # acc = pixel_acc(pred_seg, labels)
        # ave_total_loss.update(loss.data.item())
        # ave_acc.update(acc.data.item() * 100)

        if i_iter % args.display_iter == 0:
            acc = pixel_acc(pred_seg, labels)
            print('Epoch: [{}][{}/{}], Time: {:.2f}, Data: {:.2f}, '
                  'LR: {:.6f}  '
                  'Accuracy: {:4.2f}, Loss: {:.6f}  '.format(
                      epoch, i_iter, args.epoch_iters, batch_time.average(),
                      data_time.average(), args.running_lr,
                      acc.data.item() * 100, loss.data.item()))

            fractional_epoch = epoch - 1 + 1. * i_iter / args.epoch_iters
            history['train']['epoch'].append(fractional_epoch)
            history['train']['loss'].append(loss.data.item())
            history['train']['acc'].append(acc.data.item())
Example No. 15
def train(epoch):
    model.train()
    nIters = len(train_loader)
    # bar = Bar('==>', max=nIters)
    Loss, Acc = AverageMeter(), AverageMeter()
    start_time = time.time()

    for i, (inp_img, down_img, pose) in enumerate(train_loader):

        input_img = Variable(inp_img).float().cuda()
        target_img = Variable(down_img).float().cuda()
        input_pose = Variable(
            torch.FloatTensor(getPreds(pose.cpu().numpy())).view(
                -1, 32)).float().cuda()

        recon_img, recon_pose, h_img, h_pose = model(input_img, input_pose)

        ll_loss_img = criterion(recon_img, target_img)
        ll_loss_pose = criterion(recon_pose, input_pose)
        h_img_copy, h_pose_copy = h_img.detach(), h_pose.detach()
        dissim_loss = F.mse_loss(h_img, h_pose_copy) + F.mse_loss(
            h_pose, h_img_copy)
        # add kl-div loss for each ae to get vaes
        # add kl-div loss for dissimilarity

        total_loss = ll_loss_img * opt.img_recon_wt + ll_loss_pose * opt.pose_recon_wt + dissim_loss * opt.dissim_wt
        Loss.update(total_loss.data[0], inp_img.size(0))
        img_to_pose = model.forward_i_to_p(h_img)
        Acc.update(
            Accuracy_Reg((img_to_pose.data.view(-1, 16, 2)).cpu().numpy(),
                         (input_pose.data.view(-1, 16, 2)).cpu().numpy()))
        del img_to_pose
        optimizer.zero_grad()
        total_loss.backward()
        optimizer.step()
        curr_time = time.time()

        print(
            '{split} Epoch: [{0}][{1}/{2}]| Total: {total:f} | ETA: {eta:f} | Loss {loss.avg:.6f} | Acc {Acc.avg:.6f} ({Acc.val:.6f} )'
            .format(epoch,
                    i,
                    nIters,
                    total=curr_time - start_time,
                    eta=(curr_time - start_time) * (nIters - i + 1) / (i + 1),
                    loss=Loss,
                    Acc=Acc,
                    split='train'))

        if i % save_interval == 0:
            n = min(input_img.size(0), 4)
            orig_i = target_img[:n].data
            recon_i = recon_img[:n].data
            orig_p = makeSkel_64(input_pose[:n].data, (0, 0, 255))
            recon_p = makeSkel_64(recon_pose[:n].data, (255, 0, 0))
            img_to_pose = model.forward_i_to_p(h_img[:n])
            pose_to_img = model.forward_p_to_i(h_pose[:n])
            i_to_p = makeSkel_64(img_to_pose.data, (0, 0, 255))
            p_to_i = pose_to_img.data
            comparison = torch.cat(
                [orig_i, recon_i, orig_p, recon_p, i_to_p, p_to_i])
            save_image(comparison,
                       saveDir_results + 'reconstruction_' + str(epoch) + "_" +
                       str(i) + '.png',
                       nrow=n)
            print("Saving results for epoch : {0}, progress : {1:.0f}".format(
                epoch, 100 * i / len(train_loader)))
Example No. 16
def train(args, train_loader, model, auxiliarynet, criterion, optimizer,
          epoch):
    batch_time = AverageMeter('Time', ':6.3f')
    data_time = AverageMeter('Data', ':6.3f')
    losses = AverageMeter('Loss', ':.4e')
    error = AverageMeter('error', ':6.2f')

    progress = ProgressMeter(len(train_loader),
                             batch_time,
                             data_time,
                             losses,
                             error,
                             prefix="Train Epoch: [{}]".format(epoch))
    # switch to train mode
    model.train()
    auxiliarynet.train()
    end = time.time()
    for batch_idx, (patch, gaze_norm_g, head_norm,
                    rot_vec_norm) in enumerate(train_loader):
        # measure data loading time
        data_time.update(time.time() - end)
        patch.requires_grad = False
        patch = patch.to(args.device)

        gaze_norm_g.requires_grad = False
        gaze_norm_g = gaze_norm_g.to(args.device)

        head_norm.requires_grad = False
        head_norm = head_norm.to(args.device)

        rot_vec_norm.requires_grad = False
        rot_vec_norm = rot_vec_norm.to(args.device)

        # model = model.to(args.device)
        # auxiliarynet = auxiliarynet.to(args.device)
        gaze_pred, features = model(patch)
        # print(features.size())
        hp_pred = auxiliarynet(features)
        head_norm = 10 * head_norm
        gaze_norm_g = 100 * gaze_norm_g
        loss = criterion(gaze_norm_g, head_norm, gaze_pred, hp_pred)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        angle_error = mean_angle_error(
            gaze_pred.cpu().detach().numpy() / 100,
            gaze_norm_g.cpu().detach().numpy() / 100,
            rot_vec_norm.cpu().detach().numpy())

        losses.update(loss.item())
        error.update(angle_error)

        if (batch_idx + 1) % args.print_freq == 0:
            progress.print(batch_idx + 1)
    return losses.get_avg(), error.get_avg()
Example No. 17
    def train_learner(self, x_train, y_train):
        self.before_train(x_train, y_train)

        # set up loader
        train_dataset = dataset_transform(
            x_train, y_train, transform=transforms_match[self.data])
        train_loader = data.DataLoader(train_dataset,
                                       batch_size=self.batch,
                                       shuffle=True,
                                       num_workers=0,
                                       drop_last=True)
        # set up model
        self.model = self.model.train()

        # setup tracker
        losses_batch = AverageMeter()
        acc_batch = AverageMeter()

        for ep in range(self.epoch):
            for i, batch_data in enumerate(train_loader):
                # batch update
                batch_x, batch_y = batch_data
                batch_x = maybe_cuda(batch_x, self.cuda)
                batch_y = maybe_cuda(batch_y, self.cuda)
                for j in range(self.mem_iters):
                    logits = self.forward(batch_x)
                    loss = self.criterion(logits, batch_y)
                    if self.params.trick['kd_trick']:
                        loss = 1 / (self.task_seen + 1) * loss + (1 - 1 / (self.task_seen + 1)) * \
                                    self.kd_manager.get_kd_loss(logits, batch_x)
                    if self.params.trick['kd_trick_star']:
                        loss = 1 / ((self.task_seen + 1) ** 0.5) * loss + \
                               (1 - 1 / ((self.task_seen + 1) ** 0.5)) * self.kd_manager.get_kd_loss(logits, batch_x)
                    _, pred_label = torch.max(logits, 1)
                    correct_cnt = (pred_label
                                   == batch_y).sum().item() / batch_y.size(0)
                    # update tracker
                    acc_batch.update(correct_cnt, batch_y.size(0))
                    losses_batch.update(loss.item(), batch_y.size(0))
                    # backward
                    self.opt.zero_grad()
                    loss.backward()

                    if self.task_seen > 0:
                        # sample from memory of previous tasks
                        mem_x, mem_y = self.buffer.retrieve()
                        if mem_x.size(0) > 0:
                            params = [
                                p for p in self.model.parameters()
                                if p.requires_grad
                            ]
                            # gradient computed using current batch
                            grad = [p.grad.clone() for p in params]
                            mem_x = maybe_cuda(mem_x, self.cuda)
                            mem_y = maybe_cuda(mem_y, self.cuda)
                            mem_logits = self.forward(mem_x)
                            loss_mem = self.criterion(mem_logits, mem_y)
                            self.opt.zero_grad()
                            loss_mem.backward()
                            # gradient computed using memory samples
                            grad_ref = [p.grad.clone() for p in params]

                            # inner product of grad and grad_ref
                            prod = sum([
                                torch.sum(g * g_r)
                                for g, g_r in zip(grad, grad_ref)
                            ])
                            if prod < 0:
                                prod_ref = sum(
                                    [torch.sum(g_r**2) for g_r in grad_ref])
                                # do projection
                                grad = [
                                    g - prod / prod_ref * g_r
                                    for g, g_r in zip(grad, grad_ref)
                                ]
                            # replace params' grad
                            for g, p in zip(grad, params):
                                p.grad.data.copy_(g)
                    self.opt.step()
                # update mem
                self.buffer.update(batch_x, batch_y)

                if i % 100 == 1 and self.verbose:
                    print('==>>> it: {}, avg. loss: {:.6f}, '
                          'running train acc: {:.3f}'.format(
                              i, losses_batch.avg(), acc_batch.avg()))
        self.after_train()
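The memory branch in Example No. 17 implements a gradient-projection rule in the style of A-GEM: when the inner product between the current-batch gradient g and the memory gradient g_ref is negative, g is projected so that the conflicting component along g_ref is removed. A standalone sketch of that rule on flattened gradients (the function name and flat-tensor interface are illustrative, not part of the example):

import torch

def project_grad(grad, grad_ref):
    """Return grad with any component that conflicts with grad_ref removed."""
    dot = torch.dot(grad, grad_ref)
    if dot < 0:
        # g <- g - (g . g_ref / ||g_ref||^2) * g_ref
        grad = grad - (dot / torch.dot(grad_ref, grad_ref)) * grad_ref
    return grad

Example No. 17 applies the same formula parameter tensor by parameter tensor, summing the inner products over all parameters before deciding whether to project.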
Example No. 18
def evaluate():
    model = ACNet_models_V1.ACNet(num_class=5, pretrained=False)
    load_ckpt(model, None, None, args.last_ckpt, device)
    model.eval()
    model.to(device)

    val_data = ACNet_data.FreiburgForest(
        transform=torchvision.transforms.Compose([
            ACNet_data.ScaleNorm(),
            ACNet_data.ToTensor(),
            ACNet_data.Normalize()
        ]),
        data_dirs=[args.test_dir],
        modal1_name=args.modal1,
        modal2_name=args.modal2,
    )
    val_loader = DataLoader(val_data, batch_size=1, shuffle=False, num_workers=1, pin_memory=True)

    acc_meter = AverageMeter()
    intersection_meter = AverageMeter()
    union_meter = AverageMeter()
    a_meter = AverageMeter()
    b_meter = AverageMeter()
    with torch.no_grad():
        for batch_idx, sample in enumerate(val_loader):
            modal1 = sample['modal1'].to(device)
            modal2 = sample['modal2'].to(device)
            label = sample['label'].numpy()
            basename = sample['basename'][0]

            with torch.no_grad():
                pred = model(modal1, modal2)

            output = torch.argmax(pred, 1) + 1
            output = output.squeeze(0).cpu().numpy()

            acc, pix = accuracy(output, label)
            intersection, union = intersectionAndUnion(output, label, args.num_class)
            acc_meter.update(acc, pix)
            a_m, b_m = macc(output, label, args.num_class)
            intersection_meter.update(intersection)
            union_meter.update(union)
            a_meter.update(a_m)
            b_meter.update(b_m)
            print('[{}] iter {}, accuracy: {}'
                  .format(datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S"), batch_idx, acc))

            if args.visualize:
                visualize_result(modal1, modal2, label, output, batch_idx, args)

            if args.save_predictions:
                colored_output = utils.color_label_eval(output).astype(np.uint8)
                imageio.imwrite(f'{args.output_dir}/{basename}_pred.png', colored_output.transpose([1, 2, 0]))

    iou = intersection_meter.sum / (union_meter.sum + 1e-10)
    for i, _iou in enumerate(iou):
        print('class [{}], IoU: {}'.format(i, _iou))

    mAcc = (a_meter.average() / (b_meter.average() + 1e-10))
    print(mAcc.mean())
    print('[Eval Summary]:')
    print('Mean IoU: {:.4}, Accuracy: {:.2f}%'
          .format(iou.mean(), acc_meter.average() * 100))
Example No. 19
def train(genertor, discriminator, iterator, interp, optimizer, optimizer_D,
          criterion, criterion_bce, history, epoch, args):
    batch_time = AverageMeter()
    data_time = AverageMeter()

    # label for adversarial training
    S1_label = 0
    S2_label = 1

    genertor.train()
    discriminator.train()

    # main loop
    tic = time.time()
    for i_iter in range(args.epoch_iters):
        loss_seg_value_S1 = 0
        loss_seg_value_S2 = 0
        loss_seg_value_La = 0

        loss_adv_pred_value = 0
        loss_D_value = 0

        optimizer.zero_grad()
        adjust_learning_rate(optimizer, i_iter)

        optimizer_D.zero_grad()
        adjust_learning_rate_D(optimizer_D, i_iter)

        for param in discriminator.parameters():
            param.requires_grad = False

        _, batch_data = next(iterator)  # use  enumerate()
        data_time.update(time.time() - tic)
        # batch_data = next(trainloader_iter)   # use  iter()
        images, labels, infos = batch_data

        # images, labels, _ = batch_data
        # print(images, labels)

        # feed input data
        input_img = Variable(images, volatile=False)  # train:False , val: True
        label_seg = Variable(labels.long(), volatile=False)  # long() ???
        input_img = input_img.cuda()
        label_seg = label_seg.cuda()
        #print(label_seg)
        #print('input_img_size: {}, label_seg_size: {}'.format(input_img.size(), label_seg.size()))

        pred_S2, _, pred_S1 = genertor(input_img)

        pred_S1 = interp(pred_S1)  # --> [ B x 150 x 321 x 321 ]
        pred_S2 = interp(pred_S2)
        #print(pred_G2.size())
        #print(pred_G2.type())

        # input size (torch.Size([4, 150, 321, 321])) Target size (torch.Size([4, 321, 321])
        loss_seg_S1 = criterion(pred_S1, label_seg)
        loss_seg_S2 = criterion(pred_S2, label_seg)

        # produce mask
        #pred_label = pred_S2.data.cpu().numpy().argmax(axis=1)
        pred_label = pred_S1.data.cpu().numpy().argmax(axis=1)
        pred_label = torch.from_numpy(pred_label)
        pred_label = Variable(pred_label.long()).cuda()

        #loss_seg_La = criterion(pred_S2, pred_label)  # / 1.65
        loss_seg_La = criterion(pred_S2, label_seg)  # / 1.65

        D_out_S1 = interp(discriminator(
            F.softmax(pred_S1)))  # --> [B x 1 x 321 x 321]
        D_out_S2 = interp(discriminator(F.softmax(pred_S2)))

        #loss_adv_pred = criterion_bce(D_out_S1, Variable(torch.FloatTensor(D_out_S1.data.size()).fill_(S2_label)).cuda())
        loss_adv_pred = criterion_bce(
            D_out_S2,
            Variable(torch.FloatTensor(
                D_out_S2.data.size()).fill_(S1_label)).cuda())

        loss_weakly = args.lambda_seg_La * loss_seg_La
        #loss_weakly = args.lambda_seg_La * (1 - (loss_seg_La / loss_seg_S2))**2

        #loss = args.lambda_seg_S1 * loss_seg_S1
        loss = args.lambda_seg_S1 * loss_seg_S1 + args.lambda_adv_pred * loss_adv_pred

        #loss = args.lambda_seg_S1 * loss_seg_S1 + args.lambda_adv_pred * loss_adv_pred + args.lambda_seg_La * loss_seg_La
        #loss = args.lambda_seg_S1 * loss_seg_S1 + args.lambda_adv_pred * loss_adv_pred + args.lambda_seg_La *  (1 - (loss_seg_La / loss_seg_S2))**2
        # proper normalization
        #loss_1.backward()  # detach()

        loss_weakly.backward(retain_graph=True)
        loss.backward()

        loss_seg_value_S1 += loss_seg_S1.data.cpu().numpy()[0]
        loss_seg_value_S2 += loss_seg_S2.data.cpu().numpy()[0]
        loss_seg_value_La += loss_seg_La.data.cpu().numpy()[0]
        loss_adv_pred_value += loss_adv_pred.data.cpu().numpy()[0]

        # train D
        # model_D.train()
        # optimizer_D.zero_grad()

        # unfreeze the discriminator for its own update
        for param in discriminator.parameters():
            param.requires_grad = True

        # train D on the (detached) S1 predictions
        pred_S1 = pred_S1.detach()
        D_out_S1 = interp(discriminator(F.softmax(pred_S1, dim=1)))
        loss_D = criterion_bce(
            D_out_S1,
            Variable(torch.FloatTensor(
                D_out_S1.data.size()).fill_(S1_label)).cuda())

        loss_D = loss_D / 2.0
        loss_D.backward()
        loss_D_value += loss_D.data.cpu().numpy()[0]

        # train D on the (detached) S2 predictions
        pred_S2 = pred_S2.detach()
        D_out_S2 = interp(discriminator(F.softmax(pred_S2, dim=1)))
        loss_D = criterion_bce(
            D_out_S2,
            Variable(torch.FloatTensor(
                D_out_S2.data.size()).fill_(S2_label)).cuda())

        loss_D = loss_D / 2.0
        loss_D.backward()
        loss_D_value += loss_D.data.cpu().numpy()[0]

        optimizer.step()
        optimizer_D.step()

        # measure elapsed time
        batch_time.update(time.time() - tic)
        tic = time.time()

        # calculate accuracy , mIOU, and display
        if i_iter % args.disp_iter == 0:  # must fire at i_iter == 0 so history is non-empty for the best-loss check below
            acc_pred_outputs, pix_pred_outputs = accuracy(batch_data, pred_S1)

            #print('exp = {}'.format(args.checkpoints_dir))
            print(
                'iter =[{0:d}]/[{1:d}/{2:d}], Time: {3:.2f}, Data: {4:.2f}, loss_seg_S1 = {5:.4f} loss_seg_S2 = {6:.4f} loss_seg_La = {7:.4f}, loss_adv_pred = {8:.4f}, loss_D = {9:.4f}, Accuracy: {10:4.2f}%'
                .format(epoch, i_iter, args.epoch_iters, batch_time.average(),
                        data_time.average(), loss_seg_value_S1,
                        loss_seg_value_S2, loss_seg_value_La,
                        loss_adv_pred_value, loss_D_value,
                        acc_pred_outputs * 100))

            fractional_epoch = epoch - 1 + 1. * i_iter / args.epoch_iters
            history['train']['epoch'].append(fractional_epoch)
            history['train']['loss_pred_outputs'].append(loss_seg_S1.data[0])
            history['train']['acc_pred_outputs'].append(acc_pred_outputs)

        #  checkpoint
        if epoch == args.num_epoches and i_iter >= args.epoch_iters - 1:
            print('taking checkpoints latest ...')
            torch.save(
                genertor.state_dict(),
                osp.join(
                    args.checkpoints_dir,
                    str(args.generatormodel) + '_' + str(epoch) + 'epoch_' +
                    str(args.epoch_iters) + '_latest.pth'))
            torch.save(
                discriminator.state_dict(),
                osp.join(
                    args.checkpoints_dir,
                    str(args.generatormodel) + '_' + str(epoch) + 'epoch_' +
                    str(args.epoch_iters) + '_D_latest.pth'))

        loss_seg_S1 = history['train']['loss_pred_outputs'][-1]
        if loss_seg_S1 < args.best_loss:
            args.best_loss = loss_seg_S1
            print('taking checkpoints best ...')
            torch.save(
                genertor.state_dict(),
                osp.join(
                    args.checkpoints_dir,
                    str(args.generatormodel) + '_' + str(args.epoch_iters) +
                    '_train_best.pth'))
            torch.save(
                discriminator.state_dict(),
                osp.join(
                    args.checkpoints_dir,
                    str(args.generatormodel) + '_' + str(args.epoch_iters) +
                    '_D_train_best.pth'))
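The loop above calls adjust_learning_rate and adjust_learning_rate_D without showing them; in adversarial segmentation code of this kind they are commonly a polynomial ("poly") decay of the base learning rate over the total number of iterations. A sketch of that schedule, where the power of 0.9 and the helper names are illustrative assumptions rather than this repository's definitions:

def poly_lr(base_lr, i_iter, max_iter, power=0.9):
    """Polynomial decay: the learning rate shrinks from base_lr towards 0 over max_iter steps."""
    return base_lr * ((1.0 - float(i_iter) / max_iter) ** power)

def adjust_learning_rate_example(optimizer, i_iter, base_lr, max_iter):
    """Apply the poly schedule to every parameter group of an optimizer."""
    lr = poly_lr(base_lr, i_iter, max_iter)
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr
    return lr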
Exemplo n.º 20
0
    def _train_epoch(self, start_time):
        train_loss = AverageMeter()

        for step, batch in enumerate(self.train_loader):
            self.model.train()
            batch = tuple(t.to(self.device) for t in batch)
            batch_size = batch[1].size(0)

            op = batch[0]
            inputs = {
                "input_ids_a": batch[1],
                "token_type_ids_a": batch[2],
                "attention_mask_a": batch[3],
                "input_ids_b": batch[4],
                "token_type_ids_b": batch[5],
                "attention_mask_b": batch[6],
                "input_ids_c": batch[7],
                "token_type_ids_c": batch[8],
                "attention_mask_c": batch[9],
            }
            if self.fts_flag:
                inputs.update({
                    "x_a": batch[10],
                    "x_b": batch[11],
                    "x_c": batch[12]
                })
            # anchor, positive, negative = self.model(**inputs)
            outputs = self.model(**inputs)

            if not isinstance(outputs, (tuple, list)):  # normalize to a tuple
                outputs = (outputs, )

            loss = self.criterion(op.float(), *outputs)
            train_loss.update(loss.item(), batch_size)

            if self.gradient_accumulation_steps > 1:
                loss = loss / self.gradient_accumulation_steps

            with amp.scale_loss(loss, self.optimizer) as scaled_loss:
                scaled_loss.backward()

            if (step + 1) % self.gradient_accumulation_steps == 0:
                self.optimizer.step()
                self.optimizer.zero_grad()

            self.global_step += 1

            if (step + 1) % 20 == 0:
                rate = self.optimizer.get_lr()
                now_epoch = (self.global_step * self.batch_size /
                             len(self.train_loader.dataset))
                self.logger.info(
                    f"{rate[0]:.7f} "
                    f"{self.global_step / 1000:5.2f} "
                    f"{now_epoch:6.2f}  | "
                    f"{train_loss.avg:.4f}            | "
                    f'{time_to_str((timer() - start_time), "sec")}  '
                    f"{torch.cuda.memory_allocated() // 1024 ** 2}")

        train_log = {"loss": train_loss.avg}
        return train_log
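The epoch loop above combines NVIDIA apex (amp.scale_loss) with gradient accumulation: the loss is pre-divided by gradient_accumulation_steps and the optimizer only steps every that many mini-batches, so the accumulated gradient is an average. A rough equivalent of the same accumulation pattern using PyTorch's built-in torch.cuda.amp; the function name, loader shape, and accum_steps value are assumptions for illustration:

import torch
from torch.cuda.amp import GradScaler, autocast

def accumulate_train_epoch(model, criterion, optimizer, loader, accum_steps=4, device='cuda'):
    """Gradient accumulation with native AMP; effective batch = loader batch * accum_steps."""
    scaler = GradScaler()
    model.train()
    optimizer.zero_grad()
    for step, (inputs, targets) in enumerate(loader):
        inputs, targets = inputs.to(device), targets.to(device)
        with autocast():
            # divide so that the accumulated gradient is an average, not a sum
            loss = criterion(model(inputs), targets) / accum_steps
        scaler.scale(loss).backward()
        if (step + 1) % accum_steps == 0:
            scaler.step(optimizer)
            scaler.update()
            optimizer.zero_grad()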
Exemplo n.º 21
0
def evaluate(genertor, val_loader, interp, criterion, history, epoch, args):
    print('Evaluating at {} epochs...'.format(epoch))
    loss_pred_outputs_meter = AverageMeter()
    acc_pred_outputs_meter = AverageMeter()

    # switch to eval mode
    genertor.eval()

    for i, batch_data in enumerate(val_loader):
        # forward pass
        #_, batch_data = next(iterator)  # use  enumerate()
        #data_time.update(time.time() - tic)
        # batch_data = next(trainloader_iter)   # use  iter()
        images, labels, infos = batch_data

        # images, labels, _ = batch_data
        # print(images, labels)

        # feed input data
        input_img = Variable(images, volatile=True)  # volatile=True: inference only, no gradients needed
        label_seg = Variable(labels.long(), volatile=True)  # CrossEntropyLoss expects LongTensor targets
        input_img = input_img.cuda()
        label_seg = label_seg.cuda()
        #print(label_seg)
        #print('input_img_size: {}, label_seg_size: {}'.format(input_img.size(), label_seg.size()))

        pred1, _, pred2 = genertor(input_img)

        pred1 = interp(pred1)  # --> [ B x 150 x 321 x 321 ]
        pred2 = interp(pred2)

        #pred1 = nn.functional.log_softmax(pred1)
        #pred2 = nn.functional.log_softmax(pred2)
        #pred_outputs = nn.functional.log_softmax(pred_outputs)

        loss_pred_outputs = criterion(pred2, label_seg)
        loss_pred_outputs_meter.update(loss_pred_outputs.data[0])
        print('[Eval] iter {}, loss_pred_outputs:{}'.format(
            i, loss_pred_outputs.data[0]))

        acc_pred_outputs, pix_pred_outputs = accuracy(batch_data, pred2)
        acc_pred_outputs_meter.update(acc_pred_outputs, pix_pred_outputs)

        if args.visualize:
            visualize_tv(batch_data, pred1, pred2, args)

    history['val']['epoch'].append(epoch)
    history['val']['loss_pred_outputs'].append(
        loss_pred_outputs_meter.average())
    history['val']['acc_pred_outputs'].append(acc_pred_outputs_meter.average())

    print('[Eval Summary] Epoch: {}, Loss: {}, Accuracy: {:4.2f}%'.format(
        epoch, loss_pred_outputs_meter.average(),
        acc_pred_outputs_meter.average() * 100))

    # plot figure
    if epoch > 0:
        print('Plotting loss figure...')
        fig = plt.figure()
        plt.plot(np.asarray(history['train']['epoch']),
                 np.log(np.asarray(history['train']['loss_pred_outputs'])),
                 color='b',
                 label='training')

        plt.plot(np.asarray(history['val']['epoch']),
                 np.log(np.asarray(history['val']['loss_pred_outputs'])),
                 color='c',
                 label='validation')
        plt.legend()
        plt.xlabel('Epoch')
        plt.ylabel('Log(loss)')
        fig.savefig('{}/loss.png'.format(args.checkpoints_dir), dpi=200)
        plt.close('all')

        fig = plt.figure()
        plt.plot(history['train']['epoch'],
                 history['train']['acc_pred_outputs'],
                 color='b',
                 label='training')
        plt.plot(history['val']['epoch'],
                 history['val']['acc_pred_outputs'],
                 color='c',
                 label='validation')
        plt.legend()
        plt.xlabel('Epoch')
        plt.ylabel('Accuracy')
        fig.savefig('{}/accuracy.png'.format(args.checkpoints_dir), dpi=200)
        plt.close('all')
    """
Exemplo n.º 22
0
def val(epoch, writer_val):

    # define meters
    loss_meter, iou_meter = AverageMeter(), AverageMeter()

    # put model into eval mode
    model.eval()

    with torch.no_grad():

        for i, sample in enumerate(tqdm(val_dataset_it)):

            im = sample['image']
            instances = sample['instance'].squeeze()
            class_labels = sample['label'].squeeze()

            output = model(im)
            loss = criterion(output,
                             instances,
                             class_labels,
                             **args['loss_w'],
                             iou=True,
                             iou_meter=iou_meter)
            loss = loss.mean()

            if args['display'] and i % args['display_it'] == 0:
                with torch.no_grad():
                    visualizer.display(im[0], 'image')

                    predictions = cluster.cluster_with_gt(
                        output[0],
                        instances[0],
                        n_sigma=args['loss_opts']['n_sigma'])
                    visualizer.display([predictions.cpu(), instances[0].cpu()],
                                       'pred')

                    sigma = output[0][2].cpu()
                    sigma = (sigma - sigma.min()) / (sigma.max() - sigma.min())
                    sigma[instances[0] == 0] = 0
                    visualizer.display(sigma, 'sigma')

                    seed = torch.sigmoid(output[0][3]).cpu()
                    visualizer.display(seed, 'seed')

            loss_meter.update(loss.item())

        if args['tensorboard']:
            with torch.no_grad():
                color_map = draw_flow(torch.tanh(output[0][0:2]))
                seed = torch.sigmoid(output[0][3:11]).cpu()
                sigma = output[0][2].cpu()
                sigma = (sigma - sigma.min()) / (sigma.max() - sigma.min())
                sigma[instances[0] == 0] = 0

                #predictions = cluster.cluster_with_gt(output[0], instances[0], n_sigma=args['loss_opts']['n_sigma'])

                color_map = color_map.transpose(2, 0, 1)

                seed_visual = seed.unsqueeze(1)

                seed_show = vutils.make_grid(seed_visual,
                                             nrow=8,
                                             normalize=True,
                                             scale_each=True)

                writer_val.add_image('Input', im[0], epoch)
                writer_val.add_image('InstanceGT',
                                     instances[0].unsqueeze(0).cpu().numpy(),
                                     epoch)
                writer_val.add_image('ColorMap', color_map, epoch)
                writer_val.add_image('SeedMap', seed_show, epoch)
                writer_val.add_image('SigmaMap',
                                     sigma.unsqueeze(0).cpu().numpy(), epoch)
                #writer_val.add_image('Prediction', predictions.unsqueeze(0).cpu().numpy(), epoch)

    return loss_meter.avg, iou_meter.avg
Exemplo n.º 23
0
def train(P, epoch, model, criterion, optimizer, scheduler, loader, adversary, logger=None):

    if logger is None:
        log_ = print
    else:
        log_ = logger.log

    batch_time = AverageMeter()
    data_time = AverageMeter()

    losses = dict()
    losses['mrt'] = AverageMeter()
    losses['con'] = AverageMeter()
    losses['adv'] = AverageMeter()

    check = time.time()
    for n, (images, labels) in enumerate(loader):
        model.train()
        count = n * P.n_gpus  # number of trained samples

        data_time.update(time.time() - check)
        check = time.time()

        labels = labels.to(device)

        batch_size = images[0].size(0)
        images_aug1, images_aug2 = images[0].to(device), images[1].to(device)
        images_pair = torch.cat([images_aug1, images_aug2], dim=0)  # 2B

        loss_adv, loss_mart, outputs_adv = mart_loss(
            model, images_pair, labels.repeat(2), optimizer, distance=P.distance,
            eps_iter=P.alpha, eps=P.epsilon, nb_iter=P.n_iters,
            beta=P.beta, clip_min=0, clip_max=1, return_adv=True
        )

        ### consistency regularization ###
        outputs_adv1, outputs_adv2 = outputs_adv.chunk(2)
        loss_con = _jensen_shannon_div(outputs_adv1, outputs_adv2, P.T)

        ### total loss ###
        loss_con *= P.lam
        loss = loss_mart + loss_adv + loss_con

        loss.backward()
        optimizer.step()

        lr = optimizer.param_groups[0]['lr']

        batch_time.update(time.time() - check)

        ### Log losses ###
        losses['mrt'].update(loss_mart.item(), batch_size)
        losses['con'].update(loss_con.item(), batch_size)
        losses['adv'].update(loss_adv.item(), batch_size)

        if count % 50 == 0:
            log_('[Epoch %3d; %3d] [Time %.3f] [Data %.3f] [LR %.5f]\n'
                 '[LossMRT %f] [LossCon %f] [LossAdv %f]' %
                 (epoch, count, batch_time.value, data_time.value, lr,
                  losses['mrt'].value, losses['con'].value,
                  losses['adv'].value))

        check = time.time()

    if P.optimizer == 'sgd':
        scheduler.step()

    log_('[DONE] [Time %.3f] [Data %.3f] [LossMRT %f] '
         '[LossCon %f] [LossAdv %f]' %
         (batch_time.average, data_time.average,
          losses['mrt'].average, losses['con'].average,
          losses['adv'].average))

    if logger is not None:
        logger.scalar_summary('train/loss_mart', losses['mrt'].average, epoch)
        logger.scalar_summary('train/loss_con', losses['con'].average, epoch)
        logger.scalar_summary('train/loss_adversary', losses['adv'].average, epoch)
        logger.scalar_summary('train/batch_time', batch_time.average, epoch)
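_jensen_shannon_div supplies the consistency term above but is not defined in this excerpt; a plausible implementation, assuming it receives two logit tensors and a softmax temperature T, could look like the following sketch (not the repository's own code):

import torch
import torch.nn.functional as F

def jensen_shannon_div(logits_1, logits_2, T=1.0):
    """Symmetric JS divergence between two temperature-scaled softmax distributions."""
    p1 = F.softmax(logits_1 / T, dim=1)
    p2 = F.softmax(logits_2 / T, dim=1)
    m = 0.5 * (p1 + p2)
    # F.kl_div(log_q, p) computes KL(p || q) when log_q holds log-probabilities
    kl_1 = F.kl_div(m.log(), p1, reduction='batchmean')
    kl_2 = F.kl_div(m.log(), p2, reduction='batchmean')
    return 0.5 * (kl_1 + kl_2)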
Exemplo n.º 24
0
    def update_network(self, **kwargs):
        stop = False
        self.train_data['source']['iterator'] = iter(
            self.train_data['source']['loader'])
        self.train_data['target']['iterator'] = iter(
            self.train_data['target']['loader'])
        self.iters_per_epoch = len(self.train_data['target']['loader'])
        iters_counter_within_epoch = 0
        data_time = AverageMeter()
        batch_time = AverageMeter()
        classifier_loss = AverageMeter()
        feature_extractor_loss = AverageMeter()
        prec1_fs = AverageMeter()
        prec1_ft = AverageMeter()
        self.feature_extractor.train()
        self.classifier.train()
        end = time.time()
        if self.opt.TRAIN.PROCESS_COUNTER == 'epoch':
            self.lam = 2 / (1 + math.exp(
                -1 * 10 * self.epoch / self.opt.TRAIN.MAX_EPOCH)) - 1
            self.update_lr()
            print('value of lam is: %.3f' % self.lam)
        while not stop:
            if self.opt.TRAIN.PROCESS_COUNTER == 'iteration':
                self.lam = 2 / (1 + math.exp(
                    -1 * 10 * self.iters /
                    (self.opt.TRAIN.MAX_EPOCH * self.iters_per_epoch))) - 1
                print('value of lam is: %.3f' % self.lam)
                self.update_lr()
            source_data, source_gt = self.get_samples('source')
            target_data, _ = self.get_samples('target')
            source_data = to_cuda(source_data)
            source_gt = to_cuda(source_gt)
            target_data = to_cuda(target_data)
            data_time.update(time.time() - end)

            feature_source = self.feature_extractor(source_data)
            output_source = self.classifier(feature_source)
            feature_target = self.feature_extractor(target_data)
            output_target = self.classifier(feature_target)

            weight_concate = torch.cat((self.class_weight, self.class_weight))
            loss_task_fs = self.CELossWeight(
                output_source[:, :self.num_classes], source_gt,
                self.class_weight)
            loss_task_ft = self.CELossWeight(
                output_source[:, self.num_classes:], source_gt,
                self.class_weight)
            loss_discrim_source = self.CELossWeight(output_source, source_gt,
                                                    weight_concate)
            loss_discrim_target = self.TargetDiscrimLoss(output_target)
            loss_summary_classifier = loss_task_fs + loss_task_ft + loss_discrim_source + loss_discrim_target

            source_gt_for_ft_in_fst = source_gt + self.num_classes
            loss_confusion_source = 0.5 * self.CELossWeight(
                output_source, source_gt,
                weight_concate) + 0.5 * self.CELossWeight(
                    output_source, source_gt_for_ft_in_fst, weight_concate)
            loss_confusion_target = self.ConcatenatedCELoss(output_target)
            loss_summary_feature_extractor = loss_confusion_source + self.lam * loss_confusion_target

            self.optimizer_classifier.zero_grad()
            loss_summary_classifier.backward(retain_graph=True)
            self.optimizer_classifier.step()

            self.optimizer_feature_extractor.zero_grad()
            loss_summary_feature_extractor.backward()
            self.optimizer_feature_extractor.step()

            classifier_loss.update(loss_summary_classifier.item(),
                                   source_data.size()[0])
            feature_extractor_loss.update(loss_summary_feature_extractor.item(),
                                          source_data.size()[0])
            prec1_fs.update(
                accuracy(output_source[:, :self.num_classes], source_gt),
                source_data.size()[0])
            prec1_ft.update(
                accuracy(output_source[:, self.num_classes:], source_gt),
                source_data.size()[0])

            print("  Train:epoch: %d:[%d/%d], LossCla: %3f, LossFeat: %3f, AccFs: %3f, AccFt: %3f" % \
                  (self.epoch, iters_counter_within_epoch, self.iters_per_epoch, classifier_loss.avg, feature_extractor_loss.avg, prec1_fs.avg, prec1_ft.avg))

            batch_time.update(time.time() - end)
            end = time.time()
            self.iters += 1
            iters_counter_within_epoch += 1
            if iters_counter_within_epoch >= self.iters_per_epoch:
                log = open(os.path.join(self.opt.SAVE_DIR, 'log.txt'), 'a')
                log.write("\n")
                log.write("  Train:epoch: %d:[%d/%d], LossCla: %3f, LossFeat: %3f, AccFs: %3f, AccFt: %3f" % \
                  (self.epoch, iters_counter_within_epoch, self.iters_per_epoch, classifier_loss.avg, feature_extractor_loss.avg, prec1_fs.avg, prec1_ft.avg))
                log.close()
                stop = True
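The lam factor computed in update_network is the usual domain-adversarial ramp-up 2 / (1 + exp(-10 * p)) - 1, where p is the completed fraction of training: it starts at 0, passes roughly 0.46 after 10% of training, and approaches 1 by the end. A self-contained sketch of just that schedule:

import math

def adversarial_lambda(progress):
    """Ramp-up factor in [0, 1); progress is the completed fraction of training."""
    return 2.0 / (1.0 + math.exp(-10.0 * progress)) - 1.0

# adversarial_lambda(0.0) == 0.0, adversarial_lambda(0.1) ~= 0.462, adversarial_lambda(1.0) ~= 0.9999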
Exemplo n.º 25
0
def main(args):
    # np.random.seed(args.seed)
    # torch.manual_seed(args.seed)
    # cudnn.benchmark = True
    train_loader, test_loader = get_data(args)
    
    model = ALE()
    
    print(model)
    
    model = nn.DataParallel(model).cuda()
    
    # Optimizer
    """
    if hasattr(model.module, 'base'):
        base_param_ids = set(map(id, model.module.base.parameters()))
        new_params = [p for p in model.parameters() if
                      id(p) not in base_param_ids]
        param_groups = [
            {'params': model.module.base.parameters(), 'lr_mult': 0.1},
            {'params': new_params, 'lr_mult': 1.0}]
    else:
        param_groups = model.parameters()
    """
    param_groups = model.parameters()
    optimizer = torch.optim.Adam(param_groups, lr=args.lr,
                                 weight_decay=args.weight_decay)

    def adjust_lr(epoch):
        if epoch in [80]:
            lr = 0.1 * args.lr
            print('=====> adjust lr to {}'.format(lr))
            for g in optimizer.param_groups:
                g['lr'] = lr * g.get('lr_mult', 1)

    for epoch in range(0, args.epochs):
        adjust_lr(epoch)
        model.train()

        loss = AverageMeter()
        iteration = 935 * epoch  # hard-coded number of batches per epoch, keeps the visdom x-axis continuous
        # print(iteration)

        for i,d in enumerate(train_loader):
            iteration += 1

            img_embeds, class_embeds, metas = d

            optimizer.zero_grad()

            comps = model(img_embeds)
            # L1 loss (despite the variable name) between predicted and target embeddings;
            # assumption: the operands are comps / class_embeds, which this excerpt does not name explicitly
            mse_loss = nn.L1Loss(size_average=False)(comps, class_embeds)
            mse_loss = mse_loss / args.batch_size
            loss.update(mse_loss.data[0], img_embeds.size(0))
            
            mse_loss.backward()
            optimizer.step()

            vis.line(X=torch.ones((1,)) * iteration,
                     Y=torch.Tensor((loss.avg,)),
                     win='reid softmax loss of network',
                     update='append' if iteration > 0 else None,
                     opts=dict(xlabel='iteration', title='Loss', legend=['Loss'])
                     )

            if (i + 1) % args.print_freq == 0:
                print('Epoch: [{}][{}/{}]\t Loss {:.6f} ({:.6f})\t'
                      .format(epoch, i + 1, len(train_loader),
                              loss.val, loss.avg))

            save_checkpoint({
                'state_dict': model.module.state_dict(),
                'epoch': epoch + 1,
                'best_top1': 0,
            }, False, fpath=osp.join(args.model_dir, 'checkpoint.pth.tar'))
        test(test_loader, test_cls_list, test_attrs, args.model_dir)
Exemplo n.º 26
0
def validate(args, val_loader, model, ce_criterion, device,
          epoch, model_writer, global_step):
    batch_time = AverageMeter()
    data_time = AverageMeter()
    total_losses = AverageMeter()

    # switch to eval mode
    model.eval()

    end = time.time()
    with torch.no_grad():
        for i, (mask, pos_vec, pose3d_discrete_seq, pose3d_discrete_gt_seq, \
            mfcc_data, beat_data) in enumerate(val_loader):
            # BS X T X 48, BS X T X 48, BS X 1 X T, BS X 1 X T, BS X T X 48
            bs = pose3d_discrete_seq.size()[0]
            timesteps = pose3d_discrete_seq.size()[1]

            # measure data loading time
            data_time.update(time.time() - end)

            # Send to device
            pose3d_discrete_seq = pose3d_discrete_seq.to(device)
            pose3d_discrete_gt_seq = pose3d_discrete_gt_seq.to(device)

            mask = mask.to(device)
            pos_vec = pos_vec.to(device)

            if args.add_mfcc and args.add_beat:
                mfcc_data_input = mfcc_data.to(device)
                beat_data_input = beat_data.to(device).long()
                pred_out = model(pose3d_discrete_seq, mask, pos_vec, \
                mfcc_feats=mfcc_data_input, beat_feats=beat_data_input) # BS X T X 48 X N_cls
            elif args.add_mfcc:
                mfcc_data_input = mfcc_data.to(device)
                pred_out = model(pose3d_discrete_seq, mask, pos_vec, mfcc_feats=mfcc_data_input) # BS X T X 48 X N_cls
            elif args.add_beat:
                beat_data_input = beat_data.to(device).long()
                pred_out = model(pose3d_discrete_seq, mask, pos_vec, beat_feats=beat_data_input) # BS X T X 48 X N_cls
            else:
                pred_out = model(pose3d_discrete_seq, mask, pos_vec) # BS X T X 48 X N_cls

            r_loss = ce_criterion(pred_out, pose3d_discrete_gt_seq, mask.squeeze(1).unsqueeze(2))

            total_loss = r_loss

            model_writer.add_scalar("VAL Loss", np.array(total_loss.item()), global_step)

            total_losses.update(total_loss.item(), 1)

            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()
            if (i % args.print_freq == 0):
                print("\n\n")
                print('Epoch: [{0}][{1}/{2}]\t'
                      'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                      'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'
                      'Total Loss {total_loss.val:.4f} ({total_loss.avg:.4f})\n'
                      .format(epoch, i, len(val_loader), batch_time=batch_time,
                              data_time=data_time, 
                              total_loss=total_losses,
                              ))

    return total_losses.avg
Exemplo n.º 27
0
def train(cfg, trainLoader, model, criterion, optimizer, epoch):
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()

    lossesRealCe = AverageMeter()
    lossesRealMmd = AverageMeter()
    lossesFakeMmd = AverageMeter()

    writer = SummaryWriter(logdir=os.path.join(cfg.MISC.OUTPUT_PATH, 'run',
                                               '{}'.format(cfg.CFG_NAME)))

    model.train()

    for i, data in enumerate(trainLoader):
        input = Variable(data['img'].cuda())
        label = Variable(data['label'].cuda())
        domain = Variable(data['domain'].cuda())

        # forward
        output, fc = model(input)

        # select nir and vis data
        idxReal = torch.nonzero(label.data != -1)
        idxReal = Variable(idxReal[:, 0])

        outputReal = torch.index_select(output, dim=0, index=idxReal)
        fcReal = torch.index_select(fc, dim=0, index=idxReal)
        labelReal = torch.index_select(label, 0, idxReal)
        domainReal = torch.index_select(domain, 0, idxReal)

        lossRealCe = criterion(outputReal, labelReal) * cfg.TRAIN.LAMBDA_CE

        # select real data
        idxNirReal = torch.nonzero(domainReal.data != 1)
        idxNirReal = Variable(idxNirReal[:, 0])
        fcNirReal = torch.index_select(fcReal, 0, idxNirReal)

        idxVisReal = torch.nonzero(domainReal.data != 0)
        idxVisReal = Variable(idxVisReal[:, 0])
        fcVisReal = torch.index_select(fcReal, 0, idxVisReal)

        lossRealMmd = cfg.TRAIN.LAMBDA_MMD * mmdLoss(fcVisReal, fcNirReal)

        # select fake data
        idxFake = torch.nonzero(label.data == -1)
        idxFake = Variable(idxFake[:, 0])

        fcFake = torch.index_select(fc, 0, idxFake)
        domainFake = torch.index_select(domain, 0, idxFake)

        # select domain of fake data
        idxNirFake = torch.nonzero(domainFake.data != 1)
        idxNirFake = Variable(idxNirFake[:, 0])
        fcNirFake = torch.index_select(fcFake, 0, idxNirFake)

        idxVisFake = torch.nonzero(domainFake.data != 0)
        idxVisFake = Variable(idxVisFake[:, 0])
        fcVisFake = torch.index_select(fcFake, 0, idxVisFake)

        lossFakeMmd = cfg.TRAIN.LAMBDA_MMD * mmdLoss(fcNirFake, fcVisFake)

        lossHFR = lossRealCe + lossRealMmd + lossFakeMmd
        optimizer.zero_grad()
        # TODO(hanyang): need to retain_graph=True??
        lossHFR.backward(retain_graph=True)
        optimizer.step()

        # measure accuracy and record loss
        lossesRealCe.update(lossRealCe.item(), outputReal.size(0))
        lossesRealMmd.update(lossRealMmd.item(), 1)
        lossesFakeMmd.update(lossFakeMmd.item(), 1)

        prec1, prec5 = accuracy(outputReal.data, labelReal.data, topk=(1, 5))
        top1.update(prec1.item(), outputReal.size(0))
        top5.update(prec5.item(), outputReal.size(0))

        # summary writer
        # writer.add_scalar('loss/cross_entropy', lossesRealCe.avg, epoch)
        # writer.add_scalar('loss/real_mmd', lossesRealMmd.avg, epoch)
        # writer.add_scalar('loss/fake_mmd', lossesFakeMmd.avg, epoch)

        if i % cfg.TRAIN.PRINT_FREQ == 0:
            # if True:
            info = '===> Epoch [{:0>3d}][{:3d}/{:3d}] | '.format(
                epoch, i, len(trainLoader))
            info += 'Loss: real ce: {:4.6f} ({:4.6f}) real mmd: {:4.6f} ({:4.6f}) fake mmd: {:4.6f} ({:4.6f}) | '.format(
                lossesRealCe.val, lossesRealCe.avg, lossesRealMmd.val,
                lossesRealMmd.avg, lossesFakeMmd.val, lossesFakeMmd.avg)
            info += 'Prec@1 : {:4.3f} ({:4.3f}) Prec@5 : {:4.3f} ({:4.3f})'.format(
                top1.val, top1.avg, top5.val, top5.avg)
            print(info)
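mmdLoss above is presumably a maximum mean discrepancy between the NIR and VIS feature batches; its definition is not part of this excerpt. A minimal RBF-kernel sketch of that idea, where the function name and bandwidth are illustrative assumptions:

import torch

def rbf_mmd(x, y, sigma=1.0):
    """Biased MMD^2 estimate between feature batches x (n, d) and y (m, d) with an RBF kernel."""
    def gram(a, b):
        dist2 = torch.cdist(a, b) ** 2          # pairwise squared Euclidean distances
        return torch.exp(-dist2 / (2.0 * sigma ** 2))
    k_xx = gram(x, x).mean()
    k_yy = gram(y, y).mean()
    k_xy = gram(x, y).mean()
    return k_xx + k_yy - 2.0 * k_xy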
Exemplo n.º 28
0
def train(P,
          epoch,
          model,
          criterion,
          optimizer,
          scheduler,
          loader,
          logger=None,
          simclr_aug=None,
          linear=None,
          linear_optim=None):

    if epoch == 1:
        # define optimizer and save in P (argument)
        milestones = [
            int(0.6 * P.epochs),
            int(0.75 * P.epochs),
            int(0.9 * P.epochs)
        ]

        linear_optim = torch.optim.SGD(linear.parameters(),
                                       lr=1e-1,
                                       weight_decay=P.weight_decay)
        P.linear_optim = linear_optim
        P.linear_scheduler = lr_scheduler.MultiStepLR(P.linear_optim,
                                                      gamma=0.1,
                                                      milestones=milestones)

    if logger is None:
        log_ = print
    else:
        log_ = logger.log

    batch_time = AverageMeter()
    data_time = AverageMeter()

    losses = dict()
    losses['cls'] = AverageMeter()

    check = time.time()
    for n, (images, labels) in enumerate(loader):
        model.eval()
        count = n * P.n_gpus  # number of trained samples

        data_time.update(time.time() - check)
        check = time.time()

        ### SimCLR loss ###
        if P.dataset != 'imagenet':
            batch_size = images.size(0)
            images = images.to(device)
            images = hflip(images)  # 2B with hflip
        else:
            batch_size = images[0].size(0)
            images = images[0].to(device)

        labels = labels.to(device)

        images = simclr_aug(images)  # simclr augmentation
        _, outputs_aux = model(images, penultimate=True)
        penultimate = outputs_aux['penultimate'].detach()

        outputs = linear(penultimate[0:batch_size]
                         )  # only use 0 degree samples for linear eval

        loss_ce = criterion(outputs, labels)

        ### CE loss ###
        P.linear_optim.zero_grad()
        loss_ce.backward()
        P.linear_optim.step()

        ### optimizer learning rate ###
        lr = P.linear_optim.param_groups[0]['lr']

        batch_time.update(time.time() - check)

        ### Log losses ###
        losses['cls'].update(loss_ce.item(), batch_size)

        if count % 50 == 0:
            log_('[Epoch %3d; %3d] [Time %.3f] [Data %.3f] [LR %.5f]\n'
                 '[LossC %f]' % (
                     epoch,
                     count,
                     batch_time.value,
                     data_time.value,
                     lr,
                     losses['cls'].value,
                 ))
        check = time.time()

    P.linear_scheduler.step()

    log_('[DONE] [Time %.3f] [Data %.3f] [LossC %f]' %
         (batch_time.average, data_time.average, losses['cls'].average))

    if logger is not None:
        logger.scalar_summary('train/loss_cls', losses['cls'].average, epoch)
        logger.scalar_summary('train/batch_time', batch_time.average, epoch)
Exemplo n.º 29
0
        score = model(sample_train)

        # calculate loss
        loss = criterion(score, target_train)
        # zero the gradient buffer before calculating the gradients in the current step.
        optimizer.zero_grad()
        # backpropagation
        loss.backward()
        # update weights; a gradient descent step
        optimizer.step()

        ############
        # step log #
        ############
        # log loss for this batch
        loss_meter.update(to_scalar(loss))
        # write to tensorboard; used for visualization
        writer.add_scalar('train/total_loss_iter', to_scalar(loss),
                          step + 1 + dataset_L * epoch)
        # print the log for every "steps_per_log" batches or the final batch
        if (step + 1) % cfg.steps_per_log == 0 or (step + 1) % len(train_loader) == 0:
            log = '{}, Step {}/{} in Ep {}, {:.2f}s, loss:{:.4f}'.format(
                time_str(), step + 1, dataset_L, epoch + 1,
                time.time() - step_st, loss_meter.val)
            print(log)

    # update the learning rate
    scheduler.step()
    ##############
    # epoch log  #
    ##############
Exemplo n.º 30
0
def train_val(model, args):

    train_dir = args.train_dir
    val_dir = args.val_dir

    config = Config(args.config)
    cudnn.benchmark = True

    # train
    train_loader = torch.utils.data.DataLoader(
        lsp_lspet_data.LSP_Data(
            'lspet',
            train_dir,
            8,
            Mytransforms.Compose([
                Mytransforms.RandomResized(),  # this Compose implementation is written in a rather interesting way
                Mytransforms.RandomRotate(40),
                Mytransforms.RandomCrop(368),
                Mytransforms.RandomHorizontalFlip(),
            ])),
        batch_size=config.batch_size,
        shuffle=True,
        num_workers=config.workers,
        pin_memory=True)
    # val
    if args.val_dir is not None and config.test_interval != 0:
        # val
        val_loader = torch.utils.data.DataLoader(lsp_lspet_data.LSP_Data(
            'lsp', val_dir, 8,
            Mytransforms.Compose([
                Mytransforms.TestResized(368),
            ])),
                                                 batch_size=config.batch_size,
                                                 shuffle=True,
                                                 num_workers=config.workers,
                                                 pin_memory=True)

    criterion = nn.MSELoss().cuda()

    params, multiple = get_parameters(model, config, False)

    optimizer = torch.optim.SGD(params,
                                config.base_lr,
                                momentum=config.momentum,
                                weight_decay=config.weight_decay)

    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    losses_list = [AverageMeter() for i in range(6)]  # one meter per stage loss (6 stages)
    end = time.time()
    iters = config.start_iters
    best_model = config.best_model

    heat_weight = 46 * 46 * 15 / 1.0  # 46 x 46 heatmap x 15 channels, scales the per-stage MSE

    while iters < config.max_iter:

        for i, (input, heatmap, centermap) in enumerate(train_loader):

            learning_rate = adjust_learning_rate(
                optimizer,
                iters,
                config.base_lr,
                policy=config.lr_policy,
                policy_parameter=config.policy_parameter,
                multiple=multiple)
            data_time.update(time.time() - end)

            heatmap = heatmap.cuda(async=True)
            centermap = centermap.cuda(async=True)

            input_var = torch.autograd.Variable(input)
            heatmap_var = torch.autograd.Variable(heatmap)
            centermap_var = torch.autograd.Variable(centermap)

            heat1, heat2, heat3, heat4, heat5, heat6 = model(
                input_var, centermap_var)
            # intermediate supervision: compute a loss for every stage's heatmaps
            loss1 = criterion(heat1, heatmap_var) * heat_weight
            loss2 = criterion(heat2, heatmap_var) * heat_weight
            loss3 = criterion(heat3, heatmap_var) * heat_weight
            loss4 = criterion(heat4, heatmap_var) * heat_weight
            loss5 = criterion(heat5, heatmap_var) * heat_weight
            loss6 = criterion(heat6, heatmap_var) * heat_weight

            loss = loss1 + loss2 + loss3 + loss4 + loss5 + loss6
            losses.update(loss.data[0], input.size(0))
            for cnt, l in enumerate([loss1, loss2, loss3, loss4, loss5,
                                     loss6]):
                losses_list[cnt].update(l.data[0], input.size(0))

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            batch_time.update(time.time() - end)
            end = time.time()

            iters += 1
            if iters % config.display == 0:
                print(
                    'Train Iteration: {0}\t'
                    'Time {batch_time.sum:.3f}s / {1}iters, ({batch_time.avg:.3f})\t'
                    'Data load {data_time.sum:.3f}s / {1}iters, ({data_time.avg:.3f})\n'
                    'Learning rate = {2}\n'
                    'Loss = {loss.val:.8f} (ave = {loss.avg:.8f})\n'.format(
                        iters,
                        config.display,
                        learning_rate,
                        batch_time=batch_time,
                        data_time=data_time,
                        loss=losses))
                for cnt in range(0, 6):
                    print(
                        'Loss{0} = {loss1.val:.8f} (ave = {loss1.avg:.8f})\t'.
                        format(cnt + 1, loss1=losses_list[cnt]))

                print(time.strftime(
                    '%Y-%m-%d %H:%M:%S -----------------------------------------------------------------------------------------------------------------\n',
                    time.localtime()))

                batch_time.reset()
                data_time.reset()
                losses.reset()
                for cnt in range(6):
                    losses_list[cnt].reset()

            save_checkpoint({
                'iter': iters,
                'state_dict': model.state_dict(),
            }, 0, args.model_name)

            # val
            if args.val_dir is not None and config.test_interval != 0 and iters % config.test_interval == 0:

                model.eval()
                for j, (input, heatmap, centermap) in enumerate(val_loader):
                    heatmap = heatmap.cuda(async=True)
                    centermap = centermap.cuda(async=True)

                    input_var = torch.autograd.Variable(input)
                    heatmap_var = torch.autograd.Variable(heatmap)
                    centermap_var = torch.autograd.Variable(centermap)

                    heat1, heat2, heat3, heat4, heat5, heat6 = model(
                        input_var, centermap_var)

                    loss1 = criterion(heat1, heatmap_var) * heat_weight
                    loss2 = criterion(heat2, heatmap_var) * heat_weight
                    loss3 = criterion(heat3, heatmap_var) * heat_weight
                    loss4 = criterion(heat4, heatmap_var) * heat_weight
                    loss5 = criterion(heat5, heatmap_var) * heat_weight
                    loss6 = criterion(heat6, heatmap_var) * heat_weight

                    loss = loss1 + loss2 + loss3 + loss4 + loss5 + loss6
                    losses.update(loss.data[0], input.size(0))
                    for cnt, l in enumerate(
                        [loss1, loss2, loss3, loss4, loss5, loss6]):
                        losses_list[cnt].update(l.data[0], input.size(0))

                    batch_time.update(time.time() - end)
                    end = time.time()
                    is_best = losses.avg < best_model
                    best_model = min(best_model, losses.avg)
                    save_checkpoint(
                        {
                            'iter': iters,
                            'state_dict': model.state_dict(),
                        }, is_best, args.model_name)

                    if j % config.display == 0:
                        print(
                            'Test Iteration: {0}\t'
                            'Time {batch_time.sum:.3f}s / {1}iters, ({batch_time.avg:.3f})\t'
                            'Data load {data_time.sum:.3f}s / {1}iters, ({data_time.avg:.3f})\n'
                            'Loss = {loss.val:.8f} (ave = {loss.avg:.8f})\n'.
                            format(j,
                                   config.display,
                                   batch_time=batch_time,
                                   data_time=data_time,
                                   loss=losses))
                        for cnt in range(0, 6):
                            print(
                                'Loss{0} = {loss1.val:.8f} (ave = {loss1.avg:.8f})\t'
                                .format(cnt + 1, loss1=losses_list[cnt]))

                        print(time.strftime(
                            '%Y-%m-%d %H:%M:%S -----------------------------------------------------------------------------------------------------------------\n',
                            time.localtime()))
                        batch_time.reset()
                        losses.reset()
                        for cnt in range(6):
                            losses_list[cnt].reset()

                model.train()
Exemplo n.º 31
0
def evaluate(models, val_loader, interp, criterion, args):
    loss_meter = AverageMeter()
    acc_meter = AverageMeter()
    intersection_meter = AverageMeter()
    union_meter = AverageMeter()
    time_meter = AverageMeter()

    models.eval()

    for i, batch_data in enumerate(val_loader):
        # forward pass
        images, labels, _ = batch_data

        torch.cuda.synchronize()
        tic = time.perf_counter()

        pred_seg = torch.zeros(images.size(0), args.num_classes,
                               labels.size(1), labels.size(2))
        pred_seg = pred_seg.cuda(args.gpu_id, non_blocking=True)

        for scale in args.scales:
            imgs_scale = zoom(images.numpy(), (1., 1., scale, scale),
                              order=1,
                              prefilter=False,
                              mode='nearest')

            input_images = torch.from_numpy(imgs_scale)
            if args.gpu_id is not None:
                input_images = input_images.cuda(args.gpu_id,
                                                 non_blocking=True)

            pred_scale = models(input_images)
            pred_scale = interp(pred_scale)

            # average the probability
            pred_seg = pred_seg + pred_scale / len(args.scales)

        # pred =torch.log(pred)

        seg_labels = labels.cuda(args.gpu_id, non_blocking=True)

        loss = criterion(pred_seg, seg_labels)
        loss_meter.update(loss.data.item())
        print('[Eval] iter {}, loss: {}'.format(i, loss.data.item()))
        # loss_meter.update(loss.item())
        # print('[Eval] iter {}, loss: {}'.format(i, loss.item()))

        labels = as_numpy(labels)
        _, pred = torch.max(pred_seg, dim=1)
        pred = as_numpy(pred.squeeze(0).cpu())

        # calculate accuracy
        acc, pix = accuracy(pred, labels)
        intersection, union = intersectionAndUnion(pred, labels,
                                                   args.num_classes)
        acc_meter.update(acc, pix)
        intersection_meter.update(intersection)
        union_meter.update(union)

        torch.cuda.synchronize()
        time_meter.update(time.perf_counter() - tic)

        if args.visualize:
            visualize_result(batch_data, pred_seg, args)

    # summary
    iou = intersection_meter.sum / (union_meter.sum + 1e-10)
    for i, _iou in enumerate(iou):
        print('class [ {} ], IoU: {:.4f}'.format(i, _iou))

    print('[Eval Summary]:')
    print(
        'loss: {:.6f}, Mean IoU: {:.2f}%, Accuracy: {:.2f}%, Inference Time: {:.4f}s'
        .format(loss_meter.average(),
                iou.mean() * 100,
                acc_meter.average() * 100, time_meter.average()))
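The summary above depends on accuracy and intersectionAndUnion helpers that are not shown. A common histogram-based form of intersectionAndUnion, sketched here under the assumption that pred and labels are integer class maps with a negative ignore label, would be:

import numpy as np

def intersection_and_union(pred, label, num_classes, ignore_index=-1):
    """Per-class intersection and union counts for integer class maps."""
    pred = np.asarray(pred).flatten()
    label = np.asarray(label).flatten()
    valid = label != ignore_index          # drop ignored pixels
    pred, label = pred[valid], label[valid]
    intersection = pred[pred == label]     # correctly classified pixels
    bins = np.arange(num_classes + 1)
    area_inter, _ = np.histogram(intersection, bins=bins)
    area_pred, _ = np.histogram(pred, bins=bins)
    area_label, _ = np.histogram(label, bins=bins)
    area_union = area_pred + area_label - area_inter
    return area_inter, area_union

Accumulating the two returned arrays in the intersection and union meters then yields exactly the per-class iou = intersection_meter.sum / (union_meter.sum + 1e-10) computed in the summary above.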