示例#1
0
def train(train_loader, pfld_backbone, auxiliarynet, criterion, optimizer,
          cur_epoch):
    """Run one optimisation pass over ``train_loader`` for the PFLD networks.

    Args:
        train_loader: Iterable yielding ``(img, landmark_gt, euler_angle_gt)``
            tensor triples.
        pfld_backbone: Module called as ``pfld_backbone(img)``; it returns
            ``(features, landmarks)``.
        auxiliarynet: Module called on the backbone features; it returns the
            predicted angle.
        criterion: Callable invoked as ``criterion(landmark_gt,
            euler_angle_gt, angle, landmarks, args.train_batchsize)`` and
            returning ``(weighted_loss, loss)``.
        optimizer: Optimizer that is zeroed and stepped once per batch.
        cur_epoch: Current epoch; not used inside this function.

    Returns:
        The ``(weighted_loss, loss)`` pair of the last processed batch.

    Note:
        Reads the module-level ``args`` and moves everything to CUDA. If
        ``train_loader`` yields no batches, the returned names are never bound
        and the ``return`` statement raises.
    """
    losses = AverageMeter()

    # Moving the networks to the GPU does not depend on the batch, so do it
    # once up front instead of on every iteration.
    pfld_backbone = pfld_backbone.cuda()
    auxiliarynet = auxiliarynet.cuda()

    for img, landmark_gt, euler_angle_gt in train_loader:
        img.requires_grad = False
        img = img.cuda(non_blocking=True)

        landmark_gt.requires_grad = False
        landmark_gt = landmark_gt.cuda(non_blocking=True)

        euler_angle_gt.requires_grad = False
        euler_angle_gt = euler_angle_gt.cuda(non_blocking=True)

        features, landmarks = pfld_backbone(img)
        angle = auxiliarynet(features)
        weighted_loss, loss = criterion(landmark_gt, euler_angle_gt, angle,
                                        landmarks, args.train_batchsize)

        # Only the weighted loss drives the parameter update; the plain loss
        # is tracked for reporting.
        optimizer.zero_grad()
        weighted_loss.backward()
        optimizer.step()

        losses.update(loss.item())
    return weighted_loss, loss
示例#2
0
def train(train_loader, linear_backbone, criterion, optimizer, cur_epoch):
    """Run one optimisation pass over ``train_loader`` for the linear backbone.

    Args:
        train_loader: Iterable yielding mappings with the keys ``'image'`` and
            ``'landmarks'``.
        linear_backbone: Module called as ``linear_backbone(img)``; it returns
            the predicted landmarks.
        criterion: Callable invoked as ``criterion(landmark_gt, landmarks,
            args.train_batchsize)`` and returning the loss tensor.
        optimizer: Optimizer that is zeroed and stepped once per batch.
        cur_epoch: Current epoch; not used inside this function.

    Returns:
        The loss tensor of the last processed batch.

    Note:
        Reads the module-level ``args`` and moves everything to CUDA. If
        ``train_loader`` yields no batches, ``loss`` is never bound and the
        ``return`` statement raises.
    """
    losses = AverageMeter()

    # The device transfer of the network is loop-invariant; do it once.
    linear_backbone = linear_backbone.cuda()

    for samples in train_loader:
        img = samples['image']
        landmark_gt = samples['landmarks']
        img.requires_grad = False
        img = img.cuda(non_blocking=True)

        landmark_gt.requires_grad = False
        landmark_gt = landmark_gt.cuda(non_blocking=True)

        landmarks = linear_backbone(img)
        loss = criterion(landmark_gt, landmarks, args.train_batchsize)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        losses.update(loss.item())
    return loss
示例#3
0
def validate(loader,
             model,
             criterion_lidar,
             criterion_rgb,
             criterion_local,
             criterion_guide,
             epoch=0):
    """Evaluate ``model`` on ``loader`` without computing gradients.

    The model is called as ``model(input, epoch)`` and is expected to return
    four outputs; each one is scored against the ground truth by its own
    criterion and the four terms are combined with the weights
    ``args.wpred``, ``args.wlid``, ``args.wrgb`` and ``args.wguide``.

    Args:
        loader: Iterable yielding ``(input, gt)`` batches.
        model: Network to evaluate; it is switched to eval mode.
        criterion_lidar: Loss applied to the second model output.
        criterion_rgb: Loss applied to the third model output.
        criterion_local: Loss applied to the first model output.
        criterion_guide: Loss applied to the fourth model output.
        epoch: Epoch value forwarded to the model and to every criterion.

    Returns:
        ``(score.avg, score_1.avg, losses.avg)``: the running averages of
        ``args.metric``, ``args.metric_1`` and the combined loss.
    """
    loss_meter = AverageMeter()
    evaluator = Metrics(max_depth=args.max_depth,
                        disp=args.use_disp,
                        normal=args.normal)
    primary_meter = AverageMeter()
    secondary_meter = AverageMeter()

    model.eval()

    # Forward passes only, so autograd bookkeeping is switched off.
    with torch.no_grad():
        for step, (batch, target) in tqdm(enumerate(loader)):
            if not args.no_cuda:
                batch = batch.cuda(non_blocking=True)
                target = target.cuda(non_blocking=True)

            prediction, lidar_out, precise, guide = model(batch, epoch)

            local_term = criterion_local(prediction, target, epoch)
            lidar_term = criterion_lidar(lidar_out, target, epoch)
            rgb_term = criterion_rgb(precise, target, epoch)
            guide_term = criterion_guide(guide, target, epoch)
            total = args.wpred * local_term + args.wlid * lidar_term + args.wrgb * rgb_term + args.wguide * guide_term
            loss_meter.update(total.item(), batch.size(0))

            # Metrics are computed on the first channel of the prediction.
            evaluator.calculate(prediction[:, 0:1], target)
            primary_meter.update(evaluator.get_metric(args.metric),
                                 evaluator.num)
            secondary_meter.update(evaluator.get_metric(args.metric_1),
                                   evaluator.num)

            if (step + 1) % args.print_freq != 0:
                continue
            print('Test: [{0}/{1}]\t'
                  'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                  'Metric {score.val:.4f} ({score.avg:.4f})'.format(
                      step + 1,
                      len(loader),
                      loss=loss_meter,
                      score=primary_meter))

    if args.evaluate:
        print("===> Average RMSE score on validation set is {:.4f}".format(
            primary_meter.avg))
        print("===> Average MAE score on validation set is {:.4f}".format(
            secondary_meter.avg))

    return primary_meter.avg, secondary_meter.avg, loss_meter.avg
示例#4
0
def main():
    """Parse the command line, build the model and run training or evaluation.

    Steps, in order:
      1. Parse arguments into the module-level ``args`` and seed the RNGs.
      2. Build the model, optimizer, scheduler, losses and data loaders.
      3. Either resume from the latest checkpoint in ``args.save_path``,
         evaluate the best saved model and return, or start from scratch.
      4. Optionally copy pretrained weights into the model.
      5. Train for ``args.nepochs`` epochs, validating and checkpointing
         after every epoch.

    Raises:
        Exception: if CUDA is requested (``args.no_cuda`` is false) but no
            GPU is available.
    """
    global args
    args = parser.parse_args()
    if args.num_samples == 0:
        args.num_samples = None
    if args.val_batch_size is None:
        args.val_batch_size = args.batch_size
    if args.seed:
        random.seed(args.seed)
        torch.manual_seed(args.seed)
        # torch.backends.cudnn.deterministic = True
        # warnings.warn('You have chosen to seed training. '
        # 'This will turn on the CUDNN deterministic setting, '
        # 'which can slow down your training considerably! '
        # 'You may see unexpected behavior when restarting from checkpoints.')

    # For distributed training
    # init_distributed_mode(args)

    if not args.no_cuda and not torch.cuda.is_available():
        raise Exception("No gpu available for usage")
    torch.backends.cudnn.benchmark = args.cudnn
    # Init model
    channels_in = 1 if args.input_type == 'depth' else 4
    model = Models.define_model(mod=args.mod,
                                in_channels=channels_in,
                                thres=args.thres)
    define_init_weights(model, args.weight_init)
    # Load on gpu before passing params to optimizer
    if not args.no_cuda:
        if not args.multi:
            model = model.cuda()
        else:
            model = torch.nn.DataParallel(model).cuda()
            # model.cuda()
            # model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[args.gpu])
            # model = model.module

    # The run directory name encodes the main hyper-parameters.
    save_id = '{}_{}_{}_{}_{}_batch{}_pretrain{}_wlid{}_wrgb{}_wguide{}_wpred{}_patience{}_num_samples{}_multi{}'.\
              format(args.mod, args.optimizer, args.loss_criterion,
                     args.learning_rate,
                     args.input_type,
                     args.batch_size,
                     args.pretrained, args.wlid, args.wrgb, args.wguide, args.wpred,
                     args.lr_decay_iters, args.num_samples, args.multi)

    # INIT optimizer/scheduler/loss criterion
    optimizer = define_optim(args.optimizer, model.parameters(),
                             args.learning_rate, args.weight_decay)
    scheduler = define_scheduler(optimizer, args)

    # Optional to use different losses
    criterion_local = define_loss(args.loss_criterion)
    criterion_lidar = define_loss(args.loss_criterion)
    criterion_rgb = define_loss(args.loss_criterion)
    criterion_guide = define_loss(args.loss_criterion)

    # INIT dataset
    dataset = Datasets.define_dataset(args.dataset, args.data_path,
                                      args.input_type, args.side_selection)
    dataset.prepare_dataset()
    train_loader, valid_loader, valid_selection_loader = get_loader(
        args, dataset)

    # Resume training
    best_epoch = 0
    lowest_loss = np.inf
    args.save_path = os.path.join(args.save_path, save_id)
    mkdir_if_missing(args.save_path)
    log_file_name = 'log_train_start_0.txt'
    # first_run() supplies the epoch to resume from; a falsy value means a
    # fresh start.
    args.resume = first_run(args.save_path)
    if args.resume and not args.test_mode and not args.evaluate:
        path = os.path.join(
            args.save_path,
            'checkpoint_model_epoch_{}.pth.tar'.format(int(args.resume)))
        if os.path.isfile(path):
            log_file_name = 'log_train_start_{}.txt'.format(args.resume)
            # stdout
            sys.stdout = Logger(os.path.join(args.save_path, log_file_name))
            print("=> loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(path)
            args.start_epoch = checkpoint['epoch']
            lowest_loss = checkpoint['loss']
            best_epoch = checkpoint['best epoch']
            model.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            print("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch']))
        else:
            log_file_name = 'log_train_start_0.txt'
            # stdout
            sys.stdout = Logger(os.path.join(args.save_path, log_file_name))
            print("=> no checkpoint found at '{}'".format(path))

    # Only evaluate
    elif args.evaluate:
        print("Evaluate only")
        best_file_lst = glob.glob(os.path.join(args.save_path, 'model_best*'))
        if len(best_file_lst) != 0:
            best_file_name = best_file_lst[0]
            print(best_file_name)
            if os.path.isfile(best_file_name):
                sys.stdout = Logger(
                    os.path.join(args.save_path, 'Evaluate.txt'))
                print("=> loading checkpoint '{}'".format(best_file_name))
                checkpoint = torch.load(best_file_name)
                model.load_state_dict(checkpoint['state_dict'])
            else:
                print("=> no checkpoint found at '{}'".format(best_file_name))
        else:
            print("=> no checkpoint found at due to empy list in folder {}".
                  format(args.save_path))
        # NOTE: validation still runs when no checkpoint was found, in which
        # case the freshly initialised model is evaluated.
        validate(valid_selection_loader, model, criterion_lidar, criterion_rgb,
                 criterion_local, criterion_guide)
        return

    # Start training from clean slate
    else:
        # Redirect stdout
        sys.stdout = Logger(os.path.join(args.save_path, log_file_name))

    # INIT MODEL
    print(40 * "=" + "\nArgs:{}\n".format(args) + 40 * "=")
    print("Init model: '{}'".format(args.mod))
    print("Number of parameters in model {} is {:.3f}M".format(
        args.mod.upper(),
        sum(tensor.numel() for tensor in model.parameters()) / 1e6))

    # Load pretrained state for cityscapes in GLOBAL net
    if args.pretrained and not args.resume:
        if not args.load_external_mod:
            # With DataParallel the real network sits behind ``.module``.
            if not args.multi:
                target_state = model.depthnet.state_dict()
            else:
                target_state = model.module.depthnet.state_dict()
            check = torch.load('erfnet_pretrained.pth')
            for name, val in check.items():
                # Exclude multi GPU prefix
                mono_name = name[7:]
                if mono_name not in target_state:
                    continue
                # Tensors that cannot be copied raise RuntimeError and are
                # deliberately skipped (best-effort loading).
                try:
                    target_state[mono_name].copy_(val)
                except RuntimeError:
                    continue
            print('Successfully loaded pretrained model')
        else:
            check = torch.load('external_mod.pth.tar')
            lowest_loss_load = check['loss']
            target_state = model.state_dict()
            for name, val in check['state_dict'].items():
                if name not in target_state:
                    continue
                try:
                    target_state[name].copy_(val)
                except RuntimeError:
                    continue
            print("=> loaded EXTERNAL checkpoint with best rmse {}".format(
                lowest_loss_load))

    # Start training
    for epoch in range(args.start_epoch, args.nepochs):
        print("\n => Start EPOCH {}".format(epoch + 1))
        print(datetime.now().strftime('%Y-%m-%d %H:%M:%S'))
        print(args.save_path)
        # Adjust learning rate
        # NOTE(review): the scheduler is stepped before this epoch's
        # optimizer steps; confirm this ordering is intended.
        if args.lr_policy is not None and args.lr_policy != 'plateau':
            scheduler.step()
            lr = optimizer.param_groups[0]['lr']
            print('lr is set to {}'.format(lr))

        # Define container objects
        batch_time = AverageMeter()
        data_time = AverageMeter()
        losses = AverageMeter()
        score_train = AverageMeter()
        score_train_1 = AverageMeter()
        metric_train = Metrics(max_depth=args.max_depth,
                               disp=args.use_disp,
                               normal=args.normal)

        # Switch the model to training mode for this epoch
        model.train()

        # compute timing
        end = time.time()

        # Load dataset
        for i, (input, gt) in tqdm(enumerate(train_loader)):

            # Time dataloader
            data_time.update(time.time() - end)

            # Put inputs on gpu if possible
            if not args.no_cuda:
                input, gt = input.cuda(), gt.cuda()
            prediction, lidar_out, precise, guide = model(input, epoch)

            loss = criterion_local(prediction, gt)
            loss_lidar = criterion_lidar(lidar_out, gt)
            loss_rgb = criterion_rgb(precise, gt)
            loss_guide = criterion_guide(guide, gt)
            loss = args.wpred * loss + args.wlid * loss_lidar + args.wrgb * loss_rgb + args.wguide * loss_guide

            losses.update(loss.item(), input.size(0))
            metric_train.calculate(prediction[:, 0:1].detach(), gt.detach())
            score_train.update(metric_train.get_metric(args.metric),
                               metric_train.num)
            score_train_1.update(metric_train.get_metric(args.metric_1),
                                 metric_train.num)

            # Backward pass
            optimizer.zero_grad()
            loss.backward()

            # Clip gradients (useful for instabilities or mistakes in ground
            # truth). This has to happen after backward() and before step():
            # clipping earlier would only touch the previous iteration's
            # gradients, which zero_grad() discards anyway.
            if args.clip_grad_norm != 0:
                nn.utils.clip_grad_norm_(model.parameters(),
                                         args.clip_grad_norm)

            optimizer.step()

            # Time training iteration
            batch_time.update(time.time() - end)
            end = time.time()

            # Print info
            if (i + 1) % args.print_freq == 0:
                print('Epoch: [{0}][{1}/{2}]\t'
                      'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                      'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                      'Metric {score.val:.4f} ({score.avg:.4f})'.format(
                          epoch + 1,
                          i + 1,
                          len(train_loader),
                          batch_time=batch_time,
                          loss=losses,
                          score=score_train))

        print("===> Average RMSE score on training set is {:.4f}".format(
            score_train.avg))
        print("===> Average MAE score on training set is {:.4f}".format(
            score_train_1.avg))
        # Evaluate model on validation set
        print("=> Start validation set")
        score_valid, score_valid_1, losses_valid = validate(
            valid_loader, model, criterion_lidar, criterion_rgb,
            criterion_local, criterion_guide, epoch)
        print("===> Average RMSE score on validation set is {:.4f}".format(
            score_valid))
        print("===> Average MAE score on validation set is {:.4f}".format(
            score_valid_1))
        # Evaluate model on selected validation set
        if args.subset is None:
            print("=> Start selection validation set")
            score_selection, score_selection_1, losses_selection = validate(
                valid_selection_loader, model, criterion_lidar, criterion_rgb,
                criterion_local, criterion_guide, epoch)
            total_score = score_selection
            print("===> Average RMSE score on selection set is {:.4f}".format(
                score_selection))
            print("===> Average MAE score on selection set is {:.4f}".format(
                score_selection_1))
        else:
            total_score = score_valid

        print("===> Last best score was RMSE of {:.4f} in epoch {}".format(
            lowest_loss, best_epoch))
        # Adjust lr if loss plateaued
        if args.lr_policy == 'plateau':
            scheduler.step(total_score)
            lr = optimizer.param_groups[0]['lr']
            print('LR plateaued, hence is set to {}'.format(lr))

        # File to keep latest epoch
        with open(os.path.join(args.save_path, 'first_run.txt'), 'w') as f:
            f.write(str(epoch))

        # Save model; flag it as the best one when the score improved
        to_save = False
        if total_score < lowest_loss:

            to_save = True
            best_epoch = epoch + 1
            lowest_loss = total_score
        save_checkpoint(
            {
                'epoch': epoch + 1,
                'best epoch': best_epoch,
                'arch': args.mod,
                'state_dict': model.state_dict(),
                'loss': lowest_loss,
                'optimizer': optimizer.state_dict()
            }, to_save, epoch)
    # NOTE(review): ``writer`` is not created in this function; presumably it
    # is a module-level object defined elsewhere -- confirm.
    if not args.no_tb:
        writer.close()
示例#5
0
def train(gpu, config):
    """Per-process entry point for distributed training of the UNet model.

    Each process joins the NCCL process group with ``rank=gpu``, builds its
    own data loaders and model, optionally restores a checkpoint, and then
    trains until ``config['epoches']`` is reached. Validation runs every
    ``config['save_freq']`` iterations and at the end of every epoch;
    checkpoints are written from the ``gpu == 0`` process only.

    Args:
        gpu: Index of the GPU used by this process; also used as its rank.
        config: Mapping with the run configuration. Keys read here:
            ``num_gpus``, ``batch_size``, ``num_threads``, ``in_channels``,
            ``out_channels``, ``save_root``, ``lr``, ``optimizer``,
            ``loss_type``, ``resume``, ``epoches``, ``gamma``,
            ``print_freq``, ``save_freq`` and ``lr_policy``.
    """
    dist.init_process_group(backend='nccl',
                            init_method='env://',
                            world_size=config['num_gpus'],
                            rank=gpu)
    torch.cuda.set_device(gpu)

    # Build the datasets and the per-rank samplers/loaders.
    dataset = get_data(config)
    trainset = dataset(config, "train")
    testset = dataset(config, "test")
    sampler_train = DistributedSampler(trainset,
                                       num_replicas=config['num_gpus'],
                                       rank=gpu)
    sampler_val = DistributedSampler(testset,
                                     num_replicas=config['num_gpus'],
                                     rank=gpu)

    batch_size = config['batch_size']
    loader_train = DataLoader(dataset=trainset,
                              batch_size=batch_size,
                              shuffle=False,
                              num_workers=config['num_threads'],
                              pin_memory=True,
                              sampler=sampler_train,
                              drop_last=True)
    loader_val = DataLoader(dataset=testset,
                            batch_size=1,
                            shuffle=False,
                            num_workers=1,
                            pin_memory=True,
                            sampler=sampler_val,
                            drop_last=True)
    model = UNet(config["in_channels"],
                 config["out_channels"],
                 post_processing=True)
    model.cuda(gpu)
    mask_sampling = masksamplingv2()

    # Initialise bookkeeping, output folder, optimizer, losses and scheduler.
    save_folder = os.path.join(
        config['save_root'], 'batch_{}_lr_{}'.format(config['batch_size'],
                                                     config['lr']))
    best_epoch = 0
    lowest_loss = 0.
    resume = 0
    print('=>Save folder: {}\n'.format(save_folder))
    # Every rank process runs this function, so a separate "exists" check
    # followed by makedirs can race; exist_ok makes the creation idempotent.
    os.makedirs(save_folder, exist_ok=True)
    optimizer = define_optim(config['optimizer'], model.parameters(),
                             float(config['lr']), 0)

    criterion_1 = define_loss(config['loss_type'])
    criterion_2 = define_loss("Multimse")
    scheduler = define_scheduler(optimizer, config)

    # Restore a checkpoint when one is configured (the string 'None' means
    # "do not resume").
    if config['resume'] != 'None':
        checkpoint = torch.load(config['resume'],
                                map_location=torch.device('cpu'))
        resume = checkpoint['epoch']
        lowest_loss = checkpoint['loss']
        best_epoch = checkpoint['best_epoch']
        model.load_state_dict(checkpoint['state_dict'])
        optimizer.load_state_dict(checkpoint['optimizer'])
        scheduler.load_state_dict(checkpoint['scheduler'])
        model, optimizer = amp.initialize(model,
                                          optimizer,
                                          opt_level='O0',
                                          verbosity=0)
        amp.load_state_dict(checkpoint['amp'])
        # Report the checkpoint path (not the epoch number twice).
        print("=> loaded checkpoint '{}' (epoch {})".format(
            config['resume'], checkpoint['epoch']))
        del checkpoint
    log_file = 'log_train_start_{}.txt'.format(resume)

    # Convert the model for multi-GPU training.
    model = apex.parallel.convert_syncbn_model(model)
    if config['resume'] == 'None':
        model, optimizer = amp.initialize(model,
                                          optimizer,
                                          opt_level='O0',
                                          verbosity=0)
    model = DDP(model)
    if gpu == 0:
        sys.stdout = Logger(os.path.join(save_folder, log_file))
    print("Number of parameters in model is {:.3f}M".format(
        sum(tensor.numel() for tensor in model.parameters()) / 1e6))

    # Training loop.
    for epoch in range(resume + 1, config['epoches'] + 1):
        print('=> Starch Epoch {}\n'.format(epoch))
        print(datetime.now().strftime('%Y-%m-%d %H:%M:%S'))
        print('learning rate is set to {}.\n'.format(
            optimizer.param_groups[0]['lr']))
        model.train()
        sampler_train.set_epoch(epoch)
        batch_time = AverageMeter()
        losses = AverageMeter()
        metric_train = Metrics()
        rmse_train = AverageMeter()
        mae_train = AverageMeter()
        time_snap = time.time()
        for i, inputs in tqdm(enumerate(loader_train)):
            gt, noise = inputs['gt'].cuda(gpu), inputs['noise'].cuda(gpu)
            optimizer.zero_grad()

            # Draw a sampling pattern and split the noisy input into the two
            # sub-sampled views used as network input and target.
            # patten = np.random.randint(0, 4, 1)
            patten = torch.randint(0, 8, (1, ))
            redinput, blueinput = mask_sampling(noise, patten)

            # redinput, blueinput = generator(noise, mask1, mask2)
            output = model(redinput)
            loss = criterion_1(output, blueinput)
            # The full noisy image is also passed through the model and
            # sub-sampled with the same pattern for the second loss term.
            fulloutput = model(noise)
            redoutput, blueoutput = mask_sampling(fulloutput, patten)
            # redoutput, blueoutput = generator(fulloutput, mask1, mask2)

            loss2 = criterion_2(output, blueinput, redoutput, blueoutput)
            losssum = config["gamma"] * loss2 + loss
            with amp.scale_loss(losssum, optimizer) as scaled_loss:
                scaled_loss.backward()
            optimizer.step()

            # Reduce the loss tensor across processes for logging.
            rt = reduce_tensor(losssum.data)
            torch.cuda.synchronize()
            losses.update(rt.item(), loader_train.batch_size)
            metric_train.calculate(fulloutput.detach(), gt)
            rmse_train.update(metric_train.get_metric('mse'), metric_train.num)
            mae_train.update(metric_train.get_metric('mae'), metric_train.num)
            batch_time.update(time.time() - time_snap)
            time_snap = time.time()
            if (i + 1) % config['print_freq'] == 0:
                if gpu == 0:
                    print('Epoch: [{0}][{1}/{2}]\t'
                          'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                          'Loss {loss.val:.6f} ({loss.avg:.6f})\t'
                          'Metric {rmse_train.val:.6f} ({rmse_train.avg:.6f})'.
                          format(epoch,
                                 i + 1,
                                 len(loader_train),
                                 batch_time=batch_time,
                                 loss=losses,
                                 rmse_train=rmse_train))
            if (i + 1) % config['save_freq'] == 0:
                print('=> Start sub-selection validation set')
                rmse, mae = val(model, loader_val, epoch, gpu)
                model.train()
                if gpu == 0:
                    print("===> Average RMSE score on selection set is {:.6f}".
                          format(rmse))
                    print("===> Average MAE score on selection set is {:.6f}".
                          format(mae))
                    print(
                        "===> Last best score was RMSE of {:.6f} in epoch {}".
                        format(lowest_loss, best_epoch))

                    # NOTE(review): a *higher* value is treated as better
                    # here (initial best is 0.), so the score returned by
                    # val() is presumably a higher-is-better metric despite
                    # the "rmse" name -- confirm against val().
                    if rmse > lowest_loss:
                        lowest_loss = rmse
                        best_epoch = epoch
                        states = {
                            'epoch': epoch,
                            'best_epoch': best_epoch,
                            'loss': lowest_loss,
                            'state_dict': model.module.state_dict(),
                            'optimizer': optimizer.state_dict(),
                            'scheduler': scheduler.state_dict(),
                            'amp': amp.state_dict()
                        }

                        save_checkpoints(states, save_folder, epoch, gpu, True)
        # End-of-epoch validation and checkpointing
        print('=> Start selection validation set')
        rmse, mae = val(model, loader_val, epoch, gpu)
        model.train()
        if gpu == 0:
            print("===> Average RMSE score on selection set is {:.6f}".format(
                rmse))
            print("===> Average MAE score on selection set is {:.6f}".format(
                mae))
            print("===> Last best score was RMSE of {:.6f} in epoch {}".format(
                lowest_loss, best_epoch))
            if rmse > lowest_loss:
                best_epoch = epoch
                lowest_loss = rmse
                states = {
                    'epoch': epoch,
                    'best_epoch': best_epoch,
                    'loss': lowest_loss,
                    'state_dict': model.module.state_dict(),
                    'optimizer': optimizer.state_dict(),
                    'scheduler': scheduler.state_dict(),
                    'amp': amp.state_dict()
                }
                save_checkpoints(states, save_folder, epoch, gpu, True)

        if config['lr_policy'] == 'plateau':
            scheduler.step(rmse)
        else:
            scheduler.step()
        # if (epoch) % 10 == 0:
        #     config["gamma"] += 0.5

    # Announce completion once, after the last epoch (not once per epoch).
    print('=>> the model training finish!')
示例#6
0
    def test_epoch(self, epoch, data_loader, model, criterion, opt, logger):
        """Evaluate ``model`` on every batch of ``data_loader`` and log the result.

        Args:
            epoch: Epoch number, used for printing and logging only.
            data_loader: Iterable yielding ``(inputs, targets, _)`` batches.
            model: Network to evaluate; it is switched to eval mode.
            criterion: Callable invoked as ``criterion(outputs, targets)``.
            opt: Options object; ``opt.no_cuda`` decides whether the targets
                are moved to the GPU.
            logger: Object whose ``log`` method receives a dict with the
                keys ``'epoch'``, ``'loss'`` and ``'acc'``.

        Returns:
            The running average of the loss over all batches
            (``losses.avg``).
        """
        print('test at epoch {}'.format(epoch))

        model.eval()

        batch_time = AverageMeter()
        data_time = AverageMeter()
        losses = AverageMeter()
        accuracies = AverageMeter()

        end_time = time.time()

        for i, (inputs, targets, _) in enumerate(data_loader):
            data_time.update(time.time() - end_time)
            # Only the device transfer depends on the CUDA switch; the
            # evaluation itself must also run on CPU-only setups.
            if not opt.no_cuda:
                targets = targets.cuda(non_blocking=True)
            with torch.no_grad():
                inputs = Variable(inputs)
                targets = Variable(targets)
                outputs = model(inputs)
                loss = criterion(outputs, targets)
                acc = calculate_accuracy(outputs, targets)
                losses.update(loss.data, inputs.size(0))
                accuracies.update(acc, inputs.size(0))

                batch_time.update(time.time() - end_time)
                end_time = time.time()

                print('Epoch: [{0}][{1}/{2}]\t'
                      'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                      'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'
                      'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                      'Acc {acc.val:.3f} ({acc.avg:.3f})'.format(
                          epoch,
                          i + 1,
                          len(data_loader),
                          batch_time=batch_time,
                          data_time=data_time,
                          loss=losses,
                          acc=accuracies))

        # Log and return once, after the whole loader has been consumed, so
        # the averages cover every batch rather than just the first one.
        logger.log({
            'epoch': epoch,
            'loss': losses.avg,
            'acc': accuracies.avg
        })

        return losses.avg
示例#7
0
    def val_epoch(self, epoch, data_loader, model, criterion, opt, logger):
        """Validate ``model`` and collect a confusion matrix plus raw outputs.

        Args:
            epoch: Epoch number, used for printing and logging only.
            data_loader: Iterable yielding ``(inputs, targets, paths)``.
            model: Network to evaluate; it is switched to eval mode.
            criterion: Callable invoked as ``criterion(outputs, targets)``.
            opt: Options object; ``opt.n_classes`` sizes the confusion matrix.
            logger: Object whose ``log`` method receives a dict with the
                keys ``'epoch'``, ``'loss'`` and ``'acc'``.

        Returns:
            ``(losses.avg, confusion_matrix, confidence_matrix)`` where the
            two matrices are pandas DataFrames: target-by-prediction counts,
            and the per-sample model outputs indexed by the last
            ``'/'``-separated component of each path.
        """
        print('validation at epoch {}'.format(epoch))

        model.eval()

        batch_time = AverageMeter()
        data_time = AverageMeter()
        losses = AverageMeter()
        accuracies = AverageMeter()

        tick = time.time()
        counts = np.zeros((opt.n_classes, opt.n_classes))
        outputs_by_file = {}

        for step, (inputs, targets, paths) in enumerate(data_loader, start=1):
            data_time.update(time.time() - tick)

            targets = targets.cuda(non_blocking=True)
            with torch.no_grad():
                inputs = Variable(inputs)
                targets = Variable(targets)
                outputs = model(inputs)
                loss = criterion(outputs, targets)
                acc = calculate_accuracy(outputs, targets)

                # Temporary: keep the raw outputs of every sample, keyed by
                # the file name taken from its path.
                for sample in range(len(targets)):
                    file_name = paths[sample].split('/')[-1]
                    outputs_by_file[file_name] = [
                        value.item() for value in outputs[sample]
                    ]

                # Accumulate target (row) versus argmax prediction (column).
                truths = [int(label) for label in targets]
                guesses = [
                    int(label) for label in np.argmax(outputs.data.cpu(), 1)
                ]
                assert len(truths) == len(guesses)
                for position, truth in enumerate(truths):
                    counts[truth][guesses[position]] += 1

                losses.update(loss.data, inputs.size(0))
                accuracies.update(acc, inputs.size(0))

                now = time.time()
                batch_time.update(now - tick)
                tick = time.time()

                print('Epoch: [{0}][{1}/{2}]\t'
                      'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                      'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'
                      'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                      'Acc {acc.val:.3f} ({acc.avg:.3f})'.format(
                          epoch,
                          step,
                          len(data_loader),
                          batch_time=batch_time,
                          data_time=data_time,
                          loss=losses,
                          acc=accuracies))

        # Temporary: both tables are returned to the caller, not written out.
        confusion_matrix = pd.DataFrame(counts)
        confidence_matrix = pd.DataFrame.from_dict(outputs_by_file,
                                                   orient='index')

        logger.log({'epoch': epoch, 'loss': losses.avg, 'acc': accuracies.avg})

        return losses.avg, confusion_matrix, confidence_matrix
示例#8
0
    def train_epoch(self, epoch, data_loader, model, criterion, optimizer, opt,
                    epoch_logger, batch_logger):
        """Train ``model`` for one epoch, logging per batch and per epoch.

        Args:
            epoch: Epoch number; used for logging, for the iteration counter
                and for the checkpoint schedule.
            data_loader: Iterable yielding ``(inputs, targets, _)`` batches.
            model: Network to train; it is switched to train mode.
            criterion: Callable invoked as ``criterion(outputs, targets)``.
            optimizer: Optimizer that is zeroed and stepped once per batch.
            opt: Options object; ``opt.checkpoint`` is the checkpoint period
                in epochs and ``opt.Results_directory`` the output folder.
            epoch_logger: Receives one summary dict at the end of the epoch.
            batch_logger: Receives one dict after every batch.

        Returns:
            None. A ``save_<epoch>.pth`` file is written whenever ``epoch``
            is a multiple of ``opt.checkpoint``.
        """
        print('train at epoch {}'.format(epoch))
        model.train()

        batch_time = AverageMeter()
        data_time = AverageMeter()
        losses = AverageMeter()
        accuracies = AverageMeter()

        tick = time.time()
        for step, (inputs, targets, _) in enumerate(data_loader, start=1):
            data_time.update(time.time() - tick)

            targets = targets.cuda(non_blocking=True)
            inputs = Variable(inputs)
            targets = Variable(targets)
            outputs = model(inputs)
            loss = criterion(outputs, targets)
            acc = calculate_accuracy(outputs, targets)

            batch_size = inputs.size(0)
            losses.update(loss.data, batch_size)
            accuracies.update(acc, inputs.size(0))

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            batch_time.update(time.time() - tick)
            tick = time.time()

            # Global iteration index, assuming epochs are numbered from 1.
            batch_record = {
                'epoch': epoch,
                'batch': step,
                'iter': (epoch - 1) * len(data_loader) + step,
                'loss': losses.val,
                'acc': accuracies.val,
                'lr': optimizer.param_groups[0]['lr']
            }
            batch_logger.log(batch_record)

            print('Epoch: [{0}][{1}/{2}]\t'
                  'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                  'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'
                  'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                  'Acc {acc.val:.3f} ({acc.avg:.3f})'.format(
                      epoch,
                      step,
                      len(data_loader),
                      batch_time=batch_time,
                      data_time=data_time,
                      loss=losses,
                      acc=accuracies))

        epoch_record = {
            'epoch': epoch,
            'loss': losses.avg,
            'acc': accuracies.avg,
            'lr': optimizer.param_groups[0]['lr']
        }
        epoch_logger.log(epoch_record)

        # Nothing more to do unless this epoch is on the checkpoint schedule.
        if epoch % opt.checkpoint != 0:
            return

        checkpoint_name = 'save_{}.pth'.format(epoch)
        save_file_path = os.path.join(opt.Results_directory, checkpoint_name)
        # The stored epoch is the one to continue with after reloading.
        torch.save(
            {
                'epoch': epoch + 1,
                'state_dict': model.state_dict(),
                'optimizer': optimizer.state_dict(),
            }, save_file_path)
示例#9
0
def validate(loader, model, criterion_rgb, criterion_local, epoch=0):
    """Run ``model`` over ``loader`` without gradients and report its scores.

    For every batch the loss is accumulated and the metrics selected by
    ``args.metric`` and ``args.metric_1`` are updated. Unless
    ``args.modality`` is ``'d'``, a row built from the first batch and from
    every 50th batch below batch 400 is merged into one image, which is
    written to ``<args.save_path>/comparison_<epoch>.png`` when batch 400
    is reached.

    Args:
        loader: iterable yielding ``(input, gt)`` batches.
        model: called as ``model(input, hidden=(None, None))``.
        criterion_rgb: loss applied to ``prediction[1]`` for 'mod'/'stacked'
            models, weighted by ``args.wrgb``.
        criterion_local: loss applied to the main prediction.
        epoch: only used in the name of the comparison image.

    Returns:
        ``(average of args.metric, average of args.metric_1, average loss)``.
    """
    sample_every = 50  # one comparison row per this many batches
    save_at = 8 * sample_every  # batch index at which the image is written

    loss_meter = AverageMeter()
    metric = Metrics(max_depth=args.max_depth, disp=args.use_disp)
    primary_score = AverageMeter()
    secondary_score = AverageMeter()
    loss_rgb = torch.zeros(1)

    model.eval()
    # Forward passes only, so autograd is switched off
    with torch.no_grad():
        for i, (batch, gt) in tqdm(enumerate(loader)):
            if not args.no_cuda:
                batch = batch.cuda(non_blocking=True)
                gt = gt.cuda(non_blocking=True)
            prediction, hidden = model(batch, hidden=(None, None))

            if 'mod' in args.mod or 'stacked' in args.mod:
                # Two outputs: the main prediction and an RGB-branch one
                loss = criterion_local(prediction[0], gt)
                loss_rgb = criterion_rgb(prediction[1], gt)
                loss += args.wrgb * loss_rgb
                prediction = prediction[0]
            else:
                loss = criterion_local(prediction, gt)

            loss_meter.update(loss.item(), batch.size(0))

            metric.calculate(prediction[:, 0:1], gt)
            primary_score.update(metric.get_metric(args.metric), metric.num)
            secondary_score.update(metric.get_metric(args.metric_1),
                                   metric.num)

            # Collect rows for the visual comparison image
            if args.modality != 'd':
                with_depth = args.modality == 'rgbd'
                if args.modality == 'rgb':
                    rgb = batch
                elif with_depth:
                    rgb = batch[:, :3, :, :]
                    depth = batch[:, 3, :, :]

                is_first = i == 0
                is_sample = (not is_first and i < save_at
                             and i % sample_every == 0)
                if is_first or is_sample:
                    if with_depth:
                        row = merge_into_row_with_gt(rgb, depth, gt,
                                                     prediction)
                    else:
                        row = merge_into_row(rgb, gt, prediction)
                    # The first row starts the image, later rows are appended
                    img_merge = row if is_first else add_row(img_merge, row)
                elif i == save_at:
                    filename = args.save_path + '/comparison_' + str(
                        epoch) + '.png'
                    save_image(img_merge, filename)
            else:
                img_merge = None

            if (i + 1) % args.print_freq == 0:
                print('Test: [{0}/{1}]\t'
                      'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                      'Metric {score.val:.4f} ({score.avg:.4f})'.format(
                          i + 1,
                          len(loader),
                          loss=loss_meter,
                          score=primary_score))

        if args.evaluate:
            print("===> Average RMSE score on validation set is {:.4f}".format(
                primary_score.avg))
            print("===> Average MAE score on validation set is {:.4f}".format(
                secondary_score.avg))
    return primary_score.avg, secondary_score.avg, loss_meter.avg
示例#10
0
def main():
    """Parse the command line, then train the model or only evaluate it.

    The run directory is ``args.save_path/<save_id>`` where ``save_id`` is
    built from the main hyper-parameters. Three paths exist:

    * resume: when ``first_run`` reports an epoch for the run directory (and
      neither ``args.test_mode`` nor ``args.evaluate`` is set) the matching
      ``checkpoint_model_epoch_<n>.pth.tar`` is loaded if it exists;
    * evaluate only: with ``args.evaluate`` the first ``model_best*`` file is
      loaded, the selection loader is validated and the function returns;
    * otherwise training starts from ``args.start_epoch``.

    Every epoch trains on ``train_loader``, validates, records the epoch in
    ``first_run.txt`` and writes a checkpoint through ``save_checkpoint``.
    Standard output is redirected to a ``Logger`` file in the run directory.
    """
    global args
    args = parser.parse_args()
    if args.num_samples == 0:
        # Use all lidar points
        args.num_samples = None
    else:
        args.data_path = ""  # path to precomputed 500 samples
        assert args.num_samples == 500
        print("changed path to samples500 dataset")
    if args.val_batch_size is None:
        args.val_batch_size = args.batch_size
    if args.seed:
        random.seed(args.seed)
        torch.manual_seed(args.seed)

    # init_distributed_mode(args)

    if not args.no_cuda and not torch.cuda.is_available():
        raise Exception("No gpu available for usage")
    torch.backends.cudnn.benchmark = args.cudnn
    # Init model
    args.channels_in = 3 if args.input_type == 'rgb' else 4
    model = Models.define_model(args.mod, args)
    # define_init_weights(model, args.weight_init)

    # Load on gpu before passing params to optimizer
    if not args.no_cuda:
        if not args.multi:
            model = model.cuda()
        else:
            model = torch.nn.DataParallel(model).cuda()
            # model.cuda()
            # model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[args.gpu])
            # model = model.module

    save_id = (
        '{}_{}_{}_{}_{}_batch{}_pretrain{}_wrgb{}_drop{}_patience{}'
        '_num_samples{}_multi{}_submod{}'.format(
            args.mod, args.optimizer, args.loss_criterion,
            args.learning_rate, args.input_type, args.batch_size,
            args.pretrained, args.wrgb, args.drop, args.lr_decay_iters,
            args.num_samples, args.multi, args.submod))

    # INIT optimizer/scheduler/loss criterion
    optimizer = define_optim(args.optimizer, model.parameters(),
                             args.learning_rate, args.weight_decay)
    scheduler = define_scheduler(optimizer, args)

    # Optional to use different losses
    criterion_local = define_loss(args.loss_criterion)
    criterion_rgb = define_loss(args.loss_criterion)

    # INIT dataset
    dataset = Datasets.define_dataset(args.dataset, args.data_path,
                                      args.input_type)
    dataset.prepare_dataset()
    train_loader, _, valid_loader, valid_selection_loader = get_loader(
        args, dataset)

    # Resume training
    best_epoch = 0
    lowest_loss = np.inf
    args.save_path = os.path.join(args.save_path, save_id)
    mkdir_if_missing(args.save_path)
    log_file_name = 'log_train_start_0.txt'
    args.resume = first_run(args.save_path)
    if args.resume and not args.test_mode and not args.evaluate:
        path = os.path.join(
            args.save_path,
            'checkpoint_model_epoch_{}.pth.tar'.format(int(args.resume)))
        if os.path.isfile(path):
            log_file_name = 'log_train_start_{}.txt'.format(args.resume)
            # stdout
            sys.stdout = Logger(os.path.join(args.save_path, log_file_name))
            print("=> loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(path)
            args.start_epoch = checkpoint['epoch']
            lowest_loss = checkpoint['loss']
            best_epoch = checkpoint['best epoch']
            model.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            print("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch']))
        else:
            log_file_name = 'log_train_start_0.txt'
            # stdout
            sys.stdout = Logger(os.path.join(args.save_path, log_file_name))
            print("=> no checkpoint found at '{}'".format(path))

    # Only evaluate
    elif args.evaluate:
        print("Evaluate only")
        best_file_lst = glob.glob(os.path.join(args.save_path, 'model_best*'))
        if len(best_file_lst) != 0:
            best_file_name = best_file_lst[0]
            print(best_file_name)
            if os.path.isfile(best_file_name):
                sys.stdout = Logger(
                    os.path.join(args.save_path, 'Evaluate.txt'))
                print("=> loading checkpoint '{}'".format(best_file_name))
                checkpoint = torch.load(best_file_name)
                model.load_state_dict(checkpoint['state_dict'])
            else:
                print("=> no checkpoint found at '{}'".format(best_file_name))
        else:
            print("=> no checkpoint found at due to empy list in folder {}".
                  format(args.save_path))
        # validate() takes (loader, model, criterion_rgb, criterion_local);
        # only criterion_rgb and criterion_local are defined in this function.
        validate(valid_selection_loader, model, criterion_rgb,
                 criterion_local)
        return

    # Start training from clean slate
    else:
        # Redirect stdout
        sys.stdout = Logger(os.path.join(args.save_path, log_file_name))

    # INIT MODEL
    print(40 * "=" + "\nArgs:{}\n".format(args) + 40 * "=")
    print("Init model: '{}'".format(args.mod))
    print("Number of parameters in model {} is {:.3f}M".format(
        args.mod.upper(),
        sum(tensor.numel() for tensor in model.parameters()) / 1e6))

    # Load pretrained state for cityscapes in GLOBAL net
    # if args.pretrained and not args.resume:
    # if not args.multi:
    # target_state = model.depthnet.state_dict()
    # else:
    # target_state = model.module.depthnet.state_dict()
    # check = torch.load('erfnet_pretrained.pth')
    # for name, val in check.items():
    # # Exclude multi GPU prefix
    # mono_name = name[7:]
    # if mono_name not in target_state:
    # continue
    # try:
    # target_state[mono_name].copy_(val)
    # except RuntimeError:
    # continue
    # print('Successfully loaded pretrained model')

    # Create summary writer
    log_path = os.path.join(args.save_path, "logs")
    if not os.path.exists(log_path):
        os.makedirs(log_path)
    logger = SummaryWriter(log_path)

    with open(os.path.join(args.save_path, 'commandline_args.txt'), 'w') as f:
        json.dump(args.__dict__, f, indent=2)

    # Number of batches per epoch, used to derive the global logging step
    batch_num = len(train_loader)

    # Start training
    for epoch in range(args.start_epoch, args.nepochs):
        print("\n => Start EPOCH {}".format(epoch + 1))
        print(datetime.now().strftime('%Y-%m-%d %H:%M:%S'))
        print(args.save_path)
        # Adjust learning rate
        if args.lr_policy is not None and args.lr_policy != 'plateau':
            scheduler.step()
            lr = optimizer.param_groups[0]['lr']
            print('lr is set to {}'.format(lr))

        # Define container objects
        batch_time = AverageMeter()
        data_time = AverageMeter()
        losses = AverageMeter()
        score_train = AverageMeter()
        score_train_1 = AverageMeter()
        metric_train = Metrics(max_depth=args.max_depth, disp=args.use_disp)

        # Train model for args.nepochs
        model.train()

        # compute timing
        end = time.time()

        # Load dataset
        for i, (input, gt) in tqdm(enumerate(train_loader)):

            # Time dataloader
            data_time.update(time.time() - end)

            # Put inputs on gpu if possible
            if not args.no_cuda:
                input, gt = input.cuda(), gt.cuda()
            prediction, hidden = model(input, hidden=(None, None))

            if 'mod' in args.mod or 'stacked' in args.mod:
                loss = criterion_local(prediction[0], gt)
                loss_rgb = criterion_rgb(prediction[1], gt)
                loss += args.wrgb * loss_rgb
                prediction = prediction[0]
            else:
                loss = criterion_local(prediction, gt)

            losses.update(loss.item(), input.size(0))
            metric_train.calculate(prediction[:, 0:1].detach(), gt.detach())
            score_train.update(metric_train.get_metric(args.metric),
                               metric_train.num)
            score_train_1.update(metric_train.get_metric(args.metric_1),
                                 metric_train.num)

            # Backward pass
            optimizer.zero_grad()
            loss.backward()

            # Clip gradients (useful for instabilities or mistakes in ground
            # truth). This has to happen after backward() and before step(),
            # otherwise the gradients of the current batch are never clipped.
            if args.clip_grad_norm != 0:
                nn.utils.clip_grad_norm_(model.parameters(),
                                         args.clip_grad_norm)

            optimizer.step()

            # Time training iteration
            batch_time.update(time.time() - end)
            end = time.time()

            # Print info
            if (i + 1) % args.print_freq == 0:
                print('Epoch: [{0}][{1}/{2}]\t'
                      'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                      'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                      'Metric {score.val:.4f} ({score.avg:.4f})'.format(
                          epoch + 1,
                          i + 1,
                          len(train_loader),
                          batch_time=batch_time,
                          loss=losses,
                          score=score_train))

            if ((i + 1) % 10 == 0) and (logger is not None):

                current_step = epoch * batch_num + i

                # Add scalar summaries
                logger.add_scalar('Train_loss/Loss', loss.item(), current_step)

                # utils.record_scalar_summary(result, average_meter, current_step, logger, "Train")
                # # Add system info
                # logger.add_scalar('System/gpu_time', average_meter.average().gpu_time, current_step)
                # logger.add_scalar('System/data_time', average_meter.average().data_time, current_step)

                if (i + 1) % 200 == 0:

                    # Add some image summary
                    if args.modality == "rgb":
                        input_images = input.cpu()
                    else:
                        input_images = input[:, :3, :, :].cpu()
                        input_depth = torch.unsqueeze(input[:, 3, :, :],
                                                      dim=1).cpu()

                    rgb_grid = make_grid(input_images[0:6, :, :, :],
                                         nrow=3,
                                         normalize=True)
                    target_grid = make_grid(gt.cpu()[0:6, :, :, :],
                                            nrow=3,
                                            normalize=True)
                    pred_grid = make_grid(prediction.cpu()[0:6, :, :, :],
                                          nrow=3,
                                          normalize=True)

                    logger.add_image('Train/RGB', rgb_grid.data.numpy(),
                                     current_step)
                    logger.add_image('Train/Depth_gt',
                                     target_grid.data.numpy(), current_step)
                    logger.add_image('Train/Depth_pred',
                                     pred_grid.data.numpy(), current_step)

                    if args.modality == "rgbd":
                        depth_grid = make_grid(input_depth[0:6, :, :, :],
                                               nrow=3,
                                               normalize=True)
                        logger.add_image('Train/Depth_input',
                                         depth_grid.data.numpy(), current_step)

        print("===> Average RMSE score on training set is {:.4f}".format(
            score_train.avg))
        print("===> Average MAE score on training set is {:.4f}".format(
            score_train_1.avg))
        # Evaluate model on validation set
        print("=> Start validation set")
        score_valid, score_valid_1, losses_valid = validate(
            valid_loader, model, criterion_rgb, criterion_local, epoch)
        print("===> Average RMSE score on validation set is {:.4f}".format(
            score_valid))
        print("===> Average MAE score on validation set is {:.4f}".format(
            score_valid_1))
        # Evaluate model on selected validation set
        if args.subset is None:
            print("=> Start selection validation set")
            score_selection, score_selection_1, losses_selection = validate(
                valid_selection_loader, model, criterion_rgb, criterion_local,
                epoch)
            total_score = score_selection
            print("===> Average RMSE score on selection set is {:.4f}".format(
                score_selection))
            print("===> Average MAE score on selection set is {:.4f}".format(
                score_selection_1))
        else:
            total_score = score_valid

        print("===> Last best score was RMSE of {:.4f} in epoch {}".format(
            lowest_loss, best_epoch))
        # Adjust lr if loss plateaued
        if args.lr_policy == 'plateau':
            scheduler.step(total_score)
            lr = optimizer.param_groups[0]['lr']
            print('LR plateaued, hence is set to {}'.format(lr))

        # File to keep latest epoch
        with open(os.path.join(args.save_path, 'first_run.txt'), 'w') as f:
            f.write(str(epoch))

        # Save model; flag the checkpoint as best when the score improved
        to_save = False
        if total_score < lowest_loss:

            to_save = True
            best_epoch = epoch + 1
            lowest_loss = total_score
        save_checkpoint(
            {
                'epoch': epoch + 1,
                'best epoch': best_epoch,
                'arch': args.mod,
                'state_dict': model.state_dict(),
                'loss': lowest_loss,
                'optimizer': optimizer.state_dict()
            }, to_save, epoch)

    # The summary writer created above is bound to `logger`; close it so
    # pending events are flushed to disk.
    logger.close()
示例#11
0
def main():
    """Train a model on Carla data, optionally together with KITTI data.

    The Carla loader drives the epoch. When ``args.train_target`` is set, one
    KITTI batch is drawn per Carla batch (the KITTI iterator is restarted
    when exhausted), both are concatenated for a single forward pass and the
    outputs are split again to compute a source loss against the ground
    truth and a target loss against the filtered sparse input. When
    ``args.use_image_translation`` is set, the RGB channels of the Carla
    batch are first passed through a pre-trained ``ResnetGeneratorCycle``.

    A checkpoint is written after every epoch. Training stops early, saving
    a checkpoint and returning ``1``, once ``args.n_training_iterations``
    optimisation steps have been made; otherwise the function returns
    ``None`` after ``args.nepochs``.

    Raises:
        Exception: if CUDA is requested but unavailable, or if the save path
            already exists.
    """
    global args
    args = parser.parse_args()
    if args.num_samples == 0:
        args.num_samples = None

    if args.cuda and not torch.cuda.is_available():
        raise Exception("No gpu available for usage")

    # Init model
    channels_in = 1 if args.input_type == 'depth' else 4
    model = Models.define_model(mod=args.mod, in_channels=channels_in)

    if args.mod == 'mod':
        define_init_weights(model, args.weight_init)

    # Load on gpu before passing params to optimizer
    if args.cuda:
        model = model.cuda()

    save_id = (
        '{}_{}_{}_{}_batch{}_pretrain{}_wlid{}_wrgb{}_wguide{}_wpred{}'
        '_num_samples{}'.format(
            args.mod, args.loss_criterion_source, args.learning_rate,
            args.input_type, args.batch_size, args.load_path != '',
            args.wlid, args.wrgb, args.wguide, args.wpred,
            args.num_samples))

    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=args.learning_rate,
                                 weight_decay=args.weight_decay)

    # Optional to use different losses
    criterion_source = define_loss(args.loss_criterion_source)
    criterion_target = define_loss(args.loss_criterion_target)

    # INIT KITTI dataset
    print('Load KITTI')
    dataset = Datasets.define_dataset('kitti', args.data_path_target,
                                      args.input_type)
    dataset.prepare_dataset()
    train_loader = get_loader(args, dataset, only_train=True)

    # INIT Carla dataset
    print('Load Carla')
    dataset = Datasets.define_dataset('carla', args.data_path_source,
                                      args.input_type)
    dataset.prepare_dataset()
    # The sparsification of the data and projection from the LiDAR reference
    # frame to the RGB camera explained in the paper happens in the dataloader
    train_loader_carla = get_loader(args,
                                    dataset,
                                    is_carla=True,
                                    only_train=True)
    train_loader_iter = iter(train_loader)

    # Resume training
    if args.save_name == '':
        args.save_path = os.path.join(args.save_path, save_id)
    else:
        args.save_path = os.path.join(args.save_path, args.save_name)
    if os.path.exists(args.save_path):
        raise Exception('Save path already exists')

    mkdir_if_missing(args.save_path)

    # INIT MODEL
    print(40 * "=" + "\nArgs:{}\n".format(args) + 40 * "=")
    print("Init model: '{}'".format(args.mod))
    print("Number of parameters in model {} is {:.3f}M".format(
        args.mod.upper(),
        sum(tensor.numel() for tensor in model.parameters()) / 1e6))

    # Load pretrained state
    if args.load_path != '':
        print("=> loading checkpoint {:s}".format(args.load_path))
        check = torch.load(
            args.load_path,
            map_location=lambda storage, loc: storage)['state_dict']
        model.load_state_dict(check)

    if args.use_image_translation:
        image_trans_net = ResnetGeneratorCycle(3, 3, 64, n_blocks=9)
        # Map to CPU first, like the checkpoint above; the network is moved
        # to the GPU below when requested.
        state_dict = torch.load('./image_translation_weights.pth',
                                map_location=lambda storage, loc: storage)
        image_trans_net.load_state_dict(state_dict)
        image_trans_net.eval()
        if args.cuda:
            image_trans_net = image_trans_net.cuda()

    # Start training
    global_step = 0
    for epoch in range(args.start_epoch, args.nepochs):
        print("\n => Start EPOCH {}".format(epoch + 1))

        # Define container objects
        batch_time = AverageMeter()
        data_time = AverageMeter()
        losses = AverageMeter()
        score_train_rmse = AverageMeter()
        score_train_mae = AverageMeter()
        metric_train = Metrics(max_depth=args.max_depth)

        # Train model for args.nepochs
        model.train()

        # compute timing
        end = time.time()
        for i, (input, gt, filepath) in tqdm(enumerate(train_loader_carla)):
            # Time dataloader
            data_time.update(time.time() - end)
            # Put inputs on gpu if possible
            if args.cuda:
                input, gt = input.cuda(), gt.cuda()

            # The LiDAR depths have large regions where no input depth is given
            # We remove all of the GT in the synthetic data where no input information is given
            # in a NxN window around the GT point (we set N=41) to avoid the model trying to estimate
            # depth for areas without any input guidance
            input_depth = input[:, 0:1]
            input_depth, gt = filter_data(input_depth,
                                          gt,
                                          max_depth=args.max_depth)
            input[:, 0:1] = input_depth

            # Load target set (KITTI) data
            if args.train_target:
                try:
                    input_target, gt_target, filepath_t = next(
                        train_loader_iter)
                except StopIteration:
                    # KITTI loader exhausted: restart it. Any other error
                    # raised while loading is left to propagate.
                    train_loader_iter = iter(train_loader)
                    input_target, gt_target, filepath_t = next(
                        train_loader_iter)

                if args.cuda:
                    input_target = input_target.cuda()
                    gt_target = gt_target.cuda()

            if args.use_image_translation:
                # The CycleGAN model was trained with inputs in the range of [-1, 1]
                with torch.no_grad():
                    rgb_trans = image_trans_net(input[:, 1:] / 128.5 - 1)
                rgb_trans = 128.5 * (rgb_trans + 1)
                rgb_trans = rgb_trans.clamp(0, 255)
                input = torch.cat([input[:, :1], rgb_trans], 1)

            if args.train_target:
                input_joint = torch.cat([input, input_target])
                prediction, lidar_out, precise, guide = model(
                    input_joint, epoch)
                # We separate predictions from the target domain and source
                # domain. The split point is the actual number of source
                # samples, so a short last source batch is split correctly.
                n_source = input.size(0)
                prediction_target = prediction[n_source:]
                lidar_out_target = lidar_out[n_source:]
                precise_target = precise[n_source:]
                guide_target = guide[n_source:]
                prediction = prediction[:n_source]
                lidar_out = lidar_out[:n_source]
                precise = precise[:n_source]
                guide = guide[:n_source]
            else:
                prediction, lidar_out, precise, guide = model(input, epoch)

            # We compute the loss for the source domain data
            loss = criterion_source(prediction, gt)
            loss_lidar = criterion_source(lidar_out, gt)
            loss_rgb = criterion_source(precise, gt)
            loss_guide = criterion_source(guide, gt)
            loss = (args.wpred * loss + args.wlid * loss_lidar +
                    args.wrgb * loss_rgb + args.wguide * loss_guide)

            if args.train_target:
                loss_target = 0
                # We filter the input data for supervision as explained in the paper
                filtered_sparse_data = filter_sparse_guidance(
                    input_target[:, :1], args.filter_window, args.filter_th)
                # We compute the loss for the target domain data
                loss_target += args.wpred * (criterion_target(
                    prediction_target, filtered_sparse_data))
                loss_target += args.wlid * (criterion_target(
                    lidar_out_target, filtered_sparse_data))
                loss_target += args.wrgb * (criterion_target(
                    precise_target, filtered_sparse_data))
                loss_target += args.wguide * (criterion_target(
                    guide_target, filtered_sparse_data))

                loss = loss + loss_target

            metric_train.calculate(prediction[:, 0:1].detach(), gt.detach())

            score_train_rmse.update(metric_train.get_metric('rmse'),
                                    metric_train.num)
            score_train_mae.update(metric_train.get_metric('mae'),
                                   metric_train.num)
            losses.update(loss.item(), input.size(0))

            # Optimization step
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            batch_time.update(time.time() - end)
            end = time.time()

            global_step += 1

            # Print info
            if (i + 1) % args.print_freq == 0:
                print('Epoch: [{0}][{1}/{2}]\t'
                      'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                      'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                      'RMSE Train {score.val:.4f} ({score.avg:.4f})'.format(
                          epoch + 1,
                          i + 1,
                          len(train_loader_carla),
                          batch_time=batch_time,
                          loss=losses,
                          score=score_train_rmse))

            # Stop once the requested number of iterations has been reached
            if global_step == args.n_training_iterations:
                dict_save = {
                    'epoch': epoch + 1,
                    'arch': args.mod,
                    'state_dict': model.state_dict(),
                    'optimizer': optimizer.state_dict()
                }
                save_checkpoint(dict_save, False, epoch + 1, global_step)
                return 1
        print("===> Average RMSE score on training set is {:.4f}".format(
            score_train_rmse.avg))
        print("===> Average MAE score on training set is {:.4f}".format(
            score_train_mae.avg))
        dict_save = {
            'epoch': epoch + 1,
            'arch': args.mod,
            'state_dict': model.state_dict(),
            'optimizer': optimizer.state_dict()
        }
        save_checkpoint(dict_save, False, epoch + 1)
示例#12
0
def eval_32bit(model, test_loader):
    """Evaluate ``model`` on ``test_loader`` and print loss and accuracies.

    Args:
        model: callable model exposing ``device`` and ``criterion``
            attributes; it is switched to eval mode.
        test_loader: iterable of batches where ``data[0]`` holds the images
            and ``data[1]`` the labels.

    Returns:
        ``(acc, loss)``: the running average of the top-1 precision and the
        running average of the loss over all batches.
    """
    device = model.device
    criterion = model.criterion

    model.eval()

    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()

    # Evaluation needs forward passes only; disabling autograd avoids
    # keeping a graph for every batch.
    with torch.no_grad():
        for data in tqdm(test_loader):
            image = data[0].type(torch.FloatTensor).to(device)
            label = data[1].type(torch.LongTensor).to(device)
            pred_label = model(image)

            loss = criterion(pred_label, label)
            # measure accuracy and record loss
            prec1, prec5 = accuracy(pred_label, label, topk=(1, 5))
            losses.update(loss.item(), image.size(0))
            top1.update(prec1.item(), image.size(0))
            top5.update(prec5.item(), image.size(0))

    print('Loss: {:.3f} | Acc1: {:.3f}% | Acc5: {:.3f}%'.format(
        losses.avg, top1.avg, top5.avg))

    acc = top1.avg
    loss = losses.avg

    return acc, loss
def validate(loader, model, criterion_rgb, criterion_local, epoch=0):
    """Score ``model`` on ``loader`` with gradients disabled.

    Args:
        loader: iterable yielding ``(input, gt)`` batches.
        model: called as ``model(input)``.
        criterion_rgb: loss applied to ``prediction[1]`` for 'mod'/'stacked'
            models, weighted by ``args.wrgb``.
        criterion_local: loss applied to the main prediction.
        epoch: accepted for interface compatibility; not used here.

    Returns:
        ``(average of args.metric, average of args.metric_1, average loss)``.
    """
    loss_meter = AverageMeter()
    metric = Metrics(max_depth=args.max_depth,
                     disp=args.use_disp,
                     normal=args.normal)
    primary_score = AverageMeter()
    secondary_score = AverageMeter()
    loss_rgb = torch.zeros(1)

    model.eval()
    # Forward passes only, so autograd is switched off
    with torch.no_grad():
        for step, (batch, gt) in enumerate(loader, start=1):
            if not args.no_cuda:
                batch = batch.cuda(non_blocking=True)
                gt = gt.cuda(non_blocking=True)
            prediction = model(batch)

            two_outputs = 'mod' in args.mod or 'stacked' in args.mod
            if two_outputs:
                # Main prediction plus an RGB-branch prediction
                loss = criterion_local(prediction[0], gt)
                loss_rgb = criterion_rgb(prediction[1], gt)
                loss += args.wrgb*loss_rgb
                prediction = prediction[0]
            else:
                loss = criterion_local(prediction, gt)

            loss_meter.update(loss.item(), batch.size(0))

            metric.calculate(prediction[:, 0:1], gt)
            primary_score.update(metric.get_metric(args.metric), metric.num)
            secondary_score.update(metric.get_metric(args.metric_1),
                                   metric.num)

            if step % args.print_freq == 0:
                print('Test: [{0}/{1}]\t'
                      'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                      'Metric {score.val:.4f} ({score.avg:.4f})'.format(
                          step,
                          len(loader),
                          loss=loss_meter,
                          score=primary_score))

        # With several processes the score meters are synchronised first
        if args.world_size > 1:
            primary_score.synchronize_between_processes()
            secondary_score.synchronize_between_processes()

        if args.evaluate:
            print("===> Average RMSE score on validation set is {:.4f}".format(
                primary_score.avg))
            print("===> Average MAE score on validation set is {:.4f}".format(
                secondary_score.avg))
    return primary_score.avg, secondary_score.avg, loss_meter.avg
示例#14
0
def validate(val_loader, model, criterion):
    """Evaluate ``model`` on ``val_loader`` and save a ROC graph.

    For every batch the loss and the first value returned by ``accuracy``
    are accumulated. In addition ``softmax(out)[args.target_index]`` is
    collected for each sample together with a 0/1 flag telling whether its
    target equals ``args.target_index``; both lists are passed to
    ``compute_auroc`` and the result is handed to ``save_auroc`` with the
    path ``<args.result>/rocgraph.png``.

    Args:
        val_loader: iterable yielding ``(input, target)`` batches.
        model: model to evaluate; switched to eval mode.
        criterion: loss function called as ``criterion(output, target)``.

    Returns:
        The running average of the top-1 precision (``top1.avg``).
    """
    batch_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()

    # switch to evaluate mode
    model.eval()

    with torch.no_grad():
        target_index_output, target_index_target = [], []
        end = time.time()
        for i, (input, target) in enumerate(val_loader):
            input = input.cuda(non_blocking=True)
            target = target.cuda(non_blocking=True)

            # compute output
            output = model(input)
            loss = criterion(output, target)

            # measure accuracy and record loss
            prec1 = accuracy(output, target)
            losses.update(loss.item(), input.size(0))
            top1.update(prec1[0], input.size(0))

            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()

            # for auroc get value from target index
            output_cpu = output.cpu().data.numpy()

            output_cpu = np.array(
                [softmax(out)[args.target_index] for out in output_cpu])

            # The builtin float/int replace the np.float/np.int aliases,
            # which were removed in NumPy 1.24; the resulting dtypes are
            # the same as before.
            target_index_output.extend(output_cpu.astype(float))
            target_index_target.extend(
                np.equal(target.cpu().data.numpy(),
                         args.target_index).astype(int))
            # --------------------------------------

            if i % args.print_freq == 0:
                print('Test: [{0}/{1}]\t'
                      'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                      'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                      'Prec@1 {top1.val:.3f} ({top1.avg:.3f})'.format(
                          i,
                          len(val_loader),
                          batch_time=batch_time,
                          loss=losses,
                          top1=top1))

        auc, roc = compute_auroc(target_index_output, target_index_target)

        print(' * Prec@1 {top1.avg:.3f}'.format(top1=top1))
        save_auroc(auc, roc, os.path.join(args.result, 'rocgraph' + '.png'))
    return top1.avg
示例#15
0
def train(train_loader, model, criterion, optimizer, epoch):
    """Train ``model`` for one pass over ``train_loader``.

    Args:
        train_loader: iterable yielding ``(input, target)`` batches.
        model: model to train; switched to train mode.
        criterion: loss function called as ``criterion(output, target)``.
        optimizer: optimizer stepped once per batch.
        epoch: epoch number, used in the progress output and the loss log.

    After the loop the average loss is passed to ``save_loss_log`` (file
    name ``losslog.txt``, location ``args.result``) and the value it returns
    is handed to ``save_log_graph``.
    """
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()

    # switch to train mode
    model.train()

    end = time.time()
    for i, (input, target) in enumerate(train_loader):
        # measure data loading time
        data_time.update(time.time() - end)

        input = input.cuda(non_blocking=True)
        target = target.cuda(non_blocking=True)

        # compute output
        output = model(input)
        loss = criterion(output, target)

        # measure accuracy and record loss
        prec1 = accuracy(output, target)
        losses.update(loss.item(), input.size(0))
        top1.update(prec1[0], input.size(0))

        # compute gradient and do SGD step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        # report progress every args.print_freq batches (including batch 0)
        if i % args.print_freq == 0:
            print('Epoch: [{0}][{1}/{2}]\t'
                  'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                  'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'
                  'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                  'Prec@1 {top1.val:.3f} ({top1.avg:.3f})'.format(
                      epoch,
                      i,
                      len(train_loader),
                      batch_time=batch_time,
                      data_time=data_time,
                      loss=losses,
                      top1=top1))

    # record the epoch's average loss, then redraw the loss graph from the log
    savelogloss = save_loss_log(epoch,
                                losses.avg,
                                args.result,
                                filename='losslog.txt')
    save_log_graph(log_file=savelogloss)