Example #1
def main():
    global args, best_result, output_directory

    # set random seed
    torch.manual_seed(args.manual_seed)
    torch.cuda.manual_seed(args.manual_seed)
    np.random.seed(args.manual_seed)
    random.seed(args.manual_seed)

    print("Let's use GPU ", torch.cuda.current_device())

    val_loader = create_loader(args)
    output_directory = utils.get_output_directory(args)
    print("=> loading checkpoint '{}'".format(args.resume))
    checkpoint = torch.load(
        '/share2/public/fail_safe/kitti/DeepBlur/result/kitti/run_7/model_best.pth.tar'
    )

    # solve 'out of memory': keep only the state dict from the pickled model
    model_sd = checkpoint['model'].state_dict()
    # `model` is assumed to be instantiated earlier with the matching architecture
    model.load_state_dict(model_sd)
    print("=> loaded checkpoint (epoch {})".format(checkpoint['epoch']))

    test(val_loader, model)
    # clear memory
    del checkpoint
    # del model_dict
    torch.cuda.empty_cache()
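
Note on the "solve 'out of memory'" comment above: keeping the whole pickled model object around while loading it onto the GPU briefly holds two copies of the weights. A minimal sketch of the state-dict-only variant, assuming the architecture module is importable (FCRN.ResNet is borrowed from Example #6 further down; the import path is an assumption):

import torch
import FCRN  # hypothetical import; any module defining the same architecture works


def load_weights_only(ckpt_path, output_size):
    # load the checkpoint on the CPU so the GPU never holds two copies
    checkpoint = torch.load(ckpt_path, map_location='cpu')
    state_dict = checkpoint['model'].state_dict()
    model = FCRN.ResNet(output_size=output_size, pretrained=False)
    model.load_state_dict(state_dict)
    del checkpoint, state_dict  # release the pickled model object
    torch.cuda.empty_cache()
    return model.cuda()
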
Example #2
def main():
    global args, best_result, output_directory, train_csv, test_csv

    sparsifier = None
    max_depth = args.max_depth if args.max_depth >= 0.0 else np.inf
    if args.sparsifier == UniformSampling.name:
        sparsifier = UniformSampling(num_samples=args.num_samples,
                                     max_depth=max_depth)
    elif args.sparsifier == SimulatedStereo.name:
        sparsifier = SimulatedStereo(num_samples=args.num_samples,
                                     max_depth=max_depth)

    # create results folder, if not already exists
    output_directory = utils.get_output_directory(args)
    if not os.path.exists(output_directory):
        os.makedirs(output_directory)
    train_csv = os.path.join(output_directory, 'train.csv')
    test_csv = os.path.join(output_directory, 'test.csv')
    best_txt = os.path.join(output_directory, 'best.txt')

    # define loss function (criterion) and optimizer
    if args.criterion == 'l2':
        criterion = criteria.MaskedMSELoss().cuda()
    elif args.criterion == 'l1':
        criterion = criteria.MaskedL1Loss().cuda()
    out_channels = 1

    # Data loading code
    print("=> creating data loaders ...")
    traindir = os.path.join('data', args.data, 'train')
    valdir = os.path.join('data', args.data, 'val')

    train_dataset = NYUDataset(traindir,
                               type='train',
                               modality=args.modality,
                               sparsifier=sparsifier)
    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=args.batch_size,
                                               shuffle=True,
                                               num_workers=args.workers,
                                               pin_memory=True,
                                               sampler=None)

    # set batch size to be 1 for validation
    val_dataset = NYUDataset(valdir,
                             type='val',
                             modality=args.modality,
                             sparsifier=sparsifier)
    val_loader = torch.utils.data.DataLoader(val_dataset,
                                             batch_size=1,
                                             shuffle=False,
                                             num_workers=args.workers,
                                             pin_memory=True)

    print("=> data loaders created.")

    # evaluation mode
    if args.evaluate:
        best_model_filename = os.path.join(output_directory,
                                           'model_best.pth.tar')
        assert os.path.isfile(best_model_filename), \
        "=> no best model found at '{}'".format(best_model_filename)
        print("=> loading best model '{}'".format(best_model_filename))
        checkpoint = torch.load(best_model_filename)
        args.start_epoch = checkpoint['epoch']
        best_result = checkpoint['best_result']
        model = checkpoint['model']
        print("=> loaded best model (epoch {})".format(checkpoint['epoch']))
        validate(val_loader, model, checkpoint['epoch'], write_to_file=False)
        return

    # optionally resume from a checkpoint
    elif args.resume:
        assert os.path.isfile(args.resume), \
            "=> no checkpoint found at '{}'".format(args.resume)
        print("=> loading checkpoint '{}'".format(args.resume))
        checkpoint = torch.load(args.resume)
        args.start_epoch = checkpoint['epoch'] + 1
        best_result = checkpoint['best_result']
        model = checkpoint['model']
        optimizer = checkpoint['optimizer']
        print("=> loaded checkpoint (epoch {})".format(checkpoint['epoch']))

    # create new model
    else:
        # define model
        print("=> creating Model ({}-{}) ...".format(args.arch, args.decoder))
        in_channels = len(args.modality)
        if args.arch == 'resnet50':
            model = ResNet(layers=50,
                           decoder=args.decoder,
                           in_channels=in_channels,
                           out_channels=out_channels,
                           pretrained=args.pretrained)
        elif args.arch == 'resnet18':
            model = ResNet(layers=18,
                           decoder=args.decoder,
                           in_channels=in_channels,
                           out_channels=out_channels,
                           pretrained=args.pretrained)
        print("=> model created.")

        optimizer = torch.optim.SGD(model.parameters(),
                                    args.lr,
                                    momentum=args.momentum,
                                    weight_decay=args.weight_decay)

        # create new csv files with only header
        with open(train_csv, 'w') as csvfile:
            writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
            writer.writeheader()
        with open(test_csv, 'w') as csvfile:
            writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
            writer.writeheader()

    # model = torch.nn.DataParallel(model).cuda()
    model = model.cuda()
    print(model)
    print("=> model transferred to GPU.")

    for epoch in range(args.start_epoch, args.epochs):
        utils.adjust_learning_rate(optimizer, epoch, args.lr)

        # train for one epoch
        train(train_loader, model, criterion, optimizer, epoch)

        # evaluate on validation set
        result, img_merge = validate(val_loader, model, epoch)

        # remember best rmse and save checkpoint
        is_best = result.rmse < best_result.rmse
        if is_best:
            best_result = result
            with open(best_txt, 'w') as txtfile:
                txtfile.write(
                    "epoch={}\nmse={:.3f}\nrmse={:.3f}\nabsrel={:.3f}\nlg10={:.3f}\nmae={:.3f}\ndelta1={:.3f}\nt_gpu={:.4f}\n"
                    .format(epoch, result.mse, result.rmse, result.absrel,
                            result.lg10, result.mae, result.delta1,
                            result.gpu_time))
            if img_merge is not None:
                img_filename = output_directory + '/comparison_best.png'
                utils.save_image(img_merge, img_filename)

        utils.save_checkpoint(
            {
                'args': args,
                'epoch': epoch,
                'arch': args.arch,
                'model': model,
                'best_result': best_result,
                'optimizer': optimizer,
            }, is_best, epoch, output_directory)
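
All of these examples delegate checkpoint writing to utils.save_checkpoint, which is not shown. A minimal sketch of a typical implementation is below; the per-epoch filename scheme is an assumption, while 'model_best.pth.tar' matches what the evaluation branches load:

import os
import shutil
import torch


def save_checkpoint(state, is_best, epoch, output_directory):
    # write the checkpoint for this epoch, then copy it as the running best
    checkpoint_filename = os.path.join(output_directory,
                                       'checkpoint-{}.pth.tar'.format(epoch))
    torch.save(state, checkpoint_filename)
    if is_best:
        best_filename = os.path.join(output_directory, 'model_best.pth.tar')
        shutil.copyfile(checkpoint_filename, best_filename)
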
Example #3
def main():
    global args, best_result, output_directory, train_csv, test_csv

    # evaluation mode
    start_epoch = 0
    if args.evaluate:
        assert os.path.isfile(args.evaluate), \
        "=> no best model found at '{}'".format(args.evaluate)
        print("=> loading best model '{}'".format(args.evaluate))
        checkpoint = torch.load(args.evaluate)
        output_directory = os.path.dirname(args.evaluate)
        args = checkpoint['args']
        start_epoch = checkpoint['epoch'] + 1
        best_result = checkpoint['best_result']
        model = checkpoint['model']
        print("=> loaded best model (epoch {})".format(checkpoint['epoch']))
        _, val_loader = create_data_loaders(args)
        args.evaluate = True
        validate(val_loader, model, checkpoint['epoch'], write_to_file=False)
        return
    elif args.crossTrain:
        print("Retraining loaded model on current input parameters")
        train_loader, val_loader = create_data_loaders(args)
        checkpoint = torch.load(args.crossTrain)
        model = checkpoint['model']
        optimizer = torch.optim.SGD(model.parameters(), args.lr, \
            momentum=args.momentum, weight_decay=args.weight_decay)
        model = model.cuda()

    # optionally resume from a checkpoint
    elif args.resume:
        chkpt_path = args.resume
        assert os.path.isfile(chkpt_path), \
            "=> no checkpoint found at '{}'".format(chkpt_path)
        print("=> loading checkpoint '{}'".format(chkpt_path))
        checkpoint = torch.load(chkpt_path)
        args = checkpoint['args']
        start_epoch = checkpoint['epoch'] + 1
        best_result = checkpoint['best_result']
        model = checkpoint['model']
        optimizer = checkpoint['optimizer']
        output_directory = os.path.dirname(os.path.abspath(chkpt_path))
        print("=> loaded checkpoint (epoch {})".format(checkpoint['epoch']))
        train_loader, val_loader = create_data_loaders(args)
        args.resume = True

    # create new model
    else:
        train_loader, val_loader = create_data_loaders(args)
        print("=> creating Model ({}-{}) ...".format(args.arch, args.decoder))
        in_channels = len(args.modality)
        if args.arch == 'resnet50':
            model = ResNet(layers=50,
                           decoder=args.decoder,
                           output_size=train_loader.dataset.output_size,
                           in_channels=in_channels,
                           pretrained=args.pretrained)
        elif args.arch == 'resnet18':
            model = ResNet(layers=18,
                           decoder=args.decoder,
                           output_size=train_loader.dataset.output_size,
                           in_channels=in_channels,
                           pretrained=args.pretrained)
        print("=> model created.")
        optimizer = torch.optim.SGD(model.parameters(), args.lr, \
            momentum=args.momentum, weight_decay=args.weight_decay)

        # model = torch.nn.DataParallel(model).cuda() # for multi-gpu training
        model = model.cuda()

    # define loss function (criterion) and optimizer
    if args.criterion == 'l2':
        criterion = criteria.MaskedMSELoss().cuda()
    elif args.criterion == 'l1':
        criterion = criteria.MaskedL1Loss().cuda()

    # create results folder, if not already exists
    output_directory = utils.get_output_directory(args)
    if not os.path.exists(output_directory):
        os.makedirs(output_directory)
    train_csv = os.path.join(output_directory, 'train.csv')
    test_csv = os.path.join(output_directory, 'test.csv')
    best_txt = os.path.join(output_directory, 'best.txt')

    # create new csv files with only header
    if not args.resume:
        with open(train_csv, 'w') as csvfile:
            writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
            writer.writeheader()
        with open(test_csv, 'w') as csvfile:
            writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
            writer.writeheader()

    for epoch in range(start_epoch, args.epochs):
        utils.adjust_learning_rate(optimizer, epoch, args.lr)
        train(train_loader, model, criterion, optimizer,
              epoch)  # train for one epoch
        result, img_merge = validate(val_loader, model,
                                     epoch)  # evaluate on validation set

        # remember best rmse and save checkpoint
        is_best = result.rmse < best_result.rmse
        if is_best:
            best_result = result
            with open(best_txt, 'w') as txtfile:
                txtfile.write(
                    "epoch={}\nmse={:.3f}\nrmse={:.3f}\nabsrel={:.3f}\nlg10={:.3f}\nmae={:.3f}\ndelta1={:.3f}\nt_gpu={:.4f}\n"
                    .format(epoch, result.mse, result.rmse, result.absrel,
                            result.lg10, result.mae, result.delta1,
                            result.gpu_time))
            if img_merge is not None:
                img_filename = output_directory + '/comparison_best.png'
                utils.save_image(img_merge, img_filename)

        utils.save_checkpoint(
            {
                'args': args,
                'epoch': epoch,
                'arch': args.arch,
                'model': model,
                'best_result': best_result,
                'optimizer': optimizer,
            }, is_best, epoch, output_directory)
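
utils.adjust_learning_rate is called at the top of every epoch but is also not reproduced in these examples. A common step-decay sketch; the decay factor and interval are assumptions, not the repository's actual schedule:

def adjust_learning_rate(optimizer, epoch, init_lr):
    # step decay: halve the learning rate every 5 epochs (factor/interval assumed)
    lr = init_lr * (0.5 ** (epoch // 5))
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr
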
Example #4
def main():
    global args, best_result, output_directory, train_csv, test_csv

    # if more than one GPU is available, use them all for training
    if torch.cuda.device_count() > 1:
        print("Let's use", torch.cuda.device_count(), "GPUs!")
        args.batch_size = args.batch_size * torch.cuda.device_count()
    else:
        print("Let's use GPU", torch.cuda.current_device())

    # evaluation mode
    start_epoch = 0
    if args.evaluate:
        assert os.path.isfile(args.evaluate), \
            "=> no best model found at '{}'".format(args.evaluate)
        print("=> loading best model '{}'".format(args.evaluate))
        checkpoint = torch.load(args.evaluate)
        output_directory = os.path.dirname(args.evaluate)
        args = checkpoint['args']
        start_epoch = checkpoint['epoch'] + 1

        best_result = checkpoint['best_result']
        model = checkpoint['model']
        print("=> loaded best model (epoch {})".format(checkpoint['epoch']))
        _, val_loader = create_data_loaders(args)
        args.evaluate = True
        validate(val_loader, model, checkpoint['epoch'], write_to_file=False)
        return

    # optionally resume from a checkpoint
    elif args.resume:
        assert os.path.isfile(args.resume), \
            "=> no checkpoint found at '{}'".format(args.resume)
        print("=> loading checkpoint '{}'".format(args.resume))
        checkpoint = torch.load(args.resume)
        args = checkpoint['args']
        start_epoch = checkpoint['epoch'] + 1
        best_result = checkpoint['best_result']
        model = checkpoint['model']
        optimizer = checkpoint['optimizer']
        output_directory = os.path.dirname(os.path.abspath(args.resume))
        print("=> loaded checkpoint (epoch {})".format(checkpoint['epoch']))
        train_loader, val_loader = create_data_loaders(args)
        args.resume = True

    # create new model
    else:
        train_loader, val_loader = create_data_loaders(args)
        print("=> creating Model ({})".format(args.arch))
        in_channels = len(args.modality)
        if args.arch == 'resnet50':
            model = models.resnet50(pretrained=True)
        elif args.arch == 'resnet18':
            model = models.resnet18(pretrained=True)
        print("=> model created.")
        optimizer = torch.optim.SGD(model.parameters(), args.lr, \
                                    momentum=args.momentum, weight_decay=args.weight_decay)

        # for multi-gpu training
        if torch.cuda.device_count() > 1:
            model = torch.nn.DataParallel(model).cuda()
        else:
            model = model.cuda()

    # define loss function (criterion) and optimizer
    if args.criterion == 'l2':
        criterion = criteria.MaskedMSELoss().cuda()
    elif args.criterion == 'l1':
        criterion = criteria.MaskedL1Loss().cuda()
    elif args.criterion == 'berHu':
        criterion = criteria.berHuLoss().cuda()

    # create results folder, if not already exists
    output_directory = utils.get_output_directory(args)
    if not os.path.exists(output_directory):
        os.makedirs(output_directory)

    best_txt = os.path.join(output_directory, 'best.txt')

    log_path = os.path.join(
        output_directory, 'logs',
        datetime.now().strftime('%b%d_%H-%M-%S') + '_' + socket.gethostname())
    if os.path.isdir(log_path):
        shutil.rmtree(log_path)
    os.makedirs(log_path)
    logger = SummaryWriter(log_path)

    for epoch in range(start_epoch, args.epochs):
        utils.adjust_learning_rate(optimizer, epoch, args.lr)
        train(train_loader, model, criterion, optimizer, epoch,
              logger)  # train for one epoch
        result, img_merge = validate(val_loader, model, epoch,
                                     logger)  # evaluate on validation set

        # remember best rmse and save checkpoint
        is_best = result.rmse < best_result.rmse
        if is_best:
            best_result = result
            with open(best_txt, 'w') as txtfile:
                txtfile.write(
                    "epoch={}\nrmse={:.3f}\nrml={:.3f}\nlog10={:.3f}\nDelta1={:.3f}\nDelta2={:.3f}\nDelta3={:.3f}\nt_gpu={:.4f}\n"
                    .format(epoch, result.rmse, result.absrel, result.lg10,
                            result.delta1, result.delta2, result.delta3,
                            result.gpu_time))
            if img_merge is not None:
                img_filename = output_directory + '/comparison_best.png'
                utils.save_image(img_merge, img_filename)

        utils.save_checkpoint(
            {
                'args': args,
                'epoch': epoch,
                'arch': args.arch,
                'model': model,
                'best_result': best_result,
                'optimizer': optimizer,
            }, is_best, epoch, output_directory)
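
Because Example #4 wraps the model in torch.nn.DataParallel only when several GPUs are present, a checkpoint produced on a multi-GPU machine stores state-dict keys prefixed with 'module.' (the commented-out lines in Example #6 below hint at the same issue). A small hedged helper for loading such a state dict into an unwrapped model:

def strip_data_parallel_prefix(state_dict):
    # drop the 'module.' prefix that nn.DataParallel adds to every key
    return {key[len('module.'):] if key.startswith('module.') else key: value
            for key, value in state_dict.items()}
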
Example #5
def main():
    global args, best_result, output_directory, train_csv, test_csv

    # evaluation mode
    if args.evaluate:

        # Data loading code
        print("=> creating data loaders...")
        valdir = os.path.join('..', 'data', args.data, 'val')

        if args.data == 'nyudepthv2':
            from dataloaders.nyu import NYUDataset
            val_dataset = NYUDataset(valdir,
                                     split='val',
                                     modality=args.modality)
        else:
            raise RuntimeError('Dataset not found.')

        # set batch size to be 1 for validation
        val_loader = torch.utils.data.DataLoader(val_dataset,
                                                 batch_size=1,
                                                 shuffle=False,
                                                 num_workers=args.workers,
                                                 pin_memory=True)
        print("=> data loaders created.")

        assert os.path.isfile(args.evaluate), \
            "=> no model found at '{}'".format(args.evaluate)
        print("=> loading model '{}'".format(args.evaluate))
        checkpoint = torch.load(args.evaluate)
        if type(checkpoint) is dict:
            args.start_epoch = checkpoint['epoch']
            best_result = checkpoint['best_result']
            model = checkpoint['model']
            print("=> loaded best model (epoch {})".format(
                checkpoint['epoch']))
        else:
            model = checkpoint
            args.start_epoch = 0
        output_directory = os.path.dirname(args.evaluate)
        validate(val_loader, model, args.start_epoch, write_to_file=False)
        return

    start_epoch = 0
    if args.train:
        train_loader, val_loader = create_data_loaders(args)
        print("=> creating Model ({}-{}) ...".format(args.arch, args.decoder))

        model = models.MobileNetSkipAdd(
            output_size=train_loader.dataset.output_size)
        print("=> model created.")
        optimizer = torch.optim.SGD(model.parameters(),
                                    args.lr,
                                    momentum=args.momentum,
                                    weight_decay=args.weight_decay)

        # model = torch.nn.DataParallel(model).cuda() # for multi-gpu training
        model = model.cuda()

    # define loss function (criterion) and optimizer
    if args.criterion == 'l2':
        criterion = criteria.MaskedMSELoss().cuda()
    elif args.criterion == 'l1':
        criterion = criteria.MaskedL1Loss().cuda()

    # create results folder, if not already exists
    output_directory = utils.get_output_directory(args)
    if not os.path.exists(output_directory):
        os.makedirs(output_directory)
    train_csv = os.path.join(output_directory, 'train.csv')
    test_csv = os.path.join(output_directory, 'test.csv')
    best_txt = os.path.join(output_directory, 'best.txt')

    # create new csv files with only header
    if not args.resume:
        with open(train_csv, 'w') as csvfile:
            writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
            writer.writeheader()
        with open(test_csv, 'w') as csvfile:
            writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
            writer.writeheader()

    for epoch in range(start_epoch, args.epochs):
        utils.adjust_learning_rate(optimizer, epoch, args.lr)
        train(train_loader, model, criterion, optimizer,
              epoch)  # train for one epoch
        result, img_merge = validate(val_loader, model,
                                     epoch)  # evaluate on validation set

        # remember best rmse and save checkpoint
        is_best = result.rmse < best_result.rmse
        if is_best:
            best_result = result
            with open(best_txt, 'w') as txtfile:
                txtfile.write(
                    "epoch={}\nmse={:.3f}\nrmse={:.3f}\nabsrel={:.3f}\nlg10={:.3f}\nmae={:.3f}\ndelta1={:.3f}\nt_gpu={:.4f}\n"
                    .format(epoch, result.mse, result.rmse, result.absrel,
                            result.lg10, result.mae, result.delta1,
                            result.gpu_time))
            if img_merge is not None:
                img_filename = output_directory + '/comparison_best.png'
                utils.save_image(img_merge, img_filename)

        utils.save_checkpoint(
            {
                'args': args,
                'epoch': epoch,
                'arch': args.arch,
                'model': model,
                'best_result': best_result,
                'optimizer': optimizer,
            }, is_best, epoch, output_directory)
Example #6
def main():
    global args, best_result, output_directory

    # set random seed
    torch.manual_seed(args.manual_seed)

    if torch.cuda.device_count() > 1:
        print("Let's use", torch.cuda.device_count(), "GPUs!")
        args.batch_size = args.batch_size * torch.cuda.device_count()
    else:
        print("Let's use GPU ", torch.cuda.current_device())

    train_loader, val_loader = create_loader(args)

    if args.resume:
        assert os.path.isfile(args.resume), \
            "=> no checkpoint found at '{}'".format(args.resume)
        print("=> loading checkpoint '{}'".format(args.resume))
        checkpoint = torch.load(args.resume)

        start_epoch = checkpoint['epoch'] + 1
        best_result = checkpoint['best_result']
        optimizer = checkpoint['optimizer']

        # model_dict = checkpoint['model'].module.state_dict()  # to load the trained model using multi-GPUs
        # model = FCRN.ResNet(output_size=train_loader.dataset.output_size, pretrained=False)
        # model.load_state_dict(model_dict)

        # solve 'out of memory'
        model = checkpoint['model']

        print("=> loaded checkpoint (epoch {})".format(checkpoint['epoch']))

        # clear memory
        del checkpoint
        # del model_dict
        torch.cuda.empty_cache()
    else:
        print("=> creating Model")
        model = FCRN.ResNet(output_size=train_loader.dataset.output_size)
        print("=> model created.")
        start_epoch = 0

        # different modules have different learning rate
        train_params = [{
            'params': model.get_1x_lr_params(),
            'lr': args.lr
        }, {
            'params': model.get_10x_lr_params(),
            'lr': args.lr * 10
        }]

        optimizer = torch.optim.SGD(train_params,
                                    lr=args.lr,
                                    momentum=args.momentum,
                                    weight_decay=args.weight_decay)

        # You can use DataParallel() whether you use Multi-GPUs or not
        model = nn.DataParallel(model).cuda()

    # when training, use reduceLROnPlateau to reduce learning rate
    scheduler = lr_scheduler.ReduceLROnPlateau(optimizer,
                                               'min',
                                               patience=args.lr_patience)

    # loss function
    criterion = criteria.MaskedL1Loss()

    # create directory path
    output_directory = utils.get_output_directory(args)
    if not os.path.exists(output_directory):
        os.makedirs(output_directory)
    best_txt = os.path.join(output_directory, 'best.txt')
    config_txt = os.path.join(output_directory, 'config.txt')

    # write training parameters to config file
    if not os.path.exists(config_txt):
        with open(config_txt, 'w') as txtfile:
            args_ = vars(args)
            args_str = ''
            for k, v in args_.items():
                args_str = args_str + str(k) + ':' + str(v) + ',\t\n'
            txtfile.write(args_str)

    # create log
    log_path = os.path.join(
        output_directory, 'logs',
        datetime.now().strftime('%b%d_%H-%M-%S') + '_' + socket.gethostname())
    if os.path.isdir(log_path):
        shutil.rmtree(log_path)
    os.makedirs(log_path)
    logger = SummaryWriter(log_path)

    for epoch in range(start_epoch, args.epochs):

        # log the current learning rate of each parameter group
        for i, param_group in enumerate(optimizer.param_groups):
            old_lr = float(param_group['lr'])
            logger.add_scalar('Lr/lr_' + str(i), old_lr, epoch)

        train(train_loader, model, criterion, optimizer, epoch,
              logger)  # train for one epoch
        result, img_merge = validate(val_loader, model, epoch,
                                     logger)  # evaluate on validation set

        # remember best rmse and save checkpoint
        is_best = result.rmse < best_result.rmse
        if is_best:
            best_result = result
            with open(best_txt, 'w') as txtfile:
                txtfile.write(
                    "epoch={}, rmse={:.3f}, rml={:.3f}, log10={:.3f}, d1={:.3f}, d2={:.3f}, dd31={:.3f}, "
                    "t_gpu={:.4f}".format(epoch, result.rmse, result.absrel,
                                          result.lg10, result.delta1,
                                          result.delta2, result.delta3,
                                          result.gpu_time))
            if img_merge is not None:
                img_filename = output_directory + '/comparison_best.png'
                utils.save_image(img_merge, img_filename)

        # save checkpoint for each epoch
        utils.save_checkpoint(
            {
                'args': args,
                'epoch': epoch,
                'model': model,
                'best_result': best_result,
                'optimizer': optimizer,
            }, is_best, epoch, output_directory)

        # reduce the learning rate when absrel stops improving
        scheduler.step(result.absrel)

    logger.close()
Example #7
def main():
    print('Testing data on ' + args.camera + '!')

    assert args.data == 'nyudepthv2', \
        '=> only nyudepthv2 available at this point'

    to_tensor = transforms.ToTensor()

    assert not (args.camera == 'webcam' and args.modality != 'rgb'), \
        '=> webcam only accepts an RGB model'

    output_directory = utils.get_output_directory(args)
    best_model_filename = os.path.join(output_directory,
                                       'model_best.pth.tar')
    assert os.path.isfile(best_model_filename), \
        "=> no best model found at '{}'".format(
            best_model_filename)
    print("=> loading best model '{}'".format(
        best_model_filename))
    checkpoint = torch.load(best_model_filename)
    args.start_epoch = checkpoint['epoch']
    model = checkpoint['model']
    model.eval()

    switch = True

    if args.camera == 'kinect':
        kinect = PyKinectRuntime.PyKinectRuntime(
            PyKinectV2.FrameSourceTypes_Color |
            PyKinectV2.FrameSourceTypes_Depth)
        counter = 0

        assert not kinect._sensor is None, '=> No Kinect ' \
                                           'device ' \
                                           'detected!'

        while True:
            if kinect.has_new_color_frame() and \
                    kinect.has_new_depth_frame():
                bgra_frame = kinect.get_last_color_frame()
                bgra_frame = bgra_frame.reshape((
                                                kinect.color_frame_desc.Height,
                                                kinect.color_frame_desc.Width,
                                                4),
                                                order='C')
                rgb_frame = cv2.cvtColor(bgra_frame,
                                         cv2.COLOR_BGRA2RGB)

                depth_frame = kinect.get_last_depth_frame()

                merged_image, rmse = depth_estimate(model,
                                                    rgb_frame,
                                                    depth_frame,
                                                    save=False)

                merged_image_bgr = cv2.cvtColor(
                    merged_image.astype('uint8'),
                    cv2.COLOR_RGB2BGR)
                switch = False
                cv2.imshow('my webcam',
                           merged_image_bgr.astype('uint8'))
                if counter == 15:
                    print('RMSE = ' + str(rmse))
                counter = counter + 1
                if counter == 16:
                    counter = 0

            if cv2.waitKey(1) == 27:
                break

    elif args.camera == 'webcam':

        cam = cv2.VideoCapture(0)

        while True:

            ret_val, img = cam.read()

            img = cv2.flip(img, 1)

            rgb = cv2.cvtColor(np.array(img),
                               cv2.COLOR_BGR2RGB)

            # torchvision's Resize expects a PIL image, so resize the numpy
            # frame with OpenCV instead; cv2.resize takes (width, height)
            rgb_image = cv2.resize(rgb, (304, 228))

            if args.modality == 'rgbd':
                raise RuntimeError("=> can't test webcam with depth "
                                   "information!")

            rgb_np = np.asfarray(rgb_image,
                                 dtype='float') / 255
            input_tensor = to_tensor(rgb_np)
            while input_tensor.dim() < 4:
                input_tensor = input_tensor.unsqueeze(0)
            input_tensor = input_tensor.cuda()
            torch.cuda.synchronize()
            end = time.time()
            with torch.no_grad():
                pred = model(input_tensor)
            torch.cuda.synchronize()
            gpu_time = time.time() - end

            pred_depth = np.squeeze(pred.cpu().numpy())

            d_min = np.min(pred_depth)
            d_max = np.max(pred_depth)
            pred_color_map = color_map(pred_depth, d_min,
                                       d_max,
                                       plt.cm.viridis)

            merged_image = np.hstack(
                [rgb_image, pred_color_map])
            merged_image_bgr = cv2.cvtColor(
                merged_image.astype('uint8'),
                cv2.COLOR_RGB2BGR)
            cv2.imshow('my webcam',
                       merged_image_bgr.astype('uint8'))

            if cv2.waitKey(1) == 27:
                break  # esc to quit

    else:
        file_name = args.kinectdata + '.p'
        pickle_path = os.path.join('CameraData', file_name)
        print(pickle_path)

        if not os.path.exists(pickle_path):
            raise RuntimeError('=> no data found at ' + pickle_path)

        f = open(pickle_path, 'rb')
        pickle_file = pickle.load(f)
        f.close()

        bgr_frame = pickle_file['rgb']
        depth = pickle_file['depth']

        rgb_frame = cv2.cvtColor(bgr_frame,
                                 cv2.COLOR_BGR2RGB)

        merged_image, rmse = depth_estimate(model,
                                            rgb_frame,
                                            depth,
                                            save=True,
                                            switch=True)
        plt.figure('Merged Image')
        plt.imshow(merged_image.astype('uint8'))
        plt.show()
        print('RMSE = ' + str(rmse))

    cv2.destroyAllWindows()
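
Example #7 relies on a color_map helper to turn the predicted depth map into an RGB visualisation; the helper itself is not included. A minimal sketch under the assumption that it follows the usual normalise-then-apply-colormap pattern (the exact scaling is an assumption):

import numpy as np


def color_map(depth, d_min, d_max, cmap):
    # normalise depth to [0, 1], map it through a matplotlib colormap,
    # drop the alpha channel and scale to 8-bit RGB
    relative = (depth - d_min) / (d_max - d_min + 1e-8)
    return 255 * cmap(relative)[:, :, :3]
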
Example #8
def main():
    global args, best_result, output_directory, train_csv, test_csv
    # Random seed setting
    torch.manual_seed(16)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

    # Data loading code
    print("=> creating data loaders...")
    data_dir = '/media/vasp/Data2/Users/vmhp806/depth-estimation'
    valdir = os.path.join(data_dir, 'data', args.data, 'val')
    traindir = os.path.join(data_dir, 'data', args.data, 'train')

    if args.data == 'nyu' or args.data == 'uow_dataset':
        from dataloaders.nyu import NYUDataset
        val_dataset = NYUDataset(valdir, split='val', modality=args.modality)
        #val_dataset = nc.SafeDataset(val_dataset)
        train_dataset = NYUDataset(traindir,
                                   split='train',
                                   modality=args.modality)
        #train_dataset = nc.SafeDataset(train_dataset)
    else:
        raise RuntimeError('Dataset not found.')

    # set batch size to be 1 for validation
    val_loader = torch.utils.data.DataLoader(val_dataset,
                                             batch_size=1,
                                             shuffle=False,
                                             num_workers=args.workers,
                                             pin_memory=True,
                                             collate_fn=my_collate)
    if not args.evaluate:
        train_loader = torch.utils.data.DataLoader(train_dataset,
                                                   batch_size=args.batch_size,
                                                   shuffle=False,
                                                   num_workers=args.workers,
                                                   pin_memory=True,
                                                   collate_fn=my_collate)
    print("=> data loaders created.")

    # evaluation mode
    if args.evaluate:
        assert os.path.isfile(args.evaluate), \
        "=> no model found at '{}'".format(args.evaluate)
        print("=> loading model '{}'".format(args.evaluate))
        checkpoint = torch.load(args.evaluate)
        if type(checkpoint) is dict:
            args.start_epoch = checkpoint['epoch']
            best_result = checkpoint['best_result']
            model = checkpoint['model']
            print("=> loaded best model (epoch {})".format(
                checkpoint['epoch']))
        else:
            model = checkpoint
            args.start_epoch = 0
        output_directory = os.path.dirname(args.evaluate)
        if args.predict:
            predict(val_loader, model, output_directory)
        else:
            validate(val_loader, model, args.start_epoch, write_to_file=False)
        return
    # optionally resume from a checkpoint
    elif args.resume:
        chkpt_path = args.resume
        assert os.path.isfile(chkpt_path), \
            "=> no checkpoint found at '{}'".format(chkpt_path)
        print("=> loading checkpoint " "'{}'".format(chkpt_path))
        checkpoint = torch.load(chkpt_path)
        #args = checkpoint['args']
        start_epoch = checkpoint['epoch'] + 1
        best_result = checkpoint['best_result']
        model = checkpoint['model']
        optimizer = torch.optim.SGD(model.parameters(), lr=0.9)
        optimizer.load_state_dict(checkpoint['optimizer'])
        #optimizer = checkpoint['optimizer']
        output_directory = os.path.dirname(os.path.abspath(chkpt_path))
        print("=> loaded checkpoint (epoch {})".format(checkpoint['epoch']))
        args.resume = True
    else:
        print("=> creating Model ({} - {}) ...".format(args.arch,
                                                       args.decoder))
        #in_channels = len(args.modality)
        if args.arch == 'mobilenet-skipconcat':
            model = models.MobileNetSkipConcat(
                decoder=args.decoder,
                output_size=train_loader.dataset.output_size)
        elif args.arch == 'mobilenet-skipadd':
            model = models.MobileNetSkipAdd(
                decoder=args.decoder,
                output_size=train_loader.dataset.output_size)
        elif args.arch == 'resnet18-skipconcat':
            model = models.ResNetSkipConcat(
                layers=18,
                decoder=args.decoder,
                output_size=train_loader.dataset.output_size)
        elif args.arch == 'resnet18-skipadd':
            model = models.ResNetSkipAdd(
                layers=18, output_size=train_loader.dataset.output_size)
        else:
            raise Exception('Invalid architecture')
        print("=> model created.")
        optimizer = torch.optim.SGD(model.parameters(),
                                    args.lr,
                                    momentum=args.momentum,
                                    weight_decay=args.weight_decay)

        # model = torch.nn.DataParallel(model).cuda() # for multi-gpu training
        model = model.cuda()
        start_epoch = 0
    # define loss function (criterion) and optimizer
    if args.criterion == 'l2':
        criterion = criteria.MaskedMSELoss().cuda()
    elif args.criterion == 'l1':
        criterion = criteria.MaskedL1Loss().cuda()

    # create results folder, if not already exists
    output_directory = utils.get_output_directory(args)
    if not os.path.exists(output_directory):
        os.makedirs(output_directory)
    train_csv = os.path.join(output_directory, 'train.csv')
    test_csv = os.path.join(output_directory, 'test.csv')
    best_txt = os.path.join(output_directory, 'best.txt')

    # create new csv files with only header
    if not args.resume:
        with open(train_csv, 'w') as csvfile:
            writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
            writer.writeheader()
        with open(test_csv, 'w') as csvfile:
            writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
            writer.writeheader()
    #start_epoch = 0
    for epoch in range(start_epoch, args.epochs):
        utils.adjust_learning_rate(optimizer, epoch, args.lr)
        train(train_loader, model, criterion, optimizer,
              epoch)  # train for one epoch
        result, img_merge = validate(
            val_loader, model, epoch,
            write_to_file=True)  # evaluate on validation set

        # remember best rmse and save checkpoint
        is_best = result.rmse < best_result.rmse
        if is_best:
            best_result = result
            with open(best_txt, 'w') as txtfile:
                txtfile.write(
                    "epoch={}\nmse={:.3f}\nrmse={:.3f}\nabsrel={:.3f}\nlg10={:.3f}\nmae={:.3f}\ndelta1={:.3f}\nt_gpu={:.4f}\n"
                    .format(epoch, result.mse, result.rmse, result.absrel,
                            result.lg10, result.mae, result.delta1,
                            result.gpu_time))
            if img_merge is not None:
                img_filename = output_directory + '/comparison_best.png'
                utils.save_image(img_merge, img_filename)

        utils.save_checkpoint(
            {
                'args': args,
                'epoch': epoch,
                #'arch': args.arch,
                'model': model,
                'best_result': best_result,
                'optimizer': optimizer.state_dict(),
            },
            is_best,
            epoch,
            output_directory)
Example #9
def main():
    torch.cuda.set_device(config.cuda_id)
    global args, best_result, output_directory, train_csv, test_csv, batch_num, best_txt
    best_result = Result()
    best_result.set_to_worst()
    batch_num = 0
    output_directory = utils.get_output_directory(args)

    #----------------------------------------------------------------#
    # pytorch version shim: older releases lack                      #
    # torch._utils._rebuild_tensor_v2, which is needed to un-pickle  #
    # checkpoints saved by newer versions, so define it when missing #
    #----------------------------------------------------------------#

    try:
        torch._utils._rebuild_tensor_v2
    except AttributeError:

        def _rebuild_tensor_v2(storage, storage_offset, size, stride,
                               requires_grad, backward_hooks):
            tensor = torch._utils._rebuild_tensor(storage, storage_offset,
                                                  size, stride)
            tensor.requires_grad = requires_grad
            tensor._backward_hooks = backward_hooks
            return tensor

        torch._utils._rebuild_tensor_v2 = _rebuild_tensor_v2

    if not os.path.exists(output_directory):
        os.makedirs(output_directory)

    train_csv = os.path.join(output_directory, 'train.csv')
    test_csv = os.path.join(output_directory, 'test.csv')
    best_txt = os.path.join(output_directory, 'best.txt')

    nowTime = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
    file = open(namefile, 'a+')
    file.writelines(
        str("====================================================") +
        str(nowTime) + '\n')
    file.writelines(str("Cuda_id: ") + str(config.cuda_id) + '\n')
    file.writelines(str("NAME: ") + str(config.name) + '\n')
    file.writelines(str("Description: ") + str(config.description) + '\n')
    file.writelines(
        str("model: ") + str(args.arch) + '\n' + str("loss_final: ") +
        str(args.criterion) + '\n' + str("loss_1: ") + str(config.LOSS_1) +
        '\n' + str("batch_size:") + str(args.batch_size) + '\n')
    file.writelines(str("zoom_scale: ") + str(config.zoom_scale) + '\n')
    file.writelines(str("------------------------") + '\n')
    file.writelines(str("Train_dataste: ") + str(config.train_dir) + '\n')
    file.writelines(str("Validation_dataste: ") + str(config.val_dir) + '\n')
    file.writelines(str("------------------------") + '\n')
    file.writelines(str("Input_type: ") + str(config.input) + '\n')
    file.writelines(str("target_type: ") + str(config.target) + '\n')
    file.writelines(str("LOSS--------------------") + '\n')
    file.writelines(str("Loss_num: ") + str(config.loss_num) + '\n')
    file.writelines(
        str("loss_final: ") + str(args.criterion) + '\n' + str("loss_1: ") +
        str(config.LOSS_1) + '\n')
    file.writelines(
        str("loss_0_weight: ") + str(config.LOSS_0_weight) + '\n' +
        str("loss_1_weight: ") + str(config.LOSS_1_weight) + '\n')
    file.writelines(
        str("weight_GT_canny: ") + str(config.weight_GT_canny_loss) + '\n' +
        str("weight_GT_sobel: ") + str(config.weight_GT_sobel_loss) + '\n' +
        str("weight_rgb_sobel: ") + str(config.weight_rgb_sobel_loss) + '\n')
    file.writelines(str("------------------------") + '\n')
    file.writelines(str("target: ") + str(config.target) + '\n')
    file.writelines(str("data_loader_type: ") + str(config.data_loader) + '\n')
    file.writelines(str("lr: ") + str(config.Init_lr) + '\n')
    file.writelines(str("save_fc: ") + str(config.save_fc) + '\n')
    file.writelines(str("Max epoch: ") + str(config.epoch) + '\n')
    file.close()

    # define loss function (criterion) and optimizer
    if args.criterion == 'l2':
        criterion = criteria.MaskedMSELoss().cuda()
    elif args.criterion == 'l1':
        criterion = criteria.MaskedL1Loss().cuda()
    elif args.criterion == 'l1_canny':
        criterion = criteria.MaskedL1_cannyLoss().cuda()
    #SOBEL
    elif args.criterion == 'l1_from_rgb_sobel':
        criterion = criteria.MaskedL1_from_rgb_sobel_Loss().cuda()
    elif args.criterion == 'l1_from_GT_rgb_sobel':
        criterion = criteria.MaskedL1_from_GT_rgb_sobel_Loss().cuda()
    elif args.criterion == 'l1_from_GT_sobel':
        criterion = criteria.MaskedL1_from_GT_sobel_Loss().cuda()
    elif args.criterion == 'l2_from_GT_sobel_Loss':
        criterion = criteria.MaskedL2_from_GT_sobel_Loss().cuda()
    #CANNY
    elif args.criterion == 'l1_canny_from_GT_canny':
        criterion = criteria.MaskedL1_canny_from_GT_Loss().cuda()

    # Data loading code
    print("=> creating data loaders ...")
    train_dir = config.train_dir
    val_dir = config.val_dir
    train_dataset = YT_dataset(train_dir, config, is_train_set=True)
    val_dataset = YT_dataset(val_dir, config, is_train_set=False)

    train_loader = torch.utils.data.DataLoader(
        train_dataset,
        batch_size=config.batch_size,
        shuffle=True,
        num_workers=args.workers,
        pin_memory=True,
        sampler=None,
        worker_init_fn=lambda work_id: np.random.seed(work_id))
    # worker_init_fn ensures different sampling patterns for each data loading thread
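    # note: seeding each worker with only its id replays the same augmentation
    # stream every epoch; a common variation mixes in torch.initial_seed()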

    # set batch size to be 1 for validation
    val_loader = torch.utils.data.DataLoader(val_dataset,
                                             batch_size=1,
                                             shuffle=False,
                                             num_workers=args.workers,
                                             pin_memory=True)
    print("=> data loaders created.")

    if args.evaluate:
        best_model_filename = os.path.join(output_directory,
                                           'model_best.pth.tar')
        assert os.path.isfile(best_model_filename), \
        "=> no best model found at '{}'".format(best_model_filename)
        print("=> loading best model '{}'".format(best_model_filename))
        checkpoint = torch.load(best_model_filename)
        args.start_epoch = checkpoint['epoch']
        best_result = checkpoint['best_result']
        model = checkpoint['model']
        print("=> loaded best model (epoch {})".format(checkpoint['epoch']))
        validate(val_loader,
                 model,
                 checkpoint['epoch'],
                 1,
                 write_to_file=False)
        return

    elif args.test:
        print("testing...")
        best_model_filename = best_model_dir
        assert os.path.isfile(best_model_filename), \
            "=> no best model found at '{}'".format(best_model_filename)
        print("=> loading best model '{}'".format(best_model_filename))
        checkpoint = torch.load(best_model_filename)
        args.start_epoch = checkpoint['epoch']
        best_result = checkpoint['best_result']
        model = checkpoint['model']
        print("=> loaded best model (epoch {})".format(checkpoint['epoch']))
        optimizer = checkpoint['optimizer']
        for state in optimizer.state.values():
            for k, v in state.items():
                print(type(v))
                if torch.is_tensor(v):
                    state[k] = v.cuda()

        #test(val_loader, model, checkpoint['epoch'], write_to_file=False)
        test(model)
        return

    elif args.resume:
        assert os.path.isfile(config.resume_model_dir), \
            "=> no checkpoint found at '{}'".format(config.resume_model_dir)
        print("=> loading checkpoint '{}'".format(config.resume_model_dir))
        best_model_filename = config.resume_model_dir
        checkpoint = torch.load(best_model_filename)
        args.start_epoch = checkpoint['epoch'] + 1
        best_result = checkpoint['best_result']
        model = checkpoint['model']
        optimizer = checkpoint['optimizer']
        for state in optimizer.state.values():
            for k, v in state.items():
                #print(type(v))
                if torch.is_tensor(v):
                    state[k] = v.cuda(config.cuda_id)

        print("=> loaded checkpoint (epoch {})".format(checkpoint['epoch']))

    else:
        print("=> creating Model ({}-{}) ...".format(args.arch, args.decoder))
        if config.input == 'RGBT':
            in_channels = 4
        elif config.input == 'YT':
            in_channels = 2
        else:
            print("Input type is wrong !")
            return 0
        if args.arch == 'resnet50':  # instantiate the ResNet-based model (in_channels set above)
            model = ResNet(layers=50,
                           decoder=args.decoder,
                           output_size=train_dataset.output_size,
                           in_channels=in_channels,
                           pretrained=args.pretrained)
        elif args.arch == 'resnet50_deconv1_loss0':  # instantiate the ResNet-based model (in_channels set above)
            model = ResNet_with_deconv(layers=50,
                                       decoder=args.decoder,
                                       output_size=train_dataset.output_size,
                                       in_channels=in_channels,
                                       pretrained=args.pretrained)
        elif args.arch == 'resnet50_deconv1_loss1':  # instantiate the ResNet-based model (in_channels set above)
            model = ResNet_with_deconv_loss(
                layers=50,
                decoder=args.decoder,
                output_size=train_dataset.output_size,
                in_channels=in_channels,
                pretrained=args.pretrained)
        elif args.arch == 'resnet50_direct_deconv1_loss1':  # instantiate the ResNet-based model (in_channels set above)
            model = ResNet_with_direct_deconv(
                layers=50,
                decoder=args.decoder,
                output_size=train_dataset.output_size,
                in_channels=in_channels,
                pretrained=args.pretrained)
        elif args.arch == 'resnet50_1':  # instantiate the ResNet-based model (in_channels set above)
            model = ResNet_1(layers=50,
                             decoder=args.decoder,
                             output_size=train_dataset.output_size,
                             in_channels=in_channels,
                             pretrained=args.pretrained)
        elif args.arch == 'resnet50_2':  # instantiate the ResNet-based model (in_channels set above)
            model = ResNet_2(layers=50,
                             decoder=args.decoder,
                             output_size=train_dataset.output_size,
                             in_channels=in_channels,
                             pretrained=args.pretrained)
        elif args.arch == 'resnet50_3':  # instantiate the ResNet-based model (in_channels set above)
            model = ResNet_3(layers=50,
                             decoder=args.decoder,
                             output_size=train_dataset.output_size,
                             in_channels=in_channels,
                             pretrained=args.pretrained)
        elif args.arch == 'resnet50_3_1':  # instantiate the ResNet-based model (in_channels set above)
            model = ResNet_3_1(layers=50,
                               decoder=args.decoder,
                               output_size=train_dataset.output_size,
                               in_channels=in_channels,
                               pretrained=args.pretrained)
        elif args.arch == 'resnet50_3_2':  # instantiate the ResNet-based model (in_channels set above)
            model = ResNet_3_2(layers=50,
                               decoder=args.decoder,
                               output_size=train_dataset.output_size,
                               in_channels=in_channels,
                               pretrained=args.pretrained)
        elif args.arch == 'resnet50_3_3':  # instantiate the ResNet-based model (in_channels set above)
            model = ResNet_3_3(layers=50,
                               decoder=args.decoder,
                               output_size=train_dataset.output_size,
                               in_channels=in_channels,
                               pretrained=args.pretrained)
        elif args.arch == 'resnet50_4':  # instantiate the ResNet-based model (in_channels set above)
            model = ResNet_4(layers=50,
                             decoder=args.decoder,
                             output_size=train_dataset.output_size,
                             in_channels=in_channels,
                             pretrained=args.pretrained)
        elif args.arch == 'resnet50_5':  # instantiate the ResNet-based model (in_channels set above)
            model = ResNet_5(layers=50,
                             decoder=args.decoder,
                             output_size=train_dataset.output_size,
                             in_channels=in_channels,
                             pretrained=args.pretrained)
        elif args.arch == 'resnet50_7':  # instantiate the ResNet-based model (in_channels set above)
            model = ResNet_7(layers=50,
                             decoder=args.decoder,
                             output_size=train_dataset.output_size,
                             in_channels=in_channels,
                             pretrained=args.pretrained)
        elif args.arch == 'resnet50_8':  # instantiate the ResNet-based model (in_channels set above)
            model = ResNet_8(layers=50,
                             decoder=args.decoder,
                             output_size=train_dataset.output_size,
                             in_channels=in_channels,
                             pretrained=args.pretrained)
        elif args.arch == 'resnet50_9':  # instantiate the ResNet-based model (in_channels set above)
            model = ResNet_9(layers=50,
                             decoder=args.decoder,
                             output_size=train_dataset.output_size,
                             in_channels=in_channels,
                             pretrained=args.pretrained)
        elif args.arch == 'resnet50_10':  # instantiate the ResNet-based model (in_channels set above)
            model = ResNet_10(layers=50,
                              decoder=args.decoder,
                              output_size=train_dataset.output_size,
                              in_channels=in_channels,
                              pretrained=args.pretrained)
        elif args.arch == 'resnet50_11':  # instantiate the ResNet-based model (in_channels set above)
            model = ResNet_11(layers=50,
                              decoder=args.decoder,
                              output_size=train_dataset.output_size,
                              in_channels=in_channels,
                              pretrained=args.pretrained)
        elif args.arch == 'resnet50_11_1':  # instantiate the ResNet-based model (in_channels set above)
            model = ResNet_11_1(layers=50,
                                decoder=args.decoder,
                                output_size=train_dataset.output_size,
                                in_channels=in_channels,
                                pretrained=args.pretrained)
        elif args.arch == 'resnet50_11_without_pretrain':  # instantiate the ResNet-based model (in_channels set above)
            model = ResNet_11_without_pretrain(
                layers=50,
                decoder=args.decoder,
                output_size=train_dataset.output_size,
                in_channels=in_channels,
                pretrained=args.pretrained)
        elif args.arch == 'resnet50_12':  # instantiate the ResNet-based model (in_channels set above)
            model = ResNet_12(layers=50,
                              decoder=args.decoder,
                              output_size=train_dataset.output_size,
                              in_channels=in_channels,
                              pretrained=args.pretrained)
        elif args.arch == 'resnet50_13':  # instantiate the ResNet-based model (in_channels set above)
            model = ResNet_13(layers=50,
                              decoder=args.decoder,
                              output_size=train_dataset.output_size,
                              in_channels=in_channels,
                              pretrained=args.pretrained)
        elif args.arch == 'resnet50_14':  # instantiate the ResNet-based model (in_channels set above)
            model = ResNet_14(layers=50,
                              decoder=args.decoder,
                              output_size=train_dataset.output_size,
                              in_channels=in_channels,
                              pretrained=args.pretrained)
        elif args.arch == 'resnet50_15':  # instantiate the ResNet-based model (in_channels set above)
            model = ResNet_15(layers=50,
                              decoder=args.decoder,
                              output_size=train_dataset.output_size,
                              in_channels=in_channels,
                              pretrained=args.pretrained)
        elif args.arch == 'resnet50_16':  # instantiate the ResNet-based model (in_channels set above)
            model = ResNet_16(layers=50,
                              decoder=args.decoder,
                              output_size=train_dataset.output_size,
                              in_channels=in_channels,
                              pretrained=args.pretrained)
        elif args.arch == 'resnet50_17':
            model = ResNet_17(layers=50,
                              decoder=args.decoder,
                              output_size=train_dataset.output_size,
                              in_channels=in_channels,
                              pretrained=args.pretrained)
        elif args.arch == 'resnet50_18':
            model = ResNet50_18(layers=50,
                                decoder=args.decoder,
                                output_size=train_dataset.output_size,
                                in_channels=in_channels,
                                pretrained=args.pretrained)
        elif args.arch == 'resnet50_30':
            model = ResNet_30(layers=50,
                              decoder=args.decoder,
                              output_size=train_dataset.output_size,
                              in_channels=in_channels,
                              pretrained=args.pretrained)
        elif args.arch == 'resnet50_31':
            model = ResNet_31(layers=50,
                              decoder=args.decoder,
                              output_size=train_dataset.output_size,
                              in_channels=in_channels,
                              pretrained=args.pretrained)
        elif args.arch == 'resnet50_32':
            model = ResNet_32(layers=50,
                              decoder=args.decoder,
                              output_size=train_dataset.output_size,
                              in_channels=in_channels,
                              pretrained=args.pretrained)
        elif args.arch == 'resnet50_33':
            model = ResNet_33(layers=50,
                              decoder=args.decoder,
                              output_size=train_dataset.output_size,
                              in_channels=in_channels,
                              pretrained=args.pretrained)
        elif args.arch == 'resnet50_40':
            model = ResNet_40(layers=50,
                              decoder=args.decoder,
                              output_size=train_dataset.output_size,
                              in_channels=in_channels,
                              pretrained=args.pretrained)
        elif args.arch == 'resnet50_15_1':
            model = ResNet_15_1(layers=50,
                                decoder=args.decoder,
                                output_size=train_dataset.output_size,
                                in_channels=in_channels,
                                pretrained=args.pretrained)
        elif args.arch == 'resnet50_15_2':
            model = ResNet_15_2(layers=50,
                                decoder=args.decoder,
                                output_size=train_dataset.output_size,
                                in_channels=in_channels,
                                pretrained=args.pretrained)
        elif args.arch == 'resnet50_15_3':
            model = ResNet_15_3(layers=50,
                                decoder=args.decoder,
                                output_size=train_dataset.output_size,
                                in_channels=in_channels,
                                pretrained=args.pretrained)
        elif args.arch == 'resnet50_15_4':
            model = ResNet_15_4(layers=50,
                                decoder=args.decoder,
                                output_size=train_dataset.output_size,
                                in_channels=in_channels,
                                pretrained=args.pretrained)
        elif args.arch == 'resnet50_15_5':
            model = ResNet_15_5(layers=50,
                                decoder=args.decoder,
                                output_size=train_dataset.output_size,
                                in_channels=in_channels,
                                pretrained=args.pretrained)
        elif args.arch == 'resnet50_15_6':
            model = ResNet_15_6(layers=50,
                                decoder=args.decoder,
                                output_size=train_dataset.output_size,
                                in_channels=in_channels,
                                pretrained=args.pretrained)
        elif args.arch == 'resnet50_15_8':
            model = ResNet_15_8(layers=34,
                                decoder=args.decoder,
                                output_size=train_dataset.output_size,
                                in_channels=in_channels,
                                pretrained=args.pretrained)
        elif args.arch == 'resnet50_15_9':
            model = ResNet_15_9(layers=50,
                                decoder=args.decoder,
                                output_size=train_dataset.output_size,
                                in_channels=in_channels,
                                pretrained=args.pretrained)
        elif args.arch == 'resnet50_15_10':
            model = ResNet_15_10(layers=50,
                                 decoder=args.decoder,
                                 output_size=train_dataset.output_size,
                                 in_channels=in_channels,
                                 pretrained=args.pretrained)
        elif args.arch == 'resnet50_15_11':
            model = ResNet_15_11(layers=50,
                                 decoder=args.decoder,
                                 output_size=train_dataset.output_size,
                                 in_channels=in_channels,
                                 pretrained=args.pretrained)
        elif args.arch == 'resnet50_15_12':
            model = ResNet_15_12(layers=50,
                                 decoder=args.decoder,
                                 output_size=train_dataset.output_size,
                                 in_channels=in_channels,
                                 pretrained=args.pretrained)
        elif args.arch == 'resnet18':
            model = ResNet(layers=18,
                           decoder=args.decoder,
                           output_size=train_dataset.output_size,
                           in_channels=in_channels,
                           pretrained=args.pretrained)
        elif args.arch == 'resnet50_20':
            model = ResNet50_20(Bottleneck, [3, 4, 6, 3])
        elif args.arch == 'UNet':
            model = UNet()
        elif args.arch == 'UP_only':
            model = UP_only()
        elif args.arch == 'ResNet_bicubic':
            model = ResNet_bicubic(layers=50,
                                   decoder=args.decoder,
                                   output_size=train_dataset.output_size,
                                   in_channels=in_channels,
                                   pretrained=args.pretrained)
        elif args.arch == 'VDSR':
            model = VDSR()
        elif args.arch == 'VDSR_without_res':
            model = VDSR_without_res()
        elif args.arch == 'VDSR_16':
            model = VDSR_16()
        elif args.arch == 'VDSR_16_2':
            model = VDSR_16_2()
        elif args.arch == 'Leon_resnet50':
            model = Leon_resnet50()
        elif args.arch == 'Leon_resnet101':
            model = Leon_resnet101()
        elif args.arch == 'Leon_resnet18':
            model = Leon_resnet18()
        elif args.arch == 'Double_resnet50':
            model = Double_resnet50()
        print("=> model created.")

        if args.finetune:
            print("===============loading finetune model=====================")
            assert os.path.isfile(config.fitune_model_dir), \
            "=> no checkpoint found at '{}'".format(config.fitune_model_dir)
            print("=> loading checkpoint '{}'".format(config.fitune_model_dir))
            best_model_filename = config.fitune_model_dir
            checkpoint = torch.load(best_model_filename)
            args.start_epoch = checkpoint['epoch'] + 1
            #best_result = checkpoint['best_result']
            model_fitune = checkpoint['model']
            model_fitune_dict = model_fitune.state_dict()
            model_dict = model.state_dict()
            # copy only the weights whose names also exist in the new model;
            # blindly updating with the whole finetune dict could introduce
            # unexpected keys and make load_state_dict fail
            for k in model_fitune_dict:
                if k in model_dict:
                    model_dict[k] = model_fitune_dict[k]
            model.load_state_dict(model_dict)

            #optimizer = checkpoint['optimizer']
            print("=> loaded checkpoint (epoch {})".format(
                checkpoint['epoch']))

        #optimizer = torch.optim.SGD(model.parameters(), args.lr,momentum=args.momentum, weight_decay=args.weight_decay)
        optimizer = torch.optim.Adam(model.parameters(),
                                     lr=args.lr,
                                     amsgrad=True,
                                     weight_decay=args.weight_decay)
        '''
        optimizer = torch.optim.Adam(
        [
            #{'params':model.base.parameters()}, 3
            {'params': model.re_conv_Y_1.parameters(),'lr':0.0001},
            {'params': model.re_conv_Y_2.parameters(), 'lr': 0.0001},
            {'params': model.re_conv_Y_3.parameters(), 'lr': 0.0001},
            #3
            {'params': model.re_deconv_up0.parameters(), 'lr': 0.0001},
            {'params': model.re_deconv_up1.parameters(), 'lr': 0.0001},
            {'params': model.re_deconv_up2.parameters(), 'lr': 0.0001},
            #3
            {'params': model.re_conv1.parameters(), 'lr': 0.0001},
            {'params': model.re_bn1.parameters(), 'lr': 0.0001},
            {'params': model.re_conv4.parameters(), 'lr': 0.0001},
            #5
            {'params': model.re_ResNet50_layer1.parameters(), 'lr': 0.0001},
            {'params': model.re_ResNet50_layer2.parameters(), 'lr': 0.0001},
            {'params': model.re_ResNet50_layer3.parameters(), 'lr': 0.0001},
            {'params': model.re_ResNet50_layer4.parameters(), 'lr': 0.0001},

            {'params': model.re_bn2.parameters(), 'lr': 0.0001},
            #5
            {'params': model.re_deconcv_res_up1.parameters(), 'lr': 0.0001},
            {'params': model.re_deconcv_res_up2.parameters(), 'lr': 0.0001},
            {'params': model.re_deconcv_res_up3.parameters(), 'lr': 0.0001},
            {'params': model.re_deconcv_res_up4.parameters(), 'lr': 0.0001},

            {'params': model.re_deconv_last.parameters(), 'lr': 0.0001},
            #denoise net 3
            {'params': model.conv_denoise_1.parameters(), 'lr': 0},
            {'params': model.conv_denoise_2.parameters(), 'lr': 0},
            {'params': model.conv_denoise_3.parameters(), 'lr': 0}
        ]
        , lr=args.lr, amsgrad=True, weight_decay=args.weight_decay)
        '''
        # move any existing optimizer state tensors onto the training GPU
        for state in optimizer.state.values():
            for k, v in state.items():
                if torch.is_tensor(v):
                    state[k] = v.cuda(config.cuda_id)
        print(optimizer)

        # create new csv files with only header
        with open(train_csv, 'w') as csvfile:
            writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
            writer.writeheader()
        with open(test_csv, 'w') as csvfile:
            writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
            writer.writeheader()


#    writer = SummaryWriter(log_dir='logs')

    model = model.cuda(config.cuda_id)
    #torch.save(model, './net1.pkl')
    # move optimizer state tensors to the GPU as well
    for state in optimizer.state.values():
        for k, v in state.items():
            if torch.is_tensor(v):
                state[k] = v.cuda()

    print("=> model transferred to GPU.")

    for epoch in range(args.start_epoch, args.epochs):
        train(train_loader, val_loader, model, criterion, optimizer, epoch,
              args.lr)  # train for one epoch
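
A note on the finetune branch above: it copies into the new model only those pretrained weights whose names both models share. A minimal standalone sketch of that pattern (an assumption for illustration, not this project's own utility; the helper name and its argument are hypothetical):

import torch

def load_matching_weights(model, checkpoint_path):
    # the checkpoints in these examples store the whole nn.Module under 'model'
    checkpoint = torch.load(checkpoint_path, map_location='cpu')
    src = checkpoint['model'].state_dict() if 'model' in checkpoint else checkpoint
    dst = model.state_dict()
    # keep only entries that exist in the target model with the same shape
    matched = {k: v for k, v in src.items()
               if k in dst and v.shape == dst[k].shape}
    dst.update(matched)
    model.load_state_dict(dst)
    return model
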
Example #10
0
def main():
    global args, best_result, output_directory

    if torch.cuda.device_count() > 1:
        args.batch_size = args.batch_size * torch.cuda.device_count()
        train_loader = NYUDepth_loader(args.data_path,
                                       batch_size=args.batch_size,
                                       isTrain=True)
        val_loader = NYUDepth_loader(args.data_path,
                                     batch_size=args.batch_size,
                                     isTrain=False)
    else:
        train_loader = NYUDepth_loader(args.data_path,
                                       batch_size=args.batch_size,
                                       isTrain=True)
        val_loader = NYUDepth_loader(args.data_path, isTrain=False)

    if args.resume:
        assert os.path.isfile(args.resume), \
            "=> no checkpoint found at '{}'".format(args.resume)
        print("=> loading checkpoint '{}'".format(args.resume))
        checkpoint = torch.load(args.resume)
        # args = checkpoint['args']
        # print('retained args:', args)
        start_epoch = checkpoint['epoch'] + 1
        best_result = checkpoint['best_result']
        if torch.cuda.device_count() > 1:
            model_dict = checkpoint['model'].module.state_dict(
            )  # multi-GPU checkpoints wrap the model, so the weights live under .module
        else:
            model_dict = checkpoint['model'].state_dict()
        model = DORN_nyu.DORN()
        model.load_state_dict(model_dict)
        del model_dict  # free the loaded state dict
        # optimize with SGD
        optimizer = torch.optim.SGD(model.parameters(),
                                    lr=args.lr,
                                    momentum=args.momentum)

        print("=> loaded checkpoint (epoch {})".format(checkpoint['epoch']))
    else:
        print("=> creating Model")
        model = DORN_nyu.DORN()
        print("=> model created.")
        optimizer = torch.optim.SGD(model.parameters(),
                                    lr=args.lr,
                                    momentum=args.momentum)
        start_epoch = 0
    # use multiple GPUs for training if available
    if torch.cuda.device_count():
        print("Let's use", torch.cuda.device_count(), "GPUs!")
        model = nn.DataParallel(model)

    model = model.cuda()

    # define the loss function
    criterion = criteria.ordLoss()

    # create the directory for saving results
    output_directory = utils.get_output_directory(args)
    if not os.path.exists(output_directory):
        os.makedirs(output_directory)
    best_txt = os.path.join(output_directory, 'best.txt')

    log_path = os.path.join(
        output_directory, 'logs',
        datetime.now().strftime('%b%d_%H-%M-%S') + '_' + socket.gethostname())
    if os.path.isdir(log_path):
        shutil.rmtree(log_path)
    os.makedirs(log_path)
    logger = SummaryWriter(log_path)

    for epoch in range(start_epoch, args.epochs):
        # lr = utils.adjust_learning_rate(optimizer, args.lr, epoch)  # update the learning rate

        train(train_loader, model, criterion, optimizer, epoch,
              logger)  # train for one epoch
        result, img_merge = validate(val_loader, model, epoch,
                                     logger)  # evaluate on validation set

        # remember best rmse and save checkpoint
        is_best = result.rmse < best_result.rmse
        if is_best:
            best_result = result
            with open(best_txt, 'w') as txtfile:
                txtfile.write(
                    "epoch={}\nrmse={:.3f}\nrml={:.3f}\nlog10={:.3f}\nd1={:.3f}\nd2={:.3f}\ndd31={:.3f}\nt_gpu={:.4f}\n"
                    .format(epoch, result.rmse, result.absrel, result.lg10,
                            result.delta1, result.delta2, result.delta3,
                            result.gpu_time))
            if img_merge is not None:
                img_filename = output_directory + '/comparison_best.png'
                utils.save_image(img_merge, img_filename)

        # save a checkpoint every epoch
        utils.save_checkpoint(
            {
                'args': args,
                'epoch': epoch,
                'model': model,
                'best_result': best_result,
                'optimizer': optimizer,
            }, is_best, epoch, output_directory)

    logger.close()
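
utils.save_checkpoint is called the same way in every example here; a minimal sketch of what such a helper typically does (an assumption, not the repository's actual implementation):

import os
import shutil
import torch

def save_checkpoint_sketch(state, is_best, epoch, output_directory):
    # save this epoch's checkpoint, then copy it over the best model when it improves
    filename = os.path.join(output_directory, 'checkpoint-{}.pth.tar'.format(epoch))
    torch.save(state, filename)
    if is_best:
        shutil.copyfile(filename, os.path.join(output_directory, 'model_best.pth.tar'))
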
Example #11
0
def main():
    global args, best_result, output_directory, train_csv, test_csv

    # evaluation mode
    start_epoch = 0
    if args.evaluate:
        assert os.path.isfile(args.evaluate), \
        f"[Error] Can't find the specified checkpoint at '{args.evaluate}'"
        print(f"[Info] loading the model '{args.evaluate}'")
        checkpoint = torch.load(args.evaluate)
        output_directory = os.path.dirname(args.evaluate)
        args = checkpoint['args']
        print(args)
        train_loader, val_loader = create_data_loaders(args)
        model_weights = checkpoint['model_state_dict']
        # Create model
        if args.arch == "resnet18_multistage_uncertainty" or \
           args.arch == "resnet18_multistage_uncertainty_fixs":
            model, loss_weights = create_model(args, output_size=train_loader.dataset.output_size)
        else:
            model = create_model(args, output_size=train_loader.dataset.output_size)
            loss_weights = None
        model.load_state_dict(model_weights, strict=False)
        model = model.cuda()
        print(f"[Info] Loaded best model (epoch {checkpoint['epoch']})")
        args.evaluate = True
        validate(val_loader, model, checkpoint['epoch'], write_to_file=False)
        return

    # optionally resume from a checkpoint
    elif args.resume:
        chkpt_path = args.resume
        assert os.path.isfile(chkpt_path), \
            f"[Info] No checkpoint found at '{chkpt_path}'"
        print(f"=> loading checkpoint '{chkpt_path}'")
        checkpoint = torch.load(chkpt_path)
        args = checkpoint['args']
        print(args)
        start_epoch = checkpoint['epoch'] + 1
        try:
            best_result = checkpoint['best_result']
        except KeyError:
            best_result.set_to_worst()

        # Create dataloader first
        args.validation = True
        args.workers = 8

        if (args.data == "nuscenes") and (args.modality == "rgbd") and (args.sparsifier == "uar"):
            args.sparsifier = None
        # Create dataloader
        if args.validation:
            train_loader, val_loader = create_data_loaders(args)
        else:
            train_loader = create_data_loaders(args)
        # Load from model's state dict instead
        model_weights = checkpoint['model_state_dict']
        # Create model
        if args.arch == "resnet18_multistage_uncertainty" or \
           args.arch == "resnet18_multistage_uncertainty_fixs":
            model, loss_weights = create_model(args, output_size=train_loader.dataset.output_size)
        else:
            model = create_model(args, output_size=train_loader.dataset.output_size)
            loss_weights = None
        model.load_state_dict(model_weights, strict=False)
        model = model.cuda()

        # Create optimizer
        optimizer = torch.optim.SGD(
            model.parameters(), 
            args.lr,
            momentum=args.momentum, 
            weight_decay=args.weight_decay
        )
        optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
        output_directory = os.path.dirname(os.path.abspath(chkpt_path))
        print("=> loaded checkpoint (epoch {})".format(checkpoint['epoch']))
        args.resume = True
    # Create new model
    else:
        print(args)
        # Create dataloader
        if args.validation:
            train_loader, val_loader = create_data_loaders(args)
        else:
            train_loader = create_data_loaders(args)

        # Create model
        if args.arch == "resnet18_multistage_uncertainty" or \
           args.arch == "resnet18_multistage_uncertainty_fixs":
            model, loss_weights = create_model(args, output_size=train_loader.dataset.output_size)
        else:
            model = create_model(args, output_size=train_loader.dataset.output_size)
            loss_weights = None

        # Create optimizer
        optimizer = torch.optim.SGD(
            model.parameters(), 
            args.lr,
            momentum=args.momentum, 
            weight_decay=args.weight_decay
        )
        model = model.cuda()

    # Define loss function (criterion) and optimizer
    criterion = {}
    if args.criterion == 'l2':
        criterion["depth"] = MaskedMSELoss().cuda()
    elif args.criterion == 'l1':
        criterion["depth"] = MaskedL1Loss().cuda()
    else:
        raise ValueError("[Error] Unknown criterion...")
    
    # Add smoothness loss to the criterion
    if args.arch == "resnet18_multistage_uncertainty" or \
       args.arch == "resnet18_multistage_uncertainty_fixs":
        criterion["smooth"] = SmoothnessLoss().cuda()

    # Create results folder, if not already exists
    output_directory = utils.get_output_directory(args)
    if not os.path.exists(output_directory):
        os.makedirs(output_directory)
    train_csv = os.path.join(output_directory, 'train.csv')
    test_csv = os.path.join(output_directory, 'test.csv')
    best_txt = os.path.join(output_directory, 'best.txt')

    # Create new csv files with only header
    if not args.resume:
        with open(train_csv, 'w') as csvfile:
            writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
            writer.writeheader()
        with open(test_csv, 'w') as csvfile:
            writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
            writer.writeheader()

    # Create summary writer
    log_path = os.path.join(output_directory, "logs")
    if not os.path.exists(log_path):
        os.makedirs(log_path)
    logger = SummaryWriter(log_path)

    # Main training loop
    for epoch in range(start_epoch, args.epochs):
        # Adjust the learning rate
        utils.adjust_learning_rate(optimizer, epoch, args.lr)

        # Record the learning rate summary
        for i, param_group in enumerate(optimizer.param_groups):
            old_lr = float(param_group['lr'])
            logger.add_scalar('Lr/lr_' + str(i), old_lr, epoch)

        # Perform training (train for one epoch)
        train(train_loader, model, criterion, optimizer, epoch, loss_weights, logger=logger)

        # Perform evaluation
        if args.validation:
            result, img_merge = validate(val_loader, model, epoch, logger=logger)

            is_best = result.rmse < best_result.rmse
            if is_best:
                best_result = result
                with open(best_txt, 'w') as txtfile:
                    txtfile.write("epoch={}\nmse={:.3f}\nrmse={:.3f}\nabsrel={:.3f}\nlg10={:.3f}\nmae={:.3f}\ndelta1={:.3f}\nt_gpu={:.4f}\n".
                        format(epoch, result.mse, result.rmse, result.absrel, result.lg10, result.mae, result.delta1, result.gpu_time))
                if img_merge is not None:
                    img_filename = output_directory + '/comparison_best.png'
                    utils.save_image(img_merge, img_filename)

        # Save different things depending on the mode
        if args.validation:
            utils.save_checkpoint({
                'args': args,
                'epoch': epoch,
                'arch': args.arch,
                'model_state_dict': model.state_dict(),
                'best_result': best_result,
                'optimizer_state_dict' : optimizer.state_dict(),
            }, is_best, epoch, output_directory)
        else:
            utils.save_checkpoint({
                'args': args,
                'epoch': epoch,
                'arch': args.arch,
                'model_state_dict': model.state_dict(),
                'optimizer_state_dict': optimizer.state_dict(),
            }, False, epoch, output_directory)
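
MaskedMSELoss and MaskedL1Loss appear throughout these scripts; the key idea is that the error is computed only on pixels that have valid ground-truth depth. A minimal sketch under that assumption (not the exact implementation used above):

import torch
import torch.nn as nn

class MaskedMSELossSketch(nn.Module):
    def forward(self, pred, target):
        valid = (target > 0).detach()          # pixels with ground-truth depth
        diff = pred[valid] - target[valid]
        return (diff ** 2).mean()
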
Example #12
0
def main():
    global args, best_result, output_directory, train_csv, test_csv
    print(args)
    start = 0

    # evaluation mode
    if args.evaluate:
        datasets = configuration_file.datasets_path
        valdir = os.path.join(datasets, args.data, 'val')
        print("Validation directory ", valdir)
        if args.data == 'nyudepthv2':
            from dataloaders.nyu import NYUDataset
            val_dataset = NYUDataset(valdir,
                                     split='val',
                                     modality=args.modality)
        else:
            raise RuntimeError('Dataset not found.')

        #set batch size to be 1 for validation
        val_loader = torch.utils.data.DataLoader(val_dataset,
                                                 batch_size=1,
                                                 shuffle=False,
                                                 num_workers=args.workers,
                                                 pin_memory=True)
        print("=> validation loaders created.")
        assert os.path.isfile(args.evaluate), \
            "=> no model found at '{}'".format(args.evaluate)
        print("=> loading model '{}'".format(args.evaluate))
        checkpoint = torch.load(args.evaluate)
        if type(checkpoint) is dict:
            args.start_epoch = checkpoint['epoch']
            best_result = checkpoint['best_result']
            model = checkpoint['model']
            print("=> loaded best model (epoch {})".format(
                checkpoint['epoch']))
        else:
            model = checkpoint
            args.start_epoch = 0
        output_directory = os.path.dirname(args.evaluate)
        validate(val_loader, model, args.start_epoch, write_to_file=False)

        return

    # resume from a particular checkpoint
    elif args.resume:
        chkpt_path = args.resume
        assert os.path.isfile(
            chkpt_path), "=> no checkpoint found at '{}'".format(chkpt_path)
        print("=> loading checkpoint '{}'".format(chkpt_path))
        checkpoint = torch.load(chkpt_path)
        args = checkpoint['args']
        start_epoch = checkpoint['epoch'] + 1  # load epoch number
        start = start_epoch  # resume from the checkpoint epoch
        best_result = checkpoint['best_result']  # load best result
        model = checkpoint['model']  # load model
        optimizer = checkpoint['optimizer']  # load optimizer
        output_directory = os.path.dirname(os.path.abspath(chkpt_path))
        print("=> loaded checkpoint (epoch {})".format(checkpoint['epoch']))
        train_loader, val_loader = create_data_loaders(
            args)  # create data loader
        args.resume = True

    # create new model if checkpoint does not exist
    elif args.train:
        train_loader, val_loader = create_data_loaders(
            args)  # load train and validation data
        print("=> creating Model ({}-{}) ...".format(args.arch, args.decoder))
        in_channels = len(args.modality)
        if args.arch == 'MobileNet':  # if encoder is MobileNet
            model = models.MobileNetSkipAdd(
                output_size=train_loader.dataset.output_size
            )  # MobileNet model is created
        else:
            model = models.MobileNetSkipAdd(
                output_size=train_loader.dataset.output_size
            )  # by default MobileNet

        print("=> model created.")
        optimizer = torch.optim.SGD(model.parameters(), args.lr, \
                                    momentum=args.momentum, weight_decay=args.weight_decay) # configure optimizer

        if configuration_file.GPU == True:
            if configuration_file.MULTI_GPU == True:  # training on multiple GPU
                model = torch.nn.DataParallel(model).cuda()
            else:  # training on single GPU
                model = model.cuda()
        else:
            pass

    # define loss function  and optimizer
    if args.criterion == 'l2':
        if configuration_file.GPU == True:
            criterion = MaskedMSELoss().cuda()
        else:
            criterion = MaskedMSELoss()
    elif args.criterion == 'l1':
        if configuration_file.GPU == True:
            criterion = MaskedL1Loss().cuda()
        else:
            criterion = MaskedL1Loss()

    # create results folder, if not already exists
    output_directory = utils.get_output_directory(args)

    if not os.path.exists(output_directory):  # create new directory
        os.makedirs(output_directory)
    train_csv = os.path.join(output_directory,
                             'train.csv')  # store training result
    test_csv = os.path.join(output_directory, 'test.csv')  # store test result
    best_txt = os.path.join(output_directory, 'best.txt')  # store best result

    # create new csv files with only header
    if not args.resume:
        with open(train_csv, 'w') as csvfile:
            writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
            writer.writeheader()
        with open(test_csv, 'w') as csvfile:
            writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
            writer.writeheader()

    # training starts from here
    for epoch in range(start, args.epochs):
        utils.adjust_learning_rate(optimizer, epoch, args.lr)
        train(train_loader, model, criterion, optimizer,
              epoch)  # train for one epoch
        result, img_merge = validate(val_loader, model,
                                     epoch)  # evaluate on validation set

        # remember best rmse and save checkpoint
        is_best = result.rmse < best_result.rmse  # compare result of the current epoch and best result
        if is_best:
            best_result = result
            with open(best_txt, 'w') as txtfile:
                txtfile.write(
                    "epoch={}\nmse={:.3f}\nrmse={:.3f}\nabsrel={:.3f}\nlg10={:.3f}\nmae={:.3f}\ndelta1={:.3f}\nt_gpu={:.4f}\n"
                    .format(epoch, result.mse, result.rmse, result.absrel,
                            result.lg10, result.mae, result.delta1,
                            result.gpu_time))
            if img_merge is not None:
                img_filename = output_directory + '/comparison_best.png'
                utils.save_image(img_merge, img_filename)

        utils.save_checkpoint(
            {
                'args': args,
                'epoch': epoch,
                'arch': args.arch,
                'model': model,
                'best_result': best_result,
                'optimizer': optimizer,
            }, is_best, epoch, output_directory)
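
utils.adjust_learning_rate(optimizer, epoch, args.lr) is called at the start of every epoch above; a plausible step-decay sketch of such a helper (assumed behaviour; the decay interval and factor are illustrative, not the project's actual values):

def adjust_learning_rate_sketch(optimizer, epoch, base_lr, step=5, gamma=0.1):
    # decay the base learning rate by `gamma` every `step` epochs
    lr = base_lr * (gamma ** (epoch // step))
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr
    return lr
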
Example #13
0
def main():
    global args, best_result, output_directory, train_csv, test_csv

    # create results folder, if not already exists
    output_directory = utils.get_output_directory(args)
    if not os.path.exists(output_directory):
        os.makedirs(output_directory)
    # create the result files
    train_csv = os.path.join(output_directory, 'train.csv')
    test_csv = os.path.join(output_directory, 'test.csv')
    best_txt = os.path.join(output_directory, 'best.txt')

    # define loss function (criterion) and optimizer
    if args.criterion == 'l2':
        # mean squared error
        criterion = criteria.MaskedMSELoss().cuda()
    elif args.criterion == 'l1':
        criterion = criteria.MaskedL1Loss().cuda()

    # sparsifier is a class for generating random sparse depth input from the ground truth
    sparsifier = None
    max_depth = args.max_depth if args.max_depth >= 0.0 else np.inf
    if args.sparsifier == UniformSampling.name:
        sparsifier = UniformSampling(num_samples=args.num_samples,
                                     max_depth=max_depth)
    elif args.sparsifier == SimulatedStereo.name:
        sparsifier = SimulatedStereo(num_samples=args.num_samples,
                                     max_depth=max_depth)

    # Data loading code
    print("=> creating data loaders ...")
    traindir = os.path.join('data', args.data, 'train')
    valdir = os.path.join('data', args.data, 'val')

    if args.data == 'nyudepthv2':
        # import the dataset class lazily, only when needed
        from dataloaders.nyu_dataloader import NYUDataset
        train_dataset = NYUDataset(traindir,
                                   type='train',
                                   modality=args.modality,
                                   sparsifier=sparsifier)
        val_dataset = NYUDataset(valdir,
                                 type='val',
                                 modality=args.modality,
                                 sparsifier=sparsifier)

    elif args.data == 'kitti':
        from dataloaders.kitti_dataloader import KITTIDataset
        train_dataset = KITTIDataset(traindir,
                                     type='train',
                                     modality=args.modality,
                                     sparsifier=sparsifier)
        val_dataset = KITTIDataset(valdir,
                                   type='val',
                                   modality=args.modality,
                                   sparsifier=sparsifier)

    else:
        raise RuntimeError(
            'Dataset not found. ' +
            'The dataset must be either nyudepthv2 or kitti.')

    train_loader = torch.utils.data.DataLoader(
        train_dataset,
        batch_size=args.batch_size,
        shuffle=True,
        num_workers=args.workers,
        pin_memory=True,
        sampler=None,
        worker_init_fn=lambda work_id: np.random.seed(work_id))
    # worker_init_fn ensures different sampling patterns for each data loading thread

    # set batch size to be 1 for validation
    val_loader = torch.utils.data.DataLoader(val_dataset,
                                             batch_size=1,
                                             shuffle=False,
                                             num_workers=args.workers,
                                             pin_memory=True)
    print("=> data loaders created.")

    # evaluation mode: load the best model and only run validation
    if args.evaluate:
        best_model_filename = os.path.join(output_directory,
                                           'model_best.pth.tar')
        assert os.path.isfile(best_model_filename), \
        "=> no best model found at '{}'".format(best_model_filename)
        print("=> loading best model '{}'".format(best_model_filename))
        checkpoint = torch.load(best_model_filename)
        args.start_epoch = checkpoint['epoch']
        best_result = checkpoint['best_result']
        model = checkpoint['model']
        print("=> loaded best model (epoch {})".format(checkpoint['epoch']))
        validate(val_loader, model, checkpoint['epoch'], write_to_file=False)
        return

    # optionally resume from a checkpoint
    elif args.resume:
        assert os.path.isfile(args.resume), \
            "=> no checkpoint found at '{}'".format(args.resume)
        print("=> loading checkpoint '{}'".format(args.resume))
        checkpoint = torch.load(args.resume)
        args.start_epoch = checkpoint['epoch'] + 1
        best_result = checkpoint['best_result']
        model = checkpoint['model']
        optimizer = checkpoint['optimizer']
        print("=> loaded checkpoint (epoch {})".format(checkpoint['epoch']))

    # create a new model and train it
    else:
        # define model
        print("=> creating Model ({}-{}) ...".format(args.arch, args.decoder))
        in_channels = len(
            args.modality)  # in_channels equals the length of the modality string; 'rgbd' gives 4 channels
        # only two encoder depths are offered here: 50 or 18
        if args.arch == 'resnet50':  # instantiate the ResNet model; in_channels follows the input modality
            model = ResNet(layers=50,
                           decoder=args.decoder,
                           output_size=train_dataset.output_size,
                           in_channels=in_channels,
                           pretrained=args.pretrained)
        elif args.arch == 'resnet18':
            model = ResNet(layers=18,
                           decoder=args.decoder,
                           output_size=train_dataset.output_size,
                           in_channels=in_channels,
                           pretrained=args.pretrained)
        print("=> model created.")

        optimizer = torch.optim.SGD(model.parameters(), args.lr, \
            momentum=args.momentum, weight_decay=args.weight_decay)

        # create new csv files with only header
        with open(train_csv, 'w') as csvfile:
            writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
            writer.writeheader()
        with open(test_csv, 'w') as csvfile:
            writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
            writer.writeheader()

    # model = torch.nn.DataParallel(model).cuda() # for multi-gpu training
    model = model.cuda()
    # print(model)
    print("=> model transferred to GPU.")

    for epoch in range(args.start_epoch, args.epochs):
        utils.adjust_learning_rate(optimizer, epoch, args.lr)
        train(train_loader, model, criterion, optimizer,
              epoch)  # train for one epoch
        result, img_merge = validate(
            val_loader, model,
            epoch)  # evaluate on the validation set after every training epoch

        # remember best rmse and save checkpoint
        is_best = result.rmse < best_result.rmse
        if is_best:
            best_result = result
            with open(best_txt, 'w') as txtfile:
                txtfile.write(
                    "epoch={}\nmse={:.3f}\nrmse={:.3f}\nabsrel={:.3f}\nlg10={:.3f}\nmae={:.3f}\ndelta1={:.3f}\nt_gpu={:.4f}\n"
                    .format(epoch, result.mse, result.rmse, result.absrel,
                            result.lg10, result.mae, result.delta1,
                            result.gpu_time))
            if img_merge is not None:
                img_filename = output_directory + '/comparison_best.png'
                utils.save_image(img_merge, img_filename)

        utils.save_checkpoint(
            {
                'args': args,
                'epoch': epoch,
                'arch': args.arch,
                'model': model,
                'best_result': best_result,
                'optimizer': optimizer,
            }, is_best, epoch, output_directory)
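
The sparsifier (e.g. UniformSampling) used above turns dense ground-truth depth into the sparse depth channel of the 'rgbd' modality. A minimal sketch of uniform random sampling under that assumption (the function name is hypothetical):

import numpy as np

def uniform_sparse_depth_sketch(depth, num_samples, max_depth=np.inf):
    # keep roughly `num_samples` randomly chosen valid depth pixels, zero out the rest
    valid = (depth > 0) & (depth <= max_depth)
    n_valid = max(int(valid.sum()), 1)
    keep = np.random.uniform(size=depth.shape) < float(num_samples) / n_valid
    sparse = np.zeros_like(depth)
    sparse[valid & keep] = depth[valid & keep]
    return sparse
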
Example #14
0
def main():
    global args, best_result, output_directory

    # set random seed
    torch.manual_seed(args.manual_seed)
    torch.cuda.manual_seed(args.manual_seed)
    np.random.seed(args.manual_seed)
    random.seed(args.manual_seed)

    if torch.cuda.device_count() > 1:
        print("Let's use", torch.cuda.device_count(), "GPUs!")
        args.batch_size = args.batch_size * torch.cuda.device_count()
    else:
        print("Let's use GPU ", torch.cuda.current_device())

    train_loader, val_loader = create_loader(args)

    if args.resume:
        assert os.path.isfile(args.resume), \
            "=> no checkpoint found at '{}'".format(args.resume)
        print("=> loading checkpoint '{}'".format(args.resume))
        checkpoint = torch.load(args.resume)
        start_epoch = 0
        # start_epoch = checkpoint['epoch'] + 1
        # best_result = checkpoint['best_result']
        # optimizer = checkpoint['optimizer']

        # solve 'out of memory'
        model = checkpoint['model']
        optimizer = torch.optim.Adam(model.parameters(),
                                     lr=args.lr,
                                     betas=(0.9, 0.999))
        # optimizer = torch.optim.SGD(model.parameters(), lr=args.lr, momentum=args.momentum, weight_decay=args.weight_decay)
        print("=> loaded checkpoint (epoch {})".format(checkpoint['epoch']))

        # clear memory
        del checkpoint
        # del model_dict
        torch.cuda.empty_cache()
    else:
        print("=> creating Model")
        # input_shape = [args.batch_size,3,256,512]
        model = UNet(3, 1)
        print("=> model created.")
        start_epoch = 0

        print('Number of model parameters: {}'.format(
            sum([p.data.nelement() for p in model.parameters()])))

        optimizer = torch.optim.Adam(model.parameters(),
                                     lr=args.lr,
                                     betas=(0.9, 0.999))
        # optimizer = torch.optim.SGD(model.parameters(), lr=args.lr, momentum=args.momentum, weight_decay=args.weight_decay)

        # You can use DataParallel() whether you use Multi-GPUs or not
        model = nn.DataParallel(model).cuda()

    # when training, use ReduceLROnPlateau to reduce the learning rate
    scheduler = lr_scheduler.ReduceLROnPlateau(optimizer,
                                               'min',
                                               patience=args.lr_patience)

    # loss function
    criterion = criteria.myL1Loss()
    # criterion = nn.SmoothL1Loss()
    # create directory path
    output_directory = utils.get_output_directory(args)
    if not os.path.exists(output_directory):
        os.makedirs(output_directory)
    best_txt = os.path.join(output_directory, 'best.txt')
    config_txt = os.path.join(output_directory, 'config.txt')

    # write training parameters to config file
    if not os.path.exists(config_txt):
        with open(config_txt, 'w') as txtfile:
            args_ = vars(args)
            args_str = ''
            for k, v in args_.items():
                args_str = args_str + str(k) + ':' + str(v) + ',\t\n'
            txtfile.write(args_str)

    for epoch in range(start_epoch, args.epochs):

        # remember change of the learning rate
        old_lr = 0.0
        # adjust_learning_rate(optimizer,epoch)
        for i, param_group in enumerate(optimizer.param_groups):
            old_lr = float(param_group['lr'])
        print("lr: %f" % old_lr)

        train(train_loader, model, criterion, optimizer,
              epoch)  # train for one epoch
        result, img_merge = validate(val_loader, model,
                                     epoch)  # evaluate on validation set

        # remember best mae and save checkpoint
        is_best = result.mae < best_result.mae
        if is_best:
            best_result = result
            with open(best_txt, 'w') as txtfile:
                txtfile.write("epoch={}, mae={:.3f}, "
                              "t_gpu={:.4f}".format(epoch, result.mae,
                                                    result.gpu_time))
            if img_merge is not None:
                img_filename = output_directory + '/comparison_best.png'
                utils.save_image(img_merge, img_filename)

        # save checkpoint for each epoch
        utils.save_checkpoint(
            {
                'args': args,
                'epoch': epoch,
                'model': model,
                'best_result': best_result,
                'optimizer': optimizer,
            }, is_best, epoch, output_directory)

        # when mae doesn't fall, reduce learning rate
        scheduler.step(result.mae)
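
The scheduler.step(result.mae) call above is the standard ReduceLROnPlateau pattern: with mode 'min', the learning rate is multiplied by the scheduler's factor (0.1 by default) once the watched metric has failed to improve for more than `patience` epochs. A tiny self-contained illustration:

import torch
from torch.optim import lr_scheduler

param = torch.nn.Parameter(torch.zeros(1))
opt = torch.optim.Adam([param], lr=1e-3)
sched = lr_scheduler.ReduceLROnPlateau(opt, 'min', patience=2)
for val_mae in [0.50, 0.50, 0.50, 0.50, 0.50]:   # the metric never improves
    sched.step(val_mae)
print(opt.param_groups[0]['lr'])                 # reduced below the initial 1e-3
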
Example #15
0
def main():
    global args, best_result, output_directory, train_csv, test_csv
    # Data loading code
    datasets = '/content/drive/MyDrive'
    #valdir = os.path.join(datasets, 'Datasets', args.data, 'val')
    #valdir = '/content/drive/MyDrive/Datasets/Nyudepthv2Previous/nyudepthv2/val/official/'
    valdir = os.path.join(datasets, 'Datasets', 'Nyudepthv2Previous',
                          args.data, 'val')

    if args.data == 'nyudepthv2':
        from dataloaders.nyu import NYUDataset
        val_dataset = NYUDataset(valdir, split='val', modality=args.modality)

    elif args.data == 'kitti':
        from dataloaders.kitti import KITTIDataset
        val_dataset = KITTIDataset(valdir, type='val', modality=args.modality)
    else:
        raise RuntimeError('Dataset not found.')

    # set batch size to be 1 for validation
    val_loader = torch.utils.data.DataLoader(val_dataset,
                                             batch_size=1,
                                             shuffle=False,
                                             num_workers=args.workers,
                                             pin_memory=True)
    print("=> validation loaders created.")

    # evaluation mode
    if args.evaluate:
        assert os.path.isfile(args.evaluate), \
        "=> no model found at '{}'".format(args.evaluate)
        print("=> loading model '{}'".format(args.evaluate))
        checkpoint = torch.load(args.evaluate)
        if type(checkpoint) is dict:
            args.start_epoch = checkpoint['epoch']
            best_result = checkpoint['best_result']
            model = checkpoint['model']
            print("=> loaded best model (epoch {})".format(
                checkpoint['epoch']))
        else:
            model = checkpoint
            args.start_epoch = 0
        output_directory = os.path.dirname(args.evaluate)
        validate(val_loader, model, args.start_epoch, write_to_file=False)

        return

    # training mode
    # resume from a check point
    elif args.resume:
        print("Resume")
        chkpt_path = args.resume
        assert os.path.isfile(
            chkpt_path), "=> no checkpoint found at '{}'".format(chkpt_path)
        print("=> loading checkpoint '{}'".format(chkpt_path))
        checkpoint = torch.load(chkpt_path)
        args = checkpoint['args']
        start_epoch = checkpoint['epoch'] + 1
        best_result = checkpoint['best_result']
        model = checkpoint['model']
        optimizer = checkpoint['optimizer']
        output_directory = os.path.dirname(os.path.abspath(chkpt_path))
        print("=> loaded checkpoint (epoch {})".format(checkpoint['epoch']))
        train_loader, val_loader = create_data_loaders(args)
        args.resume = True

    # create new model
    elif args.train:
        print("Inside Train 1----------->")
        train_loader, val_loader = create_data_loaders(args)
        print("=> creating Model ({}-{}) ...".format(args.arch, args.decoder))
        in_channels = len(args.modality)
        if args.arch == 'MobileNet':
            #model = models.MobileNetSkipAdd(output_size=train_loader.dataset.output_size)
            model = ResNetSkipAdd(layers=50,
                                  output_size=train_loader.dataset.output_size,
                                  in_channels=in_channels,
                                  pretrained=args.pretrained)
            #print("Mobile Net model ",str(train_loader.dataset.output_size)
        elif args.arch == 'resnet50':
            model = ResNet(layers=50,
                           decoder=args.decoder,
                           output_size=train_loader.dataset.output_size,
                           in_channels=in_channels,
                           pretrained=args.pretrained)

        else:
            model = models.MobileNetSkipAdd(output_size=train_loader.dataset.
                                            output_size)  #by default MobileNet

        print("=> model created.")
        optimizer = torch.optim.SGD(model.parameters(), args.lr, \
                                    momentum=args.momentum, weight_decay=args.weight_decay)

        # model = torch.nn.DataParallel(model).cuda() # for multi-gpu training
        model = model.cuda()

    # define loss function (criterion) and optimizer
    if args.criterion == 'l2':
        criterion = MaskedMSELoss().cuda()
    elif args.criterion == 'l1':
        criterion = MaskedL1Loss().cuda()

    # create results folder, if it does not already exist
    print("Arguments ")
    print(args)
    output_directory = utils.get_output_directory(args)

    if not os.path.exists(output_directory):
        os.makedirs(output_directory)
    train_csv = os.path.join(output_directory, 'train.csv')
    test_csv = os.path.join(output_directory, 'test.csv')
    best_txt = os.path.join(output_directory, 'best.txt')

    # create new csv files with only header
    if not args.resume:
        with open(train_csv, 'w') as csvfile:
            writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
            writer.writeheader()
        with open(test_csv, 'w') as csvfile:
            writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
            writer.writeheader()

    # Training starts from here
    if args.train == True:
        print("Training...........(args.train)", args.train)
        start = 0
        for epoch in range(start, args.epochs):
            utils.adjust_learning_rate(optimizer, epoch, args.lr)
            train(train_loader, model, criterion, optimizer,
                  epoch)  # train for one epoch
            result, img_merge = validate(val_loader, model,
                                         epoch)  # evaluate on validation set

            # remember best rmse and save checkpoint
            is_best = result.rmse < best_result.rmse
            if is_best:
                best_result = result
                with open(best_txt, 'w') as txtfile:
                    txtfile.write(
                        "epoch={}\nmse={:.3f}\nrmse={:.3f}\nabsrel={:.3f}\nlg10={:.3f}\nmae={:.3f}\ndelta1={:.3f}\nt_gpu={:.4f}\n"
                        .format(epoch, result.mse, result.rmse, result.absrel,
                                result.lg10, result.mae, result.delta1,
                                result.gpu_time))
                if img_merge is not None:
                    img_filename = output_directory + '/comparison_best.png'
                    utils.save_image(img_merge, img_filename)

            utils.save_checkpoint(
                {
                    'args': args,
                    'epoch': epoch,
                    'arch': args.arch,
                    'model': model,
                    'best_result': best_result,
                    'optimizer': optimizer,
                }, is_best, epoch, output_directory)

    elif args.resume == True:
        print("Resume......................")
        start = start_epoch
        for epoch in range(start, args.epochs):
            print("Epoch inside resume ", epoch)
            utils.adjust_learning_rate(optimizer, epoch, args.lr)
            train(train_loader, model, criterion, optimizer,
                  epoch)  # train for one epoch
            result, img_merge = validate(val_loader, model,
                                         epoch)  # evaluate on validation set

            # remember best rmse and save checkpoint
            is_best = result.rmse < best_result.rmse
            if is_best:
                best_result = result
                with open(best_txt, 'w') as txtfile:
                    txtfile.write(
                        "epoch={}\nmse={:.3f}\nrmse={:.3f}\nabsrel={:.3f}\nlg10={:.3f}\nmae={:.3f}\ndelta1={:.3f}\nt_gpu={:.4f}\n"
                        .format(epoch, result.mse, result.rmse, result.absrel,
                                result.lg10, result.mae, result.delta1,
                                result.gpu_time))
                if img_merge is not None:
                    img_filename = output_directory + '/comparison_best.png'
                    utils.save_image(img_merge, img_filename)

            utils.save_checkpoint(
                {
                    'args': args,
                    'epoch': epoch,
                    'arch': args.arch,
                    'model': model,
                    'best_result': best_result,
                    'optimizer': optimizer,
                }, is_best, epoch, output_directory)
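
Every script here logs metrics through csv.DictWriter: the header is written once when a new run starts and one row of averages is appended per epoch. A self-contained illustration (the field names are examples, not the scripts' exact `fieldnames`):

import csv

example_fields = ['epoch', 'rmse', 'mae', 'delta1']
with open('train_example.csv', 'w', newline='') as csvfile:
    writer = csv.DictWriter(csvfile, fieldnames=example_fields)
    writer.writeheader()
with open('train_example.csv', 'a', newline='') as csvfile:
    writer = csv.DictWriter(csvfile, fieldnames=example_fields)
    writer.writerow({'epoch': 0, 'rmse': 0.512, 'mae': 0.203, 'delta1': 0.880})
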
Example #16
0
def main():
    global args, output_directory, train_csv, test_csvs, mm_scaler
    # MinMax-Scaler!
    os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu
    # evaluation mode
    start_epoch = 0
    if args.evaluate:
        assert os.path.isfile(args.evaluate), \
        "=> no best model found at '{}'".format(args.evaluate)
        print("=> loading best model '{}'".format(args.evaluate))
        checkpoint = torch.load(args.evaluate)
        output_directory = os.path.dirname(args.evaluate)
        args = checkpoint['args']
        start_epoch = checkpoint['epoch'] + 1
        model = checkpoint['model']
        print("=> loaded best model (epoch {})".format(checkpoint['epoch']))
        _, val_loader = create_data_loaders(args, mm_scaler)
        args.evaluate = True
        validate(val_loader, model, checkpoint['epoch'], write_to_file=False)
        return

    # optionally resume from a checkpoint
    elif args.resume:
        chkpt_path = args.resume
        assert os.path.isfile(chkpt_path), \
            "=> no checkpoint found at '{}'".format(chkpt_path)
        print("=> loading checkpoint '{}'".format(chkpt_path))
        checkpoint = torch.load(chkpt_path)
        args = checkpoint['args']
        start_epoch = checkpoint['epoch'] + 1
        best_result = checkpoint['best_result']
        model = checkpoint['model']
        optimizer = checkpoint['optimizer']
        output_directory = os.path.dirname(os.path.abspath(chkpt_path))
        print("=> loaded checkpoint (epoch {})".format(checkpoint['epoch']))
        train_loader, val_loader = create_data_loaders(args, mm_scaler)
        args.resume = True

    # create new model
    else:
        train_loader, val_loader = create_data_loaders(args, mm_scaler)
        print("=> creating Model ({}) ...".format(args.arch))
        from models.rnn_model import Model
        if args.arch == 'LSTM':
            model = Model(input_dim=args.x_dim,
                          hidden_dim=args.hidden_size,
                          Y_target=args.y_target,
                          model_type="lstm")
        elif args.arch == 'GRU':
            model = Model(input_dim=args.x_dim,
                          hidden_dim=args.hidden_size,
                          Y_target=args.y_target,
                          model_type="gru")
        elif args.arch == 'RNN':
            model = Model(input_dim=args.x_dim,
                          hidden_dim=args.hidden_size,
                          Y_target=args.y_target,
                          model_type="rnn")
        print("=> model created.")

        model_parameters = list(model.parameters())
        params = sum([np.prod(p.size()) for p in model_parameters])
        print("Num. of parameters: ", params)

        optimizer = torch.optim.Adam(model.parameters(),
                                     args.lr,
                                     weight_decay=args.weight_decay)

        # model = torch.nn.DataParallel(model).cuda() # for multi-gpu training
        model = model.cuda()

    criterion = nn.MSELoss().cuda()
    # create results folder, if not already exists
    output_directory = utils.get_output_directory(args)
    if not os.path.exists(output_directory):
        os.makedirs(output_directory)
    train_csv = os.path.join(output_directory, 'train.csv')
    test_csvs = []
    for i in range(NUM_VAL_CSVS):
        test_csv_name = 'test_' + str(i) + '.csv'
        test_csv_each = os.path.join(output_directory, test_csv_name)
        test_csvs.append(test_csv_each)
    test_csv_total = os.path.join(output_directory, 'test.csv')
    test_csvs.append(test_csv_total)

    # the extra 1 accounts for the combined 'test.csv'
    assert NUM_VAL_CSVS + 1 == len(test_csvs), "Something's wrong!"

    # create new csv files with only header
    if not args.resume:
        with open(train_csv, 'w') as csvfile:
            writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
            writer.writeheader()
        for test_csv in test_csvs:
            with open(test_csv, 'w') as csvfile:
                writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
                writer.writeheader()

    best_rmse = 1000000000

    print("=> Learning start.")
    for epoch in range(start_epoch, args.epochs):
        utils.adjust_learning_rate(optimizer, epoch, args.lr, args.decay_rate,
                                   args.decay_step)
        print("=> On training...")
        train(train_loader, model, criterion, optimizer,
              epoch)  # train for one epoch
        if epoch % args.validation_interval == 0:
            print("=> On validating...")
            result_rmse, results_list = validate(
                val_loader, model, epoch)  # evaluate on validation set
            # Save validation results
            print("=> On drawing results...")
            pngname = os.path.join(
                output_directory,
                str(epoch).zfill(2) + "_" + str(round(result_rmse, 5)) +
                ".png")
            utils.plot_trajectory(pngname, results_list[:-1])
            is_best = best_rmse > result_rmse
            if is_best:
                best_rmse = result_rmse
                best_name = os.path.join(output_directory, "best.csv")
                with open(best_name, 'w', newline='') as csvfile:
                    writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
                    writer.writeheader()
                    for result_container in results_list:
                        avg = result_container.result
                        writer.writerow({
                            'rmse': avg.rmse,
                            'mean': avg.mean,
                            'median': avg.median,
                            'var': avg.var,
                            'max': avg.error_max
                        })

                    writer.writerow({
                        'rmse': epoch,
                        'mean': 0,
                        'median': 0,
                        'var': 0,
                        'max': 0
                    })

                utils.save_output(results_list, epoch, output_directory)
                utils.save_checkpoint(
                    {
                        'args': args,
                        'epoch': epoch,
                        'arch': args.arch,
                        'model': model,
                        'optimizer': optimizer,
                        'scaler': mm_scaler
                    }, is_best, epoch, output_directory)