Example #1
def restore_checkpoint(self, opt):
    if opt.from_epoch != 0:
        util.restore_checkpoint_from_epoch(opt, self, ["network", "optim", "sched"])
    elif opt.load is not None:
        util.restore_checkpoint(opt, self, opt.load, ["network", "optim", "sched"])
    elif opt.imagenet_enc or opt.pretrained_dec is not None:
        pass
    else:
        print(util.magenta("training from scratch..."))
Example #2
def main(device=torch.device('cuda:0')):
    # CLI arguments
    parser = arg.ArgumentParser(
        description='We all know what we are doing. Fighting!')
    parser.add_argument("--datasize",
                        "-d",
                        default="small",
                        type=str,
                        help="data size you want to use, small, medium, total")
    # Parsing
    args = parser.parse_args()
    # Data loaders
    datasize = args.datasize
    pathname = "data/nyu.zip"
    tr_loader, va_loader, te_loader = getTrainingValidationTestingData(
        datasize, pathname, batch_size=config("unet.batch_size"))

    # Model
    model = Net()

    # define loss function
    # criterion = torch.nn.L1Loss()

    # Attempt to restore the latest checkpoint, if one exists
    print("Loading unet...")
    model, start_epoch, stats = util.restore_checkpoint(
        model, util.config("unet.checkpoint"))
    acc, loss = util.evaluate_model(model, te_loader, device)
    # axes = util.make_training_plot()
    print(f'Test Accuracy: {acc}')
    print(f'Test Loss: {loss}')
Example #3
def main(device=torch.device('cuda:0')):
    """Print performance metrics for model at specified epoch."""
    # Data loaders
    pathname = "data/nyu.zip"
    tr_loader, va_loader, te_loader = getTrainingValidationTestingData(pathname,
                                                                       batch_size=util.config("unet.batch_size"))

    # Model
    model = Net()

    # define loss function
    # criterion = torch.nn.L1Loss()

    # Attempt to restore the latest checkpoint, if one exists
    print("Loading unet...")
    model, start_epoch, stats = util.restore_checkpoint(model, util.config("unet.checkpoint"))
    acc, loss = util.evaluate_model(model, te_loader, device)
    # axes = util.make_training_plot()
    print(f'Test Accuracy: {acc}')
    print(f'Test Loss: {loss}')
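Examples #2 and #3 call the helper as model, start_epoch, stats = util.restore_checkpoint(model, util.config("unet.checkpoint")). A minimal sketch of a helper with that shape, assuming checkpoints are dicts saved as epoch=<N>.checkpoint.pth.tar files with "state_dict", "epoch", and "stats" keys (the file layout and key names are illustrative assumptions, not the actual util implementation):

import os

import torch


def restore_checkpoint(model, checkpoint_dir):
    """Restore the most recent checkpoint in checkpoint_dir, if one exists.

    Returns (model, start_epoch, stats); with no checkpoint on disk this is
    (model, 0, []) and training starts from scratch.
    """
    if not os.path.isdir(checkpoint_dir):
        return model, 0, []
    # Assumed naming scheme: epoch=<N>.checkpoint.pth.tar
    epochs = [int(f.split("=")[1].split(".")[0])
              for f in os.listdir(checkpoint_dir)
              if f.startswith("epoch=") and f.endswith(".checkpoint.pth.tar")]
    if not epochs:
        return model, 0, []
    path = os.path.join(checkpoint_dir,
                        f"epoch={max(epochs)}.checkpoint.pth.tar")
    checkpoint = torch.load(path, map_location="cpu")
    model.load_state_dict(checkpoint["state_dict"])
    return model, checkpoint["epoch"], checkpoint["stats"]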
Example #4
def restore_checkpoint(self, opt):
    util.restore_checkpoint(opt, self, opt.load, ["network"])
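Examples #1 and #4 use a different, keyed form: util.restore_checkpoint(opt, self, opt.load, keys), where keys names sub-modules such as "network", "optim", and "sched". A hedged sketch of that pattern, assuming the caller exposes each listed key as an attribute with load_state_dict and that opt.load points at a torch.save'd dict keyed the same way (all assumptions, not the library's actual contract):

import torch


def restore_checkpoint(opt, obj, load_name, keys):
    """Selectively restore the attributes named in keys from a checkpoint.

    Hypothetical layout: the checkpoint at load_name is a dict whose
    entries ("network", "optim", "sched", ...) hold matching state_dicts.
    """
    checkpoint = torch.load(load_name, map_location="cpu")
    for key in keys:
        getattr(obj, key).load_state_dict(checkpoint[key])
        print(f"restored {key} from {load_name}")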
Example #5
def main(device=torch.device('cuda:0')):
    # CLI arguments
    parser = arg.ArgumentParser(
        description='We all know what we are doing. Fighting!')
    parser.add_argument("--datasize",
                        "-d",
                        default="small",
                        type=str,
                        help="data size you want to use, small, medium, total")
    # Parsing
    args = parser.parse_args()
    # Data loaders
    datasize = args.datasize
    pathname = "data/nyu.zip"
    tr_loader, va_loader, te_loader = getTrainingValidationTestingData(
        datasize, pathname, batch_size=config("unet.batch_size"))

    # Model
    model = Net()

    # TODO: define loss function and optimizer
    learning_rate = util.config("unet.learning_rate")
    criterion = DepthLoss(0.1)
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
    number_of_epochs = 10
    #

    # print("Number of float-valued parameters:", util.count_parameters(model))

    # Attempt to restore the latest checkpoint, if one exists
    print("Loading unet...")
    model, start_epoch, stats = util.restore_checkpoint(
        model, util.config("unet.checkpoint"))

    # axes = utils.make_training_plot()

    # Evaluate the randomly initialized model
    # evaluate_epoch(
    #     axes, tr_loader, va_loader, te_loader, model, criterion, start_epoch, stats
    # )
    # loss = criterion()

    # initial val loss for early stopping
    # prev_val_loss = stats[0][1]

    running_va_loss = []
    running_va_acc = []
    running_tr_loss = []
    running_tr_acc = []
    # TODO: define patience for early stopping
    # patience = 1
    # curr_patience = 0
    #
    tr_acc, tr_loss = util.evaluate_model(model, tr_loader, device)
    acc, loss = util.evaluate_model(model, va_loader, device)
    running_va_acc.append(acc)
    running_va_loss.append(loss)
    running_tr_acc.append(tr_acc)
    running_tr_loss.append(tr_loss)

    # Loop over the entire dataset multiple times
    # for epoch in range(start_epoch, config('cnn.num_epochs')):
    epoch = start_epoch
    # while curr_patience < patience:
    while epoch < number_of_epochs:
        # Train model
        util.train_epoch(tr_loader, model, criterion, optimizer, device)
        tr_acc, tr_loss = util.evaluate_model(model, tr_loader, device)
        va_acc, va_loss = util.evaluate_model(model, va_loader, device)
        running_va_acc.append(va_acc)
        running_va_loss.append(va_loss)
        running_tr_acc.append(tr_acc)
        running_tr_loss.append(tr_loss)
        # Evaluate model
        # evaluate_epoch(
        #     axes, tr_loader, va_loader, te_loader, model, criterion, epoch + 1, stats
        # )

        # Save model parameters
        util.save_checkpoint(model, epoch + 1, util.config("unet.checkpoint"),
                             stats)

        # update early stopping parameters
        """
        curr_patience, prev_val_loss = early_stopping(
            stats, curr_patience, prev_val_loss
        )
        """

        epoch += 1
    print("Finished Training")
    # Save figure and keep plot open
    # utils.save_training_plot()
    # utils.hold_training_plot()
    util.make_plot(running_tr_loss, running_tr_acc, running_va_loss,
                   running_va_acc)
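The loop above persists progress with util.save_checkpoint(model, epoch + 1, util.config("unet.checkpoint"), stats). A matching sketch, under the same assumed epoch=<N>.checkpoint.pth.tar layout as the restore sketch after Example #3 (illustrative, not the actual util code):

import os

import torch


def save_checkpoint(model, epoch, checkpoint_dir, stats):
    """Write the model weights and running stats for the given epoch."""
    os.makedirs(checkpoint_dir, exist_ok=True)
    state = {
        "epoch": epoch,
        "state_dict": model.state_dict(),
        "stats": stats,
    }
    torch.save(state,
               os.path.join(checkpoint_dir,
                            f"epoch={epoch}.checkpoint.pth.tar"))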
Example #6
def main(device=torch.device('cuda:0')):
    """Train CNN and show training plots."""
    # Data loaders
    """
    if check_for_augmented_data("./data"):
        tr_loader, va_loader, te_loader, _ = get_train_val_test_loaders(
            task="target", batch_size=config("cnn.batch_size"), augment=True
        )
    else:
        tr_loader, va_loader, te_loader, _ = get_train_val_test_loaders(
            task="target",
            batch_size=config("cnn.batch_size"),
        )
    """
    # pathname = "data/nyu_depth.zip"
    pathname = "data/nyu_small.zip"
    tr_loader, va_loader, te_loader = getTrainingValidationTestingData(pathname,
                                                                       batch_size=util.config("unet.batch_size"))

    # Model
    model = Net()

    # TODO: define loss function and optimizer
    learning_rate = util.config("unet.learning_rate")
    criterion = DepthLoss(0.1)
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
    number_of_epochs = 10
    #

    # print("Number of float-valued parameters:", util.count_parameters(model))

    # Attempt to restore the latest checkpoint, if one exists
    print("Loading unet...")
    model, start_epoch, stats = util.restore_checkpoint(model, util.config("unet.checkpoint"))

    # axes = utils.make_training_plot()

    # Evaluate the randomly initialized model
    # evaluate_epoch(
    #     axes, tr_loader, va_loader, te_loader, model, criterion, start_epoch, stats
    # )
    # loss = criterion()

    # initial val loss for early stopping
    # prev_val_loss = stats[0][1]

    running_va_loss = []
    running_va_acc = []
    running_tr_loss = []
    running_tr_acc = []
    # TODO: define patience for early stopping
    # patience = 1
    # curr_patience = 0
    #
    tr_acc, tr_loss = util.evaluate_model(model, tr_loader, device)
    acc, loss = util.evaluate_model(model, va_loader, device)
    running_va_acc.append(acc)
    running_va_loss.append(loss)
    running_tr_acc.append(tr_acc)
    running_tr_loss.append(tr_loss)

    # Loop over the entire dataset multiple times
    # for epoch in range(start_epoch, config('cnn.num_epochs')):
    epoch = start_epoch
    # while curr_patience < patience:
    while epoch < number_of_epochs:
        # Train model
        util.train_epoch(tr_loader, model, criterion, optimizer)
        tr_acc, tr_loss = util.evaluate_model(model, tr_loader, device)
        va_acc, va_loss = util.evaluate_model(model, va_loader, device)
        running_va_acc.append(va_acc)
        running_va_loss.append(va_loss)
        running_tr_acc.append(tr_acc)
        running_tr_loss.append(tr_loss)
        # Evaluate model
        # evaluate_epoch(
        #     axes, tr_loader, va_loader, te_loader, model, criterion, epoch + 1, stats
        # )

        # Save model parameters
        util.save_checkpoint(model, epoch + 1, util.config("unet.checkpoint"), stats)

        # update early stopping parameters
        """
        curr_patience, prev_val_loss = early_stopping(
            stats, curr_patience, prev_val_loss
        )
        """

        epoch += 1
    print("Finished Training")
    # Save figure and keep plot open
    # utils.save_training_plot()
    # utils.hold_training_plot()
    util.make_plot(running_tr_loss, running_tr_acc, running_va_loss, running_va_acc)
Example #7
    resumable = args.resume and util.is_resumable(args.exp_dir)
    os.makedirs(args.exp_dir, exist_ok=True)
    if not resumable:
        util.save_args(args, args.exp_dir)

    # Seed
    random = np.random.RandomState(args.seed)

    dataloaders, pos_prop = wrappers.load_data(args,
                                               random_state=random,
                                               use_random_transpose=True)
    model, optimizer, loss = wrappers.build_mvae(args, pos_prop=pos_prop)

    # If resume, load metrics; otherwise init metrics
    if resumable:
        util.restore_checkpoint(model, optimizer, args.exp_dir)

        metrics = util.load_metrics(args.exp_dir)
        start_epoch = metrics['current_epoch'] + 1
        print("Resuming from epoch {}".format(metrics['current_epoch']))
    else:
        metrics = init_metrics()
        start_epoch = 1

    if start_epoch > args.epochs:
        raise RuntimeError("start_epoch {} > total epochs {}".format(
            start_epoch, args.epochs))

    # Enumerate subsampled modality combinations
    m_combos = enumerate_combinations(args.n_tracks)
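Example #7 pairs util.is_resumable(exp_dir) with an in-place overload, util.restore_checkpoint(model, optimizer, exp_dir), that also restores optimizer state. A sketch of what such helpers could look like, assuming a single checkpoint.pth.tar plus a metrics.json marker in the experiment directory (both filenames are assumptions):

import os

import torch


def is_resumable(exp_dir):
    """True when exp_dir holds both a checkpoint and saved metrics."""
    return (os.path.isfile(os.path.join(exp_dir, "checkpoint.pth.tar"))
            and os.path.isfile(os.path.join(exp_dir, "metrics.json")))


def restore_checkpoint(model, optimizer, exp_dir):
    """Load model and optimizer state in place from exp_dir."""
    checkpoint = torch.load(os.path.join(exp_dir, "checkpoint.pth.tar"),
                            map_location="cpu")
    model.load_state_dict(checkpoint["model"])
    optimizer.load_state_dict(checkpoint["optimizer"])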