示例#1
0
文件: train.py 项目: leibo-cmu/MatSeg
def train(args):
    """Run the complete training loop configured by ``args``.

    The arguments are first persisted to ``args.args_path``. A ``UNetVgg16``
    model, its optimizer, learning-rate scheduler and loss are then built and
    the model is trained for ``args.n_epochs`` epochs. After every epoch the
    train/validation metrics are printed, appended to a ``Recorder`` (saved to
    ``args.record_path``) and the model is handed to a ``ModelSaver`` together
    with the selection score.

    The selection score is the validation IoU of a single class when
    ``args.metric`` starts with ``"IoU"`` (the class index is the integer
    after the first underscore, e.g. ``"IoU_2"``); otherwise it is the
    validation mean IoU.
    """
    Arguments.save_args(args, args.args_path)

    # The third element returned by get_dataloaders is not needed here.
    train_dl, val_dl, _unused = get_dataloaders(args)

    network = UNetVgg16(n_classes=args.n_classes).to(args.device)
    optimizer = get_optimizer(args.optimizer, network)
    lr_scheduler = LRScheduler(args.lr_scheduler, optimizer)
    loss_fn = get_loss_fn(args.loss_type, args.ignore_index).to(args.device)

    model_saver = ModelSaver(args.model_path)
    history = Recorder(['train_miou', 'train_acc', 'train_loss',
                        'val_miou', 'val_acc', 'val_loss'])

    # Keyword arguments common to the training and the evaluation pass.
    shared_kwargs = {
        'model': network,
        'n_classes': args.n_classes,
        'criterion': loss_fn,
        'device': args.device,
    }

    for epoch in range(args.n_epochs):
        print(f"{args.experim_name} Epoch {epoch+1}:")

        # --- training pass ---
        train_loss, train_acc, train_miou, train_ious = train_epoch(
            dataloader=train_dl,
            optimizer=optimizer,
            lr_scheduler=lr_scheduler,
            **shared_kwargs,
        )
        print(f"train | mIoU: {train_miou:.3f} | accuracy: {train_acc:.3f} | loss: {train_loss:.3f}")

        # --- validation pass ---
        val_loss, val_scores = eval_epoch(dataloader=val_dl, **shared_kwargs)
        val_miou = val_scores['mIoU']
        val_ious = val_scores['IoUs']
        val_acc = val_scores['accuracy']
        print(f"valid | mIoU: {val_miou:.3f} | accuracy: {val_acc:.3f} | loss: {val_loss:.3f}")

        # Persist the metric history after every epoch.
        history.update([train_miou, train_acc, train_loss, val_miou, val_acc, val_loss])
        history.save(args.record_path)

        # Score handed to the saver: one class IoU or the mean IoU.
        selection_score = (
            val_ious[int(args.metric.split('_')[1])]
            if args.metric.startswith("IoU")
            else val_miou
        )
        model_saver.save_models(
            selection_score,
            epoch+1,
            network,
            ious={'train': train_ious, 'val': val_ious},
        )

    print(f"best model at epoch {model_saver.best_epoch} with miou {model_saver.best_score:.5f}")
示例#2
0
def main(args):
    """Train or evaluate a segmentation network, depending on ``args.mode``.

    Args:
        args: Parsed command-line namespace. The attributes read here are
            ``dataset`` and ``version`` (select the configuration), ``gpu``
            (request CUDA; only honoured when CUDA is available), ``mode``
            (``'train'`` or ``'evaluate'``), ``save`` (train mode: write the
            best checkpoint and the metric record), ``test_folder`` and
            ``overlay`` (evaluate mode).

    In ``'train'`` mode the model is trained for up to ``config['epoch']``
    epochs, the learning rate is reduced on plateaus of the *training* loss,
    and training stops early once the learning rate has decayed to 1e-3 of
    ``config['lr']``. In ``'evaluate'`` mode the saved checkpoint is loaded
    and predictions (and optionally overlays) are written below the test
    folder. Any other mode only prints a message.

    Exits with status 1 if the configured model or optimizer is unknown.
    """
    # Defines configuration dictionary and network architecture to use
    config = get_config(args.dataset, args.version)
    method = config['model']

    # CUDA is used only when it is both requested and actually available.
    use_gpu = bool(args.gpu and torch.cuda.is_available())

    # Defines the loss function. When config['balance'] is set, class weights
    # obtained from balance(config) are used for class balancing.
    if config['balance']:
        criterion = nn.CrossEntropyLoss(weight=balance(config))
    else:
        criterion = nn.CrossEntropyLoss()
    if use_gpu:
        criterion = criterion.cuda()

    # Maps configuration method to network class defined in models.py.
    # Only the lookup is guarded so that a KeyError raised elsewhere (e.g. a
    # missing config['n_class']) is not misreported as an unknown model.
    try:
        model_cls = model_mappings[method]
    except KeyError:
        print('%s model does not exist' % method)
        sys.exit(1)
    model = model_cls(K=config['n_class'])
    if use_gpu:
        model = model.cuda()

    if args.mode == 'train':
        # Starts training time to be completed at end of conditional statement
        start = time.time()

        # Defines paths for trained network, training log, and training plot
        # respectively; create these directories in MatSeg if this is not already done.
        model_dir = './saved/%s_%s.pth' % (config['name'], method)
        log_dir = './log/%s_%s.log' % (config['name'], method)
        plot_dir = './plots/%s_%s.png' % (config['name'], method)

        # Obtains iterable data sets
        train_loader, validation_loader = get_dataloader(config)

        # Conditional outlining choice of optimizer; includes hard-coded hyperparameters
        if config['optimizer'] == 'Adam':
            optimizer = optim.Adam(model.parameters(),
                                   lr=config['lr'],
                                   weight_decay=5e-4)
        elif config['optimizer'] == 'SGD':
            optimizer = optim.SGD(model.parameters(),
                                  lr=config['lr'],
                                  momentum=0.9,
                                  weight_decay=5e-4)
        else:
            print('cannot found %s optimizer' % config['optimizer'])
            sys.exit(1)

        # Defines dynamic learning rate reduction. Patience defines the number of epochs after
        # which to reduce the LR should training loss not decrease in those epochs.
        scheduler = ReduceLROnPlateau(optimizer, patience=config['patience'])

        # Gives entries in the Recorder object to measure; obtained from evaluate function
        recorder = Recorder(('loss_train', 'acc_train', 'loss_val', 'acc_val',
                             'mean_iou', 'class_precision', 'class_iou'))
        iou_val_max = 0

        # Training stops once lr / config['lr'] falls to this ratio. With the
        # scheduler's default factor of 0.1 that is three reductions.
        lr_stop_ratio = 1e-3
        # Repeated multiplication by 0.1 overshoots by an ulp
        # (0.1 * 0.1 * 0.1 == 0.0010000000000000002), so compare with a tiny
        # relative tolerance instead of an exact `<=`.
        lr_stop_threshold = lr_stop_ratio * (1 + 1e-9)

        # Number of epochs actually run; stays 0 if the loop body never executes.
        epochs = 0

        # Iterate through number of epochs
        for epoch in range(1, config['epoch'] + 1):
            epochs = epoch
            gc.collect()
            print('Epoch %s:' % epoch)
            loss_train, acc_train = train(config,
                                          model,
                                          criterion,
                                          optimizer,
                                          train_loader,
                                          method=method,
                                          gpu=args.gpu)
            loss_val, acc_val, iou_val, class_precision, class_iou = evaluate(
                config,
                model,
                criterion,
                validation_loader,
                gpu=args.gpu,
                method=method)

            # Update learning rate scheduler based on training loss
            scheduler.step(loss_train)

            # Update metrics in Recorder object for each epoch
            recorder.update((loss_train, acc_train, loss_val, acc_val, iou_val,
                             class_precision, class_iou))

            # Save model with higher mean IoU
            if iou_val > iou_val_max and args.save:
                torch.save(recorder.record, log_dir)
                torch.save(
                    {
                        'epoch': epoch,
                        'version': args.version,
                        'model_state_dict': model.state_dict(),
                    }, model_dir)
                print(
                    'validation iou improved from %.5f to %.5f. Model Saved.' %
                    (iou_val_max, iou_val))
                iou_val_max = iou_val

            # Stop training once the learning rate has decayed far enough.
            lr_ratio = optimizer.param_groups[0]['lr'] / config['lr']
            if lr_ratio <= lr_stop_threshold:
                print('Learning Rate Reduced to 1e-3 of Original Value',
                      'Training Stopped',
                      sep='\n')
                break

        # Obtain time after all epochs, compute total training time, print and plot results
        end = time.time()
        time_taken = end - start
        print(recorder.record)
        plotting(recorder.record, config, start, time_taken, plot_dir, epochs)

    elif args.mode == 'evaluate':
        # Load test data into an iterable dataset with no augmentation
        test_dir = '%s/%s' % (config['root'], args.test_folder)
        test_set = Dataset(test_dir, config['size'],
                           *get_transform(config, is_train=False))
        test_loader = DataLoader(test_set,
                                 batch_size=1,
                                 shuffle=False,
                                 num_workers=0,
                                 drop_last=False)

        # Load desired trained network from saved directory. When running on
        # CPU, remap storages so a checkpoint written on a GPU still loads.
        model_dir = './saved/%s_%s.pth' % (config['name'], method)
        map_location = None if use_gpu else torch.device('cpu')
        checkpoint = torch.load(model_dir, map_location=map_location)
        model.load_state_dict(checkpoint['model_state_dict'])

        # Define directories to which to save predictions and overlays respectively
        save_dir = '%s/predictions/%s_%s' % (test_dir, args.version, method)
        overlay_dir = '%s/overlays/%s_%s' % (test_dir, args.version, method)
        labels_dir = os.path.join(test_dir, 'labels_npy')

        # Creates '<test_dir>/predictions' and the run-specific folder in one
        # step; exist_ok avoids the check-then-create race.
        os.makedirs(save_dir, exist_ok=True)
        evaluate(config,
                 model,
                 criterion,
                 test_loader,
                 gpu=args.gpu,
                 method=method,
                 test_flag=True,
                 save_dir=save_dir)

        # Creates overlays if this is specified in the command line and
        # ground-truth labels are present
        if os.path.isdir(labels_dir) and args.overlay:
            os.makedirs(overlay_dir, exist_ok=True)
            overlay(labels_dir, save_dir, overlay_dir, config['n_class'])

    else:
        print('%s mode does not exist' % args.mode)
示例#3
0
文件: main.py 项目: arkitahara/MatSeg
def main(args):
    """Train or evaluate a segmentation network, depending on ``args.mode``.

    Args:
        args: Parsed command-line namespace. The attributes read here are
            ``seed`` (optional; a falsy value disables seeding), ``dataset``
            and ``version`` (select the configuration), ``mode`` (``'train'``
            or ``'evaluate'``), ``save`` (train mode: write the best
            checkpoint and the metric record) and ``test_folder`` (evaluate
            mode).

    The model and the loss are moved to CUDA unconditionally. In ``'train'``
    mode the checkpoint is rewritten whenever the validation IoU improves;
    in ``'evaluate'`` mode the checkpoint is loaded and predictions are
    written below the test folder. Any other mode only prints a message.

    Exits with status 1 if the configured model or optimizer is unknown.
    """
    if args.seed:
        # Seed NumPy and torch with the *same* user-supplied value; previously
        # torch was always seeded with 0 regardless of args.seed.
        seed = int(args.seed)
        np.random.seed(seed)
        torch.backends.cudnn.deterministic = True
        torch.manual_seed(seed)

    config = get_config(args.dataset, args.version)
    method = config['model']
    criterion = nn.CrossEntropyLoss().cuda()

    # Guard only the lookup so that a KeyError raised elsewhere (e.g. a
    # missing config['n_class']) is not misreported as an unknown model.
    try:
        model_cls = model_mappings[method]
    except KeyError:
        print('%s model does not exist' % method)
        sys.exit(1)
    model = model_cls(K=config['n_class']).cuda()

    model_dir = './saved/%s_%s.pth' % (config['name'], method)
    if args.mode == 'train':
        log_dir = './log/%s_%s.log' % (config['name'], method)
        train_loader, validation_loader = get_dataloader(config)

        # Optimizer choice; weight decay and momentum are hard-coded.
        if config['optimizer'] == 'Adam':
            optimizer = optim.Adam(model.parameters(),
                                   lr=config['lr'],
                                   weight_decay=5e-4)
        elif config['optimizer'] == 'SGD':
            optimizer = optim.SGD(model.parameters(),
                                  lr=config['lr'],
                                  momentum=0.9,
                                  weight_decay=5e-4)
        else:
            print('cannot found %s optimizer' % config['optimizer'])
            sys.exit(1)

        scheduler = ReduceLROnPlateau(optimizer, patience=3)
        recorder = Recorder(('loss_train', 'acc_train', 'loss_val', 'acc_val'))
        iou_val_max = 0
        for epoch in range(1, config['epoch'] + 1):
            print('Epoch %s:' % epoch)
            loss_train, acc_train = train(config,
                                          model,
                                          criterion,
                                          optimizer,
                                          train_loader,
                                          method=method)
            loss_val, acc_val, iou_val = evaluate(config,
                                                  model,
                                                  criterion,
                                                  validation_loader,
                                                  method=method)

            # The plateau scheduler is driven by the training loss.
            scheduler.step(loss_train)

            # update loss and accuracy per epoch
            recorder.update((loss_train, acc_train, loss_val, acc_val))

            # save model with higher iou
            if iou_val > iou_val_max and args.save:
                torch.save(recorder.record, log_dir)
                torch.save(
                    {
                        'epoch': epoch,
                        'version': args.version,
                        'model_state_dict': model.state_dict(),
                    }, model_dir)
                print(
                    'validation iou improved from %.5f to %.5f. Model Saved.' %
                    (iou_val_max, iou_val))
                iou_val_max = iou_val

    elif args.mode == 'evaluate':
        test_dir = '%s/%s' % (config['root'], args.test_folder)
        test_set = Dataset(test_dir, config['size'],
                           *get_transform(config, is_train=False))
        test_loader = DataLoader(test_set,
                                 batch_size=1,
                                 shuffle=False,
                                 num_workers=0,
                                 drop_last=False)
        model.load_state_dict(torch.load(model_dir)['model_state_dict'])

        # save prediction results; makedirs creates '<test_dir>/predictions'
        # and the run-specific folder in one step, and exist_ok avoids the
        # check-then-create race.
        save_dir = '%s/predictions/%s_%s' % (test_dir, args.version, method)
        os.makedirs(save_dir, exist_ok=True)
        evaluate(config,
                 model,
                 criterion,
                 test_loader,
                 method=method,
                 test_flag=True,
                 save_dir=save_dir)

    else:
        print('%s mode does not exist' % args.mode)