示例#1
0
def main():
    """Build the configured U-Net variant, attach an Adam optimizer and train.

    Reads the module-level settings ``model_choice``, ``feature_scale``,
    ``learning_rate``, ``total_epochs``, ``print_every`` and
    ``starting_checkpoint_path``. The process is terminated through
    ``sys.exit`` when ``model_choice`` is neither ``'unet'`` nor
    ``'unet_baseline'``.
    """
    n_output_classes = 3

    # reject unknown architectures up front
    if model_choice not in ('unet', 'unet_baseline'):
        sys.exit(
            'Invalid model_choice {}, choose unet_baseline or unet'.format(
                model_choice))

    # 'unet' is the larger model; 'unet_baseline' is XD_XD's model (the
    # year 2 best solution), which serves as the baseline
    if model_choice == 'unet':
        architecture = Unet
    else:
        architecture = UnetBaseline

    # both architectures are constructed with the same settings
    model_settings = {
        'feature_scale': feature_scale,
        'n_classes': n_output_classes,
        'is_deconv': True,
        'in_channels': 3,
        'is_batchnorm': True,
    }
    model = architecture(**model_settings)

    # optim.SGD with Nesterov momentum (momentum=0.9, nesterov=True) is a
    # possible alternative to Adam here
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)

    train_model(
        model,
        optimizer,
        epochs=total_epochs,
        print_every=print_every,
        checkpoint_path=starting_checkpoint_path,
    )
示例#2
0
def main():
    """Train (or only evaluate) the configured U-Net, checkpointing each epoch.

    All configuration comes from module-level names: ``experiment_name``,
    ``model_choice``, ``feature_scale``, ``device``, ``dtype``,
    ``loss_weights``, ``learning_rate``, ``starting_checkpoint_path``,
    ``evaluate_only``, ``total_epochs``, the data objects ``dset_train``,
    ``loader_train``, ``loader_val`` and the sample images
    ``sample_images_train`` / ``sample_images_val``.

    Flow:
        1. Create ``checkpoints/<experiment_name>`` and the train/val loggers,
           and log the ground-truth sample images.
        2. Instantiate the model, loss and Adam optimizer.
        3. Restore model/optimizer state from ``starting_checkpoint_path``
           when that file exists, otherwise apply ``weights_init``.
        4. If ``evaluate_only`` is set, evaluate once on the val set and
           return; otherwise train from ``starting_epoch`` to
           ``total_epochs``, evaluating and saving a checkpoint after every
           epoch.

    Exits the process via ``sys.exit`` on an unknown ``model_choice``.
    """
    num_classes = 3

    # create checkpoint dir
    checkpoint_dir = 'checkpoints/{}'.format(experiment_name)
    os.makedirs(checkpoint_dir, exist_ok=True)

    logger_train = Logger('logs/{}/train'.format(experiment_name))
    logger_val = Logger('logs/{}/val'.format(experiment_name))
    log_sample_img_gt(sample_images_train, sample_images_val, logger_train,
                      logger_val)
    logging.info('Logged ground truth image samples')

    # larger model
    if model_choice == 'unet':
        model = Unet(feature_scale=feature_scale,
                     n_classes=num_classes,
                     is_deconv=True,
                     in_channels=3,
                     is_batchnorm=True)
    # year 2 best solution XD_XD's model, as the baseline model
    elif model_choice == 'unet_baseline':
        model = UnetBaseline(feature_scale=feature_scale,
                             n_classes=num_classes,
                             is_deconv=True,
                             in_channels=3,
                             is_batchnorm=True)
    else:
        sys.exit(
            'Invalid model_choice {}, choose unet_baseline or unet'.format(
                model_choice))

    model = model.to(device=device,
                     dtype=dtype)  # move the model parameters to CPU/GPU

    criterion = nn.CrossEntropyLoss(weight=loss_weights).to(device=device,
                                                            dtype=dtype)

    # optim.SGD with Nesterov momentum (momentum=0.9, nesterov=True) is a
    # possible alternative to Adam here
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)

    # resume from a checkpoint if provided
    starting_epoch = 0
    best_acc = 0.0

    # guard against an unset (None / empty) path, which os.path.isfile
    # would reject with a TypeError
    if starting_checkpoint_path and os.path.isfile(starting_checkpoint_path):
        logging.info(
            'Loading checkpoint from {0}'.format(starting_checkpoint_path))
        # map_location lets a checkpoint saved on another device (e.g. GPU)
        # be restored on the device used for this run
        checkpoint = torch.load(starting_checkpoint_path, map_location=device)
        model.load_state_dict(checkpoint['state_dict'])
        optimizer.load_state_dict(checkpoint['optimizer'])
        # the stored epoch is "last finished epoch + 1" (see the save below),
        # i.e. the index of the next epoch to run
        starting_epoch = checkpoint['epoch']
        best_acc = checkpoint.get('best_acc', 0.0)
    else:
        logging.info(
            'No valid checkpoint is provided. Start to train from scratch...')
        model.apply(weights_init)

    if evaluate_only:
        val_loss, val_acc = evaluate(loader_val, model, criterion)
        print('Evaluated on val set, loss is {}, accuracy is {}'.format(
            val_loss, val_acc))
        return

    # global step counter handed to train(); continues from where the
    # restored checkpoint left off
    step = starting_epoch * len(dset_train)

    for epoch in range(starting_epoch, total_epochs):
        logging.info('Epoch {} of {}'.format(epoch, total_epochs))

        # train for one epoch
        step = train(loader_train, model, criterion, optimizer, epoch, step,
                     logger_train)

        # evaluate on val set
        logging.info(
            'Evaluating model on the val set at the end of epoch {}...'.format(
                epoch))
        val_loss, val_acc = evaluate(loader_val, model, criterion)
        # the arguments must line up with the three placeholders; passing
        # `step` here would shift loss/accuracy into the wrong slots
        logging.info('\nEpoch {}, val loss is {}, val accuracy is {}\n'.format(
            epoch, val_loss, val_acc))
        logger_val.scalar_summary('val_loss', val_loss, step + 1)
        logger_val.scalar_summary('val_acc', val_acc, step + 1)
        # log the val images too

        # record the best accuracy; save checkpoint for every epoch
        is_best = val_acc > best_acc
        best_acc = max(val_acc, best_acc)

        checkpoint_path = os.path.join(
            checkpoint_dir, 'checkpoint_epoch{}_{}.pth.tar'.format(
                epoch, strftime("%Y-%m-%d-%H-%M-%S", localtime())))
        logging.info(
            'Saving to checkoutpoint file at {}. Is it the highest accuracy checkpoint so far: {}'
            .format(checkpoint_path, str(is_best)))
        save_checkpoint(
            {
                'epoch':
                epoch + 1,  # saved checkpoints are numbered starting from 1
                'arch': model_choice,
                'state_dict': model.state_dict(),
                'optimizer': optimizer.state_dict(),
                'best_acc': best_acc
            },
            is_best,
            checkpoint_path,
            checkpoint_dir)
示例#3
0
def main(cp_path,
         input_image_dir,
         out_path,
         vis_dir=None,
         save_pred=save_pred_polys):
    """
    Applies the model at cp_path to input images and output the csv required for SpaceNet to
    compute the F-1 score and other metrics against the ground truth.

    The architecture is selected by the module-level ``model_choice``
    ('unetv2' or 'unetbase'); ``feature_scale``, ``device`` and ``dtype`` are
    module-level settings as well. Only files ending in '.jpg' are processed,
    in sorted order so the csv is reproducible. When no image is found, a csv
    containing only the header row is written.

    Args:
        cp_path: path to the model checkpoint to use
        input_image_dir: path to directory containing the images to extract building footprints from,
         usually the val or test dir
        out_path: path of the output csv
        vis_dir: optionally a directory to place the visualization of polygons on each image
        save_pred: whether to save visualizations to vis_dir

    Raises:
        ValueError: if ``model_choice`` is not a known architecture.
    """
    if vis_dir:
        os.makedirs(vis_dir, exist_ok=True)

    # map_location lets a checkpoint saved on another device (e.g. GPU) be
    # restored on the device used for this run
    checkpoint = torch.load(cp_path, map_location=device)

    if model_choice == 'unetv2':
        model = Unet(feature_scale=feature_scale,
                     n_classes=3,
                     is_deconv=True,
                     in_channels=3,
                     is_batchnorm=True)
    elif model_choice == 'unetbase':
        model = UnetBaseline(feature_scale=feature_scale,
                             n_classes=3,
                             is_deconv=True,
                             in_channels=3,
                             is_batchnorm=True)
    else:
        raise ValueError('Unknown model_choice={0}'.format(model_choice))

    model.load_state_dict(checkpoint['state_dict'])
    model = model.to(device=device, dtype=dtype)
    model.eval()  # set model to evaluation mode
    logging.info('Model loaded from checkpoint.')

    result_dfs = []

    # os.listdir returns entries in arbitrary order; sort for a stable csv
    image_files = sorted(
        image_file for image_file in os.listdir(input_image_dir)
        if image_file.endswith('.jpg'))

    for image_name in tqdm(image_files):
        image_name_no_file_type = image_name.split('.jpg')[0]

        image_id = image_name_no_file_type.split('RGB-PanSharpen_')[
            1]  # of format _-115.3064538_36.1756826998
        image_path = os.path.join(input_image_dir, image_name)
        original_image = io.imread(image_path)

        # reorder the axes with the last one first, then add a leading
        # batch axis of size 1
        image = original_image.transpose((2, 0, 1))
        image = torch.from_numpy(np.expand_dims(image, 0)).type(
            torch.float32).to(device=device, dtype=dtype)

        with torch.no_grad():
            scores = model(image)
            # index of the highest score along dim 1 for every position
            _, prediction = scores.max(1)

        # NOTE(review): assumes the model output covers exactly 256 x 256
        # positions per image; other sizes make this reshape fail
        prediction = prediction.reshape((256, 256)).cpu().data.numpy()

        result_df, polygons = mask_to_poly(prediction, image_id)
        result_dfs.append(result_df)

        # save prediction polygons visualization to output
        if save_pred and vis_dir:
            visualize_poly(polygons, prediction,
                           os.path.join(vis_dir, 'poly_' + image_name))

    if result_dfs:
        all_df = pd.concat(result_dfs)
    else:
        # pd.concat raises ValueError on an empty list; fall back to an
        # empty frame so a header-only csv is still produced
        logging.warning(
            'No .jpg images found in {}; writing a header-only csv'.format(
                input_image_dir))
        all_df = pd.DataFrame(
            columns=['image_id', 'bid', 'wkt', 'area_ratio'])

    logging.info('Writing result to csv, length of all_df is {}'.format(
        len(all_df)))
    with open(out_path, 'w') as f:
        f.write('ImageId,BuildingId,PolygonWKT_Pix,Confidence\n')

        for row in tqdm(all_df.itertuples(index=False), total=len(all_df)):
            f.write("{},{},\"{}\",{:.6f}\n".format(row.image_id, int(row.bid),
                                                   row.wkt, row.area_ratio))
示例#4
0
def main():
    """Train (or only evaluate) the configured U-Net, optionally distributed.

    Command-line options are parsed from the module-level ``parser`` into the
    global ``args``; training runs in distributed mode when
    ``args.world_size >= 2``. Everything else is read from module-level
    settings (``data_path_train``, ``data_path_val``, ``split_tags``,
    ``train_batch_size``, ``val_batch_size``, ``num_workers``,
    ``out_checkpoint_dir``, ``tensorboard_path``, ``model_choice``,
    ``feature_scale``, ``device``, ``dtype``, ``loss_weights``,
    ``learning_rate``, ``starting_checkpoint_path``, ``evaluate_only``,
    ``total_epochs``).

    Flow:
        1. Initialise the process group when distributed.
        2. Build the datasets and loaders, and draw one batch of sample
           images from each (stored in the globals
           ``sample_images_train_tensors`` / ``sample_images_val_tensors``).
        3. In distributed mode, rebuild the loaders around a
           ``DistributedSampler``.
        4. Instantiate the model, loss and Adam optimizer; restore from
           ``starting_checkpoint_path`` when that file exists, otherwise
           apply ``weights_init``.
        5. If ``evaluate_only`` is set, evaluate once on the val set and
           return; otherwise train, evaluating and saving a checkpoint after
           every epoch.

    Exits the process via ``sys.exit`` on an unknown ``model_choice``.
    """
    global args, sample_images_train_tensors, sample_images_val_tensors
    args = parser.parse_args()
    print('args.world_size: ', args.world_size)
    print('args.dist_backend: ', args.dist_backend)
    print('args.rank: ', args.rank)

    # more info on distributed PyTorch see https://pytorch.org/tutorials/intermediate/dist_tuto.html
    args.distributed = args.world_size >= 2
    # the placeholder is required, otherwise the flag is never printed
    print('is distributed: {}'.format(args.distributed))
    if args.distributed:
        dist.init_process_group(backend=args.dist_backend, init_method=args.dist_url,
                                world_size=args.world_size, rank=args.rank)
        print('dist.init_process_group() finished.')

    # data sets and loaders
    dset_train = SpaceNetDataset(data_path_train, split_tags, transform=T.Compose([ToTensor()]))
    dset_val = SpaceNetDataset(data_path_val, split_tags, transform=T.Compose([ToTensor()]))
    logging.info('Training set size: {}, validation set size: {}'.format(
        len(dset_train), len(dset_val)))

    # need to instantiate these data loaders to produce the sample images because they need to be shuffled!
    loader_train = DataLoader(dset_train, batch_size=train_batch_size, shuffle=True,
                              num_workers=num_workers)  # shuffle True to reshuffle at every epoch

    loader_val = DataLoader(dset_val, batch_size=val_batch_size, shuffle=True,
                            num_workers=num_workers)

    # get one batch of sample images that are used to visualize the training progress throughout this run
    sample_images_train, sample_images_train_tensors = get_sample_images(loader_train, which_set='train')
    sample_images_val, sample_images_val_tensors = get_sample_images(loader_val, which_set='val')

    if args.distributed:
        # re-instantiate the training data loader to make distributed training possible
        train_batch_size_dist = train_batch_size * args.world_size
        logging.info('Using train_batch_size_dist {}.'.format(train_batch_size_dist))
        train_sampler = torch.utils.data.BatchSampler(
            torch.utils.data.distributed.DistributedSampler(dset_train),
            batch_size=train_batch_size_dist, drop_last=False)
        # TODO https://pytorch.org/docs/stable/data.html#torch.utils.data.distributed.DistributedSampler
        # check if need num_replicas and rank
        print('train_sampler created successfully.')
        # the DataLoader keyword is `batch_sampler`; any other spelling is
        # rejected as an unexpected keyword argument
        loader_train = DataLoader(dset_train, num_workers=num_workers,
                                  pin_memory=True, batch_sampler=train_sampler)

        loader_val = DataLoader(dset_val, batch_size=val_batch_size, shuffle=False,
                                num_workers=num_workers, pin_memory=True)
        print('both data loaders created successfully.')

    # checkpoint dir
    checkpoint_dir = out_checkpoint_dir

    logger_train = Logger('{}/train'.format(tensorboard_path))
    logger_val = Logger('{}/val'.format(tensorboard_path))
    log_sample_img_gt(sample_images_train, sample_images_val, logger_train, logger_val)
    logging.info('Logged ground truth image samples')

    num_classes = 3

    # larger model
    if model_choice == 'unet':
        model = Unet(feature_scale=feature_scale, n_classes=num_classes, is_deconv=True,
                     in_channels=3, is_batchnorm=True)
    # year 2 best solution XD_XD's model, as the baseline model
    elif model_choice == 'unet_baseline':
        model = UnetBaseline(feature_scale=feature_scale, n_classes=num_classes, is_deconv=True,
                             in_channels=3, is_batchnorm=True)
    else:
        sys.exit('Invalid model_choice {}, choose unet_baseline or unet'.format(model_choice))
    print('model instantiated.')

    if not args.distributed:
        model = model.to(device=device, dtype=dtype)  # move the model parameters to target device
        # model = torch.nn.DataParallel(model).cuda() # Batch AI example
    else:
        model.cuda()
        model = torch.nn.parallel.DistributedDataParallel(model)
        print('torch.nn.parallel.DistributedDataParallel() ran.')

    criterion = nn.CrossEntropyLoss(weight=loss_weights).to(device=device, dtype=dtype)

    # optim.SGD with Nesterov momentum (momentum=0.9, nesterov=True) is a
    # possible alternative to Adam here
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)

    # resume from a checkpoint if provided
    starting_epoch = 0
    best_acc = 0.0
    # guard against an unset (None / empty) path, which os.path.isfile
    # would reject with a TypeError
    if starting_checkpoint_path and os.path.isfile(starting_checkpoint_path):
        logging.info('Loading checkpoint from {0}'.format(starting_checkpoint_path))
        # map_location lets a checkpoint saved on another device be restored
        # on the device used for this run
        checkpoint = torch.load(starting_checkpoint_path, map_location=device)
        model.load_state_dict(checkpoint['state_dict'])
        optimizer.load_state_dict(checkpoint['optimizer'])
        # the stored epoch is "last finished epoch + 1" (see the save below),
        # i.e. the index of the next epoch to run
        starting_epoch = checkpoint['epoch']
        best_acc = checkpoint.get('best_acc', 0.0)
    else:
        logging.info('No valid checkpoint is provided. Start to train from scratch...')
        model.apply(weights_init)

    # run training or evaluation
    if evaluate_only:
        val_loss, val_acc = evaluate(loader_val, model, criterion)
        print('Evaluated on val set, loss is {}, accuracy is {}'.format(val_loss, val_acc))
        return

    # global step counter handed to train(); continues from where the
    # restored checkpoint left off
    step = starting_epoch * len(dset_train)

    for epoch in range(starting_epoch, total_epochs):
        logging.info('Epoch {} of {}'.format(epoch, total_epochs))

        if args.distributed:
            # DistributedSampler derives its shuffle order from the epoch
            # number; without this call every epoch sees the same order
            train_sampler.sampler.set_epoch(epoch)

        # train for one epoch
        step = train(loader_train, model, criterion, optimizer, epoch, step, logger_train)

        # evaluate on val set
        logging.info('Evaluating model on the val set at the end of epoch {}...'.format(epoch))
        val_loss, val_acc = evaluate(loader_val, model, criterion)
        # the arguments must line up with the three placeholders; passing
        # `step` here would shift loss/accuracy into the wrong slots
        logging.info('\nEpoch {}, val loss is {}, val accuracy is {}\n'.format(epoch, val_loss, val_acc))
        logger_val.scalar_summary('val_loss', val_loss, step + 1)
        logger_val.scalar_summary('val_acc', val_acc, step + 1)
        # log the val images too

        # record the best accuracy; save checkpoint for every epoch
        is_best = val_acc > best_acc
        best_acc = max(val_acc, best_acc)

        checkpoint_path = os.path.join(
            checkpoint_dir,
            'checkpoint_epoch{}_{}.pth.tar'.format(epoch, strftime("%Y-%m-%d-%H-%M-%S", localtime())))
        logging.info(
            'Saving to checkoutpoint file at {}. Is it the highest accuracy checkpoint so far: {}'.format(
                checkpoint_path, str(is_best)))
        save_checkpoint({
            'epoch': epoch + 1,  # saved checkpoints are numbered starting from 1
            'arch': model_choice,
            'state_dict': model.state_dict(),
            'optimizer': optimizer.state_dict(),
            'best_acc': best_acc
        }, is_best, checkpoint_path, checkpoint_dir)