Example #1
    def _test_class_inf(self, props, exp_class_ids, default_class_id=None):
        geojson = {
            'type': 'FeatureCollection',
            'features': [{
                'properties': props,
                'geometry': {
                    'type': 'Point',
                    'coordinates': [1, 1]
                }
            }]
        }
        json_to_file(geojson, self.uri)

        class_map = ClassMap.construct_from(['building', 'car', 'tree'])
        class_id_to_filter = {
            1: ['==', 'type', 'building'],
            2: ['any', ['==', 'type', 'car'], ['==', 'type', 'auto']]
        }
        b = GeoJSONVectorSourceConfigBuilder() \
            .with_class_inference(class_id_to_filter=class_id_to_filter,
                                  default_class_id=default_class_id) \
            .with_uri(self.uri) \
            .build()
        msg = b.to_proto()
        config = GeoJSONVectorSourceConfig.from_proto(msg)
        source = config.create_source(crs_transformer=IdentityCRSTransformer(),
                                      class_map=class_map)
        trans_geojson = source.get_geojson()
        class_ids = [
            f['properties']['class_id'] for f in trans_geojson['features']
        ]
        self.assertEqual(class_ids, exp_class_ids)
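
Every example on this page funnels a dict through json_to_file. As a point of reference, a minimal local-filesystem sketch of such a helper (the real Raster Vision helper also resolves remote URIs through its file-system abstraction; this standalone stand-in is an assumption for illustration):

import json
import os


def json_to_file(content_dict, uri):
    # Hypothetical local-only stand-in: serialize a dict to JSON and write
    # it to a local path.
    os.makedirs(os.path.dirname(uri) or '.', exist_ok=True)
    with open(uri, 'w') as f:
        json.dump(content_dict, f)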
Example #2
def run(runner, cfg_path, commands, arg, splits):
    cfg_module = importlib.import_module(cfg_path)
    args = dict(arg)

    get_configs = getattr(cfg_module, 'get_config', None)
    if get_configs is None:
        get_configs = getattr(cfg_module, 'get_configs', None)
    if get_configs is None:
        raise Exception(
            'The config module must define get_config or get_configs.')

    new_args = {}
    for k, v in args.items():
        if v.lower() == 'true':
            v = True
        elif v.lower() == 'false':
            v = False
        new_args[k] = v
    args = new_args

    cfgs = get_configs(runner, **args)
    if not isinstance(cfgs, list):
        cfgs = [cfgs]

    # Instantiate the runner once; re-assigning it inside the loop would
    # pass a runner instance (rather than its registered name) to
    # registry.get_runner on the next iteration.
    runner = registry.get_runner(runner)()

    for cfg in cfgs:
        cfg.update_all()
        cfg_dict = cfg.dict()
        cfg_json_uri = join(cfg.root_uri, 'pipeline.json')
        json_to_file(cfg_dict, cfg_json_uri)

        pipeline = cfg.get_pipeline()
        # Fall back to the pipeline's own commands without overwriting the
        # shared `commands` argument for subsequent configs.
        cfg_commands = commands if commands else pipeline.commands

        runner.run(cfg_json_uri, pipeline, cfg_commands, num_splits=splits)
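
Note that the values in `arg` arrive as strings and are coerced to booleans before being passed as keyword arguments to get_config(s). A hypothetical invocation illustrating the flow (the module path and argument names here are made up):

run('local', 'my_project.configs', commands=[],
    arg=[('test', 'true'), ('root_uri', '/tmp/out')], splits=1)
# Inside run, {'test': 'true', ...} becomes {'test': True, ...} before the
# call to get_configs(runner, **args).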
Example #3
    def setUp(self):
        self.crs_transformer = DoubleCRSTransformer()
        self.geojson = {
            'type': 'FeatureCollection',
            'features': [{
                'type': 'Feature',
                'geometry': {
                    'type': 'MultiPolygon',
                    'coordinates': [[[[0., 0.], [0., 2.], [2., 2.], [2., 0.],
                                      [0., 0.]]]]
                },
                'properties': {
                    'class_name': 'car',
                    'class_id': 1,
                    'score': 0.0
                }
            }, {
                'type': 'Feature',
                'geometry': {
                    'type': 'Polygon',
                    'coordinates': [[[2., 2.], [2., 4.], [4., 4.], [4., 2.],
                                     [2., 2.]]]
                },
                'properties': {
                    'score': 0.0,
                    'class_name': 'house',
                    'class_id': 2
                }
            }]
        }

        self.class_map = ClassMap([ClassItem(1, 'car'), ClassItem(2, 'house')])

        class MockTaskConfig():
            def __init__(self, class_map):
                self.class_map = class_map

        self.task_config = MockTaskConfig(self.class_map)

        self.box1 = Box.make_square(0, 0, 4)
        self.box2 = Box.make_square(4, 4, 4)
        self.class_id1 = 1
        self.class_id2 = 2
        self.background_class_id = 3

        geoms = []
        for f in self.geojson['features']:
            g = shape(f['geometry'])
            g.class_id = f['properties']['class_id']
            geoms.append(g)
        self.str_tree = STRtree(geoms)

        self.file_name = 'labels.json'
        self.temp_dir = RVConfig.get_tmp_dir()
        self.uri = os.path.join(self.temp_dir.name, self.file_name)
        json_to_file(self.geojson, self.uri)
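
The STRtree built above indexes the labeled geometries for fast spatial queries in the tests that follow. A sketch of a query (assuming Shapely 1.x, where STRtree.query returns the geometries themselves; Shapely 2.x returns integer indices instead):

        # Hypothetical query: find label geometries whose bounding boxes
        # intersect a 2x2 window, then read back the attached class ids.
        query_geom = Box.make_square(0, 0, 2).to_shapely()
        hits = self.str_tree.query(query_geom)
        hit_class_ids = [g.class_id for g in hits]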
Example #4
    def eval_model(self, split):
        print('Evaluating on {} set...'.format(split))
        dl = self.get_dataloader(split)
        metrics = self.validate_epoch(dl)
        print('metrics: {}'.format(metrics))
        json_to_file(metrics,
                     join(self.output_dir, '{}_metrics.json'.format(split)))
        self.plot_predictions(split)
Example #5
    def process_scene_data(self, scene, data, tmp_dir):
        """Process each scene's training data.

        This writes {scene_id}/{scene_id}-{ind}.png and
        {scene_id}/{scene_id}-labels.json in COCO format.

        Args:
            scene: Scene
            data: TrainingData

        Returns:
            backend-specific data-structures consumed by backend's
            process_sceneset_results
        """
        scene_dir = join(tmp_dir, str(scene.id))
        labels_path = join(scene_dir, '{}-labels.json'.format(scene.id))

        make_dir(scene_dir)
        images = []
        annotations = []
        categories = [{
            'id': item.id,
            'name': item.name
        } for item in self.task_config.class_map.get_items()]

        for im_ind, (chip, window, labels) in enumerate(data):
            im_id = '{}-{}'.format(scene.id, im_ind)
            fn = '{}.png'.format(im_id)
            chip_path = join(scene_dir, fn)
            save_img(chip, chip_path)
            images.append({
                'file_name': fn,
                'id': im_id,
                'height': chip.shape[0],
                'width': chip.shape[1]
            })

            npboxes = labels.get_npboxes()
            npboxes = ObjectDetectionLabels.global_to_local(npboxes, window)
            for box_ind, (box, class_id) in enumerate(
                    zip(npboxes, labels.get_class_ids())):
                bbox = [box[1], box[0], box[3] - box[1], box[2] - box[0]]
                bbox = [int(i) for i in bbox]
                annotations.append({
                    'id': '{}-{}'.format(im_id, box_ind),
                    'image_id': im_id,
                    'bbox': bbox,
                    'category_id': int(class_id)
                })

        coco_dict = {
            'images': images,
            'annotations': annotations,
            'categories': categories
        }
        json_to_file(coco_dict, labels_path)

        return scene_dir
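
The bbox conversion above maps a box from (ymin, xmin, ymax, xmax) pixel coordinates to COCO's [x, y, width, height] convention. A quick worked check:

box = [10, 20, 50, 80]  # (ymin, xmin, ymax, xmax)
bbox = [box[1], box[0], box[3] - box[1], box[2] - box[0]]
assert bbox == [20, 10, 60, 40]  # x=20, y=10, width=60, height=40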
Example #6
    def __init__(self, cfg: LearnerConfig, tmp_dir, model_path=None):
        self.cfg = cfg
        self.tmp_dir = tmp_dir

        torch_cache_dir = '/opt/data/torch-cache'
        os.environ['TORCH_HOME'] = torch_cache_dir
        self.device = 'cuda' if torch.cuda.is_available() else 'cpu'
        self.data_cache_dir = '/opt/data/data-cache'
        make_dir(self.data_cache_dir)

        self.model = self.build_model()
        self.model.to(self.device)

        if model_path is not None:
            if isfile(model_path):
                self.model.load_state_dict(
                    torch.load(model_path, map_location=self.device))
            else:
                raise Exception(
                    'Model could not be found at {}'.format(model_path))
            self.model.eval()
        else:
            print(self.cfg)

            self.train_ds = None
            self.train_dl = None
            self.valid_ds = None
            self.valid_dl = None
            self.test_ds = None
            self.test_dl = None

            if cfg.output_uri.startswith('s3://'):
                self.output_dir = get_local_path(cfg.output_uri, tmp_dir)
                make_dir(self.output_dir, force_empty=True)
                if not cfg.overfit_mode:
                    self.sync_from_cloud()
            else:
                self.output_dir = cfg.output_uri
                make_dir(self.output_dir)

            self.last_model_path = join(self.output_dir, 'last-model.pth')
            self.config_path = join(self.output_dir, 'config.json')
            self.train_state_path = join(self.output_dir, 'train-state.json')
            self.log_path = join(self.output_dir, 'log.csv')
            self.model_bundle_path = join(self.output_dir, 'model-bundle.zip')
            self.metric_names = self.build_metric_names()

            json_to_file(self.cfg.dict(), self.config_path)
            self.load_init_weights()
            self.load_checkpoint()
            self.opt = self.build_optimizer()
            self.build_data()
            self.start_epoch = self.get_start_epoch()
            self.steps_per_epoch = len(
                self.train_ds) // self.cfg.solver.batch_sz
            self.step_scheduler = self.build_step_scheduler()
            self.epoch_scheduler = self.build_epoch_scheduler()
Example #7
    def save(self, labels):
        """Save labels to URI."""
        boxes = labels.get_boxes()
        class_ids = labels.get_class_ids().tolist()
        scores = labels.get_scores().tolist()
        geojson = boxes_to_geojson(boxes,
                                   class_ids,
                                   self.crs_transformer,
                                   self.class_map,
                                   scores=scores)
        json_to_file(geojson, self.uri)
Example #8
    def save(self, labels):
        """Save labels to URI if writable.

        Note that if the grid is inferred from polygons, only the grid will be
        written, not the original polygons.
        """
        boxes = labels.get_cells()
        class_ids = labels.get_class_ids()
        scores = list(labels.get_scores())
        geojson = boxes_to_geojson(boxes,
                                   class_ids,
                                   self.crs_transformer,
                                   self.class_map,
                                   scores=scores)
        json_to_file(geojson, self.uri)
Example #9
    def setUp(self):
        self.prev_keys = (os.environ.get('AWS_ACCESS_KEY_ID'),
                          os.environ.get('AWS_SECRET_ACCESS_KEY'))
        os.environ['AWS_ACCESS_KEY_ID'] = 'DUMMY'
        os.environ['AWS_SECRET_ACCESS_KEY'] = 'DUMMY'
        self.mock_s3 = mock_s3()
        self.mock_s3.start()

        self.file_name = 'labels.json'
        self.temp_dir = RVConfig.get_tmp_dir()
        self.file_path = os.path.join(self.temp_dir.name, self.file_name)

        self.crs_transformer = DoubleCRSTransformer()
        self.geojson = {
            'type': 'FeatureCollection',
            'features': [{
                'type': 'Feature',
                'geometry': {
                    'type': 'Polygon',
                    'coordinates': [[[0., 0.], [0., 1.], [1., 1.], [1., 0.],
                                     [0., 0.]]]
                },
                'properties': {
                    'class_id': 1,
                    'score': 0.9
                }
            }, {
                'type': 'Feature',
                'geometry': {
                    'type': 'Polygon',
                    'coordinates': [[[1., 1.], [1., 2.], [2., 2.], [2., 1.],
                                     [1., 1.]]]
                },
                'properties': {
                    'score': 0.9,
                    'class_id': 2
                }
            }]
        }

        self.extent = Box.make_square(0, 0, 10)
        self.class_map = ClassMap([ClassItem(1, 'car'), ClassItem(2, 'house')])

        json_to_file(self.geojson, self.file_path)
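
setUp stashes the original AWS credentials in self.prev_keys and starts the moto S3 mock, so the test class needs a matching tearDown to undo both. A sketch of what it presumably looks like:

    def tearDown(self):
        # Stop the moto mock and remove the temp dir.
        self.mock_s3.stop()
        self.temp_dir.cleanup()
        # Restore (or unset) the credential env vars saved in setUp.
        for key, val in zip(('AWS_ACCESS_KEY_ID', 'AWS_SECRET_ACCESS_KEY'),
                            self.prev_keys):
            if val is None:
                os.environ.pop(key, None)
            else:
                os.environ[key] = val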
Example #10
    def build_source(self, geojson):
        json_to_file(geojson, self.uri)

        config = RasterSourceConfig.builder(rv.RASTERIZED_SOURCE) \
            .with_uri(self.uri) \
            .with_rasterizer_options(self.background_class_id) \
            .build()

        # Convert to proto and back as a test.
        config = RasterSourceConfig.builder(rv.RASTERIZED_SOURCE) \
            .from_proto(config.to_proto()) \
            .build()

        source = config.create_source(self.uri, self.crs_transformer,
                                      self.extent)

        return source
Example #11
    def setUp(self):
        self.crs_transformer = DoubleCRSTransformer()
        self.geojson = {
            'type': 'FeatureCollection',
            'features': [{
                'type': 'Feature',
                'geometry': {
                    'type': 'Polygon',
                    'coordinates': [[[0., 0.], [0., 1.], [1., 1.], [1., 0.],
                                     [0., 0.]]]
                },
                'properties': {
                    'class_name': 'car',
                    'class_id': 1
                }
            }, {
                'type': 'Feature',
                'geometry': {
                    'type': 'Polygon',
                    'coordinates': [[[1., 1.], [1., 2.], [2., 2.], [2., 1.],
                                     [1., 1.]]]
                },
                'properties': {
                    'class_name': 'house',
                    'class_id': 2
                }
            }]
        }

        self.class_map = ClassMap([ClassItem(1, 'car'), ClassItem(2, 'house')])

        class MockTaskConfig():
            def __init__(self, class_map):
                self.class_map = class_map

        self.task_config = MockTaskConfig(self.class_map)
        self.temp_dir = RVConfig.get_tmp_dir()
        self.uri = os.path.join(self.temp_dir.name, 'labels.json')

        json_to_file(self.geojson, self.uri)
Example #12
    def transform_geojson(self,
                          geojson,
                          line_bufs=None,
                          point_bufs=None,
                          crs_transformer=None,
                          to_map_coords=False):
        if crs_transformer is None:
            crs_transformer = IdentityCRSTransformer()
        class_map = ClassMap.construct_from(['building'])
        json_to_file(geojson, self.uri)
        b = GeoJSONVectorSourceConfigBuilder() \
            .with_uri(self.uri) \
            .with_buffers(line_bufs=line_bufs, point_bufs=point_bufs) \
            .build()
        msg = b.to_proto()
        config = GeoJSONVectorSourceConfig.from_proto(msg)
        source = config.create_source(crs_transformer=crs_transformer,
                                      class_map=class_map)
        return source.get_geojson(to_map_coords=to_map_coords)
Example #13
    def setUp(self):
        self.file_name = 'labels.json'
        self.temp_dir = RVConfig.get_tmp_dir()
        self.file_path = os.path.join(self.temp_dir.name, self.file_name)

        self.crs_transformer = DoubleCRSTransformer()
        self.geojson = {
            'type': 'FeatureCollection',
            'features': [{
                'type': 'Feature',
                'geometry': {
                    'type': 'Polygon',
                    'coordinates': [[[0., 0.], [0., 1.], [1., 1.], [1., 0.],
                                     [0., 0.]]]
                },
                'properties': {
                    'class_id': 1,
                    'score': 0.9
                }
            }, {
                'type': 'Feature',
                'geometry': {
                    'type': 'Polygon',
                    'coordinates': [[[1., 1.], [1., 2.], [2., 2.], [2., 1.],
                                     [1., 1.]]]
                },
                'properties': {
                    'score': 0.9,
                    'class_id': 2
                }
            }]
        }

        self.extent = Box.make_square(0, 0, 10)
        self.class_map = ClassMap([ClassItem(1, 'car'), ClassItem(2, 'house')])

        json_to_file(self.geojson, self.file_path)
Example #14
def compute_coco_eval(outputs, targets, num_labels):
    """Return mAP averaged over 0.5-0.95 using pycocotools eval.

    Note: boxes are in (ymin, xmin, ymax, xmax) format with values ranging
        from 0 to h or w.

    Args:
        outputs: (list) of length m containing dicts of form
            {'boxes': <tensor with shape (n, 4)>,
             'labels': <tensor with shape (n,)>,
             'scores': <tensor with shape (n,)>}
        targets: (list) of length m containing dicts of form
            {'boxes': <tensor with shape (n, 4)>,
             'labels': <tensor with shape (n,)>}
        num_labels: number of class labels
    """
    with tempfile.TemporaryDirectory() as tmp_dir:
        preds = get_coco_preds(outputs)
        # ap is undefined when there are no predicted boxes
        if len(preds) == 0:
            return None

        gt = get_coco_gt(targets, num_labels)
        gt_path = join(tmp_dir, 'gt.json')
        json_to_file(gt, gt_path)
        coco_gt = COCO(gt_path)

        # Workaround for pycocotools versions that reference the Python 2
        # `unicode` builtin; setting it to None avoids a NameError under
        # Python 3.
        pycocotools.coco.unicode = None
        coco_preds = coco_gt.loadRes(preds)

        ann_type = 'bbox'
        coco_eval = COCOeval(coco_gt, coco_preds, ann_type)

        coco_eval.evaluate()
        coco_eval.accumulate()
        coco_eval.summarize()

        return coco_eval
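
Callers typically read the summary metrics off the returned object: pycocotools populates coco_eval.stats in summarize(), with stats[0] holding AP averaged over IoU thresholds 0.5:0.95. For example:

coco_eval = compute_coco_eval(outputs, targets, num_labels)
# coco_eval is None when there were no predicted boxes.
map50_95 = None if coco_eval is None else coco_eval.stats[0]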
Example #15
def save_image_crop(image_uri,
                    image_crop_uri,
                    label_uri=None,
                    label_crop_uri=None,
                    size=600,
                    min_features=10):
    """Save a crop of an image to use for testing.

    If label_uri is set, the crop needs to contain >= min_features features.

    Args:
        image_uri: URI of original image
        image_crop_uri: URI of cropped image to save
        label_uri: optional URI of GeoJSON label file
        label_crop_uri: optional URI of cropped labels to save
        size: height and width of crop
        min_features: minimum number of label features the crop must contain

    Raises:
        ValueError: if no crop satisfying the min_features constraint can
            be found.
    """
    if not file_exists(image_crop_uri):
        print('Saving test crop to {}...'.format(image_crop_uri))
        old_environ = os.environ.copy()
        try:
            request_payer = S3FileSystem.get_request_payer()
            if request_payer == 'requester':
                os.environ['AWS_REQUEST_PAYER'] = request_payer
            im_dataset = rasterio.open(image_uri)
            h, w = im_dataset.height, im_dataset.width

            extent = Box(0, 0, h, w)
            windows = extent.get_windows(size, size)
            if label_uri is not None:
                crs_transformer = RasterioCRSTransformer.from_dataset(
                    im_dataset)
                vs = GeoJSONVectorSource(label_uri, crs_transformer)
                geojson = vs.get_geojson()
                geoms = []
                for f in geojson['features']:
                    g = shape(f['geometry'])
                    geoms.append(g)
                tree = STRtree(geoms)

            def p2m(x, y, z=None):
                return crs_transformer.pixel_to_map((x, y))

            # Handle the (unlikely) case of no windows at all.
            use_window = False
            for w in windows:
                use_window = True
                if label_uri is not None:
                    w_polys = tree.query(w.to_shapely())
                    use_window = len(w_polys) >= min_features
                    if use_window and label_crop_uri is not None:
                        print('Saving test crop labels to {}...'.format(
                            label_crop_uri))

                        label_crop_features = [
                            mapping(shapely.ops.transform(p2m, wp))
                            for wp in w_polys
                        ]
                        label_crop_json = {
                            'type': 'FeatureCollection',
                            'features': [{
                                'geometry': f
                            } for f in label_crop_features]
                        }
                        json_to_file(label_crop_json, label_crop_uri)

                if use_window:
                    w = w.rasterio_format()
                    im = im_dataset.read(window=w)

                    with tempfile.TemporaryDirectory() as tmp_dir:
                        crop_path = get_local_path(image_crop_uri, tmp_dir)
                        make_dir(crop_path, use_dirname=True)

                        meta = im_dataset.meta
                        meta['width'], meta['height'] = size, size
                        meta['transform'] = rasterio.windows.transform(
                            w, im_dataset.transform)

                        with rasterio.open(crop_path, 'w', **meta) as dst:
                            dst.colorinterp = im_dataset.colorinterp
                            dst.write(im)

                        upload_or_copy(crop_path, image_crop_uri)
                    break

            if not use_window:
                raise ValueError('Could not find a good crop.')
        finally:
            os.environ.clear()
            os.environ.update(old_environ)
Example #16
    def train(self, tmp_dir):
        """Train a model.

        This downloads any previous output saved to the train_uri,
        starts training (or resumes from a checkpoint), periodically
        syncs the contents of train_dir to train_uri, and syncs once more
        after training finishes.

        Args:
            tmp_dir: (str) path to temp directory
        """
        self.log_options()

        # Sync output of previous training run from cloud.
        train_uri = self.backend_opts.train_uri
        train_dir = get_local_path(train_uri, tmp_dir)
        make_dir(train_dir)
        sync_from_dir(train_uri, train_dir)

        # Get zip file for each group, and unzip them into chip_dir.
        chip_dir = join(tmp_dir, 'chips')
        make_dir(chip_dir)
        for zip_uri in list_paths(self.backend_opts.chip_uri, 'zip'):
            zip_path = download_if_needed(zip_uri, tmp_dir)
            with zipfile.ZipFile(zip_path, 'r') as zipf:
                zipf.extractall(chip_dir)

        # Setup data loader.
        batch_size = self.train_opts.batch_size
        chip_size = self.task_config.chip_size
        class_names = self.class_map.get_class_names()
        databunch = build_databunch(chip_dir, chip_size, batch_size,
                                    class_names)
        log.info(databunch)
        num_labels = len(databunch.label_names)
        if self.train_opts.debug:
            make_debug_chips(databunch, self.class_map, tmp_dir, train_uri)

        # Setup model (num_labels was already computed above).
        model = get_model(self.train_opts.model_arch,
                          num_labels,
                          pretrained=True)
        model = model.to(self.device)
        model_path = join(train_dir, 'model')

        # Load weights from a pretrained model.
        pretrained_uri = self.backend_opts.pretrained_uri
        if pretrained_uri:
            log.info('Loading weights from pretrained_uri: {}'.format(
                pretrained_uri))
            pretrained_path = download_if_needed(pretrained_uri, tmp_dir)
            model.load_state_dict(
                torch.load(pretrained_path, map_location=self.device))

        # Possibly resume training from checkpoint.
        start_epoch = 0
        train_state_path = join(train_dir, 'train_state.json')
        if isfile(train_state_path):
            log.info('Resuming from checkpoint: {}\n'.format(model_path))
            train_state = file_to_json(train_state_path)
            start_epoch = train_state['epoch'] + 1
            model.load_state_dict(
                torch.load(model_path, map_location=self.device))

        # Write header of log CSV file.
        metric_names = ['precision', 'recall', 'f1']
        log_path = join(train_dir, 'log.csv')
        if not isfile(log_path):
            with open(log_path, 'w') as log_file:
                log_writer = csv.writer(log_file)
                row = ['epoch', 'time', 'train_loss'] + metric_names
                log_writer.writerow(row)

        # Setup Tensorboard logging.
        if self.train_opts.log_tensorboard:
            log_dir = join(train_dir, 'tb-logs')
            make_dir(log_dir)
            tb_writer = SummaryWriter(log_dir=log_dir)
            if self.train_opts.run_tensorboard:
                log.info('Starting tensorboard process')
                tensorboard_process = Popen(
                    ['tensorboard', '--logdir={}'.format(log_dir)])
                terminate_at_exit(tensorboard_process)

        # Setup optimizer, loss, and LR scheduler.
        loss_fn = torch.nn.CrossEntropyLoss()
        lr = self.train_opts.lr
        opt = optim.Adam(model.parameters(), lr=lr)
        step_scheduler, epoch_scheduler = None, None
        num_epochs = self.train_opts.num_epochs

        if self.train_opts.one_cycle and num_epochs > 1:
            steps_per_epoch = len(databunch.train_ds) // batch_size
            total_steps = num_epochs * steps_per_epoch
            step_size_up = (num_epochs // 2) * steps_per_epoch
            step_size_down = total_steps - step_size_up
            step_scheduler = CyclicLR(opt,
                                      base_lr=lr / 10,
                                      max_lr=lr,
                                      step_size_up=step_size_up,
                                      step_size_down=step_size_down,
                                      cycle_momentum=False)
            for _ in range(start_epoch * steps_per_epoch):
                step_scheduler.step()

        # Training loop.
        for epoch in range(start_epoch, num_epochs):
            # Train one epoch.
            log.info('-----------------------------------------------------')
            log.info('epoch: {}'.format(epoch))
            start = time.time()
            train_loss = train_epoch(model, self.device, databunch.train_dl,
                                     opt, loss_fn, step_scheduler)
            if epoch_scheduler:
                epoch_scheduler.step()
            log.info('train loss: {}'.format(train_loss))

            # Validate one epoch.
            metrics = validate_epoch(model, self.device, databunch.valid_dl,
                                     num_labels)
            log.info('validation metrics: {}'.format(metrics))

            # Print elapsed time for epoch.
            end = time.time()
            epoch_time = datetime.timedelta(seconds=end - start)
            log.info('epoch elapsed time: {}'.format(epoch_time))

            # Save model and state.
            torch.save(model.state_dict(), model_path)
            train_state = {'epoch': epoch}
            json_to_file(train_state, train_state_path)

            # Append to log CSV file.
            with open(log_path, 'a') as log_file:
                log_writer = csv.writer(log_file)
                row = [epoch, epoch_time, train_loss]
                row += [metrics[k] for k in metric_names]
                log_writer.writerow(row)

            # Write to Tensorboard log.
            if self.train_opts.log_tensorboard:
                for key, val in metrics.items():
                    tb_writer.add_scalar(key, val, epoch)
                tb_writer.add_scalar('train_loss', train_loss, epoch)
                for name, param in model.named_parameters():
                    tb_writer.add_histogram(name, param, epoch)

            if (train_uri.startswith('s3://')
                    and (((epoch + 1) % self.train_opts.sync_interval) == 0)):
                sync_to_dir(train_dir, train_uri)

        # Close Tensorboard.
        if self.train_opts.log_tensorboard:
            tb_writer.close()
            if self.train_opts.run_tensorboard:
                tensorboard_process.terminate()

        # Since model is exported every epoch, we need some other way to
        # show that training is finished.
        str_to_file('done!', self.backend_opts.train_done_uri)

        # Sync output to cloud.
        sync_to_dir(train_dir, self.backend_opts.train_uri)
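
The one-cycle branch above sizes the CyclicLR phases so the learning rate ramps from lr / 10 up to lr over the first half of training and back down over the second half, then fast-forwards the scheduler when resuming from a checkpoint. A quick worked check of the step arithmetic, with assumed numbers:

num_epochs, batch_size, num_samples = 10, 8, 1000  # assumed values
steps_per_epoch = num_samples // batch_size           # 125
total_steps = num_epochs * steps_per_epoch            # 1250
step_size_up = (num_epochs // 2) * steps_per_epoch    # 625
step_size_down = total_steps - step_size_up           # 625
assert step_size_up + step_size_down == total_steps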
Example #17
def save_image_crop(image_uri,
                    image_crop_uri,
                    label_uri=None,
                    label_crop_uri=None,
                    size=600,
                    min_features=10,
                    vector_labels=True):
    """Save a crop of an image to use for testing.

    If label_uri is set, the crop needs to contain >= min_features features.

    Args:
        image_uri: URI of original image
        image_crop_uri: URI of cropped image to save
        label_uri: optional URI of label file
        label_crop_uri: optional URI of cropped labels to save
        size: height and width of crop
        min_features: minimum number of label features the crop must contain
        vector_labels: if True, treat label_uri as vector (GeoJSON) labels;
            otherwise treat it as a raster and crop it with the same window

    Raises:
        ValueError: if no crop satisfying the min_features constraint can
            be found.
    """
    if not file_exists(image_crop_uri):
        print('Saving test crop to {}...'.format(image_crop_uri))
        old_environ = os.environ.copy()
        try:
            request_payer = S3FileSystem.get_request_payer()
            if request_payer == 'requester':
                os.environ['AWS_REQUEST_PAYER'] = request_payer
            im_dataset = rasterio.open(image_uri)
            h, w = im_dataset.height, im_dataset.width

            extent = Box(0, 0, h, w)
            windows = extent.get_windows(size, size)
            if label_uri and vector_labels:
                crs_transformer = RasterioCRSTransformer.from_dataset(
                    im_dataset)
                vs = GeoJSONVectorSource(label_uri, crs_transformer)
                geojson = vs.get_geojson()
                geoms = []
                for f in geojson['features']:
                    g = shape(f['geometry'])
                    geoms.append(g)
                tree = STRtree(geoms)

            def p2m(x, y, z=None):
                return crs_transformer.pixel_to_map((x, y))

            # Handle the (unlikely) case of no windows at all.
            use_window = False
            for w in windows:
                use_window = True
                if label_uri and vector_labels:
                    w_polys = tree.query(w.to_shapely())
                    use_window = len(w_polys) >= min_features
                    if use_window and label_crop_uri is not None:
                        print('Saving test crop labels to {}...'.format(
                            label_crop_uri))

                        label_crop_features = [
                            mapping(shapely.ops.transform(p2m, wp))
                            for wp in w_polys
                        ]
                        label_crop_json = {
                            'type': 'FeatureCollection',
                            'features': [{
                                'geometry': f
                            } for f in label_crop_features]
                        }
                        json_to_file(label_crop_json, label_crop_uri)

                if use_window:
                    crop_image(image_uri, w, image_crop_uri)

                    if not vector_labels and label_uri and label_crop_uri:
                        crop_image(label_uri, w, label_crop_uri)

                    break

            if not use_window:
                raise ValueError('Could not find a good crop.')
        finally:
            os.environ.clear()
            os.environ.update(old_environ)
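
Example #17 delegates the cropping itself to a crop_image helper that is not shown on this page. A sketch of what it might do, reconstructed from the inline rasterio code in Example #15 (the signature and the width/height handling are assumptions):

def crop_image(image_uri, window, crop_uri):
    # Read the window from the source raster and write it out as a new
    # raster whose transform is shifted to the window's origin.
    im_dataset = rasterio.open(image_uri)
    rio_window = window.rasterio_format()
    im = im_dataset.read(window=rio_window)

    with tempfile.TemporaryDirectory() as tmp_dir:
        crop_path = get_local_path(crop_uri, tmp_dir)
        make_dir(crop_path, use_dirname=True)

        meta = im_dataset.meta
        meta['width'] = window.get_width()
        meta['height'] = window.get_height()
        meta['transform'] = rasterio.windows.transform(
            rio_window, im_dataset.transform)

        with rasterio.open(crop_path, 'w', **meta) as dst:
            dst.colorinterp = im_dataset.colorinterp
            dst.write(im)

        upload_or_copy(crop_path, crop_uri)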