Exemplo n.º 1
0
def run(runner: str, cfg_module: str, commands: List[str],
        arg: List[Tuple[str, str]], splits: int):
    """Subcommand to run commands within pipelines using runner named RUNNER.

    Args:
        runner: name of runner to use
        cfg_module: name of module with `get_configs` function that returns
            PipelineConfigs
        commands: names of commands to run within pipeline. The order in which
            to run them is based on the Pipeline.commands attribute. If this is
            omitted, all commands will be run.
    """
    tmp_dir_obj = rv_config.get_tmp_dir()
    tmp_dir = tmp_dir_obj.name

    cfg_module = importlib.import_module(cfg_module)
    args = dict(arg)
    args = convert_bool_args(args)
    cfgs = get_configs(cfg_module, runner, args)
    runner = registry.get_runner(runner)()

    for cfg in cfgs:
        cfg.update()
        cfg.rv_config = rv_config.get_config_dict(registry.rv_config_schema)
        cfg.recursive_validate_config()

        cfg_dict = cfg.dict()
        cfg_json_uri = cfg.get_config_uri()
        json_to_file(cfg_dict, cfg_json_uri)

        pipeline = cfg.build(tmp_dir)
        if not commands:
            commands = pipeline.commands

        runner.run(cfg_json_uri, pipeline, commands, num_splits=splits)
Exemplo n.º 2
0
    def _test_class_inf(self, props, exp_class_ids, default_class_id=None):
        geojson = {
            'type':
            'FeatureCollection',
            'features': [{
                'properties': props,
                'geometry': {
                    'type': 'Point',
                    'coordinates': [1, 1]
                }
            }]
        }
        json_to_file(geojson, self.uri)

        class_config = ClassConfig(names=['building', 'car', 'tree'])
        class_id_to_filter = {
            0: ['==', 'type', 'building'],
            1: ['any', ['==', 'type', 'car'], ['==', 'type', 'auto']]
        }
        vs_cfg = GeoJSONVectorSourceConfig(
            uri=self.uri,
            class_id_to_filter=class_id_to_filter,
            default_class_id=default_class_id)
        vs = vs_cfg.build(class_config, IdentityCRSTransformer())
        trans_geojson = vs.get_geojson()
        class_ids = [
            f['properties']['class_id'] for f in trans_geojson['features']
        ]
        self.assertEqual(class_ids, exp_class_ids)
Exemplo n.º 3
0
def run(runner: str, cfg_module: str, commands: List[str],
        arg: List[Tuple[str, str]], splits: int):
    """Run COMMANDS within pipelines in CFG_MODULE using RUNNER.

    RUNNER: name of the Runner to use

    CFG_MODULE: the module with `get_configs` function that returns PipelineConfigs.
    This can either be a Python module path or a local path to a .py file.

    COMMANDS: space separated sequence of commands to run within pipeline. The order in
    which to run them is based on the Pipeline.commands attribute. If this is omitted,
    all commands will be run.
    """
    tmp_dir_obj = rv_config.get_tmp_dir()
    tmp_dir = tmp_dir_obj.name

    args = dict(arg)
    args = convert_bool_args(args)
    cfgs = get_configs(cfg_module, runner, args)
    runner = registry.get_runner(runner)()

    for cfg in cfgs:
        cfg.update()
        cfg.rv_config = rv_config.get_config_dict(registry.rv_config_schema)
        cfg.recursive_validate_config()

        cfg_dict = cfg.dict()
        cfg_json_uri = cfg.get_config_uri()
        json_to_file(cfg_dict, cfg_json_uri)

        pipeline = cfg.build(tmp_dir)
        if not commands:
            commands = pipeline.commands

        runner.run(cfg_json_uri, pipeline, commands, num_splits=splits)
Exemplo n.º 4
0
 def eval_model(self, split):
     log.info('Evaluating on {} set...'.format(split))
     dl = self.get_dataloader(split)
     metrics = self.validate_epoch(dl)
     log.info('metrics: {}'.format(metrics))
     json_to_file(metrics,
                  join(self.output_dir, '{}_metrics.json'.format(split)))
     self.plot_predictions(split)
Exemplo n.º 5
0
    def build_source(self, geojson, all_touched=False):
        json_to_file(geojson, self.uri)

        config = RasterizedSourceConfig(
            vector_source=GeoJSONVectorSourceConfig(uri=self.uri, default_class_id=None),
            rasterizer_config=RasterizerConfig(
                background_class_id=self.background_class_id,
                all_touched=all_touched))
        source = config.build(self.class_config, self.crs_transformer, self.extent)
        return source
 def save(self, labels):
     """Save labels to URI."""
     boxes = labels.get_boxes()
     class_ids = labels.get_class_ids().tolist()
     scores = labels.get_scores().tolist()
     geojson = boxes_to_geojson(boxes,
                                class_ids,
                                self.crs_transformer,
                                self.class_config,
                                scores=scores)
     json_to_file(geojson, self.uri)
Exemplo n.º 7
0
    def setUp(self):
        self.crs_transformer = DoubleCRSTransformer()
        self.geojson = {
            'type':
            'FeatureCollection',
            'features': [{
                'type': 'Feature',
                'geometry': {
                    'type':
                    'MultiPolygon',
                    'coordinates': [[[[0., 0.], [0., 2.], [2., 2.], [2., 0.],
                                      [0., 0.]]]]
                },
                'properties': {
                    'class_name': 'car',
                    'class_id': 0,
                    'score': 0.0
                }
            }, {
                'type': 'Feature',
                'geometry': {
                    'type':
                    'Polygon',
                    'coordinates': [[[2., 2.], [2., 4.], [4., 4.], [4., 2.],
                                     [2., 2.]]]
                },
                'properties': {
                    'score': 0.0,
                    'class_name': 'house',
                    'class_id': 1
                }
            }]
        }

        self.class_config = ClassConfig(names=['car', 'house'])

        self.box1 = Box.make_square(0, 0, 4)
        self.box2 = Box.make_square(4, 4, 4)
        self.class_id1 = 0
        self.class_id2 = 1
        self.background_class_id = 2

        geoms = []
        for f in self.geojson['features']:
            g = shape(f['geometry'])
            g.class_id = f['properties']['class_id']
            geoms.append(g)
        self.str_tree = STRtree(geoms)

        self.file_name = 'labels.json'
        self.tmp_dir = rv_config.get_tmp_dir()
        self.uri = os.path.join(self.tmp_dir.name, self.file_name)
        json_to_file(self.geojson, self.uri)
    def save(self, labels):
        """Save labels to URI if writable.

        Note that if the grid is inferred from polygons, only the grid will be
        written, not the original polygons.
        """
        boxes = labels.get_cells()
        class_ids = labels.get_class_ids()
        scores = list(labels.get_scores())
        geojson = boxes_to_geojson(boxes,
                                   class_ids,
                                   self.crs_transformer,
                                   self.class_config,
                                   scores=scores)
        json_to_file(geojson, self.uri)
Exemplo n.º 9
0
    def eval_model(self, split: str):
        """Evaluate model using a particular dataset split.

        Gets validation metrics and saves them along with prediction plots.

        Args:
            split: the dataset split to use: train, valid, or test.
        """
        log.info('Evaluating on {} set...'.format(split))
        dl = self.get_dataloader(split)
        metrics = self.validate_epoch(dl)
        log.info('metrics: {}'.format(metrics))
        json_to_file(metrics,
                     join(self.output_dir, '{}_metrics.json'.format(split)))
        self.plot_predictions(split)
Exemplo n.º 10
0
 def transform_geojson(self,
                       geojson,
                       line_bufs=None,
                       point_bufs=None,
                       crs_transformer=None,
                       to_map_coords=False):
     if crs_transformer is None:
         crs_transformer = IdentityCRSTransformer()
     class_config = ClassConfig(names=['building'])
     json_to_file(geojson, self.uri)
     cfg = GeoJSONVectorSourceConfig(uri=self.uri,
                                     line_bufs=line_bufs,
                                     point_bufs=point_bufs,
                                     default_class_id=0)
     source = cfg.build(class_config, crs_transformer)
     return source.get_geojson(to_map_coords=to_map_coords)
    def __exit__(self, type, value, traceback):
        for split in ['train', 'valid']:
            if len(self.splits[split]['images']) > 0:
                split_dir = join(self.sample_dir, split)
                labels_path = join(split_dir, 'labels.json')

                images = self.splits[split]['images']
                annotations = self.splits[split]['annotations']
                coco_dict = {
                    'images': images,
                    'annotations': annotations,
                    'categories': self.categories
                }
                json_to_file(coco_dict, labels_path)

        super().__exit__(type, value, traceback)
Exemplo n.º 12
0
    def setUp(self):
        self.file_name = 'labels.json'
        self.tmp_dir = rv_config.get_tmp_dir()
        self.file_path = os.path.join(self.tmp_dir.name, self.file_name)

        self.crs_transformer = DoubleCRSTransformer()
        self.geojson = {
            'type':
            'FeatureCollection',
            'features': [{
                'type': 'Feature',
                'geometry': {
                    'type':
                    'Polygon',
                    'coordinates': [[[0., 0.], [0., 1.], [1., 1.], [1., 0.],
                                     [0., 0.]]]
                },
                'properties': {
                    'class_id': 0,
                    'score': 0.9
                }
            }, {
                'type': 'Feature',
                'geometry': {
                    'type':
                    'Polygon',
                    'coordinates': [[[1., 1.], [1., 2.], [2., 2.], [2., 1.],
                                     [1., 1.]]]
                },
                'properties': {
                    'score': 0.9,
                    'class_id': 1
                }
            }]
        }

        self.extent = Box.make_square(0, 0, 10)
        self.class_config = ClassConfig(names=['car', 'house'])
        json_to_file(self.geojson, self.file_path)
def compute_coco_eval(outputs, targets, num_class_ids):
    """Return mAP averaged over 0.5-0.95 using pycocotools eval.

    Note: boxes are in (ymin, xmin, ymax, xmax) format with values ranging
        from 0 to h or w.

    Args:
        outputs: (list) of length m containing dicts of form
            {'boxes': <tensor with shape (n, 4)>,
             'class_ids': <tensor with shape (n,)>,
             'scores': <tensor with shape (n,)>}
        targets: (list) of length m containing dicts of form
            {'boxes': <tensor with shape (n, 4)>,
             'class_ids': <tensor with shape (n,)>}
    """
    with tempfile.TemporaryDirectory() as tmp_dir:
        preds = get_coco_preds(outputs)
        # ap is undefined when there are no predicted boxes
        if len(preds) == 0:
            return None

        gt = get_coco_gt(targets, num_class_ids)
        gt_path = join(tmp_dir, 'gt.json')
        json_to_file(gt, gt_path)
        coco_gt = COCO(gt_path)

        pycocotools.coco.unicode = None
        coco_preds = coco_gt.loadRes(preds)

        ann_type = 'bbox'
        coco_eval = COCOeval(coco_gt, coco_preds, ann_type)

        coco_eval.evaluate()
        coco_eval.accumulate()
        coco_eval.summarize()

        return coco_eval
Exemplo n.º 14
0
    def __init__(self,
                 cfg: LearnerConfig,
                 tmp_dir: str,
                 model_path: Optional[str] = None):
        """Constructor.

        Args:
            cfg: configuration
            tmp_dir: root of temp dirs
            model_path: a local path to model weights. If provided, the model is loaded
                and it is assumed that this Learner will be used for prediction only.
        """
        self.cfg = cfg
        self.tmp_dir = tmp_dir

        # TODO make cache dirs configurable
        torch_cache_dir = '/opt/data/torch-cache'
        os.environ['TORCH_HOME'] = torch_cache_dir
        self.device = 'cuda' if torch.cuda.is_available() else 'cpu'
        self.data_cache_dir = '/opt/data/data-cache'
        make_dir(self.data_cache_dir)

        self.model = self.build_model()
        self.model.to(self.device)

        if model_path is not None:
            if isfile(model_path):
                self.model.load_state_dict(
                    torch.load(model_path, map_location=self.device))
            else:
                raise Exception(
                    'Model could not be found at {}'.format(model_path))
            self.model.eval()
        else:
            log.info(self.cfg)

            # ds = dataset, dl = dataloader
            self.train_ds = None
            self.train_dl = None
            self.valid_ds = None
            self.valid_dl = None
            self.test_ds = None
            self.test_dl = None

            if cfg.output_uri.startswith('s3://'):
                self.output_dir = get_local_path(cfg.output_uri, tmp_dir)
                make_dir(self.output_dir, force_empty=True)
                if not cfg.overfit_mode:
                    self.sync_from_cloud()
            else:
                self.output_dir = cfg.output_uri
                make_dir(self.output_dir)

            self.last_model_path = join(self.output_dir, 'last-model.pth')
            self.config_path = join(self.output_dir, 'config.json')
            self.train_state_path = join(self.output_dir, 'train-state.json')
            self.log_path = join(self.output_dir, 'log.csv')
            model_bundle_fn = basename(cfg.get_model_bundle_uri())
            self.model_bundle_path = join(self.output_dir, model_bundle_fn)
            self.metric_names = self.build_metric_names()

            json_to_file(self.cfg.dict(), self.config_path)
            self.load_init_weights()
            self.load_checkpoint()
            self.opt = self.build_optimizer()
            self.setup_data()
            self.start_epoch = self.get_start_epoch()
            self.steps_per_epoch = len(
                self.train_ds) // self.cfg.solver.batch_sz
            self.step_scheduler = self.build_step_scheduler()
            self.epoch_scheduler = self.build_epoch_scheduler()
            self.setup_tensorboard()
Exemplo n.º 15
0
def save_image_crop(image_uri,
                    image_crop_uri,
                    label_uri=None,
                    label_crop_uri=None,
                    size=600,
                    min_features=10,
                    vector_labels=True,
                    class_config=None):
    """Save a crop of an image to use for testing.

    If label_uri is set, the crop needs to cover >= min_features.

    Args:
        image_uri: URI of original image
        image_crop_uri: URI of cropped image to save
        label_uri: optional URI of label file
        label_crop_uri: optional URI of cropped labels to save
        size: height and width of crop

    Raises:
        ValueError if cannot find a crop satisfying min_features constraint.
    """
    if not file_exists(image_crop_uri):
        print('Saving test crop to {}...'.format(image_crop_uri))
        old_environ = os.environ.copy()
        try:
            request_payer = S3FileSystem.get_request_payer()
            if request_payer == 'requester':
                os.environ['AWS_REQUEST_PAYER'] = request_payer
            im_dataset = rasterio.open(image_uri)
            h, w = im_dataset.height, im_dataset.width

            extent = Box(0, 0, h, w)
            windows = extent.get_windows(size, size)
            if label_uri and vector_labels:
                crs_transformer = RasterioCRSTransformer.from_dataset(
                    im_dataset)
                geojson_vs_config = GeoJSONVectorSourceConfig(uri=label_uri)
                vs = geojson_vs_config.build(class_config, crs_transformer)
                geojson = vs.get_geojson()
                geoms = []
                for f in geojson['features']:
                    g = shape(f['geometry'])
                    geoms.append(g)
                tree = STRtree(geoms)

            def p2m(x, y, z=None):
                return crs_transformer.pixel_to_map((x, y))

            for w in windows:
                use_window = True
                if label_uri and vector_labels:
                    w_polys = tree.query(w.to_shapely())
                    use_window = len(w_polys) >= min_features
                    if use_window and label_crop_uri is not None:
                        print('Saving test crop labels to {}...'.format(
                            label_crop_uri))

                        label_crop_features = [
                            mapping(transform(p2m, wp)) for wp in w_polys
                        ]
                        label_crop_json = {
                            'type':
                            'FeatureCollection',
                            'features': [{
                                'geometry': f
                            } for f in label_crop_features]
                        }
                        json_to_file(label_crop_json, label_crop_uri)

                if use_window:
                    crop_image(image_uri, w, image_crop_uri)

                    if not vector_labels and label_uri and label_crop_uri:
                        crop_image(label_uri, w, label_crop_uri)

                    break

            if not use_window:
                raise ValueError('Could not find a good crop.')
        finally:
            os.environ.clear()
            os.environ.update(old_environ)