def test_evaluator(self):
    output_uri = join(self.tmp_dir.name, 'out.json')
    scenes = [self.get_scene(0), self.get_scene(1)]
    evaluator = SemanticSegmentationEvaluator(self.class_config, output_uri,
                                              None)
    evaluator.process(scenes, self.tmp_dir.name)

    eval_json = file_to_json(output_uri)
    exp_eval_json = file_to_json(data_file_path('expected-eval.json'))
    self.assertDictEqual(eval_json, exp_eval_json)
def _compare_evals(
        root_uri_old: str,
        root_uri_new: str,
        float_tol: float = 1e-3,
        exclude_keys: list = ['conf_mat', 'count_error', 'per_scene']) -> None:
    """Compare outputs of the eval command for two runs of an example."""
    console_heading('Comparing keys and values in eval.json files...')
    eval_json_old = join(root_uri_old, 'eval.json')
    eval_json_new = join(root_uri_new, 'eval.json')
    eval_old = file_to_json(eval_json_old)
    eval_new = file_to_json(eval_json_new)
    _compare_dicts(
        eval_old, eval_new, float_tol=float_tol, exclude_keys=exclude_keys)
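# NOTE: `_compare_dicts` is called above but not defined in this snippet. The
# helper below is only an illustrative sketch of what such a helper might look
# like, assuming eval.json values are nested dicts/lists with float metrics;
# it is not the original implementation.
def _compare_dicts(d_old: dict,
                   d_new: dict,
                   float_tol: float = 1e-3,
                   exclude_keys: tuple = ()) -> None:
    """Assert that two eval dicts agree, comparing floats to a tolerance."""
    for key, val_old in d_old.items():
        if key in exclude_keys:
            continue
        assert key in d_new, 'key missing from new eval: {}'.format(key)
        val_new = d_new[key]
        if isinstance(val_old, dict):
            _compare_dicts(val_old, val_new, float_tol, exclude_keys)
        elif isinstance(val_old, list):
            # e.g. the per-class 'overall' list; recurse into dict items
            for item_old, item_new in zip(val_old, val_new):
                if isinstance(item_old, dict):
                    _compare_dicts(item_old, item_new, float_tol, exclude_keys)
                else:
                    assert item_old == item_new
        elif isinstance(val_old, float):
            assert abs(val_old - val_new) <= float_tol, (
                '{}: {} vs {} (tol={})'.format(key, val_old, val_new,
                                               float_tol))
        else:
            assert val_old == val_new, '{}: {} vs {}'.format(
                key, val_old, val_new)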
def from_model_bundle(model_bundle_uri: str, tmp_dir: str):
    """Create a Learner from a model bundle."""
    model_bundle_path = download_if_needed(model_bundle_uri, tmp_dir)
    model_bundle_dir = join(tmp_dir, 'model-bundle')
    unzip(model_bundle_path, model_bundle_dir)

    config_path = join(model_bundle_dir, 'pipeline-config.json')
    model_path = join(model_bundle_dir, 'model.pth')

    config_dict = file_to_json(config_path)
    config_dict = upgrade_config(config_dict)
    cfg = build_config(config_dict)

    hub_dir = join(model_bundle_dir, MODULES_DIRNAME)
    model_def_path = None
    loss_def_path = None

    # retrieve existing model definition, if available
    ext_cfg = cfg.learner.model.external_def
    if ext_cfg is not None:
        model_def_path = get_hubconf_dir_from_cfg(ext_cfg, parent=hub_dir)
        log.info(f'Using model definition found in bundle: {model_def_path}')

    # retrieve existing loss function definition, if available
    ext_cfg = cfg.learner.solver.external_loss_def
    if ext_cfg is not None:
        loss_def_path = get_hubconf_dir_from_cfg(ext_cfg, parent=hub_dir)
        log.info(f'Using loss definition found in bundle: {loss_def_path}')

    return cfg.learner.build(
        tmp_dir=tmp_dir,
        model_path=model_path,
        model_def_path=model_def_path,
        loss_def_path=loss_def_path)
def test_accounts_for_aoi(self):
    class_config = ClassConfig(names=['car', 'building', 'background'])

    label_source_uri = data_file_path('evaluator/cc-label-filtered.json')
    label_source_cfg = ChipClassificationLabelSourceConfig(
        vector_source=GeoJSONVectorSourceConfig(
            uri=label_source_uri, default_class_id=None))

    label_store_uri = data_file_path('evaluator/cc-label-full.json')
    label_store_cfg = ChipClassificationGeoJSONStoreConfig(
        uri=label_store_uri)

    raster_source_uri = data_file_path('evaluator/cc-label-img-blank.tif')
    raster_source_cfg = RasterioSourceConfig(uris=[raster_source_uri])

    aoi_uri = data_file_path('evaluator/cc-label-aoi.json')
    s = SceneConfig(
        id='test',
        raster_source=raster_source_cfg,
        label_source=label_source_cfg,
        label_store=label_store_cfg,
        aoi_uris=[aoi_uri])

    with rv_config.get_tmp_dir() as tmp_dir:
        scene = s.build(class_config, tmp_dir)
        output_uri = os.path.join(tmp_dir, 'eval.json')

        evaluator = ChipClassificationEvaluatorConfig(
            output_uri=output_uri).build(class_config)
        evaluator.process([scene], tmp_dir)

        overall = file_to_json(output_uri)['overall']
        for item in overall:
            self.assertEqual(item['f1'], 1.0)
def collect_eval_dir(root_uri):
    """Print the f1 of the last 'overall' entry in each eval.json under root_uri."""
    eval_json_uris = list_paths(join(root_uri, 'eval'), ext='eval.json')
    for eval_json_uri in eval_json_uris:
        eval_json = file_to_json(eval_json_uri)
        print(basename(dirname(eval_json_uri)))
        print(eval_json['overall'][-1]['f1'])
        print()
def collect_experiment(key, root_uri, output_dir, get_pred_package=False):
    """Download the eval (and optionally the predict package) for an experiment
    and print its overall metrics.
    """
    print('\nCollecting experiment {}...\n'.format(key))

    if root_uri.startswith('s3://'):
        predict_package_uris = list_paths(
            join(root_uri, key, 'bundle'), ext='predict_package.zip')
        eval_json_uris = list_paths(
            join(root_uri, key, 'eval'), ext='eval.json')
    else:
        predict_package_uris = glob.glob(
            join(root_uri, key, 'bundle', '*', 'predict_package.zip'))
        eval_json_uris = glob.glob(
            join(root_uri, key, 'eval', '*', 'eval.json'))

    if len(predict_package_uris) > 1 or len(eval_json_uris) > 1:
        print('Cannot collect from key with multiple experiments!!!')
        return

    if len(predict_package_uris) == 0 or len(eval_json_uris) == 0:
        print('Missing output!!!')
        return

    predict_package_uri = predict_package_uris[0]
    eval_json_uri = eval_json_uris[0]

    make_dir(join(output_dir, key))
    if get_pred_package:
        download_or_copy(predict_package_uri, join(output_dir, key))

    download_or_copy(eval_json_uri, join(output_dir, key))

    eval_json = file_to_json(join(output_dir, key, 'eval.json'))
    pprint.pprint(eval_json['overall'], indent=4)
def test_vector_evaluator_with_aoi(self):
    output_uri = join(self.tmp_dir.name, 'raster-out.json')
    vector_output_uri = join(self.tmp_dir.name, 'vector-out.json')
    scenes = [self.get_vector_scene(0, use_aoi=True)]
    evaluator = SemanticSegmentationEvaluator(self.class_config, output_uri,
                                              vector_output_uri)
    evaluator.process(scenes, self.tmp_dir.name)

    vector_eval_json = file_to_json(vector_output_uri)
    exp_vector_eval_json = file_to_json(
        data_file_path('expected-vector-eval-with-aoi.json'))

    # NOTE: The precision and recall values found in the file
    # `expected-vector-eval.json` are equal to fractions of the
    # form (n-1)/n for n <= 7 which can be seen to be (and have
    # been manually verified to be) correct.
    self.assertDictEqual(vector_eval_json, exp_vector_eval_json)
def is_label_item(item: Item) -> bool:
    """Resolve each extension schema into a dict, then check if it has the
    title of "Label Extension".
    """
    for ext_schema_uri in item.stac_extensions:
        schema = file_to_json(ext_schema_uri)
        if schema['title'].lower() == 'label extension':
            return True
    return False
def check_eval(test, tmp_dir):
    """Compare the actual eval for a test against the expected eval.

    Returns a list of TestErrors describing any mismatches.
    """
    errors = []

    actual_eval_path = get_actual_eval_path(test, tmp_dir)
    expected_eval_path = get_expected_eval_path(test)

    if isfile(actual_eval_path):
        expected_eval = file_to_json(expected_eval_path)['overall']
        actual_eval = file_to_json(actual_eval_path)['overall']

        for expected_item in expected_eval:
            class_name = expected_item['class_name']
            actual_item = next(
                filter(lambda x: x['class_name'] == class_name, actual_eval))
            errors.extend(check_eval_item(test, expected_item, actual_item))
    else:
        errors.append(
            TestError(test, 'actual eval file does not exist',
                      actual_eval_path))

    return errors
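# NOTE: `check_eval_item` is used above but not shown in this snippet. The
# helper below is a hypothetical sketch of what it might do, assuming each
# eval item has 'class_name' and 'f1' fields and that
# TestError(test, message, details) is the constructor signature; the f1
# tolerance is also an assumption.
def check_eval_item(test, expected_item, actual_item, f1_tol=0.01):
    errors = []
    class_name = expected_item['class_name']
    expected_f1 = expected_item['f1'] or 0.0
    actual_f1 = actual_item['f1'] or 0.0
    if abs(expected_f1 - actual_f1) > f1_tol:
        errors.append(
            TestError(
                test, 'eval f1 differs from expected value',
                'class {}: expected {}, got {}'.format(
                    class_name, expected_f1, actual_f1)))
    return errors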
def from_model_bundle(model_bundle_uri: str, tmp_dir: str):
    """Create a Learner from a model bundle."""
    model_bundle_path = download_if_needed(model_bundle_uri, tmp_dir)
    model_bundle_dir = join(tmp_dir, 'model-bundle')
    unzip(model_bundle_path, model_bundle_dir)

    config_path = join(model_bundle_dir, 'pipeline-config.json')
    model_path = join(model_bundle_dir, 'model.pth')

    config_dict = file_to_json(config_path)
    config_dict = upgrade_config(config_dict)
    cfg = build_config(config_dict)

    return cfg.learner.build(tmp_dir, model_path=model_path)
def _get_run_df(run_dirs):
    # Combine options/hyperparams and metrics for a run into a df.
    dfs = []
    for run_dir in run_dirs:
        key = '-'.join(run_dir.split('/')[-2:])
        pipeline_cfg_uri = join(run_dir, 'pipeline-config.json')
        options = get_pipeline_options(key, pipeline_cfg_uri)

        metrics_uri = join(run_dir, 'train/test_metrics.json')
        metrics_dict = file_to_json(metrics_uri)

        df = pd.DataFrame()
        for ind, (key, val) in enumerate(options.items()):
            df.insert(ind, key, [val])
        df.insert(ind + 1, 'building_f1', metrics_dict['building_f1'])
        dfs.append(df)
    return pd.concat(dfs, ignore_index=True)
def get_pipeline_options(key, pipeline_cfg_uri):
    """Returns a dict with the options/hyperparameters for a pipeline run."""
    pipeline_dict = file_to_json(pipeline_cfg_uri)
    solver = pipeline_dict['backend']['solver']
    data = pipeline_dict['backend']['data']

    num_epochs = solver['num_epochs']
    train_sz = data['train_sz_rel']

    opts = {
        'key': key,
        'num_epochs': num_epochs,
        'train_sz': train_sz,
    }
    return opts
def _run_command(cfg_json_uri: str,
                 command: str,
                 split_ind: Optional[int] = None,
                 num_splits: Optional[int] = None,
                 runner: Optional[str] = None):
    """Run a single command using a serialized PipelineConfig.

    Args:
        cfg_json_uri: URI of a JSON file with a serialized PipelineConfig
        command: name of command to run
        split_ind: the index that a split command should assume
        num_splits: the total number of splits to use
        runner: the name of the runner to use
    """
    pipeline_cfg_dict = file_to_json(cfg_json_uri)
    rv_config_dict = pipeline_cfg_dict.get('rv_config')
    rv_config.set_everett_config(
        profile=rv_config.profile, config_overrides=rv_config_dict)

    tmp_dir_obj = rv_config.get_tmp_dir()
    tmp_dir = tmp_dir_obj.name

    cfg = build_config(pipeline_cfg_dict)
    pipeline = cfg.build(tmp_dir)

    if num_splits is not None and split_ind is None and runner is not None:
        runner = registry.get_runner(runner)()
        split_ind = runner.get_split_ind()

    command_fn = getattr(pipeline, command)

    if num_splits is not None and num_splits > 1:
        msg = 'Running {} command split {}/{}...'.format(
            command, split_ind + 1, num_splits)
        click.secho(msg, fg='green', bold=True)
        command_fn(split_ind=split_ind, num_splits=num_splits)
    else:
        msg = 'Running {} command...'.format(command)
        click.secho(msg, fg='green', bold=True)
        command_fn()
def _collect(key, root_uri, output_dir, collect_dir, get_model_bundle=False):
    """Collect the eval (and optionally the model bundle) for an experiment
    and print its overall metrics.
    """
    print('\nCollecting experiment {}...\n'.format(key))

    model_bundle_uri = join(root_uri, output_dir, 'bundle', 'model-bundle.zip')
    eval_uri = join(root_uri, output_dir, 'eval', 'eval.json')

    if not file_exists(eval_uri):
        print('Missing eval!')
        return

    if not file_exists(model_bundle_uri):
        print('Missing model bundle!')
        return

    make_dir(join(collect_dir, key))
    if get_model_bundle:
        download_or_copy(model_bundle_uri, join(collect_dir, key))

    download_or_copy(eval_uri, join(collect_dir, key))

    eval_json = file_to_json(join(collect_dir, key, 'eval.json'))
    pprint.pprint(eval_json['overall'], indent=4)
def __init__(self, img_dir, annotation_uri, transform=None):
    """Read a COCO-style annotation JSON and index annotations by image id."""
    self.img_dir = img_dir
    self.annotation_uri = annotation_uri
    self.transform = transform

    self.img_ids = []
    self.id2ann = {}
    ann_json = file_to_json(annotation_uri)
    for img in ann_json['images']:
        img_id = img['id']
        self.img_ids.append(img_id)
        self.id2ann[img_id] = {
            'image': img['file_name'],
            'bboxes': [],
            'category_id': []
        }
    for ann in ann_json['annotations']:
        img_id = ann['image_id']
        bboxes = self.id2ann[img_id]['bboxes']
        category_ids = self.id2ann[img_id]['category_id']
        bboxes.append(ann['bbox'])
        category_ids.append(ann['category_id'])
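# NOTE: the following __len__/__getitem__ pair is an illustrative sketch of
# how the index built in __init__ might be consumed; it is not the original
# implementation. It assumes images live under self.img_dir and that
# self.transform follows the albumentations bbox convention
# (transform(image=..., bboxes=..., category_id=...)). Assumed imports:
# from os.path import join; from PIL import Image; import numpy as np.
def __len__(self):
    return len(self.img_ids)

def __getitem__(self, ind):
    img_id = self.img_ids[ind]
    ann = self.id2ann[img_id]
    img = np.array(Image.open(join(self.img_dir, ann['image'])))
    bboxes = ann['bboxes']
    category_ids = ann['category_id']
    if self.transform is not None:
        out = self.transform(
            image=img, bboxes=bboxes, category_id=category_ids)
        img = out['image']
        bboxes = out['bboxes']
        category_ids = out['category_id']
    return img, bboxes, category_ids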
def get_vector_scene(self, class_id, use_aoi=False):
    gt_uri = data_file_path('{}-gt-polygons.geojson'.format(class_id))
    pred_uri = data_file_path('{}-pred-polygons.geojson'.format(class_id))

    scene_id = str(class_id)
    rs = MockRasterSource(channel_order=[0, 1, 3], num_channels=3)
    rs.set_raster(np.zeros((10, 10, 3)))

    crs_transformer = IdentityCRSTransformer()
    extent = Box.make_square(0, 0, 360)

    config = RasterizedSourceConfig(
        vector_source=GeoJSONVectorSourceConfig(
            uri=gt_uri, default_class_id=0),
        rasterizer_config=RasterizerConfig(background_class_id=1))
    gt_rs = config.build(self.class_config, crs_transformer, extent)
    gt_ls = SemanticSegmentationLabelSource(gt_rs, self.null_class_id)

    config = RasterizedSourceConfig(
        vector_source=GeoJSONVectorSourceConfig(
            uri=pred_uri, default_class_id=0),
        rasterizer_config=RasterizerConfig(background_class_id=1))
    pred_rs = config.build(self.class_config, crs_transformer, extent)
    pred_ls = SemanticSegmentationLabelSource(pred_rs, self.null_class_id)
    pred_ls.vector_output = [
        PolygonVectorOutputConfig(uri=pred_uri, denoise=0, class_id=class_id)
    ]

    if use_aoi:
        aoi_uri = data_file_path('{}-aoi.geojson'.format(class_id))
        aoi_geojson = file_to_json(aoi_uri)
        aoi_polygons = [shape(aoi_geojson['features'][0]['geometry'])]
        return Scene(scene_id, rs, gt_ls, pred_ls, aoi_polygons)

    return Scene(scene_id, rs, gt_ls, pred_ls)