def _test_class_inf(self, props, exp_class_ids, default_class_id=None):
    """Check that class inference assigns exp_class_ids to a feature with props."""
    feature = {
        'properties': props,
        'geometry': {
            'type': 'Point',
            'coordinates': [1, 1]
        }
    }
    json_to_file({'type': 'FeatureCollection', 'features': [feature]}, self.uri)

    class_map = ClassMap.construct_from(['building', 'car', 'tree'])
    class_id_to_filter = {
        1: ['==', 'type', 'building'],
        2: ['any', ['==', 'type', 'car'], ['==', 'type', 'auto']]
    }

    builder = GeoJSONVectorSourceConfigBuilder()
    builder = builder.with_class_inference(
        class_id_to_filter=class_id_to_filter,
        default_class_id=default_class_id)
    builder = builder.with_uri(self.uri)
    msg = builder.build().to_proto()
    config = GeoJSONVectorSourceConfig.from_proto(msg)

    source = config.create_source(
        crs_transformer=IdentityCRSTransformer(), class_map=class_map)
    out_features = source.get_geojson()['features']
    class_ids = [f['properties']['class_id'] for f in out_features]
    self.assertEqual(class_ids, exp_class_ids)
def run(runner, cfg_path, commands, arg, splits):
    """Run commands against every pipeline config produced by a config module.

    Args:
        runner: (str) name of the runner registered in the registry
        cfg_path: (str) dotted import path of a module defining get_config
            or get_configs
        commands: (list of str) commands to run, or empty/falsy to use each
            pipeline's own default commands
        arg: key/value pairs passed through to get_config(s); the string
            values 'true'/'false' are converted to booleans
        splits: (int) number of splits for splittable commands

    Raises:
        Exception: if the module defines neither get_config nor get_configs.
    """
    cfg_module = importlib.import_module(cfg_path)
    get_configs = getattr(cfg_module, 'get_config', None)
    if get_configs is None:
        get_configs = getattr(cfg_module, 'get_configs', None)
    if get_configs is None:
        raise Exception(
            'Module {} must define get_config or get_configs.'.format(
                cfg_path))

    # Convert CLI-style 'true'/'false' strings to real booleans.
    args = {}
    for k, v in dict(arg).items():
        if v.lower() == 'true':
            v = True
        elif v.lower() == 'false':
            v = False
        args[k] = v

    cfgs = get_configs(runner, **args)
    if not isinstance(cfgs, list):
        cfgs = [cfgs]

    # Instantiate the runner once, under a new name. The previous code
    # reassigned `runner` inside the loop, so on the second config the
    # registry was passed a runner instance instead of its name and crashed.
    runner_obj = registry.get_runner(runner)()
    for cfg in cfgs:
        cfg.update_all()
        cfg_dict = cfg.dict()
        cfg_json_uri = join(cfg.root_uri, 'pipeline.json')
        json_to_file(cfg_dict, cfg_json_uri)

        pipeline = cfg.get_pipeline()
        # Default to this pipeline's own commands without mutating
        # `commands`, so one pipeline's defaults can't leak into the next.
        cmds = commands if commands else pipeline.commands
        runner_obj.run(cfg_json_uri, pipeline, cmds, num_splits=splits)
def setUp(self):
    """Build fixture geojson, class map, boxes, and an STRtree of geometries."""
    self.crs_transformer = DoubleCRSTransformer()
    features = [{
        'type': 'Feature',
        'geometry': {
            'type': 'MultiPolygon',
            'coordinates': [[[[0., 0.], [0., 2.], [2., 2.], [2., 0.],
                              [0., 0.]]]]
        },
        'properties': {
            'class_name': 'car',
            'class_id': 1,
            'score': 0.0
        }
    }, {
        'type': 'Feature',
        'geometry': {
            'type': 'Polygon',
            'coordinates': [[[2., 2.], [2., 4.], [4., 4.], [4., 2.],
                             [2., 2.]]]
        },
        'properties': {
            'score': 0.0,
            'class_name': 'house',
            'class_id': 2
        }
    }]
    self.geojson = {'type': 'FeatureCollection', 'features': features}
    self.class_map = ClassMap([ClassItem(1, 'car'), ClassItem(2, 'house')])

    class MockTaskConfig():
        def __init__(self, class_map):
            self.class_map = class_map

    self.task_config = MockTaskConfig(self.class_map)

    self.box1 = Box.make_square(0, 0, 4)
    self.box2 = Box.make_square(4, 4, 4)
    self.class_id1 = 1
    self.class_id2 = 2
    self.background_class_id = 3

    # Tag each geometry with its class_id so lookups after an STRtree
    # query can recover the class.
    geoms = []
    for feature in self.geojson['features']:
        geom = shape(feature['geometry'])
        geom.class_id = feature['properties']['class_id']
        geoms.append(geom)
    self.str_tree = STRtree(geoms)

    self.file_name = 'labels.json'
    self.temp_dir = RVConfig.get_tmp_dir()
    self.uri = os.path.join(self.temp_dir.name, self.file_name)
    json_to_file(self.geojson, self.uri)
def eval_model(self, split):
    """Evaluate the model on a split, saving metrics JSON and prediction plots."""
    print('Evaluating on {} set...'.format(split))
    dataloader = self.get_dataloader(split)
    metrics = self.validate_epoch(dataloader)
    print('metrics: {}'.format(metrics))
    metrics_path = join(self.output_dir, '{}_metrics.json'.format(split))
    json_to_file(metrics, metrics_path)
    self.plot_predictions(split)
def process_scene_data(self, scene, data, tmp_dir):
    """Process each scene's training data.

    This writes {scene_id}/{scene_id}-{ind}.png and
    {scene_id}/{scene_id}-labels.json in COCO format.

    Args:
        scene: Scene
        data: TrainingData
        tmp_dir: (str) path to temp directory under which output is written

    Returns:
        backend-specific data-structures consumed by backend's
        process_sceneset_results
    """
    scene_dir = join(tmp_dir, str(scene.id))
    labels_path = join(scene_dir, '{}-labels.json'.format(scene.id))

    make_dir(scene_dir)
    images = []
    annotations = []
    # COCO 'categories' section, one entry per class in the task's class map.
    categories = [{
        'id': item.id,
        'name': item.name
    } for item in self.task_config.class_map.get_items()]

    for im_ind, (chip, window, labels) in enumerate(data):
        im_id = '{}-{}'.format(scene.id, im_ind)
        fn = '{}.png'.format(im_id)
        chip_path = join(scene_dir, fn)
        save_img(chip, chip_path)

        images.append({
            'file_name': fn,
            'id': im_id,
            'height': chip.shape[0],
            'width': chip.shape[1]
        })

        # Convert box coordinates from the global (scene) frame to the
        # chip's window frame before writing annotations.
        npboxes = labels.get_npboxes()
        npboxes = ObjectDetectionLabels.global_to_local(npboxes, window)
        for box_ind, (box, class_id) in enumerate(
                zip(npboxes, labels.get_class_ids())):
            # box is (ymin, xmin, ymax, xmax); COCO bbox format is
            # [xmin, ymin, width, height].
            bbox = [box[1], box[0], box[3] - box[1], box[2] - box[0]]
            bbox = [int(i) for i in bbox]
            annotations.append({
                'id': '{}-{}'.format(im_id, box_ind),
                'image_id': im_id,
                'bbox': bbox,
                'category_id': int(class_id)
            })

    coco_dict = {
        'images': images,
        'annotations': annotations,
        'categories': categories
    }
    json_to_file(coco_dict, labels_path)

    return scene_dir
def __init__(self, cfg: LearnerConfig, tmp_dir, model_path=None):
    """Set up a Learner for training, or for inference if model_path is given.

    Args:
        cfg: learner configuration
        tmp_dir: (str) temp directory used for local copies of cloud output
        model_path: optional path to saved model weights; when given, the
            model is loaded and put in eval mode and all training setup
            (datasets, optimizer, schedulers, output dir) is skipped

    Raises:
        Exception: if model_path is given but no file exists there.

    NOTE(review): source was flattened to one line; the scoping below
    (training setup inside the else branch) is the only reading consistent
    with inference-only use of model_path — confirm against upstream.
    """
    self.cfg = cfg
    self.tmp_dir = tmp_dir

    # NOTE(review): hard-coded /opt/data cache paths — presumably matches
    # the container layout this runs in; confirm.
    torch_cache_dir = '/opt/data/torch-cache'
    os.environ['TORCH_HOME'] = torch_cache_dir
    self.device = 'cuda' if torch.cuda.is_available() else 'cpu'
    self.data_cache_dir = '/opt/data/data-cache'
    make_dir(self.data_cache_dir)

    self.model = self.build_model()
    self.model.to(self.device)

    if model_path is not None:
        # Inference mode: load weights and freeze in eval mode.
        if isfile(model_path):
            self.model.load_state_dict(
                torch.load(model_path, map_location=self.device))
        else:
            raise Exception(
                'Model could not be found at {}'.format(model_path))
        self.model.eval()
    else:
        # Training mode: set up output dir, data, optimizer, schedulers.
        print(self.cfg)

        self.train_ds = None
        self.train_dl = None
        self.valid_ds = None
        self.valid_dl = None
        self.test_ds = None
        self.test_dl = None

        if cfg.output_uri.startswith('s3://'):
            # Work against a local mirror of the S3 output dir; pull down
            # any previous run unless overfit_mode.
            self.output_dir = get_local_path(cfg.output_uri, tmp_dir)
            make_dir(self.output_dir, force_empty=True)
            if not cfg.overfit_mode:
                self.sync_from_cloud()
        else:
            self.output_dir = cfg.output_uri
            make_dir(self.output_dir)

        self.last_model_path = join(self.output_dir, 'last-model.pth')
        self.config_path = join(self.output_dir, 'config.json')
        self.train_state_path = join(self.output_dir, 'train-state.json')
        self.log_path = join(self.output_dir, 'log.csv')
        self.model_bundle_path = join(self.output_dir, 'model-bundle.zip')
        self.metric_names = self.build_metric_names()

        json_to_file(self.cfg.dict(), self.config_path)
        self.load_init_weights()
        self.load_checkpoint()
        self.opt = self.build_optimizer()
        self.build_data()
        self.start_epoch = self.get_start_epoch()
        self.steps_per_epoch = len(
            self.train_ds) // self.cfg.solver.batch_sz
        self.step_scheduler = self.build_step_scheduler()
        self.epoch_scheduler = self.build_epoch_scheduler()
def save(self, labels):
    """Save labels to URI."""
    geojson = boxes_to_geojson(
        labels.get_boxes(),
        labels.get_class_ids().tolist(),
        self.crs_transformer,
        self.class_map,
        scores=labels.get_scores().tolist())
    json_to_file(geojson, self.uri)
def save(self, labels):
    """Save labels to URI if writable.

    Note that if the grid is inferred from polygons, only the grid will be
    written, not the original polygons.
    """
    cells = labels.get_cells()
    cell_class_ids = labels.get_class_ids()
    cell_scores = list(labels.get_scores())
    geojson = boxes_to_geojson(
        cells,
        cell_class_ids,
        self.crs_transformer,
        self.class_map,
        scores=cell_scores)
    json_to_file(geojson, self.uri)
def setUp(self):
    """Start a mock S3 endpoint with dummy creds and write fixture labels."""
    # Stash the real AWS creds so tearDown can restore them.
    self.prev_keys = (os.environ.get('AWS_ACCESS_KEY_ID'),
                      os.environ.get('AWS_SECRET_ACCESS_KEY'))
    os.environ['AWS_ACCESS_KEY_ID'] = 'DUMMY'
    os.environ['AWS_SECRET_ACCESS_KEY'] = 'DUMMY'
    self.mock_s3 = mock_s3()
    self.mock_s3.start()

    self.file_name = 'labels.json'
    self.temp_dir = RVConfig.get_tmp_dir()
    self.file_path = os.path.join(self.temp_dir.name, self.file_name)

    self.crs_transformer = DoubleCRSTransformer()
    features = [{
        'type': 'Feature',
        'geometry': {
            'type': 'Polygon',
            'coordinates': [[[0., 0.], [0., 1.], [1., 1.], [1., 0.],
                             [0., 0.]]]
        },
        'properties': {
            'class_id': 1,
            'score': 0.9
        }
    }, {
        'type': 'Feature',
        'geometry': {
            'type': 'Polygon',
            'coordinates': [[[1., 1.], [1., 2.], [2., 2.], [2., 1.],
                             [1., 1.]]]
        },
        'properties': {
            'score': 0.9,
            'class_id': 2
        }
    }]
    self.geojson = {'type': 'FeatureCollection', 'features': features}

    self.extent = Box.make_square(0, 0, 10)
    self.class_map = ClassMap([ClassItem(1, 'car'), ClassItem(2, 'house')])
    json_to_file(self.geojson, self.file_path)
def build_source(self, geojson):
    """Write geojson to disk and build a rasterized source from it."""
    json_to_file(geojson, self.uri)

    builder = RasterSourceConfig.builder(rv.RASTERIZED_SOURCE)
    builder = builder.with_uri(self.uri)
    builder = builder.with_rasterizer_options(self.background_class_id)
    config = builder.build()

    # Round-trip through proto as a test.
    config = RasterSourceConfig.builder(rv.RASTERIZED_SOURCE) \
        .from_proto(config.to_proto()) \
        .build()

    return config.create_source(self.uri, self.crs_transformer, self.extent)
def setUp(self):
    """Build fixture geojson, class map, mock task config, and write labels."""
    self.crs_transformer = DoubleCRSTransformer()
    features = [{
        'type': 'Feature',
        'geometry': {
            'type': 'Polygon',
            'coordinates': [[[0., 0.], [0., 1.], [1., 1.], [1., 0.],
                             [0., 0.]]]
        },
        'properties': {
            'class_name': 'car',
            'class_id': 1
        }
    }, {
        'type': 'Feature',
        'geometry': {
            'type': 'Polygon',
            'coordinates': [[[1., 1.], [1., 2.], [2., 2.], [2., 1.],
                             [1., 1.]]]
        },
        'properties': {
            'class_name': 'house',
            'class_id': 2
        }
    }]
    self.geojson = {'type': 'FeatureCollection', 'features': features}
    self.class_map = ClassMap([ClassItem(1, 'car'), ClassItem(2, 'house')])

    class MockTaskConfig():
        def __init__(self, class_map):
            self.class_map = class_map

    self.task_config = MockTaskConfig(self.class_map)

    self.temp_dir = RVConfig.get_tmp_dir()
    self.uri = os.path.join(self.temp_dir.name, 'labels.json')
    json_to_file(self.geojson, self.uri)
def transform_geojson(self,
                      geojson,
                      line_bufs=None,
                      point_bufs=None,
                      crs_transformer=None,
                      to_map_coords=False):
    """Round-trip geojson through a GeoJSONVectorSource with buffer options."""
    if crs_transformer is None:
        crs_transformer = IdentityCRSTransformer()
    class_map = ClassMap.construct_from(['building'])
    json_to_file(geojson, self.uri)

    builder = GeoJSONVectorSourceConfigBuilder()
    builder = builder.with_uri(self.uri)
    builder = builder.with_buffers(line_bufs=line_bufs, point_bufs=point_bufs)
    msg = builder.build().to_proto()
    config = GeoJSONVectorSourceConfig.from_proto(msg)

    source = config.create_source(
        crs_transformer=crs_transformer, class_map=class_map)
    return source.get_geojson(to_map_coords=to_map_coords)
def setUp(self):
    """Build fixture geojson with scored polygons and write it to a temp file."""
    self.file_name = 'labels.json'
    self.temp_dir = RVConfig.get_tmp_dir()
    self.file_path = os.path.join(self.temp_dir.name, self.file_name)

    self.crs_transformer = DoubleCRSTransformer()
    features = [{
        'type': 'Feature',
        'geometry': {
            'type': 'Polygon',
            'coordinates': [[[0., 0.], [0., 1.], [1., 1.], [1., 0.],
                             [0., 0.]]]
        },
        'properties': {
            'class_id': 1,
            'score': 0.9
        }
    }, {
        'type': 'Feature',
        'geometry': {
            'type': 'Polygon',
            'coordinates': [[[1., 1.], [1., 2.], [2., 2.], [2., 1.],
                             [1., 1.]]]
        },
        'properties': {
            'score': 0.9,
            'class_id': 2
        }
    }]
    self.geojson = {'type': 'FeatureCollection', 'features': features}

    self.extent = Box.make_square(0, 0, 10)
    self.class_map = ClassMap([ClassItem(1, 'car'), ClassItem(2, 'house')])
    json_to_file(self.geojson, self.file_path)
def compute_coco_eval(outputs, targets, num_labels):
    """Return mAP averaged over 0.5-0.95 using pycocotools eval.

    Note: boxes are in (ymin, xmin, ymax, xmax) format with values ranging
    from 0 to h or w.

    Args:
        outputs: (list) of length m containing dicts of form
            {'boxes': <tensor with shape (n, 4)>,
            'labels': <tensor with shape (n,)>,
            'scores': <tensor with shape (n,)>}
        targets: (list) of length m containing dicts of form
            {'boxes': <tensor with shape (n, 4)>,
            'labels': <tensor with shape (n,)>}
        num_labels: number of label categories for the ground truth

    Returns:
        the COCOeval object after evaluate/accumulate/summarize, or None
        when there are no predicted boxes (AP is undefined then).
    """
    with tempfile.TemporaryDirectory() as tmp_dir:
        preds = get_coco_preds(outputs)
        # ap is undefined when there are no predicted boxes
        if len(preds) == 0:
            return None

        # COCO() only reads from a file, so write ground truth to a temp file.
        gt = get_coco_gt(targets, num_labels)
        gt_path = join(tmp_dir, 'gt.json')
        json_to_file(gt, gt_path)
        coco_gt = COCO(gt_path)

        # Workaround: pycocotools references the Python 2-only `unicode`
        # builtin; setting it to None avoids a NameError under Python 3.
        # NOTE(review): confirm still needed for the pinned pycocotools.
        pycocotools.coco.unicode = None
        coco_preds = coco_gt.loadRes(preds)

        ann_type = 'bbox'
        coco_eval = COCOeval(coco_gt, coco_preds, ann_type)

        coco_eval.evaluate()
        coco_eval.accumulate()
        coco_eval.summarize()

        return coco_eval
def save_image_crop(image_uri,
                    image_crop_uri,
                    label_uri=None,
                    label_crop_uri=None,
                    size=600,
                    min_features=10):
    """Save a crop of an image to use for testing.

    If label_uri is set, the crop needs to cover >= min_features.

    Args:
        image_uri: URI of original image
        image_crop_uri: URI of cropped image to save
        label_uri: optional URI of GeoJSON file
        label_crop_uri: optional URI to save cropped labels to (only used
            when label_uri is also set)
        size: height and width of crop
        min_features: minimum number of label features a usable crop covers

    Raises:
        ValueError if cannot find a crop satisfying min_features constraint.
    """
    if file_exists(image_crop_uri):
        return

    print('Saving test crop to {}...'.format(image_crop_uri))
    old_environ = os.environ.copy()
    try:
        request_payer = S3FileSystem.get_request_payer()
        if request_payer == 'requester':
            os.environ['AWS_REQUEST_PAYER'] = request_payer
        im_dataset = rasterio.open(image_uri)
        h, w = im_dataset.height, im_dataset.width

        extent = Box(0, 0, h, w)
        windows = extent.get_windows(size, size)

        if label_uri is not None:
            crs_transformer = RasterioCRSTransformer.from_dataset(im_dataset)
            vs = GeoJSONVectorSource(label_uri, crs_transformer)
            geojson = vs.get_geojson()
            geoms = [shape(f['geometry']) for f in geojson['features']]
            tree = STRtree(geoms)

            def p2m(x, y, z=None):
                return crs_transformer.pixel_to_map((x, y))

        # False until a usable window is found. Previously this name was
        # only bound inside the loop, so an empty window list raised
        # NameError instead of the intended ValueError. The loop variable is
        # named `window` (not `w`) to avoid shadowing the image width above.
        use_window = False
        for window in windows:
            use_window = True
            if label_uri is not None:
                w_polys = tree.query(window.to_shapely())
                use_window = len(w_polys) >= min_features
                # This branch is now guarded by the same condition that
                # defines w_polys; previously it ran even with
                # label_uri=None and crashed on the undefined name.
                if use_window and label_crop_uri is not None:
                    print('Saving test crop labels to {}...'.format(
                        label_crop_uri))
                    label_crop_features = [
                        mapping(shapely.ops.transform(p2m, wp))
                        for wp in w_polys
                    ]
                    label_crop_json = {
                        'type': 'FeatureCollection',
                        'features': [{
                            'geometry': f
                        } for f in label_crop_features]
                    }
                    json_to_file(label_crop_json, label_crop_uri)

            if use_window:
                crop_window = window.rasterio_format()
                im = im_dataset.read(window=crop_window)

                with tempfile.TemporaryDirectory() as tmp_dir:
                    crop_path = get_local_path(image_crop_uri, tmp_dir)
                    make_dir(crop_path, use_dirname=True)

                    meta = im_dataset.meta
                    meta['width'], meta['height'] = size, size
                    meta['transform'] = rasterio.windows.transform(
                        crop_window, im_dataset.transform)

                    with rasterio.open(crop_path, 'w', **meta) as dst:
                        dst.colorinterp = im_dataset.colorinterp
                        dst.write(im)

                    upload_or_copy(crop_path, image_crop_uri)
                break

        if not use_window:
            raise ValueError('Could not find a good crop.')
    finally:
        os.environ.clear()
        os.environ.update(old_environ)
def train(self, tmp_dir):
    """Train a model.

    This downloads any previous output saved to the train_uri,
    starts training (or resumes from a checkpoint), periodically
    syncs contents of train_dir to train_uri and after training finishes.

    Args:
        tmp_dir: (str) path to temp directory
    """
    self.log_options()

    # Sync output of previous training run from cloud.
    train_uri = self.backend_opts.train_uri
    train_dir = get_local_path(train_uri, tmp_dir)
    make_dir(train_dir)
    sync_from_dir(train_uri, train_dir)

    # Get zip file for each group, and unzip them into chip_dir.
    chip_dir = join(tmp_dir, 'chips')
    make_dir(chip_dir)
    for zip_uri in list_paths(self.backend_opts.chip_uri, 'zip'):
        zip_path = download_if_needed(zip_uri, tmp_dir)
        with zipfile.ZipFile(zip_path, 'r') as zipf:
            zipf.extractall(chip_dir)

    # Setup data loader.
    batch_size = self.train_opts.batch_size
    chip_size = self.task_config.chip_size
    class_names = self.class_map.get_class_names()
    databunch = build_databunch(chip_dir, chip_size, batch_size,
                                class_names)
    log.info(databunch)
    num_labels = len(databunch.label_names)
    if self.train_opts.debug:
        make_debug_chips(databunch, self.class_map, tmp_dir, train_uri)

    # Setup model
    # NOTE(review): num_labels is recomputed here; identical to the value
    # computed above.
    num_labels = len(databunch.label_names)
    model = get_model(self.train_opts.model_arch, num_labels,
                      pretrained=True)
    model = model.to(self.device)
    model_path = join(train_dir, 'model')

    # Load weights from a pretrained model.
    pretrained_uri = self.backend_opts.pretrained_uri
    if pretrained_uri:
        log.info('Loading weights from pretrained_uri: {}'.format(
            pretrained_uri))
        pretrained_path = download_if_needed(pretrained_uri, tmp_dir)
        model.load_state_dict(
            torch.load(pretrained_path, map_location=self.device))

    # Possibly resume training from checkpoint.
    start_epoch = 0
    train_state_path = join(train_dir, 'train_state.json')
    if isfile(train_state_path):
        log.info('Resuming from checkpoint: {}\n'.format(model_path))
        train_state = file_to_json(train_state_path)
        start_epoch = train_state['epoch'] + 1
        model.load_state_dict(
            torch.load(model_path, map_location=self.device))

    # Write header of log CSV file.
    metric_names = ['precision', 'recall', 'f1']
    log_path = join(train_dir, 'log.csv')
    if not isfile(log_path):
        with open(log_path, 'w') as log_file:
            log_writer = csv.writer(log_file)
            row = ['epoch', 'time', 'train_loss'] + metric_names
            log_writer.writerow(row)

    # Setup Tensorboard logging.
    if self.train_opts.log_tensorboard:
        log_dir = join(train_dir, 'tb-logs')
        make_dir(log_dir)
        tb_writer = SummaryWriter(log_dir=log_dir)
        if self.train_opts.run_tensorboard:
            log.info('Starting tensorboard process')
            tensorboard_process = Popen(
                ['tensorboard', '--logdir={}'.format(log_dir)])
            terminate_at_exit(tensorboard_process)

    # Setup optimizer, loss, and LR scheduler.
    loss_fn = torch.nn.CrossEntropyLoss()
    lr = self.train_opts.lr
    opt = optim.Adam(model.parameters(), lr=lr)
    step_scheduler, epoch_scheduler = None, None
    num_epochs = self.train_opts.num_epochs

    if self.train_opts.one_cycle and num_epochs > 1:
        steps_per_epoch = len(databunch.train_ds) // batch_size
        total_steps = num_epochs * steps_per_epoch
        step_size_up = (num_epochs // 2) * steps_per_epoch
        step_size_down = total_steps - step_size_up
        step_scheduler = CyclicLR(
            opt,
            base_lr=lr / 10,
            max_lr=lr,
            step_size_up=step_size_up,
            step_size_down=step_size_down,
            cycle_momentum=False)
        # Fast-forward the scheduler to the resume point so the LR picks
        # up where the checkpointed run left off.
        for _ in range(start_epoch * steps_per_epoch):
            step_scheduler.step()

    # Training loop.
    for epoch in range(start_epoch, num_epochs):
        # Train one epoch.
        log.info('-----------------------------------------------------')
        log.info('epoch: {}'.format(epoch))
        start = time.time()
        train_loss = train_epoch(model, self.device, databunch.train_dl,
                                 opt, loss_fn, step_scheduler)
        if epoch_scheduler:
            epoch_scheduler.step()
        log.info('train loss: {}'.format(train_loss))

        # Validate one epoch.
        metrics = validate_epoch(model, self.device, databunch.valid_dl,
                                 num_labels)
        log.info('validation metrics: {}'.format(metrics))

        # Print elapsed time for epoch.
        end = time.time()
        epoch_time = datetime.timedelta(seconds=end - start)
        log.info('epoch elapsed time: {}'.format(epoch_time))

        # Save model and state.
        torch.save(model.state_dict(), model_path)
        train_state = {'epoch': epoch}
        json_to_file(train_state, train_state_path)

        # Append to log CSV file.
        with open(log_path, 'a') as log_file:
            log_writer = csv.writer(log_file)
            row = [epoch, epoch_time, train_loss]
            row += [metrics[k] for k in metric_names]
            log_writer.writerow(row)

        # Write to Tensorboard log.
        if self.train_opts.log_tensorboard:
            for key, val in metrics.items():
                tb_writer.add_scalar(key, val, epoch)
            tb_writer.add_scalar('train_loss', train_loss, epoch)
            for name, param in model.named_parameters():
                tb_writer.add_histogram(name, param, epoch)

        # Periodically sync training output to S3.
        if (train_uri.startswith('s3://')
                and (((epoch + 1) % self.train_opts.sync_interval) == 0)):
            sync_to_dir(train_dir, train_uri)

    # Close Tensorboard.
    if self.train_opts.log_tensorboard:
        tb_writer.close()
        if self.train_opts.run_tensorboard:
            tensorboard_process.terminate()

    # Since model is exported every epoch, we need some other way to
    # show that training is finished.
    str_to_file('done!', self.backend_opts.train_done_uri)

    # Sync output to cloud.
    sync_to_dir(train_dir, self.backend_opts.train_uri)
def save_image_crop(image_uri,
                    image_crop_uri,
                    label_uri=None,
                    label_crop_uri=None,
                    size=600,
                    min_features=10,
                    vector_labels=True):
    """Save a crop of an image to use for testing.

    If label_uri is set, the crop needs to cover >= min_features.

    Args:
        image_uri: URI of original image
        image_crop_uri: URI of cropped image to save
        label_uri: optional URI of label file
        label_crop_uri: optional URI of cropped labels to save
        size: height and width of crop
        min_features: minimum number of label features a usable crop covers
        vector_labels: if True, labels are GeoJSON vectors; otherwise the
            label raster is cropped with crop_image

    Raises:
        ValueError if cannot find a crop satisfying min_features constraint.
    """
    if file_exists(image_crop_uri):
        return

    print('Saving test crop to {}...'.format(image_crop_uri))
    old_environ = os.environ.copy()
    try:
        request_payer = S3FileSystem.get_request_payer()
        if request_payer == 'requester':
            os.environ['AWS_REQUEST_PAYER'] = request_payer
        im_dataset = rasterio.open(image_uri)
        h, w = im_dataset.height, im_dataset.width

        extent = Box(0, 0, h, w)
        windows = extent.get_windows(size, size)

        if label_uri and vector_labels:
            crs_transformer = RasterioCRSTransformer.from_dataset(im_dataset)
            vs = GeoJSONVectorSource(label_uri, crs_transformer)
            geojson = vs.get_geojson()
            geoms = [shape(f['geometry']) for f in geojson['features']]
            tree = STRtree(geoms)

            def p2m(x, y, z=None):
                return crs_transformer.pixel_to_map((x, y))

        # False until a usable window is found. Previously this name was
        # only bound inside the loop, so an empty window list raised
        # NameError instead of the intended ValueError. The loop variable is
        # named `window` (not `w`) to avoid shadowing the image width above.
        use_window = False
        for window in windows:
            use_window = True
            if label_uri and vector_labels:
                w_polys = tree.query(window.to_shapely())
                use_window = len(w_polys) >= min_features
                # This branch is now guarded by the same condition that
                # defines w_polys; previously it ran even when
                # vector_labels=False (or label_uri=None) and crashed on
                # the undefined name.
                if use_window and label_crop_uri is not None:
                    print('Saving test crop labels to {}...'.format(
                        label_crop_uri))
                    label_crop_features = [
                        mapping(shapely.ops.transform(p2m, wp))
                        for wp in w_polys
                    ]
                    label_crop_json = {
                        'type': 'FeatureCollection',
                        'features': [{
                            'geometry': f
                        } for f in label_crop_features]
                    }
                    json_to_file(label_crop_json, label_crop_uri)

            if use_window:
                crop_image(image_uri, window, image_crop_uri)
                if not vector_labels and label_uri and label_crop_uri:
                    crop_image(label_uri, window, label_crop_uri)
                break

        if not use_window:
            raise ValueError('Could not find a good crop.')
    finally:
        os.environ.clear()
        os.environ.update(old_environ)