def exp_main(self, raw_uri, root_uri, test=False, learning_rates='0.001'):
    """Build one Spacenet Vegas experiment per learning rate.

    Every experiment is a semantic segmentation run on the buildings
    target; they differ only in the learning rate handed to
    ``build_backend``. All of them use the same analyze key and chip key so
    that analyze and chip output is shared between the experiments.

    Args:
        raw_uri: (str) directory of raw data (the root of the Spacenet
            dataset)
        root_uri: (str) root directory for experiment output
        test: (bool) if True, run a very small experiment as a test and
            generate debug output
        learning_rates: (str) comma-delimited list of learning rates to use

    Returns:
        A list of built experiment configs, one per learning rate, in the
        order the rates appear in ``learning_rates``.
    """
    test = str_to_bool(test)
    target = BUILDINGS
    rates = learning_rates.split(',')
    task_type = rv.SEMANTIC_SEGMENTATION.upper()
    task_slug = task_type.lower()
    spacenet_config = SpacenetConfig.create(raw_uri, target)
    shared_key = '{}_{}'.format(target, task_slug)

    validate_options(task_type, target)

    task = build_task(task_type, spacenet_config.get_class_map())
    # Need to use stats_analyzer because imagery is uint16.
    analyzer = rv.AnalyzerConfig.builder(rv.STATS_ANALYZER).build()
    dataset = build_dataset(task, spacenet_config, test)

    # Reduce the number of training scenes to at most 128.
    dataset.train_scenes = dataset.train_scenes[:128]

    def experiment_for(rate):
        """Build the experiment config for a single learning rate."""
        backend = build_backend(task, test, rate)
        exp_id = '{}_{}_rate={}'.format(target, task_slug, rate)
        builder = (
            rv.ExperimentConfig.builder()
            .with_id(exp_id)
            .with_task(task)
            .with_backend(backend)
            .with_analyzer(analyzer)
            .with_dataset(dataset)
            .with_root_uri(root_uri)
            # Same key for every rate so analyze/chip output is reused.
            .with_analyze_key(shared_key)
            .with_chip_key(shared_key)
        )
        return builder.build()

    return [experiment_for(rate) for rate in rates]
def exp_main(self, raw_uri, root_uri, test=False, target=BUILDINGS,
             task_type=rv.SEMANTIC_SEGMENTATION, vector_tile_options=None):
    """Build an experiment on the Spacenet Vegas road or building dataset.

    This is an example of how to do all three tasks on the same dataset.

    Args:
        raw_uri: (str) directory of raw data (the root of the Spacenet
            dataset)
        root_uri: (str) root directory for experiment output
        test: (bool) if True, run a very small experiment as a test and
            generate debug output
        target: (str) 'buildings' or 'roads'
        task_type: (str) 'semantic_segmentation', 'object_detection', or
            'chip_classification'
        vector_tile_options: (str or None) space delimited list of uri,
            zoom, and id_field. See
            VectorTileVectorSourceConfigBuilder.with_uri, .with_zoom and
            .with_id_field methods for more details.

    Returns:
        The built experiment config.
    """
    test = str_to_bool(test)
    # The id uses the lower-cased task name; everything else uses upper case.
    exp_id = '{}-{}'.format(target, task_type.lower())
    task_type = task_type.upper()
    spacenet_config = SpacenetConfig.create(raw_uri, target)

    # Validation runs on the raw option value, before it is parsed.
    validate_options(task_type, target, vector_tile_options)
    tile_options = VectorTileOptions.build(vector_tile_options)

    class_map = spacenet_config.get_class_map()
    task = build_task(task_type, class_map)
    backend = build_backend(task, test)
    # Need to use stats_analyzer because imagery is uint16.
    analyzer = rv.AnalyzerConfig.builder(rv.STATS_ANALYZER).build()
    dataset = build_dataset(
        task, spacenet_config, test, vector_tile_options=tile_options)

    return (
        rv.ExperimentConfig.builder()
        .with_id(exp_id)
        .with_task(task)
        .with_backend(backend)
        .with_analyzer(analyzer)
        .with_dataset(dataset)
        .with_root_uri(root_uri)
        .build()
    )
def exp_main(self, exp_id, config, raw_uri, root_uri, test=False):
    """Build the 'duke-seg2' semantic segmentation experiment.

    The task has two classes ('PV' and 'Background'), uses 300-pixel chips
    and points at a fixed predict package URI. The experiment has no
    dataset attached.

    Args:
        exp_id: ignored; the experiment id is hard-coded to 'duke-seg2'.
        config: (dict) keyword arguments forwarded to the backend builder's
            ``with_train_options``.
        raw_uri: (str) not used by this method.
        root_uri: (str) root directory for experiment output
        test: (bool) normalized with ``str_to_bool`` but not otherwise used.

    Returns:
        The built experiment config.
    """
    # The flag is normalized, but nothing below branches on it.
    test = str_to_bool(test)
    # The caller-supplied id is overridden by a hard-coded one.
    exp_id = 'duke-seg2'
    chip_size = 300

    # Builder setters are named `with_*`; the original called
    # `.predict_package_uri(...)`, which is not a builder method.
    task = rv.TaskConfig.builder(rv.SEMANTIC_SEGMENTATION) \
        .with_predict_package_uri(
            '/opd/data/rv/try2/duke-seg2/predict_package.zip') \
        .with_chip_size(chip_size) \
        .with_classes({
            'PV': (1, 'yellow'),
            'Background': (2, 'black')
        }) \
        .with_chip_options(
            chips_per_scene=9,
            debug_chip_probability=0.25,
            negative_survival_probability=0.1,
            target_classes=[1],
            target_count_threshold=1000) \
        .build()

    # NOTE(review): `uri` is not defined inside this method; it must be
    # supplied by the enclosing module, otherwise this raises NameError.
    # Confirm where the pretrained model URI is meant to come from.
    backend = rv.BackendConfig.builder(FASTAI_SEMANTIC_SEGMENTATION) \
        .with_task(task) \
        .with_train_options(**config) \
        .with_pretrained_model(uri) \
        .build()

    experiment = rv.ExperimentConfig.builder() \
        .with_id(exp_id) \
        .with_task(task) \
        .with_backend(backend) \
        .with_root_uri(root_uri) \
        .build()

    return experiment
def exp_main(self, raw_uri, processed_uri, root_uri, test=False, use_tf=False):
    """Build a semantic segmentation experiment on the ISPRS Potsdam dataset.

    By default the PyTorch semantic segmentation backend (resnet50) is
    used; with ``use_tf`` the Tensorflow Deeplab backend with Mobilenet V2
    model defaults is used instead. The original author's note for this
    experiment cites an F1 score of ~0.86 (including the clutter class)
    after 6 hours of training on a P3 instance.

    Args:
        raw_uri: (str) directory of raw data
        processed_uri: (str) directory that receives the cropped rasters
            written in test mode
        root_uri: (str) root directory for experiment output
        test: (bool) if True, run a very small experiment as a test and
            generate debug output
        use_tf: (bool) if True, use Tensorflow Deeplab backend.

    Returns:
        The built experiment config.
    """
    test = str_to_bool(test)
    use_tf = str_to_bool(use_tf)

    train_ids = [
        '2-10', '2-11', '3-10', '3-11', '4-10', '4-11', '4-12', '5-10',
        '5-11', '5-12', '6-10', '6-11', '6-7', '6-9', '7-10', '7-11',
        '7-12', '7-7', '7-8', '7-9'
    ]
    val_ids = ['2-12', '3-12', '6-12']
    # infrared, red, green
    channel_order = [3, 0, 1]

    exp_id = 'potsdam-seg'
    debug = bool(test)
    if test:
        # Test mode: one scene per split and a distinct experiment id.
        train_ids = train_ids[:1]
        val_ids = val_ids[:1]
        exp_id += '-test'

    classes = {
        'Car': (1, '#ffff00'),
        'Building': (2, '#0000ff'),
        'Low Vegetation': (3, '#00ffff'),
        'Tree': (4, '#00ff00'),
        'Impervious': (5, '#ffffff'),
        'Clutter': (6, '#ff0000')
    }

    task = (
        rv.TaskConfig.builder(rv.SEMANTIC_SEGMENTATION)
        .with_chip_size(300)
        .with_classes(classes)
        .with_chip_options(
            window_method='sliding', stride=300, debug_chip_probability=1.0)
        .build()
    )

    if use_tf:
        num_steps = 1 if test else 100000
        batch_size = 2 if test else 8
        backend = (
            rv.BackendConfig.builder(rv.TF_DEEPLAB)
            .with_task(task)
            .with_model_defaults(rv.MOBILENET_V2)
            .with_train_options(sync_interval=600)
            .with_num_steps(num_steps)
            .with_batch_size(batch_size)
            .with_debug(debug)
            .build()
        )
    else:
        num_epochs = 1 if test else 10
        batch_size = 2 if test else 8
        backend = (
            rv.BackendConfig.builder(rv.PYTORCH_SEMANTIC_SEGMENTATION)
            .with_task(task)
            .with_train_options(
                lr=1e-4,
                batch_size=batch_size,
                num_epochs=num_epochs,
                model_arch='resnet50',
                debug=debug)
            .build()
        )

    def scene_for(tile_id):
        """Build the scene config for one Potsdam tile id such as '2-10'."""
        scene_id = tile_id.replace('-', '_')
        raster_uri = '{}/4_Ortho_RGBIR/top_potsdam_{}_RGBIR.tif'.format(
            raw_uri, scene_id)
        label_uri = '{}/5_Labels_for_participants/top_potsdam_{}_label.tif'.format(
            raw_uri, scene_id)

        if test:
            # Swap the raster for a crop saved under processed_uri.
            crop_uri = join(
                processed_uri, 'crops', os.path.basename(raster_uri))
            save_image_crop(raster_uri, crop_uri, size=600)
            raster_uri = crop_uri

        # Using with_rgb_class_map because label TIFFs have classes encoded
        # as RGB colors.
        label_source = (
            rv.LabelSourceConfig.builder(rv.SEMANTIC_SEGMENTATION)
            .with_rgb_class_map(task.class_map)
            .with_raster_source(label_uri)
            .build()
        )

        # URI will be injected by scene config.
        # Using with_rgb(True) because we want prediction TIFFs to be in
        # RGB format.
        label_store = (
            rv.LabelStoreConfig.builder(rv.SEMANTIC_SEGMENTATION_RASTER)
            .with_rgb(True)
            .build()
        )

        return (
            rv.SceneConfig.builder()
            .with_task(task)
            .with_id(scene_id)
            .with_raster_source(raster_uri, channel_order=channel_order)
            .with_label_source(label_source)
            .with_label_store(label_store)
            .build()
        )

    train_scenes = [scene_for(tile_id) for tile_id in train_ids]
    val_scenes = [scene_for(tile_id) for tile_id in val_ids]

    dataset = (
        rv.DatasetConfig.builder()
        .with_train_scenes(train_scenes)
        .with_validation_scenes(val_scenes)
        .build()
    )

    return (
        rv.ExperimentConfig.builder()
        .with_id(exp_id)
        .with_task(task)
        .with_backend(backend)
        .with_dataset(dataset)
        .with_root_uri(root_uri)
        .build()
    )
def exp_main(self, raw_uri, root_uri, test=False):
    """Build an experiment on the Spacenet Vegas building dataset.

    This is a simple example of how to do semantic segmentation on data
    that doesn't require any pre-processing or special permission to
    access.

    Args:
        raw_uri: (str) directory of raw data (the root of the Spacenet
            dataset)
        root_uri: (str) root directory for experiment output
        test: (bool) if True, run a very small experiment as a test and
            generate debug output

    Returns:
        The built experiment config.
    """
    base_uri = join(
        raw_uri,
        'SpaceNet_Buildings_Dataset_Round2/spacenetV2_Train/AOI_2_Vegas')
    raster_dir = join(base_uri, 'RGB-PanSharpen')
    label_dir = join(base_uri, 'geojson/buildings')
    raster_fn_prefix = 'RGB-PanSharpen_AOI_2_Vegas_img'
    label_fn_prefix = 'buildings_AOI_2_Vegas_img'

    # Scene ids are the digits that follow the label filename prefix.
    label_re = re.compile(r'.*{}(\d+)\.geojson'.format(label_fn_prefix))
    scene_ids = [
        label_re.match(path).group(1)
        for path in list_paths(label_dir, ext='.geojson')
    ]

    # Sort first, then shuffle with a fixed seed, so the shuffled order does
    # not depend on the order in which the paths were listed.
    random.seed(5678)
    scene_ids = sorted(scene_ids)
    random.shuffle(scene_ids)

    # Workaround to handle scene 1000 missing on S3.
    if '1000' in scene_ids:
        scene_ids.remove('1000')

    # First 80% of the shuffled ids train, the rest validate.
    split_at = int(len(scene_ids) * 0.8)
    train_ids, val_ids = scene_ids[:split_at], scene_ids[split_at:]

    test = str_to_bool(test)
    chip_size = 300
    if test:
        exp_id = 'spacenet-simple-seg' + '-test'
        num_epochs, batch_sz, debug = 2, 1, True
        train_ids, val_ids = ['12'], ['13']
    else:
        exp_id = 'spacenet-simple-seg'
        num_epochs, batch_sz, debug = 5, 8, False

    task = (
        rv.TaskConfig.builder(rv.SEMANTIC_SEGMENTATION)
        .with_chip_size(chip_size)
        .with_classes({
            'Building': (1, 'orange'),
            'Background': (2, 'black')
        })
        .with_chip_options(
            chips_per_scene=9,
            debug_chip_probability=0.25,
            negative_survival_probability=1.0,
            target_classes=[1],
            target_count_threshold=1000)
        .build()
    )

    config = {
        'bs': batch_sz,
        'num_epochs': num_epochs,
        'debug': debug,
        'lr': 1e-4
    }

    backend = (
        rv.BackendConfig.builder(FASTAI_SEMANTIC_SEGMENTATION)
        .with_task(task)
        .with_config(config)
        .build()
    )

    def scene_for(scene_id):
        """Build the scene config for one image/label pair."""
        image_uri = os.path.join(
            raster_dir, '{}{}.tif'.format(raster_fn_prefix, scene_id))
        raster_source = (
            rv.RasterSourceConfig.builder(rv.RASTERIO_SOURCE)
            .with_uri(image_uri)
            .with_channel_order([0, 1, 2])
            .with_stats_transformer()
            .build()
        )

        vector_uri = os.path.join(
            label_dir, '{}{}.geojson'.format(label_fn_prefix, scene_id))
        # The GeoJSON labels are rasterized; 2 is the 'Background' class id.
        label_raster_source = (
            rv.RasterSourceConfig.builder(rv.RASTERIZED_SOURCE)
            .with_vector_source(vector_uri)
            .with_rasterizer_options(2)
            .build()
        )
        label_source = (
            rv.LabelSourceConfig.builder(rv.SEMANTIC_SEGMENTATION)
            .with_raster_source(label_raster_source)
            .build()
        )

        return (
            rv.SceneConfig.builder()
            .with_task(task)
            .with_id(scene_id)
            .with_raster_source(raster_source)
            .with_label_source(label_source)
            .build()
        )

    train_scenes = [scene_for(scene_id) for scene_id in train_ids]
    val_scenes = [scene_for(scene_id) for scene_id in val_ids]

    dataset = (
        rv.DatasetConfig.builder()
        .with_train_scenes(train_scenes)
        .with_validation_scenes(val_scenes)
        .build()
    )

    # Need to use stats_analyzer because imagery is uint16.
    analyzer = rv.AnalyzerConfig.builder(rv.STATS_ANALYZER).build()

    return (
        rv.ExperimentConfig.builder()
        .with_id(exp_id)
        .with_task(task)
        .with_backend(backend)
        .with_analyzer(analyzer)
        .with_dataset(dataset)
        .with_root_uri(root_uri)
        .build()
    )
def get_exp(self, exp_id, config, raw_uri, processed_uri, root_uri,
            test=False, pred_chip_size=300):
    """Build a semantic segmentation experiment on the ISPRS Potsdam dataset.

    Args:
        exp_id: (str) experiment id; '-test' is appended in test mode
        config: (dict) keyword arguments forwarded to the backend builder's
            ``with_train_options``. The caller's dict is never modified.
        raw_uri: (str) directory of raw data
        processed_uri: (str) directory that receives the cropped rasters
            written in test mode
        root_uri: (str) root directory for experiment output
        test: (bool) if True, run a very small experiment as a test and
            generate debug output
        pred_chip_size: accepted for interface compatibility; not used by
            this method.

    Returns:
        The built experiment config.
    """
    test = str_to_bool(test)
    # Work on a copy: the test-mode overrides below must not leak into the
    # caller's dict, which may be reused to build other experiments.
    config = dict(config)

    train_ids = [
        '2-10', '2-11', '3-10', '3-11', '4-10', '4-11', '4-12', '5-10',
        '5-11', '5-12', '6-10', '6-11', '6-7', '6-9', '7-10', '7-11',
        '7-12', '7-7', '7-8', '7-9'
    ]
    val_ids = ['2-12', '3-12', '6-12']
    # infrared, red, green
    channel_order = [3, 0, 1]
    chip_key = 'potsdam-seg'

    if test:
        config['debug'] = True
        config['batch_sz'] = 1
        config['num_epochs'] = 1

        train_ids = train_ids[0:1]
        val_ids = val_ids[0:1]
        exp_id += '-test'
        # Separate chip key so test chips are not shared with full runs.
        chip_key += '-test'

    classes = {
        'Car': (1, '#ffff00'),
        'Building': (2, '#0000ff'),
        'Low Vegetation': (3, '#00ffff'),
        'Tree': (4, '#00ff00'),
        'Impervious': (5, "#ffffff"),
        'Clutter': (6, "#ff0000")
    }

    task = rv.TaskConfig.builder(rv.SEMANTIC_SEGMENTATION) \
        .with_chip_size(300) \
        .with_classes(classes) \
        .with_chip_options(window_method='sliding',
                           stride=300,
                           debug_chip_probability=1.0) \
        .build()

    backend = rv.BackendConfig.builder(FASTAI_SEMANTIC_SEGMENTATION) \
        .with_task(task) \
        .with_train_options(**config) \
        .build()

    def make_scene(tile_id):
        """Build the scene config for one Potsdam tile id such as '2-10'."""
        scene_id = tile_id.replace('-', '_')
        raster_uri = '{}/4_Ortho_RGBIR/top_potsdam_{}_RGBIR.tif'.format(
            raw_uri, scene_id)
        label_uri = '{}/5_Labels_for_participants/top_potsdam_{}_label.tif'.format(
            raw_uri, scene_id)

        if test:
            # Swap the raster for a crop saved under processed_uri.
            crop_uri = join(
                processed_uri, 'crops', os.path.basename(raster_uri))
            save_image_crop(raster_uri, crop_uri, size=600)
            raster_uri = crop_uri

        # Using with_rgb_class_map because label TIFFs have classes encoded
        # as RGB colors.
        label_source = rv.LabelSourceConfig.builder(rv.SEMANTIC_SEGMENTATION) \
            .with_rgb_class_map(task.class_map) \
            .with_raster_source(label_uri) \
            .build()

        # URI will be injected by scene config.
        # Using with_rgb(True) because we want prediction TIFFs to be in
        # RGB format.
        label_store = rv.LabelStoreConfig.builder(rv.SEMANTIC_SEGMENTATION_RASTER) \
            .with_rgb(True) \
            .build()

        scene = rv.SceneConfig.builder() \
            .with_task(task) \
            .with_id(scene_id) \
            .with_raster_source(raster_uri, channel_order=channel_order) \
            .with_label_source(label_source) \
            .with_label_store(label_store) \
            .build()

        return scene

    train_scenes = [make_scene(tile_id) for tile_id in train_ids]
    val_scenes = [make_scene(tile_id) for tile_id in val_ids]

    dataset = rv.DatasetConfig.builder() \
        .with_train_scenes(train_scenes) \
        .with_validation_scenes(val_scenes) \
        .build()

    experiment = rv.ExperimentConfig.builder() \
        .with_id(exp_id) \
        .with_chip_key(chip_key) \
        .with_task(task) \
        .with_backend(backend) \
        .with_dataset(dataset) \
        .with_root_uri(root_uri) \
        .build()

    return experiment
def exp_main(self, raw_uri, root_uri, test=False):
    """Build an experiment on the Spacenet Vegas building dataset.

    This is a simple example of how to do semantic segmentation on data
    that doesn't require any pre-processing or special permission to
    access.

    Args:
        raw_uri: (str) directory of raw data (the root of the Spacenet
            dataset)
        root_uri: (str) root directory for experiment output
        test: (bool) if True, run a very small experiment as a test and
            generate debug output

    Returns:
        The built experiment config.
    """
    # Location of the raw data. Images and labels live in two separate
    # directories underneath it.
    base_uri = join(raw_uri, 'spacenet/SN2_buildings/train/AOI_2_Vegas')
    raster_dir = join(base_uri, 'PS-RGB')
    label_dir = join(base_uri, 'geojson_buildings')

    # The tiff (raster) and geojson (label) files follow the naming
    # convention '[prefix][image id].[ext]'. The prefix indicates the type
    # of data and the image id indicates which scene a file belongs to.
    raster_fn_prefix = 'SN2_buildings_train_AOI_2_Vegas_PS-RGB_img'
    label_fn_prefix = 'SN2_buildings_train_AOI_2_Vegas_geojson_buildings_img'

    # Collect the image id of every label file; these become the scene ids.
    label_re = re.compile(r'.*{}(\d+)\.geojson'.format(label_fn_prefix))
    scene_ids = [
        label_re.match(path).group(1)
        for path in list_paths(label_dir, ext='.geojson')
    ]

    # Training parameters.
    # exp_id labels this experiment and is used to name the experiment
    # config json. num_epochs is the number of passes over the whole
    # dataset during training, batch_size the number of images per batch.
    # debug controls whether debug chips are made (a zipped sample of png
    # chips that you can examine to help debug the chipping process).
    #
    # The 'test' parameter switches to a tiny experiment with its own
    # experiment id, small enough to run locally. It is recommended to run
    # the test version locally before submitting the whole experiment to
    # AWS Batch.
    test = str_to_bool(test)
    if test:
        exp_id = 'spacenet-simple-seg' + '-test'
        num_epochs, batch_size, debug = 1, 2, True
        scene_ids = scene_ids[:10]
    else:
        exp_id = 'spacenet-simple-seg'
        num_epochs, batch_size, debug = 2, 8, False

    # Split the data into training and validation sets. The ids are sorted
    # and then shuffled with a fixed seed so the order is repeatable.
    random.seed(5678)
    scene_ids = sorted(scene_ids)
    random.shuffle(scene_ids)

    # Workaround to handle scene 1000 missing on S3.
    if '1000' in scene_ids:
        scene_ids.remove('1000')

    # 80% of the scenes (rounded) train, the remainder validate.
    split_at = round(len(scene_ids) * 0.8)
    train_ids, val_ids = scene_ids[:split_at], scene_ids[split_at:]

    # The TaskConfigBuilder constructs a child class of TaskConfig that
    # corresponds to the type of computer vision task you are taking on.
    # This experiment is a semantic segmentation task, but Raster Vision
    # also has backends for object detection and chip classification.
    # Parameters are set with 'with_' methods before building: here the
    # chip size, the pixel class names and colors, and additional chip
    # options.
    task = (
        rv.TaskConfig.builder(rv.SEMANTIC_SEGMENTATION)
        .with_chip_size(300)
        .with_classes({
            'Building': (1, 'orange'),
            'Background': (2, 'black')
        })
        .with_chip_options(
            chips_per_scene=9,
            debug_chip_probability=0.25,
            negative_survival_probability=1.0,
            target_classes=[1],
            target_count_threshold=1000)
        .build()
    )

    # The backend is built on top of a third-party deep learning library;
    # in this case the PyTorch semantic segmentation backend.
    backend = (
        rv.BackendConfig.builder(rv.PYTORCH_SEMANTIC_SEGMENTATION)
        .with_task(task)
        .with_train_options(
            lr=1e-4,
            batch_size=batch_size,
            num_epochs=num_epochs,
            model_arch='resnet50',
            debug=debug)
        .build()
    )

    def scene_for(scene_id):
        """Make a SceneConfig object for one image/label pair.

        Args:
            scene_id (str): the id shared by the .tif image source and the
                .geojson label source of a scene

        Returns:
            rv.data.SceneConfig: a SceneConfig object which is composed of
                images, labels and optionally AOIs
        """
        # URI of the image that belongs to this id.
        image_uri = os.path.join(
            raster_dir, '{}{}.tif'.format(raster_fn_prefix, scene_id))

        # A raster source that Rasterio can read. The channel order is
        # given by index, and a stats transformer is added which normalizes
        # pixel values into uint8.
        raster_source = (
            rv.RasterSourceConfig.builder(rv.RASTERIO_SOURCE)
            .with_uri(image_uri)
            .with_channel_order([0, 1, 2])
            .with_stats_transformer()
            .build()
        )

        # URI of the GeoJSON labels paired with that image.
        vector_uri = os.path.join(
            label_dir, '{}{}.geojson'.format(label_fn_prefix, scene_id))

        # Semantic segmentation needs raster labels, but these labels are
        # distributed as vector GeoJSON, so they are rasterized. Using
        # `rv.RASTERIZED_SOURCE` says the raster comes from a vector
        # source; 'with_rasterizer_options' receives the id of the pixel
        # class to use as background.
        label_raster_source = (
            rv.RasterSourceConfig.builder(rv.RASTERIZED_SOURCE)
            .with_vector_source(vector_uri)
            .with_rasterizer_options(2)
            .build()
        )

        # Semantic segmentation label source over the rasterized labels.
        label_source = (
            rv.LabelSourceConfig.builder(rv.SEMANTIC_SEGMENTATION)
            .with_raster_source(label_raster_source)
            .build()
        )

        # The scene ties the id, the imagery and the labels together.
        return (
            rv.SceneConfig.builder()
            .with_task(task)
            .with_id(scene_id)
            .with_raster_source(raster_source)
            .with_label_source(label_source)
            .build()
        )

    # Scene configs for the training and validation splits.
    train_scenes = [scene_for(scene_id) for scene_id in train_ids]
    val_scenes = [scene_for(scene_id) for scene_id in val_ids]

    dataset = (
        rv.DatasetConfig.builder()
        .with_train_scenes(train_scenes)
        .with_validation_scenes(val_scenes)
        .build()
    )

    # The imagery has to be converted from uint16 to uint8 before use. The
    # conversion was requested on the raster source, but it requires
    # dataset-level statistics, which the stats analyzer computes.
    analyzer = rv.AnalyzerConfig.builder(rv.STATS_ANALYZER).build()

    # Assemble the experiment from the configs built above, plus the
    # experiment id and the root uri under which all output is written.
    return (
        rv.ExperimentConfig.builder()
        .with_id(exp_id)
        .with_task(task)
        .with_backend(backend)
        .with_analyzer(analyzer)
        .with_dataset(dataset)
        .with_root_uri(root_uri)
        .build()
    )
def exp_main(self, raw_uri, processed_uri, root_uri, test=False):
    """Chip classification experiment on Spacenet Rio dataset.

    Run the data prep notebook before running this experiment. Note all
    URIs can be local or remote.

    Args:
        raw_uri: (str) directory of raw data
        processed_uri: (str) directory of processed data
        root_uri: (str) root directory for experiment output
        test: (bool) if True, run a very small experiment as a test and
            generate debug output

    Returns:
        The built experiment config.
    """
    test = str_to_bool(test)
    train_scene_info = get_scene_info(join(processed_uri, 'train-scenes.csv'))
    val_scene_info = get_scene_info(join(processed_uri, 'val-scenes.csv'))

    if test:
        # Test mode: one scene per split, one epoch, debug output on.
        exp_id = 'spacenet-rio-chip-classification' + '-test'
        num_epochs, batch_size, debug = 1, 1, True
        train_scene_info = train_scene_info[:1]
        val_scene_info = val_scene_info[:1]
    else:
        exp_id = 'spacenet-rio-chip-classification'
        num_epochs, batch_size, debug = 20, 16, False

    task = (
        rv.TaskConfig.builder(rv.CHIP_CLASSIFICATION)
        .with_chip_size(200)
        .with_classes({
            'building': (1, 'red'),
            'no_building': (2, 'black')
        })
        .build()
    )

    # Trainer overrides: keep the best model and step the learning rate
    # down at epochs 10 and 15.
    trainer_overrides = {
        'trainer': {
            'options': {
                'saveBest': True,
                'lrSchedule': [
                    {'epoch': 0, 'lr': 0.0005},
                    {'epoch': 10, 'lr': 0.0001},
                    {'epoch': 15, 'lr': 0.00001}
                ]
            }
        }
    }

    backend = (
        rv.BackendConfig.builder(rv.KERAS_CLASSIFICATION)
        .with_task(task)
        .with_model_defaults(rv.RESNET50_IMAGENET)
        .with_debug(debug)
        .with_batch_size(batch_size)
        .with_num_epochs(num_epochs)
        .with_config(trainer_overrides, set_missing_keys=True)
        .build()
    )

    def scene_for(scene_info):
        """Build the scene config for one (raster, label) path pair."""
        raster_rel, label_rel = scene_info
        raster_uri = join(raw_uri, raster_rel)
        label_uri = join(processed_uri, label_rel)
        # aoi_path is not defined in this method; it comes from the
        # enclosing module.
        aoi_uri = join(raw_uri, aoi_path)

        if test:
            # Swap the raster for a crop saved under processed_uri.
            crop_uri = join(
                processed_uri, 'crops', os.path.basename(raster_uri))
            save_image_crop(raster_uri, crop_uri, label_uri=label_uri,
                            size=600, min_features=20)
            raster_uri = crop_uri

        # The scene id is the raster filename without its extension.
        scene_id = os.path.splitext(os.path.basename(raster_uri))[0]
        label_source = (
            rv.LabelSourceConfig.builder(rv.CHIP_CLASSIFICATION)
            .with_uri(label_uri)
            .with_ioa_thresh(0.5)
            .with_use_intersection_over_cell(False)
            .with_pick_min_class_id(True)
            .with_background_class_id(2)
            .with_infer_cells(True)
            .build()
        )

        return (
            rv.SceneConfig.builder()
            .with_task(task)
            .with_id(scene_id)
            .with_raster_source(raster_uri)
            .with_label_source(label_source)
            .with_aoi_uri(aoi_uri)
            .build()
        )

    train_scenes = [scene_for(info) for info in train_scene_info]
    val_scenes = [scene_for(info) for info in val_scene_info]

    dataset = (
        rv.DatasetConfig.builder()
        .with_train_scenes(train_scenes)
        .with_validation_scenes(val_scenes)
        .build()
    )

    return (
        rv.ExperimentConfig.builder()
        .with_id(exp_id)
        .with_root_uri(root_uri)
        .with_task(task)
        .with_backend(backend)
        .with_dataset(dataset)
        .build()
    )
def exp_main(self, raw_uri, processed_uri, root_uri, test=False):
    """Semantic segmentation experiment on Spacenet Rio dataset.

    Run the data prep notebook before running this experiment. Note all
    URIs can be local or remote.

    Args:
        raw_uri: (str) directory of raw data
        processed_uri: (str) directory of processed data
        root_uri: (str) root directory for experiment output
        test: (bool) if True, run a very small experiment as a test and
            generate debug output

    Returns:
        The built experiment config.
    """
    test = str_to_bool(test)
    train_scene_info = get_scene_info(
        join(processed_uri, 'train-scenes.csv'))
    val_scene_info = get_scene_info(join(processed_uri, 'val-scenes.csv'))

    if test:
        # Test mode: one scene per split, one epoch, debug output on.
        exp_id = 'spacenet-rio-semseg' + '-test'
        debug, num_epochs, batch_size = True, 1, 2
        train_scene_info = train_scene_info[:1]
        val_scene_info = val_scene_info[:1]
    else:
        exp_id = 'spacenet-rio-semseg'
        debug, num_epochs, batch_size = False, 20, 8

    class_map = {'Building': (1, 'orange'), 'Background': (2, 'black')}

    task = (
        rv.TaskConfig.builder(rv.SEMANTIC_SEGMENTATION)
        .with_chip_size(300)
        .with_classes(class_map)
        .with_chip_options(
            stride=300,
            window_method='sliding',
            debug_chip_probability=1.0)
        .build()
    )

    backend = (
        rv.BackendConfig.builder(rv.PYTORCH_SEMANTIC_SEGMENTATION)
        .with_task(task)
        .with_train_options(
            lr=1e-4,
            batch_size=batch_size,
            num_epochs=num_epochs,
            model_arch='resnet18',
            debug=debug)
        .build()
    )

    def scene_for(scene_info):
        """Build the scene config for one (raster, label) path pair."""
        raster_rel, label_rel = scene_info
        raster_uri = join(raw_uri, raster_rel)
        label_uri = join(processed_uri, label_rel)

        if test:
            # Swap the raster for a crop saved under processed_uri.
            crop_uri = join(
                processed_uri, 'crops', os.path.basename(raster_uri))
            save_image_crop(raster_uri, crop_uri, label_uri=label_uri,
                            size=600)
            raster_uri = crop_uri

        # aoi_path is not defined in this method; it comes from the
        # enclosing module.
        aoi_uri = join(raw_uri, aoi_path)
        # The scene id is the raster filename without its extension.
        scene_id = os.path.splitext(os.path.basename(raster_uri))[0]

        # The vector labels are rasterized; class id 2 ('Background') is
        # passed as the rasterizer's background class.
        background_class_id = 2
        label_raster_source = (
            rv.RasterSourceConfig.builder(rv.RASTERIZED_SOURCE)
            .with_vector_source(label_uri)
            .with_rasterizer_options(background_class_id)
            .build()
        )
        label_source = (
            rv.LabelSourceConfig.builder(rv.SEMANTIC_SEGMENTATION)
            .with_raster_source(label_raster_source)
            .build()
        )

        return (
            rv.SceneConfig.builder()
            .with_task(task)
            .with_id(scene_id)
            .with_raster_source(raster_uri)
            .with_label_source(label_source)
            .with_aoi_uri(aoi_uri)
            .build()
        )

    train_scenes = [scene_for(info) for info in train_scene_info]
    val_scenes = [scene_for(info) for info in val_scene_info]

    dataset = (
        rv.DatasetConfig.builder()
        .with_train_scenes(train_scenes)
        .with_validation_scenes(val_scenes)
        .build()
    )

    return (
        rv.ExperimentConfig.builder()
        .with_id(exp_id)
        .with_root_uri(root_uri)
        .with_task(task)
        .with_backend(backend)
        .with_dataset(dataset)
        .build()
    )
def exp_xview(self, raw_uri, processed_uri, root_uri, test=False):
    """Object detection experiment on xView data.

    Run the data prep notebook before running this experiment. Note all
    URIs can be local or remote.

    Args:
        raw_uri: (str) directory of raw data
        processed_uri: (str) directory of processed data
        root_uri: (str) root directory for experiment output
        test: (bool) if True, run a very small experiment as a test and
            generate debug output

    Returns:
        The built experiment config.
    """
    test = str_to_bool(test)
    train_scene_info = get_scene_info(
        join(processed_uri, 'train-scenes.csv'))
    val_scene_info = get_scene_info(join(processed_uri, 'val-scenes.csv'))

    if test:
        # Test mode: one scene per split, a single training step, debug on.
        exp_id = 'xview-vehicles' + '-test'
        batch_size, num_steps, debug = 2, 1, True
        train_scene_info = train_scene_info[:1]
        val_scene_info = val_scene_info[:1]
    else:
        exp_id = 'xview-vehicles'
        batch_size, num_steps, debug = 16, 100000, False

    task = (
        rv.TaskConfig.builder(rv.OBJECT_DETECTION)
        .with_chip_size(300)
        .with_classes({'vehicle': (1, 'red')})
        .with_chip_options(neg_ratio=1.0, ioa_thresh=0.8)
        .with_predict_options(merge_thresh=0.1, score_thresh=0.5)
        .build()
    )

    backend = (
        rv.BackendConfig.builder(rv.TF_OBJECT_DETECTION)
        .with_task(task)
        .with_debug(debug)
        .with_batch_size(batch_size)
        .with_num_steps(num_steps)
        .with_model_defaults(rv.SSD_MOBILENET_V1_COCO)
        .build()
    )

    def scene_for(scene_info):
        """Build the scene config for one (raster, label) path pair."""
        raster_rel, label_rel = scene_info
        raster_uri = join(raw_uri, raster_rel)
        label_uri = join(processed_uri, label_rel)

        if test:
            # Swap the raster for a crop saved under processed_uri.
            crop_uri = join(
                processed_uri, 'crops', os.path.basename(raster_uri))
            save_image_crop(raster_uri, crop_uri, size=600, min_features=5)
            raster_uri = crop_uri

        # The scene id is the raster filename without its extension.
        scene_id = os.path.splitext(os.path.basename(raster_uri))[0]
        label_source = (
            rv.LabelSourceConfig.builder(rv.OBJECT_DETECTION)
            .with_uri(label_uri)
            .build()
        )

        return (
            rv.SceneConfig.builder()
            .with_task(task)
            .with_id(scene_id)
            .with_raster_source(raster_uri)
            .with_label_source(label_source)
            .build()
        )

    train_scenes = [scene_for(info) for info in train_scene_info]
    val_scenes = [scene_for(info) for info in val_scene_info]

    dataset = (
        rv.DatasetConfig.builder()
        .with_train_scenes(train_scenes)
        .with_validation_scenes(val_scenes)
        .build()
    )

    return (
        rv.ExperimentConfig.builder()
        .with_id(exp_id)
        .with_root_uri(root_uri)
        .with_task(task)
        .with_backend(backend)
        .with_dataset(dataset)
        .build()
    )
def get_exp(self, exp_id, config, raw_uri, processed_uri, root_uri, test=False):
    """Chip classification experiment on Spacenet Rio dataset.

    Run the data prep notebook before running this experiment. Note all
    URIs can be local or remote.

    Args:
        exp_id: (str) experiment id; '-test' is appended in test mode
        config: (dict) keyword arguments forwarded to the backend builder's
            ``with_train_options``. The caller's dict is never modified.
        raw_uri: (str) directory of raw data
        processed_uri: (str) directory of processed data
        root_uri: (str) root directory for experiment output
        test: (bool) if True, run a very small experiment as a test and
            generate debug output

    Returns:
        The built experiment config.
    """
    test = str_to_bool(test)
    # Work on a copy: the test-mode overrides below must not leak into the
    # caller's dict, which may be reused to build other experiments.
    config = dict(config)

    train_scene_info = get_scene_info(
        join(processed_uri, 'train-scenes.csv'))
    val_scene_info = get_scene_info(join(processed_uri, 'val-scenes.csv'))
    chip_key = 'spacenet-rio-chip-classification'

    if test:
        exp_id += '-test'
        config['num_epochs'] = 1
        config['batch_sz'] = 8
        config['debug'] = True
        train_scene_info = train_scene_info[0:1]
        val_scene_info = val_scene_info[0:1]

    task = rv.TaskConfig.builder(rv.CHIP_CLASSIFICATION) \
        .with_chip_size(200) \
        .with_classes({
            'building': (1, 'red'),
            'no_building': (2, 'black')
        }) \
        .build()

    backend = rv.BackendConfig.builder(FASTAI_CHIP_CLASSIFICATION) \
        .with_task(task) \
        .with_train_options(**config) \
        .build()

    def make_scene(scene_info):
        """Build the scene config for one (raster, label) path pair."""
        (raster_uri, label_uri) = scene_info
        raster_uri = join(raw_uri, raster_uri)
        label_uri = join(processed_uri, label_uri)
        # aoi_path is not defined in this method; it comes from the
        # enclosing module.
        aoi_uri = join(raw_uri, aoi_path)

        if test:
            # Swap the raster for a crop saved under processed_uri.
            crop_uri = join(
                processed_uri, 'crops', os.path.basename(raster_uri))
            save_image_crop(raster_uri, crop_uri, label_uri=label_uri,
                            size=600, min_features=5)
            raster_uri = crop_uri

        # The scene id is the raster filename without its extension.
        scene_id = os.path.splitext(os.path.basename(raster_uri))[0]
        label_source = rv.LabelSourceConfig.builder(rv.CHIP_CLASSIFICATION) \
            .with_uri(label_uri) \
            .with_ioa_thresh(0.5) \
            .with_use_intersection_over_cell(False) \
            .with_pick_min_class_id(True) \
            .with_background_class_id(2) \
            .with_infer_cells(True) \
            .build()

        return rv.SceneConfig.builder() \
            .with_task(task) \
            .with_id(scene_id) \
            .with_raster_source(raster_uri) \
            .with_label_source(label_source) \
            .with_aoi_uri(aoi_uri) \
            .build()

    train_scenes = [make_scene(info) for info in train_scene_info]
    val_scenes = [make_scene(info) for info in val_scene_info]

    dataset = rv.DatasetConfig.builder() \
        .with_train_scenes(train_scenes) \
        .with_validation_scenes(val_scenes) \
        .build()

    experiment = rv.ExperimentConfig.builder() \
        .with_id(exp_id) \
        .with_chip_key(chip_key) \
        .with_root_uri(root_uri) \
        .with_task(task) \
        .with_backend(backend) \
        .with_dataset(dataset) \
        .build()

    return experiment
def exp_main(self, raw_uri, processed_uri, root_uri, test=False):
    """Configure vehicle object detection on the COWC Potsdam dataset.

    COWC stands for Cars Overhead with Context. The experiment uses the
    rv.TF_OBJECT_DETECTION backend with the rv.SSD_MOBILENET_V1_COCO model
    defaults.

    Args:
        raw_uri: (str) directory of raw data
        processed_uri: (str) directory of processed data
        root_uri: (str) root directory for experiment output
        test: (bool) if True, run a very small experiment as a test and
            generate debug output

    Returns:
        The built experiment config.
    """
    test = str_to_bool(test)

    train_ids = ['2_10', '2_11', '2_12', '2_14', '3_11',
                 '3_13', '4_10', '5_10', '6_7', '6_9']
    val_ids = ['2_13', '6_8', '3_10']

    # Full-size defaults; test mode replaces them with a one-step run on a
    # single scene per split.
    experiment_name = 'cowc-object-detection'
    step_count, chips_per_batch, debug_on = 100000, 8, False
    if test:
        experiment_name += '-test'
        step_count, chips_per_batch, debug_on = 1, 1, True
        train_ids, val_ids = train_ids[0:1], val_ids[0:1]

    task = (
        rv.TaskConfig.builder(rv.OBJECT_DETECTION)
        .with_chip_size(300)
        .with_classes({'vehicle': (1, 'red')})
        .with_chip_options(neg_ratio=1.0, ioa_thresh=0.8)
        .with_predict_options(merge_thresh=0.1, score_thresh=0.5)
        .build()
    )

    backend = (
        rv.BackendConfig.builder(rv.TF_OBJECT_DETECTION)
        .with_task(task)
        .with_model_defaults(rv.SSD_MOBILENET_V1_COCO)
        .with_debug(debug_on)
        .with_batch_size(chips_per_batch)
        .with_num_steps(step_count)
        .build()
    )

    def scene_for(tile_id):
        """Build the scene config for one Potsdam tile id."""
        image_uri = join(
            raw_uri, '4_Ortho_RGBIR/top_potsdam_{}_RGBIR.tif'.format(tile_id))
        labels_uri = join(
            processed_uri, 'labels', 'all',
            'top_potsdam_{}_RGBIR.json'.format(tile_id))

        if test:
            # Test runs point the scene at the crop target handed to
            # save_image_crop rather than at the full raster.
            cropped_uri = join(
                processed_uri, 'crops', os.path.basename(image_uri))
            save_image_crop(image_uri, cropped_uri, label_uri=labels_uri,
                            size=1000, min_features=5)
            image_uri = cropped_uri

        scene_builder = rv.SceneConfig.builder()
        return (
            scene_builder
            .with_id(tile_id)
            .with_task(task)
            .with_raster_source(image_uri, channel_order=[0, 1, 2])
            .with_label_source(labels_uri)
            .build()
        )

    # Training scenes are built before validation scenes, as the crops are
    # produced as a side effect of building each scene in test mode.
    train_scenes = list(map(scene_for, train_ids))
    val_scenes = list(map(scene_for, val_ids))

    dataset = (
        rv.DatasetConfig.builder()
        .with_train_scenes(train_scenes)
        .with_validation_scenes(val_scenes)
        .build()
    )

    return (
        rv.ExperimentConfig.builder()
        .with_id(experiment_name)
        .with_root_uri(root_uri)
        .with_task(task)
        .with_backend(backend)
        .with_dataset(dataset)
        .build()
    )
def exp_main(self, exp_id, config, raw_uri, root_uri, test=False):
    """Semantic segmentation experiment on the 'duke' imagery (PV labels).

    Scene ids are derived from the '*.geojson' files found under
    <raw_uri>/duke_labels, and the matching rasters are read from
    <raw_uri>/duke/<id>.tif. The sorted ids are shuffled with a fixed seed
    and split 80% / 20% into training and validation scenes.

    Side effects: calls random.seed(5678), which reseeds the module-level
    random generator. The label directory is listed with glob, so it has to
    be a local path.

    Args:
        exp_id: currently ignored -- the experiment id is always
            'duke-seg3' (plus '-test' in test mode).
            NOTE(review): confirm whether the argument should be honoured.
        config: (dict) training options, forwarded as keyword arguments to
            the backend builder's with_train_options. The caller's dict is
            never modified: test-mode overrides are applied to a copy.
        raw_uri: (str) directory holding the 'duke' and 'duke_labels'
            directories
        root_uri: (str) root directory for experiment output
        test: (bool) if True, run a very small experiment as a test (one
            hard-coded train scene and one validation scene, batch size 1,
            one epoch)

    Returns:
        The built experiment config.
    """
    raster_uri = join(raw_uri, 'duke')
    label_uri = join(raw_uri, 'duke_labels')

    scene_ids = [os.path.basename(x).replace('.geojson', '')
                 for x in glob.glob(join(label_uri, '*.geojson'))]

    # Sort before the seeded shuffle so the split does not depend on the
    # order in which glob happens to list the files.
    random.seed(5678)
    scene_ids = sorted(scene_ids)
    random.shuffle(scene_ids)

    num_train_ids = int(len(scene_ids) * 0.8)
    train_ids = scene_ids[0:num_train_ids]
    val_ids = scene_ids[num_train_ids:]

    test = str_to_bool(test)
    # Work on a shallow copy so the test-mode overrides below do not leak
    # into a dict the caller may share between several experiments.
    config = dict(config)

    # The exp_id argument is deliberately left unused here to preserve the
    # existing output location; see the docstring.
    exp_id = 'duke-seg3'
    chip_size = 300

    if test:
        exp_id += '-test'
        train_ids = ['11ska355800']
        val_ids = ['11ska490710']
        # NOTE(review): debug is forced off in test mode, unlike the other
        # experiments in this file -- confirm this is intended.
        config['debug'] = False
        config['batch_sz'] = 1
        config['num_epochs'] = 1

    task = rv.TaskConfig.builder(rv.SEMANTIC_SEGMENTATION) \
        .with_chip_size(chip_size) \
        .with_classes({
            'PV': (1, 'yellow'),
            'Background': (2, 'black')
        }) \
        .with_chip_options(
            chips_per_scene=9,
            debug_chip_probability=0.25,
            negative_survival_probability=0.1,
            target_classes=[1],
            target_count_threshold=1000) \
        .build()

    backend = rv.BackendConfig.builder(FASTAI_SEMANTIC_SEGMENTATION) \
        .with_task(task) \
        .with_train_options(**config) \
        .build()

    def make_scene(scene_id):
        """Build the scene config for one scene id (raster plus labels)."""
        train_image_uri = os.path.join(raster_uri,
                                       '{}.tif'.format(scene_id))

        raster_source = rv.RasterSourceConfig.builder(rv.RASTERIO_SOURCE) \
            .with_uri(train_image_uri) \
            .with_channel_order([0, 1, 2]) \
            .with_stats_transformer() \
            .build()

        vector_source = os.path.join(
            label_uri, '{}.geojson'.format(scene_id))

        # 2 equals the 'Background' class id declared on the task;
        # presumably it is the rasterizer's background class id -- confirm
        # against with_rasterizer_options.
        label_raster_source = \
            rv.RasterSourceConfig.builder(rv.RASTERIZED_SOURCE) \
            .with_vector_source(vector_source) \
            .with_rasterizer_options(2) \
            .build()

        label_source = \
            rv.LabelSourceConfig.builder(rv.SEMANTIC_SEGMENTATION) \
            .with_raster_source(label_raster_source) \
            .build()

        scene = rv.SceneConfig.builder() \
            .with_task(task) \
            .with_id(scene_id) \
            .with_raster_source(raster_source) \
            .with_label_source(label_source) \
            .build()

        return scene

    train_scenes = [make_scene(scene_id) for scene_id in train_ids]
    val_scenes = [make_scene(scene_id) for scene_id in val_ids]

    dataset = rv.DatasetConfig.builder() \
        .with_train_scenes(train_scenes) \
        .with_validation_scenes(val_scenes) \
        .build()

    # Original note: need to use stats_analyzer because imagery is uint16.
    analyzer = rv.AnalyzerConfig.builder(rv.STATS_ANALYZER) \
        .with_sample_prob(0.1) \
        .build()

    experiment = rv.ExperimentConfig.builder() \
        .with_id(exp_id) \
        .with_task(task) \
        .with_backend(backend) \
        .with_analyzer(analyzer) \
        .with_dataset(dataset) \
        .with_root_uri(root_uri) \
        .build()

    return experiment
def exp_main(self, raw_uri, processed_uri, root_uri, test=False):
    """Configure vehicle object detection on the COWC Potsdam dataset.

    COWC stands for Cars Overhead with Context. The experiment uses the
    FASTAI_OBJECT_DETECTION backend with a 'resnet18' model architecture.

    Args:
        raw_uri: (str) directory of raw data
        processed_uri: (str) directory of processed data
        root_uri: (str) root directory for experiment output
        test: (bool) if True, run a very small experiment as a test and
            generate debug output

    Returns:
        The built experiment config.
    """
    test = str_to_bool(test)

    experiment_name = 'cowc-object-detection'
    train_opts = {
        'batch_sz': 16,
        'num_epochs': 50,
        'debug': False,
        'lr': 2e-5,
        'sync_interval': 10,
        'model_arch': 'resnet18'
    }
    train_ids = [
        '2_10', '2_11', '2_12', '2_14', '3_11', '3_13', '4_10', '5_10',
        '6_7', '6_9'
    ]
    val_ids = ['2_13', '6_8', '3_10']

    if test:
        # Smoke-test settings: one epoch, batch of one, a single scene per
        # split, debug output enabled.
        experiment_name += '-test'
        train_opts.update(batch_sz=1, num_epochs=1, debug=True)
        train_ids, val_ids = train_ids[0:1], val_ids[0:1]

    # XXX set neg_ratio to 0 for testing purposes
    # since fastai can't handle neg chips afaik.
    task = (
        rv.TaskConfig.builder(rv.OBJECT_DETECTION)
        .with_chip_size(300)
        .with_classes({'vehicle': (1, 'red')})
        .with_chip_options(neg_ratio=0.0, ioa_thresh=0.8)
        .with_predict_options(merge_thresh=0.3, score_thresh=0.5)
        .build()
    )

    backend = (
        rv.BackendConfig.builder(FASTAI_OBJECT_DETECTION)
        .with_task(task)
        .with_train_options(**train_opts)
        .build()
    )

    def scene_for(tile_id):
        """Build the scene config for one Potsdam tile id."""
        image_uri = join(
            raw_uri, '4_Ortho_RGBIR/top_potsdam_{}_RGBIR.tif'.format(tile_id))
        labels_uri = join(processed_uri, 'labels', 'all',
                          'top_potsdam_{}_RGBIR.json'.format(tile_id))

        if test:
            # Test runs point the scene at the crop target handed to
            # save_image_crop rather than at the full raster.
            cropped_uri = join(processed_uri, 'crops',
                               os.path.basename(image_uri))
            save_image_crop(image_uri, cropped_uri, label_uri=labels_uri,
                            size=1000, min_features=5)
            image_uri = cropped_uri

        scene_builder = rv.SceneConfig.builder()
        return (
            scene_builder
            .with_id(tile_id)
            .with_task(task)
            .with_raster_source(image_uri, channel_order=[0, 1, 2])
            .with_label_source(labels_uri)
            .build()
        )

    train_scenes = list(map(scene_for, train_ids))
    val_scenes = list(map(scene_for, val_ids))

    dataset = (
        rv.DatasetConfig.builder()
        .with_train_scenes(train_scenes)
        .with_validation_scenes(val_scenes)
        .build()
    )

    return (
        rv.ExperimentConfig.builder()
        .with_id(experiment_name)
        .with_root_uri(root_uri)
        .with_task(task)
        .with_backend(backend)
        .with_dataset(dataset)
        .build()
    )
def exp_main(self, raw_uri, processed_uri, root_uri, test=False):
    """Configure car / background chip classification on COWC Potsdam.

    COWC stands for Cars Overhead with Context. The experiment uses the
    FASTAI_CHIP_CLASSIFICATION backend with a 'resnet18' model architecture.

    Args:
        raw_uri: (str) directory of raw data
        processed_uri: (str) directory of processed data
        root_uri: (str) root directory for experiment output
        test: (bool) if True, run a very small experiment as a test and
            generate debug output

    Returns:
        The built experiment config.
    """
    test = str_to_bool(test)

    experiment_name = 'cowc-chip-classification'
    train_opts = {
        'batch_sz': 16,
        'num_epochs': 50,
        'debug': False,
        'lr': 2e-5,
        'sync_interval': 10,
        'model_arch': 'resnet18'
    }
    train_ids = [
        '2_10', '2_11', '2_12', '2_14', '3_11', '3_13', '4_10', '5_10',
        '6_7', '6_9'
    ]
    val_ids = ['2_13', '6_8', '3_10']

    if test:
        # Smoke-test settings: two epochs, batches of two, a single scene
        # per split, debug output enabled.
        experiment_name += '-test'
        train_opts.update(batch_sz=2, num_epochs=2, debug=True)
        train_ids, val_ids = train_ids[0:1], val_ids[0:1]

    task = (
        rv.TaskConfig.builder(rv.CHIP_CLASSIFICATION)
        .with_chip_size(200)
        .with_classes({
            'car': (1, 'red'),
            'background': (2, 'black')
        })
        .build()
    )

    backend = (
        rv.BackendConfig.builder(FASTAI_CHIP_CLASSIFICATION)
        .with_task(task)
        .with_train_options(**train_opts)
        .build()
    )

    def scene_for(tile_id):
        """Build the scene config for one Potsdam tile id."""
        image_uri = join(
            raw_uri, '4_Ortho_RGBIR/top_potsdam_{}_RGBIR.tif'.format(tile_id))
        labels_uri = join(processed_uri, 'labels', 'all',
                          'top_potsdam_{}_RGBIR.json'.format(tile_id))

        if test:
            # Test runs point the scene at the crop target handed to
            # save_image_crop rather than at the full raster.
            cropped_uri = join(processed_uri, 'crops',
                               os.path.basename(image_uri))
            save_image_crop(image_uri, cropped_uri, label_uri=labels_uri,
                            size=1000, min_features=5)
            image_uri = cropped_uri

        # background_class_id=2 matches the 'background' class id above.
        label_source = (
            rv.LabelSourceConfig.builder(rv.CHIP_CLASSIFICATION)
            .with_uri(labels_uri)
            .with_ioa_thresh(0.5)
            .with_use_intersection_over_cell(False)
            .with_pick_min_class_id(True)
            .with_background_class_id(2)
            .with_infer_cells(True)
            .build()
        )

        scene_builder = rv.SceneConfig.builder()
        return (
            scene_builder
            .with_id(tile_id)
            .with_task(task)
            .with_raster_source(image_uri, channel_order=[0, 1, 2])
            .with_label_source(label_source)
            .build()
        )

    train_scenes = list(map(scene_for, train_ids))
    val_scenes = list(map(scene_for, val_ids))

    dataset = (
        rv.DatasetConfig.builder()
        .with_train_scenes(train_scenes)
        .with_validation_scenes(val_scenes)
        .build()
    )

    return (
        rv.ExperimentConfig.builder()
        .with_id(experiment_name)
        .with_root_uri(root_uri)
        .with_task(task)
        .with_backend(backend)
        .with_dataset(dataset)
        .build()
    )