def test_aspp_decoder_creation(self, level, dilation_rates, num_filters):
  """Checks the decoder factory builds an ASPP identical to a manual one."""
  # Decoder instantiated directly through the layers API.
  expected = decoders.ASPP(
      level=level,
      dilation_rates=dilation_rates,
      num_filters=num_filters,
      use_sync_bn=True)

  # Equivalent decoder requested through the config-driven factory.
  model_config = configs.semantic_segmentation.SemanticSegmentationModel()
  model_config.num_classes = 10
  model_config.input_size = [None, None, 3]
  model_config.decoder = decoders_cfg.Decoder(
      type='aspp',
      aspp=decoders_cfg.ASPP(
          level=level,
          dilation_rates=dilation_rates,
          num_filters=num_filters))
  built = factory.build_decoder(
      input_specs={'1': tf.TensorShape([1, 128, 128, 3])},
      model_config=model_config)

  expected_config = expected.get_config()
  built_config = built.get_config()
  # Due to calling `super().get_config()` in aspp layer, everything but the
  # the name of two layer instances are the same, so we force equal name so it
  # will not give false alarm.
  built_config['name'] = expected_config['name']
  self.assertEqual(expected_config, built_config)
def test_deeplabv3_builder(self, backbone_type, input_size, weight_decay):
  """Builds a deeplabv3 model and checks the QAT factory accepts it."""
  num_classes = 21
  height, width = input_size[0], input_size[1]
  input_specs = tf.keras.layers.InputSpec(shape=[None, height, width, 3])

  model_config = semantic_segmentation_cfg.SemanticSegmentationModel(
      num_classes=num_classes,
      backbone=backbones.Backbone(
          type=backbone_type,
          mobilenet=backbones.MobileNet(
              model_id='MobileNetV2', output_stride=16)),
      decoder=decoders.Decoder(
          type='aspp',
          aspp=decoders.ASPP(
              level=4,
              num_filters=256,
              dilation_rates=[],
              spp_layer_version='v1',
              output_tensor=True)),
      head=semantic_segmentation_cfg.SegmentationHead(
          level=4,
          low_level=2,
          num_convs=1,
          upsample_factor=2,
          use_depthwise_convolution=True))

  if weight_decay:
    l2_regularizer = tf.keras.regularizers.l2(weight_decay)
  else:
    l2_regularizer = None

  model = factory.build_segmentation_model(
      input_specs=input_specs,
      model_config=model_config,
      l2_regularizer=l2_regularizer)

  # Should not raise: the float model converts to its QAT counterpart.
  _ = qat_factory.build_qat_segmentation_model(
      model=model,
      quantization=common.Quantization(),
      input_specs=input_specs)
def test_aspp_decoder_creation(self, level, dilation_rates, num_filters):
  """Test creation of ASPP decoder."""
  input_specs = {'1': tf.TensorShape([1, 128, 128, 3])}

  # Decoder instantiated directly.
  direct = decoders.ASPP(
      level=level,
      dilation_rates=dilation_rates,
      num_filters=num_filters,
      use_sync_bn=True)

  # The same decoder obtained via the factory from an equivalent config.
  model_config = configs.semantic_segmentation.SemanticSegmentationModel()
  model_config.num_classes = 10
  model_config.input_size = [None, None, 3]
  model_config.decoder = decoders_cfg.Decoder(
      type='aspp',
      aspp=decoders_cfg.ASPP(
          level=level,
          dilation_rates=dilation_rates,
          num_filters=num_filters))
  from_factory = factory.build_decoder(
      input_specs=input_specs, model_config=model_config)

  # Both construction paths must produce identical layer configurations.
  self.assertEqual(direct.get_config(), from_factory.get_config())
def seg_deeplabv3plus_cityscapes() -> cfg.ExperimentConfig:
  """Image segmentation on cityscapes with resnet deeplabv3+.

  Returns:
    An `ExperimentConfig` for training DeepLabV3+ with a dilated ResNet-101
    backbone on Cityscapes fine annotations.
  """
  train_batch_size = 16
  eval_batch_size = 16
  steps_per_epoch = CITYSCAPES_TRAIN_EXAMPLES // train_batch_size
  output_stride = 16
  aspp_dilation_rates = [6, 12, 18]
  multigrid = [1, 2, 4]
  stem_type = 'v1'
  # Feature level consumed by decoder/head: log2 of the backbone output
  # stride. `np.math` (an alias of the stdlib `math` module) was removed in
  # NumPy 1.25, so use `np.log2` directly.
  level = int(np.log2(output_stride))
  config = cfg.ExperimentConfig(
      task=SemanticSegmentationTask(
          model=SemanticSegmentationModel(
              # Cityscapes uses only 19 semantic classes for train/evaluation.
              # The void (background) class is ignored in train and evaluation.
              num_classes=19,
              input_size=[None, None, 3],
              backbone=backbones.Backbone(
                  type='dilated_resnet',
                  dilated_resnet=backbones.DilatedResNet(
                      model_id=101,
                      output_stride=output_stride,
                      stem_type=stem_type,
                      multigrid=multigrid)),
              decoder=decoders.Decoder(
                  type='aspp',
                  aspp=decoders.ASPP(
                      level=level,
                      dilation_rates=aspp_dilation_rates,
                      pool_kernel_size=[512, 1024])),
              head=SegmentationHead(
                  level=level,
                  num_convs=2,
                  feature_fusion='deeplabv3plus',
                  low_level=2,
                  low_level_num_filters=48),
              norm_activation=common.NormActivation(
                  activation='swish',
                  norm_momentum=0.99,
                  norm_epsilon=1e-3,
                  use_sync_bn=True)),
          losses=Losses(l2_weight_decay=1e-4),
          train_data=DataConfig(
              input_path=os.path.join(CITYSCAPES_INPUT_PATH_BASE,
                                      'train_fine**'),
              crop_size=[512, 1024],
              output_size=[1024, 2048],
              is_training=True,
              global_batch_size=train_batch_size,
              aug_scale_min=0.5,
              aug_scale_max=2.0),
          validation_data=DataConfig(
              input_path=os.path.join(CITYSCAPES_INPUT_PATH_BASE, 'val_fine*'),
              output_size=[1024, 2048],
              is_training=False,
              global_batch_size=eval_batch_size,
              resize_eval_groundtruth=True,
              drop_remainder=False),
          # ImageNet-pretrained resnet101 backbone.
          init_checkpoint='gs://cloud-tpu-checkpoints/vision-2.0/deeplab/deeplab_resnet101_imagenet/ckpt-62400',
          init_checkpoint_modules='backbone'),
      trainer=cfg.TrainerConfig(
          steps_per_loop=steps_per_epoch,
          summary_interval=steps_per_epoch,
          checkpoint_interval=steps_per_epoch,
          train_steps=500 * steps_per_epoch,
          validation_steps=CITYSCAPES_VAL_EXAMPLES // eval_batch_size,
          validation_interval=steps_per_epoch,
          optimizer_config=optimization.OptimizationConfig({
              'optimizer': {
                  'type': 'sgd',
                  'sgd': {
                      'momentum': 0.9
                  }
              },
              'learning_rate': {
                  'type': 'polynomial',
                  'polynomial': {
                      'initial_learning_rate': 0.01,
                      'decay_steps': 500 * steps_per_epoch,
                      'end_learning_rate': 0.0,
                      'power': 0.9
                  }
              },
              'warmup': {
                  'type': 'linear',
                  'linear': {
                      'warmup_steps': 5 * steps_per_epoch,
                      'warmup_learning_rate': 0
                  }
              }
          })),
      restrictions=[
          'task.train_data.is_training != None',
          'task.validation_data.is_training != None'
      ])
  return config
def seg_deeplabv3_pascal() -> cfg.ExperimentConfig:
  """Image segmentation on pascal voc with resnet deeplabv3.

  Returns:
    An `ExperimentConfig` for training DeepLabV3 with a dilated ResNet-101
    backbone on PASCAL VOC (augmented train set).
  """
  train_batch_size = 16
  eval_batch_size = 8
  steps_per_epoch = PASCAL_TRAIN_EXAMPLES // train_batch_size
  output_stride = 16
  # NOTE(review): [6, 12, 18] are the usual rates for output_stride=16 and
  # [12, 24, 36] for output_stride=8; output_stride is 16 here but the
  # os=8 rates are used — confirm this is intentional.
  aspp_dilation_rates = [12, 24, 36]  # [6, 12, 18] if output_stride = 16
  multigrid = [1, 2, 4]
  stem_type = 'v1'
  # Feature level consumed by decoder/head: log2 of the backbone output
  # stride. `np.math` was removed in NumPy 1.25, so use `np.log2` directly.
  level = int(np.log2(output_stride))
  config = cfg.ExperimentConfig(
      task=SemanticSegmentationTask(
          model=SemanticSegmentationModel(
              num_classes=21,
              input_size=[None, None, 3],
              backbone=backbones.Backbone(
                  type='dilated_resnet',
                  dilated_resnet=backbones.DilatedResNet(
                      model_id=101,
                      output_stride=output_stride,
                      multigrid=multigrid,
                      stem_type=stem_type)),
              decoder=decoders.Decoder(
                  type='aspp',
                  aspp=decoders.ASPP(
                      level=level, dilation_rates=aspp_dilation_rates)),
              head=SegmentationHead(level=level, num_convs=0),
              norm_activation=common.NormActivation(
                  activation='swish',
                  norm_momentum=0.9997,
                  norm_epsilon=1e-3,
                  use_sync_bn=True)),
          losses=Losses(l2_weight_decay=1e-4),
          train_data=DataConfig(
              input_path=os.path.join(PASCAL_INPUT_PATH_BASE, 'train_aug*'),
              # TODO(arashwan): test changing size to 513 to match deeplab.
              output_size=[512, 512],
              is_training=True,
              global_batch_size=train_batch_size,
              aug_scale_min=0.5,
              aug_scale_max=2.0),
          validation_data=DataConfig(
              input_path=os.path.join(PASCAL_INPUT_PATH_BASE, 'val*'),
              output_size=[512, 512],
              is_training=False,
              global_batch_size=eval_batch_size,
              resize_eval_groundtruth=False,
              groundtruth_padded_size=[512, 512],
              drop_remainder=False),
          # ImageNet-pretrained resnet101 backbone.
          init_checkpoint='gs://cloud-tpu-checkpoints/vision-2.0/deeplab/deeplab_resnet101_imagenet/ckpt-62400',
          init_checkpoint_modules='backbone'),
      trainer=cfg.TrainerConfig(
          steps_per_loop=steps_per_epoch,
          summary_interval=steps_per_epoch,
          checkpoint_interval=steps_per_epoch,
          train_steps=45 * steps_per_epoch,
          validation_steps=PASCAL_VAL_EXAMPLES // eval_batch_size,
          validation_interval=steps_per_epoch,
          optimizer_config=optimization.OptimizationConfig({
              'optimizer': {
                  'type': 'sgd',
                  'sgd': {
                      'momentum': 0.9
                  }
              },
              'learning_rate': {
                  'type': 'polynomial',
                  'polynomial': {
                      'initial_learning_rate': 0.007,
                      'decay_steps': 45 * steps_per_epoch,
                      'end_learning_rate': 0.0,
                      'power': 0.9
                  }
              },
              'warmup': {
                  'type': 'linear',
                  'linear': {
                      'warmup_steps': 5 * steps_per_epoch,
                      'warmup_learning_rate': 0
                  }
              }
          })),
      restrictions=[
          'task.train_data.is_training != None',
          'task.validation_data.is_training != None'
      ])
  return config
def mnv2_deeplabv3_cityscapes() -> cfg.ExperimentConfig:
  """Image segmentation on cityscapes with mobilenetv2 deeplabv3.

  Returns:
    An `ExperimentConfig` for training DeepLabV3 with a MobileNetV2 backbone
    on Cityscapes fine annotations, exporting the best mIoU checkpoint.
  """
  train_batch_size = 16
  eval_batch_size = 16
  steps_per_epoch = CITYSCAPES_TRAIN_EXAMPLES // train_batch_size
  output_stride = 16
  # Empty rates: ASPP degenerates to 1x1 conv + image pooling branches only.
  aspp_dilation_rates = []
  pool_kernel_size = [512, 1024]
  # Feature level consumed by decoder/head: log2 of the backbone output
  # stride. `np.math` was removed in NumPy 1.25, so use `np.log2` directly.
  level = int(np.log2(output_stride))
  config = cfg.ExperimentConfig(
      task=SemanticSegmentationTask(
          model=SemanticSegmentationModel(
              # Cityscapes uses only 19 semantic classes for train/evaluation.
              # The void (background) class is ignored in train and evaluation.
              num_classes=19,
              input_size=[None, None, 3],
              backbone=backbones.Backbone(
                  type='mobilenet',
                  mobilenet=backbones.MobileNet(
                      model_id='MobileNetV2',
                      output_stride=output_stride)),
              decoder=decoders.Decoder(
                  type='aspp',
                  aspp=decoders.ASPP(
                      level=level,
                      dilation_rates=aspp_dilation_rates,
                      pool_kernel_size=pool_kernel_size)),
              head=SegmentationHead(level=level, num_convs=0),
              norm_activation=common.NormActivation(
                  activation='relu',
                  norm_momentum=0.99,
                  norm_epsilon=1e-3,
                  use_sync_bn=True)),
          losses=Losses(l2_weight_decay=4e-5),
          train_data=DataConfig(
              input_path=os.path.join(CITYSCAPES_INPUT_PATH_BASE,
                                      'train_fine**'),
              crop_size=[512, 1024],
              output_size=[1024, 2048],
              is_training=True,
              global_batch_size=train_batch_size,
              aug_scale_min=0.5,
              aug_scale_max=2.0),
          validation_data=DataConfig(
              input_path=os.path.join(CITYSCAPES_INPUT_PATH_BASE, 'val_fine*'),
              output_size=[1024, 2048],
              is_training=False,
              global_batch_size=eval_batch_size,
              resize_eval_groundtruth=True,
              drop_remainder=False),
          # Coco pre-trained mobilenetv2 checkpoint
          init_checkpoint='gs://tf_model_garden/cloud/vision-2.0/deeplab/deeplabv3_mobilenetv2_coco/best_ckpt-63',
          init_checkpoint_modules='backbone'),
      trainer=cfg.TrainerConfig(
          steps_per_loop=steps_per_epoch,
          summary_interval=steps_per_epoch,
          checkpoint_interval=steps_per_epoch,
          train_steps=100000,
          validation_steps=CITYSCAPES_VAL_EXAMPLES // eval_batch_size,
          validation_interval=steps_per_epoch,
          # Track and export the checkpoint with the best validation mIoU.
          best_checkpoint_eval_metric='mean_iou',
          best_checkpoint_export_subdir='best_ckpt',
          best_checkpoint_metric_comp='higher',
          optimizer_config=optimization.OptimizationConfig({
              'optimizer': {
                  'type': 'sgd',
                  'sgd': {
                      'momentum': 0.9
                  }
              },
              'learning_rate': {
                  'type': 'polynomial',
                  'polynomial': {
                      'initial_learning_rate': 0.01,
                      'decay_steps': 100000,
                      'end_learning_rate': 0.0,
                      'power': 0.9
                  }
              },
              'warmup': {
                  'type': 'linear',
                  'linear': {
                      'warmup_steps': 5 * steps_per_epoch,
                      'warmup_learning_rate': 0
                  }
              }
          })),
      restrictions=[
          'task.train_data.is_training != None',
          'task.validation_data.is_training != None'
      ])
  return config
def seg_deeplabv3plus_ade20k_32(backbone: str,
                                init_backbone: bool = True
                               ) -> cfg.ExperimentConfig:
  """Semantic segmentation on ADE20K dataset with deeplabv3+."""
  num_epochs = 200
  train_batch_size = 128
  eval_batch_size = 32
  image_size = 512
  steps_per_epoch = ADE20K_TRAIN_EXAMPLES // train_batch_size
  aspp_dilation_rates = [5, 10, 15]

  # Optionally start the backbone from its pretrained checkpoint.
  if init_backbone:
    pretrained_checkpoint_path = BACKBONE_PRETRAINED_CHECKPOINT[backbone]
  else:
    pretrained_checkpoint_path = None

  head_level = BACKBONE_HEADPOINT[backbone]
  total_train_steps = num_epochs * steps_per_epoch

  model = base_cfg.SemanticSegmentationModel(
      # ADE20K uses only 32 semantic classes for train/evaluation.
      # The void (background) class is ignored in train and evaluation.
      num_classes=32,
      input_size=[None, None, 3],
      backbone=Backbone(
          type='mobilenet_edgetpu',
          mobilenet_edgetpu=MobileNetEdgeTPU(
              model_id=backbone,
              pretrained_checkpoint_path=pretrained_checkpoint_path,
              freeze_large_filters=500,
          )),
      decoder=decoders.Decoder(
          type='aspp',
          aspp=decoders.ASPP(
              level=head_level,
              use_depthwise_convolution=True,
              dilation_rates=aspp_dilation_rates,
              pool_kernel_size=[256, 256],
              num_filters=128,
              dropout_rate=0.3,
          )),
      head=base_cfg.SegmentationHead(
          level=head_level,
          num_convs=2,
          num_filters=256,
          use_depthwise_convolution=True,
          feature_fusion='deeplabv3plus',
          low_level=BACKBONE_LOWER_FEATURES[backbone],
          low_level_num_filters=48),
      norm_activation=common.NormActivation(
          activation='relu',
          norm_momentum=0.99,
          norm_epsilon=2e-3,
          use_sync_bn=False))

  task = CustomSemanticSegmentationTaskConfig(
      model=model,
      train_data=base_cfg.DataConfig(
          input_path=os.path.join(ADE20K_INPUT_PATH_BASE, 'train-*'),
          output_size=[image_size, image_size],
          is_training=True,
          global_batch_size=train_batch_size),
      validation_data=base_cfg.DataConfig(
          input_path=os.path.join(ADE20K_INPUT_PATH_BASE, 'val-*'),
          output_size=[image_size, image_size],
          is_training=False,
          global_batch_size=eval_batch_size,
          resize_eval_groundtruth=True,
          drop_remainder=False),
      evaluation=base_cfg.Evaluation(report_train_mean_iou=False),
  )

  trainer = cfg.TrainerConfig(
      steps_per_loop=steps_per_epoch,
      summary_interval=steps_per_epoch,
      checkpoint_interval=steps_per_epoch,
      train_steps=total_train_steps,
      validation_steps=ADE20K_VAL_EXAMPLES // eval_batch_size,
      validation_interval=steps_per_epoch,
      optimizer_config=optimization.OptimizationConfig({
          'optimizer': {
              'type': 'adam',
          },
          'learning_rate': {
              'type': 'polynomial',
              'polynomial': {
                  'initial_learning_rate': 0.0001,
                  'decay_steps': total_train_steps,
                  'end_learning_rate': 0.0,
                  'power': 0.9
              }
          },
          'warmup': {
              'type': 'linear',
              'linear': {
                  'warmup_steps': 4 * steps_per_epoch,
                  'warmup_learning_rate': 0
              }
          }
      }))

  return cfg.ExperimentConfig(
      task=task,
      trainer=trainer,
      restrictions=[
          'task.train_data.is_training != None',
          'task.validation_data.is_training != None'
      ])
def seg_deeplabv3plus_scooter() -> cfg.ExperimentConfig:
  """Image segmentation on scooter dataset with resnet deeplabv3+.

  Barebones config for testing purpose (modify batch size, initial lr, steps
  per epoch, train input path, val input path)
  """
  # Hard-coded local test path; adjust to your environment before use.
  scooter_path_glob = 'D:/data/test_data/val**'
  steps_per_epoch = 1
  output_stride = 16
  aspp_dilation_rates = [6, 12, 18]
  multigrid = [1, 2, 4]
  stem_type = 'v1'
  # Feature level consumed by decoder/head: log2 of the backbone output
  # stride. `np.math` was removed in NumPy 1.25, so use `np.log2` directly.
  level = int(np.log2(output_stride))
  config = cfg.ExperimentConfig(
      task=SemanticSegmentationTask(
          model=SemanticSegmentationModel(
              num_classes=19,
              # specifying this speeds up model inference, no change in size
              input_size=[512, 512, 3],
              backbone=backbones.Backbone(
                  type='dilated_resnet',
                  dilated_resnet=backbones.DilatedResNet(
                      model_id=101,
                      output_stride=output_stride,
                      stem_type=stem_type,
                      multigrid=multigrid)),
              decoder=decoders.Decoder(
                  type='aspp',
                  aspp=decoders.ASPP(
                      level=level, dilation_rates=aspp_dilation_rates)),
              head=SegmentationHead(
                  level=level,
                  num_convs=2,
                  feature_fusion='deeplabv3plus',
                  low_level=2,
                  low_level_num_filters=48),
              norm_activation=common.NormActivation(
                  activation='swish',
                  norm_momentum=0.99,
                  norm_epsilon=1e-3,
                  use_sync_bn=True)),
          losses=Losses(l2_weight_decay=1e-4, ignore_label=250),
          train_data=DataConfig(
              input_path=scooter_path_glob,
              output_size=[512, 512],
              is_training=True,
              global_batch_size=1,
              aug_scale_min=0.5,
              aug_scale_max=2.0),
          validation_data=DataConfig(
              input_path=scooter_path_glob,
              output_size=[512, 512],
              is_training=False,
              global_batch_size=1,
              resize_eval_groundtruth=True,
              drop_remainder=False)),
      # resnet101
      # init_checkpoint='D:/repos/data_root/test_data/deeplab_cityscapes_pretrained/model.ckpt',
      # init_checkpoint_modules='all'),
      # init_checkpoint='gs://cloud-tpu-checkpoints/vision-2.0/deeplab/deeplab_resnet101_imagenet/ckpt-62400',
      # init_checkpoint_modules='backbone'),
      trainer=cfg.TrainerConfig(
          steps_per_loop=steps_per_epoch,
          summary_interval=steps_per_epoch,
          checkpoint_interval=steps_per_epoch,
          train_steps=500 * steps_per_epoch,
          validation_steps=1021,
          validation_interval=steps_per_epoch,
          continuous_eval_timeout=1,
          optimizer_config=optimization.OptimizationConfig({
              'optimizer': {
                  'type': 'sgd',
                  'sgd': {
                      'momentum': 0.9
                  }
              },
              'learning_rate': {
                  'type': 'polynomial',
                  'polynomial': {
                      'initial_learning_rate': 0.007,
                      'decay_steps': 500 * steps_per_epoch,
                      'end_learning_rate': 0.0,
                      'power': 0.9
                  }
              },
              'warmup': {
                  'type': 'linear',
                  'linear': {
                      'warmup_steps': 5 * steps_per_epoch,
                      'warmup_learning_rate': 0
                  }
              }
          })),
      restrictions=[
          'task.train_data.is_training != None',
          'task.validation_data.is_training != None'
      ])
  return config
def seg_deeplabv2_pascal() -> cfg.ExperimentConfig:
  """Image segmentation on pascal voc with vggnet deeplabv2.

  Returns:
    An `ExperimentConfig` for training DeepLabV2 (ASPP variant) with a
    dilated VGG-16 backbone on PASCAL VOC (augmented train set).
  """
  train_batch_size = 16
  eval_batch_size = 8
  steps_per_epoch = PASCAL_TRAIN_EXAMPLES // train_batch_size
  # This config uses the ASPP variant of DeepLabV2; the Large-FOV variant
  # would instead use a single 3x3 conv with dilation rate 12.
  aspp_dilation_rates = [6, 12, 18, 24]
  output_stride = 16
  # Feature level consumed by decoder/head: log2 of the backbone output
  # stride. `np.math` was removed in NumPy 1.25, so use `np.log2` directly.
  level = int(np.log2(output_stride))
  config = cfg.ExperimentConfig(
      task=SemanticSegmentationTask(
          model=SemanticSegmentationModel(
              num_classes=21,
              input_size=[None, None, 3],
              backbone=backbones.Backbone(
                  type='dilated_vggnet',
                  dilated_vggnet=backbones.DilatedVGGNet(model_id=16)),
              decoder=decoders.Decoder(
                  type='aspp',
                  aspp=decoders.ASPP(
                      level=level,
                      dilation_rates=aspp_dilation_rates,
                      stem_type='v2',
                      num_filters=1024,
                      use_sync_bn=True)),
              head=SegmentationHead(
                  level=level,
                  num_convs=0,
                  low_level_num_filters=1024,
                  feature_fusion='deeplabv2'),
              norm_activation=common.NormActivation(
                  activation='swish',
                  norm_momentum=0.9997,
                  norm_epsilon=1e-3,
                  use_sync_bn=True)),
          losses=Losses(l2_weight_decay=1e-4),
          train_data=DataConfig(
              input_path=os.path.join(PASCAL_INPUT_PATH_BASE, 'train_aug*'),
              # TODO(arashwan): test changing size to 513 to match deeplab.
              output_size=[512, 512],
              is_training=True,
              global_batch_size=train_batch_size,
              aug_scale_min=0.5,
              aug_scale_max=1.5),
          validation_data=DataConfig(
              input_path=os.path.join(PASCAL_INPUT_PATH_BASE, 'val*'),
              output_size=[512, 512],
              is_training=False,
              global_batch_size=eval_batch_size,
              resize_eval_groundtruth=False,
              groundtruth_padded_size=[512, 512],
              drop_remainder=False),
          # NOTE(review): local, user-specific checkpoint path — not portable.
          init_checkpoint='/home/gunho1123/ckpt_vggnet16_deeplab/',
          init_checkpoint_modules='backbone'),
      trainer=cfg.TrainerConfig(
          steps_per_loop=steps_per_epoch,
          summary_interval=steps_per_epoch,
          checkpoint_interval=steps_per_epoch,
          train_steps=45 * steps_per_epoch,
          validation_steps=PASCAL_VAL_EXAMPLES // eval_batch_size,
          validation_interval=steps_per_epoch,
          optimizer_config=optimization.OptimizationConfig({
              'optimizer': {
                  'type': 'sgd',
                  'sgd': {
                      'momentum': 0.9
                  }
              },
              'learning_rate': {
                  'type': 'polynomial',
                  'polynomial': {
                      'initial_learning_rate': 0.007,
                      'decay_steps': 45 * steps_per_epoch,
                      'end_learning_rate': 0.0,
                      'power': 0.9
                  }
              },
              'warmup': {
                  'type': 'linear',
                  'linear': {
                      'warmup_steps': 5 * steps_per_epoch,
                      'warmup_learning_rate': 0
                  }
              }
          })),
      restrictions=[
          'task.train_data.is_training != None',
          'task.validation_data.is_training != None'
      ])
  return config