def test_model_initializing(self, init_checkpoint_modules):
  """Tests `task.initialize()` restoring weights for the requested modules.

  Builds a Panoptic Mask R-CNN task, saves the freshly built model's weights
  to a temporary checkpoint, points the task's init-checkpoint fields at that
  directory, and runs initialization.
  """
  # The segmentation branch shares the detection backbone/decoder only when
  # it is NOT being initialized from its own (segmentation) checkpoint.
  shared_backbone = ('segmentation_backbone' not in init_checkpoint_modules)
  shared_decoder = ('segmentation_decoder' not in init_checkpoint_modules and
                    shared_backbone)
  task_config = cfg.PanopticMaskRCNNTask(
      model=cfg.PanopticMaskRCNN(
          num_classes=2,
          input_size=[640, 640, 3],
          segmentation_model=segmentation_cfg.SemanticSegmentationModel(
              decoder=decoder_cfg.Decoder(type='fpn')),
          shared_backbone=shared_backbone,
          shared_decoder=shared_decoder))
  task = panoptic_maskrcnn.PanopticMaskRCNNTask(task_config)
  model = task.build_model()

  # Save the randomly initialized weights so they can be restored below.
  ckpt = tf.train.Checkpoint(**model.checkpoint_items)
  ckpt_save_dir = self.create_tempdir().full_path
  ckpt.save(os.path.join(ckpt_save_dir, 'ckpt'))

  # Route the saved checkpoint to the detection and/or segmentation
  # init-checkpoint fields depending on which modules are being restored.
  if (init_checkpoint_modules == ['all'] or
      'backbone' in init_checkpoint_modules):
    task._task_config.init_checkpoint = ckpt_save_dir
  if ('segmentation_backbone' in init_checkpoint_modules or
      'segmentation_decoder' in init_checkpoint_modules):
    task._task_config.segmentation_init_checkpoint = ckpt_save_dir

  task._task_config.init_checkpoint_modules = init_checkpoint_modules
  task.initialize(model)
def test_aspp_decoder_creation(self, level, dilation_rates, num_filters):
  """Checks the decoder factory builds an ASPP equal to a direct build."""
  specs = {'1': tf.TensorShape([1, 128, 128, 3])}
  expected = decoders.ASPP(
      level=level,
      dilation_rates=dilation_rates,
      num_filters=num_filters,
      use_sync_bn=True)

  model_config = configs.semantic_segmentation.SemanticSegmentationModel()
  model_config.num_classes = 10
  model_config.input_size = [None, None, 3]
  model_config.decoder = decoders_cfg.Decoder(
      type='aspp',
      aspp=decoders_cfg.ASPP(
          level=level,
          dilation_rates=dilation_rates,
          num_filters=num_filters))
  built = factory.build_decoder(input_specs=specs, model_config=model_config)

  expected_config = expected.get_config()
  built_config = built.get_config()
  # Due to calling `super().get_config()` in the aspp layer, everything but
  # the name of the two layer instances is the same, so we force equal names
  # so the comparison will not give a false alarm.
  built_config['name'] = expected_config['name']
  self.assertEqual(expected_config, built_config)
def test_deeplabv3_builder(self, backbone_type, input_size, weight_decay):
  """Builds a MobileNet DeepLabV3 model and passes it through the QAT factory.

  Verifies that a segmentation model assembled from config can be converted
  by `qat_factory.build_qat_segmentation_model` without error.
  """
  num_classes = 21
  input_specs = tf.keras.layers.InputSpec(
      shape=[None, input_size[0], input_size[1], 3])
  model_config = semantic_segmentation_cfg.SemanticSegmentationModel(
      num_classes=num_classes,
      backbone=backbones.Backbone(
          type=backbone_type,
          mobilenet=backbones.MobileNet(
              model_id='MobileNetV2', output_stride=16)),
      decoder=decoders.Decoder(
          type='aspp',
          aspp=decoders.ASPP(
              level=4,
              num_filters=256,
              dilation_rates=[],
              spp_layer_version='v1',
              output_tensor=True)),
      head=semantic_segmentation_cfg.SegmentationHead(
          level=4,
          low_level=2,
          num_convs=1,
          upsample_factor=2,
          use_depthwise_convolution=True))
  # No regularizer when weight_decay is falsy (0 or None).
  l2_regularizer = (
      tf.keras.regularizers.l2(weight_decay) if weight_decay else None)
  model = factory.build_segmentation_model(
      input_specs=input_specs,
      model_config=model_config,
      l2_regularizer=l2_regularizer)
  quantization_config = common.Quantization()
  _ = qat_factory.build_qat_segmentation_model(
      model=model, quantization=quantization_config, input_specs=input_specs)
def test_nasfpn_decoder_creation(self, num_filters, num_repeats,
                                 use_separable_conv):
  """Test creation of NASFPN decoder."""
  min_level = 3
  max_level = 7
  input_specs = {}
  # NOTE(review): range() excludes `max_level`, so no input spec is built for
  # level 7 — presumably the backbone endpoints stop below max_level and
  # NASFPN builds the top level itself; confirm against the decoder.
  for level in range(min_level, max_level):
    input_specs[str(level)] = tf.TensorShape(
        [1, 128 // (2**level), 128 // (2**level), 3])

  network = decoders.NASFPN(
      input_specs=input_specs,
      num_filters=num_filters,
      num_repeats=num_repeats,
      use_separable_conv=use_separable_conv,
      use_sync_bn=True)

  model_config = configs.retinanet.RetinaNet()
  model_config.min_level = min_level
  model_config.max_level = max_level
  model_config.num_classes = 10
  model_config.input_size = [None, None, 3]
  model_config.decoder = decoders_cfg.Decoder(
      type='nasfpn',
      nasfpn=decoders_cfg.NASFPN(
          num_filters=num_filters,
          num_repeats=num_repeats,
          use_separable_conv=use_separable_conv))

  factory_network = factory.build_decoder(
      input_specs=input_specs, model_config=model_config)

  # The hand-built and factory-built decoders must serialize identically.
  network_config = network.get_config()
  factory_network_config = factory_network.get_config()
  self.assertEqual(network_config, factory_network_config)
def test_aspp_decoder_creation(self, level, dilation_rates, num_filters):
  """Test creation of ASPP decoder."""
  input_specs = {'1': tf.TensorShape([1, 128, 128, 3])}
  network = decoders.ASPP(
      level=level,
      dilation_rates=dilation_rates,
      num_filters=num_filters,
      use_sync_bn=True)

  model_config = configs.semantic_segmentation.SemanticSegmentationModel()
  model_config.num_classes = 10
  model_config.input_size = [None, None, 3]
  model_config.decoder = decoders_cfg.Decoder(
      type='aspp',
      aspp=decoders_cfg.ASPP(
          level=level, dilation_rates=dilation_rates,
          num_filters=num_filters))

  factory_network = factory.build_decoder(
      input_specs=input_specs, model_config=model_config)

  network_config = network.get_config()
  factory_network_config = factory_network.get_config()
  # Fix: the two ASPP instances get distinct auto-generated Keras layer names
  # (via `super().get_config()`), so a raw config comparison fails on the
  # irrelevant 'name' field. Align the name before comparing, matching the
  # sibling ASPP factory test in this repository.
  factory_network_config['name'] = network_config['name']
  self.assertEqual(network_config, factory_network_config)
class Submodel(hyperparams.Config):
  """Config for one head submodel attached to a shared-backbone model."""
  name: str = 'foo'  # Identifies the head (e.g. 'segmentation', 'yolo').
  num_classes: int = 0
  min_level: int = 3  # only for FPN or NASFPN
  max_level: int = 6  # only for FPN or NASFPN
  head: hyperparams.Config = SegmentationHead()
  decoder: decoders.Decoder = decoders.Decoder(type='identity')
  # NOTE(review): the default None does not match the plain `str` annotation;
  # this should probably be `Optional[str]` — confirm and fix with the
  # typing import available.
  init_checkpoint_modules: str = None
class YoloModel(hyperparams.Config):
  """YOLO detection model config (backbone + decoder + head)."""
  num_classes: int = 0
  input_size: List[int] = dataclasses.field(default_factory=list)
  min_level: int = 3  # only for FPN or NASFPN
  max_level: int = 6  # only for FPN or NASFPN
  head: hyperparams.Config = YoloHead()
  backbone: backbones.Backbone = backbones.Backbone(
      type='resnet', resnet=backbones.ResNet())
  decoder: decoders.Decoder = decoders.Decoder(type='identity')
  norm_activation: common.NormActivation = common.NormActivation()
class SemanticSegmentationModel(hyperparams.Config):
  """Semantic segmentation model config."""
  num_classes: int = 0
  input_size: List[int] = dataclasses.field(default_factory=list)
  min_level: int = 3  # Feature pyramid levels; used by FPN-style decoders.
  max_level: int = 6
  head: SegmentationHead = SegmentationHead()
  backbone: backbones.Backbone = backbones.Backbone(
      type='resnet', resnet=backbones.ResNet())
  decoder: decoders.Decoder = decoders.Decoder(type='identity')
  norm_activation: common.NormActivation = common.NormActivation()
class BASNetModel(hyperparams.Config):
  """BASNet model config (encoder backbone paired with BASNet decoder)."""
  num_classes: int = 0
  input_size: List[int] = dataclasses.field(default_factory=list)
  # Unused fields kept from the template this config was derived from:
  #min_level: int = 3
  #max_level: int = 6
  #head: BASNetHead = BASNetHead()
  backbone: backbones.Backbone = backbones.Backbone(
      type='basnet_en', basnet_en=backbones.BASNet_En())
  decoder: decoders.Decoder = decoders.Decoder(type='basnet_de')
  norm_activation: common.NormActivation = common.NormActivation()
def test_identity_decoder_creation(self):
  """Test creation of identity decoder."""
  model_config = configs.retinanet.RetinaNet()
  model_config.num_classes = 2
  model_config.input_size = [None, None, 3]
  model_config.decoder = decoders_cfg.Decoder(
      type='identity', identity=decoders_cfg.Identity())
  # An 'identity' decoder means no decoder network: the factory returns None.
  built = factory.build_decoder(input_specs=None, model_config=model_config)
  self.assertIsNone(built)
class RetinaNet(hyperparams.Config):
  """RetinaNet detection model config."""
  num_classes: int = 0
  input_size: List[int] = dataclasses.field(default_factory=list)
  min_level: int = 3  # Feature pyramid levels covered by anchors/heads.
  max_level: int = 7
  anchor: Anchor = Anchor()
  backbone: backbones.Backbone = backbones.Backbone(
      type='resnet', resnet=backbones.ResNet())
  decoder: decoders.Decoder = decoders.Decoder(type='fpn', fpn=decoders.FPN())
  head: RetinaNetHead = RetinaNetHead()
  detection_generator: DetectionGenerator = DetectionGenerator()
  norm_activation: common.NormActivation = common.NormActivation()
class MaskRCNN(hyperparams.Config):
  """Mask R-CNN model config (two-stage detector with optional mask branch)."""
  num_classes: int = 0
  input_size: List[int] = dataclasses.field(default_factory=list)
  min_level: int = 2  # Feature pyramid levels consumed by the RPN/heads.
  max_level: int = 6
  anchor: Anchor = Anchor()
  include_mask: bool = True  # Disable to run as plain Faster R-CNN.
  backbone: backbones.Backbone = backbones.Backbone(
      type='resnet', resnet=backbones.ResNet())
  decoder: decoders.Decoder = decoders.Decoder(type='fpn', fpn=decoders.FPN())
  rpn_head: RPNHead = RPNHead()
  detection_head: DetectionHead = DetectionHead()
  roi_generator: ROIGenerator = ROIGenerator()
  roi_sampler: ROISampler = ROISampler()
  roi_aligner: ROIAligner = ROIAligner()
  detection_generator: DetectionGenerator = DetectionGenerator()
  # Mask branch components; only used when `include_mask` is True.
  mask_head: Optional[MaskHead] = MaskHead()
  mask_sampler: Optional[MaskSampler] = MaskSampler()
  mask_roi_aligner: Optional[MaskROIAligner] = MaskROIAligner()
  norm_activation: common.NormActivation = common.NormActivation(
      norm_momentum=0.997, norm_epsilon=0.0001, use_sync_bn=True)
def test_builder(self, backbone_type, input_size, segmentation_backbone_type,
                 segmentation_decoder_type, fusion_type):
  """Builds a Panoptic Mask R-CNN from config and checks construction."""
  # Fix: `np.math` (an alias of the stdlib math module) was deprecated and
  # removed in NumPy 2.0; use the stdlib directly. Function-scope import
  # keeps this change local to the block.
  import math

  num_classes = 2
  input_specs = tf.keras.layers.InputSpec(
      shape=[None, input_size[0], input_size[1], 3])

  segmentation_output_stride = 16
  # Feature level feeding the segmentation head: log2(output stride).
  level = int(math.log2(segmentation_output_stride))
  segmentation_model = semantic_segmentation.SemanticSegmentationModel(
      num_classes=2,
      backbone=backbones.Backbone(type=segmentation_backbone_type),
      decoder=decoders.Decoder(type=segmentation_decoder_type),
      head=semantic_segmentation.SegmentationHead(level=level))
  model_config = panoptic_maskrcnn_cfg.PanopticMaskRCNN(
      num_classes=num_classes,
      segmentation_model=segmentation_model,
      backbone=backbones.Backbone(type=backbone_type),
      # Share the detection backbone/decoder when no dedicated segmentation
      # backbone/decoder type is requested.
      shared_backbone=segmentation_backbone_type is None,
      shared_decoder=segmentation_decoder_type is None)
  l2_regularizer = tf.keras.regularizers.l2(5e-5)
  _ = factory.build_panoptic_maskrcnn(
      input_specs=input_specs,
      model_config=model_config,
      l2_regularizer=l2_regularizer)
def retinanet_spinenet_mobile_coco() -> cfg.ExperimentConfig:
  """COCO object detection with RetinaNet using Mobile SpineNet backbone."""
  train_batch_size = 256
  eval_batch_size = 8
  steps_per_epoch = COCO_TRAIN_EXAMPLES // train_batch_size
  input_size = 384

  config = cfg.ExperimentConfig(
      runtime=cfg.RuntimeConfig(mixed_precision_dtype='float32'),
      task=RetinaNetTask(
          annotation_file=os.path.join(COCO_INPUT_PATH_BASE,
                                       'instances_val2017.json'),
          model=RetinaNet(
              backbone=backbones.Backbone(
                  type='spinenet_mobile',
                  spinenet_mobile=backbones.SpineNetMobile(
                      model_id='49',
                      stochastic_depth_drop_rate=0.2,
                      min_level=3,
                      max_level=7)),
              # SpineNet emits multi-scale features directly, so no decoder.
              decoder=decoders.Decoder(
                  type='identity', identity=decoders.Identity()),
              head=RetinaNetHead(num_filters=48, use_separable_conv=True),
              anchor=Anchor(anchor_size=3),
              norm_activation=common.NormActivation(
                  use_sync_bn=True, activation='swish'),
              num_classes=91,
              input_size=[input_size, input_size, 3],
              min_level=3,
              max_level=7),
          losses=Losses(l2_weight_decay=3e-5),
          train_data=DataConfig(
              input_path=os.path.join(COCO_INPUT_PATH_BASE, 'train*'),
              is_training=True,
              global_batch_size=train_batch_size,
              parser=Parser(
                  aug_rand_hflip=True, aug_scale_min=0.1, aug_scale_max=2.0)),
          validation_data=DataConfig(
              input_path=os.path.join(COCO_INPUT_PATH_BASE, 'val*'),
              is_training=False,
              global_batch_size=eval_batch_size)),
      trainer=cfg.TrainerConfig(
          train_steps=600 * steps_per_epoch,
          validation_steps=COCO_VAL_EXAMPLES // eval_batch_size,
          validation_interval=steps_per_epoch,
          steps_per_loop=steps_per_epoch,
          summary_interval=steps_per_epoch,
          checkpoint_interval=steps_per_epoch,
          optimizer_config=optimization.OptimizationConfig({
              'optimizer': {
                  'type': 'sgd',
                  'sgd': {
                      'momentum': 0.9
                  }
              },
              'learning_rate': {
                  'type': 'stepwise',
                  'stepwise': {
                      'boundaries': [575 * steps_per_epoch,
                                     590 * steps_per_epoch],
                      # Learning rates are scaled linearly with batch size.
                      'values': [
                          0.32 * train_batch_size / 256.0,
                          0.032 * train_batch_size / 256.0,
                          0.0032 * train_batch_size / 256.0
                      ],
                  }
              },
              'warmup': {
                  'type': 'linear',
                  'linear': {
                      'warmup_steps': 2000,
                      'warmup_learning_rate': 0.0067
                  }
              }
          })),
      restrictions=[
          'task.train_data.is_training != None',
          'task.validation_data.is_training != None',
          'task.model.min_level == task.model.backbone.spinenet_mobile.min_level',
          'task.model.max_level == task.model.backbone.spinenet_mobile.max_level',
      ])
  return config
def seg_deeplabv2_pascal() -> cfg.ExperimentConfig:
  """Image segmentation on PASCAL VOC with dilated VGGNet DeepLabV2."""
  # Fix: `np.math` (an alias of the stdlib math module) was deprecated and
  # removed in NumPy 2.0; use the stdlib directly. Function-scope import
  # keeps this change local to the block.
  import math

  train_batch_size = 16
  eval_batch_size = 8
  steps_per_epoch = PASCAL_TRAIN_EXAMPLES // train_batch_size
  # for Large FOV (currently unused below; kept for reference)
  fov_dilation_rates = 12
  kernel_size = 3
  # for ASPP
  aspp_dilation_rates = [6, 12, 18, 24]

  output_stride = 16
  # Feature level feeding the decoder/head: log2(output stride).
  level = int(math.log2(output_stride))
  config = cfg.ExperimentConfig(
      task=SemanticSegmentationTask(
          model=SemanticSegmentationModel(
              num_classes=21,
              input_size=[None, None, 3],
              backbone=backbones.Backbone(
                  type='dilated_vggnet',
                  dilated_vggnet=backbones.DilatedVGGNet(model_id=16)),
              decoder=decoders.Decoder(
                  type='aspp',
                  aspp=decoders.ASPP(
                      level=level,
                      dilation_rates=aspp_dilation_rates,
                      stem_type='v2',
                      num_filters=1024,
                      use_sync_bn=True)),
              head=SegmentationHead(
                  level=level,
                  num_convs=0,
                  low_level_num_filters=1024,
                  feature_fusion='deeplabv2'),
              norm_activation=common.NormActivation(
                  activation='swish',
                  norm_momentum=0.9997,
                  norm_epsilon=1e-3,
                  use_sync_bn=True)),
          losses=Losses(l2_weight_decay=1e-4),
          train_data=DataConfig(
              input_path=os.path.join(PASCAL_INPUT_PATH_BASE, 'train_aug*'),
              # TODO(arashwan): test changing size to 513 to match deeplab.
              output_size=[512, 512],
              is_training=True,
              global_batch_size=train_batch_size,
              aug_scale_min=0.5,
              aug_scale_max=1.5),
          validation_data=DataConfig(
              input_path=os.path.join(PASCAL_INPUT_PATH_BASE, 'val*'),
              output_size=[512, 512],
              is_training=False,
              global_batch_size=eval_batch_size,
              resize_eval_groundtruth=False,
              groundtruth_padded_size=[512, 512],
              drop_remainder=False),
          # resnet101
          init_checkpoint='/home/gunho1123/ckpt_vggnet16_deeplab/',
          init_checkpoint_modules='backbone'),
      trainer=cfg.TrainerConfig(
          steps_per_loop=steps_per_epoch,
          summary_interval=steps_per_epoch,
          checkpoint_interval=steps_per_epoch,
          train_steps=45 * steps_per_epoch,
          validation_steps=PASCAL_VAL_EXAMPLES // eval_batch_size,
          validation_interval=steps_per_epoch,
          optimizer_config=optimization.OptimizationConfig({
              'optimizer': {
                  'type': 'sgd',
                  'sgd': {
                      'momentum': 0.9
                  }
              },
              'learning_rate': {
                  'type': 'polynomial',
                  'polynomial': {
                      'initial_learning_rate': 0.007,
                      'decay_steps': 45 * steps_per_epoch,
                      'end_learning_rate': 0.0,
                      'power': 0.9
                  }
              },
              'warmup': {
                  'type': 'linear',
                  'linear': {
                      'warmup_steps': 5 * steps_per_epoch,
                      'warmup_learning_rate': 0
                  }
              }
          })),
      restrictions=[
          'task.train_data.is_training != None',
          'task.validation_data.is_training != None'
      ])
  return config
def seg_deeplabv3plus_ade20k_32(backbone: str,
                                init_backbone: bool = True
                               ) -> cfg.ExperimentConfig:
  """Semantic segmentation on ADE20K dataset with deeplabv3+.

  Args:
    backbone: Key into the BACKBONE_* lookup tables selecting the
      MobileNet-EdgeTPU variant.
    init_backbone: If True, warm-start the backbone from its pretrained
      checkpoint.

  Returns:
    The assembled ExperimentConfig.
  """
  epochs = 200
  train_batch_size = 128
  eval_batch_size = 32
  image_size = 512
  steps_per_epoch = ADE20K_TRAIN_EXAMPLES // train_batch_size
  aspp_dilation_rates = [5, 10, 15]
  pretrained_checkpoint_path = BACKBONE_PRETRAINED_CHECKPOINT[
      backbone] if init_backbone else None
  config = cfg.ExperimentConfig(
      task=CustomSemanticSegmentationTaskConfig(
          model=base_cfg.SemanticSegmentationModel(
              # ADE20K uses only 32 semantic classes for train/evaluation.
              # The void (background) class is ignored in train and evaluation.
              num_classes=32,
              input_size=[None, None, 3],
              backbone=Backbone(
                  type='mobilenet_edgetpu',
                  mobilenet_edgetpu=MobileNetEdgeTPU(
                      model_id=backbone,
                      pretrained_checkpoint_path=pretrained_checkpoint_path,
                      freeze_large_filters=500,
                  )),
              decoder=decoders.Decoder(
                  type='aspp',
                  aspp=decoders.ASPP(
                      level=BACKBONE_HEADPOINT[backbone],
                      use_depthwise_convolution=True,
                      dilation_rates=aspp_dilation_rates,
                      pool_kernel_size=[256, 256],
                      num_filters=128,
                      dropout_rate=0.3,
                  )),
              head=base_cfg.SegmentationHead(
                  level=BACKBONE_HEADPOINT[backbone],
                  num_convs=2,
                  num_filters=256,
                  use_depthwise_convolution=True,
                  feature_fusion='deeplabv3plus',
                  low_level=BACKBONE_LOWER_FEATURES[backbone],
                  low_level_num_filters=48),
              norm_activation=common.NormActivation(
                  activation='relu',
                  norm_momentum=0.99,
                  norm_epsilon=2e-3,
                  use_sync_bn=False)),
          train_data=base_cfg.DataConfig(
              input_path=os.path.join(ADE20K_INPUT_PATH_BASE, 'train-*'),
              output_size=[image_size, image_size],
              is_training=True,
              global_batch_size=train_batch_size),
          validation_data=base_cfg.DataConfig(
              input_path=os.path.join(ADE20K_INPUT_PATH_BASE, 'val-*'),
              output_size=[image_size, image_size],
              is_training=False,
              global_batch_size=eval_batch_size,
              resize_eval_groundtruth=True,
              drop_remainder=False),
          evaluation=base_cfg.Evaluation(report_train_mean_iou=False),
      ),
      trainer=cfg.TrainerConfig(
          steps_per_loop=steps_per_epoch,
          summary_interval=steps_per_epoch,
          checkpoint_interval=steps_per_epoch,
          train_steps=epochs * steps_per_epoch,
          validation_steps=ADE20K_VAL_EXAMPLES // eval_batch_size,
          validation_interval=steps_per_epoch,
          optimizer_config=optimization.OptimizationConfig({
              'optimizer': {
                  'type': 'adam',
              },
              'learning_rate': {
                  'type': 'polynomial',
                  'polynomial': {
                      'initial_learning_rate': 0.0001,
                      'decay_steps': epochs * steps_per_epoch,
                      'end_learning_rate': 0.0,
                      'power': 0.9
                  }
              },
              'warmup': {
                  'type': 'linear',
                  'linear': {
                      'warmup_steps': 4 * steps_per_epoch,
                      'warmup_learning_rate': 0
                  }
              }
          })),
      restrictions=[
          'task.train_data.is_training != None',
          'task.validation_data.is_training != None'
      ])
  return config
def seg_resnetfpn_pascal() -> cfg.ExperimentConfig:
  """Image segmentation on PASCAL VOC with ResNet-50 + FPN."""
  train_batch_size = 256
  eval_batch_size = 32
  steps_per_epoch = PASCAL_TRAIN_EXAMPLES // train_batch_size
  config = cfg.ExperimentConfig(
      task=SemanticSegmentationTask(
          model=SemanticSegmentationModel(
              num_classes=21,
              input_size=[512, 512, 3],
              min_level=3,
              max_level=7,
              backbone=backbones.Backbone(
                  type='resnet', resnet=backbones.ResNet(model_id=50)),
              decoder=decoders.Decoder(type='fpn', fpn=decoders.FPN()),
              head=SegmentationHead(level=3, num_convs=3),
              norm_activation=common.NormActivation(
                  activation='swish', use_sync_bn=True)),
          losses=Losses(l2_weight_decay=1e-4),
          train_data=DataConfig(
              input_path=os.path.join(PASCAL_INPUT_PATH_BASE, 'train_aug*'),
              is_training=True,
              global_batch_size=train_batch_size,
              aug_scale_min=0.2,
              aug_scale_max=1.5),
          validation_data=DataConfig(
              input_path=os.path.join(PASCAL_INPUT_PATH_BASE, 'val*'),
              is_training=False,
              global_batch_size=eval_batch_size,
              resize_eval_groundtruth=False,
              groundtruth_padded_size=[512, 512],
              drop_remainder=False),
      ),
      trainer=cfg.TrainerConfig(
          steps_per_loop=steps_per_epoch,
          summary_interval=steps_per_epoch,
          checkpoint_interval=steps_per_epoch,
          train_steps=450 * steps_per_epoch,
          validation_steps=PASCAL_VAL_EXAMPLES // eval_batch_size,
          validation_interval=steps_per_epoch,
          optimizer_config=optimization.OptimizationConfig({
              'optimizer': {
                  'type': 'sgd',
                  'sgd': {
                      'momentum': 0.9
                  }
              },
              'learning_rate': {
                  'type': 'polynomial',
                  'polynomial': {
                      'initial_learning_rate': 0.007,
                      'decay_steps': 450 * steps_per_epoch,
                      'end_learning_rate': 0.0,
                      'power': 0.9
                  }
              },
              'warmup': {
                  'type': 'linear',
                  'linear': {
                      'warmup_steps': 5 * steps_per_epoch,
                      'warmup_learning_rate': 0
                  }
              }
          })),
      restrictions=[
          'task.train_data.is_training != None',
          'task.validation_data.is_training != None'
      ])
  return config
def seg_deeplabv3plus_cityscapes() -> cfg.ExperimentConfig:
  """Image segmentation on Cityscapes with dilated ResNet-101 DeepLabV3+."""
  # Fix: `np.math` (an alias of the stdlib math module) was deprecated and
  # removed in NumPy 2.0; use the stdlib directly. Function-scope import
  # keeps this change local to the block.
  import math

  train_batch_size = 16
  eval_batch_size = 16
  steps_per_epoch = CITYSCAPES_TRAIN_EXAMPLES // train_batch_size
  output_stride = 16
  aspp_dilation_rates = [6, 12, 18]
  multigrid = [1, 2, 4]
  stem_type = 'v1'
  # Feature level feeding the decoder/head: log2(output stride).
  level = int(math.log2(output_stride))
  config = cfg.ExperimentConfig(
      task=SemanticSegmentationTask(
          model=SemanticSegmentationModel(
              # Cityscapes uses only 19 semantic classes for train/evaluation.
              # The void (background) class is ignored in train and evaluation.
              num_classes=19,
              input_size=[None, None, 3],
              backbone=backbones.Backbone(
                  type='dilated_resnet',
                  dilated_resnet=backbones.DilatedResNet(
                      model_id=101,
                      output_stride=output_stride,
                      stem_type=stem_type,
                      multigrid=multigrid)),
              decoder=decoders.Decoder(
                  type='aspp',
                  aspp=decoders.ASPP(
                      level=level,
                      dilation_rates=aspp_dilation_rates,
                      pool_kernel_size=[512, 1024])),
              head=SegmentationHead(
                  level=level,
                  num_convs=2,
                  feature_fusion='deeplabv3plus',
                  low_level=2,
                  low_level_num_filters=48),
              norm_activation=common.NormActivation(
                  activation='swish',
                  norm_momentum=0.99,
                  norm_epsilon=1e-3,
                  use_sync_bn=True)),
          losses=Losses(l2_weight_decay=1e-4),
          train_data=DataConfig(
              input_path=os.path.join(CITYSCAPES_INPUT_PATH_BASE,
                                      'train_fine**'),
              crop_size=[512, 1024],
              output_size=[1024, 2048],
              is_training=True,
              global_batch_size=train_batch_size,
              aug_scale_min=0.5,
              aug_scale_max=2.0),
          validation_data=DataConfig(
              input_path=os.path.join(CITYSCAPES_INPUT_PATH_BASE,
                                      'val_fine*'),
              output_size=[1024, 2048],
              is_training=False,
              global_batch_size=eval_batch_size,
              resize_eval_groundtruth=True,
              drop_remainder=False),
          # resnet101
          init_checkpoint='gs://cloud-tpu-checkpoints/vision-2.0/deeplab/deeplab_resnet101_imagenet/ckpt-62400',
          init_checkpoint_modules='backbone'),
      trainer=cfg.TrainerConfig(
          steps_per_loop=steps_per_epoch,
          summary_interval=steps_per_epoch,
          checkpoint_interval=steps_per_epoch,
          train_steps=500 * steps_per_epoch,
          validation_steps=CITYSCAPES_VAL_EXAMPLES // eval_batch_size,
          validation_interval=steps_per_epoch,
          optimizer_config=optimization.OptimizationConfig({
              'optimizer': {
                  'type': 'sgd',
                  'sgd': {
                      'momentum': 0.9
                  }
              },
              'learning_rate': {
                  'type': 'polynomial',
                  'polynomial': {
                      'initial_learning_rate': 0.01,
                      'decay_steps': 500 * steps_per_epoch,
                      'end_learning_rate': 0.0,
                      'power': 0.9
                  }
              },
              'warmup': {
                  'type': 'linear',
                  'linear': {
                      'warmup_steps': 5 * steps_per_epoch,
                      'warmup_learning_rate': 0
                  }
              }
          })),
      restrictions=[
          'task.train_data.is_training != None',
          'task.validation_data.is_training != None'
      ])
  return config
def seg_deeplabv3plus_scooter() -> cfg.ExperimentConfig:
  """Image segmentation on scooter dataset with resnet deeplabv3+.

  Barebones config for testing purpose (modify batch size, initial lr, steps
  per epoch, train input path, val input path).
  """
  # Fix: `np.math` (an alias of the stdlib math module) was deprecated and
  # removed in NumPy 2.0; use the stdlib directly. Function-scope import
  # keeps this change local to the block.
  import math

  scooter_path_glob = 'D:/data/test_data/val**'
  steps_per_epoch = 1
  output_stride = 16
  aspp_dilation_rates = [6, 12, 18]
  multigrid = [1, 2, 4]
  stem_type = 'v1'
  # Feature level feeding the decoder/head: log2(output stride).
  level = int(math.log2(output_stride))
  config = cfg.ExperimentConfig(
      task=SemanticSegmentationTask(
          model=SemanticSegmentationModel(
              num_classes=19,
              # Specifying this speeds up model inference, no change in size.
              input_size=[512, 512, 3],
              backbone=backbones.Backbone(
                  type='dilated_resnet',
                  dilated_resnet=backbones.DilatedResNet(
                      model_id=101,
                      output_stride=output_stride,
                      stem_type=stem_type,
                      multigrid=multigrid)),
              decoder=decoders.Decoder(
                  type='aspp',
                  aspp=decoders.ASPP(
                      level=level, dilation_rates=aspp_dilation_rates)),
              head=SegmentationHead(
                  level=level,
                  num_convs=2,
                  feature_fusion='deeplabv3plus',
                  low_level=2,
                  low_level_num_filters=48),
              norm_activation=common.NormActivation(
                  activation='swish',
                  norm_momentum=0.99,
                  norm_epsilon=1e-3,
                  use_sync_bn=True)),
          losses=Losses(l2_weight_decay=1e-4, ignore_label=250),
          train_data=DataConfig(
              input_path=scooter_path_glob,
              output_size=[512, 512],
              is_training=True,
              global_batch_size=1,
              aug_scale_min=0.5,
              aug_scale_max=2.0),
          validation_data=DataConfig(
              input_path=scooter_path_glob,
              output_size=[512, 512],
              is_training=False,
              global_batch_size=1,
              resize_eval_groundtruth=True,
              drop_remainder=False)),
          # Pretrained starting points, kept for reference:
          # init_checkpoint='D:/repos/data_root/test_data/deeplab_cityscapes_pretrained/model.ckpt',
          # init_checkpoint_modules='all'),
          # init_checkpoint='gs://cloud-tpu-checkpoints/vision-2.0/deeplab/deeplab_resnet101_imagenet/ckpt-62400',
          # init_checkpoint_modules='backbone'),
      trainer=cfg.TrainerConfig(
          steps_per_loop=steps_per_epoch,
          summary_interval=steps_per_epoch,
          checkpoint_interval=steps_per_epoch,
          train_steps=500 * steps_per_epoch,
          validation_steps=1021,
          validation_interval=steps_per_epoch,
          continuous_eval_timeout=1,
          optimizer_config=optimization.OptimizationConfig({
              'optimizer': {
                  'type': 'sgd',
                  'sgd': {
                      'momentum': 0.9
                  }
              },
              'learning_rate': {
                  'type': 'polynomial',
                  'polynomial': {
                      'initial_learning_rate': 0.007,
                      'decay_steps': 500 * steps_per_epoch,
                      'end_learning_rate': 0.0,
                      'power': 0.9
                  }
              },
              'warmup': {
                  'type': 'linear',
                  'linear': {
                      'warmup_steps': 5 * steps_per_epoch,
                      'warmup_learning_rate': 0
                  }
              }
          })),
      restrictions=[
          'task.train_data.is_training != None',
          'task.validation_data.is_training != None'
      ])
  return config
def multitask_vision() -> multi_cfg.MultiTaskExperimentConfig:
  """Vision task with single backbone and multiple heads.

  Each head can be a segmenter, detector or classifier.
  TODO: use same num_class and input_size in both task and model definition.

  multi_cfg.MultiTaskConfig:
    - Retains each task_name, entire task, eval_steps and weights.
    - Entire task used in respective multitask trainers for train_step.
    - Weights used in task_sampler.
  multi_cfg.MultiTaskTrainerConfig:
    - trainer_type and task_sampler used to configure task sampling in
      train_lib.
    - Normal multi_cfg.TrainerConfig params used directly in train_lib.
  """
  input_path_segmentation = ''
  input_path_classification = ''
  input_path_yolo = ''
  # Totals are sums over the three datasets (segmentation + classification
  # + yolo).
  steps_per_epoch = 6915 + 2486 + 600
  train_batch_size = 1
  eval_batch_size = 1
  validation_steps = 1021 + 621 + 600

  segmentation_routine = multi_cfg.TaskRoutine(
      task_name='segmentation',
      task_config=SemanticSegmentationSubtask(
          model=SemanticSegmentationModelSpecs(
              num_classes=19, input_size=[256, 256, 3]),
          losses=SegmentationLosses(
              ignore_label=250, top_k_percent_pixels=0.3),
          train_data=SegmentationDataConfig(
              output_size=[256, 256],
              input_path=input_path_segmentation,
              global_batch_size=train_batch_size,
              is_training=True,
              aug_scale_min=0.5,
              aug_scale_max=2.0,
              preserve_aspect_ratio=False,
              aug_policy='randaug',
              randaug_magnitude=5,
              randaug_available_ops=[
                  'AutoContrast', 'Equalize', 'Invert', 'Rotate', 'Posterize',
                  'Solarize', 'Color', 'Contrast', 'Brightness', 'Sharpness',
                  'Cutout', 'SolarizeAdd'
              ]),
          validation_data=SegmentationDataConfig(
              output_size=[256, 256],
              input_path=input_path_segmentation,
              global_batch_size=eval_batch_size,
              is_training=False,
              resize_eval_groundtruth=True,
              drop_remainder=False)),
      eval_steps=603,  # check where eval steps is used
      task_weight=1.0)

  classification_routine = multi_cfg.TaskRoutine(
      task_name='classification',
      task_config=ImageClassificationSubtask(
          model=ImageClassificationModelSpecs(
              num_classes=4, input_size=[256, 256, 3]),
          losses=ClassificationLosses(label_smoothing=0.1),
          train_data=ClassificationDataConfig(
              input_path=input_path_classification,
              is_training=True,
              global_batch_size=train_batch_size,
              aug_policy='randaug',
              randaug_magnitude=5),
          validation_data=ClassificationDataConfig(
              input_path=input_path_classification,
              is_training=False,
              global_batch_size=eval_batch_size,
              drop_remainder=False)),
      eval_steps=621,  # check where eval steps is used
      task_weight=1.0)

  yolo_routine = multi_cfg.TaskRoutine(
      task_name='yolo',
      task_config=YoloSubtask(
          model=YoloModelSpecs(
              num_classes=4,
              input_size=[256, 256, 3],
              head=YoloHead(
                  anchor_per_scale=3,
                  strides=[16, 32, 64],
                  anchors=[
                      12, 16, 19, 36, 40, 28, 36, 75, 76, 55, 72, 146, 142,
                      110, 192, 243, 459, 401
                  ],
                  xy_scale=[1.2, 1.1, 1.05])),
          losses=YoloLosses(l2_weight_decay=1e-4, iou_loss_thres=0.5),
          train_data=YoloDataConfig(
              input_path=input_path_yolo,
              is_training=True,
              global_batch_size=train_batch_size,
              aug_policy='randaug',
              randaug_magnitude=5),
          validation_data=YoloDataConfig(
              input_path=input_path_yolo,
              is_training=False,
              global_batch_size=eval_batch_size,
              drop_remainder=False)),
      eval_steps=600,  # check where eval steps is used
      task_weight=1.0)

  # Shared HardNet backbone; each head brings its own decoder config.
  model_config = MultiHeadModel(
      input_size=[256, 256, 3],
      backbone=backbones.Backbone(
          type='hardnet', hardnet=backbones.HardNet(model_id=70)),
      norm_activation=common.NormActivation(
          activation='relu',
          norm_momentum=0.9997,
          norm_epsilon=0.001,
          use_sync_bn=True),
      heads=[
          Submodel(
              name='classification',
              num_classes=4,
              head=ImageClassificationHead(
                  level=0,  # decoder is identity function
                  num_convs=2,
                  num_filters=256,
                  add_head_batch_norm=False,
                  dropout_rate=0.2)),
          Submodel(
              name='segmentation',
              num_classes=19,
              decoder=decoders.Decoder(
                  type='hardnet', hardnet=decoders.HardNet(model_id=70)),
              head=SegmentationHead(
                  level=0,
                  num_convs=0,
                  feature_fusion=None,
                  low_level=0,
                  low_level_num_filters=0)),
          Submodel(
              name='yolo',
              num_classes=4,
              decoder=decoders.Decoder(
                  type='pan', pan=decoders.PAN(levels=3)),
              head=YoloHead(
                  anchor_per_scale=3,
                  strides=[16, 32, 64],
                  anchors=[
                      12, 16, 19, 36, 40, 28, 36, 75, 76, 55, 72, 146, 142,
                      110, 192, 243, 459, 401
                  ],
                  xy_scale=[1.2, 1.1, 1.05]))
      ],
      l2_weight_decay=1e-4)

  return multi_cfg.MultiTaskExperimentConfig(
      task=multi_cfg.MultiTaskConfig(
          model=model_config,
          init_checkpoint=None,
          task_routines=(segmentation_routine, classification_routine,
                         yolo_routine)),
      trainer=multi_cfg.MultiTaskTrainerConfig(
          trainer_type="interleaving",
          task_sampler=multi_cfg.TaskSamplingConfig(
              type="proportional",
              proportional=multi_cfg.ProportionalSampleConfig(
                  alpha=1.0)),  # uniform, proportional or annealing
          steps_per_loop=steps_per_epoch,
          summary_interval=steps_per_epoch,
          checkpoint_interval=steps_per_epoch,
          train_steps=45 * steps_per_epoch,
          validation_steps=validation_steps,
          validation_interval=steps_per_epoch,
          best_checkpoint_eval_metric='mean_iou',
          continuous_eval_timeout=3600,
          max_to_keep=5,
          optimizer_config=optimization.OptimizationConfig({
              'optimizer': {
                  'type': 'sgd',
                  'sgd': {
                      'momentum': 0.9
                  }
              },
              'learning_rate': {
                  'type': 'polynomial',
                  'polynomial': {
                      'initial_learning_rate': 0.007,
                      'decay_steps': 45 * steps_per_epoch,
                      'end_learning_rate': 0.0,
                      'power': 0.9
                  }
              },
              'warmup': {
                  'type': 'linear',
                  'linear': {
                      'warmup_steps': 5 * steps_per_epoch,
                      'warmup_learning_rate': 0
                  }
              }
          })))
def maskrcnn_spinenet_coco() -> cfg.ExperimentConfig:
  """COCO object detection with Mask R-CNN with SpineNet backbone."""
  steps_per_epoch = 463
  coco_val_samples = 5000

  config = cfg.ExperimentConfig(
      runtime=cfg.RuntimeConfig(mixed_precision_dtype='bfloat16'),
      task=MaskRCNNTask(
          annotation_file=os.path.join(COCO_INPUT_PATH_BASE,
                                       'instances_val2017.json'),
          model=MaskRCNN(
              backbone=backbones.Backbone(
                  type='spinenet',
                  spinenet=backbones.SpineNet(model_id='49')),
              # SpineNet emits multi-scale features directly, so no decoder.
              decoder=decoders.Decoder(
                  type='identity', identity=decoders.Identity()),
              anchor=Anchor(anchor_size=3),
              norm_activation=common.NormActivation(use_sync_bn=True),
              num_classes=91,
              input_size=[640, 640, 3],
              min_level=3,
              max_level=7,
              include_mask=True),
          losses=Losses(l2_weight_decay=0.00004),
          train_data=DataConfig(
              input_path=os.path.join(COCO_INPUT_PATH_BASE, 'train*'),
              is_training=True,
              global_batch_size=256,
              parser=Parser(
                  aug_rand_hflip=True, aug_scale_min=0.5, aug_scale_max=2.0)),
          validation_data=DataConfig(
              input_path=os.path.join(COCO_INPUT_PATH_BASE, 'val*'),
              is_training=False,
              global_batch_size=8)),
      trainer=cfg.TrainerConfig(
          train_steps=steps_per_epoch * 350,
          validation_steps=coco_val_samples // 8,
          validation_interval=steps_per_epoch,
          steps_per_loop=steps_per_epoch,
          summary_interval=steps_per_epoch,
          checkpoint_interval=steps_per_epoch,
          optimizer_config=optimization.OptimizationConfig({
              'optimizer': {
                  'type': 'sgd',
                  'sgd': {
                      'momentum': 0.9
                  }
              },
              'learning_rate': {
                  'type': 'stepwise',
                  'stepwise': {
                      'boundaries': [steps_per_epoch * 320,
                                     steps_per_epoch * 340],
                      'values': [0.28, 0.028, 0.0028],
                  }
              },
              'warmup': {
                  'type': 'linear',
                  'linear': {
                      'warmup_steps': 2000,
                      'warmup_learning_rate': 0.0067
                  }
              }
          })),
      restrictions=[
          'task.train_data.is_training != None',
          'task.validation_data.is_training != None'
      ])
  return config
def basnet_duts() -> cfg.ExperimentConfig:
  """Salient object detection on the DUTS dataset with BASNet."""
  train_batch_size = 16
  eval_batch_size = 16
  steps_per_epoch = DUTS_TRAIN_EXAMPLES // train_batch_size
  config = cfg.ExperimentConfig(
      task=BASNetTask(
          model=BASNetModel(
              #num_classes=21,
              # TODO(arashwan): test changing size to 513 to match deeplab.
              input_size=[224, 224, 3],  # Resize to 256, 256
              backbone=backbones.Backbone(
                  type='basnet_en', basnet_en=backbones.BASNet_En()),
              decoder=decoders.Decoder(
                  type='basnet_de', basnet_de=decoders.BASNet_De()),
              #head=BASNetHead(level=3, num_convs=0),
              norm_activation=common.NormActivation(
                  activation='relu',
                  norm_momentum=0.99,
                  norm_epsilon=1e-3,
                  use_sync_bn=True)),
          losses=Losses(l2_weight_decay=0),
          train_data=DataConfig(
              # Dataset Path
              #input_path=os.path.join(PASCAL_INPUT_PATH_BASE, 'train_aug*'),
              input_path=os.path.join(DUTS_INPUT_PATH_BASE_TR, 'DUTS-TR-*'),
              is_training=True,
              global_batch_size=train_batch_size,
              #aug_scale_min=0.5,
              #aug_scale_max=2.0
          ),
          validation_data=DataConfig(
              input_path=os.path.join(DUTS_INPUT_PATH_BASE_VAL, 'DUTS-TE-*'),
              is_training=False,
              global_batch_size=eval_batch_size,
          ),
          #init_checkpoint='',
          #init_checkpoint_modules='backbone'
      ),
      trainer=cfg.TrainerConfig(
          steps_per_loop=steps_per_epoch,
          summary_interval=steps_per_epoch,
          checkpoint_interval=steps_per_epoch,
          train_steps=500 * steps_per_epoch,  # (gunho) more epochs
          # No validation in BASNet
          validation_steps=DUTS_VAL_EXAMPLES // eval_batch_size,
          validation_interval=steps_per_epoch,
          optimizer_config=optimization.OptimizationConfig({
              'optimizer': {
                  'type': 'adam',  #BASNet
                  'adam': {
                      'beta_1': 0.9,
                      'beta_2': 0.999,
                      'epsilon': 1e-8,
                  }
              },
              'learning_rate': {
                  'type': 'stepwise',
                  'stepwise': {
                      'boundaries': [
                          70 * steps_per_epoch, 100 * steps_per_epoch,
                          150 * steps_per_epoch
                      ],
                      'values': [0.01, 0.001, 0.001, 0.0001]
                  }
              }
          })),
      restrictions=[
          'task.train_data.is_training != None',
          'task.validation_data.is_training != None'
      ])
  return config
def mnv2_deeplabv3_cityscapes() -> cfg.ExperimentConfig:
  """Image segmentation on Cityscapes with MobileNetV2 DeepLabV3."""
  # Function-scope import: the original used `np.math.log2`, but the
  # `np.math` alias was deprecated in NumPy 1.25 and removed in NumPy 2.0;
  # the stdlib module is the supported spelling.
  import math

  train_batch_size = 16
  eval_batch_size = 16
  steps_per_epoch = CITYSCAPES_TRAIN_EXAMPLES // train_batch_size
  output_stride = 16
  aspp_dilation_rates = []
  pool_kernel_size = [512, 1024]
  # output_stride is a power of two, so int(log2(...)) is exact: level == 4.
  level = int(math.log2(output_stride))
  config = cfg.ExperimentConfig(
      task=SemanticSegmentationTask(
          model=SemanticSegmentationModel(
              # Cityscapes uses only 19 semantic classes for train/evaluation.
              # The void (background) class is ignored in train and evaluation.
              num_classes=19,
              input_size=[None, None, 3],
              backbone=backbones.Backbone(
                  type='mobilenet',
                  mobilenet=backbones.MobileNet(
                      model_id='MobileNetV2', output_stride=output_stride)),
              decoder=decoders.Decoder(
                  type='aspp',
                  aspp=decoders.ASPP(
                      level=level,
                      dilation_rates=aspp_dilation_rates,
                      pool_kernel_size=pool_kernel_size)),
              head=SegmentationHead(level=level, num_convs=0),
              norm_activation=common.NormActivation(
                  activation='relu',
                  norm_momentum=0.99,
                  norm_epsilon=1e-3,
                  use_sync_bn=True)),
          losses=Losses(l2_weight_decay=4e-5),
          train_data=DataConfig(
              input_path=os.path.join(CITYSCAPES_INPUT_PATH_BASE,
                                      'train_fine**'),
              crop_size=[512, 1024],
              output_size=[1024, 2048],
              is_training=True,
              global_batch_size=train_batch_size,
              aug_scale_min=0.5,
              aug_scale_max=2.0),
          validation_data=DataConfig(
              input_path=os.path.join(CITYSCAPES_INPUT_PATH_BASE, 'val_fine*'),
              output_size=[1024, 2048],
              is_training=False,
              global_batch_size=eval_batch_size,
              resize_eval_groundtruth=True,
              drop_remainder=False),
          # COCO pre-trained MobileNetV2 checkpoint.
          init_checkpoint='gs://tf_model_garden/cloud/vision-2.0/deeplab/deeplabv3_mobilenetv2_coco/best_ckpt-63',
          init_checkpoint_modules='backbone'),
      trainer=cfg.TrainerConfig(
          steps_per_loop=steps_per_epoch,
          summary_interval=steps_per_epoch,
          checkpoint_interval=steps_per_epoch,
          train_steps=100000,
          validation_steps=CITYSCAPES_VAL_EXAMPLES // eval_batch_size,
          validation_interval=steps_per_epoch,
          best_checkpoint_eval_metric='mean_iou',
          best_checkpoint_export_subdir='best_ckpt',
          best_checkpoint_metric_comp='higher',
          optimizer_config=optimization.OptimizationConfig({
              'optimizer': {
                  'type': 'sgd',
                  'sgd': {
                      'momentum': 0.9
                  }
              },
              'learning_rate': {
                  'type': 'polynomial',
                  'polynomial': {
                      'initial_learning_rate': 0.01,
                      'decay_steps': 100000,
                      'end_learning_rate': 0.0,
                      'power': 0.9
                  }
              },
              'warmup': {
                  'type': 'linear',
                  'linear': {
                      'warmup_steps': 5 * steps_per_epoch,
                      'warmup_learning_rate': 0
                  }
              }
          })),
      restrictions=[
          'task.train_data.is_training != None',
          'task.validation_data.is_training != None'
      ])
  return config
def cascadercnn_spinenet_coco() -> cfg.ExperimentConfig:
  """COCO object detection with Cascade RCNN-RS with SpineNet backbone."""
  steps_per_epoch = 463
  coco_val_samples = 5000
  train_batch_size = 256
  eval_batch_size = 8

  # Cascade heads (two extra IoU stages) on a SpineNet-49 backbone with an
  # identity decoder; swish activations and sync batch norm throughout.
  model = MaskRCNN(
      backbone=backbones.Backbone(
          type='spinenet',
          spinenet=backbones.SpineNet(model_id='49', min_level=3, max_level=7)),
      decoder=decoders.Decoder(type='identity', identity=decoders.Identity()),
      roi_sampler=ROISampler(cascade_iou_thresholds=[0.6, 0.7]),
      detection_head=DetectionHead(
          class_agnostic_bbox_pred=True, cascade_class_ensemble=True),
      anchor=Anchor(anchor_size=3),
      norm_activation=common.NormActivation(
          use_sync_bn=True, activation='swish'),
      num_classes=91,
      input_size=[640, 640, 3],
      min_level=3,
      max_level=7,
      include_mask=True)

  task = MaskRCNNTask(
      annotation_file=os.path.join(COCO_INPUT_PATH_BASE,
                                   'instances_val2017.json'),
      model=model,
      losses=Losses(l2_weight_decay=0.00004),
      train_data=DataConfig(
          input_path=os.path.join(COCO_INPUT_PATH_BASE, 'train*'),
          is_training=True,
          global_batch_size=train_batch_size,
          parser=Parser(
              aug_rand_hflip=True, aug_scale_min=0.1, aug_scale_max=2.5)),
      validation_data=DataConfig(
          input_path=os.path.join(COCO_INPUT_PATH_BASE, 'val*'),
          is_training=False,
          global_batch_size=eval_batch_size,
          drop_remainder=False))

  # 500-epoch schedule with LR drops at epochs 475 and 490.
  trainer = cfg.TrainerConfig(
      train_steps=steps_per_epoch * 500,
      validation_steps=coco_val_samples // eval_batch_size,
      validation_interval=steps_per_epoch,
      steps_per_loop=steps_per_epoch,
      summary_interval=steps_per_epoch,
      checkpoint_interval=steps_per_epoch,
      optimizer_config=optimization.OptimizationConfig({
          'optimizer': {
              'type': 'sgd',
              'sgd': {
                  'momentum': 0.9
              }
          },
          'learning_rate': {
              'type': 'stepwise',
              'stepwise': {
                  'boundaries': [steps_per_epoch * 475, steps_per_epoch * 490],
                  'values': [0.32, 0.032, 0.0032],
              }
          },
          'warmup': {
              'type': 'linear',
              'linear': {
                  'warmup_steps': 2000,
                  'warmup_learning_rate': 0.0067
              }
          }
      }))

  return cfg.ExperimentConfig(
      runtime=cfg.RuntimeConfig(mixed_precision_dtype='bfloat16'),
      task=task,
      trainer=trainer,
      restrictions=[
          'task.train_data.is_training != None',
          'task.validation_data.is_training != None',
          'task.model.min_level == task.model.backbone.spinenet.min_level',
          'task.model.max_level == task.model.backbone.spinenet.max_level',
      ])
def seg_deeplabv3_pascal() -> cfg.ExperimentConfig:
  """Image segmentation on Pascal VOC (augmented) with ResNet DeepLabV3.

  The previous docstring said "imagenet"; the data paths and example counts
  (PASCAL_*) show this config trains/evaluates on Pascal VOC.
  """
  # Function-scope import: the original used `np.math.log2`, but the
  # `np.math` alias was deprecated in NumPy 1.25 and removed in NumPy 2.0;
  # the stdlib module is the supported spelling.
  import math

  train_batch_size = 16
  eval_batch_size = 8
  steps_per_epoch = PASCAL_TRAIN_EXAMPLES // train_batch_size
  output_stride = 16
  # NOTE(review): [6, 12, 18] are the DeepLab rates usually paired with
  # output_stride=16, while [12, 24, 36] are the stride-8 rates — confirm
  # the intended combination (kept as-is to preserve behavior).
  aspp_dilation_rates = [12, 24, 36]
  multigrid = [1, 2, 4]
  stem_type = 'v1'
  # output_stride is a power of two, so int(log2(...)) is exact: level == 4.
  level = int(math.log2(output_stride))
  config = cfg.ExperimentConfig(
      task=SemanticSegmentationTask(
          model=SemanticSegmentationModel(
              num_classes=21,
              input_size=[None, None, 3],
              backbone=backbones.Backbone(
                  type='dilated_resnet',
                  dilated_resnet=backbones.DilatedResNet(
                      model_id=101,
                      output_stride=output_stride,
                      multigrid=multigrid,
                      stem_type=stem_type)),
              decoder=decoders.Decoder(
                  type='aspp',
                  aspp=decoders.ASPP(
                      level=level, dilation_rates=aspp_dilation_rates)),
              head=SegmentationHead(level=level, num_convs=0),
              norm_activation=common.NormActivation(
                  activation='swish',
                  norm_momentum=0.9997,
                  norm_epsilon=1e-3,
                  use_sync_bn=True)),
          losses=Losses(l2_weight_decay=1e-4),
          train_data=DataConfig(
              input_path=os.path.join(PASCAL_INPUT_PATH_BASE, 'train_aug*'),
              # TODO(arashwan): test changing size to 513 to match deeplab.
              output_size=[512, 512],
              is_training=True,
              global_batch_size=train_batch_size,
              aug_scale_min=0.5,
              aug_scale_max=2.0),
          validation_data=DataConfig(
              input_path=os.path.join(PASCAL_INPUT_PATH_BASE, 'val*'),
              output_size=[512, 512],
              is_training=False,
              global_batch_size=eval_batch_size,
              resize_eval_groundtruth=False,
              groundtruth_padded_size=[512, 512],
              drop_remainder=False),
          # ImageNet pre-trained ResNet-101 checkpoint.
          init_checkpoint='gs://cloud-tpu-checkpoints/vision-2.0/deeplab/deeplab_resnet101_imagenet/ckpt-62400',
          init_checkpoint_modules='backbone'),
      trainer=cfg.TrainerConfig(
          steps_per_loop=steps_per_epoch,
          summary_interval=steps_per_epoch,
          checkpoint_interval=steps_per_epoch,
          train_steps=45 * steps_per_epoch,
          validation_steps=PASCAL_VAL_EXAMPLES // eval_batch_size,
          validation_interval=steps_per_epoch,
          optimizer_config=optimization.OptimizationConfig({
              'optimizer': {
                  'type': 'sgd',
                  'sgd': {
                      'momentum': 0.9
                  }
              },
              'learning_rate': {
                  'type': 'polynomial',
                  'polynomial': {
                      'initial_learning_rate': 0.007,
                      'decay_steps': 45 * steps_per_epoch,
                      'end_learning_rate': 0.0,
                      'power': 0.9
                  }
              },
              'warmup': {
                  'type': 'linear',
                  'linear': {
                      'warmup_steps': 5 * steps_per_epoch,
                      'warmup_learning_rate': 0
                  }
              }
          })),
      restrictions=[
          'task.train_data.is_training != None',
          'task.validation_data.is_training != None'
      ])
  return config
def detector_yolo() -> cfg.ExperimentConfig:
  """YOLO on custom datasets."""
  # Six-class detector at 256x256 input: HarDNet-70 backbone, PAN neck,
  # YOLO head with three anchors per scale.
  model = YoloModel(
      num_classes=6,
      input_size=[256, 256, 3],
      backbone=backbones.Backbone(
          type='hardnet', hardnet=backbones.HardNet(model_id=70)),
      decoder=decoders.Decoder(type='pan', pan=decoders.PAN(levels=3)),
      head=YoloHead(
          anchor_per_scale=3,
          strides=[16, 32, 64],
          # Anchor (w, h) pairs, flattened, ordered by scale.
          anchors=[
              12, 16, 19, 36, 40, 28, 36, 75, 76, 55, 72, 146, 142, 110, 192,
              243, 459, 401
          ],
          xy_scale=[1.2, 1.1, 1.05]),
      norm_activation=common.NormActivation(
          activation='relu',
          norm_momentum=0.9997,
          norm_epsilon=0.001,
          use_sync_bn=True))

  # Same local glob is used for train and validation input.
  task = YoloTask(
      model=model,
      losses=YoloLosses(l2_weight_decay=1e-4, iou_loss_thres=0.5),
      train_data=DataConfig(
          input_path='D:/data/whizz_tf/detect_env*',
          output_size=[256, 256],
          is_training=True,
          global_batch_size=1),
      validation_data=DataConfig(
          input_path='D:/data/whizz_tf/detect_env*',
          output_size=[256, 256],
          is_training=False,
          global_batch_size=1,
          drop_remainder=False),
      init_checkpoint_modules='backbone')

  # Tiny smoke-test schedule: 20 train steps, validating every 2 steps.
  trainer = cfg.TrainerConfig(
      steps_per_loop=2,
      summary_interval=2,
      checkpoint_interval=2,
      train_steps=20,
      validation_steps=20,
      validation_interval=2,
      optimizer_config=optimization.OptimizationConfig({
          'optimizer': {
              'type': 'sgd',
              'sgd': {
                  'momentum': 0.9
              }
          },
          'learning_rate': {
              'type': 'polynomial',
              'polynomial': {
                  'initial_learning_rate': 0.007,
                  'decay_steps': 20,
                  'end_learning_rate': 0.0,
                  'power': 0.9
              }
          },
          'warmup': {
              'type': 'linear',
              'linear': {
                  'warmup_steps': 2,
                  'warmup_learning_rate': 0
              }
          }
      }))

  return cfg.ExperimentConfig(
      task=task,
      trainer=trainer,
      restrictions=[
          'task.train_data.is_training != None',
          'task.validation_data.is_training != None'
      ])
def retinanet_spinenet_coco() -> cfg.ExperimentConfig:
  """COCO object detection with RetinaNet using SpineNet backbone."""
  train_batch_size = 256
  eval_batch_size = 8
  steps_per_epoch = COCO_TRIAN_EXAMPLES // train_batch_size
  input_size = 640

  # SpineNet-49 backbone with identity decoder; levels 3-7, sync batch norm.
  model = RetinaNet(
      backbone=backbones.Backbone(
          type='spinenet', spinenet=backbones.SpineNet(model_id='49')),
      decoder=decoders.Decoder(type='identity', identity=decoders.Identity()),
      anchor=Anchor(anchor_size=3),
      norm_activation=common.NormActivation(use_sync_bn=True),
      num_classes=91,
      input_size=[input_size, input_size, 3],
      min_level=3,
      max_level=7)

  task = RetinaNetTask(
      model=model,
      losses=Losses(l2_weight_decay=4e-5),
      train_data=DataConfig(
          input_path=os.path.join(COCO_INPUT_PATH_BASE, 'train*'),
          is_training=True,
          global_batch_size=train_batch_size,
          parser=Parser(
              aug_rand_hflip=True, aug_scale_min=0.5, aug_scale_max=2.0)),
      validation_data=DataConfig(
          input_path=os.path.join(COCO_INPUT_PATH_BASE, 'val*'),
          is_training=False,
          global_batch_size=eval_batch_size))

  # 350-epoch schedule; LR values are linearly scaled by the global batch
  # size against a base of 256.
  trainer = cfg.TrainerConfig(
      train_steps=350 * steps_per_epoch,
      validation_steps=COCO_VAL_EXAMPLES // eval_batch_size,
      validation_interval=steps_per_epoch,
      steps_per_loop=steps_per_epoch,
      summary_interval=steps_per_epoch,
      checkpoint_interval=steps_per_epoch,
      optimizer_config=optimization.OptimizationConfig({
          'optimizer': {
              'type': 'sgd',
              'sgd': {
                  'momentum': 0.9
              }
          },
          'learning_rate': {
              'type': 'stepwise',
              'stepwise': {
                  'boundaries': [320 * steps_per_epoch, 340 * steps_per_epoch],
                  'values': [
                      0.28 * train_batch_size / 256.0,
                      0.028 * train_batch_size / 256.0,
                      0.0028 * train_batch_size / 256.0
                  ],
              }
          },
          'warmup': {
              'type': 'linear',
              'linear': {
                  'warmup_steps': 2000,
                  'warmup_learning_rate': 0.0067
              }
          }
      }))

  return cfg.ExperimentConfig(
      runtime=cfg.RuntimeConfig(mixed_precision_dtype='float32'),
      task=task,
      trainer=trainer,
      restrictions=[
          'task.train_data.is_training != None',
          'task.validation_data.is_training != None'
      ])