def test_builder(self, backbone_type, input_size, has_att_heads): num_classes = 2 input_specs = tf.keras.layers.InputSpec( shape=[None, input_size[0], input_size[1], 3]) if has_att_heads: attribute_heads_config = [ retinanet_cfg.AttributeHead(name='att1'), retinanet_cfg.AttributeHead(name='att2', type='classification', size=2), ] else: attribute_heads_config = None model_config = retinanet_cfg.RetinaNet( num_classes=num_classes, backbone=backbones.Backbone(type=backbone_type), head=retinanet_cfg.RetinaNetHead( attribute_heads=attribute_heads_config)) l2_regularizer = tf.keras.regularizers.l2(5e-5) _ = factory.build_retinanet(input_specs=input_specs, model_config=model_config, l2_regularizer=l2_regularizer) if has_att_heads: self.assertEqual(model_config.head.attribute_heads[0].as_dict(), dict(name='att1', type='regression', size=1)) self.assertEqual(model_config.head.attribute_heads[1].as_dict(), dict(name='att2', type='classification', size=2))
def test_builder(self, input_size, backbone_type, level, low_level, decoder_type, shared_decoder, generate_panoptic_masks): num_classes = 10 input_specs = tf.keras.layers.InputSpec( shape=[None, input_size[0], input_size[1], 3]) model_config = panoptic_deeplab_cfg.PanopticDeeplab( num_classes=num_classes, input_size=input_size, backbone=backbones.Backbone(type=backbone_type), decoder=decoders.Decoder(type=decoder_type), semantic_head=panoptic_deeplab_cfg.SemanticHead( level=level, num_convs=1, kernel_size=5, prediction_kernel_size=1, low_level=low_level), instance_head=panoptic_deeplab_cfg.InstanceHead( level=level, num_convs=1, kernel_size=5, prediction_kernel_size=1, low_level=low_level), shared_decoder=shared_decoder, generate_panoptic_masks=generate_panoptic_masks) l2_regularizer = tf.keras.regularizers.l2(5e-5) _ = factory.build_panoptic_deeplab(input_specs=input_specs, model_config=model_config, l2_regularizer=l2_regularizer)
def test_spinenet_creation(self, model_id): """Test creation of SpineNet models.""" input_size = 128 min_level = 3 max_level = 7 input_specs = tf.keras.layers.InputSpec( shape=[None, input_size, input_size, 3]) network = backbones.SpineNet(input_specs=input_specs, min_level=min_level, max_level=max_level, norm_momentum=0.99, norm_epsilon=1e-5) backbone_config = backbones_cfg.Backbone( type='spinenet', spinenet=backbones_cfg.SpineNet(model_id=model_id)) norm_activation_config = common_cfg.NormActivation(norm_momentum=0.99, norm_epsilon=1e-5, use_sync_bn=False) factory_network = factory.build_backbone( input_specs=tf.keras.layers.InputSpec( shape=[None, input_size, input_size, 3]), backbone_config=backbone_config, norm_activation_config=norm_activation_config) network_config = network.get_config() factory_network_config = factory_network.get_config() self.assertEqual(network_config, factory_network_config)
def test_validation_step(self): config = detr_cfg.DetrTask( model=detr_cfg.Detr( input_size=[1333, 1333, 3], num_encoder_layers=1, num_decoder_layers=1, backbone=backbones.Backbone( type='resnet', resnet=backbones.ResNet(model_id=10, bn_trainable=False)) ), losses=detr_cfg.Losses(class_offset=1), validation_data=detr_cfg.DataConfig( tfds_name='coco/2017', tfds_split='validation', is_training=False, global_batch_size=2, )) with tfds.testing.mock_data(as_dataset_fn=_as_dataset): task = detection.DetectionTask(config) model = task.build_model() metrics = task.build_metrics(training=False) dataset = task.build_inputs(config.validation_data) iterator = iter(dataset) logs = task.validation_step(next(iterator), model, metrics) state = task.aggregate_logs(step_outputs=logs) task.reduce_aggregated_logs(state)
def test_efficientnet_creation(self, model_id, se_ratio): """Test creation of EfficientNet models.""" network = backbones.EfficientNet(model_id=model_id, se_ratio=se_ratio, norm_momentum=0.99, norm_epsilon=1e-5) backbone_config = backbones_cfg.Backbone( type='efficientnet', efficientnet=backbones_cfg.EfficientNet(model_id=model_id, se_ratio=se_ratio)) norm_activation_config = common_cfg.NormActivation(norm_momentum=0.99, norm_epsilon=1e-5, use_sync_bn=False) factory_network = factory.build_backbone( input_specs=tf.keras.layers.InputSpec(shape=[None, None, None, 3]), backbone_config=backbone_config, norm_activation_config=norm_activation_config) network_config = network.get_config() factory_network_config = factory_network.get_config() self.assertEqual(network_config, factory_network_config)
def test_deeplabv3_builder(self, backbone_type, input_size, weight_decay): num_classes = 21 input_specs = tf.keras.layers.InputSpec( shape=[None, input_size[0], input_size[1], 3]) model_config = semantic_segmentation_cfg.SemanticSegmentationModel( num_classes=num_classes, backbone=backbones.Backbone(type=backbone_type, mobilenet=backbones.MobileNet( model_id='MobileNetV2', output_stride=16)), decoder=decoders.Decoder(type='aspp', aspp=decoders.ASPP(level=4, num_filters=256, dilation_rates=[], spp_layer_version='v1', output_tensor=True)), head=semantic_segmentation_cfg.SegmentationHead( level=4, low_level=2, num_convs=1, upsample_factor=2, use_depthwise_convolution=True)) l2_regularizer = (tf.keras.regularizers.l2(weight_decay) if weight_decay else None) model = factory.build_segmentation_model(input_specs=input_specs, model_config=model_config, l2_regularizer=l2_regularizer) quantization_config = common.Quantization() _ = qat_factory.build_qat_segmentation_model( model=model, quantization=quantization_config, input_specs=input_specs)
def test_mobilenet_creation(self, model_id, filter_size_scale): """Test creation of Mobilenet models.""" network = backbones.MobileNet(model_id=model_id, filter_size_scale=filter_size_scale, norm_momentum=0.99, norm_epsilon=1e-5) backbone_config = backbones_cfg.Backbone( type='mobilenet', mobilenet=backbones_cfg.MobileNet( model_id=model_id, filter_size_scale=filter_size_scale)) norm_activation_config = common_cfg.NormActivation(norm_momentum=0.99, norm_epsilon=1e-5, use_sync_bn=False) factory_network = factory.build_backbone( input_specs=tf.keras.layers.InputSpec(shape=[None, None, None, 3]), backbone_config=backbone_config, norm_activation_config=norm_activation_config) network_config = network.get_config() factory_network_config = factory_network.get_config() self.assertEqual(network_config, factory_network_config)
def test_builder(self, backbone_type, input_size): num_classes = 2 input_specs = tf.keras.layers.InputSpec( shape=[None, input_size[0], input_size[1], 3]) model_config = maskrcnn_cfg.MaskRCNN( num_classes=num_classes, backbone=backbones.Backbone(type=backbone_type)) l2_regularizer = tf.keras.regularizers.l2(5e-5) _ = factory.build_maskrcnn(input_specs=input_specs, model_config=model_config, l2_regularizer=l2_regularizer)
def test_builder(self, backbone_type, input_size, weight_decay): num_classes = 2 input_specs = tf.keras.layers.InputSpec( shape=[None, input_size[0], input_size[1], 3]) model_config = classification_cfg.ImageClassificationModel( num_classes=num_classes, backbone=backbones.Backbone(type=backbone_type)) l2_regularizer = (tf.keras.regularizers.l2(weight_decay) if weight_decay else None) _ = factory.build_classification_model(input_specs=input_specs, model_config=model_config, l2_regularizer=l2_regularizer)
class Detr(hyperparams.Config): num_queries: int = 100 hidden_size: int = 256 num_classes: int = 91 # 0: background num_encoder_layers: int = 6 num_decoder_layers: int = 6 input_size: List[int] = dataclasses.field(default_factory=list) backbone: backbones.Backbone = backbones.Backbone(type='resnet', resnet=backbones.ResNet( model_id=50, bn_trainable=False)) norm_activation: common.NormActivation = common.NormActivation()
class SimCLRModel(hyperparams.Config): """SimCLR model config.""" input_size: List[int] = dataclasses.field(default_factory=list) backbone: backbones.Backbone = backbones.Backbone( type='resnet', resnet=backbones.ResNet()) projection_head: ProjectionHead = ProjectionHead( proj_output_dim=128, num_proj_layers=3, ft_proj_idx=1) supervised_head: SupervisedHead = SupervisedHead(num_classes=1001) norm_activation: common.NormActivation = common.NormActivation( norm_momentum=0.9, norm_epsilon=1e-5, use_sync_bn=False) mode: str = simclr_model.PRETRAIN backbone_trainable: bool = True
class SemanticSegmentationModel(hyperparams.Config): """Semantic segmentation model config.""" num_classes: int = 0 input_size: List[int] = dataclasses.field(default_factory=list) min_level: int = 3 max_level: int = 6 head: SegmentationHead = SegmentationHead() backbone: backbones.Backbone = backbones.Backbone( type='resnet', resnet=backbones.ResNet()) decoder: decoders.Decoder = decoders.Decoder(type='identity') mask_scoring_head: Optional[MaskScoringHead] = None norm_activation: common.NormActivation = common.NormActivation()
class ImageClassificationModel(hyperparams.Config): """The model config.""" num_classes: int = 0 input_size: List[int] = dataclasses.field(default_factory=list) backbone: backbones.Backbone = backbones.Backbone( type='resnet', resnet=backbones.ResNet()) dropout_rate: float = 0.0 norm_activation: common.NormActivation = common.NormActivation( use_sync_bn=False) # Adds a BatchNormalization layer pre-GlobalAveragePooling in classification add_head_batch_norm: bool = False kernel_initializer: str = 'random_uniform'
class RetinaNet(hyperparams.Config): num_classes: int = 0 input_size: List[int] = dataclasses.field(default_factory=list) min_level: int = 3 max_level: int = 7 anchor: Anchor = Anchor() backbone: backbones.Backbone = backbones.Backbone( type='resnet', resnet=backbones.ResNet()) decoder: decoders.Decoder = decoders.Decoder(type='fpn', fpn=decoders.FPN()) head: RetinaNetHead = RetinaNetHead() detection_generator: DetectionGenerator = DetectionGenerator() norm_activation: common.NormActivation = common.NormActivation()
def test_builder(self, backbone_type, input_size, segmentation_backbone_type, segmentation_decoder_type): num_classes = 2 input_specs = tf.keras.layers.InputSpec( shape=[None, input_size[0], input_size[1], 3]) segmentation_output_stride = 16 level = int(np.math.log2(segmentation_output_stride)) segmentation_model = semantic_segmentation.SemanticSegmentationModel( num_classes=2, backbone=backbones.Backbone(type=segmentation_backbone_type), decoder=decoders.Decoder(type=segmentation_decoder_type), head=semantic_segmentation.SegmentationHead(level=level)) model_config = panoptic_maskrcnn_cfg.PanopticMaskRCNN( num_classes=num_classes, segmentation_model=segmentation_model, backbone=backbones.Backbone(type=backbone_type), shared_backbone=segmentation_backbone_type is None, shared_decoder=segmentation_decoder_type is None) l2_regularizer = tf.keras.regularizers.l2(5e-5) _ = factory.build_panoptic_maskrcnn(input_specs=input_specs, model_config=model_config, l2_regularizer=l2_regularizer)
class PanopticDeeplab(hyperparams.Config): """Panoptic Deeplab model config.""" num_classes: int = 2 input_size: List[int] = dataclasses.field(default_factory=list) min_level: int = 3 max_level: int = 6 norm_activation: common.NormActivation = common.NormActivation() backbone: backbones.Backbone = backbones.Backbone( type='resnet', resnet=backbones.ResNet()) decoder: decoders.Decoder = decoders.Decoder(type='aspp') semantic_head: SemanticHead = SemanticHead() instance_head: InstanceHead = InstanceHead() shared_decoder: bool = False generate_panoptic_masks: bool = True post_processor: PanopticDeeplabPostProcessor = PanopticDeeplabPostProcessor( )
def retinanet_spinenet_mobile_coco() -> cfg.ExperimentConfig: """Generates a config for COCO OD RetinaNet for mobile with QAT.""" config = retinanet.retinanet_spinenet_mobile_coco() task = RetinaNetTask.from_args(quantization=common.Quantization(), **config.task.as_dict()) task.model.backbone = backbones.Backbone( type='spinenet_mobile', spinenet_mobile=backbones.SpineNetMobile( model_id='49', stochastic_depth_drop_rate=0.2, min_level=3, max_level=7, use_keras_upsampling_2d=True)) config.task = task return config
class SimCLRMTModelConfig(hyperparams.Config): """Model config for multi-task SimCLR model.""" input_size: List[int] = dataclasses.field(default_factory=list) backbone: backbones.Backbone = backbones.Backbone( type='resnet', resnet=backbones.ResNet()) backbone_trainable: bool = True projection_head: simclr_configs.ProjectionHead = simclr_configs.ProjectionHead( proj_output_dim=128, num_proj_layers=3, ft_proj_idx=1) norm_activation: common.NormActivation = common.NormActivation( norm_momentum=0.9, norm_epsilon=1e-5, use_sync_bn=False) heads: Tuple[SimCLRMTHeadConfig, ...] = () # L2 weight decay is used in the model, not in task. # Note that this can not be used together with lars optimizer. l2_weight_decay: float = 0.0 init_checkpoint: str = '' # backbone_projection or backbone init_checkpoint_modules: str = 'backbone_projection'
class AutosegEdgeTPUModelConfig(hyperparams.Config): """Autoseg-EdgeTPU segmentation model config.""" num_classes: int = 0 input_size: List[int] = dataclasses.field(default_factory=list) backbone: backbones.Backbone = backbones.Backbone() head: BiFPNHeadConfig = BiFPNHeadConfig() model_params: Mapping[str, Any] = dataclasses.field( default_factory=lambda: { # pylint: disable=g-long-lambda 'model_name': 'autoseg_edgetpu_backbone_s', 'checkpoint_format': 'tf_checkpoint', 'overrides': { 'batch_norm': 'tpu', 'rescale_input': False, 'backbone_only': True, 'resolution': 512 } })
def test_train_step(self): config = detr_cfg.DetrTask( model=detr_cfg.Detr( input_size=[1333, 1333, 3], num_encoder_layers=1, num_decoder_layers=1, num_classes=81, backbone=backbones.Backbone( type='resnet', resnet=backbones.ResNet(model_id=10, bn_trainable=False)) ), train_data=coco.COCODataConfig( tfds_name='coco/2017', tfds_split='validation', is_training=True, global_batch_size=2, )) with tfds.testing.mock_data(as_dataset_fn=_as_dataset): task = detection.DetectionTask(config) model = task.build_model() dataset = task.build_inputs(config.train_data) iterator = iter(dataset) opt_cfg = optimization.OptimizationConfig({ 'optimizer': { 'type': 'detr_adamw', 'detr_adamw': { 'weight_decay_rate': 1e-4, 'global_clipnorm': 0.1, } }, 'learning_rate': { 'type': 'stepwise', 'stepwise': { 'boundaries': [120000], 'values': [0.0001, 1.0e-05] } }, }) optimizer = detection.DetectionTask.create_optimizer(opt_cfg) task.train_step(next(iterator), model, optimizer)
def test_builder(self, backbone_type, input_size, has_attribute_heads): num_classes = 2 input_specs = tf.keras.layers.InputSpec( shape=[None, input_size[0], input_size[1], 3]) if has_attribute_heads: attribute_heads_config = [ retinanet_cfg.AttributeHead(name='att1'), retinanet_cfg.AttributeHead(name='att2', type='classification', size=2), ] else: attribute_heads_config = None model_config = retinanet_cfg.RetinaNet( num_classes=num_classes, backbone=backbones.Backbone( type=backbone_type, spinenet_mobile=backbones.SpineNetMobile( model_id='49', stochastic_depth_drop_rate=0.2, min_level=3, max_level=7, use_keras_upsampling_2d=True)), head=retinanet_cfg.RetinaNetHead( attribute_heads=attribute_heads_config)) l2_regularizer = tf.keras.regularizers.l2(5e-5) quantization_config = common.Quantization() model = factory.build_retinanet(input_specs=input_specs, model_config=model_config, l2_regularizer=l2_regularizer) _ = qat_factory.build_qat_retinanet(model=model, quantization=quantization_config, model_config=model_config) if has_attribute_heads: self.assertEqual(model_config.head.attribute_heads[0].as_dict(), dict(name='att1', type='regression', size=1)) self.assertEqual(model_config.head.attribute_heads[1].as_dict(), dict(name='att2', type='classification', size=2))
class MaskRCNN(hyperparams.Config): num_classes: int = 0 input_size: List[int] = dataclasses.field(default_factory=list) min_level: int = 2 max_level: int = 6 anchor: Anchor = Anchor() include_mask: bool = True backbone: backbones.Backbone = backbones.Backbone( type='resnet', resnet=backbones.ResNet()) decoder: decoders.Decoder = decoders.Decoder(type='fpn', fpn=decoders.FPN()) rpn_head: RPNHead = RPNHead() detection_head: DetectionHead = DetectionHead() roi_generator: ROIGenerator = ROIGenerator() roi_sampler: ROISampler = ROISampler() roi_aligner: ROIAligner = ROIAligner() detection_generator: DetectionGenerator = DetectionGenerator() mask_head: Optional[MaskHead] = MaskHead() mask_sampler: Optional[MaskSampler] = MaskSampler() mask_roi_aligner: Optional[MaskROIAligner] = MaskROIAligner() norm_activation: common.NormActivation = common.NormActivation( norm_momentum=0.997, norm_epsilon=0.0001, use_sync_bn=True)
def image_classification_imagenet_mobilenet() -> cfg.ExperimentConfig: """Image classification on imagenet with mobilenet.""" train_batch_size = 4096 eval_batch_size = 4096 steps_per_epoch = IMAGENET_TRAIN_EXAMPLES // train_batch_size config = cfg.ExperimentConfig( task=ImageClassificationTask( model=ImageClassificationModel( num_classes=1001, dropout_rate=0.2, input_size=[224, 224, 3], backbone=backbones.Backbone(type='mobilenet', mobilenet=backbones.MobileNet( model_id='MobileNetV2', filter_size_scale=1.0)), norm_activation=common.NormActivation(norm_momentum=0.997, norm_epsilon=1e-3, use_sync_bn=False)), losses=Losses(l2_weight_decay=1e-5, label_smoothing=0.1), train_data=DataConfig(input_path=os.path.join( IMAGENET_INPUT_PATH_BASE, 'train*'), is_training=True, global_batch_size=train_batch_size), validation_data=DataConfig(input_path=os.path.join( IMAGENET_INPUT_PATH_BASE, 'valid*'), is_training=False, global_batch_size=eval_batch_size)), trainer=cfg.TrainerConfig( steps_per_loop=steps_per_epoch, summary_interval=steps_per_epoch, checkpoint_interval=steps_per_epoch, train_steps=500 * steps_per_epoch, validation_steps=IMAGENET_VAL_EXAMPLES // eval_batch_size, validation_interval=steps_per_epoch, optimizer_config=optimization.OptimizationConfig({ 'optimizer': { 'type': 'rmsprop', 'rmsprop': { 'rho': 0.9, 'momentum': 0.9, 'epsilon': 0.002, } }, 'learning_rate': { 'type': 'exponential', 'exponential': { 'initial_learning_rate': 0.008 * (train_batch_size // 128), 'decay_steps': int(2.5 * steps_per_epoch), 'decay_rate': 0.98, 'staircase': True } }, 'warmup': { 'type': 'linear', 'linear': { 'warmup_steps': 5 * steps_per_epoch, 'warmup_learning_rate': 0 } }, })), restrictions=[ 'task.train_data.is_training != None', 'task.validation_data.is_training != None' ]) return config
def simclr_finetuning_imagenet() -> cfg.ExperimentConfig: """Image classification general.""" train_batch_size = 1024 eval_batch_size = 1024 steps_per_epoch = IMAGENET_TRAIN_EXAMPLES // train_batch_size pretrain_model_base = '' return cfg.ExperimentConfig( task=SimCLRFinetuneTask( model=SimCLRModel( mode=simclr_model.FINETUNE, backbone_trainable=True, input_size=[224, 224, 3], backbone=backbones.Backbone( type='resnet', resnet=backbones.ResNet(model_id=50)), projection_head=ProjectionHead( proj_output_dim=128, num_proj_layers=3, ft_proj_idx=1), supervised_head=SupervisedHead(num_classes=1001, zero_init=True), norm_activation=common.NormActivation( norm_momentum=0.9, norm_epsilon=1e-5, use_sync_bn=False)), loss=ClassificationLosses(), evaluation=Evaluation(), train_data=DataConfig( parser=Parser(mode=simclr_model.FINETUNE), input_path=os.path.join(IMAGENET_INPUT_PATH_BASE, 'train*'), is_training=True, global_batch_size=train_batch_size), validation_data=DataConfig( parser=Parser(mode=simclr_model.FINETUNE), input_path=os.path.join(IMAGENET_INPUT_PATH_BASE, 'valid*'), is_training=False, global_batch_size=eval_batch_size), init_checkpoint=pretrain_model_base, # all, backbone_projection or backbone init_checkpoint_modules='backbone_projection'), trainer=cfg.TrainerConfig( steps_per_loop=steps_per_epoch, summary_interval=steps_per_epoch, checkpoint_interval=steps_per_epoch, train_steps=60 * steps_per_epoch, validation_steps=IMAGENET_VAL_EXAMPLES // eval_batch_size, validation_interval=steps_per_epoch, optimizer_config=optimization.OptimizationConfig({ 'optimizer': { 'type': 'lars', 'lars': { 'momentum': 0.9, 'weight_decay_rate': 0.0, 'exclude_from_weight_decay': [ 'batch_normalization', 'bias' ] } }, 'learning_rate': { 'type': 'cosine', 'cosine': { # 0.01 × BatchSize / 512 'initial_learning_rate': 0.01 * train_batch_size / 512, 'decay_steps': 60 * steps_per_epoch } } })), restrictions=[ 'task.train_data.is_training != None', 'task.validation_data.is_training != None' ])
def simclr_pretraining_imagenet() -> cfg.ExperimentConfig: """Image classification general.""" train_batch_size = 4096 eval_batch_size = 4096 steps_per_epoch = IMAGENET_TRAIN_EXAMPLES // train_batch_size return cfg.ExperimentConfig( task=SimCLRPretrainTask( model=SimCLRModel( mode=simclr_model.PRETRAIN, backbone_trainable=True, input_size=[224, 224, 3], backbone=backbones.Backbone( type='resnet', resnet=backbones.ResNet(model_id=50)), projection_head=ProjectionHead( proj_output_dim=128, num_proj_layers=3, ft_proj_idx=1), supervised_head=SupervisedHead(num_classes=1001), norm_activation=common.NormActivation( norm_momentum=0.9, norm_epsilon=1e-5, use_sync_bn=True)), loss=ContrastiveLoss(), evaluation=Evaluation(), train_data=DataConfig( parser=Parser(mode=simclr_model.PRETRAIN), decoder=Decoder(decode_label=True), input_path=os.path.join(IMAGENET_INPUT_PATH_BASE, 'train*'), is_training=True, global_batch_size=train_batch_size), validation_data=DataConfig( parser=Parser(mode=simclr_model.PRETRAIN), decoder=Decoder(decode_label=True), input_path=os.path.join(IMAGENET_INPUT_PATH_BASE, 'valid*'), is_training=False, global_batch_size=eval_batch_size), ), trainer=cfg.TrainerConfig( steps_per_loop=steps_per_epoch, summary_interval=steps_per_epoch, checkpoint_interval=steps_per_epoch, train_steps=500 * steps_per_epoch, validation_steps=IMAGENET_VAL_EXAMPLES // eval_batch_size, validation_interval=steps_per_epoch, optimizer_config=optimization.OptimizationConfig({ 'optimizer': { 'type': 'lars', 'lars': { 'momentum': 0.9, 'weight_decay_rate': 0.000001, 'exclude_from_weight_decay': [ 'batch_normalization', 'bias' ] } }, 'learning_rate': { 'type': 'cosine', 'cosine': { # 0.2 * BatchSize / 256 'initial_learning_rate': 0.2 * train_batch_size / 256, # train_steps - warmup_steps 'decay_steps': 475 * steps_per_epoch } }, 'warmup': { 'type': 'linear', 'linear': { # 5% of total epochs 'warmup_steps': 25 * steps_per_epoch } } })), restrictions=[ 'task.train_data.is_training != None', 'task.validation_data.is_training != None' ])
def test_builder(self, backbone_type, decoder_type, input_size, quantize_detection_head, quantize_detection_decoder): num_classes = 2 input_specs = tf.keras.layers.InputSpec( shape=[None, input_size[0], input_size[1], 3]) if backbone_type == 'spinenet_mobile': backbone_config = backbones.Backbone( type=backbone_type, spinenet_mobile=backbones.SpineNetMobile( model_id='49', stochastic_depth_drop_rate=0.2, min_level=3, max_level=7, use_keras_upsampling_2d=True)) elif backbone_type == 'mobilenet': backbone_config = backbones.Backbone(type=backbone_type, mobilenet=backbones.MobileNet( model_id='MobileNetV2', filter_size_scale=1.0)) else: raise ValueError( 'backbone_type {} is not supported'.format(backbone_type)) if decoder_type == 'identity': decoder_config = decoders.Decoder(type=decoder_type) elif decoder_type == 'fpn': decoder_config = decoders.Decoder(type=decoder_type, fpn=decoders.FPN( num_filters=128, use_separable_conv=True, use_keras_layer=True)) else: raise ValueError( 'decoder_type {} is not supported'.format(decoder_type)) model_config = retinanet_cfg.RetinaNet( num_classes=num_classes, input_size=[input_size[0], input_size[1], 3], backbone=backbone_config, decoder=decoder_config, head=retinanet_cfg.RetinaNetHead(attribute_heads=None, use_separable_conv=True)) l2_regularizer = tf.keras.regularizers.l2(5e-5) # Build the original float32 retinanet model. model = factory.build_retinanet(input_specs=input_specs, model_config=model_config, l2_regularizer=l2_regularizer) # Call the model with dummy input to build the head part. dummpy_input = tf.zeros([1] + model_config.input_size) model(dummpy_input, training=True) # Build the QAT model from the original model with quantization config. qat_model = qat_factory.build_qat_retinanet( model=model, quantization=common.Quantization( quantize_detection_decoder=quantize_detection_decoder, quantize_detection_head=quantize_detection_head), model_config=model_config) if quantize_detection_head: # head become a RetinaNetHeadQuantized when we apply quantization. self.assertIsInstance( qat_model.head, qat_dense_prediction_heads.RetinaNetHeadQuantized) else: # head is a RetinaNetHead if we don't apply quantization on head part. self.assertIsInstance(qat_model.head, dense_prediction_heads.RetinaNetHead) self.assertNotIsInstance( qat_model.head, qat_dense_prediction_heads.RetinaNetHeadQuantized) if decoder_type == 'FPN': if quantize_detection_decoder: # FPN decoder become a general keras functional model after applying # quantization. self.assertNotIsInstance(qat_model.decoder, fpn.FPN) else: self.assertIsInstance(qat_model.decoder, fpn.FPN)
def retinanet_spinenet_mobile_coco() -> cfg.ExperimentConfig: """COCO object detection with mobile RetinaNet.""" train_batch_size = 256 eval_batch_size = 8 steps_per_epoch = COCO_TRAIN_EXAMPLES // train_batch_size input_size = 384 config = cfg.ExperimentConfig( runtime=cfg.RuntimeConfig(mixed_precision_dtype='float32'), task=RetinaNetTask( annotation_file=os.path.join(COCO_INPUT_PATH_BASE, 'instances_val2017.json'), model=RetinaNet( backbone=backbones.Backbone( type='spinenet_mobile', spinenet_mobile=backbones.SpineNetMobile( model_id='49', stochastic_depth_drop_rate=0.2, min_level=3, max_level=7, use_keras_upsampling_2d=False)), decoder=decoders.Decoder(type='identity', identity=decoders.Identity()), head=RetinaNetHead(num_filters=48, use_separable_conv=True), anchor=Anchor(anchor_size=3), norm_activation=common.NormActivation(use_sync_bn=True, activation='swish'), num_classes=91, input_size=[input_size, input_size, 3], min_level=3, max_level=7), losses=Losses(l2_weight_decay=3e-5), train_data=DataConfig(input_path=os.path.join( COCO_INPUT_PATH_BASE, 'train*'), is_training=True, global_batch_size=train_batch_size, parser=Parser(aug_rand_hflip=True, aug_scale_min=0.1, aug_scale_max=2.0)), validation_data=DataConfig(input_path=os.path.join( COCO_INPUT_PATH_BASE, 'val*'), is_training=False, global_batch_size=eval_batch_size)), trainer=cfg.TrainerConfig( train_steps=600 * steps_per_epoch, validation_steps=COCO_VAL_EXAMPLES // eval_batch_size, validation_interval=steps_per_epoch, steps_per_loop=steps_per_epoch, summary_interval=steps_per_epoch, checkpoint_interval=steps_per_epoch, optimizer_config=optimization.OptimizationConfig({ 'optimizer': { 'type': 'sgd', 'sgd': { 'momentum': 0.9 } }, 'learning_rate': { 'type': 'stepwise', 'stepwise': { 'boundaries': [575 * steps_per_epoch, 590 * steps_per_epoch], 'values': [ 0.32 * train_batch_size / 256.0, 0.032 * train_batch_size / 256.0, 0.0032 * train_batch_size / 256.0 ], } }, 'warmup': { 'type': 'linear', 'linear': { 'warmup_steps': 2000, 'warmup_learning_rate': 0.0067 } } })), restrictions=[ 'task.train_data.is_training != None', 'task.validation_data.is_training != None', ]) return config
def cascadercnn_spinenet_coco() -> cfg.ExperimentConfig: """COCO object detection with Cascade RCNN-RS with SpineNet backbone.""" steps_per_epoch = 463 coco_val_samples = 5000 train_batch_size = 256 eval_batch_size = 8 config = cfg.ExperimentConfig( runtime=cfg.RuntimeConfig(mixed_precision_dtype='bfloat16'), task=MaskRCNNTask( annotation_file=os.path.join(COCO_INPUT_PATH_BASE, 'instances_val2017.json'), model=MaskRCNN( backbone=backbones.Backbone( type='spinenet', spinenet=backbones.SpineNet( model_id='49', min_level=3, max_level=7, )), decoder=decoders.Decoder( type='identity', identity=decoders.Identity()), roi_sampler=ROISampler(cascade_iou_thresholds=[0.6, 0.7]), detection_head=DetectionHead( class_agnostic_bbox_pred=True, cascade_class_ensemble=True), anchor=Anchor(anchor_size=3), norm_activation=common.NormActivation( use_sync_bn=True, activation='swish'), num_classes=91, input_size=[640, 640, 3], min_level=3, max_level=7, include_mask=True), losses=Losses(l2_weight_decay=0.00004), train_data=DataConfig( input_path=os.path.join(COCO_INPUT_PATH_BASE, 'train*'), is_training=True, global_batch_size=train_batch_size, parser=Parser( aug_rand_hflip=True, aug_scale_min=0.1, aug_scale_max=2.5)), validation_data=DataConfig( input_path=os.path.join(COCO_INPUT_PATH_BASE, 'val*'), is_training=False, global_batch_size=eval_batch_size, drop_remainder=False)), trainer=cfg.TrainerConfig( train_steps=steps_per_epoch * 500, validation_steps=coco_val_samples // eval_batch_size, validation_interval=steps_per_epoch, steps_per_loop=steps_per_epoch, summary_interval=steps_per_epoch, checkpoint_interval=steps_per_epoch, optimizer_config=optimization.OptimizationConfig({ 'optimizer': { 'type': 'sgd', 'sgd': { 'momentum': 0.9 } }, 'learning_rate': { 'type': 'stepwise', 'stepwise': { 'boundaries': [ steps_per_epoch * 475, steps_per_epoch * 490 ], 'values': [0.32, 0.032, 0.0032], } }, 'warmup': { 'type': 'linear', 'linear': { 'warmup_steps': 2000, 'warmup_learning_rate': 0.0067 } } })), restrictions=[ 'task.train_data.is_training != None', 'task.validation_data.is_training != None', 'task.model.min_level == task.model.backbone.spinenet.min_level', 'task.model.max_level == task.model.backbone.spinenet.max_level', ]) return config
def image_classification_imagenet_revnet() -> cfg.ExperimentConfig: """Returns a revnet config for image classification on imagenet.""" train_batch_size = 4096 eval_batch_size = 4096 steps_per_epoch = IMAGENET_TRAIN_EXAMPLES // train_batch_size config = cfg.ExperimentConfig( task=ImageClassificationTask( model=ImageClassificationModel( num_classes=1001, input_size=[224, 224, 3], backbone=backbones.Backbone( type='revnet', revnet=backbones.RevNet(model_id=56)), norm_activation=common.NormActivation(norm_momentum=0.9, norm_epsilon=1e-5, use_sync_bn=False), add_head_batch_norm=True), losses=Losses(l2_weight_decay=1e-4), train_data=DataConfig(input_path=os.path.join( IMAGENET_INPUT_PATH_BASE, 'train*'), is_training=True, global_batch_size=train_batch_size), validation_data=DataConfig(input_path=os.path.join( IMAGENET_INPUT_PATH_BASE, 'valid*'), is_training=False, global_batch_size=eval_batch_size)), trainer=cfg.TrainerConfig( steps_per_loop=steps_per_epoch, summary_interval=steps_per_epoch, checkpoint_interval=steps_per_epoch, train_steps=90 * steps_per_epoch, validation_steps=IMAGENET_VAL_EXAMPLES // eval_batch_size, validation_interval=steps_per_epoch, optimizer_config=optimization.OptimizationConfig({ 'optimizer': { 'type': 'sgd', 'sgd': { 'momentum': 0.9 } }, 'learning_rate': { 'type': 'stepwise', 'stepwise': { 'boundaries': [ 30 * steps_per_epoch, 60 * steps_per_epoch, 80 * steps_per_epoch ], 'values': [0.8, 0.08, 0.008, 0.0008] } }, 'warmup': { 'type': 'linear', 'linear': { 'warmup_steps': 5 * steps_per_epoch, 'warmup_learning_rate': 0 } } })), restrictions=[ 'task.train_data.is_training != None', 'task.validation_data.is_training != None' ]) return config
def image_classification_imagenet_resnetrs() -> cfg.ExperimentConfig: """Image classification on imagenet with resnet-rs.""" train_batch_size = 4096 eval_batch_size = 4096 steps_per_epoch = IMAGENET_TRAIN_EXAMPLES // train_batch_size config = cfg.ExperimentConfig( task=ImageClassificationTask( model=ImageClassificationModel( num_classes=1001, input_size=[160, 160, 3], backbone=backbones.Backbone( type='resnet', resnet=backbones.ResNet(model_id=50, stem_type='v1', resnetd_shortcut=True, replace_stem_max_pool=True, se_ratio=0.25, stochastic_depth_drop_rate=0.0)), dropout_rate=0.25, norm_activation=common.NormActivation(norm_momentum=0.0, norm_epsilon=1e-5, use_sync_bn=False, activation='swish')), losses=Losses(l2_weight_decay=4e-5, label_smoothing=0.1), train_data=DataConfig( input_path=os.path.join(IMAGENET_INPUT_PATH_BASE, 'train*'), is_training=True, global_batch_size=train_batch_size, aug_type=common.Augmentation( type='randaug', randaug=common.RandAugment(magnitude=10))), validation_data=DataConfig(input_path=os.path.join( IMAGENET_INPUT_PATH_BASE, 'valid*'), is_training=False, global_batch_size=eval_batch_size)), trainer=cfg.TrainerConfig( steps_per_loop=steps_per_epoch, summary_interval=steps_per_epoch, checkpoint_interval=steps_per_epoch, train_steps=350 * steps_per_epoch, validation_steps=IMAGENET_VAL_EXAMPLES // eval_batch_size, validation_interval=steps_per_epoch, optimizer_config=optimization.OptimizationConfig({ 'optimizer': { 'type': 'sgd', 'sgd': { 'momentum': 0.9 } }, 'ema': { 'average_decay': 0.9999, 'trainable_weights_only': False, }, 'learning_rate': { 'type': 'cosine', 'cosine': { 'initial_learning_rate': 1.6, 'decay_steps': 350 * steps_per_epoch } }, 'warmup': { 'type': 'linear', 'linear': { 'warmup_steps': 5 * steps_per_epoch, 'warmup_learning_rate': 0 } } })), restrictions=[ 'task.train_data.is_training != None', 'task.validation_data.is_training != None' ]) return config