def simclr_pretraining_imagenet() -> cfg.ExperimentConfig:
  """SimCLR pretraining on ImageNet."""
  train_batch_size = 4096
  eval_batch_size = 4096
  steps_per_epoch = IMAGENET_TRAIN_EXAMPLES // train_batch_size
  return cfg.ExperimentConfig(
      task=SimCLRPretrainTask(
          model=SimCLRModel(
              mode=simclr_model.PRETRAIN,
              backbone_trainable=True,
              input_size=[224, 224, 3],
              backbone=backbones.Backbone(
                  type='resnet', resnet=backbones.ResNet(model_id=50)),
              projection_head=ProjectionHead(
                  proj_output_dim=128, num_proj_layers=3, ft_proj_idx=1),
              supervised_head=SupervisedHead(num_classes=1001),
              norm_activation=common.NormActivation(
                  norm_momentum=0.9, norm_epsilon=1e-5, use_sync_bn=True)),
          loss=ContrastiveLoss(),
          evaluation=Evaluation(),
          train_data=DataConfig(
              parser=Parser(mode=simclr_model.PRETRAIN),
              decoder=Decoder(decode_label=True),
              input_path=os.path.join(IMAGENET_INPUT_PATH_BASE, 'train*'),
              is_training=True,
              global_batch_size=train_batch_size),
          validation_data=DataConfig(
              parser=Parser(mode=simclr_model.PRETRAIN),
              decoder=Decoder(decode_label=True),
              input_path=os.path.join(IMAGENET_INPUT_PATH_BASE, 'valid*'),
              is_training=False,
              global_batch_size=eval_batch_size),
      ),
      trainer=cfg.TrainerConfig(
          steps_per_loop=steps_per_epoch,
          summary_interval=steps_per_epoch,
          checkpoint_interval=steps_per_epoch,
          train_steps=500 * steps_per_epoch,
          validation_steps=IMAGENET_VAL_EXAMPLES // eval_batch_size,
          validation_interval=steps_per_epoch,
          optimizer_config=optimization.OptimizationConfig({
              'optimizer': {
                  'type': 'lars',
                  'lars': {
                      'momentum': 0.9,
                      'weight_decay_rate': 0.000001,
                      'exclude_from_weight_decay': [
                          'batch_normalization', 'bias'
                      ]
                  }
              },
              'learning_rate': {
                  'type': 'cosine',
                  'cosine': {
                      # 0.2 * BatchSize / 256
                      'initial_learning_rate': 0.2 * train_batch_size / 256,
                      # train_steps - warmup_steps
                      'decay_steps': 475 * steps_per_epoch
                  }
              },
              'warmup': {
                  'type': 'linear',
                  'linear': {
                      # 5% of total epochs
                      'warmup_steps': 25 * steps_per_epoch
                  }
              }
          })),
      restrictions=[
          'task.train_data.is_training != None',
          'task.validation_data.is_training != None'
      ])

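# --- Usage sketch (not part of the original config file). In the Model
# Garden, factories like the one above are typically registered with
# `exp_factory` (see the 'bert/pretraining' registration further below) and
# then looked up by name at train time. The experiment name used here is
# illustrative; `override` and `validate` come from the hyperparams Config
# base class, and `validate()` enforces the `restrictions` declared above.
from official.core import exp_factory

exp_factory.register_config_factory('simclr_pretraining_imagenet')(
    simclr_pretraining_imagenet)

config = exp_factory.get_exp_config('simclr_pretraining_imagenet')
config.override({'task': {'train_data': {'global_batch_size': 2048}}})
config.validate()
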
def seg_resnetfpn_pascal() -> cfg.ExperimentConfig:
  """Image segmentation on pascal voc with resnet-fpn."""
  train_batch_size = 256
  eval_batch_size = 32
  steps_per_epoch = PASCAL_TRAIN_EXAMPLES // train_batch_size
  config = cfg.ExperimentConfig(
      task=SemanticSegmentationTask(
          model=SemanticSegmentationModel(
              num_classes=21,
              input_size=[512, 512, 3],
              min_level=3,
              max_level=7,
              backbone=backbones.Backbone(
                  type='resnet', resnet=backbones.ResNet(model_id=50)),
              decoder=decoders.Decoder(type='fpn', fpn=decoders.FPN()),
              head=SegmentationHead(level=3, num_convs=3),
              norm_activation=common.NormActivation(
                  activation='swish', use_sync_bn=True)),
          losses=Losses(l2_weight_decay=1e-4),
          train_data=DataConfig(
              input_path=os.path.join(PASCAL_INPUT_PATH_BASE, 'train_aug*'),
              is_training=True,
              global_batch_size=train_batch_size,
              aug_scale_min=0.2,
              aug_scale_max=1.5),
          validation_data=DataConfig(
              input_path=os.path.join(PASCAL_INPUT_PATH_BASE, 'val*'),
              is_training=False,
              global_batch_size=eval_batch_size,
              resize_eval_groundtruth=False,
              groundtruth_padded_size=[512, 512],
              drop_remainder=False),
      ),
      trainer=cfg.TrainerConfig(
          steps_per_loop=steps_per_epoch,
          summary_interval=steps_per_epoch,
          checkpoint_interval=steps_per_epoch,
          train_steps=450 * steps_per_epoch,
          validation_steps=PASCAL_VAL_EXAMPLES // eval_batch_size,
          validation_interval=steps_per_epoch,
          optimizer_config=optimization.OptimizationConfig({
              'optimizer': {
                  'type': 'sgd',
                  'sgd': {'momentum': 0.9}
              },
              'learning_rate': {
                  'type': 'polynomial',
                  'polynomial': {
                      'initial_learning_rate': 0.007,
                      'decay_steps': 450 * steps_per_epoch,
                      'end_learning_rate': 0.0,
                      'power': 0.9
                  }
              },
              'warmup': {
                  'type': 'linear',
                  'linear': {
                      'warmup_steps': 5 * steps_per_epoch,
                      'warmup_learning_rate': 0
                  }
              }
          })),
      restrictions=[
          'task.train_data.is_training != None',
          'task.validation_data.is_training != None'
      ])
  return config

def mnv2_deeplabv3_cityscapes() -> cfg.ExperimentConfig:
  """Image segmentation on cityscapes with mobilenetv2 deeplabv3."""
  train_batch_size = 16
  eval_batch_size = 16
  steps_per_epoch = CITYSCAPES_TRAIN_EXAMPLES // train_batch_size
  output_stride = 16
  aspp_dilation_rates = []
  pool_kernel_size = [512, 1024]
  level = int(np.math.log2(output_stride))
  config = cfg.ExperimentConfig(
      task=SemanticSegmentationTask(
          model=SemanticSegmentationModel(
              # Cityscapes uses only 19 semantic classes for train/evaluation.
              # The void (background) class is ignored in train and evaluation.
              num_classes=19,
              input_size=[None, None, 3],
              backbone=backbones.Backbone(
                  type='mobilenet',
                  mobilenet=backbones.MobileNet(
                      model_id='MobileNetV2', output_stride=output_stride)),
              decoder=decoders.Decoder(
                  type='aspp',
                  aspp=decoders.ASPP(
                      level=level,
                      dilation_rates=aspp_dilation_rates,
                      pool_kernel_size=pool_kernel_size)),
              head=SegmentationHead(level=level, num_convs=0),
              norm_activation=common.NormActivation(
                  activation='relu',
                  norm_momentum=0.99,
                  norm_epsilon=1e-3,
                  use_sync_bn=True)),
          losses=Losses(l2_weight_decay=4e-5),
          train_data=DataConfig(
              input_path=os.path.join(CITYSCAPES_INPUT_PATH_BASE,
                                      'train_fine**'),
              crop_size=[512, 1024],
              output_size=[1024, 2048],
              is_training=True,
              global_batch_size=train_batch_size,
              aug_scale_min=0.5,
              aug_scale_max=2.0),
          validation_data=DataConfig(
              input_path=os.path.join(CITYSCAPES_INPUT_PATH_BASE, 'val_fine*'),
              output_size=[1024, 2048],
              is_training=False,
              global_batch_size=eval_batch_size,
              resize_eval_groundtruth=True,
              drop_remainder=False),
          # Coco pre-trained mobilenetv2 checkpoint
          init_checkpoint='gs://tf_model_garden/cloud/vision-2.0/deeplab/deeplabv3_mobilenetv2_coco/best_ckpt-63',  # pylint: disable=line-too-long
          init_checkpoint_modules='backbone'),
      trainer=cfg.TrainerConfig(
          steps_per_loop=steps_per_epoch,
          summary_interval=steps_per_epoch,
          checkpoint_interval=steps_per_epoch,
          train_steps=100000,
          validation_steps=CITYSCAPES_VAL_EXAMPLES // eval_batch_size,
          validation_interval=steps_per_epoch,
          best_checkpoint_eval_metric='mean_iou',
          best_checkpoint_export_subdir='best_ckpt',
          best_checkpoint_metric_comp='higher',
          optimizer_config=optimization.OptimizationConfig({
              'optimizer': {
                  'type': 'sgd',
                  'sgd': {'momentum': 0.9}
              },
              'learning_rate': {
                  'type': 'polynomial',
                  'polynomial': {
                      'initial_learning_rate': 0.01,
                      'decay_steps': 100000,
                      'end_learning_rate': 0.0,
                      'power': 0.9
                  }
              },
              'warmup': {
                  'type': 'linear',
                  'linear': {
                      'warmup_steps': 5 * steps_per_epoch,
                      'warmup_learning_rate': 0
                  }
              }
          })),
      restrictions=[
          'task.train_data.is_training != None',
          'task.validation_data.is_training != None'
      ])
  return config

def retinanet_spinenet_coco() -> cfg.ExperimentConfig:
  """COCO object detection with RetinaNet using SpineNet backbone."""
  train_batch_size = 256
  eval_batch_size = 8
  steps_per_epoch = COCO_TRAIN_EXAMPLES // train_batch_size
  input_size = 640
  config = cfg.ExperimentConfig(
      runtime=cfg.RuntimeConfig(mixed_precision_dtype='float32'),
      task=RetinaNetTask(
          annotation_file=os.path.join(COCO_INPUT_PATH_BASE,
                                       'instances_val2017.json'),
          model=RetinaNet(
              backbone=backbones.Backbone(
                  type='spinenet',
                  spinenet=backbones.SpineNet(
                      model_id='49', stochastic_depth_drop_rate=0.2)),
              decoder=decoders.Decoder(
                  type='identity', identity=decoders.Identity()),
              anchor=Anchor(anchor_size=3),
              norm_activation=common.NormActivation(
                  use_sync_bn=True, activation='swish'),
              num_classes=91,
              input_size=[input_size, input_size, 3],
              min_level=3,
              max_level=7),
          losses=Losses(l2_weight_decay=4e-5),
          train_data=DataConfig(
              input_path=os.path.join(COCO_INPUT_PATH_BASE, 'train*'),
              is_training=True,
              global_batch_size=train_batch_size,
              parser=Parser(
                  aug_rand_hflip=True, aug_scale_min=0.1, aug_scale_max=2.0)),
          validation_data=DataConfig(
              input_path=os.path.join(COCO_INPUT_PATH_BASE, 'val*'),
              is_training=False,
              global_batch_size=eval_batch_size)),
      trainer=cfg.TrainerConfig(
          train_steps=500 * steps_per_epoch,
          validation_steps=COCO_VAL_EXAMPLES // eval_batch_size,
          validation_interval=steps_per_epoch,
          steps_per_loop=steps_per_epoch,
          summary_interval=steps_per_epoch,
          checkpoint_interval=steps_per_epoch,
          optimizer_config=optimization.OptimizationConfig({
              'optimizer': {
                  'type': 'sgd',
                  'sgd': {'momentum': 0.9}
              },
              'learning_rate': {
                  'type': 'stepwise',
                  'stepwise': {
                      'boundaries': [475 * steps_per_epoch,
                                     490 * steps_per_epoch],
                      'values': [
                          0.32 * train_batch_size / 256.0,
                          0.032 * train_batch_size / 256.0,
                          0.0032 * train_batch_size / 256.0
                      ],
                  }
              },
              'warmup': {
                  'type': 'linear',
                  'linear': {
                      'warmup_steps': 2000,
                      'warmup_learning_rate': 0.0067
                  }
              }
          })),
      restrictions=[
          'task.train_data.is_training != None',
          'task.validation_data.is_training != None'
      ])
  return config

def deep_mask_head_rcnn_resnetfpn_coco() -> cfg.ExperimentConfig:
  """COCO object detection with Mask R-CNN with deep mask heads."""
  global_batch_size = 64
  steps_per_epoch = int(retinanet_config.COCO_TRAIN_EXAMPLES /
                        global_batch_size)
  coco_val_samples = 5000

  config = cfg.ExperimentConfig(
      runtime=cfg.RuntimeConfig(mixed_precision_dtype='bfloat16'),
      task=DeepMaskHeadRCNNTask(
          init_checkpoint='gs://cloud-tpu-checkpoints/vision-2.0/resnet50_imagenet/ckpt-28080',  # pylint: disable=line-too-long
          init_checkpoint_modules='backbone',
          annotation_file=os.path.join(maskrcnn_config.COCO_INPUT_PATH_BASE,
                                       'instances_val2017.json'),
          model=DeepMaskHeadRCNN(
              num_classes=91,
              input_size=[1024, 1024, 3],
              include_mask=True),  # pytype: disable=wrong-keyword-args
          losses=maskrcnn_config.Losses(l2_weight_decay=0.00004),
          train_data=maskrcnn_config.DataConfig(
              input_path=os.path.join(maskrcnn_config.COCO_INPUT_PATH_BASE,
                                      'train*'),
              is_training=True,
              global_batch_size=global_batch_size,
              parser=maskrcnn_config.Parser(
                  aug_rand_hflip=True, aug_scale_min=0.8, aug_scale_max=1.25)),
          validation_data=maskrcnn_config.DataConfig(
              input_path=os.path.join(maskrcnn_config.COCO_INPUT_PATH_BASE,
                                      'val*'),
              is_training=False,
              global_batch_size=8)),  # pytype: disable=wrong-keyword-args
      trainer=cfg.TrainerConfig(
          train_steps=22500,
          validation_steps=coco_val_samples // 8,
          validation_interval=steps_per_epoch,
          steps_per_loop=steps_per_epoch,
          summary_interval=steps_per_epoch,
          checkpoint_interval=steps_per_epoch,
          optimizer_config=optimization.OptimizationConfig({
              'optimizer': {
                  'type': 'sgd',
                  'sgd': {'momentum': 0.9}
              },
              'learning_rate': {
                  'type': 'stepwise',
                  'stepwise': {
                      'boundaries': [15000, 20000],
                      'values': [0.12, 0.012, 0.0012],
                  }
              },
              'warmup': {
                  'type': 'linear',
                  'linear': {
                      'warmup_steps': 500,
                      'warmup_learning_rate': 0.0067
                  }
              }
          })),
      restrictions=[
          'task.train_data.is_training != None',
          'task.validation_data.is_training != None'
      ])
  return config

def image_classification_imagenet_mobilenet() -> cfg.ExperimentConfig:
  """Image classification on imagenet with mobilenet."""
  train_batch_size = 4096
  eval_batch_size = 4096
  steps_per_epoch = IMAGENET_TRAIN_EXAMPLES // train_batch_size
  config = cfg.ExperimentConfig(
      task=ImageClassificationTask(
          model=ImageClassificationModel(
              num_classes=1001,
              dropout_rate=0.2,
              input_size=[224, 224, 3],
              backbone=backbones.Backbone(
                  type='mobilenet',
                  mobilenet=backbones.MobileNet(
                      model_id='MobileNetV2', filter_size_scale=1.0)),
              norm_activation=common.NormActivation(
                  norm_momentum=0.997, norm_epsilon=1e-3, use_sync_bn=False)),
          losses=Losses(l2_weight_decay=1e-5, label_smoothing=0.1),
          train_data=DataConfig(
              input_path=os.path.join(IMAGENET_INPUT_PATH_BASE, 'train*'),
              is_training=True,
              global_batch_size=train_batch_size),
          validation_data=DataConfig(
              input_path=os.path.join(IMAGENET_INPUT_PATH_BASE, 'valid*'),
              is_training=False,
              global_batch_size=eval_batch_size)),
      trainer=cfg.TrainerConfig(
          steps_per_loop=steps_per_epoch,
          summary_interval=steps_per_epoch,
          checkpoint_interval=steps_per_epoch,
          train_steps=500 * steps_per_epoch,
          validation_steps=IMAGENET_VAL_EXAMPLES // eval_batch_size,
          validation_interval=steps_per_epoch,
          optimizer_config=optimization.OptimizationConfig({
              'optimizer': {
                  'type': 'rmsprop',
                  'rmsprop': {
                      'rho': 0.9,
                      'momentum': 0.9,
                      'epsilon': 0.002,
                  }
              },
              'learning_rate': {
                  'type': 'exponential',
                  'exponential': {
                      'initial_learning_rate':
                          0.008 * (train_batch_size // 128),
                      'decay_steps': int(2.5 * steps_per_epoch),
                      'decay_rate': 0.98,
                      'staircase': True
                  }
              },
              'warmup': {
                  'type': 'linear',
                  'linear': {
                      'warmup_steps': 5 * steps_per_epoch,
                      'warmup_learning_rate': 0
                  }
              },
          })),
      restrictions=[
          'task.train_data.is_training != None',
          'task.validation_data.is_training != None'
      ])
  return config

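# --- Sketch of how the nested optimizer dicts above are consumed (an
# assumption, not part of the original file, with `optimization` being
# official.modeling.optimization): the Model Garden's OptimizerFactory builds
# the learning-rate schedule (decay wrapped with warmup) and the optimizer
# from the same OptimizationConfig object.
opt_config = image_classification_imagenet_mobilenet().trainer.optimizer_config
opt_factory = optimization.OptimizerFactory(opt_config)
lr = opt_factory.build_learning_rate()  # Exponential decay + linear warmup.
optimizer = opt_factory.build_optimizer(lr)  # RMSprop with the args above.
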
def panoptic_deeplab_coco() -> cfg.ExperimentConfig:
  """COCO panoptic segmentation with Panoptic Deeplab."""
  train_steps = 200000
  train_batch_size = 64
  eval_batch_size = 1
  steps_per_epoch = _COCO_TRAIN_EXAMPLES // train_batch_size
  validation_steps = _COCO_VAL_EXAMPLES // eval_batch_size

  num_panoptic_categories = 201
  num_thing_categories = 91
  ignore_label = 0

  is_thing = [False]
  for idx in range(1, num_panoptic_categories):
    is_thing.append(True if idx <= num_thing_categories else False)

  input_size = [640, 640, 3]
  output_stride = 16
  aspp_dilation_rates = [6, 12, 18]
  multigrid = [1, 2, 4]
  stem_type = 'v1'
  level = int(np.math.log2(output_stride))

  config = cfg.ExperimentConfig(
      runtime=cfg.RuntimeConfig(
          mixed_precision_dtype='bfloat16', enable_xla=True),
      task=PanopticDeeplabTask(
          init_checkpoint='gs://tf_model_garden/vision/panoptic/panoptic_deeplab/imagenet/resnet50_v1/ckpt-436800',  # pylint: disable=line-too-long
          init_checkpoint_modules=['backbone'],
          model=PanopticDeeplab(
              num_classes=num_panoptic_categories,
              input_size=input_size,
              backbone=backbones.Backbone(
                  type='dilated_resnet',
                  dilated_resnet=backbones.DilatedResNet(
                      model_id=50,
                      stem_type=stem_type,
                      output_stride=output_stride,
                      multigrid=multigrid,
                      se_ratio=0.25,
                      last_stage_repeats=1,
                      stochastic_depth_drop_rate=0.2)),
              decoder=decoders.Decoder(
                  type='aspp',
                  aspp=decoders.ASPP(
                      level=level,
                      num_filters=256,
                      pool_kernel_size=input_size[:2],
                      dilation_rates=aspp_dilation_rates,
                      use_depthwise_convolution=True,
                      dropout_rate=0.1)),
              semantic_head=SemanticHead(
                  level=level,
                  num_convs=1,
                  num_filters=256,
                  kernel_size=5,
                  use_depthwise_convolution=True,
                  upsample_factor=1,
                  low_level=[3, 2],
                  low_level_num_filters=[64, 32],
                  fusion_num_output_filters=256,
                  prediction_kernel_size=1),
              instance_head=InstanceHead(
                  level=level,
                  num_convs=1,
                  num_filters=32,
                  kernel_size=5,
                  use_depthwise_convolution=True,
                  upsample_factor=1,
                  low_level=[3, 2],
                  low_level_num_filters=[32, 16],
                  fusion_num_output_filters=128,
                  prediction_kernel_size=1),
              shared_decoder=False,
              generate_panoptic_masks=True,
              post_processor=PanopticDeeplabPostProcessor(
                  output_size=input_size[:2],
                  center_score_threshold=0.1,
                  thing_class_ids=list(range(1, num_thing_categories)),
                  label_divisor=256,
                  stuff_area_limit=4096,
                  ignore_label=ignore_label,
                  nms_kernel=41,
                  keep_k_centers=200,
                  rescale_predictions=True)),
          losses=Losses(
              label_smoothing=0.0,
              ignore_label=ignore_label,
              l2_weight_decay=0.0,
              top_k_percent_pixels=0.2,
              segmentation_loss_weight=1.0,
              center_heatmap_loss_weight=200,
              center_offset_loss_weight=0.01),
          train_data=DataConfig(
              input_path=os.path.join(_COCO_INPUT_PATH_BASE, 'train*'),
              is_training=True,
              global_batch_size=train_batch_size,
              parser=Parser(
                  aug_scale_min=0.5,
                  aug_scale_max=1.5,
                  aug_rand_hflip=True,
                  aug_type=common.Augmentation(
                      type='autoaug',
                      autoaug=common.AutoAugment(
                          augmentation_name='panoptic_deeplab_policy')),
                  sigma=8.0,
                  small_instance_area_threshold=4096,
                  small_instance_weight=3.0)),
          validation_data=DataConfig(
              input_path=os.path.join(_COCO_INPUT_PATH_BASE, 'val*'),
              is_training=False,
              global_batch_size=eval_batch_size,
              parser=Parser(
                  resize_eval_groundtruth=False,
                  groundtruth_padded_size=[640, 640],
                  aug_scale_min=1.0,
                  aug_scale_max=1.0,
                  aug_rand_hflip=False,
                  aug_type=None,
                  sigma=8.0,
                  small_instance_area_threshold=4096,
                  small_instance_weight=3.0),
              drop_remainder=False),
          evaluation=Evaluation(
              ignored_label=ignore_label,
              max_instances_per_category=256,
              offset=256 * 256 * 256,
              is_thing=is_thing,
              rescale_predictions=True,
              report_per_class_pq=False,
              report_per_class_iou=False,
              report_train_mean_iou=False)),
      trainer=cfg.TrainerConfig(
          train_steps=train_steps,
          validation_steps=validation_steps,
          validation_interval=steps_per_epoch,
          steps_per_loop=steps_per_epoch,
          summary_interval=steps_per_epoch,
          checkpoint_interval=steps_per_epoch,
          optimizer_config=optimization.OptimizationConfig({
              'optimizer': {
                  'type': 'adam',
              },
              'learning_rate': {
                  'type': 'polynomial',
                  'polynomial': {
                      'initial_learning_rate': 0.0005,
                      'decay_steps': train_steps,
                      'end_learning_rate': 0.0,
                      'power': 0.9
                  }
              },
              'warmup': {
                  'type': 'linear',
                  'linear': {
                      'warmup_steps': 2000,
                      'warmup_learning_rate': 0
                  }
              }
          })),
      restrictions=[
          'task.train_data.is_training != None',
          'task.validation_data.is_training != None'
      ])
  return config

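# --- Worked sketch of the panoptic id encoding implied by the evaluator
# settings above (label_divisor=256, max_instances_per_category=256,
# offset=256 * 256 * 256). This mirrors the usual Panoptic-DeepLab convention
# and is illustrative, not part of the original file: each pixel's panoptic id
# packs a category id and an instance id, so both must stay below the divisor.
LABEL_DIVISOR = 256


def encode_panoptic_id(category_id: int, instance_id: int) -> int:
  # e.g. category 17, instance 3 -> 17 * 256 + 3 = 4355.
  return category_id * LABEL_DIVISOR + instance_id


def decode_panoptic_id(panoptic_id: int):
  return panoptic_id // LABEL_DIVISOR, panoptic_id % LABEL_DIVISOR


assert decode_panoptic_id(encode_panoptic_id(17, 3)) == (17, 3)
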
def fasterrcnn_resnetfpn_coco() -> cfg.ExperimentConfig:
  """COCO object detection with Faster R-CNN."""
  steps_per_epoch = 500
  coco_val_samples = 5000
  train_batch_size = 64
  eval_batch_size = 8

  config = cfg.ExperimentConfig(
      runtime=cfg.RuntimeConfig(mixed_precision_dtype='bfloat16'),
      task=MaskRCNNTask(
          init_checkpoint='gs://cloud-tpu-checkpoints/vision-2.0/resnet50_imagenet/ckpt-28080',  # pylint: disable=line-too-long
          init_checkpoint_modules='backbone',
          annotation_file=os.path.join(COCO_INPUT_PATH_BASE,
                                       'instances_val2017.json'),
          model=MaskRCNN(
              num_classes=91,
              input_size=[1024, 1024, 3],
              include_mask=False,
              mask_head=None,
              mask_sampler=None,
              mask_roi_aligner=None),
          losses=Losses(l2_weight_decay=0.00004),
          train_data=DataConfig(
              input_path=os.path.join(COCO_INPUT_PATH_BASE, 'train*'),
              is_training=True,
              global_batch_size=train_batch_size,
              parser=Parser(
                  aug_rand_hflip=True, aug_scale_min=0.8, aug_scale_max=1.25)),
          validation_data=DataConfig(
              input_path=os.path.join(COCO_INPUT_PATH_BASE, 'val*'),
              is_training=False,
              global_batch_size=eval_batch_size,
              drop_remainder=False)),
      trainer=cfg.TrainerConfig(
          train_steps=22500,
          validation_steps=coco_val_samples // eval_batch_size,
          validation_interval=steps_per_epoch,
          steps_per_loop=steps_per_epoch,
          summary_interval=steps_per_epoch,
          checkpoint_interval=steps_per_epoch,
          optimizer_config=optimization.OptimizationConfig({
              'optimizer': {
                  'type': 'sgd',
                  'sgd': {'momentum': 0.9}
              },
              'learning_rate': {
                  'type': 'stepwise',
                  'stepwise': {
                      'boundaries': [15000, 20000],
                      'values': [0.12, 0.012, 0.0012],
                  }
              },
              'warmup': {
                  'type': 'linear',
                  'linear': {
                      'warmup_steps': 500,
                      'warmup_learning_rate': 0.0067
                  }
              }
          })),
      restrictions=[
          'task.train_data.is_training != None',
          'task.validation_data.is_training != None'
      ])
  return config

def cascadercnn_spinenet_coco() -> cfg.ExperimentConfig:
  """COCO object detection with Cascade RCNN-RS with SpineNet backbone."""
  steps_per_epoch = 463
  coco_val_samples = 5000
  train_batch_size = 256
  eval_batch_size = 8

  config = cfg.ExperimentConfig(
      runtime=cfg.RuntimeConfig(mixed_precision_dtype='bfloat16'),
      task=MaskRCNNTask(
          annotation_file=os.path.join(COCO_INPUT_PATH_BASE,
                                       'instances_val2017.json'),
          model=MaskRCNN(
              backbone=backbones.Backbone(
                  type='spinenet',
                  spinenet=backbones.SpineNet(
                      model_id='49', min_level=3, max_level=7)),
              decoder=decoders.Decoder(
                  type='identity', identity=decoders.Identity()),
              roi_sampler=ROISampler(cascade_iou_thresholds=[0.6, 0.7]),
              detection_head=DetectionHead(
                  class_agnostic_bbox_pred=True, cascade_class_ensemble=True),
              anchor=Anchor(anchor_size=3),
              norm_activation=common.NormActivation(
                  use_sync_bn=True, activation='swish'),
              num_classes=91,
              input_size=[640, 640, 3],
              min_level=3,
              max_level=7,
              include_mask=True),
          losses=Losses(l2_weight_decay=0.00004),
          train_data=DataConfig(
              input_path=os.path.join(COCO_INPUT_PATH_BASE, 'train*'),
              is_training=True,
              global_batch_size=train_batch_size,
              parser=Parser(
                  aug_rand_hflip=True, aug_scale_min=0.1, aug_scale_max=2.5)),
          validation_data=DataConfig(
              input_path=os.path.join(COCO_INPUT_PATH_BASE, 'val*'),
              is_training=False,
              global_batch_size=eval_batch_size,
              drop_remainder=False)),
      trainer=cfg.TrainerConfig(
          train_steps=steps_per_epoch * 500,
          validation_steps=coco_val_samples // eval_batch_size,
          validation_interval=steps_per_epoch,
          steps_per_loop=steps_per_epoch,
          summary_interval=steps_per_epoch,
          checkpoint_interval=steps_per_epoch,
          optimizer_config=optimization.OptimizationConfig({
              'optimizer': {
                  'type': 'sgd',
                  'sgd': {'momentum': 0.9}
              },
              'learning_rate': {
                  'type': 'stepwise',
                  'stepwise': {
                      'boundaries': [steps_per_epoch * 475,
                                     steps_per_epoch * 490],
                      'values': [0.32, 0.032, 0.0032],
                  }
              },
              'warmup': {
                  'type': 'linear',
                  'linear': {
                      'warmup_steps': 2000,
                      'warmup_learning_rate': 0.0067
                  }
              }
          })),
      restrictions=[
          'task.train_data.is_training != None',
          'task.validation_data.is_training != None',
          'task.model.min_level == task.model.backbone.spinenet.min_level',
          'task.model.max_level == task.model.backbone.spinenet.max_level',
      ])
  return config

def autoseg_edgetpu_experiment_config(
    backbone_name: str, init_backbone: bool = True) -> cfg.ExperimentConfig:
  """Experiment using the semantic segmentation searched model.

  Args:
    backbone_name: Name of the backbone used for this model.
    init_backbone: Whether to initialize backbone from a pretrained checkpoint.

  Returns:
    ExperimentConfig
  """
  epochs = 300
  train_batch_size = 64
  eval_batch_size = 32
  image_size = 512
  steps_per_epoch = ADE20K_TRAIN_EXAMPLES // train_batch_size
  train_steps = epochs * steps_per_epoch
  model_config = AutosegEdgeTPUModelConfig(
      num_classes=32, input_size=[image_size, image_size, 3])
  model_config.model_params.model_name = backbone_name
  if init_backbone:
    model_config.model_params.model_weights_path = (
        BACKBONE_PRETRAINED_CHECKPOINT[backbone_name])
  model_config.model_params.overrides.resolution = image_size
  config = cfg.ExperimentConfig(
      task=AutosegEdgeTPUTaskConfig(
          model=model_config,
          train_data=base_cfg.DataConfig(
              input_path=os.path.join(ADE20K_INPUT_PATH_BASE, 'train-*'),
              output_size=[image_size, image_size],
              is_training=True,
              global_batch_size=train_batch_size,
              aug_scale_min=0.5,
              aug_scale_max=2.0),
          validation_data=base_cfg.DataConfig(
              input_path=os.path.join(ADE20K_INPUT_PATH_BASE, 'val-*'),
              output_size=[image_size, image_size],
              is_training=False,
              resize_eval_groundtruth=True,
              drop_remainder=True,
              global_batch_size=eval_batch_size),
          evaluation=base_cfg.Evaluation(report_train_mean_iou=False)),
      trainer=cfg.TrainerConfig(
          steps_per_loop=steps_per_epoch,
          summary_interval=steps_per_epoch,
          checkpoint_interval=steps_per_epoch * 5,
          max_to_keep=10,
          train_steps=train_steps,
          validation_steps=ADE20K_VAL_EXAMPLES // eval_batch_size,
          validation_interval=steps_per_epoch,
          optimizer_config=optimization.OptimizationConfig({
              'optimizer': {
                  'type': 'sgd',
                  'sgd': {
                      'nesterov': True,
                      'momentum': 0.9,
                  }
              },
              'ema': {
                  'average_decay': 0.9998,
                  'trainable_weights_only': False,
              },
              'learning_rate': {
                  'type': 'cosine',
                  'cosine': {
                      'initial_learning_rate': 0.12,
                      'decay_steps': train_steps
                  }
              },
              'warmup': {
                  'type': 'linear',
                  'linear': {
                      'warmup_steps': 5 * steps_per_epoch,
                      'warmup_learning_rate': 0
                  }
              },
          })),
      restrictions=[
          'task.train_data.is_training != None',
          'task.validation_data.is_training != None'
      ])
  return config

def seg_deeplabv3plus_ade20k_32(
    backbone: str, init_backbone: bool = True) -> cfg.ExperimentConfig:
  """Semantic segmentation on ADE20K dataset with deeplabv3+."""
  epochs = 200
  train_batch_size = 128
  eval_batch_size = 32
  image_size = 512
  steps_per_epoch = ADE20K_TRAIN_EXAMPLES // train_batch_size
  aspp_dilation_rates = [5, 10, 15]
  pretrained_checkpoint_path = BACKBONE_PRETRAINED_CHECKPOINT[
      backbone] if init_backbone else None
  config = cfg.ExperimentConfig(
      task=CustomSemanticSegmentationTaskConfig(
          model=base_cfg.SemanticSegmentationModel(
              # ADE20K uses only 32 semantic classes for train/evaluation.
              # The void (background) class is ignored in train and evaluation.
              num_classes=32,
              input_size=[None, None, 3],
              backbone=Backbone(
                  type='mobilenet_edgetpu',
                  mobilenet_edgetpu=MobileNetEdgeTPU(
                      model_id=backbone,
                      pretrained_checkpoint_path=pretrained_checkpoint_path,
                      freeze_large_filters=500,
                  )),
              decoder=decoders.Decoder(
                  type='aspp',
                  aspp=decoders.ASPP(
                      level=BACKBONE_HEADPOINT[backbone],
                      use_depthwise_convolution=True,
                      dilation_rates=aspp_dilation_rates,
                      pool_kernel_size=[256, 256],
                      num_filters=128,
                      dropout_rate=0.3,
                  )),
              head=base_cfg.SegmentationHead(
                  level=BACKBONE_HEADPOINT[backbone],
                  num_convs=2,
                  num_filters=256,
                  use_depthwise_convolution=True,
                  feature_fusion='deeplabv3plus',
                  low_level=BACKBONE_LOWER_FEATURES[backbone],
                  low_level_num_filters=48),
              norm_activation=common.NormActivation(
                  activation='relu',
                  norm_momentum=0.99,
                  norm_epsilon=2e-3,
                  use_sync_bn=False)),
          train_data=base_cfg.DataConfig(
              input_path=os.path.join(ADE20K_INPUT_PATH_BASE, 'train-*'),
              output_size=[image_size, image_size],
              is_training=True,
              global_batch_size=train_batch_size),
          validation_data=base_cfg.DataConfig(
              input_path=os.path.join(ADE20K_INPUT_PATH_BASE, 'val-*'),
              output_size=[image_size, image_size],
              is_training=False,
              global_batch_size=eval_batch_size,
              resize_eval_groundtruth=True,
              drop_remainder=False),
          evaluation=base_cfg.Evaluation(report_train_mean_iou=False),
      ),
      trainer=cfg.TrainerConfig(
          steps_per_loop=steps_per_epoch,
          summary_interval=steps_per_epoch,
          checkpoint_interval=steps_per_epoch,
          train_steps=epochs * steps_per_epoch,
          validation_steps=ADE20K_VAL_EXAMPLES // eval_batch_size,
          validation_interval=steps_per_epoch,
          optimizer_config=optimization.OptimizationConfig({
              'optimizer': {
                  'type': 'adam',
              },
              'learning_rate': {
                  'type': 'polynomial',
                  'polynomial': {
                      'initial_learning_rate': 0.0001,
                      'decay_steps': epochs * steps_per_epoch,
                      'end_learning_rate': 0.0,
                      'power': 0.9
                  }
              },
              'warmup': {
                  'type': 'linear',
                  'linear': {
                      'warmup_steps': 4 * steps_per_epoch,
                      'warmup_learning_rate': 0
                  }
              }
          })),
      restrictions=[
          'task.train_data.is_training != None',
          'task.validation_data.is_training != None'
      ])
  return config

from official.nlp.data import pretrain_dataloader
from official.nlp.data import pretrain_dynamic_dataloader
from official.nlp.tasks import masked_lm

_TRAINER = cfg.TrainerConfig(
    train_steps=1000000,
    optimizer_config=optimization.OptimizationConfig({
        'optimizer': {
            'type': 'adamw',
            'adamw': {
                'weight_decay_rate': 0.01,
                'exclude_from_weight_decay': [
                    'LayerNorm', 'layer_norm', 'bias'
                ],
            }
        },
        'learning_rate': {
            'type': 'polynomial',
            'polynomial': {
                'initial_learning_rate': 1e-4,
                'end_learning_rate': 0.0,
            }
        },
        'warmup': {
            'type': 'polynomial'
        }
    }))


@exp_factory.register_config_factory('bert/pretraining')
def bert_pretraining() -> cfg.ExperimentConfig:
  """BERT pretraining experiment."""

def maskrcnn_mobilenet_coco() -> cfg.ExperimentConfig:
  """COCO object detection with Mask R-CNN with MobileNet backbone."""
  steps_per_epoch = 232
  coco_val_samples = 5000
  train_batch_size = 512
  eval_batch_size = 512

  config = cfg.ExperimentConfig(
      runtime=cfg.RuntimeConfig(mixed_precision_dtype='bfloat16'),
      task=MaskRCNNTask(
          annotation_file=os.path.join(COCO_INPUT_PATH_BASE,
                                       'instances_val2017.json'),
          model=MaskRCNN(
              backbone=backbones.Backbone(
                  type='mobilenet',
                  mobilenet=backbones.MobileNet(model_id='MobileNetV2')),
              decoder=decoders.Decoder(
                  type='fpn',
                  fpn=decoders.FPN(num_filters=128, use_separable_conv=True)),
              rpn_head=RPNHead(
                  use_separable_conv=True,
                  num_filters=128),  # 1/2 of original channels.
              detection_head=DetectionHead(
                  use_separable_conv=True,
                  num_filters=128,
                  fc_dims=512),  # 1/2 of original channels.
              mask_head=MaskHead(
                  use_separable_conv=True,
                  num_filters=128),  # 1/2 of original channels.
              anchor=Anchor(anchor_size=3),
              norm_activation=common.NormActivation(
                  activation='relu6',
                  norm_momentum=0.99,
                  norm_epsilon=0.001,
                  use_sync_bn=True),
              num_classes=91,
              input_size=[512, 512, 3],
              min_level=3,
              max_level=6,
              include_mask=True),
          losses=Losses(l2_weight_decay=0.00004),
          train_data=DataConfig(
              input_path=os.path.join(COCO_INPUT_PATH_BASE, 'train*'),
              is_training=True,
              global_batch_size=train_batch_size,
              parser=Parser(
                  aug_rand_hflip=True, aug_scale_min=0.5, aug_scale_max=2.0)),
          validation_data=DataConfig(
              input_path=os.path.join(COCO_INPUT_PATH_BASE, 'val*'),
              is_training=False,
              global_batch_size=eval_batch_size,
              drop_remainder=False)),
      trainer=cfg.TrainerConfig(
          train_steps=steps_per_epoch * 350,
          validation_steps=coco_val_samples // eval_batch_size,
          validation_interval=steps_per_epoch,
          steps_per_loop=steps_per_epoch,
          summary_interval=steps_per_epoch,
          checkpoint_interval=steps_per_epoch,
          optimizer_config=optimization.OptimizationConfig({
              'optimizer': {
                  'type': 'sgd',
                  'sgd': {'momentum': 0.9}
              },
              'learning_rate': {
                  'type': 'stepwise',
                  'stepwise': {
                      'boundaries': [steps_per_epoch * 320,
                                     steps_per_epoch * 340],
                      'values': [0.32, 0.032, 0.0032],
                  }
              },
              'warmup': {
                  'type': 'linear',
                  'linear': {
                      'warmup_steps': 2000,
                      'warmup_learning_rate': 0.0067
                  }
              }
          })),
      restrictions=[
          'task.train_data.is_training != None',
          'task.validation_data.is_training != None',
      ])
  return config

def simclr_finetuning_imagenet() -> cfg.ExperimentConfig:
  """SimCLR finetuning on ImageNet."""
  train_batch_size = 1024
  eval_batch_size = 1024
  steps_per_epoch = IMAGENET_TRAIN_EXAMPLES // train_batch_size
  pretrain_model_base = ''
  return cfg.ExperimentConfig(
      task=SimCLRFinetuneTask(
          model=SimCLRModel(
              mode=simclr_model.FINETUNE,
              backbone_trainable=True,
              input_size=[224, 224, 3],
              backbone=backbones.Backbone(
                  type='resnet', resnet=backbones.ResNet(model_id=50)),
              projection_head=ProjectionHead(
                  proj_output_dim=128, num_proj_layers=3, ft_proj_idx=1),
              supervised_head=SupervisedHead(num_classes=1001, zero_init=True),
              norm_activation=common.NormActivation(
                  norm_momentum=0.9, norm_epsilon=1e-5, use_sync_bn=False)),
          loss=ClassificationLosses(),
          evaluation=Evaluation(),
          train_data=DataConfig(
              parser=Parser(mode=simclr_model.FINETUNE),
              input_path=os.path.join(IMAGENET_INPUT_PATH_BASE, 'train*'),
              is_training=True,
              global_batch_size=train_batch_size),
          validation_data=DataConfig(
              parser=Parser(mode=simclr_model.FINETUNE),
              input_path=os.path.join(IMAGENET_INPUT_PATH_BASE, 'valid*'),
              is_training=False,
              global_batch_size=eval_batch_size),
          init_checkpoint=pretrain_model_base,
          # all, backbone_projection or backbone
          init_checkpoint_modules='backbone_projection'),
      trainer=cfg.TrainerConfig(
          steps_per_loop=steps_per_epoch,
          summary_interval=steps_per_epoch,
          checkpoint_interval=steps_per_epoch,
          train_steps=60 * steps_per_epoch,
          validation_steps=IMAGENET_VAL_EXAMPLES // eval_batch_size,
          validation_interval=steps_per_epoch,
          optimizer_config=optimization.OptimizationConfig({
              'optimizer': {
                  'type': 'lars',
                  'lars': {
                      'momentum': 0.9,
                      'weight_decay_rate': 0.0,
                      'exclude_from_weight_decay': [
                          'batch_normalization', 'bias'
                      ]
                  }
              },
              'learning_rate': {
                  'type': 'cosine',
                  'cosine': {
                      # 0.01 * BatchSize / 512
                      'initial_learning_rate': 0.01 * train_batch_size / 512,
                      'decay_steps': 60 * steps_per_epoch
                  }
              }
          })),
      restrictions=[
          'task.train_data.is_training != None',
          'task.validation_data.is_training != None'
      ])

def image_classification_imagenet_resnetrs() -> cfg.ExperimentConfig:
  """Image classification on imagenet with resnet-rs."""
  train_batch_size = 4096
  eval_batch_size = 4096
  steps_per_epoch = IMAGENET_TRAIN_EXAMPLES // train_batch_size
  config = cfg.ExperimentConfig(
      task=ImageClassificationTask(
          model=ImageClassificationModel(
              num_classes=1001,
              input_size=[160, 160, 3],
              backbone=backbones.Backbone(
                  type='resnet',
                  resnet=backbones.ResNet(
                      model_id=50,
                      stem_type='v1',
                      resnetd_shortcut=True,
                      replace_stem_max_pool=True,
                      se_ratio=0.25,
                      stochastic_depth_drop_rate=0.0)),
              dropout_rate=0.25,
              norm_activation=common.NormActivation(
                  norm_momentum=0.0,
                  norm_epsilon=1e-5,
                  use_sync_bn=False,
                  activation='swish')),
          losses=Losses(l2_weight_decay=4e-5, label_smoothing=0.1),
          train_data=DataConfig(
              input_path=os.path.join(IMAGENET_INPUT_PATH_BASE, 'train*'),
              is_training=True,
              global_batch_size=train_batch_size,
              aug_type=common.Augmentation(
                  type='randaug', randaug=common.RandAugment(magnitude=10))),
          validation_data=DataConfig(
              input_path=os.path.join(IMAGENET_INPUT_PATH_BASE, 'valid*'),
              is_training=False,
              global_batch_size=eval_batch_size)),
      trainer=cfg.TrainerConfig(
          steps_per_loop=steps_per_epoch,
          summary_interval=steps_per_epoch,
          checkpoint_interval=steps_per_epoch,
          train_steps=350 * steps_per_epoch,
          validation_steps=IMAGENET_VAL_EXAMPLES // eval_batch_size,
          validation_interval=steps_per_epoch,
          optimizer_config=optimization.OptimizationConfig({
              'optimizer': {
                  'type': 'sgd',
                  'sgd': {'momentum': 0.9}
              },
              'ema': {
                  'average_decay': 0.9999,
                  'trainable_weights_only': False,
              },
              'learning_rate': {
                  'type': 'cosine',
                  'cosine': {
                      'initial_learning_rate': 1.6,
                      'decay_steps': 350 * steps_per_epoch
                  }
              },
              'warmup': {
                  'type': 'linear',
                  'linear': {
                      'warmup_steps': 5 * steps_per_epoch,
                      'warmup_learning_rate': 0
                  }
              }
          })),
      restrictions=[
          'task.train_data.is_training != None',
          'task.validation_data.is_training != None'
      ])
  return config

def panoptic_maskrcnn_resnetfpn_coco() -> cfg.ExperimentConfig:
  """COCO panoptic segmentation with Panoptic Mask R-CNN."""
  train_batch_size = 64
  eval_batch_size = 8
  steps_per_epoch = _COCO_TRAIN_EXAMPLES // train_batch_size
  validation_steps = _COCO_VAL_EXAMPLES // eval_batch_size

  config = cfg.ExperimentConfig(
      runtime=cfg.RuntimeConfig(mixed_precision_dtype='bfloat16'),
      task=PanopticMaskRCNNTask(
          init_checkpoint='gs://cloud-tpu-checkpoints/vision-2.0/resnet50_imagenet/ckpt-28080',  # pylint: disable=line-too-long
          init_checkpoint_modules=['backbone'],
          model=PanopticMaskRCNN(
              num_classes=91,
              input_size=[1024, 1024, 3],
              segmentation_model=SEGMENTATION_MODEL(
                  num_classes=91, head=SEGMENTATION_HEAD(level=3))),
          losses=Losses(l2_weight_decay=0.00004),
          train_data=DataConfig(
              input_path=os.path.join(_COCO_INPUT_PATH_BASE, 'train*'),
              is_training=True,
              global_batch_size=train_batch_size,
              parser=Parser(
                  aug_rand_hflip=True, aug_scale_min=0.8, aug_scale_max=1.25)),
          validation_data=DataConfig(
              input_path=os.path.join(_COCO_INPUT_PATH_BASE, 'val*'),
              is_training=False,
              global_batch_size=eval_batch_size,
              drop_remainder=False),
          annotation_file=os.path.join(_COCO_INPUT_PATH_BASE,
                                       'instances_val2017.json')),
      trainer=cfg.TrainerConfig(
          train_steps=22500,
          validation_steps=validation_steps,
          validation_interval=steps_per_epoch,
          steps_per_loop=steps_per_epoch,
          summary_interval=steps_per_epoch,
          checkpoint_interval=steps_per_epoch,
          optimizer_config=optimization.OptimizationConfig({
              'optimizer': {
                  'type': 'sgd',
                  'sgd': {'momentum': 0.9}
              },
              'learning_rate': {
                  'type': 'stepwise',
                  'stepwise': {
                      'boundaries': [15000, 20000],
                      'values': [0.12, 0.012, 0.0012],
                  }
              },
              'warmup': {
                  'type': 'linear',
                  'linear': {
                      'warmup_steps': 500,
                      'warmup_learning_rate': 0.0067
                  }
              }
          })),
      restrictions=[
          'task.train_data.is_training != None',
          'task.validation_data.is_training != None'
      ])
  return config

def image_classification_imagenet_revnet() -> cfg.ExperimentConfig:
  """Returns a revnet config for image classification on imagenet."""
  train_batch_size = 4096
  eval_batch_size = 4096
  steps_per_epoch = IMAGENET_TRAIN_EXAMPLES // train_batch_size
  config = cfg.ExperimentConfig(
      task=ImageClassificationTask(
          model=ImageClassificationModel(
              num_classes=1001,
              input_size=[224, 224, 3],
              backbone=backbones.Backbone(
                  type='revnet', revnet=backbones.RevNet(model_id=56)),
              norm_activation=common.NormActivation(
                  norm_momentum=0.9, norm_epsilon=1e-5, use_sync_bn=False),
              add_head_batch_norm=True),
          losses=Losses(l2_weight_decay=1e-4),
          train_data=DataConfig(
              input_path=os.path.join(IMAGENET_INPUT_PATH_BASE, 'train*'),
              is_training=True,
              global_batch_size=train_batch_size),
          validation_data=DataConfig(
              input_path=os.path.join(IMAGENET_INPUT_PATH_BASE, 'valid*'),
              is_training=False,
              global_batch_size=eval_batch_size)),
      trainer=cfg.TrainerConfig(
          steps_per_loop=steps_per_epoch,
          summary_interval=steps_per_epoch,
          checkpoint_interval=steps_per_epoch,
          train_steps=90 * steps_per_epoch,
          validation_steps=IMAGENET_VAL_EXAMPLES // eval_batch_size,
          validation_interval=steps_per_epoch,
          optimizer_config=optimization.OptimizationConfig({
              'optimizer': {
                  'type': 'sgd',
                  'sgd': {'momentum': 0.9}
              },
              'learning_rate': {
                  'type': 'stepwise',
                  'stepwise': {
                      'boundaries': [
                          30 * steps_per_epoch, 60 * steps_per_epoch,
                          80 * steps_per_epoch
                      ],
                      'values': [0.8, 0.08, 0.008, 0.0008]
                  }
              },
              'warmup': {
                  'type': 'linear',
                  'linear': {
                      'warmup_steps': 5 * steps_per_epoch,
                      'warmup_learning_rate': 0
                  }
              }
          })),
      restrictions=[
          'task.train_data.is_training != None',
          'task.validation_data.is_training != None'
      ])
  return config

def image_classification_imagenet_vit_pretrain() -> cfg.ExperimentConfig:
  """Image classification on imagenet with vision transformer."""
  train_batch_size = 4096
  eval_batch_size = 4096
  steps_per_epoch = IMAGENET_TRAIN_EXAMPLES // train_batch_size
  config = cfg.ExperimentConfig(
      task=ImageClassificationTask(
          model=ImageClassificationModel(
              num_classes=1001,
              input_size=[224, 224, 3],
              kernel_initializer='zeros',
              backbone=backbones.Backbone(
                  type='vit',
                  vit=backbones.VisionTransformer(
                      model_name='vit-b16', representation_size=768))),
          losses=Losses(l2_weight_decay=0.0),
          train_data=DataConfig(
              input_path=os.path.join(IMAGENET_INPUT_PATH_BASE, 'train*'),
              is_training=True,
              global_batch_size=train_batch_size),
          validation_data=DataConfig(
              input_path=os.path.join(IMAGENET_INPUT_PATH_BASE, 'valid*'),
              is_training=False,
              global_batch_size=eval_batch_size)),
      trainer=cfg.TrainerConfig(
          steps_per_loop=steps_per_epoch,
          summary_interval=steps_per_epoch,
          checkpoint_interval=steps_per_epoch,
          train_steps=300 * steps_per_epoch,
          validation_steps=IMAGENET_VAL_EXAMPLES // eval_batch_size,
          validation_interval=steps_per_epoch,
          optimizer_config=optimization.OptimizationConfig({
              'optimizer': {
                  'type': 'adamw',
                  'adamw': {
                      'weight_decay_rate': 0.3,
                      'include_in_weight_decay': r'.*(kernel|weight):0$',
                      'gradient_clip_norm': 0.0
                  }
              },
              'learning_rate': {
                  'type': 'cosine',
                  'cosine': {
                      'initial_learning_rate': 0.003 * train_batch_size / 4096,
                      'decay_steps': 300 * steps_per_epoch,
                  }
              },
              'warmup': {
                  'type': 'linear',
                  'linear': {
                      'warmup_steps': 10000,
                      'warmup_learning_rate': 0
                  }
              }
          })),
      restrictions=[
          'task.train_data.is_training != None',
          'task.validation_data.is_training != None'
      ])
  return config

def wmt_transformer_large() -> cfg.ExperimentConfig:
  """WMT Transformer Large.

  Please refer to tensorflow_models/official/nlp/data/train_sentencepiece.py
  to generate sentencepiece_model and pass
  --params_override=task.sentencepiece_model_path='YOUR_PATH'
  to the train script.
  """
  learning_rate = 2.0
  hidden_size = 1024
  learning_rate *= (hidden_size**-0.5)
  warmup_steps = 16000
  train_steps = 300000
  token_batch_size = 24576
  encdecoder = translation.EncDecoder(
      num_attention_heads=16, intermediate_size=hidden_size * 4)
  config = cfg.ExperimentConfig(
      task=translation.TranslationConfig(
          model=translation.ModelConfig(
              encoder=encdecoder,
              decoder=encdecoder,
              embedding_width=hidden_size,
              padded_decode=True,
              decode_max_length=100),
          train_data=wmt_dataloader.WMTDataConfig(
              tfds_name='wmt14_translate/de-en',
              tfds_split='train',
              src_lang='en',
              tgt_lang='de',
              is_training=True,
              global_batch_size=token_batch_size,
              static_batch=True,
              max_seq_length=64),
          validation_data=wmt_dataloader.WMTDataConfig(
              tfds_name='wmt14_translate/de-en',
              tfds_split='test',
              src_lang='en',
              tgt_lang='de',
              is_training=False,
              global_batch_size=32,
              static_batch=True,
              max_seq_length=100,
          ),
          sentencepiece_model_path=None,
      ),
      trainer=cfg.TrainerConfig(
          train_steps=train_steps,
          validation_steps=-1,
          steps_per_loop=1000,
          summary_interval=1000,
          checkpoint_interval=5000,
          validation_interval=5000,
          max_to_keep=1,
          optimizer_config=optimization.OptimizationConfig({
              'optimizer': {
                  'type': 'adam',
                  'adam': {
                      'beta_2': 0.997,
                      'epsilon': 1e-9,
                  },
              },
              'learning_rate': {
                  'type': 'power',
                  'power': {
                      'initial_learning_rate': learning_rate,
                      'power': -0.5,
                  }
              },
              'warmup': {
                  'type': 'linear',
                  'linear': {
                      'warmup_steps': warmup_steps,
                      'warmup_learning_rate': 0.0
                  }
              }
          })),
      restrictions=[
          'task.train_data.is_training != None',
          'task.sentencepiece_model_path != None',
      ])
  return config

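# --- Worked check of the schedule above. This assumes the 'power' schedule
# semantics lr(step) = initial_learning_rate * step**power; combined with
# initial_learning_rate = 2.0 * hidden_size**-0.5 and power = -0.5, this is
# the familiar "rsqrt with warmup" schedule from the original Transformer
# paper. The numbers below are illustrative arithmetic only.
hidden_size = 1024
initial = 2.0 * hidden_size**-0.5  # = 0.0625
lr_after_warmup = initial * 16000**-0.5  # ~4.9e-4 at the end of warmup
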
def image_classification_imagenet_deit_pretrain() -> cfg.ExperimentConfig:
  """Image classification on imagenet with vision transformer (DeiT recipe)."""
  train_batch_size = 4096  # originally was 1024 but 4096 better for tpu v3-32
  eval_batch_size = 4096  # originally was 1024 but 4096 better for tpu v3-32
  num_classes = 1001
  label_smoothing = 0.1
  steps_per_epoch = IMAGENET_TRAIN_EXAMPLES // train_batch_size
  config = cfg.ExperimentConfig(
      task=ImageClassificationTask(
          model=ImageClassificationModel(
              num_classes=num_classes,
              input_size=[224, 224, 3],
              kernel_initializer='zeros',
              backbone=backbones.Backbone(
                  type='vit',
                  vit=backbones.VisionTransformer(
                      model_name='vit-b16',
                      representation_size=768,
                      init_stochastic_depth_rate=0.1,
                      original_init=False,
                      transformer=backbones.Transformer(
                          dropout_rate=0.0, attention_dropout_rate=0.0)))),
          losses=Losses(
              l2_weight_decay=0.0,
              label_smoothing=label_smoothing,
              one_hot=False,
              soft_labels=True),
          train_data=DataConfig(
              input_path=os.path.join(IMAGENET_INPUT_PATH_BASE, 'train*'),
              is_training=True,
              global_batch_size=train_batch_size,
              aug_type=common.Augmentation(
                  type='randaug',
                  randaug=common.RandAugment(
                      magnitude=9, exclude_ops=['Cutout'])),
              mixup_and_cutmix=common.MixupAndCutmix(
                  label_smoothing=label_smoothing)),
          validation_data=DataConfig(
              input_path=os.path.join(IMAGENET_INPUT_PATH_BASE, 'valid*'),
              is_training=False,
              global_batch_size=eval_batch_size)),
      trainer=cfg.TrainerConfig(
          steps_per_loop=steps_per_epoch,
          summary_interval=steps_per_epoch,
          checkpoint_interval=steps_per_epoch,
          train_steps=300 * steps_per_epoch,
          validation_steps=IMAGENET_VAL_EXAMPLES // eval_batch_size,
          validation_interval=steps_per_epoch,
          optimizer_config=optimization.OptimizationConfig({
              'optimizer': {
                  'type': 'adamw',
                  'adamw': {
                      'weight_decay_rate': 0.05,
                      'include_in_weight_decay': r'.*(kernel|weight):0$',
                      'gradient_clip_norm': 0.0
                  }
              },
              'learning_rate': {
                  'type': 'cosine',
                  'cosine': {
                      'initial_learning_rate': 0.0005 * train_batch_size / 512,
                      'decay_steps': 300 * steps_per_epoch,
                  }
              },
              'warmup': {
                  'type': 'linear',
                  'linear': {
                      'warmup_steps': 5 * steps_per_epoch,
                      'warmup_learning_rate': 0
                  }
              }
          })),
      restrictions=[
          'task.train_data.is_training != None',
          'task.validation_data.is_training != None'
      ])
  return config

def seg_deeplabv3plus_scooter() -> cfg.ExperimentConfig:
  """Image segmentation on scooter dataset with resnet deeplabv3+.

  Barebones config for testing purposes (modify batch size, initial lr,
  steps per epoch, train input path, val input path).
  """
  scooter_path_glob = 'D:/data/test_data/val**'
  steps_per_epoch = 1
  output_stride = 16
  aspp_dilation_rates = [6, 12, 18]
  multigrid = [1, 2, 4]
  stem_type = 'v1'
  level = int(np.math.log2(output_stride))
  config = cfg.ExperimentConfig(
      task=SemanticSegmentationTask(
          model=SemanticSegmentationModel(
              num_classes=19,
              # Specifying the input size speeds up model inference; it does
              # not change the model size.
              input_size=[512, 512, 3],
              backbone=backbones.Backbone(
                  type='dilated_resnet',
                  dilated_resnet=backbones.DilatedResNet(
                      model_id=101,
                      output_stride=output_stride,
                      stem_type=stem_type,
                      multigrid=multigrid)),
              decoder=decoders.Decoder(
                  type='aspp',
                  aspp=decoders.ASPP(
                      level=level, dilation_rates=aspp_dilation_rates)),
              head=SegmentationHead(
                  level=level,
                  num_convs=2,
                  feature_fusion='deeplabv3plus',
                  low_level=2,
                  low_level_num_filters=48),
              norm_activation=common.NormActivation(
                  activation='swish',
                  norm_momentum=0.99,
                  norm_epsilon=1e-3,
                  use_sync_bn=True)),
          losses=Losses(l2_weight_decay=1e-4, ignore_label=250),
          train_data=DataConfig(
              input_path=scooter_path_glob,
              output_size=[512, 512],
              is_training=True,
              global_batch_size=1,
              aug_scale_min=0.5,
              aug_scale_max=2.0),
          validation_data=DataConfig(
              input_path=scooter_path_glob,
              output_size=[512, 512],
              is_training=False,
              global_batch_size=1,
              resize_eval_groundtruth=True,
              drop_remainder=False)),
          # resnet101
          # init_checkpoint='D:/repos/data_root/test_data/deeplab_cityscapes_pretrained/model.ckpt',
          # init_checkpoint_modules='all'),
          # init_checkpoint='gs://cloud-tpu-checkpoints/vision-2.0/deeplab/deeplab_resnet101_imagenet/ckpt-62400',
          # init_checkpoint_modules='backbone'),
      trainer=cfg.TrainerConfig(
          steps_per_loop=steps_per_epoch,
          summary_interval=steps_per_epoch,
          checkpoint_interval=steps_per_epoch,
          train_steps=500 * steps_per_epoch,
          validation_steps=1021,
          validation_interval=steps_per_epoch,
          continuous_eval_timeout=1,
          optimizer_config=optimization.OptimizationConfig({
              'optimizer': {
                  'type': 'sgd',
                  'sgd': {'momentum': 0.9}
              },
              'learning_rate': {
                  'type': 'polynomial',
                  'polynomial': {
                      'initial_learning_rate': 0.007,
                      'decay_steps': 500 * steps_per_epoch,
                      'end_learning_rate': 0.0,
                      'power': 0.9
                  }
              },
              'warmup': {
                  'type': 'linear',
                  'linear': {
                      'warmup_steps': 5 * steps_per_epoch,
                      'warmup_learning_rate': 0
                  }
              }
          })),
      restrictions=[
          'task.train_data.is_training != None',
          'task.validation_data.is_training != None'
      ])
  return config

def prepare_config(self, teacher_block_num, student_block_num,
                   transfer_teacher_layers):
  # Use a small model for testing.
  task_config = distillation.BertDistillationTaskConfig(
      teacher_model=bert.PretrainerConfig(
          encoder=encoders.EncoderConfig(
              type='mobilebert',
              mobilebert=encoders.MobileBertEncoderConfig(
                  num_blocks=teacher_block_num)),
          cls_heads=[
              bert.ClsHeadConfig(
                  inner_dim=256,
                  num_classes=2,
                  dropout_rate=0.1,
                  name='next_sentence')
          ],
          mlm_activation='gelu'),
      student_model=bert.PretrainerConfig(
          encoder=encoders.EncoderConfig(
              type='mobilebert',
              mobilebert=encoders.MobileBertEncoderConfig(
                  num_blocks=student_block_num)),
          cls_heads=[
              bert.ClsHeadConfig(
                  inner_dim=256,
                  num_classes=2,
                  dropout_rate=0.1,
                  name='next_sentence')
          ],
          mlm_activation='relu'),
      train_data=pretrain_dataloader.BertPretrainDataConfig(
          input_path='dummy',
          max_predictions_per_seq=76,
          seq_length=512,
          global_batch_size=10),
      validation_data=pretrain_dataloader.BertPretrainDataConfig(
          input_path='dummy',
          max_predictions_per_seq=76,
          seq_length=512,
          global_batch_size=10))

  # Set only one step for each stage.
  progressive_config = distillation.BertDistillationProgressiveConfig()
  progressive_config.layer_wise_distill_config.transfer_teacher_layers = (
      transfer_teacher_layers)
  progressive_config.layer_wise_distill_config.num_steps = 1
  progressive_config.pretrain_distill_config.num_steps = 1

  optimization_config = optimization.OptimizationConfig(
      optimizer=optimization.OptimizerConfig(
          type='lamb',
          lamb=optimization.LAMBConfig(
              weight_decay_rate=0.0001,
              exclude_from_weight_decay=[
                  'LayerNorm', 'layer_norm', 'bias', 'no_norm'
              ])),
      learning_rate=optimization.LrConfig(
          type='polynomial',
          polynomial=optimization.PolynomialLrConfig(
              initial_learning_rate=1.5e-3,
              decay_steps=10000,
              end_learning_rate=1.5e-3)),
      warmup=optimization.WarmupConfig(
          type='linear',
          linear=optimization.LinearWarmupConfig(warmup_learning_rate=0)))

  exp_config = cfg.ExperimentConfig(
      task=task_config,
      trainer=prog_trainer_lib.ProgressiveTrainerConfig(
          progressive=progressive_config,
          optimizer_config=optimization_config))

  # Create a teacher model checkpoint.
  teacher_encoder = encoders.build_encoder(task_config.teacher_model.encoder)
  pretrainer_config = task_config.teacher_model
  if pretrainer_config.cls_heads:
    # Note: renamed the loop variable from `cfg` to `head_cfg` so it does not
    # shadow the `cfg` config module used elsewhere in this function.
    teacher_cls_heads = [
        layers.ClassificationHead(**head_cfg.as_dict())
        for head_cfg in pretrainer_config.cls_heads
    ]
  else:
    teacher_cls_heads = []

  masked_lm = layers.MobileBertMaskedLM(
      embedding_table=teacher_encoder.get_embedding_table(),
      activation=tf_utils.get_activation(pretrainer_config.mlm_activation),
      initializer=tf.keras.initializers.TruncatedNormal(
          stddev=pretrainer_config.mlm_initializer_range),
      name='cls/predictions')
  teacher_pretrainer = models.BertPretrainerV2(
      encoder_network=teacher_encoder,
      classification_heads=teacher_cls_heads,
      customized_masked_lm=masked_lm)

  # The model variables will be created after the forward call.
  _ = teacher_pretrainer(teacher_pretrainer.inputs)
  teacher_pretrainer_ckpt = tf.train.Checkpoint(
      **teacher_pretrainer.checkpoint_items)
  teacher_ckpt_path = os.path.join(self.get_temp_dir(), 'teacher_model.ckpt')
  teacher_pretrainer_ckpt.save(teacher_ckpt_path)
  exp_config.task.teacher_model_init_checkpoint = self.get_temp_dir()
  return exp_config

def deep_mask_head_rcnn_spinenet_coco() -> cfg.ExperimentConfig:
  """COCO object detection with deep mask head Mask R-CNN and SpineNet."""
  steps_per_epoch = 463
  coco_val_samples = 5000
  train_batch_size = 256
  eval_batch_size = 8

  config = cfg.ExperimentConfig(
      runtime=cfg.RuntimeConfig(mixed_precision_dtype='bfloat16'),
      task=DeepMaskHeadRCNNTask(
          annotation_file=os.path.join(
              maskrcnn_config.COCO_INPUT_PATH_BASE,
              'instances_val2017.json'),  # pytype: disable=wrong-keyword-args
          model=DeepMaskHeadRCNN(
              backbone=backbones.Backbone(
                  type='spinenet',
                  spinenet=backbones.SpineNet(
                      model_id='49', min_level=3, max_level=7)),
              decoder=decoders.Decoder(
                  type='identity', identity=decoders.Identity()),
              anchor=maskrcnn_config.Anchor(anchor_size=3),
              norm_activation=common.NormActivation(use_sync_bn=True),
              num_classes=91,
              input_size=[640, 640, 3],
              min_level=3,
              max_level=7,
              include_mask=True),  # pytype: disable=wrong-keyword-args
          losses=maskrcnn_config.Losses(l2_weight_decay=0.00004),
          train_data=maskrcnn_config.DataConfig(
              input_path=os.path.join(maskrcnn_config.COCO_INPUT_PATH_BASE,
                                      'train*'),
              is_training=True,
              global_batch_size=train_batch_size,
              parser=maskrcnn_config.Parser(
                  aug_rand_hflip=True, aug_scale_min=0.5, aug_scale_max=2.0)),
          validation_data=maskrcnn_config.DataConfig(
              input_path=os.path.join(maskrcnn_config.COCO_INPUT_PATH_BASE,
                                      'val*'),
              is_training=False,
              global_batch_size=eval_batch_size,
              drop_remainder=False)),  # pytype: disable=wrong-keyword-args
      trainer=cfg.TrainerConfig(
          train_steps=steps_per_epoch * 350,
          validation_steps=coco_val_samples // eval_batch_size,
          validation_interval=steps_per_epoch,
          steps_per_loop=steps_per_epoch,
          summary_interval=steps_per_epoch,
          checkpoint_interval=steps_per_epoch,
          optimizer_config=optimization.OptimizationConfig({
              'optimizer': {
                  'type': 'sgd',
                  'sgd': {'momentum': 0.9}
              },
              'learning_rate': {
                  'type': 'stepwise',
                  'stepwise': {
                      'boundaries': [steps_per_epoch * 320,
                                     steps_per_epoch * 340],
                      'values': [0.32, 0.032, 0.0032],
                  }
              },
              'warmup': {
                  'type': 'linear',
                  'linear': {
                      'warmup_steps': 2000,
                      'warmup_learning_rate': 0.0067
                  }
              }
          })),
      restrictions=[
          'task.train_data.is_training != None',
          'task.validation_data.is_training != None',
          'task.model.min_level == task.model.backbone.spinenet.min_level',
          'task.model.max_level == task.model.backbone.spinenet.max_level',
      ])
  return config

def panoptic_fpn_coco() -> cfg.ExperimentConfig:
  """COCO panoptic segmentation with Panoptic FPN."""
  train_batch_size = 64
  eval_batch_size = 8
  steps_per_epoch = _COCO_TRAIN_EXAMPLES // train_batch_size
  validation_steps = _COCO_VAL_EXAMPLES // eval_batch_size

  # The COCO panoptic dataset has category ids in [0, 200], inclusive:
  #   - id 0 is unused and represents the background class;
  #   - ids 1-91 are the 91 thing categories;
  #   - ids 92-200 are the 109 stuff categories.
  # For the segmentation task we keep id=0 for the background and map all
  # thing categories to id=1. The remaining 109 stuff categories are shifted
  # down by offset=90 (num_thing_categories - 1), so that the stuff categories
  # begin at id=2 and end at id=110.
  num_panoptic_categories = 201
  num_thing_categories = 91
  num_semantic_segmentation_classes = 111

  is_thing = [False]
  for idx in range(1, num_panoptic_categories):
    is_thing.append(True if idx <= num_thing_categories else False)

  config = cfg.ExperimentConfig(
      runtime=cfg.RuntimeConfig(
          mixed_precision_dtype='float32', enable_xla=True),
      task=PanopticMaskRCNNTask(
          init_checkpoint='gs://cloud-tpu-checkpoints/vision-2.0/resnet50_imagenet/ckpt-28080',  # pylint: disable=line-too-long
          init_checkpoint_modules=['backbone'],
          model=PanopticMaskRCNN(
              num_classes=91,
              input_size=[1024, 1024, 3],
              panoptic_segmentation_generator=PanopticSegmentationGenerator(
                  output_size=[640, 640], rescale_predictions=True),
              stuff_classes_offset=90,
              segmentation_model=SEGMENTATION_MODEL(
                  num_classes=num_semantic_segmentation_classes,
                  head=SEGMENTATION_HEAD(
                      level=2,
                      num_convs=0,
                      num_filters=128,
                      decoder_min_level=2,
                      decoder_max_level=6,
                      feature_fusion='panoptic_fpn_fusion'))),
          losses=Losses(l2_weight_decay=0.00004),
          train_data=DataConfig(
              input_path=os.path.join(_COCO_INPUT_PATH_BASE, 'train*'),
              is_training=True,
              global_batch_size=train_batch_size,
              parser=Parser(
                  aug_rand_hflip=True, aug_scale_min=0.8, aug_scale_max=1.25)),
          validation_data=DataConfig(
              input_path=os.path.join(_COCO_INPUT_PATH_BASE, 'val*'),
              is_training=False,
              global_batch_size=eval_batch_size,
              parser=Parser(
                  segmentation_resize_eval_groundtruth=False,
                  segmentation_groundtruth_padded_size=[640, 640]),
              drop_remainder=False),
          annotation_file=os.path.join(_COCO_INPUT_PATH_BASE,
                                       'instances_val2017.json'),
          segmentation_evaluation=semantic_segmentation.Evaluation(
              report_per_class_iou=False, report_train_mean_iou=False),
          panoptic_quality_evaluator=PanopticQualityEvaluator(
              num_categories=num_panoptic_categories,
              ignored_label=0,
              is_thing=is_thing,
              rescale_predictions=True)),
      trainer=cfg.TrainerConfig(
          train_steps=22500,
          validation_steps=validation_steps,
          validation_interval=steps_per_epoch,
          steps_per_loop=steps_per_epoch,
          summary_interval=steps_per_epoch,
          checkpoint_interval=steps_per_epoch,
          optimizer_config=optimization.OptimizationConfig({
              'optimizer': {
                  'type': 'sgd',
                  'sgd': {'momentum': 0.9}
              },
              'learning_rate': {
                  'type': 'stepwise',
                  'stepwise': {
                      'boundaries': [15000, 20000],
                      'values': [0.12, 0.012, 0.0012],
                  }
              },
              'warmup': {
                  'type': 'linear',
                  'linear': {
                      'warmup_steps': 500,
                      'warmup_learning_rate': 0.0067
                  }
              }
          })),
      restrictions=[
          'task.train_data.is_training != None',
          'task.validation_data.is_training != None'
      ])
  return config

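# --- Sketch (illustrative, not part of the original file) of the class-id
# remapping described in the comment inside panoptic_fpn_coco above: things
# collapse to id=1, stuff ids 92..200 shift down to 2..110, and id=0 stays
# background, yielding the 111 semantic segmentation classes configured there.
def remap_for_segmentation(panoptic_category_id: int) -> int:
  num_thing_categories = 91
  stuff_classes_offset = 90
  if panoptic_category_id == 0:
    return 0  # Background stays id=0.
  if panoptic_category_id <= num_thing_categories:
    return 1  # All thing categories map to a single class.
  return panoptic_category_id - stuff_classes_offset  # 92..200 -> 2..110.
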
def seg_deeplabv3_pascal() -> cfg.ExperimentConfig:
  """Image segmentation on pascal voc with resnet deeplabv3."""
  train_batch_size = 16
  eval_batch_size = 8
  steps_per_epoch = PASCAL_TRAIN_EXAMPLES // train_batch_size
  output_stride = 16
  # Note: standard DeepLab pairs rates [6, 12, 18] with output_stride=16;
  # [12, 24, 36] is the doubled setting normally used with output_stride=8.
  aspp_dilation_rates = [12, 24, 36]
  multigrid = [1, 2, 4]
  stem_type = 'v1'
  level = int(np.math.log2(output_stride))
  config = cfg.ExperimentConfig(
      task=SemanticSegmentationTask(
          model=SemanticSegmentationModel(
              num_classes=21,
              input_size=[None, None, 3],
              backbone=backbones.Backbone(
                  type='dilated_resnet',
                  dilated_resnet=backbones.DilatedResNet(
                      model_id=101,
                      output_stride=output_stride,
                      multigrid=multigrid,
                      stem_type=stem_type)),
              decoder=decoders.Decoder(
                  type='aspp',
                  aspp=decoders.ASPP(
                      level=level, dilation_rates=aspp_dilation_rates)),
              head=SegmentationHead(level=level, num_convs=0),
              norm_activation=common.NormActivation(
                  activation='swish',
                  norm_momentum=0.9997,
                  norm_epsilon=1e-3,
                  use_sync_bn=True)),
          losses=Losses(l2_weight_decay=1e-4),
          train_data=DataConfig(
              input_path=os.path.join(PASCAL_INPUT_PATH_BASE, 'train_aug*'),
              # TODO(arashwan): test changing size to 513 to match deeplab.
              output_size=[512, 512],
              is_training=True,
              global_batch_size=train_batch_size,
              aug_scale_min=0.5,
              aug_scale_max=2.0),
          validation_data=DataConfig(
              input_path=os.path.join(PASCAL_INPUT_PATH_BASE, 'val*'),
              output_size=[512, 512],
              is_training=False,
              global_batch_size=eval_batch_size,
              resize_eval_groundtruth=False,
              groundtruth_padded_size=[512, 512],
              drop_remainder=False),
          # resnet101
          init_checkpoint='gs://cloud-tpu-checkpoints/vision-2.0/deeplab/deeplab_resnet101_imagenet/ckpt-62400',  # pylint: disable=line-too-long
          init_checkpoint_modules='backbone'),
      trainer=cfg.TrainerConfig(
          steps_per_loop=steps_per_epoch,
          summary_interval=steps_per_epoch,
          checkpoint_interval=steps_per_epoch,
          train_steps=45 * steps_per_epoch,
          validation_steps=PASCAL_VAL_EXAMPLES // eval_batch_size,
          validation_interval=steps_per_epoch,
          optimizer_config=optimization.OptimizationConfig({
              'optimizer': {
                  'type': 'sgd',
                  'sgd': {'momentum': 0.9}
              },
              'learning_rate': {
                  'type': 'polynomial',
                  'polynomial': {
                      'initial_learning_rate': 0.007,
                      'decay_steps': 45 * steps_per_epoch,
                      'end_learning_rate': 0.0,
                      'power': 0.9
                  }
              },
              'warmup': {
                  'type': 'linear',
                  'linear': {
                      'warmup_steps': 5 * steps_per_epoch,
                      'warmup_learning_rate': 0
                  }
              }
          })),
      restrictions=[
          'task.train_data.is_training != None',
          'task.validation_data.is_training != None'
      ])
  return config

def mobilenet_edgetpu_base_experiment_config(
    model_name: str) -> cfg.ExperimentConfig:
  """Image classification on imagenet with mobilenet_edgetpu.

  Experiment config common across all mobilenet_edgetpu variants.

  Args:
    model_name: Name of the mobilenet_edgetpu model variant.

  Returns:
    ExperimentConfig
  """
  train_batch_size = 4096
  eval_batch_size = 4096
  steps_per_epoch = IMAGENET_TRAIN_EXAMPLES // train_batch_size
  mobilenet_edgetpu_config = MobilenetEdgeTPUModelConfig(
      num_classes=1001, input_size=[224, 224, 3])
  mobilenet_edgetpu_config.model_params.model_name = model_name
  config = cfg.ExperimentConfig(
      task=MobilenetEdgeTPUTaskConfig(
          model=mobilenet_edgetpu_config,
          losses=base_config.Losses(label_smoothing=0.1),
          train_data=base_config.DataConfig(
              input_path=os.path.join(IMAGENET_INPUT_PATH_BASE, 'train*'),
              is_training=True,
              global_batch_size=train_batch_size,
              dtype='bfloat16',
              aug_type=common.Augmentation(type='autoaug')),
          validation_data=base_config.DataConfig(
              input_path=os.path.join(IMAGENET_INPUT_PATH_BASE, 'valid*'),
              is_training=False,
              dtype='bfloat16',
              drop_remainder=False,
              global_batch_size=eval_batch_size)),
      trainer=cfg.TrainerConfig(
          steps_per_loop=steps_per_epoch,
          summary_interval=steps_per_epoch,
          checkpoint_interval=steps_per_epoch * 5,
          max_to_keep=10,
          train_steps=550 * steps_per_epoch,
          validation_steps=IMAGENET_VAL_EXAMPLES // eval_batch_size,
          validation_interval=steps_per_epoch,
          optimizer_config=optimization.OptimizationConfig({
              'optimizer': {
                  'type': 'rmsprop',
                  'rmsprop': {
                      'rho': 0.9,
                      'momentum': 0.9,
                      'epsilon': 0.001,
                  }
              },
              'ema': {
                  'average_decay': 0.99,
                  'trainable_weights_only': False,
              },
              'learning_rate': {
                  'type': 'exponential',
                  'exponential': {
                      'initial_learning_rate':
                          0.008 * (train_batch_size // 128),
                      'decay_steps': int(2.4 * steps_per_epoch),
                      'decay_rate': 0.97,
                      'staircase': True
                  }
              },
              'warmup': {
                  'type': 'linear',
                  'linear': {
                      'warmup_steps': 5 * steps_per_epoch,
                      'warmup_learning_rate': 0
                  }
              },
          })),
      restrictions=[
          'task.train_data.is_training != None',
          'task.validation_data.is_training != None'
      ])
  return config

def seg_deeplabv3plus_cityscapes() -> cfg.ExperimentConfig:
  """Image segmentation on cityscapes with resnet deeplabv3+."""
  train_batch_size = 16
  eval_batch_size = 16
  steps_per_epoch = CITYSCAPES_TRAIN_EXAMPLES // train_batch_size
  output_stride = 16
  aspp_dilation_rates = [6, 12, 18]
  multigrid = [1, 2, 4]
  stem_type = 'v1'
  level = int(np.log2(output_stride))
  config = cfg.ExperimentConfig(
      task=SemanticSegmentationTask(
          model=SemanticSegmentationModel(
              # Cityscapes uses only 19 semantic classes for
              # training/evaluation; the void (background) class is ignored
              # during both training and evaluation.
              num_classes=19,
              input_size=[None, None, 3],
              backbone=backbones.Backbone(
                  type='dilated_resnet',
                  dilated_resnet=backbones.DilatedResNet(
                      model_id=101,
                      output_stride=output_stride,
                      stem_type=stem_type,
                      multigrid=multigrid)),
              decoder=decoders.Decoder(
                  type='aspp',
                  aspp=decoders.ASPP(
                      level=level,
                      dilation_rates=aspp_dilation_rates,
                      pool_kernel_size=[512, 1024])),
              head=SegmentationHead(
                  level=level,
                  num_convs=2,
                  feature_fusion='deeplabv3plus',
                  low_level=2,
                  low_level_num_filters=48),
              norm_activation=common.NormActivation(
                  activation='swish',
                  norm_momentum=0.99,
                  norm_epsilon=1e-3,
                  use_sync_bn=True)),
          losses=Losses(l2_weight_decay=1e-4),
          train_data=DataConfig(
              input_path=os.path.join(CITYSCAPES_INPUT_PATH_BASE,
                                      'train_fine**'),
              crop_size=[512, 1024],
              output_size=[1024, 2048],
              is_training=True,
              global_batch_size=train_batch_size,
              aug_scale_min=0.5,
              aug_scale_max=2.0),
          validation_data=DataConfig(
              input_path=os.path.join(CITYSCAPES_INPUT_PATH_BASE,
                                      'val_fine*'),
              output_size=[1024, 2048],
              is_training=False,
              global_batch_size=eval_batch_size,
              resize_eval_groundtruth=True,
              drop_remainder=False),
          # resnet101
          init_checkpoint='gs://cloud-tpu-checkpoints/vision-2.0/deeplab/deeplab_resnet101_imagenet/ckpt-62400',
          init_checkpoint_modules='backbone'),
      trainer=cfg.TrainerConfig(
          steps_per_loop=steps_per_epoch,
          summary_interval=steps_per_epoch,
          checkpoint_interval=steps_per_epoch,
          train_steps=500 * steps_per_epoch,
          validation_steps=CITYSCAPES_VAL_EXAMPLES // eval_batch_size,
          validation_interval=steps_per_epoch,
          optimizer_config=optimization.OptimizationConfig({
              'optimizer': {
                  'type': 'sgd',
                  'sgd': {
                      'momentum': 0.9
                  }
              },
              'learning_rate': {
                  'type': 'polynomial',
                  'polynomial': {
                      'initial_learning_rate': 0.01,
                      'decay_steps': 500 * steps_per_epoch,
                      'end_learning_rate': 0.0,
                      'power': 0.9
                  }
              },
              'warmup': {
                  'type': 'linear',
                  'linear': {
                      'warmup_steps': 5 * steps_per_epoch,
                      'warmup_learning_rate': 0
                  }
              }
          })),
      restrictions=[
          'task.train_data.is_training != None',
          'task.validation_data.is_training != None'
      ])
  return config
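# Illustrative sketch (not part of the original config): the segmentation
# trainers above all use a polynomial ("poly") schedule,
#   lr(step) = (init - end) * (1 - step / decay_steps)**power + end,
# which decays 0.01 down to 0 over 500 epochs here. Warmup is ignored.
def _sketch_poly_lr(step: int, initial_lr: float, decay_steps: int,
                    end_lr: float = 0.0, power: float = 0.9) -> float:
  """Re-derives the polynomial learning-rate decay at `step`."""
  step = min(step, decay_steps)
  return (initial_lr - end_lr) * (1 - step / decay_steps)**power + end_lr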
from official.modeling import performance
from official.modeling.fast_training.progressive import train_lib
from official.modeling.fast_training.progressive import trainer as prog_trainer_lib
from official.nlp.data import pretrain_dataloader
from official.nlp.projects.mobilebert import distillation

FLAGS = flags.FLAGS

optimization_config = optimization.OptimizationConfig(
    optimizer=optimization.OptimizerConfig(
        type='lamb',
        lamb=optimization.LAMBConfig(
            weight_decay_rate=0.01,
            exclude_from_weight_decay=['LayerNorm', 'bias', 'norm'],
            clipnorm=1.0)),
    learning_rate=optimization.LrConfig(
        type='polynomial',
        polynomial=optimization.PolynomialLrConfig(
            initial_learning_rate=1.5e-3,
            decay_steps=10000,
            end_learning_rate=1.5e-3)),
    warmup=optimization.WarmupConfig(
        type='linear',
        linear=optimization.LinearWarmupConfig(warmup_learning_rate=0)))


# Copied from progressive/utils.py due to the private visibility issue.
def config_override(params, flags_obj):
  """Override ExperimentConfig according to flags."""
  # Change runtime.tpu to the real tpu.
  params.override({
      'runtime': {
          'tpu': flags_obj.tpu,
      }
  })
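# Note (illustrative, not part of the original file): because
# initial_learning_rate == end_learning_rate in the polynomial config above,
# the schedule is effectively a constant 1.5e-3 once warmup completes.
# `warmup_steps` below is an assumed placeholder; the LinearWarmupConfig
# above does not set it.
def _sketch_mobilebert_lamb_lr(step: int, warmup_steps: int = 1000) -> float:
  """Approximates the configured rate: linear warmup to a flat 1.5e-3."""
  peak = 1.5e-3
  if step < warmup_steps:
    return peak * step / warmup_steps
  return peak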
def retinanet_resnetfpn_coco() -> cfg.ExperimentConfig:
  """COCO object detection with RetinaNet."""
  train_batch_size = 256
  eval_batch_size = 8
  steps_per_epoch = COCO_TRAIN_EXAMPLES // train_batch_size
  config = cfg.ExperimentConfig(
      runtime=cfg.RuntimeConfig(mixed_precision_dtype='bfloat16'),
      task=RetinaNetTask(
          init_checkpoint='gs://cloud-tpu-checkpoints/vision-2.0/resnet50_imagenet/ckpt-28080',
          init_checkpoint_modules='backbone',
          annotation_file=os.path.join(COCO_INPUT_PATH_BASE,
                                       'instances_val2017.json'),
          model=RetinaNet(
              num_classes=91,
              input_size=[640, 640, 3],
              min_level=3,
              max_level=7),
          losses=Losses(l2_weight_decay=1e-4),
          train_data=DataConfig(
              input_path=os.path.join(COCO_INPUT_PATH_BASE, 'train*'),
              is_training=True,
              global_batch_size=train_batch_size,
              parser=Parser(
                  aug_rand_hflip=True, aug_scale_min=0.5, aug_scale_max=2.0)),
          validation_data=DataConfig(
              input_path=os.path.join(COCO_INPUT_PATH_BASE, 'val*'),
              is_training=False,
              global_batch_size=eval_batch_size)),
      trainer=cfg.TrainerConfig(
          train_steps=72 * steps_per_epoch,
          validation_steps=COCO_VAL_EXAMPLES // eval_batch_size,
          validation_interval=steps_per_epoch,
          steps_per_loop=steps_per_epoch,
          summary_interval=steps_per_epoch,
          checkpoint_interval=steps_per_epoch,
          optimizer_config=optimization.OptimizationConfig({
              'optimizer': {
                  'type': 'sgd',
                  'sgd': {
                      'momentum': 0.9
                  }
              },
              'learning_rate': {
                  'type': 'stepwise',
                  'stepwise': {
                      'boundaries': [
                          57 * steps_per_epoch, 67 * steps_per_epoch
                      ],
                      'values': [
                          0.32 * train_batch_size / 256.0,
                          0.032 * train_batch_size / 256.0,
                          0.0032 * train_batch_size / 256.0
                      ],
                  }
              },
              'warmup': {
                  'type': 'linear',
                  'linear': {
                      'warmup_steps': 500,
                      'warmup_learning_rate': 0.0067
                  }
              }
          })),
      restrictions=[
          'task.train_data.is_training != None',
          'task.validation_data.is_training != None'
      ])
  return config
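# Illustrative sketch (not part of the original config): the stepwise
# schedule above follows the linear scaling rule (base LR 0.32 at batch 256)
# with 10x drops at epoch boundaries 57 and 67. The 500-step linear warmup
# is ignored here.
def _sketch_retinanet_lr(step: int, train_batch_size: int,
                         steps_per_epoch: int) -> float:
  """Re-derives the stepwise learning rate at `step` (post warmup)."""
  scale = train_batch_size / 256.0
  if step < 57 * steps_per_epoch:
    return 0.32 * scale
  elif step < 67 * steps_per_epoch:
    return 0.032 * scale
  return 0.0032 * scale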
def seg_deeplabv2_pascal() -> cfg.ExperimentConfig:
  """Image segmentation on pascal voc with vggnet deeplabv2."""
  train_batch_size = 16
  eval_batch_size = 8
  steps_per_epoch = PASCAL_TRAIN_EXAMPLES // train_batch_size
  # For the Large FOV variant (unused by this ASPP config).
  fov_dilation_rates = 12
  kernel_size = 3
  # For ASPP.
  aspp_dilation_rates = [6, 12, 18, 24]
  output_stride = 16
  level = int(np.log2(output_stride))
  config = cfg.ExperimentConfig(
      task=SemanticSegmentationTask(
          model=SemanticSegmentationModel(
              num_classes=21,
              input_size=[None, None, 3],
              backbone=backbones.Backbone(
                  type='dilated_vggnet',
                  dilated_vggnet=backbones.DilatedVGGNet(model_id=16)),
              decoder=decoders.Decoder(
                  type='aspp',
                  aspp=decoders.ASPP(
                      level=level,
                      dilation_rates=aspp_dilation_rates,
                      stem_type='v2',
                      num_filters=1024,
                      use_sync_bn=True)),
              head=SegmentationHead(
                  level=level,
                  num_convs=0,
                  low_level_num_filters=1024,
                  feature_fusion='deeplabv2'),
              norm_activation=common.NormActivation(
                  activation='swish',
                  norm_momentum=0.9997,
                  norm_epsilon=1e-3,
                  use_sync_bn=True)),
          losses=Losses(l2_weight_decay=1e-4),
          train_data=DataConfig(
              input_path=os.path.join(PASCAL_INPUT_PATH_BASE, 'train_aug*'),
              # TODO(arashwan): test changing size to 513 to match deeplab.
              output_size=[512, 512],
              is_training=True,
              global_batch_size=train_batch_size,
              aug_scale_min=0.5,
              aug_scale_max=1.5),
          validation_data=DataConfig(
              input_path=os.path.join(PASCAL_INPUT_PATH_BASE, 'val*'),
              output_size=[512, 512],
              is_training=False,
              global_batch_size=eval_batch_size,
              resize_eval_groundtruth=False,
              groundtruth_padded_size=[512, 512],
              drop_remainder=False),
          # vggnet16
          init_checkpoint='/home/gunho1123/ckpt_vggnet16_deeplab/',
          init_checkpoint_modules='backbone'),
      trainer=cfg.TrainerConfig(
          steps_per_loop=steps_per_epoch,
          summary_interval=steps_per_epoch,
          checkpoint_interval=steps_per_epoch,
          train_steps=45 * steps_per_epoch,
          validation_steps=PASCAL_VAL_EXAMPLES // eval_batch_size,
          validation_interval=steps_per_epoch,
          optimizer_config=optimization.OptimizationConfig({
              'optimizer': {
                  'type': 'sgd',
                  'sgd': {
                      'momentum': 0.9
                  }
              },
              'learning_rate': {
                  'type': 'polynomial',
                  'polynomial': {
                      'initial_learning_rate': 0.007,
                      'decay_steps': 45 * steps_per_epoch,
                      'end_learning_rate': 0.0,
                      'power': 0.9
                  }
              },
              'warmup': {
                  'type': 'linear',
                  'linear': {
                      'warmup_steps': 5 * steps_per_epoch,
                      'warmup_learning_rate': 0
                  }
              }
          })),
      restrictions=[
          'task.train_data.is_training != None',
          'task.validation_data.is_training != None'
      ])
  return config
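# Illustrative usage sketch (the checkpoint argument is a hypothetical
# caller-supplied path, not a published checkpoint): the init_checkpoint
# above points at a machine-specific local directory, so callers would
# normally replace it via `override`, the same nested-dict API that
# config_override uses elsewhere in this codebase.
def _sketch_deeplabv2_with_checkpoint(ckpt_path: str) -> cfg.ExperimentConfig:
  """Builds the deeplabv2 config with a caller-supplied backbone checkpoint."""
  config = seg_deeplabv2_pascal()
  config.override({'task': {'init_checkpoint': ckpt_path}})
  return config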