class ImagenetFt(ImageNet):
  """Config values used when fine-tuning on ImageNet."""

  # Fine-tuning only runs for a limited number of steps, so it is given
  # lighter regularization.
  cfg = hparams.Config(
      model={
          'dropout_rate': 0.000001,
          'survival_prob': 0.8,
      },
      train={
          'batch_size': 512,
          'stages': 0,
          'epochs': 15,
          'optimizer': 'rmsprop',
          'lr_sched': 'constant',
          'lr_base': 0.0005,
          'lr_warmup_epoch': 1,
          'ema_decay': 0.9996,
          'weight_decay': 1e-5,
          'label_smoothing': 0.1,
          'min_steps': 10000,
          'isize': 1.0,
      },
      data={
          'ds_name': 'imagenettfds',
          'augname': 'ft',
          'mixup_alpha': 0,
          'cutmix_alpha': 0,
      },
  )
class ImageNet21k:
  """Train/eval config values for the ImageNet21k dataset."""

  cfg = hparams.Config(
      model={
          'dropout_rate': 0.000001,
          'survival_prob': 1.0,
      },
      data={
          'ds_name': 'imagenet21k',
          'multiclass': True,
          # No augmentation policy and no mixup/cutmix.
          'augname': None,
          'mixup_alpha': 0,
          'cutmix_alpha': 0,
      },
      train={
          'epochs': 60,
          'lr_base': 0.008,
          'lr_warmup_epoch': 1,
          'lr_sched': 'cosine',
          'label_smoothing': 0.0,
          'isize': 224,
          # Staged training is switched off.
          'stages': 0,
          'sched': False,
      },
      eval={
          'batch_size': 128,
          'isize': 224,
      },
  )
def distort_image_with_autoaugment(image, augmentation_name):
  """Applies a named AutoAugment policy to `image`.

  AutoAugment is described in https://arxiv.org/abs/1805.09501.

  Args:
    image: `Tensor` of shape [height, width, 3] representing an image.
    augmentation_name: Name of the AutoAugment policy to apply. Only `v0` and
      `test` are registered in this function; any other name is rejected.

  Returns:
    Whatever `build_and_apply_nas_policy` returns for the selected policy
    (presumably the augmented image -- confirm against that helper).

  Raises:
    ValueError: If `augmentation_name` is not a registered policy name.
  """
  policy_builders = {'v0': policy_v0, 'test': policy_vtest}
  build_policy = policy_builders.get(augmentation_name)
  if build_policy is None:
    raise ValueError(
        'Invalid augmentation_name: {}'.format(augmentation_name))

  policy = build_policy()
  # Constants handed to the individual augmentation ops.
  augmentation_params = hparams.Config(cutout_const=100, translate_const=250)
  return build_and_apply_nas_policy(policy, image, augmentation_params)
class ImageNet:
  """Train/eval config values for the ImageNet dataset."""

  cfg = hparams.Config(
      data={
          'ds_name': 'imagenet',
          'multiclass': False,
      },
      train={
          'epochs': 350,
          'lr_base': 0.016,
          'lr_warmup_epoch': 5,
          'lr_sched': 'exponential',
          'label_smoothing': 0.1,
      },
      eval={'batch_size': 8},
  )
def distort_image_with_randaugment(image, num_layers, magnitude):
  """Applies the RandAugment policy to `image`.

  RandAugment is described in https://arxiv.org/abs/1909.13719.

  For each of the `num_layers` layers one op index is drawn uniformly at
  random. Every candidate op is then wired in behind a `tf.cond`, so only the
  op whose index matches the draw transforms the image in that layer.

  Args:
    image: `Tensor` of shape [height, width, 3] representing an image.
    num_layers: Integer, the number of augmentation transformations to apply
      sequentially to an image. Represented as (N) in the paper. Usually best
      values will be in the range [1, 3].
    magnitude: Integer, shared magnitude across all augmentation operations.
      Represented as (M) in the paper. Usually best values are in the range
      [5, 30]. It is converted to `float` before use.

  Returns:
    The augmented version of `image`.
  """
  logging.info('Using RandAug.')
  fill_value = [128, 128, 128]
  augmentation_params = hparams.Config(cutout_const=40, translate_const=100)
  op_names = [
      'AutoContrast',
      'Equalize',
      'Invert',
      'Rotate',
      'Posterize',
      'Solarize',
      'Color',
      'Contrast',
      'Brightness',
      'Sharpness',
      'ShearX',
      'ShearY',
      'TranslateX',
      'TranslateY',
      'Cutout',
      'SolarizeAdd',
  ]
  num_ops = len(op_names)

  for layer_idx in range(num_layers):
    # Index of the single op that will actually run in this layer.
    chosen_idx = tf.random_uniform([], maxval=num_ops, dtype=tf.int32)
    layer_magnitude = float(magnitude)
    scope_name = 'randaug_layer_{}'.format(layer_idx)
    with tf.name_scope(scope_name):
      for op_idx, op_name in enumerate(op_names):
        prob = tf.random_uniform([], minval=0.2, maxval=0.8, dtype=tf.float32)
        func, _, args = _parse_policy_info(
            op_name, prob, layer_magnitude, fill_value, augmentation_params)

        # Defaults pin this iteration's op and arguments to the branch.
        def apply_selected(selected_func=func, selected_args=args):
          return selected_func(image, *selected_args)

        def keep_unchanged():
          return image

        image = tf.cond(
            tf.equal(op_idx, chosen_idx), apply_selected, keep_unchanged)
  return image
def efficientnetv2_config(model_name='efficientnetv2-s'):
  """Builds the config for an EfficientNetV2 model variant.

  Args:
    model_name: Key into `efficientnetv2_params` selecting the variant.

  Returns:
    An `hparams.Config` with `model`, `train`, `eval` and `data` sections
    filled from the variant's parameter tuple. Training always uses
    `stages=4` and `sched=True`, and the single mix value is used for both
    `mixup_alpha` and `cutmix_alpha`.
  """
  params = efficientnetv2_params[model_name]
  (block, width, depth, train_size, eval_size, dropout, randaug, mix,
   aug) = params

  model_section = {
      'model_name': model_name,
      'blocks_args': BlockDecoder().decode(block),
      'width_coefficient': width,
      'depth_coefficient': depth,
      'dropout_rate': dropout,
  }
  train_section = {'isize': train_size, 'stages': 4, 'sched': True}
  eval_section = {'isize': eval_size}
  data_section = {
      'augname': aug,
      'ram': randaug,
      'mixup_alpha': mix,
      'cutmix_alpha': mix,
  }
  return hparams.Config(
      model=model_section,
      train=train_section,
      eval=eval_section,
      data=data_section,
  )
def efficientnetv1_config(model_name='efficientnet-b0'):
  """Builds the config for an EfficientNetV1 model variant.

  Args:
    model_name: Key into `efficientnetv1_params` selecting the variant.

  Returns:
    An `hparams.Config` with `model`, `eval`, `train` and `data` sections.
    Every variant decodes the same `v1_b0_block_str` block definition and
    uses the `effnetv1_autoaug` augmentation name.
  """
  params = efficientnetv1_params[model_name]
  width_coefficient, depth_coefficient, isize, dropout_rate = params

  model_section = {
      'model_name': model_name,
      'blocks_args': BlockDecoder().decode(v1_b0_block_str),
      'width_coefficient': width_coefficient,
      'depth_coefficient': depth_coefficient,
      'dropout_rate': dropout_rate,
  }
  return hparams.Config(
      model=model_section,
      eval={'isize': isize},
      train={'isize': 0.8},  # 80% of eval size
      data={'augname': 'effnetv1_autoaug'},
  )
def _decode_block_string(self, block_string):
  """Decodes one block description string into a block config.

  The string is a list of tokens joined by `_`. A token is a key immediately
  followed by its value, where the value starts at the token's first digit
  (for example `k3` gives key `k` and value `3`). Tokens that contain no
  digit are ignored.

  Args:
    block_string: The block description; must be a `str`.

  Returns:
    An `hparams.Config` built from the keys `k` (kernel_size), `r`
    (num_repeat), `i` (input_filters), `o` (output_filters), `e`
    (expand_ratio) and `s` (strides), which are all required, plus the
    optional `se` (se_ratio, `None` when absent) and `c` (conv_type, 0 when
    absent).
  """
  assert isinstance(block_string, str)

  options = {}
  for token in block_string.split('_'):
    found = re.search(r'(\d.*)', token)
    if found is None:
      continue
    # Everything before the first digit is the key; the match is the value.
    options[token[:found.start()]] = found.group(1)

  kernel_size = int(options['k'])
  num_repeat = int(options['r'])
  input_filters = int(options['i'])
  output_filters = int(options['o'])
  expand_ratio = int(options['e'])
  if 'se' in options:
    se_ratio = float(options['se'])
  else:
    se_ratio = None
  strides = int(options['s'])
  conv_type = int(options.get('c', 0))

  return hparams.Config(
      kernel_size=kernel_size,
      num_repeat=num_repeat,
      input_filters=input_filters,
      output_filters=output_filters,
      expand_ratio=expand_ratio,
      se_ratio=se_ratio,
      strides=strides,
      conv_type=conv_type,
  )
class ImageNetInput:
  """Generates ImageNet input_fn from a series of TFRecord files.

  The expected data format is the one produced by the script at:
  https://github.com/tensorflow/tpu/blob/master/tools/datasets/imagenet_to_gcs.py
  """

  # Each split records its image count, a file pattern, and a `slice` object.
  # NOTE(review): train + minival is 1,281,165 images, two fewer than the
  # 1,281,167 listed for trainval -- confirm the counts are intended.
  cfg = hparams.Config(
      data_dir=None,
      num_classes=1000,
      multiclass=False,
      tfds_split=None,
      splits={
          'train': {
              'num_images': 1_256_144,
              'files': 'train*',
              'slice': slice(20, None),
          },
          'minival': {
              'num_images': 25_021,
              'files': 'train*',
              'slice': slice(0, 20),
          },
          'eval': {
              'num_images': 50_000,
              'files': 'val*',
              'slice': slice(0, None),
          },
          'trainval': {
              'num_images': 1_281_167,
              'files': 'train*',
              'slice': slice(0, None),
          },
      },
  )