def __init__(self,
             num_classes,
             num_convs=0,
             num_filters=256,
             use_separable_conv=False,
             num_fcs=2,
             fc_dims=1024,
             activation='relu',
             use_sync_bn=False,
             norm_momentum=0.99,
             norm_epsilon=0.001,
             kernel_regularizer=None,
             bias_regularizer=None,
             **kwargs):
    """Stores the hyper-parameters used to build the detection head.

    Args:
      num_classes: an integer for the number of classes.
      num_convs: `int`, number of intermediate conv layers placed before the
        FC layers.
      num_filters: `int`, number of filters of the intermediate conv layers.
      use_separable_conv: `bool`, whether separable conv layers are used.
      num_fcs: `int`, number of FC layers before the predictions.
      fc_dims: `int`, number of dimensions of the FC layers.
      activation: `string`, name of the activation to use, e.g. 'relu',
        'swish', etc.
      use_sync_bn: `bool`, whether to use synchronized batch normalization
        across different replicas.
      norm_momentum: `float`, the momentum parameter of the normalization
        layers.
      norm_epsilon: `float`, the epsilon parameter of the normalization
        layers.
      kernel_regularizer: `tf.keras.regularizers.Regularizer` object for the
        layer kernels.
      bias_regularizer: `tf.keras.regularizers.Regularizer` object for the
        biases.
      **kwargs: other keyword arguments passed to Layer.
    """
    super(DetectionHead, self).__init__(**kwargs)

    # Every constructor argument is kept verbatim in the config dict.
    self._config_dict = dict(
        num_classes=num_classes,
        num_convs=num_convs,
        num_filters=num_filters,
        use_separable_conv=use_separable_conv,
        num_fcs=num_fcs,
        fc_dims=fc_dims,
        activation=activation,
        use_sync_bn=use_sync_bn,
        norm_momentum=norm_momentum,
        norm_epsilon=norm_epsilon,
        kernel_regularizer=kernel_regularizer,
        bias_regularizer=bias_regularizer,
    )

    # The normalization axis follows the channel position of the data format.
    channels_last = tf.keras.backend.image_data_format() == 'channels_last'
    self._bn_axis = -1 if channels_last else 1
    self._activation = tf_utils.get_activation(activation)
def build_encoder(config: EncoderConfig,
                  embedding_layer: Optional[tf.keras.layers.Layer] = None,
                  encoder_cls=None,
                  bypass_config: bool = False):
    """Creates a Transformer encoder network described by an EncoderConfig.

    Args:
      config: the one-of encoder config, which provides the parameters of the
        chosen encoder.
      embedding_layer: an external embedding layer passed to the encoder. It
        is only forwarded on the default (BERT-schema) path.
      encoder_cls: an external encoder class not included in the supported
        encoders, usually used by gin.configurable.
      bypass_config: whether to ignore the config instance and create the
        object by calling `encoder_cls` with no arguments.

    Returns:
      An encoder instance.
    """
    encoder_type = config.type
    encoder_cfg = config.get()
    if not encoder_cls:
        encoder_cls = ENCODER_CLS[encoder_type]
    logging.info("Encoder class: %s to build...", encoder_cls.__name__)
    if bypass_config:
        return encoder_cls()

    def _truncated_normal():
        # A fresh initializer object is created for every use site.
        return tf.keras.initializers.TruncatedNormal(
            stddev=encoder_cfg.initializer_range)

    if encoder_cls.__name__ == "EncoderScaffold":
        embedding_cfg = {
            "vocab_size": encoder_cfg.vocab_size,
            "type_vocab_size": encoder_cfg.type_vocab_size,
            "hidden_size": encoder_cfg.hidden_size,
            "max_seq_length": encoder_cfg.max_position_embeddings,
            "initializer": _truncated_normal(),
            "dropout_rate": encoder_cfg.dropout_rate,
        }
        hidden_cfg = {
            "num_attention_heads": encoder_cfg.num_attention_heads,
            "intermediate_size": encoder_cfg.intermediate_size,
            "intermediate_activation": tf_utils.get_activation(
                encoder_cfg.hidden_activation),
            "dropout_rate": encoder_cfg.dropout_rate,
            "attention_dropout_rate": encoder_cfg.attention_dropout_rate,
            "kernel_initializer": _truncated_normal(),
        }
        scaffold_kwargs = {
            "embedding_cfg": embedding_cfg,
            "hidden_cfg": hidden_cfg,
            "num_hidden_instances": encoder_cfg.num_layers,
            "pooled_output_dim": encoder_cfg.hidden_size,
            "pooler_layer_initializer": _truncated_normal(),
            "return_all_layer_outputs": encoder_cfg.return_all_encoder_outputs,
            "dict_outputs": True,
        }
        return encoder_cls(**scaffold_kwargs)

    if encoder_type == "mobilebert":
        mobilebert_kwargs = {
            "word_vocab_size": encoder_cfg.word_vocab_size,
            "word_embed_size": encoder_cfg.word_embed_size,
            "type_vocab_size": encoder_cfg.type_vocab_size,
            "max_sequence_length": encoder_cfg.max_sequence_length,
            "num_blocks": encoder_cfg.num_blocks,
            "hidden_size": encoder_cfg.hidden_size,
            "num_attention_heads": encoder_cfg.num_attention_heads,
            "intermediate_size": encoder_cfg.intermediate_size,
            "intermediate_act_fn": encoder_cfg.hidden_activation,
            "hidden_dropout_prob": encoder_cfg.hidden_dropout_prob,
            "attention_probs_dropout_prob":
                encoder_cfg.attention_probs_dropout_prob,
            "intra_bottleneck_size": encoder_cfg.intra_bottleneck_size,
            "initializer_range": encoder_cfg.initializer_range,
            "use_bottleneck_attention": encoder_cfg.use_bottleneck_attention,
            "key_query_shared_bottleneck":
                encoder_cfg.key_query_shared_bottleneck,
            "num_feedforward_networks": encoder_cfg.num_feedforward_networks,
            "normalization_type": encoder_cfg.normalization_type,
            "classifier_activation": encoder_cfg.classifier_activation,
        }
        return encoder_cls(**mobilebert_kwargs)

    if encoder_type == "albert":
        albert_kwargs = {
            "vocab_size": encoder_cfg.vocab_size,
            "embedding_width": encoder_cfg.embedding_width,
            "hidden_size": encoder_cfg.hidden_size,
            "num_layers": encoder_cfg.num_layers,
            "num_attention_heads": encoder_cfg.num_attention_heads,
            "max_sequence_length": encoder_cfg.max_position_embeddings,
            "type_vocab_size": encoder_cfg.type_vocab_size,
            "intermediate_size": encoder_cfg.intermediate_size,
            "activation": tf_utils.get_activation(
                encoder_cfg.hidden_activation),
            "dropout_rate": encoder_cfg.dropout_rate,
            "attention_dropout_rate": encoder_cfg.attention_dropout_rate,
            "initializer": _truncated_normal(),
            "dict_outputs": True,
        }
        return encoder_cls(**albert_kwargs)

    if encoder_type == "bigbird":
        bigbird_kwargs = {
            "vocab_size": encoder_cfg.vocab_size,
            "hidden_size": encoder_cfg.hidden_size,
            "num_layers": encoder_cfg.num_layers,
            "num_attention_heads": encoder_cfg.num_attention_heads,
            "intermediate_size": encoder_cfg.intermediate_size,
            "activation": tf_utils.get_activation(
                encoder_cfg.hidden_activation),
            "dropout_rate": encoder_cfg.dropout_rate,
            "attention_dropout_rate": encoder_cfg.attention_dropout_rate,
            "num_rand_blocks": encoder_cfg.num_rand_blocks,
            "block_size": encoder_cfg.block_size,
            "max_position_embeddings": encoder_cfg.max_position_embeddings,
            "type_vocab_size": encoder_cfg.type_vocab_size,
            "initializer": _truncated_normal(),
            "embedding_width": encoder_cfg.embedding_width,
        }
        return encoder_cls(**bigbird_kwargs)

    if encoder_type == "xlnet":
        xlnet_kwargs = {
            "vocab_size": encoder_cfg.vocab_size,
            "num_layers": encoder_cfg.num_layers,
            "hidden_size": encoder_cfg.hidden_size,
            "num_attention_heads": encoder_cfg.num_attention_heads,
            "head_size": encoder_cfg.head_size,
            "inner_size": encoder_cfg.inner_size,
            "dropout_rate": encoder_cfg.dropout_rate,
            "attention_dropout_rate": encoder_cfg.attention_dropout_rate,
            "attention_type": encoder_cfg.attention_type,
            "bi_data": encoder_cfg.bi_data,
            "two_stream": encoder_cfg.two_stream,
            "tie_attention_biases": encoder_cfg.tie_attention_biases,
            "memory_length": encoder_cfg.memory_length,
            "clamp_length": encoder_cfg.clamp_length,
            "reuse_length": encoder_cfg.reuse_length,
            "inner_activation": encoder_cfg.inner_activation,
            "use_cls_mask": encoder_cfg.use_cls_mask,
            "embedding_width": encoder_cfg.embedding_width,
            # XLNet is the only branch using a (non-truncated) RandomNormal.
            "initializer": tf.keras.initializers.RandomNormal(
                stddev=encoder_cfg.initializer_range),
        }
        return encoder_cls(**xlnet_kwargs)

    # Uses the default BERTEncoder configuration schema to create the encoder.
    # If it does not match, please add a switch branch by the encoder type.
    default_kwargs = {
        "vocab_size": encoder_cfg.vocab_size,
        "hidden_size": encoder_cfg.hidden_size,
        "num_layers": encoder_cfg.num_layers,
        "num_attention_heads": encoder_cfg.num_attention_heads,
        "intermediate_size": encoder_cfg.intermediate_size,
        "activation": tf_utils.get_activation(encoder_cfg.hidden_activation),
        "dropout_rate": encoder_cfg.dropout_rate,
        "attention_dropout_rate": encoder_cfg.attention_dropout_rate,
        "max_sequence_length": encoder_cfg.max_position_embeddings,
        "type_vocab_size": encoder_cfg.type_vocab_size,
        "initializer": _truncated_normal(),
        "embedding_width": encoder_cfg.embedding_size,
        "embedding_layer": embedding_layer,
        "return_all_encoder_outputs": encoder_cfg.return_all_encoder_outputs,
        "dict_outputs": True,
    }
    return encoder_cls(**default_kwargs)
def __init__(
        self,
        backbone,
        normalize_feature,
        hidden_dim,
        hidden_layer_num,
        hidden_norm_args,
        projection_dim,
        input_specs: Optional[Mapping[str, tf.keras.layers.InputSpec]] = None,
        dropout_rate: float = 0.0,
        aggregate_endpoints: bool = False,
        kernel_initializer='random_uniform',
        kernel_regularizer=None,
        bias_regularizer=None,
        **kwargs):
    """Builds the video SSL model: backbone, pooling, MLP and projection head.

    Args:
      backbone: a 3d backbone network.
      normalize_feature: whether to L2-normalize the pooled backbone feature.
      hidden_dim: `int` number of hidden units in the MLP.
      hidden_layer_num: `int` number of hidden layers in the MLP.
      hidden_norm_args: object exposing `use_sync_bn`, `norm_momentum`,
        `norm_epsilon` and `activation` for the MLP normalization layers.
      projection_dim: `int` number of output dimensions of the MLP.
      input_specs: mapping of `tf.keras.layers.InputSpec` specs of the input
        tensors; when empty or None a single 5-D 'image' spec with 3 channels
        is used.
      dropout_rate: `float` rate for dropout regularization. Only recorded in
        the config dict by this constructor.
      aggregate_endpoints: `bool`, pool and concatenate all endpoints instead
        of using only the final endpoint.
      kernel_initializer: kernel initializer for the dense layer. Only
        recorded in the config dict by this constructor.
      kernel_regularizer: tf.keras.regularizers.Regularizer object. Default to
        None.
      bias_regularizer: tf.keras.regularizers.Regularizer object. Default to
        None.
      **kwargs: keyword arguments to be passed.
    """
    if not input_specs:
        input_specs = {
            'image': layers.InputSpec(shape=[None, None, None, None, 3])
        }

    self._self_setattr_tracking = False
    self._config_dict = dict(
        backbone=backbone,
        normalize_feature=normalize_feature,
        hidden_dim=hidden_dim,
        hidden_layer_num=hidden_layer_num,
        use_sync_bn=hidden_norm_args.use_sync_bn,
        norm_momentum=hidden_norm_args.norm_momentum,
        norm_epsilon=hidden_norm_args.norm_epsilon,
        activation=hidden_norm_args.activation,
        projection_dim=projection_dim,
        input_specs=input_specs,
        dropout_rate=dropout_rate,
        aggregate_endpoints=aggregate_endpoints,
        kernel_initializer=kernel_initializer,
        kernel_regularizer=kernel_regularizer,
        bias_regularizer=bias_regularizer,
    )
    self._input_specs = input_specs
    self._kernel_regularizer = kernel_regularizer
    self._bias_regularizer = bias_regularizer
    self._backbone = backbone

    # One symbolic input per spec, with the batch dimension stripped.
    inputs = {}
    for spec_name, spec in input_specs.items():
        inputs[spec_name] = tf.keras.Input(shape=spec.shape[1:])
    endpoints = backbone(inputs['image'])

    if aggregate_endpoints:
        pooled_feats = [
            tf.keras.layers.GlobalAveragePooling3D()(endpoint)
            for endpoint in endpoints.values()
        ]
        x = tf.concat(pooled_feats, axis=1)
    else:
        # Only the endpoint with the largest key is pooled.
        last_key = max(endpoints.keys())
        x = tf.keras.layers.GlobalAveragePooling3D()(endpoints[last_key])

    # L2 Normalize feature after backbone
    if normalize_feature:
        x = tf.nn.l2_normalize(x, axis=-1)

    # MLP hidden layers: Dense -> (Sync)BatchNorm -> activation.
    cfg = self._config_dict
    for _ in range(hidden_layer_num):
        x = tf.keras.layers.Dense(hidden_dim)(x)
        norm_cls = (
            tf.keras.layers.experimental.SyncBatchNormalization
            if cfg['use_sync_bn'] else tf.keras.layers.BatchNormalization)
        x = norm_cls(
            momentum=cfg['norm_momentum'], epsilon=cfg['norm_epsilon'])(x)
        x = tf_utils.get_activation(cfg['activation'])(x)

    # Projection head
    x = tf.keras.layers.Dense(projection_dim)(x)

    super(VideoSSLModel, self).__init__(inputs=inputs, outputs=x, **kwargs)
def _make_block_basic(self, input_tensor, first_block=True, filters=64,
                      stride=2, radix=1, avd=False, avd_first=False,
                      is_first=False):
    """Builds a pre-activation basic block (BN -> act -> conv, twice) + shortcut.

    Args:
      input_tensor: input feature map (channels-last layout is hard-coded in
        every layer created here).
      first_block: unused by this method.
      filters: number of output filters of both 3x3 convolutions.
      stride: spatial stride applied by the block.
      radix: when >= 1 the first convolution is a split-attention conv,
        otherwise a plain 3x3 convolution.
      avd: whether to down-sample with an average pooling layer instead of a
        strided convolution (only effective if `stride > 1` or `is_first`).
      avd_first: apply the average pooling before (True) or after (False) the
        first convolution.
      is_first: see `avd`.

    Returns:
      The sum of the residual branch and the shortcut branch.
    """
    pre_act = BatchNormalization(axis=self.channel_axis,
                                 epsilon=1.001e-5)(input_tensor)
    pre_act = tf_utils.get_activation(self.activation)(pre_act)
    short_cut = pre_act

    inplanes = input_tensor.shape[-1]
    if stride != 1 or inplanes != filters * self.block_expansion:
        if self.avg_down:
            # Down-sample by pooling; the pooling is a no-op when dilated.
            pool_step = stride if self.dilation == 1 else 1
            short_cut = AveragePooling2D(
                pool_size=pool_step, strides=pool_step, padding="same",
                data_format="channels_last")(short_cut)
            proj_stride = 1
        else:
            proj_stride = stride
        short_cut = Conv2D(filters, kernel_size=1, strides=proj_stride,
                           padding="same", kernel_initializer="he_normal",
                           use_bias=False,
                           data_format="channels_last")(short_cut)

    group_width = int(
        filters * (self.bottleneck_width / 64.0)) * self.cardinality

    avd = avd and (stride > 1 or is_first)
    if avd:
        # The pooling layer takes over the striding of the first conv.
        avd_layer = AveragePooling2D(pool_size=3, strides=stride,
                                     padding="same",
                                     data_format="channels_last")
        stride = 1

    x = pre_act
    if avd and avd_first:
        x = avd_layer(x)

    if radix >= 1:
        x = self._SplAtConv2d(x, filters=group_width, kernel_size=3,
                              stride=stride, dilation=self.dilation,
                              groups=self.cardinality, radix=radix)
    else:
        x = Conv2D(filters, kernel_size=3, strides=stride, padding="same",
                   kernel_initializer="he_normal",
                   dilation_rate=self.dilation, use_bias=False,
                   data_format="channels_last")(x)

    if avd and not avd_first:
        x = avd_layer(x)

    x = BatchNormalization(axis=self.channel_axis, epsilon=1.001e-5)(x)
    x = tf_utils.get_activation(self.activation)(x)
    x = Conv2D(filters, kernel_size=3, strides=1, padding="same",
               kernel_initializer="he_normal", dilation_rate=self.dilation,
               use_bias=False, data_format="channels_last")(x)
    return Add()([x, short_cut])
def _build_scale_permuted_network(self, net, input_width,
                                  weighted_fusion=False):
    """Builds scale-permuted network.

    Args:
      net: list of feature tensors produced by the stem blocks. The list is
        extended in place with the output of every block built here.
      input_width: width of the network input, used to derive the expected
        width of every level (`ceil(input_width / 2**level)`).
      weighted_fusion: if True, parents are combined with learned,
        ReLU-clamped, normalized scalar weights instead of a plain sum.

    Returns:
      A dict mapping `str(level)` to the feature tensor of each output block.

    Raises:
      ValueError: if two output blocks share the same level, or an output
        level lies outside `[self._min_level, self._max_level]`.
    """
    net_sizes = [int(math.ceil(input_width / 2**2))] * len(net)
    net_block_fns = [self._init_block_fn] * len(net)
    num_outgoing_connections = [0] * len(net)

    endpoints = {}
    for i, block_spec in enumerate(self._block_specs):
        # Find out specs for the target block.
        target_width = int(math.ceil(input_width / 2**block_spec.level))
        target_num_filters = int(FILTER_SIZE_MAP[block_spec.level] *
                                 self._filter_size_scale)
        target_block_fn = block_spec.block_fn

        # Resample then merge input0 and input1.
        parents = []
        input0 = block_spec.input_offsets[0]
        input1 = block_spec.input_offsets[1]
        for offset in (input0, input1):
            resampled = self._resample_with_alpha(
                inputs=net[offset],
                input_width=net_sizes[offset],
                input_block_fn=net_block_fns[offset],
                target_width=target_width,
                target_num_filters=target_num_filters,
                target_block_fn=target_block_fn,
                alpha=self._resample_alpha)
            parents.append(resampled)
            num_outgoing_connections[offset] += 1
        x0 = parents[0]

        # Merge 0 outdegree blocks to the output block.
        if block_spec.is_output:
            for j, (j_feat, j_connections) in enumerate(
                    zip(net, num_outgoing_connections)):
                if j_connections == 0 and (
                        j_feat.shape[2] == target_width and
                        j_feat.shape[3] == x0.shape[3]):
                    parents.append(j_feat)
                    num_outgoing_connections[j] += 1

        # pylint: disable=g-direct-tensorflow-import
        if weighted_fusion:
            dtype = parents[0].dtype
            parent_weights = [
                tf.nn.relu(
                    tf.cast(
                        tf.Variable(
                            1.0, name='block{}_fusion{}'.format(i, j)),
                        dtype=dtype)) for j in range(len(parents))
            ]
            weights_sum = tf.add_n(parent_weights)
            # The small constant guards against division by zero when all
            # weights are clamped to 0 by the ReLU.
            parents = [
                parent * weight / (weights_sum + 0.0001)
                for parent, weight in zip(parents, parent_weights)
            ]

        # Fuse all parent nodes then build a new block.
        x = tf_utils.get_activation(self._activation_fn)(tf.add_n(parents))
        x = self._block_group(
            inputs=x,
            filters=target_num_filters,
            strides=1,
            block_fn_cand=target_block_fn,
            block_repeats=self._block_repeats,
            stochastic_depth_drop_rate=nn_layers.get_stochastic_depth_rate(
                self._init_stochastic_depth_rate, i + 1,
                len(self._block_specs)),
            name='scale_permuted_block_{}'.format(i + 1))

        net.append(x)
        net_sizes.append(target_width)
        net_block_fns.append(target_block_fn)
        num_outgoing_connections.append(0)

        # Save output feats.
        if block_spec.is_output:
            # Endpoints are keyed by the *string* level, so the duplicate
            # check must use the same key type; testing the raw int level
            # could never match and silently overwrote earlier outputs.
            level_key = str(block_spec.level)
            if level_key in endpoints:
                raise ValueError(
                    'Duplicate feats found for output level {}.'.format(
                        block_spec.level))
            if (block_spec.level < self._min_level or
                    block_spec.level > self._max_level):
                raise ValueError(
                    'Output level is out of range [{}, {}]'.format(
                        self._min_level, self._max_level))
            endpoints[level_key] = x

    return endpoints
def __init__(self,
             model_id,
             input_specs=layers.InputSpec(shape=[None, None, None, 3]),
             se_ratio=0.0,
             stochastic_depth_drop_rate=0.0,
             kernel_initializer='VarianceScaling',
             kernel_regularizer=None,
             bias_regularizer=None,
             activation='relu',
             use_sync_bn=False,
             norm_momentum=0.99,
             norm_epsilon=0.001,
             **kwargs):
    """EfficientNet initialization function.

    Args:
      model_id: `str` model id of EfficientNet; used as key into
        `SCALING_MAP` to look up the width and depth scales.
      input_specs: `tf.keras.layers.InputSpec` specs of the input tensor.
      se_ratio: `float` squeeze and excitation ratio for inverted bottleneck
        blocks.
      stochastic_depth_drop_rate: `float` drop rate for drop connect layer.
      kernel_initializer: kernel_initializer for convolutional layers.
      kernel_regularizer: tf.keras.regularizers.Regularizer object for Conv2D.
        Default to None.
      bias_regularizer: tf.keras.regularizers.Regularizer object for Conv2D.
        Default to None.
      activation: `str` name of the activation function.
      use_sync_bn: if True, use synchronized batch normalization.
      norm_momentum: `float` normalization momentum for the moving average.
      norm_epsilon: `float` small float added to variance to avoid dividing by
        zero.
      **kwargs: keyword arguments to be passed.
    """
    self._model_id = model_id
    self._input_specs = input_specs
    self._se_ratio = se_ratio
    self._stochastic_depth_drop_rate = stochastic_depth_drop_rate
    self._use_sync_bn = use_sync_bn
    self._activation = activation
    self._kernel_initializer = kernel_initializer
    self._norm_momentum = norm_momentum
    self._norm_epsilon = norm_epsilon
    self._kernel_regularizer = kernel_regularizer
    self._bias_regularizer = bias_regularizer
    if use_sync_bn:
        self._norm = layers.experimental.SyncBatchNormalization
    else:
        self._norm = layers.BatchNormalization

    if tf.keras.backend.image_data_format() == 'channels_last':
        bn_axis = -1
    else:
        bn_axis = 1

    # Build EfficientNet.
    inputs = tf.keras.Input(shape=input_specs.shape[1:])
    width_scale = SCALING_MAP[model_id]['width_scale']
    depth_scale = SCALING_MAP[model_id]['depth_scale']

    # Build stem.
    x = layers.Conv2D(
        filters=nn_layers.round_filters(32, width_scale),
        kernel_size=3,
        strides=2,
        use_bias=False,
        padding='same',
        kernel_initializer=self._kernel_initializer,
        kernel_regularizer=self._kernel_regularizer,
        bias_regularizer=self._bias_regularizer)(inputs)
    x = self._norm(
        axis=bn_axis, momentum=norm_momentum, epsilon=norm_epsilon)(x)
    x = tf_utils.get_activation(activation)(x)

    # Build intermediate blocks. Endpoint levels start at 2 and advance by
    # one for every block group flagged as an output.
    endpoints = {}
    endpoint_level = 2
    decoded_specs = block_spec_decoder(EN_B0_BLOCK_SPECS, width_scale,
                                       depth_scale)

    for i, specs in enumerate(decoded_specs):
        x = self._block_group(
            inputs=x, specs=specs, name='block_group_{}'.format(i))
        if specs.is_output:
            endpoints[str(endpoint_level)] = x
            endpoint_level += 1

    # Build output specs for downstream tasks. `get_shape` must be *called*:
    # storing the bound method would hand consumers a callable instead of a
    # TensorShape. Note the final classification endpoint added below is not
    # part of these specs.
    self._output_specs = {
        level: endpoint.get_shape() for level, endpoint in endpoints.items()
    }

    # Build the final conv for classification.
    x = layers.Conv2D(
        filters=nn_layers.round_filters(1280, width_scale),
        kernel_size=1,
        strides=1,
        use_bias=False,
        padding='same',
        kernel_initializer=self._kernel_initializer,
        kernel_regularizer=self._kernel_regularizer,
        bias_regularizer=self._bias_regularizer)(x)
    x = self._norm(
        axis=bn_axis, momentum=norm_momentum, epsilon=norm_epsilon)(x)
    endpoints[str(endpoint_level)] = tf_utils.get_activation(activation)(x)

    super(EfficientNet, self).__init__(
        inputs=inputs, outputs=endpoints, **kwargs)
def __init__(self, model_id,
             input_specs=InputSpec(shape=[None, None, None, 3]),
             stem_type='v1', activation="relu", dropout_rate=0.2, radix=2,
             groups=1, bottleneck_width=64, block_expansion=4, avg_down=True,
             avd=True, avd_first=False, preact=False,
             using_basic_block=False, using_cb=False):
    """Builds the ResNeSt backbone as a functional Keras model.

    The model outputs a dict of endpoints keyed '2'..'5', one per stage.
    Shapes of intermediate tensors are printed to stdout while building.

    Args:
      model_id: key into `RESNEST_SPECS` selecting the per-stage block counts
        and the stem width.
      input_specs: `InputSpec` of the input tensor.
      stem_type: the deep stem is used when this equals 'v1'.
      activation: name of the activation function.
      dropout_rate: stored on the instance; not used by this constructor.
      radix: stored on the instance as `self.radix`.
      groups: stored on the instance as `self.cardinality`.
      bottleneck_width: stored on the instance.
      block_expansion: stored on the instance.
      avg_down: stored on the instance.
      avd: stored on the instance.
      avd_first: stored on the instance.
      preact: if False, BN + activation are applied right after the stem; if
        True, they are applied after the stem max-pooling instead.
      using_basic_block: stored on the instance.
      using_cb: if True, every stage is preceded by an assisting stage whose
        composite-layer output is added to the stage input.
    """
    self.channel_axis = -1  # not for change
    self.model_id = model_id
    self.activation = activation
    self.input_specs = input_specs
    self.dropout_rate = dropout_rate
    self.blocks_set = RESNEST_SPECS[model_id]['blocks_set']
    self.radix = radix
    self.cardinality = groups
    self.bottleneck_width = bottleneck_width
    self.deep_stem = stem_type == 'v1'
    self.stem_width = RESNEST_SPECS[model_id]['stem_width']
    self.block_expansion = block_expansion
    self.avg_down = avg_down
    self.avd = avd
    self.avd_first = avd_first
    self.dilation = 1
    self.preact = preact
    self.using_basic_block = using_basic_block
    self.using_cb = using_cb

    # get_custom_objects().update({'mish': Mish(mish)})

    input_sig = Input(shape=self.input_specs.shape[1:])
    x = self._make_stem(input_sig, stem_width=self.stem_width,
                        deep_stem=self.deep_stem)

    if self.preact is False:
        x = BatchNormalization(axis=self.channel_axis, epsilon=1.001e-5)(x)
        x = tf_utils.get_activation(self.activation)(x)
    print("stem_out", x.shape)

    x = MaxPool2D(pool_size=3, strides=2, padding="same",
                  data_format="channels_last")(x)
    print("MaxPool2D out", x.shape)

    if self.preact is True:
        x = BatchNormalization(axis=self.channel_axis, epsilon=1.001e-5)(x)
        x = tf_utils.get_activation(self.activation)(x)

    endpoints = {}
    stage_filters = [64, 128, 256, 512]

    # Stage 0 keeps the resolution (stride 1) and feeds endpoint '2'.
    if self.using_cb:
        assist = self._make_layer(x, blocks=self.blocks_set[0], filters=64,
                                  stride=1, is_first=False)
        composite = self._make_Composite_layer(assist, filters=x.shape[-1],
                                               upsample=False)
        print('layer 0 db_com', composite.shape)
        x = Add()([composite, x])
    x = self._make_layer(x, blocks=self.blocks_set[0], filters=64, stride=1,
                         is_first=False)
    endpoints[str(2)] = x
    print("-" * 5, "layer 0 out", x.shape, "-" * 5)

    # Stages 1..3 each down-sample by 2 and feed endpoints '3'..'5'.
    for stage in range(1, 4):
        num_blocks = self.blocks_set[stage]
        num_filters = stage_filters[stage]
        if self.using_cb:
            assist = self._make_layer(x, blocks=num_blocks,
                                      filters=num_filters, stride=2)
            composite = self._make_Composite_layer(assist,
                                                   filters=x.shape[-1])
            print(f'layer {stage} db_com out {composite.shape}')
            x = Add()([composite, x])
        x = self._make_layer(x, blocks=num_blocks, filters=num_filters,
                             stride=2)
        print(f'----- layer {stage} out {x.shape} -----')
        endpoints[str(stage + 2)] = x

    self._output_specs = {
        level: feat.get_shape() for level, feat in endpoints.items()
    }
    print(self._output_specs)
    super(ResNest, self).__init__(inputs=input_sig, outputs=endpoints)
def __init__(self,
             params: yt8m_cfg.DbofModel,
             num_frames: int = 30,
             num_classes: int = 3862,
             input_specs: layers.InputSpec = layers.InputSpec(
                 shape=[None, None, 1152]),
             kernel_regularizer: Optional[
                 tf.keras.regularizers.Regularizer] = None,
             activation: str = "relu",
             use_sync_bn: bool = False,
             norm_momentum: float = 0.99,
             norm_epsilon: float = 0.001,
             **kwargs):
    """YT8M initialization function.

    Args:
      params: model configuration parameters
      num_frames: `int` number of frames in a single input.
      num_classes: `int` number of classes in dataset.
      input_specs: `tf.keras.layers.InputSpec` specs of the input tensor.
        [batch_size x num_frames x num_features]
      kernel_regularizer: tf.keras.regularizers.Regularizer object. Default to
        None.
      activation: A `str` of name of the activation function.
      use_sync_bn: If True, use synchronized batch normalization.
      norm_momentum: A `float` of normalization momentum for the moving
        average.
      norm_epsilon: A `float` added to variance to avoid dividing by zero.
      **kwargs: keyword arguments to be passed.
    """
    self._self_setattr_tracking = False
    self._config_dict = {
        "input_specs": input_specs,
        "num_classes": num_classes,
        "num_frames": num_frames,
        "params": params
    }
    self._num_classes = num_classes
    self._input_specs = input_specs
    # Resolve the activation callable up front: the local name `activation`
    # is rebound to tensors further down.
    self._act_fn = tf_utils.get_activation(activation)
    if use_sync_bn:
        self._norm = layers.experimental.SyncBatchNormalization
    else:
        self._norm = layers.BatchNormalization
    if tf.keras.backend.image_data_format() == "channels_last":
        bn_axis = -1
    else:
        bn_axis = 1

    # [batch_size x num_frames x num_features]
    feature_size = input_specs.shape[-1]
    # shape 'excluding' batch_size
    model_input = tf.keras.Input(shape=self._input_specs.shape[1:])
    reshaped_input = tf.reshape(model_input, [-1, feature_size])
    tf.summary.histogram("input_hist", model_input)

    # configure model
    if params.add_batch_norm:
        reshaped_input = self._norm(
            axis=bn_axis,
            momentum=norm_momentum,
            epsilon=norm_epsilon,
            name="input_bn")(reshaped_input)

    # activation = reshaped input * cluster weights
    if params.cluster_size > 0:
        activation = layers.Dense(
            params.cluster_size,
            kernel_regularizer=kernel_regularizer,
            kernel_initializer=tf.random_normal_initializer(
                stddev=1 / tf.sqrt(tf.cast(feature_size, tf.float32))))(
                    reshaped_input)

    if params.add_batch_norm:
        activation = self._norm(
            axis=bn_axis,
            momentum=norm_momentum,
            epsilon=norm_epsilon,
            name="cluster_bn")(activation)
    else:
        # `feature_size` is a Python int and the sqrt op does not accept
        # integer dtypes, so cast to float32 first (same as the Dense kernel
        # initializer above).
        cluster_biases = tf.Variable(
            tf.random_normal_initializer(
                stddev=1 / tf.sqrt(tf.cast(feature_size, tf.float32)))(
                    shape=[params.cluster_size]),
            name="cluster_biases")
        tf.summary.histogram("cluster_biases", cluster_biases)
        activation += cluster_biases

    activation = self._act_fn(activation)
    tf.summary.histogram("cluster_output", activation)

    if params.use_context_gate_cluster_layer:
        pooling_method = None
        norm_args = dict(
            axis=bn_axis,
            momentum=norm_momentum,
            epsilon=norm_epsilon,
            name="context_gate_bn")
        activation = utils.context_gate(
            activation,
            normalizer_fn=self._norm,
            normalizer_params=norm_args,
            pooling_method=pooling_method,
            hidden_layer_size=params.context_gate_cluster_bottleneck_size,
            kernel_regularizer=kernel_regularizer)

    # Restore the frame axis that was flattened into the batch axis above,
    # then pool over frames.
    activation = tf.reshape(activation,
                            [-1, num_frames, params.cluster_size])
    activation = utils.frame_pooling(activation, params.pooling_method)

    # activation = activation * hidden1_weights
    activation = layers.Dense(
        params.hidden_size,
        kernel_regularizer=kernel_regularizer,
        kernel_initializer=tf.random_normal_initializer(
            stddev=1 / tf.sqrt(tf.cast(params.cluster_size, tf.float32))))(
                activation)

    if params.add_batch_norm:
        activation = self._norm(
            axis=bn_axis,
            momentum=norm_momentum,
            epsilon=norm_epsilon,
            name="hidden1_bn")(activation)
    else:
        hidden1_biases = tf.Variable(
            tf.random_normal_initializer(stddev=0.01)(
                shape=[params.hidden_size]),
            name="hidden1_biases")
        tf.summary.histogram("hidden1_biases", hidden1_biases)
        activation += hidden1_biases

    activation = self._act_fn(activation)
    tf.summary.histogram("hidden1_output", activation)

    # The aggregation classifier class is looked up by name in
    # `yt8m_agg_models`.
    aggregated_model = getattr(yt8m_agg_models,
                               params.yt8m_agg_classifier_model)
    norm_args = dict(
        axis=bn_axis, momentum=norm_momentum, epsilon=norm_epsilon)
    output = aggregated_model().create_model(
        model_input=activation,
        vocab_size=self._num_classes,
        num_mixtures=params.agg_model.num_mixtures,
        normalizer_fn=self._norm,
        normalizer_params=norm_args,
        l2_penalty=params.agg_model.l2_penalty)

    super().__init__(
        inputs=model_input, outputs=output.get("predictions"), **kwargs)
def __init__(
        self,
        level: Union[int, str],
        num_convs: int = 2,
        num_filters: int = 256,
        kernel_size: int = 3,
        use_depthwise_convolution: bool = False,
        upsample_factor: int = 1,
        low_level: Optional[List[int]] = None,
        low_level_num_filters: Optional[List[int]] = None,
        fusion_num_output_filters: int = 256,
        activation: str = 'relu',
        use_sync_bn: bool = False,
        norm_momentum: float = 0.99,
        norm_epsilon: float = 0.001,
        kernel_regularizer: Optional[tf.keras.regularizers.Regularizer] = None,
        bias_regularizer: Optional[tf.keras.regularizers.Regularizer] = None,
        **kwargs):
    """Initializes a panoptic deeplab head.

    Args:
      level: An `int` or `str`, level to use to build head.
      num_convs: An `int` number of stacked convolutions before the last
        prediction layer.
      num_filters: An `int` number to specify the number of filters used.
        Default is 256.
      kernel_size: An `int` number to specify the kernel size of the stacked
        convolutions before the last prediction layer.
      use_depthwise_convolution: A `bool` to specify whether to use depthwise
        separable convolutions.
      upsample_factor: An `int` number to specify the upsampling factor to
        generate finer mask. Default 1 means no upsampling is applied.
      low_level: An optional list of `int` backbone levels to be used for
        feature fusion.
      low_level_num_filters: An optional list of `int` reduced numbers of
        filters for the low level features before fusing them with higher
        level features.
      fusion_num_output_filters: An `int` number to specify the number of
        filters used by output layer of fusion module. Default is 256.
      activation: A `str` that indicates which activation is used, e.g.
        'relu', 'swish', etc.
      use_sync_bn: A `bool` that indicates whether to use synchronized batch
        normalization across different replicas.
      norm_momentum: A `float` of normalization momentum for the moving
        average.
      norm_epsilon: A `float` added to variance to avoid dividing by zero.
      kernel_regularizer: A `tf.keras.regularizers.Regularizer` object for
        Conv2D. Default is None.
      bias_regularizer: A `tf.keras.regularizers.Regularizer` object for
        Conv2D.
      **kwargs: Additional keyword arguments to be passed.
    """
    super(PanopticDeeplabHead, self).__init__(**kwargs)

    # All constructor arguments are recorded unchanged.
    self._config_dict = dict(
        level=level,
        num_convs=num_convs,
        num_filters=num_filters,
        kernel_size=kernel_size,
        use_depthwise_convolution=use_depthwise_convolution,
        upsample_factor=upsample_factor,
        low_level=low_level,
        low_level_num_filters=low_level_num_filters,
        fusion_num_output_filters=fusion_num_output_filters,
        activation=activation,
        use_sync_bn=use_sync_bn,
        norm_momentum=norm_momentum,
        norm_epsilon=norm_epsilon,
        kernel_regularizer=kernel_regularizer,
        bias_regularizer=bias_regularizer,
    )

    # The normalization axis follows the channel position of the data format.
    channels_last = tf.keras.backend.image_data_format() == 'channels_last'
    self._bn_axis = -1 if channels_last else 1
    self._activation = tf_utils.get_activation(activation)
def build(self, input_shape: Optional[Union[Sequence[int], tf.Tensor]]):
    """Build variables and child layers to prepare for calling.

    Creates, in order: the optional 1x1 expansion conv (only when
    `expand_ratio > 1`), the optional depthwise conv, the optional
    squeeze-and-excitation layer, the final 1x1 projection conv, the optional
    stochastic-depth layer and the quantized residual add.

    Args:
      input_shape: shape of the layer input; only forwarded to the parent
        class `build`.
    """
    # Quantization-aware variants of the conv layers. The kernel is listed as
    # the quantized weight and 'activation' as the quantized activation.
    # NOTE(review): the trailing `False` presumably disables output
    # quantization; confirm against Default8BitConvQuantizeConfig.
    conv2d_quantized = _quantize_wrapped_layer(
        tf.keras.layers.Conv2D,
        configs.Default8BitConvQuantizeConfig(['kernel'], ['activation'],
                                              False))
    depthwise_conv2d_quantized = _quantize_wrapped_layer(
        tf.keras.layers.DepthwiseConv2D,
        configs.Default8BitConvQuantizeConfig(['depthwise_kernel'],
                                              ['activation'], False))
    # Without expansion the later stages operate on the input filter count.
    expand_filters = self._in_filters
    if self._expand_ratio > 1:
        # First 1x1 conv for channel expansion.
        expand_filters = nn_layers.make_divisible(
            self._in_filters * self._expand_ratio, self._divisible_by)

        # When no depthwise conv follows, the expansion conv takes over the
        # block's kernel size and stride.
        expand_kernel = 1 if self._use_depthwise else self._kernel_size
        expand_stride = 1 if self._use_depthwise else self._strides

        self._conv0 = conv2d_quantized(
            filters=expand_filters,
            kernel_size=expand_kernel,
            strides=expand_stride,
            padding='same',
            use_bias=False,
            kernel_initializer=self._kernel_initializer,
            kernel_regularizer=self._kernel_regularizer,
            bias_regularizer=self._bias_regularizer,
            activation=NoOpActivation())
        # The normalization class is chosen based on the activation that
        # follows it.
        self._norm0 = self._norm_by_activation(self._activation)(
            axis=self._bn_axis,
            momentum=self._norm_momentum,
            epsilon=self._norm_epsilon)
        self._activation_layer = tfmot.quantization.keras.QuantizeWrapperV2(
            tf_utils.get_activation(self._activation, use_keras_layer=True),
            configs.Default8BitActivationQuantizeConfig())

    if self._use_depthwise:
        # Depthwise conv.
        self._conv1 = depthwise_conv2d_quantized(
            kernel_size=(self._kernel_size, self._kernel_size),
            strides=self._strides,
            padding='same',
            depth_multiplier=1,
            dilation_rate=self._dilation_rate,
            use_bias=False,
            depthwise_initializer=self._kernel_initializer,
            depthwise_regularizer=self._depthsize_regularizer,
            bias_regularizer=self._bias_regularizer,
            activation=NoOpActivation())
        self._norm1 = self._norm_by_activation(self._depthwise_activation)(
            axis=self._bn_axis,
            momentum=self._norm_momentum,
            epsilon=self._norm_epsilon)
        self._depthwise_activation_layer = (
            tfmot.quantization.keras.QuantizeWrapperV2(
                tf_utils.get_activation(self._depthwise_activation,
                                        use_keras_layer=True),
                configs.Default8BitActivationQuantizeConfig()))

    # Squeeze and excitation.
    # Only enabled for a ratio in (0, 1]; otherwise the attribute is None.
    if self._se_ratio and self._se_ratio > 0 and self._se_ratio <= 1:
        logging.info('Use Squeeze and excitation.')
        in_filters = self._in_filters
        if self._expand_se_in_filters:
            in_filters = expand_filters
        self._squeeze_excitation = qat_nn_layers.SqueezeExcitationQuantized(
            in_filters=in_filters,
            out_filters=expand_filters,
            se_ratio=self._se_ratio,
            divisible_by=self._divisible_by,
            round_down_protect=self._se_round_down_protect,
            kernel_initializer=self._kernel_initializer,
            kernel_regularizer=self._kernel_regularizer,
            bias_regularizer=self._bias_regularizer,
            activation=self._se_inner_activation,
            gating_activation=self._se_gating_activation)
    else:
        self._squeeze_excitation = None

    # Last 1x1 conv.
    self._conv2 = conv2d_quantized(
        filters=self._out_filters,
        kernel_size=1,
        strides=1,
        padding='same',
        use_bias=False,
        kernel_initializer=self._kernel_initializer,
        kernel_regularizer=self._kernel_regularizer,
        bias_regularizer=self._bias_regularizer,
        activation=NoOpActivation())
    # No activation follows the projection conv, so a different
    # normalization factory is used here than for the stages above.
    self._norm2 = self._norm_with_quantize(
        axis=self._bn_axis,
        momentum=self._norm_momentum,
        epsilon=self._norm_epsilon)

    # A falsy drop rate (0 or None) disables stochastic depth.
    if self._stochastic_depth_drop_rate:
        self._stochastic_depth = nn_layers.StochasticDepth(
            self._stochastic_depth_drop_rate)
    else:
        self._stochastic_depth = None
    # Quantized residual addition: no weights or activations are listed.
    # NOTE(review): the `True` presumably enables output quantization;
    # confirm against Default8BitQuantizeConfig.
    self._add = tfmot.quantization.keras.QuantizeWrapperV2(
        tf.keras.layers.Add(),
        configs.Default8BitQuantizeConfig([], [], True))

    super(InvertedBottleneckBlockQuantized, self).build(input_shape)
def pretrain_model(bert_config,
                   seq_length,
                   max_predictions_per_seq,
                   initializer=None,
                   use_next_sentence_label=True,
                   return_core_pretrainer_model=False):
  """Assembles the Keras model used for BERT pre-training.

  The model takes the token/mask/type ids plus the masked-LM positions, ids
  and weights (and optionally the next-sentence labels) as named int32 inputs
  and outputs the value produced by `BertPretrainLossAndMetricLayer`.

  Args:
    bert_config: Configuration that defines the core BERT model.
    seq_length: Maximum sequence length of the training data.
    max_predictions_per_seq: Maximum number of tokens in a sequence that are
      masked out and used for pretraining.
    initializer: Initializer for weights in `BertPretrainer`. When None, a
      `TruncatedNormal` with `stddev=bert_config.initializer_range` is used.
    use_next_sentence_label: Whether to add a `next_sentence_labels` input and
      feed it to the loss layer; otherwise None is passed to the loss layer.
    return_core_pretrainer_model: Whether to also return the `BertPretrainer`
      object.

  Returns:
    A tuple of (1) the pretraining Keras model, (2) the core BERT encoder from
    which to save weights after pretraining, and (3) the `BertPretrainer`
    object, present only if `return_core_pretrainer_model` is True.
  """

  def _int32_input(name, length):
    """Creates a named int32 Keras input of shape `(length,)`."""
    return tf.keras.layers.Input(shape=(length,), name=name, dtype=tf.int32)

  input_word_ids = _int32_input('input_word_ids', seq_length)
  input_mask = _int32_input('input_mask', seq_length)
  input_type_ids = _int32_input('input_type_ids', seq_length)
  masked_lm_positions = _int32_input('masked_lm_positions',
                                     max_predictions_per_seq)
  masked_lm_ids = _int32_input('masked_lm_ids', max_predictions_per_seq)
  masked_lm_weights = _int32_input('masked_lm_weights',
                                   max_predictions_per_seq)
  next_sentence_labels = (
      _int32_input('next_sentence_labels', 1)
      if use_next_sentence_label else None)

  transformer_encoder = get_transformer_encoder(bert_config, seq_length)
  if initializer is None:
    initializer = tf.keras.initializers.TruncatedNormal(
        stddev=bert_config.initializer_range)

  pretrainer_model = models.BertPretrainer(
      network=transformer_encoder,
      embedding_table=transformer_encoder.get_embedding_table(),
      num_classes=2,  # The next sentence prediction label has two classes.
      activation=tf_utils.get_activation(bert_config.hidden_act),
      num_token_predictions=max_predictions_per_seq,
      initializer=initializer,
      output='logits')
  pretrainer_outputs = pretrainer_model(
      [input_word_ids, input_mask, input_type_ids, masked_lm_positions])
  masked_lm_logits = pretrainer_outputs['masked_lm']
  sentence_logits = pretrainer_outputs['classification']

  loss_and_metrics = BertPretrainLossAndMetricLayer(
      vocab_size=bert_config.vocab_size)
  output_loss = loss_and_metrics(masked_lm_logits, sentence_logits,
                                 masked_lm_ids, masked_lm_weights,
                                 next_sentence_labels)

  model_inputs = {
      'input_word_ids': input_word_ids,
      'input_mask': input_mask,
      'input_type_ids': input_type_ids,
      'masked_lm_positions': masked_lm_positions,
      'masked_lm_ids': masked_lm_ids,
      'masked_lm_weights': masked_lm_weights,
  }
  if use_next_sentence_label:
    model_inputs['next_sentence_labels'] = next_sentence_labels

  keras_model = tf.keras.Model(inputs=model_inputs, outputs=output_loss)
  if return_core_pretrainer_model:
    return keras_model, transformer_encoder, pretrainer_model
  return keras_model, transformer_encoder
def build(self, input_shape: Optional[Union[Sequence[int], tf.Tensor]]):
  """Build variables and child layers to prepare for calling.

  Creates, in order: the optional shortcut projection and its normalization,
  the 1x1 -> 3x3 -> 1x1 convolution stack with a normalization and a
  quantize-wrapped activation after each convolution, the optional
  squeeze-and-excitation and stochastic-depth layers, and the quantize-wrapped
  `Add` layer.

  Args:
    input_shape: Shape of the layer input; forwarded unchanged to the parent
      class `build`.
  """
  # Conv2D wrapped for quantization; every convolution below is created
  # through this factory.
  conv2d_quantized = _quantize_wrapped_layer(
      tf.keras.layers.Conv2D,
      configs.Default8BitConvQuantizeConfig(['kernel'], ['activation'], False))
  if self._use_projection:
    if self._resnetd_shortcut:
      # Shortcut variant: the stride is applied by average pooling and the
      # following 1x1 convolution uses stride 1.
      self._shortcut0 = tf.keras.layers.AveragePooling2D(
          pool_size=2, strides=self._strides, padding='same')
      self._shortcut1 = conv2d_quantized(
          filters=self._filters * 4,
          kernel_size=1,
          strides=1,
          use_bias=False,
          kernel_initializer=self._kernel_initializer,
          kernel_regularizer=self._kernel_regularizer,
          bias_regularizer=self._bias_regularizer,
          activation=NoOpActivation())
    else:
      # Plain projection: a single strided 1x1 convolution.
      self._shortcut = conv2d_quantized(
          filters=self._filters * 4,
          kernel_size=1,
          strides=self._strides,
          use_bias=False,
          kernel_initializer=self._kernel_initializer,
          kernel_regularizer=self._kernel_regularizer,
          bias_regularizer=self._bias_regularizer,
          activation=NoOpActivation())

    # NOTE(review): nesting reconstructed from a whitespace-collapsed source;
    # the shortcut normalization is assumed to be shared by both projection
    # variants -- confirm against the layer's `call`.
    self._norm0 = self._norm_with_quantize(
        axis=self._bn_axis,
        momentum=self._norm_momentum,
        epsilon=self._norm_epsilon,
        trainable=self._bn_trainable)

  # First 1x1 convolution producing `filters` channels.
  self._conv1 = conv2d_quantized(
      filters=self._filters,
      kernel_size=1,
      strides=1,
      use_bias=False,
      kernel_initializer=self._kernel_initializer,
      kernel_regularizer=self._kernel_regularizer,
      bias_regularizer=self._bias_regularizer,
      activation=NoOpActivation())
  # norm1/norm2 use `self._norm`, whereas norm0/norm3 use
  # `self._norm_with_quantize`.
  self._norm1 = self._norm(
      axis=self._bn_axis,
      momentum=self._norm_momentum,
      epsilon=self._norm_epsilon,
      trainable=self._bn_trainable)
  self._activation1 = tfmot.quantization.keras.QuantizeWrapperV2(
      tf_utils.get_activation(self._activation, use_keras_layer=True),
      configs.Default8BitActivationQuantizeConfig())

  # 3x3 convolution; this is the convolution that applies the block stride
  # and dilation rate.
  self._conv2 = conv2d_quantized(
      filters=self._filters,
      kernel_size=3,
      strides=self._strides,
      dilation_rate=self._dilation_rate,
      padding='same',
      use_bias=False,
      kernel_initializer=self._kernel_initializer,
      kernel_regularizer=self._kernel_regularizer,
      bias_regularizer=self._bias_regularizer,
      activation=NoOpActivation())
  self._norm2 = self._norm(
      axis=self._bn_axis,
      momentum=self._norm_momentum,
      epsilon=self._norm_epsilon,
      trainable=self._bn_trainable)
  self._activation2 = tfmot.quantization.keras.QuantizeWrapperV2(
      tf_utils.get_activation(self._activation, use_keras_layer=True),
      configs.Default8BitActivationQuantizeConfig())

  # Final 1x1 convolution expanding to `filters * 4` channels.
  self._conv3 = conv2d_quantized(
      filters=self._filters * 4,
      kernel_size=1,
      strides=1,
      use_bias=False,
      kernel_initializer=self._kernel_initializer,
      kernel_regularizer=self._kernel_regularizer,
      bias_regularizer=self._bias_regularizer,
      activation=NoOpActivation())
  self._norm3 = self._norm_with_quantize(
      axis=self._bn_axis,
      momentum=self._norm_momentum,
      epsilon=self._norm_epsilon,
      trainable=self._bn_trainable)
  self._activation3 = tfmot.quantization.keras.QuantizeWrapperV2(
      tf_utils.get_activation(self._activation, use_keras_layer=True),
      configs.Default8BitActivationQuantizeConfig())

  # Squeeze-and-excitation is only created for a ratio in (0, 1]; otherwise
  # the attribute is set to None.
  if self._se_ratio and self._se_ratio > 0 and self._se_ratio <= 1:
    self._squeeze_excitation = qat_nn_layers.SqueezeExcitationQuantized(
        in_filters=self._filters * 4,
        out_filters=self._filters * 4,
        se_ratio=self._se_ratio,
        kernel_initializer=self._kernel_initializer,
        kernel_regularizer=self._kernel_regularizer,
        bias_regularizer=self._bias_regularizer)
  else:
    self._squeeze_excitation = None

  # Stochastic depth is only created for a truthy (non-zero) drop rate.
  if self._stochastic_depth_drop_rate:
    self._stochastic_depth = nn_layers.StochasticDepth(
        self._stochastic_depth_drop_rate)
  else:
    self._stochastic_depth = None
  # Quantize-wrapped element-wise addition layer.
  self._add = tfmot.quantization.keras.QuantizeWrapperV2(
      tf.keras.layers.Add(),
      configs.Default8BitQuantizeConfig([], [], True))

  super(BottleneckBlockQuantized, self).build(input_shape)
def __init__(
    self,
    num_classes: int,
    level: Union[int, str],
    num_convs: int = 2,
    num_filters: int = 256,
    use_depthwise_convolution: bool = False,
    prediction_kernel_size: int = 1,
    upsample_factor: int = 1,
    feature_fusion: Optional[str] = None,
    decoder_min_level: Optional[int] = None,
    decoder_max_level: Optional[int] = None,
    low_level: int = 2,
    low_level_num_filters: int = 48,
    num_decoder_filters: int = 256,
    activation: str = 'relu',
    use_sync_bn: bool = False,
    norm_momentum: float = 0.99,
    norm_epsilon: float = 0.001,
    kernel_regularizer: Optional[tf.keras.regularizers.Regularizer] = None,
    bias_regularizer: Optional[tf.keras.regularizers.Regularizer] = None,
    **kwargs):
  """Initializes a segmentation head.

  The constructor only records its arguments in `self._config_dict`, picks
  the batch-norm axis from the Keras image data format and creates the
  quantize-wrapped activation layer.

  NOTE(review): this docstring (like the original) names both
  `pyramid_fusion` and `panoptic_fpn_fusion` as fusion modes; confirm which
  spelling the rest of the class accepts.

  Args:
    num_classes: An `int` number of mask classification categories. The
      number of classes does not include the background class.
    level: An `int` or `str`, level to use to build the segmentation head.
    num_convs: An `int` number of stacked convolutions before the last
      prediction layer.
    num_filters: An `int` number of filters to use. Default is 256.
    use_depthwise_convolution: A `bool` specifying whether depthwise
      separable convolutions are used.
    prediction_kernel_size: An `int` kernel size of the prediction layer.
    upsample_factor: An `int` upsampling factor used to generate a finer
      mask. The default of 1 means no upsampling is applied.
    feature_fusion: One of `deeplabv3plus`, `pyramid_fusion`, or None. If
      `deeplabv3plus`, features from decoder_features[level] are fused with
      low level feature maps from the backbone. If `pyramid_fusion`,
      multiscale features are resized and fused at the target level.
    decoder_min_level: An `int` minimum decoder level to use in feature
      fusion. Only used when feature_fusion is `panoptic_fpn_fusion`.
    decoder_max_level: An `int` maximum decoder level to use in feature
      fusion. Only used when feature_fusion is `panoptic_fpn_fusion`.
    low_level: An `int` backbone level used for feature fusion when
      feature_fusion is `deeplabv3plus`.
    low_level_num_filters: An `int` reduced number of filters for the low
      level features before they are fused with higher level features. Only
      used when feature_fusion is `deeplabv3plus`.
    num_decoder_filters: An `int` number of filters in the decoder outputs.
      Only used when feature_fusion is `panoptic_fpn_fusion`.
    activation: A `str` naming the activation to use, e.g. 'relu', 'swish'.
    use_sync_bn: A `bool` indicating whether to use synchronized batch
      normalization across different replicas.
    norm_momentum: A `float` normalization momentum for the moving average.
    norm_epsilon: A `float` added to variance to avoid dividing by zero.
    kernel_regularizer: A `tf.keras.regularizers.Regularizer` object for
      Conv2D. Default is None.
    bias_regularizer: A `tf.keras.regularizers.Regularizer` object for
      Conv2D.
    **kwargs: Additional keyword arguments passed to the parent constructor.
  """
  super().__init__(**kwargs)

  # Keyword form yields the same string keys, in the same order, as a dict
  # literal would.
  self._config_dict = dict(
      num_classes=num_classes,
      level=level,
      num_convs=num_convs,
      num_filters=num_filters,
      use_depthwise_convolution=use_depthwise_convolution,
      prediction_kernel_size=prediction_kernel_size,
      upsample_factor=upsample_factor,
      feature_fusion=feature_fusion,
      decoder_min_level=decoder_min_level,
      decoder_max_level=decoder_max_level,
      low_level=low_level,
      low_level_num_filters=low_level_num_filters,
      num_decoder_filters=num_decoder_filters,
      activation=activation,
      use_sync_bn=use_sync_bn,
      norm_momentum=norm_momentum,
      norm_epsilon=norm_epsilon,
      kernel_regularizer=kernel_regularizer,
      bias_regularizer=bias_regularizer,
  )

  # Channels are the last axis for 'channels_last' and axis 1 otherwise.
  is_channels_last = (
      tf.keras.backend.image_data_format() == 'channels_last')
  self._bn_axis = -1 if is_channels_last else 1

  self._activation_layer = tfmot.quantization.keras.QuantizeWrapperV2(
      tf_utils.get_activation(activation, use_keras_layer=True),
      configs.Default8BitActivationQuantizeConfig())
def get_mc_dropout_transformer_encoder(bert_config,
                                       use_mc_dropout_mha=False,
                                       use_mc_dropout_att=False,
                                       use_mc_dropout_ffn=False,
                                       channel_wise_dropout_mha=False,
                                       channel_wise_dropout_att=False,
                                       channel_wise_dropout_ffn=False):
  """Builds a `DropoutTransformerEncoder` from a bert_config object.

  All six dropout flags are forwarded, unchanged and under the same names,
  to the `DropoutTransformerEncoder` constructor.

  Args:
    bert_config: A 'modeling.BertConfig' object.
    use_mc_dropout_mha: (bool) Whether to apply MC Dropout to the multi-head
      attention score layer.
    use_mc_dropout_att: (bool) Whether to apply MC Dropout to the attention
      output layer.
    use_mc_dropout_ffn: (bool) Whether to apply MC Dropout to the feedforward
      layer.
    channel_wise_dropout_mha: (bool) Presumably whether the dropout on the
      multi-head attention score layer is channel-wise; confirm against
      `DropoutTransformerEncoder`.
    channel_wise_dropout_att: (bool) Presumably whether the dropout on the
      attention output layer is channel-wise; confirm against
      `DropoutTransformerEncoder`.
    channel_wise_dropout_ffn: (bool) Presumably whether the dropout on the
      feedforward layer is channel-wise; confirm against
      `DropoutTransformerEncoder`.

  Returns:
    A DropoutTransformerEncoder object.
  """

  def _new_initializer():
    """Returns a fresh TruncatedNormal; instances are never shared."""
    return tf.keras.initializers.TruncatedNormal(
        stddev=bert_config.initializer_range)

  embedding_cfg = {
      'vocab_size': bert_config.vocab_size,
      'type_vocab_size': bert_config.type_vocab_size,
      'hidden_size': bert_config.hidden_size,
      'max_seq_length': bert_config.max_position_embeddings,
      'initializer': _new_initializer(),
      'dropout_rate': bert_config.hidden_dropout_prob,
  }
  hidden_cfg = {
      'num_attention_heads': bert_config.num_attention_heads,
      'intermediate_size': bert_config.intermediate_size,
      'intermediate_activation': tf_utils.get_activation(
          bert_config.hidden_act),
      'dropout_rate': bert_config.hidden_dropout_prob,
      'attention_dropout_rate': bert_config.attention_probs_dropout_prob,
      'kernel_initializer': _new_initializer(),
  }

  return DropoutTransformerEncoder(
      use_mc_dropout_mha=use_mc_dropout_mha,
      use_mc_dropout_att=use_mc_dropout_att,
      use_mc_dropout_ffn=use_mc_dropout_ffn,
      channel_wise_dropout_mha=channel_wise_dropout_mha,
      channel_wise_dropout_att=channel_wise_dropout_att,
      channel_wise_dropout_ffn=channel_wise_dropout_ffn,
      hidden_cfg=hidden_cfg,
      embedding_cfg=embedding_cfg,
      num_hidden_instances=bert_config.num_hidden_layers,
      pooled_output_dim=bert_config.hidden_size,
      pooler_layer_initializer=_new_initializer())
def __init__(
    self,
    model_id: int,
    input_specs: tf.keras.layers.InputSpec = layers.InputSpec(
        shape=[None, None, None, 3]),
    depth_multiplier: float = 1.0,
    stem_type: str = 'v0',
    resnetd_shortcut: bool = False,
    replace_stem_max_pool: bool = False,
    se_ratio: Optional[float] = None,
    init_stochastic_depth_rate: float = 0.0,
    activation: str = 'relu',
    use_sync_bn: bool = False,
    norm_momentum: float = 0.99,
    norm_epsilon: float = 0.001,
    kernel_initializer: str = 'VarianceScaling',
    kernel_regularizer: Optional[tf.keras.regularizers.Regularizer] = None,
    bias_regularizer: Optional[tf.keras.regularizers.Regularizer] = None,
    **kwargs):
  """Initializes a ResNet model.

  Builds the stem, the optional stride-2 replacement for the stem max pool
  and one block group per entry of `RESNET_SPECS[model_id]`, then hands the
  resulting functional graph to the parent constructor. The output of block
  group `i` is exposed as endpoint `str(i + 2)`.

  Args:
    model_id: An `int` of the depth of ResNet backbone model.
    input_specs: A `tf.keras.layers.InputSpec` of the input tensor.
    depth_multiplier: A `float` of the depth multiplier to uniformly scale up
      all layers in channel size. This argument is also referred to as
      `width_multiplier` in (https://arxiv.org/abs/2103.07579).
    stem_type: A `str` of stem type of ResNet. Default to `v0` (a single 7x7
      stride-2 convolution). If set to `v1`, three 3x3 convolutions are used
      instead, the ResNet-D type stem (https://arxiv.org/abs/1812.01187).
    resnetd_shortcut: A `bool` of whether to use ResNet-D shortcut in
      downsampling blocks.
    replace_stem_max_pool: A `bool` of whether to replace the max pool in the
      stem with a stride-2 conv.
    se_ratio: A `float` or None. Ratio of the Squeeze-and-Excitation layer.
    init_stochastic_depth_rate: A `float` of initial stochastic depth rate.
    activation: A `str` name of the activation function.
    use_sync_bn: If True, use synchronized batch normalization.
    norm_momentum: A `float` of normalization momentum for the moving
      average.
    norm_epsilon: A small `float` added to variance to avoid dividing by
      zero.
    kernel_initializer: A str for kernel initializer of convolutional layers.
    kernel_regularizer: A `tf.keras.regularizers.Regularizer` object for
      Conv2D. Default to None.
    bias_regularizer: A `tf.keras.regularizers.Regularizer` object for
      Conv2D. Default to None.
    **kwargs: Additional keyword arguments to be passed.

  Raises:
    ValueError: If `stem_type` is neither 'v0' nor 'v1', or if a spec names a
      block type other than 'residual' or 'bottleneck'.
  """
  self._model_id = model_id
  self._input_specs = input_specs
  self._depth_multiplier = depth_multiplier
  self._stem_type = stem_type
  self._resnetd_shortcut = resnetd_shortcut
  self._replace_stem_max_pool = replace_stem_max_pool
  self._se_ratio = se_ratio
  self._init_stochastic_depth_rate = init_stochastic_depth_rate
  self._use_sync_bn = use_sync_bn
  self._activation = activation
  self._norm_momentum = norm_momentum
  self._norm_epsilon = norm_epsilon
  self._norm = (
      layers.experimental.SyncBatchNormalization
      if use_sync_bn else layers.BatchNormalization)
  self._kernel_initializer = kernel_initializer
  self._kernel_regularizer = kernel_regularizer
  self._bias_regularizer = bias_regularizer

  channels_last = tf.keras.backend.image_data_format() == 'channels_last'
  bn_axis = -1 if channels_last else 1

  def _conv_norm_act(tensor, base_filters, kernel_size, strides):
    """Applies Conv2D, then normalization, then activation to `tensor`.

    The three layers are created and applied in exactly that order, so the
    layer creation sequence matches writing the three calls out by hand.
    """
    tensor = layers.Conv2D(
        filters=int(base_filters * self._depth_multiplier),
        kernel_size=kernel_size,
        strides=strides,
        use_bias=False,
        padding='same',
        kernel_initializer=self._kernel_initializer,
        kernel_regularizer=self._kernel_regularizer,
        bias_regularizer=self._bias_regularizer)(
            tensor)
    tensor = self._norm(
        axis=bn_axis, momentum=norm_momentum, epsilon=norm_epsilon)(
            tensor)
    return tf_utils.get_activation(activation, use_keras_layer=True)(tensor)

  # Build ResNet.
  inputs = tf.keras.Input(shape=input_specs.shape[1:])

  # Each stem entry is (base_filters, kernel_size, strides).
  if stem_type == 'v0':
    stem_layout = ((64, 7, 2),)
  elif stem_type == 'v1':
    stem_layout = ((32, 3, 2), (32, 3, 1), (64, 3, 1))
  else:
    raise ValueError('Stem type {} not supported.'.format(stem_type))

  x = inputs
  for base_filters, kernel_size, strides in stem_layout:
    x = _conv_norm_act(x, base_filters, kernel_size, strides)

  if replace_stem_max_pool:
    x = _conv_norm_act(x, 64, 3, 2)
  else:
    x = layers.MaxPool2D(pool_size=3, strides=2, padding='same')(x)

  endpoints = {}
  for group_index, spec in enumerate(RESNET_SPECS[model_id]):
    block_type = spec[0]
    if block_type not in ('residual', 'bottleneck'):
      raise ValueError('Block fn `{}` is not supported.'.format(block_type))
    if block_type == 'residual':
      block_fn = nn_blocks.ResidualBlock
    else:
      block_fn = nn_blocks.BottleneckBlock

    level = group_index + 2
    x = self._block_group(
        inputs=x,
        filters=int(spec[1] * self._depth_multiplier),
        # Only the first group keeps the resolution produced by the stem.
        strides=(1 if group_index == 0 else 2),
        block_fn=block_fn,
        block_repeats=spec[2],
        stochastic_depth_drop_rate=nn_layers.get_stochastic_depth_rate(
            self._init_stochastic_depth_rate, level, 5),
        name='block_group_l{}'.format(level))
    endpoints[str(level)] = x

  self._output_specs = {
      level_name: feature.get_shape()
      for level_name, feature in endpoints.items()
  }

  super(ResNet, self).__init__(inputs=inputs, outputs=endpoints, **kwargs)
def __init__(
    self,
    model_id: int,
    input_specs: tf.keras.layers.InputSpec = tf.keras.layers.InputSpec(
        shape=[None, None, None, 3]),
    activation: str = 'relu',
    use_sync_bn: bool = False,
    norm_momentum: float = 0.99,
    norm_epsilon: float = 0.001,
    kernel_initializer: str = 'VarianceScaling',
    kernel_regularizer: tf.keras.regularizers.Regularizer = None,
    **kwargs):
  """Initializes a RevNet model.

  Builds a 7x7 stride-2 convolution stem followed by normalization,
  activation and a 3x3 stride-2 max pool, then one reversible block group
  per entry of `REVNET_SPECS[model_id]`. The output of block group `i` is
  exposed as endpoint `str(i + 2)`.

  Args:
    model_id: An `int` of depth/id of the backbone model; used as the key
      into `REVNET_SPECS`.
    input_specs: A `tf.keras.layers.InputSpec` of the input tensor.
    activation: A `str` name of the activation function.
    use_sync_bn: If True, use synchronized batch normalization.
    norm_momentum: A `float` of normalization momentum for the moving average.
    norm_epsilon: A `float` added to variance to avoid dividing by zero.
    kernel_initializer: A str for kernel initializer of convolutional layers.
    kernel_regularizer: A `tf.keras.regularizers.Regularizer` object for
      Conv2D. Default to None.
    **kwargs: Additional keyword arguments to be passed.

  Raises:
    ValueError: If a spec names a block type other than 'residual' or
      'bottleneck', or if a spec's filter count is odd.
  """
  # Attributes are recorded before the parent constructor runs, which only
  # happens at the very end once the graph has been built.
  self._model_id = model_id
  self._input_specs = input_specs
  self._use_sync_bn = use_sync_bn
  self._activation = activation
  self._norm_momentum = norm_momentum
  self._norm_epsilon = norm_epsilon
  self._kernel_initializer = kernel_initializer
  self._kernel_regularizer = kernel_regularizer
  if use_sync_bn:
    self._norm = tf.keras.layers.experimental.SyncBatchNormalization
  else:
    self._norm = tf.keras.layers.BatchNormalization

  # Normalization axis: last axis for 'channels_last', axis 1 otherwise.
  axis = -1 if tf.keras.backend.image_data_format(
  ) == 'channels_last' else 1

  # Build RevNet.
  inputs = tf.keras.Input(shape=input_specs.shape[1:])

  # Stem: the filter count comes from the first spec entry.
  x = tf.keras.layers.Conv2D(
      filters=REVNET_SPECS[model_id][0][1],
      kernel_size=7,
      strides=2,
      use_bias=False,
      padding='same',
      kernel_initializer=self._kernel_initializer,
      kernel_regularizer=self._kernel_regularizer)(
          inputs)
  x = self._norm(
      axis=axis, momentum=norm_momentum, epsilon=norm_epsilon)(
          x)
  x = tf_utils.get_activation(activation)(x)
  x = tf.keras.layers.MaxPool2D(pool_size=3, strides=2, padding='same')(x)

  # Maps the level name (a string, starting at '2') to that group's output.
  endpoints = {}
  for i, spec in enumerate(REVNET_SPECS[model_id]):
    # spec is indexed as (block type, filters, block repeats).
    if spec[0] == 'residual':
      inner_block_fn = nn_blocks.ResidualInner
    elif spec[0] == 'bottleneck':
      inner_block_fn = nn_blocks.BottleneckResidualInner
    else:
      raise ValueError('Block fn `{}` is not supported.'.format(spec[0]))

    if spec[1] % 2 != 0:
      raise ValueError('Number of output filters must be even to ensure '
                       'splitting in channel dimension for reversible blocks')

    x = self._block_group(
        inputs=x,
        filters=spec[1],
        # The first group keeps the stem resolution; later groups use stride 2.
        strides=(1 if i == 0 else 2),
        inner_block_fn=inner_block_fn,
        block_repeats=spec[2],
        batch_norm_first=(i != 0),  # Only skip on first block
        name='revblock_group_{}'.format(i + 2))
    endpoints[str(i + 2)] = x

  # Static shape of every endpoint, keyed by level name.
  self._output_specs = {l: endpoints[l].get_shape() for l in endpoints}

  super(RevNet, self).__init__(inputs=inputs, outputs=endpoints, **kwargs)
def __init__(self,
             min_level,
             max_level,
             num_classes,
             num_anchors_per_location,
             num_convs=4,
             num_filters=256,
             use_separable_conv=False,
             activation='relu',
             use_sync_bn=False,
             norm_momentum=0.99,
             norm_epsilon=0.001,
             kernel_regularizer=None,
             bias_regularizer=None,
             **kwargs):
  """Initializes a RetinaNet head.

  The constructor only records its arguments in `self._config_dict`, picks
  the batch-norm axis from the Keras image data format and resolves the
  activation through `tf_utils.get_activation`.

  Args:
    min_level: An `int` number of minimum feature level.
    max_level: An `int` number of maximum feature level.
    num_classes: An `int` number of classes to predict.
    num_anchors_per_location: An `int` number of anchors per pixel location.
    num_convs: An `int` number of intermediate conv layers before the
      prediction.
    num_filters: An `int` number of filters of the intermediate conv layers.
    use_separable_conv: A `bool` that indicates whether separable
      convolution layers are used.
    activation: A `str` that indicates which activation is used, e.g. 'relu',
      'swish', etc.
    use_sync_bn: A `bool` that indicates whether to use synchronized batch
      normalization across different replicas.
    norm_momentum: A `float` of normalization momentum for the moving
      average.
    norm_epsilon: A `float` added to variance to avoid dividing by zero.
    kernel_regularizer: A `tf.keras.regularizers.Regularizer` object for
      Conv2D. Default is None.
    bias_regularizer: A `tf.keras.regularizers.Regularizer` object for
      Conv2D.
    **kwargs: Additional keyword arguments passed to the parent constructor.
  """
  super(RetinaNetHead, self).__init__(**kwargs)

  # Keyword form yields the same string keys, in the same order, as a dict
  # literal would.
  self._config_dict = dict(
      min_level=min_level,
      max_level=max_level,
      num_classes=num_classes,
      num_anchors_per_location=num_anchors_per_location,
      num_convs=num_convs,
      num_filters=num_filters,
      use_separable_conv=use_separable_conv,
      activation=activation,
      use_sync_bn=use_sync_bn,
      norm_momentum=norm_momentum,
      norm_epsilon=norm_epsilon,
      kernel_regularizer=kernel_regularizer,
      bias_regularizer=bias_regularizer,
  )

  # Channels are the last axis for 'channels_last' and axis 1 otherwise.
  data_format = tf.keras.backend.image_data_format()
  self._bn_axis = -1 if data_format == 'channels_last' else 1

  self._activation = tf_utils.get_activation(activation)
def __init__(self,
             model_id,
             input_specs=layers.InputSpec(shape=[None, None, None, 3]),
             stem_type='v0',
             se_ratio=None,
             init_stochastic_depth_rate=0.0,
             activation='relu',
             use_sync_bn=False,
             norm_momentum=0.99,
             norm_epsilon=0.001,
             kernel_initializer='VarianceScaling',
             kernel_regularizer=None,
             bias_regularizer=None,
             **kwargs):
  """ResNet initialization function.

  Builds the stem, a 3x3 stride-2 max pool and one block group per entry of
  `RESNET_SPECS[model_id]`, then hands the resulting functional graph to the
  parent constructor. The output of block group `i` is exposed as endpoint
  `str(i + 2)`.

  Args:
    model_id: `int` depth of ResNet backbone model.
    input_specs: `tf.keras.layers.InputSpec` specs of the input tensor.
    stem_type: `str` stem type of ResNet. Default to `v0` (a single 7x7
      stride-2 convolution). If set to `v1`, three 3x3 convolutions are used
      instead, the ResNet-C type stem (https://arxiv.org/abs/1812.01187).
    se_ratio: `float` or None. Ratio of the Squeeze-and-Excitation layer.
    init_stochastic_depth_rate: `float` initial stochastic depth rate.
    activation: `str` name of the activation function.
    use_sync_bn: if True, use synchronized batch normalization.
    norm_momentum: `float` normalization momentum for the moving average.
    norm_epsilon: `float` small float added to variance to avoid dividing by
      zero.
    kernel_initializer: kernel_initializer for convolutional layers.
    kernel_regularizer: tf.keras.regularizers.Regularizer object for Conv2D.
      Default to None.
    bias_regularizer: tf.keras.regularizers.Regularizer object for Conv2D.
      Default to None.
    **kwargs: keyword arguments to be passed.

  Raises:
    ValueError: If `stem_type` is neither 'v0' nor 'v1', or if a spec names a
      block type other than 'residual' or 'bottleneck'.
  """
  self._model_id = model_id
  self._input_specs = input_specs
  self._stem_type = stem_type
  self._se_ratio = se_ratio
  self._init_stochastic_depth_rate = init_stochastic_depth_rate
  self._use_sync_bn = use_sync_bn
  self._activation = activation
  self._norm_momentum = norm_momentum
  self._norm_epsilon = norm_epsilon
  self._norm = (
      layers.experimental.SyncBatchNormalization
      if use_sync_bn else layers.BatchNormalization)
  self._kernel_initializer = kernel_initializer
  self._kernel_regularizer = kernel_regularizer
  self._bias_regularizer = bias_regularizer

  data_format = tf.keras.backend.image_data_format()
  bn_axis = -1 if data_format == 'channels_last' else 1

  def _stem_unit(tensor, filters, kernel_size, strides):
    """Applies Conv2D, then normalization, then activation to `tensor`.

    The three layers are created and applied in exactly that order, so the
    layer creation sequence matches writing the three calls out by hand.
    """
    tensor = layers.Conv2D(
        filters=filters,
        kernel_size=kernel_size,
        strides=strides,
        use_bias=False,
        padding='same',
        kernel_initializer=self._kernel_initializer,
        kernel_regularizer=self._kernel_regularizer,
        bias_regularizer=self._bias_regularizer)(
            tensor)
    tensor = self._norm(
        axis=bn_axis, momentum=norm_momentum, epsilon=norm_epsilon)(
            tensor)
    return tf_utils.get_activation(activation)(tensor)

  # Build ResNet.
  inputs = tf.keras.Input(shape=input_specs.shape[1:])

  # Each stem entry is (filters, kernel_size, strides).
  if stem_type == 'v0':
    stem_layout = [(64, 7, 2)]
  elif stem_type == 'v1':
    stem_layout = [(32, 3, 2), (32, 3, 1), (64, 3, 1)]
  else:
    raise ValueError('Stem type {} not supported.'.format(stem_type))

  x = inputs
  for filters, kernel_size, strides in stem_layout:
    x = _stem_unit(x, filters, kernel_size, strides)
  x = layers.MaxPool2D(pool_size=3, strides=2, padding='same')(x)

  endpoints = {}
  for group_index, spec in enumerate(RESNET_SPECS[model_id]):
    block_type = spec[0]
    if block_type not in ('residual', 'bottleneck'):
      raise ValueError('Block fn `{}` is not supported.'.format(block_type))
    if block_type == 'residual':
      block_fn = nn_blocks.ResidualBlock
    else:
      block_fn = nn_blocks.BottleneckBlock

    level = group_index + 2
    x = self._block_group(
        inputs=x,
        filters=spec[1],
        # Only the first group keeps the resolution produced by the stem.
        strides=(1 if group_index == 0 else 2),
        block_fn=block_fn,
        block_repeats=spec[2],
        stochastic_depth_drop_rate=nn_layers.get_stochastic_depth_rate(
            self._init_stochastic_depth_rate, level, 5),
        name='block_group_l{}'.format(level))
    endpoints[str(level)] = x

  self._output_specs = {
      level_name: feature.get_shape()
      for level_name, feature in endpoints.items()
  }

  super(ResNet, self).__init__(inputs=inputs, outputs=endpoints, **kwargs)
def get_transformer_encoder(bert_config,
                            sequence_length,
                            transformer_encoder_cls=None,
                            output_range=None):
  """Gets a 'TransformerEncoder' object.

  Three construction paths exist:
    * `transformer_encoder_cls` given: it is called with scaffold-style
      `embedding_cfg` / `hidden_cfg` keyword arguments.
    * `bert_config` is an `AlbertConfig`: `networks.AlbertTransformerEncoder`.
    * otherwise `bert_config` must be a `BertConfig`:
      `networks.TransformerEncoder`.
  `output_range` is only forwarded on the last path.

  Args:
    bert_config: A 'modeling.BertConfig' or 'modeling.AlbertConfig' object.
    sequence_length: Maximum sequence length of the training data.
    transformer_encoder_cls: A EncoderScaffold class. If it is None, uses the
      default BERT encoder implementation.
    output_range: the sequence output range, [0, output_range). Default
      setting is to return the entire sequence output.

  Returns:
    A networks.TransformerEncoder object.
  """

  def _new_initializer():
    """Returns a fresh TruncatedNormal; instances are never shared."""
    return tf.keras.initializers.TruncatedNormal(
        stddev=bert_config.initializer_range)

  if transformer_encoder_cls is not None:
    # TODO(hongkuny): evaluate if it is better to put cfg definition in gin.
    embedding_cfg = {
        'vocab_size': bert_config.vocab_size,
        'type_vocab_size': bert_config.type_vocab_size,
        'hidden_size': bert_config.hidden_size,
        'seq_length': sequence_length,
        'max_seq_length': bert_config.max_position_embeddings,
        'initializer': _new_initializer(),
        'dropout_rate': bert_config.hidden_dropout_prob,
    }
    hidden_cfg = {
        'num_attention_heads': bert_config.num_attention_heads,
        'intermediate_size': bert_config.intermediate_size,
        'intermediate_activation': tf_utils.get_activation(
            bert_config.hidden_act),
        'dropout_rate': bert_config.hidden_dropout_prob,
        'attention_dropout_rate': bert_config.attention_probs_dropout_prob,
        'kernel_initializer': _new_initializer(),
    }
    # Relies on gin configuration to define the Transformer encoder arguments.
    return transformer_encoder_cls(
        embedding_cfg=embedding_cfg,
        hidden_cfg=hidden_cfg,
        num_hidden_instances=bert_config.num_hidden_layers,
        pooled_output_dim=bert_config.hidden_size,
        pooler_layer_initializer=_new_initializer())

  encoder_kwargs = {
      'vocab_size': bert_config.vocab_size,
      'hidden_size': bert_config.hidden_size,
      'num_layers': bert_config.num_hidden_layers,
      'num_attention_heads': bert_config.num_attention_heads,
      'intermediate_size': bert_config.intermediate_size,
      'activation': tf_utils.get_activation(bert_config.hidden_act),
      'dropout_rate': bert_config.hidden_dropout_prob,
      'attention_dropout_rate': bert_config.attention_probs_dropout_prob,
      'sequence_length': sequence_length,
      'max_sequence_length': bert_config.max_position_embeddings,
      'type_vocab_size': bert_config.type_vocab_size,
      'embedding_width': bert_config.embedding_size,
      'initializer': _new_initializer(),
  }
  if isinstance(bert_config, albert_configs.AlbertConfig):
    return networks.AlbertTransformerEncoder(**encoder_kwargs)

  assert isinstance(bert_config, configs.BertConfig)
  encoder_kwargs['output_range'] = output_range
  return networks.TransformerEncoder(**encoder_kwargs)
def mb_conv_block(inputs: tf.Tensor,
                  block: BlockConfig,
                  config: ModelConfig,
                  prefix: Optional[Text] = None):
  """Mobile Inverted Residual Bottleneck.

  The block runs, in order: either a single fused convolution or an optional
  expansion convolution followed by an optional depthwise convolution; an
  optional squeeze-and-excitation gate; a projection convolution; an identity
  activation; and, when the block qualifies, a residual add with `inputs`.

  Args:
    inputs: the Keras input to the block
    block: BlockConfig, arguments to create a Block
    config: ModelConfig, a set of model parameters
    prefix: prefix for naming all layers; None is treated as ''

  Returns:
    the output of the block
  """
  name_prefix = prefix or ''
  act_fn = tf_utils.get_activation(config.activation)
  data_format = tf.keras.backend.image_data_format()
  expanded_filters = block.input_filters * block.expand_ratio

  net = inputs
  if not block.fused_conv:
    depthwise = block.conv_type != 'no_depthwise'

    if block.expand_ratio != 1:
      # Expansion phase
      expand_kernel = (1, 1) if depthwise else (3, 3)
      net = conv2d_block(net,
                         expanded_filters,
                         config,
                         kernel_size=expand_kernel,
                         activation=act_fn,
                         name=name_prefix + 'expand')

    if depthwise:
      # Depthwise Convolution
      net = conv2d_block(net,
                         conv_filters=None,
                         config=config,
                         kernel_size=block.kernel_size,
                         strides=block.strides,
                         activation=act_fn,
                         depthwise=True,
                         name=name_prefix + 'depthwise')
  else:
    # If we use fused mbconv, skip expansion and use regular conv.
    net = conv2d_block(net,
                       expanded_filters,
                       config,
                       kernel_size=block.kernel_size,
                       strides=block.strides,
                       activation=act_fn,
                       name=name_prefix + 'fused')

  if config.use_se:
    # Squeeze and Excitation phase
    assert block.se_ratio is not None
    assert 0 < block.se_ratio <= 1
    squeezed_filters = max(1, int(block.input_filters * block.se_ratio))

    # Put the pooled channel vector back into a 1x1 spatial map so that it
    # can go through conv2d_block.
    se_shape = ((expanded_filters, 1, 1)
                if data_format == 'channels_first'
                else (1, 1, expanded_filters))

    gate = tf.keras.layers.GlobalAveragePooling2D(
        name=name_prefix + 'se_squeeze')(net)
    gate = tf.keras.layers.Reshape(
        se_shape, name=name_prefix + 'se_reshape')(gate)
    gate = conv2d_block(gate,
                        squeezed_filters,
                        config,
                        use_bias=True,
                        use_batch_norm=False,
                        activation=act_fn,
                        name=name_prefix + 'se_reduce')
    gate = conv2d_block(gate,
                        expanded_filters,
                        config,
                        use_bias=True,
                        use_batch_norm=False,
                        activation='sigmoid',
                        name=name_prefix + 'se_expand')
    net = tf.keras.layers.multiply([net, gate],
                                   name=name_prefix + 'se_excite')

  # Output phase
  net = conv2d_block(net,
                     block.output_filters,
                     config,
                     activation=None,
                     name=name_prefix + 'project')

  # Add identity so that quantization-aware training can insert quantization
  # ops correctly.
  net = tf.keras.layers.Activation(tf_utils.get_activation('identity'),
                                   name=name_prefix + 'id')(net)

  has_residual = (block.id_skip
                  and all(s == 1 for s in block.strides)
                  and block.input_filters == block.output_filters)
  if not has_residual:
    return net

  drop_rate = config.drop_connect_rate
  if drop_rate and drop_rate > 0:
    # Apply dropconnect
    # The only difference between dropout and dropconnect in TF is scaling by
    # drop_connect_rate during training. See:
    # https://github.com/keras-team/keras/pull/9898#issuecomment-380577612
    net = tf.keras.layers.Dropout(drop_rate,
                                  noise_shape=(None, 1, 1, 1),
                                  name=name_prefix + 'drop')(net)

  return tf.keras.layers.add([net, inputs], name=name_prefix + 'add')
def _make_block(self, input_tensor, first_block=True, filters=64, stride=2,
                radix=1, avd=False, avd_first=False, is_first=False):
    """Builds one bottleneck block and returns its activated output.

    The main path is 1x1 conv -> 3x3 stage -> 1x1 conv; its result is added
    to a shortcut branch and passed through the activation.

    Args:
        input_tensor: tensor fed to both the main path and the shortcut.
        first_block: accepted but not used by this method.
        filters: base width; the block outputs
            `filters * self.block_expansion` channels.
        stride: stride of the block.
        radix: when >= 1 the 3x3 stage is `self._SplAtConv2d`; otherwise a
            plain 3x3 conv + batch norm + activation is used.
        avd: request average-pool downsampling. It takes effect only when
            `stride > 1 or is_first`; the 3x3 stage then runs with stride 1.
        avd_first: apply the average pool before (True) or after (False)
            the 3x3 stage.
        is_first: see `avd`.

    Returns:
        The output tensor of the block.
    """
    conv_opts = dict(padding="same", kernel_initializer="he_normal",
                     use_bias=False, data_format="channels_last")
    out_channels = filters * self.block_expansion

    def _batch_norm(tensor):
        return BatchNormalization(axis=self.channel_axis,
                                  epsilon=1.001e-5)(tensor)

    def _activate(tensor):
        return tf_utils.get_activation(self.activation)(tensor)

    # Shortcut branch: projected only when the resolution or the channel
    # count changes.
    short_cut = input_tensor
    if stride != 1 or input_tensor.shape[-1] != out_channels:
        if self.avg_down:
            pool = stride if self.dilation == 1 else 1
            short_cut = AveragePooling2D(
                pool_size=pool, strides=pool, padding="same",
                data_format="channels_last")(short_cut)
            projection_stride = 1
        else:
            projection_stride = stride
        short_cut = Conv2D(out_channels, kernel_size=1,
                           strides=projection_stride, **conv_opts)(short_cut)
        short_cut = _batch_norm(short_cut)

    # should the above be in make layer?
    # see https://github.com/zhanghang1989/ResNeSt/blob/master/resnest/torch/resnet.py
    group_width = (int(filters * (self.bottleneck_width / 64.0))
                   * self.cardinality)

    out = Conv2D(group_width, kernel_size=1, strides=1,
                 **conv_opts)(input_tensor)
    out = _activate(_batch_norm(out))

    use_avd = avd and (stride > 1 or is_first)
    if use_avd:
        # The pooling layer takes over the downsampling.
        avd_layer = AveragePooling2D(pool_size=3, strides=stride,
                                     padding="same",
                                     data_format="channels_last")
        stride = 1

    if use_avd and avd_first:
        out = avd_layer(out)

    if radix >= 1:
        out = self._SplAtConv2d(out, filters=group_width, kernel_size=3,
                                stride=stride, dilation=self.dilation,
                                groups=self.cardinality, radix=radix)
    else:
        out = Conv2D(group_width, kernel_size=3, strides=stride,
                     dilation_rate=self.dilation, **conv_opts)(out)
        out = _activate(_batch_norm(out))

    if use_avd and not avd_first:
        out = avd_layer(out)

    out = Conv2D(out_channels, kernel_size=1, strides=1,
                 dilation_rate=self.dilation, **conv_opts)(out)
    out = _batch_norm(out)

    return _activate(Add()([out, short_cut]))
def efficientnet(image_input: tf.keras.layers.Input, config: ModelConfig):  # pytype: disable=invalid-annotation  # typed-keras
  """Creates an EfficientNet graph given the model parameters.

  This function is wrapped by the `EfficientNet` class to make a
  tf.keras.Model.

  Args:
    image_input: the input batch of images
    config: the model config

  Returns:
    the output of efficientnet
  """
  # Read every setting once up front: `config` is rebound inside the block
  # loop with a per-block drop-connect rate.
  depth_coefficient = config.depth_coefficient
  stacks = config.blocks
  stem_base_filters = config.stem_base_filters
  top_base_filters = config.top_base_filters
  act_fn = tf_utils.get_activation(config.activation)
  dropout_rate = config.dropout_rate
  base_drop_connect_rate = config.drop_connect_rate
  num_classes = config.num_classes
  input_channels = config.input_channels
  rescale_input = config.rescale_input
  data_format = tf.keras.backend.image_data_format()
  dtype = config.dtype
  weight_decay = config.weight_decay

  net = image_input
  if data_format == 'channels_first':
    # Happens on GPU/TPU if available.
    net = tf.keras.layers.Permute((3, 1, 2))(net)
  if rescale_input:
    net = preprocessing.normalize_images(net,
                                         num_channels=input_channels,
                                         dtype=dtype,
                                         data_format=data_format)

  # Build stem
  net = conv2d_block(net,
                     round_filters(stem_base_filters, config),
                     config,
                     kernel_size=[3, 3],
                     strides=[2, 2],
                     activation=act_fn,
                     name='stem')

  # Build blocks
  total_blocks = sum(
      round_repeats(stack.num_repeat, depth_coefficient) for stack in stacks)
  blocks_built = 0

  for stack_idx, block in enumerate(stacks):
    assert block.num_repeat > 0
    # Update block input and output filters based on depth multiplier
    block = block.replace(
        input_filters=round_filters(block.input_filters, config),
        output_filters=round_filters(block.output_filters, config),
        num_repeat=round_repeats(block.num_repeat, depth_coefficient))

    # The first block of a stack is always built; it takes care of stride and
    # filter size increase. Any further repeats use stride 1 and matching
    # input/output filters.
    for block_idx in range(max(1, block.num_repeat)):
      if block_idx == 1:
        block = block.replace(input_filters=block.output_filters,
                              strides=[1, 1])
      per_block_rate = (
          base_drop_connect_rate * float(blocks_built) / total_blocks)
      config = config.replace(drop_connect_rate=per_block_rate)
      net = mb_conv_block(
          net,
          block,
          config,
          prefix='stack_{}/block_{}/'.format(stack_idx, block_idx))
      blocks_built += 1

  # Build top
  net = conv2d_block(net,
                     round_filters(top_base_filters, config),
                     config,
                     activation=act_fn,
                     name='top')

  # Build classifier
  net = tf.keras.layers.GlobalAveragePooling2D(name='top_pool')(net)
  if dropout_rate and dropout_rate > 0:
    net = tf.keras.layers.Dropout(dropout_rate, name='top_dropout')(net)
  logits = tf.keras.layers.Dense(
      num_classes,
      kernel_initializer=DENSE_KERNEL_INITIALIZER,
      kernel_regularizer=tf.keras.regularizers.l2(weight_decay),
      bias_regularizer=tf.keras.regularizers.l2(weight_decay),
      name='logits')(net)
  return tf.keras.layers.Activation('softmax', name='probs')(logits)
def build_encoder(config: EncoderConfig,
                  embedding_layer: Optional[tf.keras.layers.Layer] = None,
                  encoder_cls=None,
                  bypass_config: bool = False):
  """Instantiate a Transformer encoder network from EncoderConfig.

  Args:
    config: the one-of encoder config, which provides encoder parameters of a
      chosen encoder.
    embedding_layer: an external embedding layer passed to the encoder. It is
      only forwarded to the default BERT encoders.
    encoder_cls: an external encoder cls not included in the supported
      encoders, usually used by gin.configurable. It is used when
      `bypass_config` is set, or when its `__name__` is "EncoderScaffold";
      otherwise the encoder is selected by `config.type`.
    bypass_config: whether to ignore config instance to create the object with
      `encoder_cls`.

  Returns:
    An encoder instance.

  Raises:
    ValueError: if `bypass_config` is set without an `encoder_cls`, or if the
      `BUILDER` of an "any" config returns an object that is not a tf.Module,
      tf.keras.Model or tf.keras.layers.Layer.
  """
  if bypass_config:
    if encoder_cls is None:
      # Fail with a clear message instead of "'NoneType' is not callable".
      raise ValueError(
          "`encoder_cls` must be provided when `bypass_config` is True.")
    return encoder_cls()

  encoder_type = config.type
  encoder_cfg = config.get()

  def _truncated_normal():
    """Returns a new TruncatedNormal initializer for the configured range."""
    return tf.keras.initializers.TruncatedNormal(
        stddev=encoder_cfg.initializer_range)

  def _embedding_cfg():
    """Returns the embedding config shared by all EncoderScaffold variants."""
    return dict(
        vocab_size=encoder_cfg.vocab_size,
        type_vocab_size=encoder_cfg.type_vocab_size,
        hidden_size=encoder_cfg.hidden_size,
        max_seq_length=encoder_cfg.max_position_embeddings,
        initializer=_truncated_normal(),
        dropout_rate=encoder_cfg.dropout_rate)

  def _transformer_hidden_cfg(**extra):
    """Returns the Transformer hidden-layer config, extended with `extra`."""
    hidden = dict(
        num_attention_heads=encoder_cfg.num_attention_heads,
        intermediate_size=encoder_cfg.intermediate_size,
        intermediate_activation=tf_utils.get_activation(
            encoder_cfg.hidden_activation),
        dropout_rate=encoder_cfg.dropout_rate,
        attention_dropout_rate=encoder_cfg.attention_dropout_rate,
        kernel_initializer=_truncated_normal())
    hidden.update(extra)
    return hidden

  if encoder_cls and encoder_cls.__name__ == "EncoderScaffold":
    kwargs = dict(
        embedding_cfg=_embedding_cfg(),
        hidden_cfg=_transformer_hidden_cfg(),
        num_hidden_instances=encoder_cfg.num_layers,
        pooled_output_dim=encoder_cfg.hidden_size,
        pooler_layer_initializer=_truncated_normal(),
        return_all_layer_outputs=encoder_cfg.return_all_encoder_outputs,
        dict_outputs=True)
    return encoder_cls(**kwargs)

  if encoder_type == "any":
    # The config carries its own builder; only the result type is checked.
    encoder = encoder_cfg.BUILDER(encoder_cfg)
    if not isinstance(encoder,
                      (tf.Module, tf.keras.Model, tf.keras.layers.Layer)):
      raise ValueError("The BUILDER returns an unexpected instance. The "
                       "`build_encoder` should returns a tf.Module, "
                       "tf.keras.Model or tf.keras.layers.Layer. However, "
                       f"we get {encoder.__class__}")
    return encoder

  if encoder_type == "mobilebert":
    return networks.MobileBERTEncoder(
        word_vocab_size=encoder_cfg.word_vocab_size,
        word_embed_size=encoder_cfg.word_embed_size,
        type_vocab_size=encoder_cfg.type_vocab_size,
        max_sequence_length=encoder_cfg.max_sequence_length,
        num_blocks=encoder_cfg.num_blocks,
        hidden_size=encoder_cfg.hidden_size,
        num_attention_heads=encoder_cfg.num_attention_heads,
        intermediate_size=encoder_cfg.intermediate_size,
        intermediate_act_fn=encoder_cfg.hidden_activation,
        hidden_dropout_prob=encoder_cfg.hidden_dropout_prob,
        attention_probs_dropout_prob=encoder_cfg.attention_probs_dropout_prob,
        intra_bottleneck_size=encoder_cfg.intra_bottleneck_size,
        initializer_range=encoder_cfg.initializer_range,
        use_bottleneck_attention=encoder_cfg.use_bottleneck_attention,
        key_query_shared_bottleneck=encoder_cfg.key_query_shared_bottleneck,
        num_feedforward_networks=encoder_cfg.num_feedforward_networks,
        normalization_type=encoder_cfg.normalization_type,
        classifier_activation=encoder_cfg.classifier_activation,
        input_mask_dtype=encoder_cfg.input_mask_dtype)

  if encoder_type == "albert":
    return networks.AlbertEncoder(
        vocab_size=encoder_cfg.vocab_size,
        embedding_width=encoder_cfg.embedding_width,
        hidden_size=encoder_cfg.hidden_size,
        num_layers=encoder_cfg.num_layers,
        num_attention_heads=encoder_cfg.num_attention_heads,
        max_sequence_length=encoder_cfg.max_position_embeddings,
        type_vocab_size=encoder_cfg.type_vocab_size,
        intermediate_size=encoder_cfg.intermediate_size,
        activation=tf_utils.get_activation(encoder_cfg.hidden_activation),
        dropout_rate=encoder_cfg.dropout_rate,
        attention_dropout_rate=encoder_cfg.attention_dropout_rate,
        initializer=_truncated_normal(),
        dict_outputs=True)

  if encoder_type == "bigbird":
    # TODO(frederickliu): Support use_gradient_checkpointing and update
    # experiments to use the EncoderScaffold only.
    if encoder_cfg.use_gradient_checkpointing:
      return bigbird_encoder.BigBirdEncoder(
          vocab_size=encoder_cfg.vocab_size,
          hidden_size=encoder_cfg.hidden_size,
          num_layers=encoder_cfg.num_layers,
          num_attention_heads=encoder_cfg.num_attention_heads,
          intermediate_size=encoder_cfg.intermediate_size,
          activation=tf_utils.get_activation(encoder_cfg.hidden_activation),
          dropout_rate=encoder_cfg.dropout_rate,
          attention_dropout_rate=encoder_cfg.attention_dropout_rate,
          num_rand_blocks=encoder_cfg.num_rand_blocks,
          block_size=encoder_cfg.block_size,
          max_position_embeddings=encoder_cfg.max_position_embeddings,
          type_vocab_size=encoder_cfg.type_vocab_size,
          initializer=_truncated_normal(),
          embedding_width=encoder_cfg.embedding_width,
          use_gradient_checkpointing=encoder_cfg.use_gradient_checkpointing)
    embedding_cfg = _embedding_cfg()
    attention_cfg = dict(
        num_heads=encoder_cfg.num_attention_heads,
        key_dim=int(encoder_cfg.hidden_size // encoder_cfg.num_attention_heads),
        kernel_initializer=_truncated_normal(),
        max_rand_mask_length=encoder_cfg.max_position_embeddings,
        num_rand_blocks=encoder_cfg.num_rand_blocks,
        from_block_size=encoder_cfg.block_size,
        to_block_size=encoder_cfg.block_size)
    hidden_cfg = _transformer_hidden_cfg(
        norm_first=encoder_cfg.norm_first,
        attention_cls=layers.BigBirdAttention,
        attention_cfg=attention_cfg)
    kwargs = dict(
        embedding_cfg=embedding_cfg,
        hidden_cls=layers.TransformerScaffold,
        hidden_cfg=hidden_cfg,
        num_hidden_instances=encoder_cfg.num_layers,
        mask_cls=layers.BigBirdMasks,
        mask_cfg=dict(block_size=encoder_cfg.block_size),
        pooled_output_dim=encoder_cfg.hidden_size,
        pooler_layer_initializer=_truncated_normal(),
        return_all_layer_outputs=False,
        dict_outputs=True,
        layer_idx_as_attention_seed=True)
    return networks.EncoderScaffold(**kwargs)

  if encoder_type == "kernel":
    embedding_cfg = _embedding_cfg()
    attention_cfg = dict(
        num_heads=encoder_cfg.num_attention_heads,
        key_dim=int(encoder_cfg.hidden_size // encoder_cfg.num_attention_heads),
        kernel_initializer=_truncated_normal(),
        feature_transform=encoder_cfg.feature_transform,
        num_random_features=encoder_cfg.num_random_features,
        redraw=encoder_cfg.redraw,
        is_short_seq=encoder_cfg.is_short_seq,
        begin_kernel=encoder_cfg.begin_kernel,
        scale=encoder_cfg.scale)
    hidden_cfg = _transformer_hidden_cfg(
        norm_first=encoder_cfg.norm_first,
        attention_cls=layers.KernelAttention,
        attention_cfg=attention_cfg)
    kwargs = dict(
        embedding_cfg=embedding_cfg,
        hidden_cls=layers.TransformerScaffold,
        hidden_cfg=hidden_cfg,
        num_hidden_instances=encoder_cfg.num_layers,
        mask_cls=layers.KernelMask,
        pooled_output_dim=encoder_cfg.hidden_size,
        pooler_layer_initializer=_truncated_normal(),
        return_all_layer_outputs=False,
        dict_outputs=True,
        layer_idx_as_attention_seed=True)
    return networks.EncoderScaffold(**kwargs)

  if encoder_type == "xlnet":
    return networks.XLNetBase(
        vocab_size=encoder_cfg.vocab_size,
        num_layers=encoder_cfg.num_layers,
        hidden_size=encoder_cfg.hidden_size,
        num_attention_heads=encoder_cfg.num_attention_heads,
        head_size=encoder_cfg.head_size,
        inner_size=encoder_cfg.inner_size,
        dropout_rate=encoder_cfg.dropout_rate,
        attention_dropout_rate=encoder_cfg.attention_dropout_rate,
        attention_type=encoder_cfg.attention_type,
        bi_data=encoder_cfg.bi_data,
        two_stream=encoder_cfg.two_stream,
        tie_attention_biases=encoder_cfg.tie_attention_biases,
        memory_length=encoder_cfg.memory_length,
        clamp_length=encoder_cfg.clamp_length,
        reuse_length=encoder_cfg.reuse_length,
        inner_activation=encoder_cfg.inner_activation,
        use_cls_mask=encoder_cfg.use_cls_mask,
        embedding_width=encoder_cfg.embedding_width,
        initializer=tf.keras.initializers.RandomNormal(
            stddev=encoder_cfg.initializer_range))

  if encoder_type == "reuse":
    # ReuseTransformer takes differently named arguments than the standard
    # Transformer hidden config, so its config is spelled out here.
    hidden_cfg = dict(
        num_attention_heads=encoder_cfg.num_attention_heads,
        inner_dim=encoder_cfg.intermediate_size,
        inner_activation=tf_utils.get_activation(
            encoder_cfg.hidden_activation),
        output_dropout=encoder_cfg.dropout_rate,
        attention_dropout=encoder_cfg.attention_dropout_rate,
        norm_first=encoder_cfg.norm_first,
        kernel_initializer=_truncated_normal(),
        reuse_attention=encoder_cfg.reuse_attention,
        use_relative_pe=encoder_cfg.use_relative_pe,
        pe_max_seq_length=encoder_cfg.pe_max_seq_length,
        max_reuse_layer_idx=encoder_cfg.max_reuse_layer_idx)
    kwargs = dict(
        embedding_cfg=_embedding_cfg(),
        hidden_cls=layers.ReuseTransformer,
        hidden_cfg=hidden_cfg,
        num_hidden_instances=encoder_cfg.num_layers,
        pooled_output_dim=encoder_cfg.hidden_size,
        pooler_layer_initializer=_truncated_normal(),
        return_all_layer_outputs=False,
        dict_outputs=True,
        feed_layer_idx=True,
        recursive=True)
    return networks.EncoderScaffold(**kwargs)

  bert_encoder_cls = networks.BertEncoder
  if encoder_type == "bert_v2":
    bert_encoder_cls = networks.BertEncoderV2

  # Uses the default BERTEncoder configuration schema to create the encoder.
  # If it does not match, please add a switch branch by the encoder type.
  return bert_encoder_cls(
      vocab_size=encoder_cfg.vocab_size,
      hidden_size=encoder_cfg.hidden_size,
      num_layers=encoder_cfg.num_layers,
      num_attention_heads=encoder_cfg.num_attention_heads,
      intermediate_size=encoder_cfg.intermediate_size,
      activation=tf_utils.get_activation(encoder_cfg.hidden_activation),
      dropout_rate=encoder_cfg.dropout_rate,
      attention_dropout_rate=encoder_cfg.attention_dropout_rate,
      max_sequence_length=encoder_cfg.max_position_embeddings,
      type_vocab_size=encoder_cfg.type_vocab_size,
      initializer=_truncated_normal(),
      output_range=encoder_cfg.output_range,
      embedding_width=encoder_cfg.embedding_size,
      embedding_layer=embedding_layer,
      return_all_encoder_outputs=encoder_cfg.return_all_encoder_outputs,
      dict_outputs=True,
      norm_first=encoder_cfg.norm_first)
def __init__(self,
             min_level: int,
             max_level: int,
             num_classes: int,
             num_anchors_per_location: int,
             num_convs: int = 4,
             num_filters: int = 256,
             attribute_heads: Optional[List[Dict[str, Any]]] = None,
             use_separable_conv: bool = False,
             activation: str = 'relu',
             use_sync_bn: bool = False,
             norm_momentum: float = 0.99,
             norm_epsilon: float = 0.001,
             kernel_regularizer: Optional[
                 tf.keras.regularizers.Regularizer] = None,
             bias_regularizer: Optional[
                 tf.keras.regularizers.Regularizer] = None,
             num_params_per_anchor: int = 4,
             **kwargs):
  """Initializes a RetinaNet head.

  The constructor records its arguments in `self._config_dict`, picks the
  batch-norm axis from the Keras image data format and resolves the
  activation function.

  Args:
    min_level: An `int` number of minimum feature level.
    max_level: An `int` number of maximum feature level.
    num_classes: An `int` number of classes to predict.
    num_anchors_per_location: An `int` number of anchors per pixel location.
    num_convs: An `int` number that represents the number of the intermediate
      conv layers before the prediction.
    num_filters: An `int` number that represents the number of filters of the
      intermediate conv layers.
    attribute_heads: If not None, a list that contains a dict for each
      additional attribute head. Each dict consists of 3 key-value pairs:
      `name`, `type` ('regression' or 'classification'), and `size` (number
      of predicted values for each instance).
    use_separable_conv: A `bool` that indicates whether the separable
      convolution layers is used.
    activation: A `str` that indicates which activation is used, e.g. 'relu',
      'swish', etc.
    use_sync_bn: A `bool` that indicates whether to use synchronized batch
      normalization across different replicas.
    norm_momentum: A `float` of normalization momentum for the moving average.
    norm_epsilon: A `float` added to variance to avoid dividing by zero.
    kernel_regularizer: A `tf.keras.regularizers.Regularizer` object for
      Conv2D. Default is None.
    bias_regularizer: A `tf.keras.regularizers.Regularizer` object for Conv2D.
    num_params_per_anchor: Number of parameters required to specify an anchor
      box. For example, `num_params_per_anchor` would be 4 for axis-aligned
      anchor boxes specified by their y-centers, x-centers, heights, and
      widths.
    **kwargs: Additional keyword arguments to be passed.
  """
  super(RetinaNetHead, self).__init__(**kwargs)

  self._config_dict = dict(
      min_level=min_level,
      max_level=max_level,
      num_classes=num_classes,
      num_anchors_per_location=num_anchors_per_location,
      num_convs=num_convs,
      num_filters=num_filters,
      attribute_heads=attribute_heads,
      use_separable_conv=use_separable_conv,
      activation=activation,
      use_sync_bn=use_sync_bn,
      norm_momentum=norm_momentum,
      norm_epsilon=norm_epsilon,
      kernel_regularizer=kernel_regularizer,
      bias_regularizer=bias_regularizer,
      num_params_per_anchor=num_params_per_anchor,
  )

  # Channels are the last axis for 'channels_last', axis 1 otherwise.
  channels_last = tf.keras.backend.image_data_format() == 'channels_last'
  self._bn_axis = -1 if channels_last else 1
  self._activation = tf_utils.get_activation(activation)
def _resample_with_alpha(self,
                         inputs,
                         input_width,
                         input_block_fn,
                         target_width,
                         target_num_filters,
                         target_block_fn,
                         alpha=0.5):
  """Matches resolution and feature dimension.

  Applies a 1x1 conv that scales the channel count by `alpha`, then
  downsamples or upsamples until `input_width` reaches `target_width`, and
  finishes with a 1x1 conv to the target channel count. For 'bottleneck'
  block functions the input channel count is divided by 4 before scaling and
  the target channel count is multiplied by 4.

  Args:
    inputs: input tensor; its shape must unpack into exactly four dimensions
      with the channel count last.
    input_width: width of `inputs`.
    input_block_fn: name of the block function that produced `inputs`.
    target_width: width the output should have.
    target_num_filters: number of filters of the target block.
    target_block_fn: name of the block function of the target.
    alpha: scaling factor for the intermediate number of filters.

  Returns:
    The resampled tensor.
  """
  conv_kwargs = dict(
      use_bias=False,
      kernel_initializer=self._kernel_initializer,
      kernel_regularizer=self._kernel_regularizer,
      bias_regularizer=self._bias_regularizer)

  def _normalize(tensor):
    return self._norm(
        axis=self._bn_axis,
        momentum=self._norm_momentum,
        epsilon=self._norm_epsilon)(tensor)

  def _activate(tensor):
    return tf_utils.get_activation(self._activation_fn)(tensor)

  _, _, _, in_channels = inputs.get_shape().as_list()
  if input_block_fn == 'bottleneck':
    in_channels = in_channels / 4
  scaled_channels = int(in_channels * alpha)

  net = layers.Conv2D(
      filters=scaled_channels, kernel_size=1, strides=1,
      **conv_kwargs)(inputs)
  net = _activate(_normalize(net))

  # Spatial resampling.
  if input_width > target_width:
    # One strided conv first, then max-pooling for any remaining halvings.
    net = layers.Conv2D(
        filters=scaled_channels, kernel_size=3, strides=2, padding='SAME',
        **conv_kwargs)(net)
    net = _activate(_normalize(net))
    input_width /= 2
    while input_width > target_width:
      net = layers.MaxPool2D(pool_size=3, strides=2, padding='SAME')(net)
      input_width /= 2
  elif input_width < target_width:
    net = spatial_transform_ops.nearest_upsampling(
        net, scale=target_width // input_width)

  # Last 1x1 conv to match filter size.
  out_channels = target_num_filters
  if target_block_fn == 'bottleneck':
    out_channels = out_channels * 4
  net = layers.Conv2D(
      filters=out_channels, kernel_size=1, strides=1, **conv_kwargs)(net)
  return _normalize(net)
def __init__(self,
             model_id,
             output_stride,
             input_specs=layers.InputSpec(shape=[None, None, None, 3]),
             stem_type='v0',
             se_ratio=None,
             init_stochastic_depth_rate=0.0,
             multigrid=None,
             last_stage_repeats=1,
             activation='relu',
             use_sync_bn=False,
             norm_momentum=0.99,
             norm_epsilon=0.001,
             kernel_initializer='VarianceScaling',
             kernel_regularizer=None,
             bias_regularizer=None,
             **kwargs):
  """Initializes a ResNet model with DeepLab modification.

  Args:
    model_id: An `int` specifies depth of ResNet backbone model.
    output_stride: An `int` of output stride, ratio of input to output
      resolution.
    input_specs: A `tf.keras.layers.InputSpec` of the input tensor.
    stem_type: A `str` of stem type. Can be `v0` or `v1`. `v1` replaces 7x7
      conv by 3 3x3 convs.
    se_ratio: A `float` or None. Ratio of the Squeeze-and-Excitation layer.
    init_stochastic_depth_rate: A `float` of initial stochastic depth rate.
    multigrid: A tuple of the same length as the number of blocks in the last
      resnet stage.
    last_stage_repeats: An `int` that specifies how many times last stage is
      repeated.
    activation: A `str` name of the activation function.
    use_sync_bn: If True, use synchronized batch normalization.
    norm_momentum: A `float` of normalization momentum for the moving average.
    norm_epsilon: A `float` added to variance to avoid dividing by zero.
    kernel_initializer: A str for kernel initializer of convolutional layers.
    kernel_regularizer: A `tf.keras.regularizers.Regularizer` object for
      Conv2D. Default to None.
    bias_regularizer: A `tf.keras.regularizers.Regularizer` object for Conv2D.
      Default to None.
    **kwargs: Additional keyword arguments to be passed.

  Raises:
    ValueError: if `stem_type` is neither 'v0' nor 'v1', or if a block spec
      of the selected model is not a 'bottleneck' block.
  """
  # Local import: the `np.math` alias used previously was deprecated in
  # NumPy 1.25 and removed in NumPy 2.0.
  import math

  self._model_id = model_id
  self._output_stride = output_stride
  self._input_specs = input_specs
  self._use_sync_bn = use_sync_bn
  self._activation = activation
  self._norm_momentum = norm_momentum
  self._norm_epsilon = norm_epsilon
  if use_sync_bn:
    self._norm = layers.experimental.SyncBatchNormalization
  else:
    self._norm = layers.BatchNormalization
  self._kernel_initializer = kernel_initializer
  self._kernel_regularizer = kernel_regularizer
  self._bias_regularizer = bias_regularizer
  self._stem_type = stem_type
  self._se_ratio = se_ratio
  self._init_stochastic_depth_rate = init_stochastic_depth_rate

  if tf.keras.backend.image_data_format() == 'channels_last':
    bn_axis = -1
  else:
    bn_axis = 1

  # Build ResNet.
  inputs = tf.keras.Input(shape=input_specs.shape[1:])

  if stem_type == 'v0':
    # Single 7x7 stride-2 convolution.
    x = layers.Conv2D(
        filters=64,
        kernel_size=7,
        strides=2,
        use_bias=False,
        padding='same',
        kernel_initializer=self._kernel_initializer,
        kernel_regularizer=self._kernel_regularizer,
        bias_regularizer=self._bias_regularizer)(
            inputs)
    x = self._norm(
        axis=bn_axis, momentum=norm_momentum, epsilon=norm_epsilon)(
            x)
    x = tf_utils.get_activation(activation)(x)
  elif stem_type == 'v1':
    # Three 3x3 convolutions; only the first one is strided.
    x = layers.Conv2D(
        filters=64,
        kernel_size=3,
        strides=2,
        use_bias=False,
        padding='same',
        kernel_initializer=self._kernel_initializer,
        kernel_regularizer=self._kernel_regularizer,
        bias_regularizer=self._bias_regularizer)(
            inputs)
    x = self._norm(
        axis=bn_axis, momentum=norm_momentum, epsilon=norm_epsilon)(
            x)
    x = tf_utils.get_activation(activation)(x)
    x = layers.Conv2D(
        filters=64,
        kernel_size=3,
        strides=1,
        use_bias=False,
        padding='same',
        kernel_initializer=self._kernel_initializer,
        kernel_regularizer=self._kernel_regularizer,
        bias_regularizer=self._bias_regularizer)(
            x)
    x = self._norm(
        axis=bn_axis, momentum=norm_momentum, epsilon=norm_epsilon)(
            x)
    x = tf_utils.get_activation(activation)(x)
    x = layers.Conv2D(
        filters=128,
        kernel_size=3,
        strides=1,
        use_bias=False,
        padding='same',
        kernel_initializer=self._kernel_initializer,
        kernel_regularizer=self._kernel_regularizer,
        bias_regularizer=self._bias_regularizer)(
            x)
    x = self._norm(
        axis=bn_axis, momentum=norm_momentum, epsilon=norm_epsilon)(
            x)
    x = tf_utils.get_activation(activation)(x)
  else:
    raise ValueError('Stem type {} not supported.'.format(stem_type))

  x = layers.MaxPool2D(pool_size=3, strides=2, padding='same')(x)

  # Index of the last block group that is built without dilation. The stem
  # conv and the max pool above each use stride 2, hence the "- 2".
  normal_resnet_stage = int(math.log2(self._output_stride)) - 2

  endpoints = {}
  # Regular block groups: stride 2 (stride 1 for the first), no dilation.
  for i in range(normal_resnet_stage + 1):
    spec = RESNET_SPECS[model_id][i]
    if spec[0] == 'bottleneck':
      block_fn = nn_blocks.BottleneckBlock
    else:
      raise ValueError('Block fn `{}` is not supported.'.format(spec[0]))
    x = self._block_group(
        inputs=x,
        filters=spec[1],
        strides=(1 if i == 0 else 2),
        dilation_rate=1,
        block_fn=block_fn,
        block_repeats=spec[2],
        stochastic_depth_drop_rate=nn_layers.get_stochastic_depth_rate(
            self._init_stochastic_depth_rate, i + 2, 4 + last_stage_repeats),
        name='block_group_l{}'.format(i + 2))
    endpoints[str(i + 2)] = x

  # Remaining block groups: stride 1, with the dilation rate doubling for
  # every group. Indices past the end of the spec list reuse the last spec.
  dilation_rate = 2
  for i in range(normal_resnet_stage + 1, 3 + last_stage_repeats):
    spec = RESNET_SPECS[model_id][i] if i < 3 else RESNET_SPECS[model_id][-1]
    if spec[0] == 'bottleneck':
      block_fn = nn_blocks.BottleneckBlock
    else:
      raise ValueError('Block fn `{}` is not supported.'.format(spec[0]))
    x = self._block_group(
        inputs=x,
        filters=spec[1],
        strides=1,
        dilation_rate=dilation_rate,
        block_fn=block_fn,
        block_repeats=spec[2],
        stochastic_depth_drop_rate=nn_layers.get_stochastic_depth_rate(
            self._init_stochastic_depth_rate, i + 2, 4 + last_stage_repeats),
        multigrid=multigrid if i >= 3 else None,
        name='block_group_l{}'.format(i + 2))
    dilation_rate *= 2

  # The output of the dilated groups replaces the endpoint of the last
  # regular group.
  endpoints[str(normal_resnet_stage + 2)] = x

  self._output_specs = {l: endpoints[l].get_shape() for l in endpoints}

  super(DilatedResNet, self).__init__(
      inputs=inputs, outputs=endpoints, **kwargs)
def __init__(
    self,
    model_id: int,
    temporal_strides: List[int],
    temporal_kernel_sizes: List[Tuple[int]],
    use_self_gating: List[int] = None,
    input_specs=layers.InputSpec(shape=[None, None, None, None, 3]),
    stem_conv_temporal_kernel_size=5,
    stem_conv_temporal_stride=2,
    stem_pool_temporal_stride=2,
    activation='relu',
    use_sync_bn=False,
    norm_momentum=0.99,
    norm_epsilon=0.001,
    kernel_initializer='VarianceScaling',
    kernel_regularizer=None,
    bias_regularizer=None,
    **kwargs):
  """Builds the ResNet3D backbone as a functional Keras model.

  The network is a Conv3D/norm/activation stem followed by a 3D max pool and
  one block group per entry of `RESNET_SPECS[model_id]`. The output of every
  block group is exposed as an endpoint keyed by the integer `index + 2`.

  Args:
    model_id: `int` depth of the ResNet backbone; used as the key into
      `RESNET_SPECS`.
    temporal_strides: a list of integers, one temporal stride per block
      group.
    temporal_kernel_sizes: a list of tuples, one tuple of temporal kernel
      sizes per block group.
    use_self_gating: a list with one flag per block group that enables the
      self-gating module for that group. If None (or empty), self-gating is
      disabled for all groups.
    input_specs: `tf.keras.layers.InputSpec` specs of the input tensor; the
      leading (batch) dimension of its shape is dropped for `tf.keras.Input`.
    stem_conv_temporal_kernel_size: `int` temporal kernel size of the stem
      convolution.
    stem_conv_temporal_stride: `int` temporal stride of the stem convolution.
    stem_pool_temporal_stride: `int` temporal stride of the stem max pool.
      The temporal pool size is 1 when this is 1, and 3 otherwise.
    activation: `str` name of the activation function.
    use_sync_bn: if True, use synchronized batch normalization.
    norm_momentum: `float` normalization momentum for the moving average.
    norm_epsilon: `float` small value added to the variance to avoid
      dividing by zero.
    kernel_initializer: kernel initializer for the stem convolution (also
      stored on the instance).
    kernel_regularizer: `tf.keras.regularizers.Regularizer` for convolution
      kernels. Defaults to None.
    bias_regularizer: `tf.keras.regularizers.Regularizer` for convolution
      biases. Defaults to None.
    **kwargs: keyword arguments forwarded to the parent constructor.

  Raises:
    ValueError: if `temporal_strides` or `temporal_kernel_sizes` does not
      have one entry per block group, or if a block group requests a block
      type other than 'bottleneck3d'.
  """
  # Keep the constructor arguments around on the instance.
  self._model_id = model_id
  self._input_specs = input_specs
  self._temporal_strides = temporal_strides
  self._temporal_kernel_sizes = temporal_kernel_sizes
  self._use_self_gating = use_self_gating
  self._stem_conv_temporal_kernel_size = stem_conv_temporal_kernel_size
  self._stem_conv_temporal_stride = stem_conv_temporal_stride
  self._stem_pool_temporal_stride = stem_pool_temporal_stride
  self._activation = activation
  self._use_sync_bn = use_sync_bn
  self._norm_momentum = norm_momentum
  self._norm_epsilon = norm_epsilon
  self._norm = (
      layers.experimental.SyncBatchNormalization
      if use_sync_bn else layers.BatchNormalization)
  self._kernel_initializer = kernel_initializer
  self._kernel_regularizer = kernel_regularizer
  self._bias_regularizer = bias_regularizer

  # Normalize over the channel axis, wherever the data format puts it.
  channels_last = tf.keras.backend.image_data_format() == 'channels_last'
  norm_axis = -1 if channels_last else 1

  inputs = tf.keras.Input(shape=input_specs.shape[1:])

  # Stem: Conv3D -> normalization -> activation -> MaxPool3D.
  stem_conv = layers.Conv3D(
      filters=64,
      kernel_size=[stem_conv_temporal_kernel_size, 7, 7],
      strides=[stem_conv_temporal_stride, 2, 2],
      use_bias=False,
      padding='same',
      kernel_initializer=self._kernel_initializer,
      kernel_regularizer=self._kernel_regularizer,
      bias_regularizer=self._bias_regularizer)
  net = stem_conv(inputs)
  stem_norm = self._norm(
      axis=norm_axis, momentum=norm_momentum, epsilon=norm_epsilon)
  net = stem_norm(net)
  net = tf_utils.get_activation(activation)(net)

  if stem_pool_temporal_stride == 1:
    pool_temporal_size = 1
  else:
    pool_temporal_size = 3
  stem_pool = layers.MaxPool3D(
      pool_size=[pool_temporal_size, 3, 3],
      strides=[stem_pool_temporal_stride, 2, 2],
      padding='same')
  net = stem_pool(net)

  # The temporal settings must provide exactly one entry per block group.
  group_specs = RESNET_SPECS[model_id]
  num_groups = len(group_specs)
  if not (len(temporal_strides) == num_groups and
          len(temporal_kernel_sizes) == num_groups):
    raise ValueError(
        'Number of blocks in temporal specs should equal to resnet_specs.')

  # Stack the block groups, recording each group's output as an endpoint.
  endpoints = {}
  for group_idx, group_spec in enumerate(group_specs):
    if group_spec[0] != 'bottleneck3d':
      raise ValueError('Block fn `{}` is not supported.'.format(
          group_spec[0]))
    block_fn = nn_blocks_3d.BottleneckBlock3D
    level = group_idx + 2

    net = self._block_group(
        inputs=net,
        filters=group_spec[1],
        temporal_kernel_sizes=temporal_kernel_sizes[group_idx],
        temporal_strides=temporal_strides[group_idx],
        # Only the first group keeps the spatial resolution of the stem.
        spatial_strides=(1 if group_idx == 0 else 2),
        block_fn=block_fn,
        block_repeats=group_spec[2],
        use_self_gating=(
            use_self_gating[group_idx] if use_self_gating else False),
        name='block_group_l{}'.format(level))
    endpoints[level] = net

  self._output_specs = {
      key: feat.get_shape() for key, feat in endpoints.items()
  }

  super(ResNet3D, self).__init__(inputs=inputs, outputs=endpoints, **kwargs)
def _build_scale_permuted_network(self,
                                  net,
                                  input_width,
                                  weighted_fusion=False):
  """Builds the scale-permuted part of the network.

  For every entry of `self._block_specs`, the two parent features named by
  the spec's `input_offsets` are resampled to the target block's width and
  filter count, fused by addition (optionally with learned, ReLU-clamped
  weights), passed through the activation and fed to a new block group. The
  new feature is appended to `net` so that later blocks can consume it.

  Args:
    net: list of already-built features; it is extended in place with one
      feature per block spec.
    input_width: width of the network input, used to derive the width of
      every feature level.
    weighted_fusion: if True, parents are combined with normalized learned
      weights instead of a plain sum.

  Returns:
    A dict mapping `str(level)` to the feature produced by each block spec
    marked as an output.

  Raises:
    ValueError: if the duplicate-output check below triggers (see the review
      note next to it).
  """
  # Widths of the two features `net` starts out with (levels 1 and 2).
  widths = [
      int(math.ceil(input_width / 2)),
      int(math.ceil(input_width / 2**2)),
  ]
  # How many later blocks consume each feature in `net`.
  out_degree = [0] * len(net)
  endpoints = {}

  for i, block_spec in enumerate(self._block_specs):
    # Clamp the block level to max_level to avoid building blocks smaller
    # than requested. Note that this writes back into the spec object.
    block_spec.level = min(block_spec.level, self._max_level)
    level = block_spec.level

    # Geometry of the block that is about to be built.
    target_width = int(math.ceil(input_width / 2**level))
    target_num_filters = int(FILTER_SIZE_MAP[level] * self._filter_size_scale)

    # Resample both designated parents to the target geometry.
    first_offset = block_spec.input_offsets[0]
    second_offset = block_spec.input_offsets[1]
    parents = []
    for offset in (first_offset, second_offset):
      resampled = self._resample_with_sepconv(
          inputs=net[offset],
          input_width=widths[offset],
          target_width=target_width,
          target_num_filters=target_num_filters)
      parents.append(resampled)
      out_degree[offset] += 1
    x0 = parents[0]

    # Output blocks additionally absorb every so-far-unused feature whose
    # shape already matches the target.
    if block_spec.is_output:
      for j, (feat, degree) in enumerate(zip(net, out_degree)):
        if degree != 0:
          continue
        if feat.shape[2] == target_width and feat.shape[3] == x0.shape[3]:
          parents.append(feat)
          out_degree[j] += 1

    # pylint: disable=g-direct-tensorflow-import
    if weighted_fusion:
      dtype = parents[0].dtype
      # One non-negative scalar weight per parent.
      parent_weights = []
      for j in range(len(parents)):
        fusion_var = tf.Variable(
            1.0, name='block{}_fusion{}'.format(i, j))
        parent_weights.append(tf.nn.relu(tf.cast(fusion_var, dtype=dtype)))
      weights_sum = parent_weights[0]
      for weight in parent_weights[1:]:
        weights_sum = layers.Add()([weights_sum, weight])
      # The small constant keeps the normalization finite when all weights
      # are clamped to zero.
      parents = [
          parent * weight / (weights_sum + 0.0001)
          for parent, weight in zip(parents, parent_weights)
      ]

    # Sum the parents, activate, and build the new block group on top.
    fused = parents[0]
    for parent in parents[1:]:
      fused = layers.Add()([fused, parent])
    fused = tf_utils.get_activation(
        self._activation, use_keras_layer=True)(fused)
    block_out = self._block_group(
        inputs=fused,
        in_filters=target_num_filters,
        out_filters=target_num_filters,
        strides=1,
        se_ratio=self._se_ratio,
        expand_ratio=self._expand_ratio,
        block_repeats=self._block_repeats,
        stochastic_depth_drop_rate=nn_layers.get_stochastic_depth_rate(
            self._init_stochastic_depth_rate, i + 1, len(self._block_specs)),
        name='scale_permuted_block_{}'.format(i + 1))

    net.append(block_out)
    widths.append(target_width)
    out_degree.append(0)

    if not block_spec.is_output:
      continue

    # Record output features.
    # NOTE(review): `endpoints` is keyed by `str(level)` below, while this
    # membership test uses the raw level, so it looks like it can never
    # match. Confirm the intent before changing it: levels clamped to
    # max_level above may legitimately collide and currently overwrite.
    if level in endpoints:
      raise ValueError(
          'Duplicate feats found for output level {}.'.format(level))
    if not self._min_level <= level <= self._max_level:
      logging.warning(
          'SpineNet output level out of range [min_level, max_levle] = '
          '[%s, %s] will not be used for further processing.',
          self._min_level, self._max_level)
    endpoints[str(level)] = block_out

  return endpoints
def __init__(self,
             input_specs: Mapping[str, tf.TensorShape],
             min_level: int = 3,
             max_level: int = 7,
             num_filters: int = 256,
             fusion_type: str = 'sum',
             use_separable_conv: bool = False,
             activation: str = 'relu',
             use_sync_bn: bool = False,
             norm_momentum: float = 0.99,
             norm_epsilon: float = 0.001,
             kernel_initializer: str = 'VarianceScaling',
             kernel_regularizer: Optional[
                 tf.keras.regularizers.Regularizer] = None,
             bias_regularizer: Optional[
                 tf.keras.regularizers.Regularizer] = None,
             **kwargs):
  """Initializes a Feature Pyramid Network (FPN).

  The model is assembled in five steps: 1x1 lateral convolutions on the
  backbone features, a top-down path that upsamples and fuses neighbouring
  levels, a 3x3 convolution on every fused level, extra stride-2 levels above
  the highest backbone level, and finally a normalization layer per output
  level.

  Args:
    input_specs: A `dict` of input specifications. A dictionary consists of
      {level: TensorShape} from a backbone.
    min_level: An `int` of minimum level in FPN output feature maps.
    max_level: An `int` of maximum level in FPN output feature maps.
    num_filters: An `int` number of filters in FPN layers.
    fusion_type: A `str` of `sum` or `concat`. Whether performing sum or
      concat for feature fusion.
    use_separable_conv: A `bool`. If True use separable convolution for
      convolution in FPN layers.
    activation: A `str` name of the activation function.
    use_sync_bn: A `bool`. If True, use synchronized batch normalization.
    norm_momentum: A `float` of normalization momentum for the moving
      average.
    norm_epsilon: A `float` added to variance to avoid dividing by zero.
    kernel_initializer: A `str` name of kernel_initializer for convolutional
      layers.
    kernel_regularizer: A `tf.keras.regularizers.Regularizer` object for
      Conv2D. Default is None.
    bias_regularizer: A `tf.keras.regularizers.Regularizer` object for
      Conv2D.
    **kwargs: Additional keyword arguments to be passed.

  Raises:
    ValueError: if `fusion_type` is neither 'sum' nor 'concat'. The check
      runs while the top-down path is built, i.e. only when there is at
      least one level below the highest backbone level.
  """
  self._config_dict = {
      'input_specs': input_specs,
      'min_level': min_level,
      'max_level': max_level,
      'num_filters': num_filters,
      'fusion_type': fusion_type,
      'use_separable_conv': use_separable_conv,
      'activation': activation,
      'use_sync_bn': use_sync_bn,
      'norm_momentum': norm_momentum,
      'norm_epsilon': norm_epsilon,
      'kernel_initializer': kernel_initializer,
      'kernel_regularizer': kernel_regularizer,
      'bias_regularizer': bias_regularizer,
  }

  # Pick the layer classes requested by the configuration.
  if use_separable_conv:
    conv2d = tf.keras.layers.SeparableConv2D
  else:
    conv2d = tf.keras.layers.Conv2D
  if use_sync_bn:
    norm = tf.keras.layers.experimental.SyncBatchNormalization
  else:
    norm = tf.keras.layers.BatchNormalization
  activation_fn = tf.keras.layers.Activation(
      tf_utils.get_activation(activation))

  # Normalize over the channel axis, wherever the data format puts it.
  if tf.keras.backend.image_data_format() == 'channels_last':
    bn_axis = -1
  else:
    bn_axis = 1

  # Get input feature pyramid from backbone.
  logging.info('FPN input_specs: %s', input_specs)
  inputs = self._build_input_pyramid(input_specs, min_level)
  # Level keys are strings, so they must be compared as integers: a plain
  # max() over the keys is lexicographic and would rank '9' above '10'.
  highest_input_level = max(int(level) for level in inputs.keys())
  backbone_max_level = min(highest_input_level, max_level)

  # Build lateral connections.
  feats_lateral = {}
  for level in range(min_level, backbone_max_level + 1):
    feats_lateral[str(level)] = conv2d(
        filters=num_filters,
        kernel_size=1,
        padding='same',
        kernel_initializer=kernel_initializer,
        kernel_regularizer=kernel_regularizer,
        bias_regularizer=bias_regularizer)(
            inputs[str(level)])

  # Build top-down path: start from the coarsest backbone level and fuse each
  # finer lateral feature with the 2x-upsampled coarser feature.
  feats = {str(backbone_max_level): feats_lateral[str(backbone_max_level)]}
  for level in range(backbone_max_level - 1, min_level - 1, -1):
    feat_a = spatial_transform_ops.nearest_upsampling(
        feats[str(level + 1)], 2)
    feat_b = feats_lateral[str(level)]

    if fusion_type == 'sum':
      feats[str(level)] = feat_a + feat_b
    elif fusion_type == 'concat':
      feats[str(level)] = tf.concat([feat_a, feat_b], axis=-1)
    else:
      raise ValueError(
          'Fusion type {} not supported.'.format(fusion_type))

  # TODO(xianzhi): consider to remove bias in conv2d.
  # Build post-hoc 3x3 convolution kernel.
  for level in range(min_level, backbone_max_level + 1):
    feats[str(level)] = conv2d(
        filters=num_filters,
        strides=1,
        kernel_size=3,
        padding='same',
        kernel_initializer=kernel_initializer,
        kernel_regularizer=kernel_regularizer,
        bias_regularizer=bias_regularizer)(
            feats[str(level)])

  # TODO(xianzhi): consider to remove bias in conv2d.
  # Build coarser FPN levels introduced for RetinaNet.
  for level in range(backbone_max_level + 1, max_level + 1):
    feats_in = feats[str(level - 1)]
    # The first extra level consumes the previous feature as is; every
    # further level applies the activation to its input first.
    if level > backbone_max_level + 1:
      feats_in = activation_fn(feats_in)
    feats[str(level)] = conv2d(
        filters=num_filters,
        strides=2,
        kernel_size=3,
        padding='same',
        kernel_initializer=kernel_initializer,
        kernel_regularizer=kernel_regularizer,
        bias_regularizer=bias_regularizer)(
            feats_in)

  # Apply batch norm layers.
  for level in range(min_level, max_level + 1):
    feats[str(level)] = norm(
        axis=bn_axis, momentum=norm_momentum, epsilon=norm_epsilon)(
            feats[str(level)])

  self._output_specs = {
      str(level): feats[str(level)].get_shape()
      for level in range(min_level, max_level + 1)
  }

  super(FPN, self).__init__(inputs=inputs, outputs=feats, **kwargs)
def __init__(self,
             num_classes,
             upsample_factor=2,
             num_convs=4,
             num_filters=256,
             use_separable_conv=False,
             activation='relu',
             use_sync_bn=False,
             norm_momentum=0.99,
             norm_epsilon=0.001,
             kernel_regularizer=None,
             bias_regularizer=None,
             class_agnostic=False,
             **kwargs):
  """Stores the configuration needed to build the mask head.

  No layers are created here; the arguments are captured in
  `self._config_dict`, and the normalization axis and activation callable are
  resolved up front.

  Args:
    num_classes: `int`, the number of classes.
    upsample_factor: `int`, >= 1, the upsample factor to generate the final
      predicted masks.
    num_convs: `int`, the number of intermediate conv layers before the mask
      prediction layers.
    num_filters: `int`, the number of filters of the intermediate conv
      layers.
    use_separable_conv: `bool`, whether separable conv layers are used.
    activation: `string`, which activation is used, e.g. 'relu', 'swish',
      etc.
    use_sync_bn: `bool`, whether to use synchronized batch normalization
      across different replicas.
    norm_momentum: `float`, the momentum parameter of the normalization
      layers.
    norm_epsilon: `float`, the epsilon parameter of the normalization layers.
    kernel_regularizer: `tf.keras.regularizers.Regularizer` object for layer
      kernel.
    bias_regularizer: `tf.keras.regularizers.Regularizer` object for bias.
    class_agnostic: `bool`, if set, a single channel mask head is shared
      between all classes.
    **kwargs: other keyword arguments passed to Layer.
  """
  super(MaskHead, self).__init__(**kwargs)

  # Snapshot of every constructor argument, in declaration order.
  self._config_dict = dict(
      num_classes=num_classes,
      upsample_factor=upsample_factor,
      num_convs=num_convs,
      num_filters=num_filters,
      use_separable_conv=use_separable_conv,
      activation=activation,
      use_sync_bn=use_sync_bn,
      norm_momentum=norm_momentum,
      norm_epsilon=norm_epsilon,
      kernel_regularizer=kernel_regularizer,
      bias_regularizer=bias_regularizer,
      class_agnostic=class_agnostic,
  )

  # Normalization runs over the channel axis: last for 'channels_last',
  # otherwise axis 1.
  channels_last = tf.keras.backend.image_data_format() == 'channels_last'
  self._bn_axis = -1 if channels_last else 1

  self._activation = tf_utils.get_activation(activation)