def test_unknown_shape(self):
    fn = mish.get_concrete_function(tf.TensorSpec(shape=None, dtype=tf.float32))

    for shape in [(1,), (1, 2), (1, 2, 3), (1, 2, 3, 4)]:
        x = tf.ones(shape=shape, dtype=tf.float32)
        self.assertAllClose(fn(x), mish(x))
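# The test above calls get_concrete_function, which only exists on tf.function
# objects; a minimal sketch of a `mish` defined that way, using the textbook
# formula x * tanh(softplus(x)) (an illustrative assumption, not necessarily
# the suite's actual kernel-backed op):
@tf.function
def mish(x):
    return x * tf.math.tanh(tf.math.softplus(x))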
def build_extractor(self, X):
    """Network structure: h0 is the input layer.

    Seven fully connected layers with batch normalization, dropout after the
    first layer, and residual connections back to X. `mode` selects the
    activation (1: ReLU, 2: swish, 3: mish).
    """
    mode = 1
    if mode == 1:
        h1 = tf.nn.relu(tf.matmul(X, self.ext_W[0]) + self.ext_b[0])
        h1d = tf.nn.dropout(h1, self.pkeep)
        h2bn = self.relu_BN(h1d, self.ext_W[1], self.is_training, "ext_BN_1")
        h3bn = self.relu_BN(h2bn, self.ext_W[2], self.is_training, "ext_BN_2") + X
        h4bn = self.relu_BN(h3bn, self.ext_W[3], self.is_training, "ext_BN_3")
        h5bn = self.relu_BN(h4bn, self.ext_W[4], self.is_training, "ext_BN_4")
        h6bn = self.relu_BN(h5bn, self.ext_W[5], self.is_training, "ext_BN_5") + X
        h7bn = self.relu_BN(h6bn, self.ext_W[6], self.is_training, "ext_BN_6")
    elif mode == 2:
        h1 = tf.nn.swish(tf.matmul(X, self.ext_W[0]) + self.ext_b[0])
        h1d = tf.nn.dropout(h1, self.pkeep)
        h2bn = self.swish_BN(h1d, self.ext_W[1], self.is_training, "ext_BN_1")
        h3bn = self.swish_BN(h2bn, self.ext_W[2], self.is_training, "ext_BN_2") + X
        h4bn = self.swish_BN(h3bn, self.ext_W[3], self.is_training, "ext_BN_3")
        h5bn = self.swish_BN(h4bn, self.ext_W[4], self.is_training, "ext_BN_4")
        h6bn = self.swish_BN(h5bn, self.ext_W[5], self.is_training, "ext_BN_5") + X
        h7bn = self.swish_BN(h6bn, self.ext_W[6], self.is_training, "ext_BN_6")
    elif mode == 3:
        h1 = tfa.activations.mish(tf.matmul(X, self.ext_W[0]) + self.ext_b[0])
        h1d = tf.nn.dropout(h1, self.pkeep)
        h2bn = self.mish_BN(h1d, self.ext_W[1], self.is_training, "ext_BN_1")
        h3bn = self.mish_BN(h2bn, self.ext_W[2], self.is_training, "ext_BN_2") + X
        h4bn = self.mish_BN(h3bn, self.ext_W[3], self.is_training, "ext_BN_3")
        h5bn = self.mish_BN(h4bn, self.ext_W[4], self.is_training, "ext_BN_4")
        h6bn = self.mish_BN(h5bn, self.ext_W[5], self.is_training, "ext_BN_5") + X
        h7bn = self.mish_BN(h6bn, self.ext_W[6], self.is_training, "ext_BN_6")
    y_pred = tf.nn.softmax(tf.matmul(h7bn, self.ext_W[7]) + self.ext_b[7])
    self.extracted_features = h7bn
    return y_pred
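# The residual additions (`+ X`) above only type-check if every hidden layer
# has the same width as the input, so a weight setup consistent with
# build_extractor might look like the following (dim and n_classes are
# hypothetical names; the original initialization is not shown in this
# snippet, and in the class these lists would live on self):
dim, n_classes = 128, 10
ext_W = [tf.Variable(tf.random.truncated_normal([dim, dim], stddev=0.1))
         for _ in range(7)]
ext_W.append(tf.Variable(tf.random.truncated_normal([dim, n_classes], stddev=0.1)))
ext_b = [tf.Variable(tf.zeros([dim])) for _ in range(7)]
ext_b.append(tf.Variable(tf.zeros([n_classes])))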
def __init__(
        self: DarknetConv,
        fil: int,
        ksize: int,
        act: bool = True,        # apply activation
        actfunc: str = 'leaky',  # activation function
        ds: bool = False,        # down-sampling
        bn: bool = True          # batch normalization
) -> None:
    global conv_count
    super().__init__()
    self.weighted_layers = list()
    # padding: down-sampling uses explicit top/left zero padding with stride 2
    if ds:
        self.pad = ZeroPadding2D(((1, 0), (1, 0)))
        strides = 2
        padding = 'VALID'
    else:
        self.pad = tf.identity
        strides = 1
        padding = 'SAME'
    # convolution
    self.conv = Conv2D(
        filters=fil,
        kernel_size=ksize,
        strides=strides,
        padding=padding,
        use_bias=not bn,
        kernel_regularizer=tf.keras.regularizers.l2(0.0005),
        kernel_initializer=tf.random_normal_initializer(stddev=0.01),
        bias_initializer=tf.constant_initializer(0.)
    )
    # batch normalization
    if bn:
        self.bn = BatchNormalization()
    else:
        self.bn = tf.identity
    # activation
    if act and actfunc == 'leaky':
        self.act = Lambda(lambda x: tf.nn.leaky_relu(x, alpha=0.1))
    elif act and actfunc == 'mish':
        self.act = Lambda(lambda x: mish(x))
    else:
        self.act = tf.identity
    self.weighted_layer = WeightedLayer()
    self.weighted_layers.append(self.weighted_layer)
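# DarknetConv above only shows __init__; a hedged sketch of the forward pass
# such a layer usually implements (this chaining order is an assumption, not
# code from the original source):
def call(self, x, training=False):
    x = self.pad(x)    # explicit zero padding only in the down-sampling case
    x = self.conv(x)
    if isinstance(self.bn, BatchNormalization):
        x = self.bn(x, training=training)
    else:
        x = self.bn(x)  # tf.identity when bn=False
    return self.act(x)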
def verify_funcs_are_equivalent(self, dtype):
    x_np = np.random.uniform(-10, 10, size=(4, 4)).astype(dtype)
    x = tf.convert_to_tensor(x_np)

    with tf.GradientTape(persistent=True) as t:
        t.watch(x)
        y_native = mish(x)
        y_py = _mish_py(x)

    self.assertAllCloseAccordingToType(y_native, y_py)

    grad_native = t.gradient(y_native, x)
    grad_py = t.gradient(y_py, x)
    self.assertAllCloseAccordingToType(grad_native, grad_py)
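# The pure-Python reference `_mish_py` that the test compares against is not
# shown here; a plausible version is simply the definition of Mish,
# mish(x) = x * tanh(softplus(x)):
def _mish_py(x):
    return x * tf.math.tanh(tf.math.softplus(x))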
def call(self, input_tensor):
    if self._sc_layer:
        residual = self._sc_layer(input_tensor)
    else:
        residual = input_tensor

    output = self._conv1(input_tensor)
    output = self._bn1(output)
    output = self._activation1(output)
    output = self._conv2(output)
    output = self._bn2(output)
    output = self._activation2(output)
    output = self._conv3(output)
    output = self._bn3(output)
    output = Add()([output, residual])
    return mish(output)
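# `_sc_layer` above is the shortcut branch, typically a 1x1 projection used
# when the residual's channel count or spatial stride differs from the block
# output; a sketch of such a projection (the filter count is arbitrary):
sc_layer = tf.keras.Sequential([
    tf.keras.layers.Conv2D(filters=256, kernel_size=1, use_bias=False),
    tf.keras.layers.BatchNormalization(),
])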
def verify_funcs_are_equivalent(dtype):
    x_np = np.random.uniform(-10, 10, size=(4, 4)).astype(dtype)
    x = tf.convert_to_tensor(x_np)

    with tf.GradientTape(persistent=True) as t:
        t.watch(x)
        y_native = mish(x)
        y_py = _mish_py(x)

    test_utils.assert_allclose_according_to_type(y_native, y_py)

    grad_native = t.gradient(y_native, x)
    grad_py = t.gradient(y_py, x)
    test_utils.assert_allclose_according_to_type(grad_native, grad_py, atol=1e-5)
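# A sketch of how this module-level helper might be driven from pytest,
# assuming the suite parametrizes over dtypes (the decorator and the dtype
# list are assumptions about the test conventions, not shown in the source):
import numpy as np
import pytest

@pytest.mark.parametrize("dtype", [np.float16, np.float32, np.float64])
def test_mish_same_as_py_func(dtype):
    verify_funcs_are_equivalent(dtype)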
def mish_BN(self, x, w, is_training, name="tmp"):
    """Linear layer followed by batch normalization and mish.

    Args:
        x: input feature tensor
        w: weight matrix
        is_training: True during training, False during testing
    """
    return tfa.activations.mish(
        tf.layers.batch_normalization(
            tf.matmul(x, w),
            momentum=0.99,
            epsilon=0.001,
            center=True,
            scale=False,
            beta_initializer=tf.zeros_initializer(),
            gamma_initializer=tf.ones_initializer(),
            moving_mean_initializer=tf.zeros_initializer(),
            moving_variance_initializer=tf.ones_initializer(),
            training=is_training,
            trainable=True,
            name=name,
            reuse=None))
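# tf.layers.batch_normalization (TF1) registers its moving-statistics updates
# in tf.GraphKeys.UPDATE_OPS; the train op must depend on them or the moving
# mean/variance never change. A minimal sketch (`loss` is a hypothetical
# tensor from the surrounding graph):
update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
with tf.control_dependencies(update_ops):
    train_op = tf.train.AdamOptimizer(1e-3).minimize(loss)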
def test_mish(self, dtype):
    x = tf.constant([-2.0, -1.0, 0.0, 1.0, 2.0], dtype=dtype)
    expected_result = tf.constant(
        [-0.2525015, -0.30340144, 0.0, 0.86509836, 1.943959], dtype=dtype)
    self.assertAllCloseAccordingToType(mish(x), expected_result)
def test_mish(dtype):
    x = tf.constant([-2.0, -1.0, 0.0, 1.0, 2.0], dtype=dtype)
    expected_result = tf.constant(
        [-0.2525015, -0.30340144, 0.0, 0.86509836, 1.943959], dtype=dtype)
    test_utils.assert_allclose_according_to_type(mish(x), expected_result)
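# The expected constants in both tests follow from mish(x) = x * tanh(softplus(x));
# e.g. mish(1.0) = tanh(ln(1 + e)) ≈ 0.86509836. A quick standalone check:
import math

for v in (-2.0, -1.0, 0.0, 1.0, 2.0):
    print(v, v * math.tanh(math.log1p(math.exp(v))))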
def call(self, x, training=None):
    return mish(x)
def activate(input_layer):
    out = BatchNormalization()(input_layer)
    out = mish(out)
    return out
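# A usage sketch of activate() inside a Keras functional graph (layer sizes
# are arbitrary, and `mish` is assumed to be the same activation used
# throughout this file):
inputs = tf.keras.Input(shape=(32, 32, 3))
x = tf.keras.layers.Conv2D(16, 3, padding='same')(inputs)
x = activate(x)   # BatchNormalization followed by mish
model = tf.keras.Model(inputs, x)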