def Mildnet_vgg16_big():
    """MILDNet 'big' embedding model: VGG16 trunk with four pooled skip
    connections plus two shallow strided conv branches, merged into a
    4096-D L2-normalised embedding."""
    backbone = VGG16(weights="imagenet", include_top=False,
                     input_shape=(224, 224, 3))
    # Freeze the early VGG layers; only fine-tune the deeper blocks.
    for frozen in backbone.layers[:10]:
        frozen.trainable = False

    skip_outputs = get_layers_output_by_name(
        backbone, ["block1_pool", "block2_pool", "block3_pool", "block4_pool"])

    # Deep trunk: GAP of the backbone output, enriched with pooled skips.
    trunk = GlobalAveragePooling2D()(backbone.output)
    for _, skip in skip_outputs.items():
        trunk = concatenate([trunk, GlobalAveragePooling2D()(skip)])
    trunk = Dense(2048, activation='relu')(trunk)
    trunk = Dropout(0.6)(trunk)
    trunk = Dense(2048, activation='relu')(trunk)
    trunk = Lambda(lambda t: K.l2_normalize(t, axis=1))(trunk)

    # Shallow branch 1: coarse 8x8 conv at stride 16 over the raw input.
    branch1 = Conv2D(96, kernel_size=(8, 8), strides=(16, 16),
                     padding='same')(backbone.input)
    branch1 = MaxPool2D(pool_size=(3, 3), strides=(4, 4),
                        padding='same')(branch1)
    branch1 = Flatten()(branch1)
    branch1 = Lambda(lambda t: K.l2_normalize(t, axis=1))(branch1)

    # Shallow branch 2: even coarser view at stride 32.
    branch2 = Conv2D(96, kernel_size=(8, 8), strides=(32, 32),
                     padding='same')(backbone.input)
    branch2 = MaxPool2D(pool_size=(7, 7), strides=(2, 2),
                        padding='same')(branch2)
    branch2 = Flatten()(branch2)
    branch2 = Lambda(lambda t: K.l2_normalize(t, axis=1))(branch2)

    merged = concatenate([branch1, branch2])
    merged = concatenate([merged, trunk], axis=1)
    embedding = Dense(4096)(merged)
    embedding = Lambda(lambda t: K.l2_normalize(t, axis=1))(embedding)
    return tf.keras.models.Model(inputs=backbone.input, outputs=embedding)
def call(self, inputs, **kwargs):
    """GE2E-style scaled-cosine similarity matrix.

    NOTE(review): assumes `inputs` has shape (N * M, emb_dim) — N speakers
    with M utterance embeddings each; confirm against the caller.
    Returns an (N * M, N) matrix of affinely scaled cosine similarities.
    """
    embedded_split = K.reshape(inputs, shape=(self.N, self.M, -1))
    # Unit-length centroid per speaker.
    center = K.l2_normalize(K.mean(embedded_split, axis=1), axis=-1)
    # Per-utterance "leave-one-out" centroids: speaker sum minus the
    # utterance itself, then normalised (scale is irrelevant after
    # l2_normalize, so no division by M - 1 is needed).
    center_except = K.l2_normalize(K.reshape(
        K.sum(embedded_split, axis=1, keepdims=True) - embedded_split,
        shape=(self.N * self.M, -1)), axis=-1)
    # Similarity of every utterance against every speaker centroid; the
    # leave-one-out centroid is used against the utterance's own speaker
    # (i == j) to avoid trivial self-similarity.
    similarity = K.concatenate([
        K.concatenate([
            K.sum(center_except[i * self.M:(i + 1) * self.M, :] *
                  embedded_split[j, :, :], axis=1, keepdims=True)
            if i == j else
            K.sum(center[i:(i + 1), :] * embedded_split[j, :, :],
                  axis=1, keepdims=True)
            for i in range(self.N)
        ], axis=1) for j in range(self.N)
    ], axis=0)
    # Learned affine scaling (w, b) of the raw similarities.
    similarity = self.w * similarity + self.b
    return similarity
def visnet_lrn2d_model():
    """VisNet with LRN2D shallow branches: VGG16 trunk plus two
    downsampled conv paths, all L2-normalised and merged into a 4096-D
    embedding."""
    backbone = VGG16(weights="imagenet", include_top=False,
                     input_shape=(224, 224, 3))

    # Deep trunk: GAP followed by two FC + dropout blocks.
    trunk = GlobalAveragePooling2D()(backbone.output)
    trunk = Dense(4096, activation='relu')(trunk)
    trunk = Dropout(0.6)(trunk)
    trunk = Dense(4096, activation='relu')(trunk)
    trunk = Dropout(0.6)(trunk)
    trunk = Lambda(lambda t: K.l2_normalize(t, axis=1))(trunk)

    # Shallow branch 1: 4x downsample -> conv -> LRN -> padded pool.
    branch1 = MaxPooling2D(pool_size=4, strides=4)(backbone.input)
    branch1 = Conv2D(96, kernel_size=8, strides=4,
                     activation='relu')(branch1)
    branch1 = LRN2D(n=5)(branch1)
    branch1 = ZeroPadding2D(padding=(3, 3))(branch1)
    branch1 = MaxPooling2D(pool_size=7, strides=4, padding='same')(branch1)
    branch1 = Flatten()(branch1)
    branch1 = Lambda(lambda t: K.l2_normalize(t, axis=1))(branch1)

    # Shallow branch 2: 8x downsample variant of the same pattern.
    branch2 = MaxPooling2D(pool_size=8, strides=8)(backbone.input)
    branch2 = Conv2D(96, kernel_size=8, strides=4,
                     activation='relu')(branch2)
    branch2 = LRN2D(n=5)(branch2)
    branch2 = ZeroPadding2D(padding=(1, 1))(branch2)
    branch2 = MaxPooling2D(pool_size=3, strides=2, padding='same')(branch2)
    branch2 = Flatten()(branch2)
    branch2 = Lambda(lambda t: K.l2_normalize(t, axis=1))(branch2)

    merged = concatenate([branch1, branch2])
    merged = concatenate([merged, trunk])
    embedding = Dense(4096)(merged)
    embedding = Lambda(lambda t: K.l2_normalize(t, axis=1))(embedding)
    return Model(inputs=backbone.input, outputs=embedding)
def visnet_model():
    """VisNet: VGG16 trunk plus two strided shallow conv branches merged
    into a 4096-D L2-normalised embedding."""
    backbone = VGG16(weights="imagenet", include_top=False,
                     input_shape=(224, 224, 3))

    # Deep trunk head.
    trunk = GlobalAveragePooling2D()(backbone.output)
    trunk = Dense(4096, activation='relu')(trunk)
    trunk = Dropout(0.6)(trunk)
    trunk = Dense(4096, activation='relu')(trunk)
    trunk = Dropout(0.6)(trunk)
    trunk = Lambda(lambda t: K.l2_normalize(t, axis=1))(trunk)

    # Shallow branch at stride 16 over the raw image.
    branch1 = Conv2D(96, kernel_size=(8, 8), strides=(16, 16),
                     padding='same')(backbone.input)
    branch1 = MaxPool2D(pool_size=(3, 3), strides=(4, 4),
                        padding='same')(branch1)
    branch1 = Flatten()(branch1)
    branch1 = Lambda(lambda t: K.l2_normalize(t, axis=1))(branch1)

    # Shallow branch at stride 32.
    branch2 = Conv2D(96, kernel_size=(8, 8), strides=(32, 32),
                     padding='same')(backbone.input)
    branch2 = MaxPool2D(pool_size=(7, 7), strides=(2, 2),
                        padding='same')(branch2)
    branch2 = Flatten()(branch2)
    branch2 = Lambda(lambda t: K.l2_normalize(t, axis=1))(branch2)

    merged = concatenate([branch1, branch2])
    merged = concatenate([merged, trunk], axis=1)
    embedding = Dense(4096)(merged)
    embedding = Lambda(lambda t: K.l2_normalize(t, axis=1))(embedding)
    return tf.keras.models.Model(inputs=backbone.input, outputs=embedding)
def norm(fc2):
    """Pool per-pixel colour estimates into a unit illuminant estimate.

    Args:
        fc2: 4-D tensor (batch, H, W, C) of per-location colour estimates.

    Returns:
        2-D tensor (batch, C) of unit-length illuminant estimates.
    """
    # Per-pixel unit vectors so each location votes with equal weight.
    fc2_norm = K.l2_normalize(fc2, axis=3)
    # FIX: K.sum replaces the deprecated/removed `K.tf.reduce_sum` alias
    # (the Keras backend no longer exposes `K.tf`); both sum over the
    # spatial axes.
    illum_est = K.sum(fc2_norm, axis=(1, 2))
    illum_est = K.l2_normalize(illum_est)
    return illum_est
def ranknet():
    """RankNet-style embedding model: VGG19 trunk plus two subsampled
    shallow conv streams, merged into a 4096-D L2-normalised embedding."""
    backbone = VGG19(weights="imagenet", include_top=False,
                     input_shape=(224, 224, 3))

    # Deep trunk head.
    trunk = GlobalAveragePooling2D()(backbone.output)
    trunk = Dense(4096, activation='relu')(trunk)
    trunk = Dropout(0.5)(trunk)
    trunk = Dense(4096, activation='relu')(trunk)
    trunk = Dropout(0.5)(trunk)
    trunk = Lambda(lambda t: K.l2_normalize(t, axis=1))(trunk)

    # Stream 1: 4x subsample of the raw image.
    s1 = MaxPool2D(pool_size=(4, 4), strides=(4, 4),
                   padding='valid')(backbone.input)
    s1 = ZeroPadding2D(padding=(4, 4), data_format=None)(s1)
    s1 = Conv2D(96, kernel_size=(8, 8), strides=(4, 4), padding='valid')(s1)
    s1 = ZeroPadding2D(padding=(2, 2), data_format=None)(s1)
    s1 = MaxPool2D(pool_size=(7, 7), strides=(4, 4), padding='valid')(s1)
    s1 = Flatten()(s1)

    # Stream 2: 8x subsample of the raw image.
    s2 = MaxPool2D(pool_size=(8, 8), strides=(8, 8),
                   padding='valid')(backbone.input)
    s2 = ZeroPadding2D(padding=(4, 4), data_format=None)(s2)
    s2 = Conv2D(96, kernel_size=(8, 8), strides=(4, 4), padding='valid')(s2)
    s2 = ZeroPadding2D(padding=(1, 1), data_format=None)(s2)
    s2 = MaxPool2D(pool_size=(3, 3), strides=(2, 2), padding='valid')(s2)
    s2 = Flatten()(s2)

    shallow = concatenate([s1, s2])
    shallow = Lambda(lambda t: K.l2_normalize(t, axis=1))(shallow)
    merged = concatenate([shallow, trunk], axis=1)
    embedding = Dense(4096)(merged)
    embedding = Lambda(lambda t: K.l2_normalize(t, axis=1))(embedding)
    return tf.keras.models.Model(inputs=backbone.input, outputs=embedding)
def call(self, inputs, **kwargs):
    """Cosine similarity between matched rows of two embedding batches."""
    left, right = inputs
    left = K.l2_normalize(left, axis=-1)
    right = K.l2_normalize(right, axis=-1)
    # Full pairwise similarity matrix, then keep only the matched
    # (diagonal) pairs.
    full = K.dot(left, K.transpose(right))
    return tf.linalg.tensor_diag_part(full)
def Mildnet_mobilenet():
    """MILDNet variant on a MobileNet trunk with five pooled skip
    connections feeding a 1024-D L2-normalised embedding head.

    NOTE(review): the returned model also takes two extra Input tensors
    that are never connected to the output — presumably placeholders for
    a triplet setup; verify against the training code.
    """
    vgg_model = MobileNet(weights=None, include_top=False,
                          input_shape=(224, 224, 3))
    intermediate_layer_outputs = get_layers_output_by_name(
        vgg_model,
        ["conv_dw_1_relu", "conv_dw_2_relu", "conv_dw_4_relu",
         "conv_dw_6_relu", "conv_dw_12_relu"])
    convnet_output = GlobalAveragePooling2D()(vgg_model.output)
    for layer_name, output in intermediate_layer_outputs.items():
        output = GlobalAveragePooling2D()(output)
        convnet_output = concatenate([convnet_output, output])
    # BUG FIX: the original reassigned convnet_output to a fresh
    # GlobalAveragePooling2D(vgg_model.output) at this point, silently
    # discarding every skip connection concatenated in the loop above.
    convnet_output = Dense(1024, activation='relu')(convnet_output)
    convnet_output = Dropout(0.5)(convnet_output)
    convnet_output = Dense(1024, activation='relu')(convnet_output)
    convnet_output = Lambda(lambda x: K.l2_normalize(x, axis=1))(
        convnet_output)

    first_input = Input(shape=(224, 224, 3))
    second_input = Input(shape=(224, 224, 3))
    final_model = tf.keras.models.Model(
        inputs=[first_input, second_input, vgg_model.input],
        outputs=convnet_output)
    return final_model
def GetModel():
    """Build (embedding_model, triplet_model) on a frozen MobileNetV2.

    The triplet model shares the embedding model across anchor/positive/
    negative inputs; the triplet loss is attached via `add_loss`.
    """
    base_model = MobileNetV2(input_shape=(224, 224, 3), weights='imagenet',
                             include_top=False, pooling='max')
    # Backbone stays frozen; only the projection head trains.
    for layer in base_model.layers:
        layer.trainable = False

    head = Dropout(0.6)(base_model.output)
    head = Dense(embedding_dim)(head)
    head = Lambda(lambda t: K.l2_normalize(t, axis=1))(head)
    embedding_model = Model(base_model.input, head, name='embedding')

    input_shape = (image_size, image_size, 3)
    anchor_input = Input(input_shape, name='anchor_input')
    positive_input = Input(input_shape, name='positive_input')
    negative_input = Input(input_shape, name='negative_input')

    inputs = [anchor_input, positive_input, negative_input]
    outputs = [embedding_model(anchor_input),
               embedding_model(positive_input),
               embedding_model(negative_input)]

    triplet_model = Model(inputs, outputs)
    triplet_model.add_loss(K.mean(triplet_loss(outputs)))
    return embedding_model, triplet_model
def Mildnet_vgg16():
    """MILDNet: VGG16 trunk with four pooled skip connections feeding a
    2048-D L2-normalised embedding head."""
    backbone = VGG16(weights="imagenet", include_top=False,
                     input_shape=(224, 224, 3))
    # Only fine-tune layers past the 10th.
    for frozen in backbone.layers[:10]:
        frozen.trainable = False

    skips = get_layers_output_by_name(
        backbone, ["block1_pool", "block2_pool", "block3_pool",
                   "block4_pool"])

    features = GlobalAveragePooling2D()(backbone.output)
    for _, skip in skips.items():
        features = concatenate([features, GlobalAveragePooling2D()(skip)])

    features = Dense(2048, activation='relu')(features)
    features = Dropout(0.6)(features)
    features = Dense(2048, activation='relu')(features)
    features = Lambda(lambda t: K.l2_normalize(t, axis=1))(features)
    return tf.keras.models.Model(inputs=backbone.input, outputs=features)
def SS_VLAD_best(dimensions=(59, 201), num_speak=2000, emb_dim=64,
                 clusters=14):
    """Speaker-ID model: stacked BiLSTMs + VLAD pooling + softmax head.

    Args:
        dimensions: (feature_dim, time_steps) of the input features.
            FIX: default is now a tuple — the original mutable list
            default is evaluated once and shared across calls, so any
            mutation would leak between invocations.
        num_speak: number of speaker classes in the softmax output.
        emb_dim: size of the bottleneck embedding.
        clusters: number of VLAD cluster centres.

    Returns:
        Uncompiled Keras Model mapping features to speaker posteriors.
    """
    input_feat = Input(shape=(dimensions[1], dimensions[0]))
    # Bidirectional layers
    x_1 = Bidirectional(CuDNNLSTM(200, return_sequences=True))(input_feat)
    x_2 = Bidirectional(CuDNNLSTM(200, return_sequences=True))(x_1)
    x_3 = Bidirectional(CuDNNLSTM(200, return_sequences=True))(x_2)
    # Concatenate all three BiLSTM outputs along the feature axis.
    x_conc = Concatenate(axis=2)([x_1, x_2, x_3])
    emb = TimeDistributed(Dense(256, activation="relu"))(x_conc)
    emb = BatchNormalization()(emb)
    # Embedding layer: VLAD pooling over time, then a bottleneck.
    emb = VLAD(k_centers=clusters)(emb)
    emb = BatchNormalization()(emb)
    emb = Dense(emb_dim, activation="relu")(emb)
    emb = BatchNormalization()(emb)
    emb = Lambda(lambda x: K.l2_normalize(x, axis=1))(emb)
    # Softmax layer
    softmax = Dense(num_speak, activation="softmax")(emb)
    test_model = Model(inputs=input_feat, outputs=softmax)
    return test_model
def call(self, u_vecs, **kwargs):
    """Capsule layer forward pass with dynamic routing.

    NOTE(review): assumes `u_vecs` is (batch, input_num_capsule,
    input_dim_capsule) — confirm against the layer's build().
    Returns activated capsules of shape (batch, num_capsule, dim_capsule).
    """
    # Project input capsules to "prediction vectors" u_hat — shared
    # weights use a conv1d, otherwise a locally-connected projection.
    if self.share_weights:
        u_hat_vecs = K.conv1d(u_vecs, self.W)
    else:
        u_hat_vecs = K.local_conv1d(u_vecs, self.W, [1], [1])

    batch_size = K.shape(u_vecs)[0]
    input_num_capsule = K.shape(u_vecs)[1]
    u_hat_vecs = K.reshape(u_hat_vecs,
                           (batch_size, input_num_capsule,
                            self.num_capsule, self.dim_capsule))
    u_hat_vecs = K.permute_dimensions(u_hat_vecs, (0, 2, 1, 3))
    # final u_hat_vecs.shape = [None, num_capsule, input_num_capsule, dim_capsule]

    # Routing logits start at zero (uniform coupling after softmax).
    b = K.zeros_like(
        u_hat_vecs[:, :, :, 0])  # shape = [None, num_capsule, input_num_capsule]
    for i in range(self.routings):
        c = softmax(b, 1)  # coupling coefficients
        o = K.batch_dot(c, u_hat_vecs, [2, 2])
        if K.backend() == 'theano':
            # Theano's batch_dot keeps an extra axis; collapse it.
            o = K.sum(o, axis=1)
        if i < self.routings - 1:
            # Between iterations: normalise the interim output and update
            # the logits by agreement with the prediction vectors.
            o = K.l2_normalize(o, -1)
            b = K.batch_dot(o, u_hat_vecs, [2, 3])
            if K.backend() == 'theano':
                b = K.sum(b, axis=1)
    return self.activation(o)
def call(self, inputs, **kwargs):
    """Run the stacked LSTM + projection layers (lstm1/proj1 ... lstmN/projN)
    and return L2-normalised embeddings."""
    x = inputs
    for idx in range(1, self.num_layers + 1):
        # Layers are stored as numbered attributes on the instance.
        x = getattr(self, 'lstm' + str(idx))(x)
        x = getattr(self, 'proj' + str(idx))(x)
    # L2-normalize to get embeddings
    return K.l2_normalize(x, axis=-1)
def Mildnet_without_skip_big():
    """MILDNet 'big' head without skip connections on a VGG16 trunk.

    NOTE(review): Dense is applied directly to the 4-D conv output — no
    pooling or flatten — so the FC layers act per spatial location. The
    skip variants pool first; confirm this difference is intentional.
    """
    backbone = VGG16(weights="imagenet", include_top=False,
                     input_shape=(224, 224, 3))
    head = Dense(2048, activation='relu')(backbone.output)
    head = Dropout(0.6)(head)
    head = Dense(2048, activation='relu')(head)
    head = Lambda(lambda t: K.l2_normalize(t, axis=1))(head)
    return tf.keras.models.Model(inputs=backbone.input, outputs=head)
def call(self, inputs, mask=None):
    """Additive-angular-margin softmax head (ArcFace-style).

    inputs[0]: feature tensor X; inputs[1]: labels, either 1-D indices
    (shape (..., 1)) or already one-hot. Returns class probabilities with
    the margin `self.m` applied to each sample's target logit, scaled by
    `self.s`.
    """
    cos_m = math.cos(self.m)
    sin_m = math.sin(self.m)
    mm = sin_m * self.m
    threshold = math.cos(math.pi - self.m)
    # features
    X = inputs[0]
    # 1-D or one-hot label works as mask
    Y_mask = inputs[1]
    # If Y_mask is not in one-hot form, transfer it to one-hot form.
    if Y_mask.shape[-1] == 1:
        Y_mask = K.cast(Y_mask, tf.int32)
        Y_mask = K.reshape(K.one_hot(Y_mask, self.class_num),
                           (-1, self.class_num))
    X_normed = K.l2_normalize(X, axis=1)  # L2 Normalized X
    W_normed = K.l2_normalize(self.W, axis=0)  # L2 Normalized Weights
    # cos(theta + m) via the angle-addition identity.
    cos_theta = K.dot(X_normed, W_normed)  # matrix multiplication
    cos_theta2 = K.square(cos_theta)
    sin_theta2 = 1. - cos_theta2
    # epsilon keeps sqrt differentiable at sin_theta = 0.
    sin_theta = K.sqrt(sin_theta2 + K.epsilon())
    cos_tm = self.s * ((cos_theta * cos_m) - (sin_theta * sin_m))
    # This condition controls that theta + m should be in range [0, pi]:
    #   0 <= theta + m <= pi   i.e.   -m <= theta <= pi - m
    cond_v = cos_theta - threshold
    cond = K.cast(K.relu(cond_v), dtype=tf.bool)
    # Fallback (monotonic) penalty when theta + m would exceed pi.
    keep_val = self.s * (cos_theta - mm)
    cos_tm_temp = tf.where(cond, cos_tm, keep_val)
    # mask by label: margin logit on the target class, plain scaled
    # logit everywhere else.
    # Y_mask =+ K.epsilon() # Why???
    inv_mask = 1. - Y_mask
    s_cos_theta = self.s * cos_theta
    output = K.softmax((s_cos_theta * inv_mask) + (cos_tm_temp * Y_mask))
    return output
def _attend_over_memory(self, inputs, memory, ws, num_memory_slots,
                        rel_table, pos_table):
    """One relational-memory update: attend over [memory; input], then a
    transformer-style norm + MLP block with residual connections.

    Returns:
        (outputs, new_memory): the output token (memory-slot axis
        squeezed) and the updated memory slots.
    """
    # Project the input token and append it as an extra memory slot.
    inputs = K.dot(inputs, ws["input_kernel"])
    inputs = K.bias_add(inputs, ws["input_bias"])
    inputs = K.expand_dims(inputs, axis=1)
    memory_plus_inputs = K.concatenate([memory, inputs], axis=1)

    context_layer = self._attention_layer(memory_plus_inputs, ws,
                                          num_memory_slots, rel_table,
                                          pos_table)

    # Both layer-norm shifts and both MLP biases are packed pairwise in
    # single weight tensors; split them out.
    beta1, beta2 = array_ops.split(ws["layer_norm_beta"], 2, axis=0)
    mlp_b1, mlp_b2 = array_ops.split(ws["mlp_bias"], 2, axis=0)

    # Residual + normalisation (mean-centred then l2-normalised).
    # NOTE(review): l2-normalising instead of dividing by the std differs
    # from standard LayerNorm by a constant sqrt(dim) factor — presumably
    # absorbed by layer_norm_gamma; confirm.
    context_layer = memory_plus_inputs + context_layer
    context_layer = K.l2_normalize(
        context_layer - K.mean(context_layer, axis=-1, keepdims=True),
        axis=-1)
    context_layer = context_layer * ws["layer_norm_gamma"][:, :, :self.units]
    context_layer = K.bias_add(context_layer, beta1)

    # Position-wise two-layer MLP with residual.
    mlp_layer = K.dot(context_layer, ws["mlp_kernel"][:, :self.units])
    mlp_layer = K.bias_add(mlp_layer, mlp_b1)
    mlp_layer = self.mlp_activation(mlp_layer)
    mlp_layer = K.dot(mlp_layer, ws["mlp_kernel"][:, self.units:])
    mlp_layer = K.bias_add(mlp_layer, mlp_b2)
    context_layer = context_layer + mlp_layer

    # Second normalisation with the second gamma/beta halves.
    context_layer = K.l2_normalize(
        context_layer - K.mean(context_layer, axis=-1, keepdims=True),
        axis=-1)
    context_layer = context_layer * ws["layer_norm_gamma"][:, :, self.units:]
    context_layer = K.bias_add(context_layer, beta2)

    # Split back into updated memory slots and the single output token.
    new_memory, outputs = array_ops.split(context_layer,
                                          [num_memory_slots, 1], axis=1)
    outputs = K.squeeze(outputs, axis=1)
    return outputs, new_memory
def call(self, x, mask=None):
    """
    The actual processing in the layer: normalize, pad, then correlate.

    Args:
        x: pair [input_1, input_2] of 4-D tensors (batch, H, W, C).

    Returns:
        Correlation volume of shape (batch, H, W, output_dim).
    """
    input_1, input_2 = x
    input_shape = input_1.shape
    # assert input_shape == input_2._keras_shape
    self.H = input_shape[1]
    self.W = input_shape[2]
    self.C = input_shape[3]

    # BUG FIX: the original compared strings with `is`, which tests object
    # identity, not equality — it only worked by accident of CPython
    # string interning and emits SyntaxWarning on modern Python.
    # normalization
    if self.use_norm == 'euclidean':
        input_1 = K.l2_normalize(input_1, axis=2)
        input_2 = K.l2_normalize(input_2, axis=2)
    if self.use_norm == 'scaling':
        # Min-max scale along the width axis.
        input_1_min = K.min(input_1, axis=2, keepdims=True)
        input_1_max = K.max(input_1, axis=2, keepdims=True)
        input_1 = (input_1 - input_1_min) / (input_1_max - input_1_min
                                             + 0.000001)
        input_2_min = K.min(input_2, axis=2, keepdims=True)
        input_2_max = K.max(input_2, axis=2, keepdims=True)
        input_2 = (input_2 - input_2_min) / (input_2_max - input_2_min
                                             + 0.000001)
    if self.use_norm == 'standardization':
        # Mean-centre (with a small offset) then l2-normalize.
        input_1 = (input_1 - K.mean(input_1, axis=2, keepdims=True)) + 0.00001
        input_1 = K.l2_normalize(input_1, axis=2)
        input_2 = (input_2 - K.mean(input_2, axis=2, keepdims=True)) + 0.00001
        input_2 = K.l2_normalize(input_2, axis=2)

    # Pad the first input circularly, so that a correlation can be
    # computed for every horizontal position.
    padding1 = RangePadding2D(padding=self.W // 2)(input_1)
    # How tf.scan works (Chinese write-up): https://zhuanlan.zhihu.com/p/96503559
    out = tf.scan(self.single_sample_corr,
                  elems=[padding1, input_2],
                  initializer=(K.zeros((int(self.H), int(self.W),
                                        int(self.output_dim)))))
    return out
def __init__(self, input_tensor, losses, input_range=(0, 255),
             wrt_tensor=None, norm_grads=True):
    """Creates an optimizer that minimizes weighted loss function.

    Args:
        input_tensor: An input tensor of shape: `(samples, channels, image_dims...)` if
            `image_data_format= channels_first` or `(samples, image_dims..., channels)` if
            `image_data_format=channels_last`.
        losses: List of ([Loss](vis.losses#Loss), weight) tuples.
        input_range: Specifies the input range as a `(min, max)` tuple. This is used to rescale the
            final optimized input to the given range. (Default value=(0, 255))
        wrt_tensor: Short for, with respect to. This instructs the optimizer that the aggregate loss from `losses`
            should be minimized with respect to `wrt_tensor`.
            `wrt_tensor` can be any tensor that is part of the model graph. Default value
            is set to None which means that loss will simply be minimized with respect to `input_tensor`.
        norm_grads: True to normalize gradients. Normalization avoids very small or large gradients
            and ensures a smooth gradient descent process. If you want the actual gradient
            (for example, visualizing attention), set this to false.
    """
    self.input_tensor = input_tensor
    self.input_range = input_range
    self.loss_names = []
    self.loss_functions = []
    self.wrt_tensor = self.input_tensor if wrt_tensor is None else wrt_tensor
    if self.input_tensor is self.wrt_tensor:
        self.wrt_tensor_is_input_tensor = True
        # Work on an identity copy so the graph keeps a distinct node for
        # the "with respect to" tensor.
        self.wrt_tensor = K.identity(self.wrt_tensor)
    else:
        self.wrt_tensor_is_input_tensor = False

    overall_loss = None
    for loss, weight in losses:
        # Perf optimization. Don't build loss function with 0 weight.
        if weight != 0:
            loss_fn = weight * loss.build_loss()
            overall_loss = loss_fn if overall_loss is None else overall_loss + loss_fn
            self.loss_names.append(loss.name)
            self.loss_functions.append(loss_fn)

    # Compute gradient of overall with respect to `wrt` tensor.
    if self.wrt_tensor_is_input_tensor:
        grads = K.gradients(overall_loss, self.input_tensor)[0]
    else:
        grads = K.gradients(overall_loss, self.wrt_tensor)[0]
    if norm_grads:
        # Normalising keeps gradient-step magnitudes stable.
        grads = K.l2_normalize(grads)

    # The main function to compute various quantities in optimization loop.
    self.compute_fn = K.function(
        [self.input_tensor, K.learning_phase()],
        self.loss_functions + [overall_loss, grads, self.wrt_tensor])
def _l2_normalize(x, axis=-1):
    """Calculate L2 normalization.

    Args:
        x: input tensor.
        axis: axis along which to normalize.

    Returns:
        L2 normalized tensor.
    """
    return keras_backend.l2_normalize(x, axis=axis)
def max_singular_val(w, u, fully_differentiable=False, ip=1):
    """Estimate the largest singular value of `w` via power iteration.

    Args:
        w: weight tensor whose spectral norm is estimated.
        u: running left-singular-vector estimate from the previous call.
        fully_differentiable: if False, gradients are stopped through the
            matrix used for the power iterations (standard spectral-norm
            practice); the final sigma still uses the full-gradient `w`.
        ip: number of power iterations.

    Returns:
        (sigma, u_bar): the singular-value estimate and the updated
        vector to feed back in on the next call.
    """
    if not fully_differentiable:
        w_ = K.stop_gradient(w)
    else:
        w_ = w
    u = K.expand_dims(u, axis=-1)
    u_bar = u
    for _ in range(ip):
        # v ~ normalize(w^T u), then u ~ normalize(w v).
        v_bar = tf.matmul(w_, u_bar, transpose_a=True)
        v_bar = K.l2_normalize(v_bar, axis=(-1, -2))
        u_bar_raw = tf.matmul(w_, v_bar)
        u_bar = K.l2_normalize(u_bar_raw, axis=(-1, -2))
    # sigma = u^T w v — note the full-gradient `w` here, not `w_`.
    sigma = tf.matmul(u_bar, tf.matmul(w, v_bar), transpose_a=True)
    sigma = K.squeeze(sigma, axis=-1)
    sigma = K.squeeze(sigma, axis=-1)
    # Drop the expanded axis so the caller can reuse u_bar next call.
    u_bar = K.squeeze(u_bar, axis=-1)
    return sigma, u_bar
def FCN(input_shape):
    """FC4-style illuminant-estimation network on a VGG16 trunk.

    Args:
        input_shape: (H, W, C) shape of the input images.

    Returns:
        Keras Model mapping an image batch to unit-length illuminant
        estimates.
    """
    vgg16_model = VGG16(weights='imagenet', include_top=False,
                        input_shape=input_shape)
    # Sq_net = squeezenet(float(input_shape));
    fire8 = extract_layer_from_model(vgg16_model, layer_name='block4_pool')
    pool8 = MaxPooling2D((3, 3), strides=(2, 2), name='pool8')(fire8.output)
    fc1 = Conv2D(64, (6, 6), strides=(1, 1), padding='same',
                 name='fc1')(pool8)
    fc1 = Dropout(rate=0.5)(fc1)
    if SEPERATE_CONFIDENCE:
        fc2 = Conv2D(4, (1, 1), strides=(1, 1), padding='same',
                     activation='relu', name='fc2')(fc1)
        # First 3 channels: per-location RGB estimate; 4th: confidence.
        rgb = K.l2_normalize(fc2[:, :, :, 0:3], axis=3)
        w, h = map(int, fc2.get_shape()[1:3])
        confidence = fc2[:, :, :, 3:4]
        # BUG FIX: the original called np.reshape on symbolic Keras
        # tensors (and passed np.reshape a nonexistent `shape=` keyword);
        # K.reshape is required for graph tensors.
        confidence = K.reshape(confidence, (-1, w * h))
        # Softmax over all spatial positions to get attention weights.
        confidence = K.softmax(confidence)
        confidence = K.reshape(confidence, (-1, w, h, 1))
        fc2 = rgb * confidence
    else:
        fc2 = Conv2D(3, (1, 1), strides=(1, 1), padding='same',
                     name='fc2')(fc1)
        fc2 = Activation('relu')(fc2)
    fc2 = Conv2D(3, (15, 15), padding='valid', name='fc_pooling')(fc2)

    def norm(fc2):
        # Pool per-pixel unit colour vectors into a unit illuminant
        # estimate. K.sum replaces the removed K.tf.reduce_sum alias.
        fc2_norm = K.l2_normalize(fc2, axis=3)
        illum_est = K.sum(fc2_norm, axis=(1, 2))
        illum_est = K.l2_normalize(illum_est)
        return illum_est

    # illum_est = Dense(3)(fc2);
    illum_est = Lambda(norm)(fc2)
    FCN_model = Model(inputs=vgg16_model.input, outputs=illum_est,
                      name='FC4')
    return FCN_model
def max_singular_val_for_convolution(w, u, fully_differentiable=False, ip=1,
                                     padding='same', strides=(1, 1),
                                     data_format='channels_last'):
    """Estimate the largest singular value of a convolution operator.

    Power iteration using conv2d as the linear map and conv2d_transpose
    as its adjoint (spectral normalisation for convolutional layers).

    Args:
        w: convolution kernel.
        u: running estimate of the leading input-side singular "vector"
            (a tensor with the conv-input shape; its static shape is used
            for the transpose-conv output_shape).
        fully_differentiable: if False, stop gradients through the kernel
            used inside the iterations.
        ip: number of power iterations (must be >= 1 so v_bar is defined).
        padding / strides / data_format: conv parameters; must match the
            layer whose spectral norm is being estimated.

    Returns:
        (sigma, u_bar): singular-value estimate and updated u for reuse.
    """
    assert ip >= 1
    if not fully_differentiable:
        w_ = K.stop_gradient(w)
    else:
        w_ = w
    u_bar = u
    for _ in range(ip):
        # v ~ normalize(conv(u)), then u ~ normalize(conv_transpose(v)).
        v_bar = K.conv2d(u_bar, w_, strides=strides,
                         data_format=data_format, padding=padding)
        v_bar = K.l2_normalize(v_bar)
        u_bar_raw = K.conv2d_transpose(v_bar, w_,
                                       output_shape=K.int_shape(u),
                                       strides=strides,
                                       data_format=data_format,
                                       padding=padding)
        u_bar = K.l2_normalize(u_bar_raw)
    # Recompute the adjoint with the full-gradient kernel `w` so sigma
    # carries gradients even when the iterations were stopped.
    u_bar_raw_diff = K.conv2d_transpose(v_bar, w,
                                        output_shape=K.int_shape(u),
                                        strides=strides,
                                        data_format=data_format,
                                        padding=padding)
    sigma = K.sum(u_bar * u_bar_raw_diff)
    return sigma, u_bar
def _embedding_model(input_shape, embedding_size):
    """Small conv-net mapping an image to a unit-norm embedding vector."""
    img_input = Input(shape=input_shape, name="img_input")
    # Three conv/pool stages with shrinking kernels and growing filters.
    net = img_input
    for filters, kernel in ((16, (4, 4)), (32, (3, 3)), (64, (2, 2))):
        net = Conv2D(filters, kernel, activation="relu")(net)
        net = MaxPooling2D(pool_size=(2, 2))(net)
    net = Flatten()(net)
    net = Dense(16)(net)
    net = Dense(embedding_size)(net)
    embedding = Lambda(lambda tensor: K.l2_normalize(tensor, axis=1),
                       name='normalized_embedding')(net)
    return Model(inputs=[img_input], outputs=[embedding])
def __init__(self, k1: int, w1: int, k2: int, w2: int,
             dropout_rate: float):
    """Build the attention feature sub-layers (two causal convs, dropout,
    and an L2-normalisation lambda)."""
    super().__init__()
    self.logger = logging.getLogger(__name__)
    # Causal padding keeps the temporal length unchanged throughout —
    # Keras requires the input here to be the same size as the output.
    self.conv1 = TimeDistributed(
        Conv1D(k1, w1, activation='relu', padding='causal',
               name='attention_fet_conv1'))
    self.conv2 = TimeDistributed(
        Conv1D(k2, w2, padding='causal', name='attention_fet_conv2'))
    self.dropout = Dropout(dropout_rate)
    self.l2_norm = Lambda(lambda t: backend.l2_normalize(t, axis=1),
                          name='attention_fet_l2_norm')
def Mildnet_resnet():
    """MILDNet variant on ResNet50 with two pooled activation skips."""
    backbone = ResNet50(include_top=False, weights='imagenet',
                        input_shape=(224, 224, 3), pooling='avg')
    # Freeze everything up to layer 143; fine-tune only the tail.
    for frozen in backbone.layers[:143]:
        frozen.trainable = False

    skips = get_layers_output_by_name(backbone,
                                      ['activation_46', 'activation_43'])
    features = backbone.output  # already a vector thanks to pooling='avg'
    for _, skip in skips.items():
        features = concatenate([features, GlobalAveragePooling2D()(skip)])

    features = Dense(2048, activation='relu')(features)
    features = Dropout(0.6)(features)
    features = Dense(2048, activation='relu')(features)
    features = Lambda(lambda t: K.l2_normalize(t, axis=1))(features)
    return tf.keras.models.Model(inputs=backbone.input, outputs=features)
def SphereSpeaker(dimensions=(59, 201), num_speak=2500, emb_dim=512):
    """Speaker-ID model: three stacked BiLSTMs, time-pooled to a unit
    embedding, followed by a softmax classifier.

    Args:
        dimensions: (feature_dim, time_steps) of the input features.
            FIX: default changed from a mutable list to a tuple — Python
            evaluates defaults once, so a list default is shared between
            calls and any mutation would leak across them.
        num_speak: number of speaker classes.
        emb_dim: embedding size before the softmax head.

    Returns:
        Uncompiled Keras Model mapping features to speaker posteriors.
    """
    input_feat = Input(shape=(dimensions[1], dimensions[0]))
    # Bidirectional layers
    x_1 = Bidirectional(CuDNNLSTM(250, return_sequences=True))(input_feat)
    x_2 = Bidirectional(CuDNNLSTM(250, return_sequences=True))(x_1)
    x_3 = Bidirectional(CuDNNLSTM(250, return_sequences=True))(x_2)
    # Concatenate all three BiLSTM outputs along the feature axis.
    x_conc = Concatenate(axis=2)([x_1, x_2, x_3])
    emb = BatchNormalization()(x_conc)
    emb = Dense(emb_dim, activation="relu")(emb)
    # Average over time to a fixed-size vector, then L2-normalise.
    emb = GlobalAveragePooling1D()(emb)
    emb = BatchNormalization()(emb)
    emb = Lambda(lambda x: K.l2_normalize(x, axis=1))(emb)
    # Softmax layer
    softmax = Dense(num_speak, activation="softmax")(emb)
    test_model = Model(inputs=input_feat, outputs=softmax)
    return test_model
def Mildnet_vgg16_skip_4():
    """VGG16 head variant with the first 10 backbone layers frozen.

    NOTE(review): despite the name, no skip connections are built here;
    the Dense head is applied to the 4-D conv output without pooling, and
    the two extra Input tensors are never connected to the output.
    Verify this matches the intended experiment before relying on it.
    """
    backbone = VGG16(weights="imagenet", include_top=False,
                     input_shape=(224, 224, 3))
    for frozen in backbone.layers[:10]:
        frozen.trainable = False

    head = Dense(2048, activation='relu')(backbone.output)
    head = Dropout(0.6)(head)
    head = Dense(2048, activation='relu')(head)
    head = Lambda(lambda t: K.l2_normalize(t, axis=1))(head)

    first_input = Input(shape=(224, 224, 3))
    second_input = Input(shape=(224, 224, 3))
    return tf.keras.models.Model(
        inputs=[first_input, second_input, backbone.input], outputs=head)
def get_audio_subnetwork(self):
    """Audio tower: four conv stages over a (257, 200, 1) spectrogram,
    pooled down and projected to a 128-D L2-normalised embedding."""
    def conv_bn_relu(filters, name, strides=1):
        # Conv -> BatchNorm -> ReLU triple used by every stage.
        return [Conv2D(filters, 3, strides, padding='same', name=name),
                BatchNormalization(),
                ReLU()]

    layers = [InputLayer((257, 200, 1), name='audio_input')]
    layers += conv_bn_relu(64, 'audio_conv1_1', strides=2)
    layers += conv_bn_relu(64, 'audio_conv1_2')
    layers.append(MaxPool2D(2, name='audio_pool1'))
    layers += conv_bn_relu(128, 'audio_conv2_1')
    layers += conv_bn_relu(128, 'audio_conv2_2')
    layers.append(MaxPool2D(2, name='audio_pool2'))
    layers += conv_bn_relu(256, 'audio_conv3_1')
    layers += conv_bn_relu(256, 'audio_conv3_2')
    layers.append(MaxPool2D(2, name='audio_pool3'))
    layers += conv_bn_relu(512, 'audio_conv4_1')
    layers += conv_bn_relu(512, 'audio_conv4_2')
    # Final pool collapses the remaining 16x12 grid to 1x1.
    layers.append(MaxPool2D((16, 12), name='audio_pool4'))
    layers += [Dense(128, name='audio_fc1'),
               ReLU(),
               Dense(128, name='audio_fc2'),
               Lambda(lambda x: K.l2_normalize(x), name='audio_L2_norm')]
    return Sequential(layers)
def call(self, x, mask=None):
    """L2-normalise along self.axis, then apply the learned scale gamma."""
    normalized = K.l2_normalize(x, self.axis)
    scaled = normalized * self.gamma
    return scaled
def call(self, X):
    """Return X with each row scaled to unit L2 norm."""
    normalized = _K.l2_normalize(X, axis=1)
    return normalized