def call(self, inputs, training):
    # Expand the input to [batch, 1, codes_width] and repeat it for every code word.
    y_expand = tf.expand_dims(inputs, axis=-2)
    y_expand = K.repeat_elements(y_expand, rep=self.codes_count, axis=-2)

    # Broadcast the code table to [batch, codes_count, codes_width].
    broadcast_shape = (tf.shape(inputs)[0], self.codes_count, self.codes_width)
    tables_expand = tf.broadcast_to(self.codes_table_expand, broadcast_shape)

    # Euclidean distance between the input and every code word.
    euclide_y = self.euclide_size(y_expand, tables_expand)

    # Negated Euclidean distance. This should use the maximum Euclidean distance
    # instead of 10.0, but 10.0 is enough for this purpose.
    euclide_y = 10.0 - euclide_y
    return euclide_y
def call(self, inputs, **kwargs):
    gate_outputs = []
    final_outputs = []

    # f_{i}(x) = activation(W_{i} * x + b), where activation is ReLU according to the paper
    expert_outputs = tf.tensordot(a=inputs, b=self.expert_kernels, axes=1)
    # Add the bias term to the expert weights if necessary
    expert_outputs = K.bias_add(x=expert_outputs, bias=self.expert_bias)
    expert_outputs = self.expert_activation(expert_outputs)

    # g^{k}(x) = activation(W_{gk} * x + b), where activation is softmax according to the paper
    for index, gate_kernel in enumerate(self.gate_kernels):
        gate_output = K.dot(x=inputs, y=gate_kernel)
        # Add the bias term to the gate weights if necessary
        gate_output = K.bias_add(x=gate_output, bias=self.gate_bias[index])
        gate_output = self.gate_activation(gate_output)
        gate_outputs.append(gate_output)

    # f^{k}(x) = sum_{i=1}^{n}(g^{k}(x)_{i} * f_{i}(x))
    for gate_output in gate_outputs:
        expanded_gate_output = tf.expand_dims(gate_output, axis=1)
        weighted_expert_output = expert_outputs * K.repeat_elements(
            expanded_gate_output, self.units, axis=1)
        final_outputs.append(K.sum(weighted_expert_output, axis=2))

    return final_outputs
def call(self, inputs, training=None, **kwargs):
    query, keys = inputs
    # query: [bs, 1, features*dim]
    # keys:  [bs, T, features*dim]
    keys_len = keys.get_shape()[1]  # T
    # Repeat the query T times -> [bs, T, features*dim]
    queries = K.repeat_elements(query, keys_len, 1)
    # Concatenate into [bs, T, 4*features*dim]
    att_input = tf.concat([queries, keys, queries - keys, queries * keys], axis=-1)
    # DNN output: [bs, T, hidden]
    att_out = self.dnn(att_input, training=training)
    # self.dense is the Lambda
    #   lambda x: tf.nn.bias_add(tf.tensordot(x[0], x[1], axes=(-1, 0)), x[2]),
    # i.e. a tensordot of att_out with self.kernel plus self.bias.
    attention_score = self.dense([att_out, self.kernel, self.bias])
    # Note that this is not the same as Transformer self-attention: there, the query is
    # dotted with every key and softmax-ed, giving each query a length-seqlen score that
    # weights the keys to produce the attended query. In DIN, the query and keys are
    # concatenated and mapped to a [bs, T] score, also of length seqlen; in principle a
    # dense layer can capture the interaction between the query and the keys.
    return attention_score
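# A minimal sketch (not part of the layer above) of the query/key interaction feature it builds:
# a [bs, 1, d] query is repeated T times and concatenated with the keys, their difference, and
# their element-wise product into a [bs, T, 4*d] attention input; all shapes here are made up.
import tensorflow as tf
from tensorflow.keras import backend as K

bs, T, d = 2, 5, 8
query = tf.random.normal((bs, 1, d))
keys = tf.random.normal((bs, T, d))
queries = K.repeat_elements(query, T, 1)                                         # (bs, T, d)
att_input = tf.concat([queries, keys, queries - keys, queries * keys], axis=-1)  # (bs, T, 4*d)
print(att_input.shape)                                                           # (2, 5, 32)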
def call(self, inputs, training=None, mask=None):
    outputs = [self.stemlayer(inputs)]
    for i in range(self.sample_depth):
        outputs.append(self.Donwsamples[i](outputs[-1]))
    temp = outputs.pop(-1)
    for i in range(self.sample_depth):
        # Upsample by repeating elements along axis 1, then add the skip connection.
        temp = k.repeat_elements(temp, 2, axis=1) + outputs.pop(-1)
    temp = self.convoutput([temp, inputs])
    return temp
def call(self, inputs, training):
    # L2-normalize the inputs, then rescale from [-1, 1] to [0, 1].
    norm = tf.norm(inputs, ord=2, axis=-1)
    norm_expand = tf.expand_dims(norm, axis=-1)
    norm_expand = K.repeat_elements(norm_expand, rep=self.width, axis=-1)
    return ((inputs / norm_expand) + 1) / 2.0
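# A minimal sketch (not part of the layer above) checking its arithmetic on a single row:
# [3, 4] has L2 norm 5, normalizes to [0.6, 0.8], and rescales to [0.8, 0.9]; the width-2
# input is made up.
import tensorflow as tf
from tensorflow.keras import backend as K

x = tf.constant([[3.0, 4.0]])
norm = K.repeat_elements(tf.expand_dims(tf.norm(x, ord=2, axis=-1), axis=-1), 2, axis=-1)
print((((x / norm) + 1) / 2.0).numpy())   # [[0.8 0.9]]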
def __call__(self, q, k, v, mask=None):
    d_k, d_v = self.d_k, self.d_v
    n_head = self.n_head

    if self.mode == 0:
        qs = self.qs_layer(q)  # [batch_size, len_q, n_head*d_k]
        ks = self.ks_layer(k)
        vs = self.vs_layer(v)

        def reshape1(x):
            s = tf.shape(x)  # [batch_size, len_q, n_head * d_k]
            x = tf.reshape(x, [s[0], s[1], n_head, d_k])
            x = tf.transpose(x, [2, 0, 1, 3])
            x = tf.reshape(x, [-1, s[1], d_k])  # [n_head * batch_size, len_q, d_k]
            return x

        qs = Lambda(reshape1)(qs)
        ks = Lambda(reshape1)(ks)
        vs = Lambda(reshape1)(vs)

        if mask is not None:
            mask = Lambda(lambda x: K.repeat_elements(x, n_head, 0))(mask)
        head, attn = self.attention(qs, ks, vs, mask=mask)

        def reshape2(x):
            s = tf.shape(x)  # [n_head * batch_size, len_v, d_v]
            x = tf.reshape(x, [n_head, -1, s[1], s[2]])
            x = tf.transpose(x, [1, 2, 0, 3])
            x = tf.reshape(x, [-1, s[1], n_head * d_v])  # [batch_size, len_v, n_head * d_v]
            return x

        head = Lambda(reshape2)(head)
    elif self.mode == 1:
        heads = []
        attns = []
        for i in range(n_head):
            qs = self.qs_layers[i](q)
            ks = self.ks_layers[i](k)
            vs = self.vs_layers[i](v)
            head, attn = self.attention(qs, ks, vs, mask)
            heads.append(head)
            attns.append(attn)
        head = Concatenate()(heads) if n_head > 1 else heads[0]
        attn = Concatenate()(attns) if n_head > 1 else attns[0]

    outputs = self.w_o(head)
    outputs = Dropout(self.dropout)(outputs)
    if not self.layer_norm:
        return outputs, attn
    outputs = Add()([outputs, q])
    return self.layer_norm(outputs), attn
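# A minimal sketch (not part of the layer above) of the head-splitting reshape done in reshape1:
# [batch, len, n_head*d_k] is regrouped into [n_head*batch, len, d_k] so every head can be
# attended in a single batched matmul; the sizes below are made up.
import tensorflow as tf

batch, length, n_head, d_k = 2, 4, 3, 5
x = tf.random.normal((batch, length, n_head * d_k))
x = tf.reshape(x, (batch, length, n_head, d_k))
x = tf.transpose(x, (2, 0, 1, 3))
x = tf.reshape(x, (-1, length, d_k))
print(x.shape)   # (6, 4, 5) == (n_head * batch, len, d_k)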
def call(self, inputs, **kwargs):
    query, keys = inputs
    keys_len = keys.get_shape()[1]
    queries = K.repeat_elements(query, keys_len, 1)
    att_input = K.concatenate([queries, keys, queries - keys, queries * keys], axis=-1)
    att_input = BatchNormalization()(att_input)
    att_out = MLP(self.hidden_size, self.activation, self.l2_reg, self.keep_prob,
                  self.use_bn, seed=self.seed, name=self.name + "mlp")(att_input)
    attention_score = Dense(1, 'linear')(att_out)
    return attention_score
def call(self, inputs, **kwargs):
    query, keys = inputs
    keys_len = keys.get_shape()[1]
    queries = K.repeat_elements(query, keys_len, 1)
    att_input = tf.concat([queries, keys, queries - keys, queries * keys], axis=-1)
    att_input = tf.layers.batch_normalization(att_input)
    att_out = MLP(self.hidden_size, self.activation, self.l2_reg, self.keep_prob,
                  self.use_bn, seed=self.seed)(att_input)
    attention_score = tf.nn.bias_add(tf.tensordot(att_out, self.kernel, axes=(-1, 0)), self.bias)
    return attention_score
def call(self, inputs, training=None, **kwargs):
    query, keys = inputs
    keys_len = keys.get_shape()[1]
    queries = K.repeat_elements(query, keys_len, 1)
    att_input = tf.concat([queries, keys, queries - keys, queries * keys], axis=-1)
    att_out = self.dnn(att_input, training=training)
    attention_score = self.dense([att_out, self.kernel, self.bias])
    return attention_score
def init_model(model_file_path):
    backbone_model = load_model(model_file_path,
                                custom_objects={"tf": tf, "swish": tf.nn.swish},
                                compile=False)
    # Accept single-channel input and repeat it to three channels for the backbone.
    input_tensor = Input(shape=list(backbone_model.input_shape[1:-1]) + [1])
    output_tensor = Lambda(lambda x: K.repeat_elements(x, rep=3, axis=3),
                           name="repeat_elements")(input_tensor)
    # Scale pixel values to [0, 1].
    preprocess_input_wrapper = lambda x: x / 255.0
    output_tensor = Lambda(preprocess_input_wrapper, name="preprocess_input")(output_tensor)
    output_tensor_list = backbone_model(output_tensor)
    model = Model(inputs=input_tensor, outputs=output_tensor_list)
    return model
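# A minimal sketch (not from the original source) of the grayscale-to-RGB trick used in
# init_model: repeating the single channel three times lets a 3-channel backbone consume
# 1-channel images; the 1x4x4x1 tensor is made up.
import tensorflow as tf
from tensorflow.keras import backend as K

gray = tf.random.uniform((1, 4, 4, 1))
rgb = K.repeat_elements(gray, rep=3, axis=3)   # (1, 4, 4, 3), all three channels identical
print(rgb.shape)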
def call(self, inputs, training=None, **kwargs):
    query, keys = inputs
    keys_len = keys.get_shape()[1]
    # Repeat the query along the sequence axis so it lines up with every key.
    queries = K.repeat_elements(query, keys_len, 1)
    att_input = tf.concat([queries, keys, queries - keys, queries * keys], axis=-1)
    # Every position is processed by the same DNN -> (batch_size, T, hidden_size[-1])
    att_out = self.dnn(att_input, training=training)
    # (batch_size, T, 1)
    attention_score = self.dense([att_out, self.kernel, self.bias])
    return attention_score
def call(self, inputs, **kwargs):
    """
    Method for the forward function of the layer.

    :param inputs: Input tensor
    :param kwargs: Additional keyword arguments for the base method
    :return: A tensor
    """
    assert isinstance(inputs, list)
    expert_input, gate_input = inputs
    gate_outputs = []
    final_outputs = []

    # f_{i}(x) = activation(W_{i} * x + b), where activation is ReLU according to the paper
    expert_outputs = tf.tensordot(a=expert_input, b=self.expert_kernels, axes=1)
    # Add the bias term to the expert weights if necessary
    if self.use_expert_bias:
        expert_outputs = K.bias_add(x=expert_outputs, bias=self.expert_bias)
    expert_outputs = self.expert_activation(expert_outputs)

    # g^{k}(x) = activation(W_{gk} * x + b), where activation is softmax according to the paper
    for index, gate_kernel in enumerate(self.gate_kernels):
        gate_output = K.dot(x=gate_input, y=gate_kernel)
        # Add the bias term to the gate weights if necessary
        if self.use_gate_bias:
            gate_output = K.bias_add(x=gate_output, bias=self.gate_bias[index])
        gate_output = self.gate_activation(gate_output)
        # gate_outputs holds one array per task; each array contains the gating
        # weights over the experts.
        gate_outputs.append(gate_output)

    # f^{k}(x) = sum_{i=1}^{n}(g^{k}(x)_{i} * f_{i}(x))
    for gate_output in gate_outputs:
        expanded_gate_output = K.expand_dims(gate_output, axis=1)
        weighted_expert_output = expert_outputs * K.repeat_elements(
            expanded_gate_output, self.units, axis=1)
        final_outputs.append(K.sum(weighted_expert_output, axis=2))

    return final_outputs
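# A minimal sketch (not part of the layer above) of the gate-weighted mixture computed in its
# final loop: expert outputs of shape (batch, units, num_experts) are weighted by one task's
# softmax gate of shape (batch, num_experts) and summed over the experts; numbers are made up.
import numpy as np
import tensorflow as tf
from tensorflow.keras import backend as K

batch, units, num_experts = 1, 2, 3
expert_outputs = tf.constant(np.arange(batch * units * num_experts,
                                       dtype=np.float32).reshape(batch, units, num_experts))
gate_output = tf.constant([[0.2, 0.3, 0.5]])                    # one task's gate, sums to 1
expanded = K.repeat_elements(K.expand_dims(gate_output, axis=1), units, axis=1)
task_output = K.sum(expert_outputs * expanded, axis=2)          # shape (batch, units)
print(task_output.numpy())                                      # [[1.3 4.3]]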
def call(self, inputs, training):
    # Expand the input to [batch, 1, codes_width] and repeat it for every code word.
    y_expand = tf.expand_dims(inputs, axis=-2)
    y_expand = K.repeat_elements(y_expand, rep=self.codes_count, axis=-2)

    # Broadcast the code table to [batch, codes_count, codes_width].
    broadcast_shape = (tf.shape(inputs)[0], self.codes_count, self.codes_width)
    tables_expand = tf.broadcast_to(self.codes_table_expand, broadcast_shape)

    # Bit-wise agreement between the prediction and each code word, then keep the
    # minimum agreement across the code width.
    together = tf.multiply(y_expand, tables_expand) + tf.multiply(1.0 - y_expand, 1.0 - tables_expand)
    return tf.math.reduce_min(together, axis=-1)
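# A minimal sketch (not part of the layer above) of the agreement score it computes: for a
# binary code word c, y*c + (1-y)*(1-c) is near 1 where the prediction agrees with the code
# bit and near 0 where it disagrees, and reduce_min keeps only the worst bit; the tiny code
# table below is made up.
import tensorflow as tf

y = tf.constant([[0.9, 0.1, 0.8]])                 # one prediction over 3 code bits
codes = tf.constant([[1.0, 0.0, 1.0],              # code word that matches y closely
                     [0.0, 1.0, 1.0]])             # code word that disagrees on two bits
together = y[:, None, :] * codes + (1.0 - y[:, None, :]) * (1.0 - codes)
print(tf.reduce_min(together, axis=-1).numpy())    # [[0.8 0.1]] -> the first code word wins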
def call(self, inputs, training=None, **kwargs):
    query, keys = inputs
    keys_len = keys.get_shape()[1]
    queries = K.repeat_elements(query, keys_len, 1)
    att_input = tf.concat([queries, keys, queries - keys, queries * keys], axis=-1)
    att_out = DNN(self.hidden_units, self.activation, self.l2_reg, self.dropout_rate,
                  self.use_bn, seed=self.seed)(att_input, training=training)
    attention_score = tf.keras.layers.Lambda(
        lambda x: tf.nn.bias_add(tf.tensordot(x[0], x[1], axes=(-1, 0)), x[2]))(
            [att_out, self.kernel, self.bias])
    return attention_score
def call(self, inputs):
    X = inputs[0]  # Node features (B x N x F)
    A = inputs[1]  # Adjacency matrix (B x N x N)
    X_dims = X.get_shape().as_list()
    B, N, F = X_dims

    feature_self = K.dot(X, self.self_kernel)
    feature_neighbor = K.dot(X, self.neighbor_kernel)

    # repeat_elements behaves like np.repeat: it repeats each element along the given axis.
    # Example:
    #   z = np.array([[1,2,3],[4,5,6]])  # shape=(2, 3)
    #   repeat = 4
    #   np.reshape(np.repeat(z, repeat, axis=-1), (2, 3, repeat))
    #   > array([[[1, 1, 1, 1],
    #             [2, 2, 2, 2],
    #             [3, 3, 3, 3]],
    #            [[4, 4, 4, 4],
    #             [5, 5, 5, 5],
    #             [6, 6, 6, 6]]])
    feature_self = K.repeat_elements(feature_self, N, axis=2)
    feature_self = K.reshape(feature_self, (-1, N, N, self.units))
    feature_neighbor = K.repeat_elements(feature_neighbor, N, axis=2)
    feature_neighbor = K.reshape(feature_neighbor, (-1, N, N, self.units))

    T = (0, 2, 1, 3)
    if self.merge_method == "concat":
        if self.node_axis == "row":
            merged = tf.concat([feature_self, tf.transpose(feature_neighbor, T)], axis=-1)
        else:
            merged = tf.concat([tf.transpose(feature_self, T), feature_neighbor], axis=-1)
    else:
        if self.node_axis == "row":
            merged = feature_self + tf.transpose(feature_neighbor, T)
        else:
            merged = tf.transpose(feature_self, T) + feature_neighbor

    activation_func = tf.nn.tanh
    if self.use_attention_kernel:
        attention = K.dot(activation_func(merged), self.attention_kernel)
    else:
        attention = activation_func(merged)
    attention = K.reshape(attention, (-1, N, N))

    if self.use_bias:
        attention = K.bias_add(attention, self.bias)

    # Mask out non-edges with a large negative value before the softmax.
    mask = -10e9 * (1.0 - A)
    attention += mask
    attention = tf.nn.softmax(attention)

    output = tf.matmul(attention, X)
    if self.return_attention:
        return (output, attention)
    else:
        return output
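# A minimal sketch (not part of the layer above) confirming the np.repeat analogy in its
# comment: K.repeat_elements repeats each element along the chosen axis exactly like
# np.repeat; the small array is made up.
import numpy as np
import tensorflow as tf
from tensorflow.keras import backend as K

z = np.array([[1, 2, 3], [4, 5, 6]], dtype=np.float32)      # shape (2, 3)
via_keras = K.repeat_elements(tf.constant(z), 4, axis=-1)    # shape (2, 12)
via_numpy = np.repeat(z, 4, axis=-1)                         # shape (2, 12)
print(np.array_equal(via_keras.numpy(), via_numpy))          # True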
def expend_as(tensor, rep, name):
    my_repeat = Lambda(lambda x, repnum: K.repeat_elements(x, repnum, axis=3),
                       arguments={'repnum': rep},
                       name='psi_up' + name)(tensor)
    return my_repeat
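# A minimal sketch (not from the original source) of how expend_as is typically used in an
# attention gate: a single-channel coefficient map is repeated across the feature channels
# and multiplied element-wise with the features; the shapes and layer name are illustrative.
import tensorflow as tf
from tensorflow.keras import backend as K
from tensorflow.keras.layers import Input, Lambda, multiply

features = Input(shape=(32, 32, 64))    # feature map with 64 channels
psi = Input(shape=(32, 32, 1))          # single-channel attention coefficients
psi_up = Lambda(lambda x, repnum: K.repeat_elements(x, repnum, axis=3),
                arguments={'repnum': 64}, name='psi_up_demo')(psi)   # (None, 32, 32, 64)
gated = multiply([features, psi_up])    # attention-gated features
print(gated.shape)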
def MT_Hybrid_CAN(n_frame, nb_filters1, nb_filters2, input_shape_1, input_shape_2,
                  kernel_size_1=(3, 3, 3), kernel_size_2=(3, 3),
                  dropout_rate1=0.25, dropout_rate2=0.5,
                  pool_size_1=(2, 2, 2), pool_size_2=(2, 2), nb_dense=128):
    diff_input = Input(shape=input_shape_1)
    rawf_input = Input(shape=input_shape_2)

    # Motion branch
    d1 = Conv3D(nb_filters1, kernel_size_1, padding='same', activation='tanh')(diff_input)
    d2 = Conv3D(nb_filters1, kernel_size_1, activation='tanh')(d1)

    # App branch
    r1 = Conv2D(nb_filters1, kernel_size_2, padding='same', activation='tanh')(rawf_input)
    r2 = Conv2D(nb_filters1, kernel_size_2, activation='tanh')(r1)

    # Mask from App (g1) * Motion Branch (d2)
    g1 = Conv2D(1, (1, 1), padding='same', activation='sigmoid')(r2)
    g1 = Attention_mask()(g1)
    g1 = K.expand_dims(g1, axis=-1)
    gated1 = multiply([d2, g1])

    # Motion branch
    d3 = AveragePooling3D(pool_size_1)(gated1)
    d4 = Dropout(dropout_rate1)(d3)
    d5 = Conv3D(nb_filters2, kernel_size_1, padding='same', activation='tanh')(d4)
    d6 = Conv3D(nb_filters2, kernel_size_1, activation='tanh')(d5)

    # App branch
    r3 = AveragePooling2D(pool_size_2)(r2)
    r4 = Dropout(dropout_rate1)(r3)
    r5 = Conv2D(nb_filters2, kernel_size_2, padding='same', activation='tanh')(r4)
    r6 = Conv2D(nb_filters2, kernel_size_2, activation='tanh')(r5)

    # Mask from App (g2) * Motion Branch (d6)
    g2 = Conv2D(1, (1, 1), padding='same', activation='sigmoid')(r6)
    g2 = Attention_mask()(g2)
    g2 = K.repeat_elements(g2, d6.shape[3], axis=-1)
    g2 = K.expand_dims(g2, axis=-1)
    gated2 = multiply([d6, g2])

    # Motion branch
    d7 = AveragePooling3D(pool_size_1)(gated2)
    d8 = Dropout(dropout_rate1)(d7)
    d9 = Flatten()(d8)

    # Two task heads share the flattened motion features
    d10_y = Dense(nb_dense, activation='tanh')(d9)
    d11_y = Dropout(dropout_rate2)(d10_y)
    out_y = Dense(n_frame, name='output_1')(d11_y)

    d10_r = Dense(nb_dense, activation='tanh')(d9)
    d11_r = Dropout(dropout_rate2)(d10_r)
    out_r = Dense(n_frame, name='output_2')(d11_r)

    model = Model(inputs=[diff_input, rawf_input], outputs=[out_y, out_r])
    return model
def upsampling2d_tpu(inputs, scale=2):
    # Nearest-neighbour 2D upsampling built from repeat_elements; the function name
    # suggests this is meant to stay TPU-friendly.
    x = K.repeat_elements(inputs, scale, axis=1)
    x = K.repeat_elements(x, scale, axis=2)
    return x
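# A minimal sketch (not from the original source) checking that upsampling2d_tpu matches
# nearest-neighbour UpSampling2D on a tiny tensor; the 1x2x2x1 input is made up.
import numpy as np
import tensorflow as tf
from tensorflow.keras import backend as K
from tensorflow.keras.layers import UpSampling2D

x = tf.constant(np.arange(4, dtype=np.float32).reshape(1, 2, 2, 1))
a = upsampling2d_tpu(x, scale=2)                        # repeat rows, then columns
b = UpSampling2D(size=2, interpolation='nearest')(x)    # built-in nearest-neighbour upsampling
print(np.allclose(a.numpy(), b.numpy()))                # True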