def iou_loss(y, y_hat):
    """Return the IoU (Jaccard index) loss ``1 - IoU`` between two tensors.

    The IoU metric, or Jaccard Index, is similar to the Dice metric and is
    calculated as the ratio between the overlap of the positive instances
    between two sets, and their mutual combined values:
    https://arxiv.org/abs/1911.08287

    Args:
        y: Ground-truth tensor; flattened before use.
        y_hat: Predicted tensor; flattened before use.

    Returns:
        Scalar tensor ``1 - (intersection + eps) / (union + eps)``.
    """
    y_hat = K.flatten(y_hat)
    y = K.flatten(y)

    # Both operands are rank-1 after flattening, so the overlap is their
    # inner product.  An element-wise product followed by a sum computes it
    # directly, whereas K.dot on two rank-1 tensors is rejected by
    # matmul-based backends.
    intersection = K.sum(y * y_hat)
    total = K.sum(y) + K.sum(y_hat)
    union = total - intersection

    # Epsilon on both sides keeps the ratio defined when the union is zero.
    iou = (intersection + K.epsilon()) / (union + K.epsilon())
    return 1 - iou
def step(self, x, states):
    """Run one recurrent step with additive attention over static inputs.

    Args:
        x: Input for the current time step.
        states: Sequence whose first entry is the previous hidden state and
            whose last two entries are constants: ``states[-2]`` holds the
            static inputs and ``states[-1]`` their pre-computed projection
            (per the original author's note this equals
            ``dot(static_x, W1) + b2`` -- TODO confirm against the caller).
            Everything before the last two entries is passed to the wrapped
            cell.

    Returns:
        Tuple ``(output, new_states)`` where ``output`` is the wrapped cell's
        output concatenated with the attention weights, and ``new_states``
        are the states returned by the wrapped cell.
    """
    prev_hidden = states[0]
    # NOTE(review): the original author questioned whether states[1] is needed.

    # The last two states are constants shared by every time step.
    static_inputs, static_projection = states[-2], states[-1]

    # The hidden-state projection is only valid for this step; add an axis so
    # it broadcasts against the projection that is valid for all steps.
    hidden_projection = kb.expand_dims(kb.dot(prev_hidden, self._W2), 1)
    scores = kb.dot(
        kb.tanh(static_projection) + kb.tanh(hidden_projection), self._V)
    weights = kb.softmax(scores, axis=1)
    context = kb.sum(weights * static_inputs, [1])

    merged = kb.concatenate([x, context], 1)
    cell_input = kb.dot(merged, self._W3) + self._b3

    cell_output, new_states = self.layer.cell.call(cell_input, states[:-2])

    # Append the attention weights to the output to "smuggle" them out of
    # the RNN wrapper.
    output = kb.concatenate([cell_output, kb.squeeze(weights, -1)])
    return output, new_states
def call(self, x, mask=None):
    """Compute context-query attention features.

    Args:
        x: Sequence ``(x_cont, x_ques, c_mask, q_mask)`` with the context
            encoding, the question encoding and their respective masks.
        mask: Unused.

    Returns:
        Concatenation along the last axis of the context, the
        context-to-query attention, and the context multiplied element-wise
        with the context-to-query and query-to-context attentions.
    """
    context, question, context_mask, question_mask = x
    question_t = K.permute_dimensions(question, pattern=(0, 2, 1))

    # Similarity matrix S is the sum of three terms plus a bias.
    context_term = tf.tile(K.dot(context, self.W0), [1, 1, self.q_maxlen])
    question_term = tf.tile(
        K.permute_dimensions(K.dot(question, self.W1), pattern=(0, 2, 1)),
        [1, self.c_maxlen, 1])
    interaction_term = K.batch_dot(context * self.W2, question_t)
    similarity = context_term + question_term + interaction_term
    similarity = similarity + self.bias

    # Softmax over the last axis with the question mask applied.
    row_logits = self.mask_logits(similarity, tf.expand_dims(question_mask, 1))
    row_attention = tf.nn.softmax(row_logits)

    # Softmax over axis 1 with the context mask applied, then transposed.
    col_logits = self.mask_logits(similarity, tf.expand_dims(context_mask, 2))
    col_attention_t = K.permute_dimensions(
        tf.nn.softmax(col_logits, axis=1), (0, 2, 1))

    c2q = tf.matmul(row_attention, question)
    q2c = tf.matmul(tf.matmul(row_attention, col_attention_t), context)

    return K.concatenate(
        [context, c2q, context * c2q, context * q2c], axis=-1)
def call(self, inputs, states):
    """Advance the cell by one step.

    Args:
        inputs: Input tensor for the current step.
        states: Pair ``(h, m)`` of previous hidden state and memory.

    Returns:
        Tuple ``(h, [h, m])`` with the new hidden state as output and the
        updated hidden state and memory as the next states.
    """
    prev_hidden, prev_memory = states

    # Signal written to the memory, encoded from input, hidden and memory.
    encoded = (K.dot(inputs, self.input_encoders)
               + K.dot(prev_hidden, self.hidden_encoders)
               + K.dot(prev_memory, self.memory_encoders))

    # Memory update through the AT and BT matrices.
    memory = (prev_memory
              + K.dot(prev_memory, self.AT)
              + K.dot(encoded, self.BT))

    # The hidden state reads the input, the previous hidden state and the
    # freshly updated memory.
    pre_activation = (K.dot(inputs, self.input_kernel)
                      + K.dot(prev_hidden, self.hidden_kernel)
                      + K.dot(memory, self.memory_kernel))
    hidden = self.hidden_activation(pre_activation)

    return hidden, [hidden, memory]
def call(self, inputs):
    """Pool a graph by keeping the top-scoring nodes.

    Args:
        inputs: ``[X, A, I]`` (sets ``data_mode`` to ``'disjoint'``) or
            ``[X, A]`` (sets ``data_mode`` to ``'single'``), i.e. node
            features, adjacency matrix and optional segment indices.

    Returns:
        List ``[X_pooled, A_pooled]``, followed by the pooled indices in
        disjoint mode and by the node mask when ``self.return_mask`` is set.
    """
    if len(inputs) == 3:
        X, A, I = inputs
        self.data_mode = 'disjoint'
    else:
        X, A = inputs
        # Without indices, every node is assigned to segment 0.
        I = tf.zeros(tf.shape(X)[:1])
        self.data_mode = 'single'
    if K.ndim(I) == 2:
        I = I[:, 0]
    I = tf.cast(I, tf.int32)

    sparse_adjacency = K.is_sparse(A)

    # Build the node mask from the per-segment top-k scores.
    scores = self.compute_scores(X, A, I)
    n_nodes = K.shape(X)[-2]
    kept = ops.segment_top_k(scores[:, 0], I, self.ratio, self.top_k_var)
    mask = tf.scatter_nd(
        tf.expand_dims(kept, 1), tf.ones_like(kept), (n_nodes,))

    # Gating the features with the scores keeps the layer differentiable.
    gated = X * self.gating_op(scores)

    # tf.boolean_mask cannot take a negative axis, so pick it from A's rank.
    if len(K.int_shape(A)) == 2:
        axis = 0
    else:
        axis = 1

    # Reduce X.
    X_pooled = tf.boolean_mask(gated, mask, axis=axis)

    # Compute A^2, then reduce it along both node axes.
    A_dense = tf.sparse.to_dense(A) if sparse_adjacency else A
    A_pooled = tf.boolean_mask(K.dot(A, A_dense), mask, axis=axis)
    A_pooled = tf.boolean_mask(A_pooled, mask, axis=axis + 1)
    if sparse_adjacency:
        A_pooled = ops.dense_to_sparse(A_pooled)

    output = [X_pooled, A_pooled]

    # Reduce I.
    if self.data_mode == 'disjoint':
        output.append(tf.boolean_mask(I[:, None], mask)[:, 0])

    if self.return_mask:
        output.append(mask)

    return output
def call(self, inputs, **kwargs):
    """Look up (adaptive) embeddings for integer token ids.

    With ``div_val == 1`` a single embedding table is used, optionally
    followed by a projection.  Otherwise the ids are split into the clusters
    delimited by ``self.cutoffs``; each cluster has its own table (and
    optional projection) and the masked results are summed.

    Args:
        inputs: Tensor of token ids; cast to ``int32`` if needed.

    Returns:
        The embedded tensor, or a list starting with it and followed by the
        embedding and/or projection weights when ``return_embeddings`` /
        ``return_projections`` are set.
    """
    if K.dtype(inputs) != 'int32':
        inputs = K.cast(inputs, 'int32')

    if self.div_val == 1:
        out = K.gather(self.embeddings, inputs)
        if self.embed_dim != self.output_dim or self.force_projection:
            out = K.dot(out, self.projections)
    else:
        # Zero accumulator shaped like the inputs plus an output_dim axis.
        zeros = K.expand_dims(
            K.zeros_like(inputs, dtype=K.floatx()), axis=-1)
        out = K.tile(zeros, (1, ) * K.ndim(inputs) + (self.output_dim, ))

        bounds = zip(self.cutoffs[:-1], self.cutoffs[1:])
        for i, (low, high) in enumerate(bounds):
            cluster_dim = self.embed_dim // (self.div_val**i)
            # 1.0 where the id belongs to this cluster, 0.0 elsewhere.
            mask = (K.cast(low <= inputs, K.floatx())
                    * K.cast(inputs < high, K.floatx()))
            # Ids outside the cluster are mapped to index 0 of the table;
            # their contribution is zeroed by the mask below.
            local_ids = (inputs - low) * K.cast(mask, 'int32')
            projected = K.gather(self.embeddings[i], local_ids)
            if cluster_dim != self.output_dim or self.force_projection:
                projected = K.dot(projected, self.projections[i])
            out = out + projected * K.expand_dims(mask, axis=-1)

    if self.return_embeddings or self.return_projections:
        out = [out]

    if self.return_embeddings:
        if self.div_val == 1:
            out.append(self.embeddings)
        else:
            out.extend(embed + 0.0 for embed in self.embeddings)

    if self.return_projections:
        if self.div_val != 1:
            out.extend(proj + 0.0 for proj in self.projections)
        elif self.projections is not None:
            out.append(self.projections)

    return out
def get_mat(rotation, shear, height_zoom, width_zoom, height_shift,
            width_shift):
    """Build a 3x3 transform combining rotation, shear, zoom and shift.

    Args:
        rotation: Rotation angle in degrees.
        shear: Shear angle in degrees.
        height_zoom: Zoom factor along the height.
        width_zoom: Zoom factor along the width.
        height_shift: Shift along the height.
        width_shift: Shift along the width.

        Every argument is concatenated with one-element constants, so each
        is presumably a tensor of shape ``[1]`` -- TODO confirm with callers.

    Returns:
        ``(rotation . shear) . (zoom . shift)`` as a 3x3 tensor.
    """
    def as_matrix(entries):
        # Nine one-element tensors, row-major, reshaped into a 3x3 matrix.
        return tf.reshape(tf.concat(entries, axis=0), [3, 3])

    # Degrees to radians.
    rotation = math.pi * rotation / 180.
    shear = math.pi * shear / 180.

    cos_rot = tf.math.cos(rotation)
    sin_rot = tf.math.sin(rotation)
    one = tf.constant([1], dtype='float32')
    zero = tf.constant([0], dtype='float32')

    rotation_matrix = as_matrix([
        cos_rot, sin_rot, zero,
        -sin_rot, cos_rot, zero,
        zero, zero, one,
    ])

    cos_shear = tf.math.cos(shear)
    sin_shear = tf.math.sin(shear)
    shear_matrix = as_matrix([
        one, sin_shear, zero,
        zero, cos_shear, zero,
        zero, zero, one,
    ])

    zoom_matrix = as_matrix([
        one / height_zoom, zero, zero,
        zero, one / width_zoom, zero,
        zero, zero, one,
    ])

    shift_matrix = as_matrix([
        one, zero, height_shift,
        zero, one, width_shift,
        zero, zero, one,
    ])

    rotate_then_shear = K.dot(rotation_matrix, shear_matrix)
    zoom_then_shift = K.dot(zoom_matrix, shift_matrix)
    return K.dot(rotate_then_shear, zoom_then_shift)
def call(self, x):
    """Apply multi-head scaled dot-product attention.

    Args:
        x: Either ``[Q_seq, K_seq, V_seq]`` or
            ``[Q_seq, K_seq, V_seq, Q_len, V_len]``, where ``Q_len`` and
            ``V_len`` are the lengths used for masking.

    Returns:
        The attention output reshaped to ``(-1, query_steps, output_dim)``
        and masked with ``Q_len``.

    Raises:
        ValueError: If ``x`` does not contain 3 or 5 elements.
    """
    # Unpack Q_seq, K_seq, V_seq and, when given, the mask lengths.
    if len(x) == 3:
        Q_seq, K_seq, V_seq = x
        Q_len, V_len = None, None
    elif len(x) == 5:
        # Q_len and V_len are the lengths used by the masks.
        Q_seq, K_seq, V_seq, Q_len, V_len = x
    else:
        raise ValueError(
            "Expected 3 inputs [Q, K, V] or 5 inputs [Q, K, V, Q_len, V_len], "
            "got %d" % len(x))

    # Linearly project Q, K and V, then split the result into nb_head heads
    # of size_per_head dimensions each.
    Q_seq = K.dot(Q_seq, self.WQ)
    Q_seq = K.reshape(
        Q_seq, (-1, K.shape(Q_seq)[1], self.nb_head, self.size_per_head))
    # Equivalent to a transpose: move the head axis before the step axis.
    Q_seq = K.permute_dimensions(Q_seq, (0, 2, 1, 3))

    K_seq = K.dot(K_seq, self.WK)
    K_seq = K.reshape(
        K_seq, (-1, K.shape(K_seq)[1], self.nb_head, self.size_per_head))
    K_seq = K.permute_dimensions(K_seq, (0, 2, 1, 3))

    V_seq = K.dot(V_seq, self.WV)
    V_seq = K.reshape(
        V_seq, (-1, K.shape(V_seq)[1], self.nb_head, self.size_per_head))
    V_seq = K.permute_dimensions(V_seq, (0, 2, 1, 3))

    # Scaled inner product, then mask, then softmax.
    A = tf.matmul(Q_seq, K_seq, transpose_b=True) / self.size_per_head**0.5
    # Swap axes 1 and 3 so the key axis sits where self.Mask expects the
    # step axis, apply the additive mask, then swap back.
    A = K.permute_dimensions(A, (0, 3, 2, 1))
    A = self.Mask(A, V_len, 'add')
    A = K.permute_dimensions(A, (0, 3, 2, 1))
    A = K.softmax(A)

    # Weighted sum of the values, merge the heads, then mask the output.
    O_seq = tf.matmul(A, V_seq)
    O_seq = K.permute_dimensions(O_seq, (0, 2, 1, 3))
    O_seq = K.reshape(O_seq, (-1, K.shape(O_seq)[1], self.output_dim))
    O_seq = self.Mask(O_seq, Q_len, 'mul')
    return O_seq
def call(self, inputs, states, training=None):
    """Run one step of the simple RNN cell with optional quantization.

    Args:
        inputs: Input tensor for the current step.
        states: Previous output, or a sequence whose first element is it.
        training: Training flag forwarded to the dropout-mask helpers.

    Returns:
        Tuple ``(output, [output])``.
    """
    if nest.is_sequence(states):
        prev_output = states[0]
    else:
        prev_output = states

    dp_mask = self.get_dropout_mask_for_cell(inputs, training)
    rec_dp_mask = self.get_recurrent_dropout_mask_for_cell(
        prev_output, training)

    # Input contribution, using the quantized kernel when a quantizer is set.
    kernel = (self.kernel_quantizer_internal(self.kernel)
              if self.kernel_quantizer else self.kernel)
    masked_inputs = inputs if dp_mask is None else inputs * dp_mask
    h = K.dot(masked_inputs, kernel)

    if self.bias is not None:
        bias = (self.bias_quantizer_internal(self.bias)
                if self.bias_quantizer else self.bias)
        h = K.bias_add(h, bias)

    # Recurrent contribution, with recurrent dropout and quantization.
    if rec_dp_mask is not None:
        prev_output = prev_output * rec_dp_mask
    recurrent_kernel = (
        self.recurrent_quantizer_internal(self.recurrent_kernel)
        if self.recurrent_quantizer else self.recurrent_kernel)

    output = h + K.dot(prev_output, recurrent_kernel)
    if self.activation is not None:
        output = self.activation(output)

    return output, [output]
def align_loss(y_true, y_pred):
    '''
    Source/target alignment loss on an intermediate layer of the target model.

    Alignment is performed in the target model (both source and target
    features come from the target model).

    y_true - dummy value (full of zeros), unused
    y_pred - intermediate-layer features of the target model:
             rows [:batch_size] are source samples,
             rows [batch_size:] are target samples

    Returns ot_alpha * alignment term + closs * class-centre term.
    Relies on `self`, `batch_size`, `L2_dist` and `L2_dist_center` from the
    enclosing scope.
    '''
    def class_center_loss(features, center, class_column):
        # Squared label-weighted sum of offsets from the class centre,
        # normalised by the label mass of that class.
        weighted_offsets = K.dot(K.transpose(features - center),
                                 K.reshape(class_column, (-1, 1)))
        return K.sum(K.square(weighted_offsets)) / K.sum(class_column)

    source_feats = y_pred[:batch_size, :]   # source domain features
    target_feats = y_pred[batch_size:, :]   # target domain features

    pairwise = L2_dist(source_feats, target_feats)
    transport_loss = K.sum(self.gamma * (pairwise))

    source_loss_0 = class_center_loss(source_feats, self.cc_s0, self.ys[:, 0])
    source_loss_1 = class_center_loss(source_feats, self.cc_s1, self.ys[:, 1])

    # Only the first len(self.yt) target rows have labels in self.yt.
    labelled_target = target_feats[:len(self.yt), :]
    target_loss_0 = class_center_loss(labelled_target, self.cc_t0,
                                      self.yt[:, 0])
    target_loss_1 = class_center_loss(labelled_target, self.cc_t1,
                                      self.yt[:, 1])

    source_center_gap = L2_dist_center(self.cc_s0, self.cc_s1)
    target_center_gap = L2_dist_center(self.cc_t0, self.cc_t1)

    # Pull samples towards their class centre, push the two centres apart.
    center_loss = (source_loss_0 + source_loss_1
                   + target_loss_0 + target_loss_1
                   - (source_center_gap + target_center_gap))

    return self.ot_alpha * (transport_loss) + center_loss * self.closs
def _mask_rotation_matrix_zyz(self, params):
    """Build flattened affine parameters from ZYZ rotation parameters.

    Args:
        params: Indexable tensor; entries 0-2 are mapped linearly to angles
            via ``p * 2 * pi - pi`` and entries 3-5 are the translation,
            which is zeroed here.

    Returns:
        Concatenation of the three rows of the transposed rotation matrix,
        each followed by the matching translation component.
    """
    def to_angle(unit_value):
        return unit_value * 2 * np.pi - np.pi

    phi = to_angle(params[0])
    theta = to_angle(params[1])
    psi_t = to_angle(params[2])

    # magnitude of Fourier transformation is translation-invariant
    translation = params[3:6] * 0

    about_z_psi = self._rotation_matrix_axis(2, psi_t)
    about_y_theta = self._rotation_matrix_axis(1, theta)
    about_z_phi = self._rotation_matrix_axis(2, phi)

    rotation = tf.transpose(
        K.dot(K.dot(about_z_phi, about_y_theta), about_z_psi))
    offset = K.dot(-rotation, K.expand_dims(translation))

    flat = K.flatten(rotation)
    return K.concatenate(
        [flat[:3], offset[0], flat[3:6], offset[1], flat[6:9], offset[2]])
def call(self, inputs, **kwargs):
    """Pool node features with learned attention weights.

    Args:
        inputs (tensor): the node feature tensor

    Returns:
        GlobalAttentionSumPool tensor (tensor): the node features combined
        by ``batch_dot`` with the softmax-normalised attention coefficients.
    """
    # Project the features with the attention kernel, drop the trailing
    # axis, and normalise the coefficients with a softmax.
    scores = K.squeeze(K.dot(inputs, self.attn_kernel), -1)
    weights = K.softmax(scores)
    return K.batch_dot(weights, inputs)
def _get_Rt(self, labels):
    """Return a 3-D mask over (anchor, positive, negative) triplets where

    mask[a,p,n] = 1  if l(a)==l(p) and l(a)!=l(n)
    mask[a,p,n] = -1 if l(a)!=l(p) and l(a)==l(n)
    mask[a,p,n] = 0  if l(a)==l(p) and l(a)==l(n)
                     or l(a)!=l(p) and l(a)!=l(n)

    Label equality is computed as ``labels . labels^T``, so ``labels`` is
    presumably a one-hot matrix -- TODO confirm with callers.
    """
    same_label = tf.cast(K.dot(labels, tf.transpose(labels)), tf.float32)
    # Broadcast equality(a, p) against equality(a, n).
    anchor_positive = tf.expand_dims(same_label, 2)
    anchor_negative = tf.expand_dims(same_label, 1)
    return tf.math.subtract(anchor_positive, anchor_negative)
def call(self, x):
    """Register the NCE loss on the layer and return the output logits.

    Args:
        x: Pair ``(predictions, targets)``.

    Returns:
        ``predictions . W^T``.  The bias ``self.b`` is used by the NCE loss
        only; it is not added to the returned logits.
    """
    import tensorflow.keras.backend as K

    predictions, targets = x

    # TensorFlow computes the sampled loss; Keras tracks it via add_loss.
    nce = tf.nn.nce_loss(
        weights=self.W,
        biases=self.b,
        labels=targets,
        inputs=predictions,
        num_sampled=self.neg_samples,
        num_classes=self.num_classes,
    )
    self.add_loss(nce)

    return K.dot(predictions, K.transpose(self.W))
def _pairwise_distances(self, inputs: List[Tensor]) -> Tensor:
    """Return Euclidean distances between two batches of embeddings.

    Args:
        inputs: Pair ``[emb_c, emb_r]`` of embedding tensors.

    Returns:
        The block of the full pairwise distance matrix holding distances
        from rows of ``emb_c`` to rows of ``emb_r`` (sized by the batch size
        of ``emb_c``).
    """
    emb_c, emb_r = inputs
    batch = K.shape(emb_c)[0]

    stacked = K.concatenate([emb_c, emb_r], 0)
    gram = K.dot(stacked, K.transpose(stacked))
    sq_norms = K.batch_dot(stacked, stacked, axes=1)

    # ||a - b||^2 = ||a||^2 - 2 a.b + ||b||^2
    sq_dist = K.transpose(sq_norms) - 2.0 * gram + sq_norms

    # Keep only the emb_c-vs-emb_r block; clip negatives from round-off.
    sq_dist = K.clip(sq_dist[0:batch, batch:batch + batch], 0.0, None)

    # Nudge exact zeros before the square root, then force those entries
    # back to zero afterwards.
    is_zero = K.cast(K.equal(sq_dist, 0.0), K.dtype(sq_dist))
    dist = K.sqrt(sq_dist + is_zero * 1e-16)
    return dist * (1.0 - is_zero)
def decoder(self, inputs):
    """Run the decoder stack and return output token probabilities.

    Args:
        inputs: Triple ``(decoder_inputs, encoder_encodings, encoder_masks)``.
            ``decoder_inputs`` holds token ids (cast to ``int32`` if needed);
            id 0 is treated as the masked position.

    Returns:
        Softmax over the projection of the final encodings onto the
        transposed embedding matrix.
    """
    decoder_inputs, encoder_encodings, encoder_masks = inputs
    if K.dtype(decoder_inputs) != 'int32':
        decoder_inputs = K.cast(decoder_inputs, 'int32')
    decoder_masks = K.equal(decoder_inputs, 0)

    # Scaled token embeddings plus position encodings, then dropout.
    token_embeddings = K.gather(self.embeddings, decoder_inputs)
    token_embeddings = token_embeddings * self._model_dim**0.5
    positions = self.DecoderPositionEncoding(token_embeddings)
    encodings = K.dropout(token_embeddings + positions, self._dropout_rate)

    for layer in range(self._decoder_stack):
        # Masked multi-head self-attention, then add & norm.
        self_attn = self.DecoderMultiHeadAttetions0[layer](
            [encodings, encodings, encodings, decoder_masks])
        self_attn = self.DecoderLayerNorms0[layer](self_attn + encodings)

        # Multi-head attention over the encoder output, then add & norm.
        cross_attn = self.DecoderMultiHeadAttetions1[layer](
            [self_attn, encoder_encodings, encoder_encodings, encoder_masks])
        cross_attn = self.DecoderLayerNorms1[layer](cross_attn + self_attn)

        # Position-wise feed-forward, then add & norm.
        feed_forward = self.DecoderPositionWiseFeedForwards[layer](cross_attn)
        encodings = self.DecoderLayerNorms2[layer](feed_forward + cross_attn)

    # The pre-softmax projection shares its parameters with the embeddings.
    logits = K.dot(encodings, K.transpose(self.embeddings))
    return K.softmax(logits)
def create_selector_model(num_neurons_in_layers, fsize, dtsize,
                          bridge_matrix1, bridge_matrix2):
    """Build the selector model and return it with its input layers.

    Args:
        num_neurons_in_layers: Sizes of the two dense layers (relu, softmax).
        fsize: Width of the ``pu_feature`` and ``do_feature`` inputs.
        dtsize: Width of the ``dt_feature`` input.
        bridge_matrix1: Constant matrix; the softmax output is multiplied by
            its transpose.
        bridge_matrix2: Second constant matrix, used the same way.

    Returns:
        Tuple ``(model, pu_input, do_input, dt_input)``.
    """
    layers = tf.keras.layers

    pu_input = layers.Input(shape=[fsize], name="pu_feature")
    do_input = layers.Input(shape=[fsize], name="do_feature")
    dt_input = layers.Input(shape=[dtsize], name="dt_feature")

    # Two-layer selector network ending in a softmax.
    hidden = layers.concatenate([pu_input, do_input, dt_input])
    hidden = layers.Dense(num_neurons_in_layers[0], activation='relu')(hidden)
    selection = layers.Dense(
        num_neurons_in_layers[1], activation='softmax')(hidden)

    # Map the selection through each bridge matrix.
    bridge1 = K.dot(selection, K.transpose(K.constant(bridge_matrix1)))
    bridge2 = K.dot(selection, K.transpose(K.constant(bridge_matrix2)))

    # Three concatenated segments: pu -> bridge1, bridge1 -> bridge2 and
    # bridge2 -> do, each followed by the dt features.
    first_leg = layers.concatenate([pu_input, bridge1, dt_input])
    middle_leg = layers.concatenate([bridge1, bridge2, dt_input])
    last_leg = layers.concatenate([bridge2, do_input, dt_input])
    combined = layers.concatenate([first_leg, middle_leg, last_leg])

    model = tf.keras.Model(
        inputs=[pu_input, do_input, dt_input], outputs=combined)
    return model, pu_input, do_input, dt_input
def call(self, x: tf.Tensor) -> tf.Tensor:
    """Calculates the attention weights and applies them to the input.

    Parameters
    ----------
    x: tf.Tensor
        The input tensor.

    Returns
    -------
    tuple
        NOTE(review): despite the annotation, a pair is returned. Its first
        element is the attention-weighted input (summed over axis 1 unless
        ``self.return_sequences`` is set); its second element is the
        attention weights.
    """
    scores = K.dot(K.tanh(K.dot(x, self.W) + self.b), self.V)
    weights = K.softmax(scores, axis=1)
    weighted = x * weights

    if not self.return_sequences:
        weighted = K.sum(weighted, axis=1)
    return weighted, weights
def _time_distributed_dense(x, w, b=None, dropout=None, input_dim=None,
                            output_dim=None, timesteps=None, training=None):
    """Apply `y . w + b` for every temporal slice y of x.

    # Arguments
        x: input tensor.
        w: weight matrix.
        b: optional bias vector.
        dropout: whether to apply dropout (same dropout mask
            for every temporal slice of the input).
        input_dim: integer; optional dimensionality of the input.
        output_dim: integer; optional dimensionality of the output.
        timesteps: integer; optional number of timesteps.
        training: training phase tensor or boolean.

    # Returns
        Output tensor.
    """
    # Fall back to shapes read from the tensors for any missing dimension.
    input_dim = input_dim or K.shape(x)[2]
    timesteps = timesteps or K.shape(x)[1]
    output_dim = output_dim or K.int_shape(w)[1]

    if dropout is not None and 0. < dropout < 1.:
        # One dropout pattern, repeated so every timestep shares it.
        first_slice = K.reshape(x[:, 0, :], (-1, input_dim))
        step_mask = K.dropout(K.ones_like(first_slice), dropout)
        full_mask = K.repeat(step_mask, timesteps)
        x = K.in_train_phase(x * full_mask, x, training=training)

    # Fold time into the batch axis, apply the dense transform, unfold.
    flat = K.dot(K.reshape(x, (-1, input_dim)), w)
    if b is not None:
        flat = K.bias_add(flat, b)

    if K.backend() != 'tensorflow':
        return K.reshape(flat, (-1, timesteps, output_dim))

    out = K.reshape(flat, K.stack([-1, timesteps, output_dim]))
    out.set_shape([None, None, output_dim])
    return out
def call(self, x, mask=None):
    """Update the class centres and return squared distances to them.

    Args:
        x: Pair ``(features, onehot)``.  Per the original author's note,
            ``x[0]`` is N x feat_dim, ``x[1]`` is N x class_num one-hot and
            ``self.centers`` is class_num x feat_dim.
        mask: Unused.

    Returns:
        N x 1 tensor of squared distances from each sample to its class
        centre; also stored on ``self.result``.
    """
    features, onehot = x[0], x[1]
    onehot_t = K.transpose(onehot)

    # Per-class sum of (centre - feature), divided by the class count + 1.
    offsets = K.dot(onehot, self.centers) - features
    class_counts = K.sum(onehot_t, axis=1, keepdims=True) + 1
    delta_centers = K.dot(onehot_t, offsets) / class_counts

    # Centres are assigned directly; the original author reported problems
    # with add_update().
    self.centers.assign(self.centers - self.alpha * delta_centers)

    # Recompute each sample's offset from its (updated) class centre.
    residual = features - K.dot(onehot, self.centers)
    self.result = K.sum(residual**2, axis=1, keepdims=True)
    return self.result
def call(self, x, training=False):
    """Apply the stack of dense transforms to ``x``.

    Each layer multiplies by ``self.kernels[i]``, adds ``self.bias[i]`` and,
    when ``self.activations[i]`` is truthy, applies that activation.

    Args:
        x: Input tensor.
        training: Unused.

    Returns:
        Output of the last layer.
    """
    out = x
    for idx, kernel in enumerate(self.kernels):
        out = K.bias_add(K.dot(out, kernel), self.bias[idx],
                         data_format='channels_last')
        activation = self.activations[idx]
        if activation:
            out = tf.keras.layers.Activation(activation)(out)
    return out
def call(self, x, mask=None):
    """Collapse the time axis with learned attention weights.

    Shapes below follow the original author's annotations.

    Args:
        x: (BATCH_SIZE, MAX_TIMESTEPS, EMBED_SIZE) input.
        mask: Optional mask multiplied into the attention weights.

    Returns:
        (BATCH_SIZE, EMBED_SIZE) attention-weighted sum over time.
    """
    # scores: (BATCH_SIZE, MAX_TIMESTEPS)
    scores = K.squeeze(K.tanh(K.dot(x, self.W) + self.b), axis=-1)

    # weights: (BATCH_SIZE, MAX_TIMESTEPS)
    weights = K.softmax(scores)
    if mask is not None:
        # NOTE(review): the weights are not renormalised after masking.
        weights = weights * K.cast(mask, K.floatx())

    # Broadcast the weights over the embedding axis and sum over time.
    weighted = x * K.expand_dims(weights, axis=-1)
    return K.sum(weighted, axis=1)
def call(self, inputs, mask=None):
    """Apply multi-head self-attention to ``inputs``.

    Args:
        inputs: Tensor used as query, key and value.
        mask: Optional mask, shared by query, key and value.

    Returns:
        Attention output after the final linear projection.
    """
    # Linear projections with bias; per the original note:
    # [N, max_len, emb_dim] * [emb_dim, emb_dim] = [N, max_len, emb_dim]
    query = K.dot(inputs, self.wq)
    key = K.dot(inputs, self.wk)
    value = K.dot(inputs, self.wv)
    query = query + self.bq
    key = key + self.bk
    value = value + self.bv

    # Scaled dot-product attention, then merge the heads back together.
    attended = ScaleDotProducttion()(
        [query, key, value], [mask, mask, mask], self.n_head)
    merged = ScaleDotProducttion.reshape_from_attention_shape(
        attended, self.n_head)

    return K.dot(merged, self.wo) + self.bo
def call(self, inputs):
    """Project the theta outputs onto a cosine/sine seasonality basis.

    Args:
        inputs: Input forwarded to the parent block's ``call``.

    Returns:
        Tuple ``(backcast, forecast)``.
    """
    theta_b_output, theta_f_output = super(SeasonalBlock, self).call(inputs)

    # Time grid from -fdw to fw in unit steps, scaled by fdw.
    grid = K.cast(K.arange(-self.fdw, self.fw, 1) / self.fdw, tf.float32)

    # Half the theta units are cosine harmonics; the sines take the other
    # half plus the extra unit when theta_units is odd.
    n_cos, remainder = divmod(self.theta_units, 2)
    n_sin = n_cos + remainder

    cos_rows = [K.cos(2 * np.pi * k * grid) for k in range(n_cos)]
    sin_rows = [K.sin(2 * np.pi * k * grid) for k in range(n_sin)]
    basis = K.concatenate(
        [K.stack(cos_rows, axis=0), K.stack(sin_rows, axis=0)], axis=0)

    # The first fdw columns form the backcast basis, the rest the forecast.
    backcast = K.dot(theta_b_output, basis[:, :self.fdw])
    forecast = K.dot(theta_f_output, basis[:, self.fdw:])
    return backcast, forecast
def call(self, inputs):
    """Dense transform with freshly sampled noise on kernel and bias.

    Args:
        inputs: Input tensor.

    Returns:
        ``activation(inputs . (kernel + sigma_kernel * noise) + noisy_bias)``,
        with the bias and activation applied only when configured.
    """
    # Implement Eq.(9): perturb the weights with sigma-scaled random noise
    # drawn from K.random_uniform.
    kernel_noise = K.random_uniform(shape=self.kernel_shape)
    noisy_kernel = self.kernel + self.sigma_kernel * kernel_noise
    result = K.dot(inputs, noisy_kernel)

    if self.use_bias:
        bias_noise = K.random_uniform(shape=self.bias_shape)
        noisy_bias = self.bias + self.sigma_bias * bias_noise
        result = K.bias_add(result, noisy_bias)

    if self.activation is None:
        return result
    return self.activation(result)
def call(self, x, mask=None):
    """Return the attention-weighted sum of ``x`` over axis 1.

    Args:
        x: Input tensor; reshaped to ``(-1, features_dim)`` for scoring and
            back to ``(-1, step_dim)``.
        mask: Optional mask multiplied into the unnormalised weights.

    Returns:
        ``x`` weighted by the normalised attention and summed over axis 1.
    """
    n_features, n_steps = self.features_dim, self.step_dim

    # One score per step: flatten, project with W, restore the step axis.
    flat_x = K.reshape(x, (-1, n_features))
    column_w = K.reshape(self.W, (n_features, 1))
    scores = K.reshape(K.dot(flat_x, column_w), (-1, n_steps))
    if self.bias:
        scores = scores + self.b

    weights = K.exp(K.tanh(scores))
    if mask is not None:
        weights = weights * K.cast(mask, K.floatx())

    # Normalise across steps; epsilon guards against a zero denominator.
    denominator = K.cast(
        K.sum(weights, axis=1, keepdims=True) + K.epsilon(), K.floatx())
    weights = K.expand_dims(weights / denominator)

    return K.sum(x * weights, axis=1)
def call(self, inputs):
    """Score inputs by cosine similarity against the class embeddings.

    Args:
        inputs: Input tensor; per the original note it is ``(None, d)`` and
            the kernel is ``(d, 2*output_dim)``.

    Returns:
        Sigmoid of the cosine similarity between each projected pair and the
        matching class embedding.
    """
    # Project, then view the result as output_dim two-component vectors.
    pairs = K.reshape(K.dot(inputs, self.kernel), (-1, self.output_dim, 2))

    # Unit-normalise both sides along the last axis so that their product,
    # summed over that axis, is the cosine similarity.
    unit_pairs = K.l2_normalize(pairs, -1)
    unit_embeds = K.l2_normalize(self.class_embeds, -1)
    cosine = K.sum(unit_pairs * unit_embeds, axis=-1)

    # A sigmoid is applied here instead of the linear shift
    # 0.5 * (cosine + 1) onto the unit interval.
    return tf.nn.sigmoid(cosine)
def call(self, x):
    """Add or concatenate sinusoidal position embeddings to ``x``.

    Args:
        x: Input tensor indexed as ``x[:, :, 0]``, i.e. at least rank 3.

    Returns:
        ``position_embedding + x`` in ``'sum'`` mode, the concatenation of
        the embedding and ``x`` along axis 2 in ``'concat'`` mode, and
        ``None`` for any other mode.
    """
    # In 'sum' mode the embedding width must match the input's last axis.
    if self.size is None or self.mode == 'sum':
        self.size = int(x.shape[-1])

    exponents = 2 * K.arange(self.size / 2, dtype='float32') / self.size
    inv_freq = K.expand_dims(1. / K.pow(10000., exponents), 0)

    # Cumulative sum of ones along axis 1 yields 0, 1, 2, ... like arange,
    # but follows the actual length of x.
    positions = tf.cumsum(K.ones_like(x[:, :, 0]), 1) - 1
    positions = K.expand_dims(positions, 2)

    angles = K.dot(positions, inv_freq)
    embedding = K.concatenate([K.cos(angles), K.sin(angles)], 2)

    if self.mode == 'sum':
        return embedding + x
    if self.mode == 'concat':
        return K.concatenate([embedding, x], 2)
def call(self, x):
    """Transform the first input and return it with the stored graph.

    Args:
        x: Sequence whose first element is the tensor to transform.

    Returns:
        Tuple ``(ret, self.graph)`` where ``ret`` is ``x[0] . self.trafo``
        reshaped to ``(-1, self.ogs, self.paramo)``.
    """
    transformed = K.dot(x[0], self.trafo)
    reshaped = K.reshape(transformed, (-1, self.ogs, self.paramo))
    return reshaped, self.graph
def _additive_similarity(self, source, query):
    """Compute additive attention similarity between source and query.

    Args:
        source: Source tensor; its first two dimensions size the tiled
            weight vector.
        query: Query tensor, concatenated with ``source`` along axis 2.

    Returns:
        Batch dot product of the tiled ``v_a`` weights with
        ``tanh([source; query] . w_a)``.
    """
    joined = K.concatenate([source, query], axis=2)
    activated = K.tanh(K.dot(joined, self._weights["w_a"]))

    # Tile the weight vector (1, 1, dim) for each time step and each element
    # of the batch -> (bs, T, dim).
    dims = K.shape(source)
    tiled_v = K.tile(
        K.expand_dims(self._weights["v_a"], 0), [dims[0], dims[1], 1])
    tiled_v_t = K.permute_dimensions(tiled_v, [0, 2, 1])

    return K.batch_dot(tiled_v_t, activated, axes=[1, 2])