def __call__(self, x):
    """Return the regularization penalty for the weight tensor ``x``.

    Up to three terms are accumulated, each one only when its coefficient is
    positive:

    * ``l2_reg``: sum of ``l2_reg * x**2``.
    * ``s_ll_reg``: mean of ``self.errfun(S_ll, x^T x)``.
    * ``orth_reg``: mean squared off-diagonal entry of ``x x^T`` (the mask is
      ``ones - eye`` of size ``embedding_dim``).

    When ``self.reshape`` is set, ``x`` is reshaped to it and the last two
    terms are added once per slice ``[:, :, i]`` of the reshaped tensor (and
    of ``self.S_ll``).
    """

    def gram_term(target, mat):
        # Compare the Gram matrix mat^T mat with its target.
        return self.s_ll_reg * K.mean(
            self.errfun(target, K.dot(K.transpose(mat), mat)))

    def orth_term(mat):
        # Penalize only the off-diagonal entries of mat mat^T.
        dim = self.embedding_dim
        off_diagonal = K.ones((dim, dim)) - K.eye(dim)
        return self.orth_reg * K.mean(
            K.square(off_diagonal * K.dot(mat, K.transpose(mat))))

    penalty = 0.
    if self.l2_reg > 0.:
        penalty += K.sum(self.l2_reg * K.square(x))

    if self.reshape is None:
        if self.s_ll_reg > 0.:
            penalty += gram_term(self.S_ll, x)
        if self.orth_reg > 0.:
            penalty += orth_term(x)
        return penalty

    x_reshaped = K.reshape(x, self.reshape)
    for i in range(self.reshape[2]):
        if self.s_ll_reg > 0.:
            penalty += gram_term(self.S_ll[:, :, i], x_reshaped[:, :, i])
        if self.orth_reg > 0.:
            penalty += orth_term(x_reshaped[:, :, i])
    return penalty
def call(self, x):
    """Compose the per-sample matrices in ``x`` with the stored SE3 state.

    * Stateful: ``x[i]`` is multiplied onto ``self.initial_SE3[i]`` for every
      batch index, and the products are registered through ``add_update`` as
      the new values of ``self.initial_SE3``.
    * Stateless: a running product is chained through the batch, starting
      from ``self.initial_SE3[0]``.

    Returns the stacked products, or a stacked 4x4 identity when
    ``self.batch_size`` is unset or not positive.
    """
    # Check the batch size *before* iterating: ``range(None)`` raises
    # TypeError, which previously made the identity fallback unreachable
    # for an unset batch size.
    if not self.batch_size or self.batch_size <= 0:
        return K.stack([K.eye(4)])

    output_list = []
    if self.stateful:
        for index in range(self.batch_size):
            output_list.append(
                tf.matmul(x[index], self.initial_SE3[index]))
        output_tensor = K.stack(output_list)
        # Pair every state entry with its freshly computed replacement.
        updates = list(zip(self.initial_SE3, output_list))
        self.add_update(updates, x)
        return output_tensor

    prev_matrix = self.initial_SE3[0]
    for index in range(self.batch_size):
        prev_matrix = tf.matmul(x[index], prev_matrix)
        output_list.append(prev_matrix)
    return K.stack(output_list)
def call(self, x):
    """Apply mode-1 projection, attention over mode 2, then mode-2 projection.

    Shapes as stated by the original author (not verifiable from this block):
        x:  N x D1 x D2
        W1: D1 x d1
        W2: D2 x d2
        W:  D2 x D2
    """
    n_cols = self.in_shape[2]
    identity = K.eye(n_cols, dtype='float32')

    # First mode projection (N x d1 x D2 per the original comment).
    projected = nmodeproduct(x, self.W1, 1)

    # Replace the diagonal of W by the constant 1 / in_shape[2]: the first two
    # terms zero the diagonal, the last one writes the constant.
    attention_kernel = self.W - self.W * identity + identity / n_cols

    # Attention weights, normalized over the last axis.
    attention = Activations.softmax(
        nmodeproduct(projected, attention_kernel, 2), axis=-1)

    # Blend the raw and the attended features with the mixing factor alpha.
    blended = self.alpha * projected + (1.0 - self.alpha) * projected * attention

    # Second mode projection followed by the bias.
    out = nmodeproduct(blended, self.W2, 2)
    out = out + self.bias

    if self.output_dim[1] == 1:
        out = K.squeeze(out, axis=-1)
    return out
def call(self, inputs, training=None):
    """Estimate mixture statistics from the batch and return the density.

    ``inputs`` is the pair ``(z, gamma_k)``.  The batch estimates of phi, mu
    and sigma are written to the layer variables via ``add_update``, a
    penalty ``lambd_diag * sum(eye / est_sigma)`` is added as a loss, and the
    component density is evaluated with the batch estimates in the training
    phase and with the stored variables otherwise.

    NOTE(review): the matrix products suggest z is (batch, dim) and gamma_k is
    (batch, 1) -- confirm against the caller.
    """
    z, gamma_k = inputs
    z_dim = K.int_shape(z)[1]

    gamma_total = K.sum(gamma_k)
    est_phi = K.mean(gamma_k, axis=0)
    est_mu = K.dot(K.transpose(gamma_k), z) / gamma_total

    centered = z - est_mu
    est_sigma = K.dot(K.transpose(centered), gamma_k * centered) / gamma_total
    # Small random positive values added on the diagonal only.
    jitter = K.random_normal(shape=(z_dim, 1), mean=1e-3, stddev=1e-4)
    est_sigma = est_sigma + (jitter * K.eye(z_dim))

    for variable, estimate in ((self.phi, est_phi),
                               (self.mu, est_mu),
                               (self.sigma, est_sigma)):
        self.add_update(K.update(variable, estimate), inputs)

    # Element-wise identity / sigma: off-diagonal numerators are zero.
    est_sigma_diag_inv = K.eye(K.int_shape(self.sigma)[0]) / est_sigma
    self.add_loss(self.lambd_diag * K.sum(est_sigma_diag_inv), inputs)

    phi = K.in_train_phase(est_phi, self.phi, training)
    mu = K.in_train_phase(est_mu, self.mu, training)
    sigma = K.in_train_phase(est_sigma, self.sigma, training)
    return GaussianMixtureComponent._calc_component_density(z, phi, mu, sigma)
def _mh_loss(y_true, y_pred):
    """Max-of-hinges loss over an ``n x n`` score matrix ``y_pred``.

    The diagonal of ``y_pred`` holds the positive scores.  For every row the
    largest hinge ``max(0, alpha - positive + negative)`` is taken, once on
    the (diagonal-shifted) score matrix and once on its transpose, and both
    are summed.  ``y_true`` is unused; ``n`` and ``alpha`` come from the
    enclosing scope.
    """
    identity = K.eye(n)
    positive = K.reshape(K.sum(identity * y_pred, axis=0), (n, 1))

    # Subtracting alpha on the diagonal makes the positive pair's own hinge 0.
    negative_captions = y_pred - identity * alpha
    negative_images = K.transpose(negative_captions)

    def hardest_hinge(negatives):
        return K.max(K.maximum(0., alpha - positive + negatives), axis=1)

    return K.sum(hardest_hinge(negative_captions)
                 + hardest_hinge(negative_images))
def __call__(self, x):
    """Penalize the deviation of ``M M^T`` from the identity.

    Axis 1 of ``x`` must have a perfect-square static size; ``x`` is reshaped
    into a batch of ``size x size`` matrices ``M``.  An L1 and/or L2 penalty
    on ``M M^T - I`` is returned, depending on which of ``self.l1`` /
    ``self.l2`` is truthy.
    """
    flat_dim = x.shape[1].value
    size = int(np.sqrt(flat_dim))
    assert (size * size == flat_dim)

    matrices = K.reshape(x, (-1, size, size))
    # Contracting the last axes of both operands gives M M^T per batch item.
    xxt = K.batch_dot(matrices, matrices, axes=(2, 2))

    penalty = 0.0
    if self.l1:
        penalty += K.sum(self.l1 * K.abs(xxt - K.eye(size)))
    if self.l2:
        penalty += K.sum(self.l2 * K.square(xxt - K.eye(size)))
    return penalty
def page_ranking(query, candidates):
    """Score ``candidates`` against ``query`` with a closed-form ranking.

    The query is stacked on top of the candidates, a dot-product similarity
    graph without self-loops is built, its columns are scaled by the inverse
    column sums, and ``(1 - alpha) * (I - alpha * P)^-1 y`` is evaluated with
    ``y`` being one-hot on the query node.  The query's own score is dropped
    from the result.  ``maxsents`` and ``alpha`` come from the enclosing
    scope.
    """
    n_nodes = maxsents + 1
    identity = K.eye(n_nodes)

    reprs = K.concatenate((query[None, :], candidates), axis=0)
    sims = K.dot(reprs, K.transpose(reprs))

    # Zero the diagonal so that no node links to itself.
    W = (1 - identity) * sims
    inv_col_sum = (K.epsilon() + K.sum(W, axis=0))**-1
    P = K.dot(W, identity * inv_col_sum)

    # Restart vector: all mass on the query (node 0).
    y = K.concatenate((K.ones(1), K.zeros(maxsents)))
    inverse = T.nlinalg.matrix_inverse(identity - alpha * P)
    x_r = (1 - alpha) * K.dot(inverse, y)
    return x_r[1:]
def Mask(self, inputs):
    """Subtract 1e12 from ``inputs`` wherever a fixed triangular mask is set.

    The mask is built from ``eye(max_len)``: a cumulative sum along axis 1
    gives ones on and to the right of the diagonal, subtracting the identity
    leaves ones strictly to the right.  It is then expanded and permuted to
    shape [1, ml, ml, 1] so it broadcasts against ``inputs``.
    """
    identity = K.eye(self.max_len)                      # [ml, ml]
    on_or_right = K.cumsum(identity, 1)                 # [ml, ml]

    strictly_right = (K.expand_dims(on_or_right, axis=0)
                      - K.expand_dims(identity, axis=0))  # [1, ml, ml]
    strictly_right = K.expand_dims(strictly_right, axis=1)  # [1, 1, ml, ml]
    # Reverse the last three axes -> [1, ml, ml, 1].
    strictly_right = K.permute_dimensions(strictly_right, (0, 3, 2, 1))

    return inputs - strictly_right * 1e12
def gp(x, hyp, n_high, n_low):
    """Assemble the joint two-level kernel matrix as a 2x2 block matrix.

    The first ``n_low`` rows of ``x`` are the low-level inputs and the next
    ``n_high`` rows the high-level inputs.  Row 0 of ``hyp`` holds the
    hyper-parameters: ``[0:3]`` is passed to the shared kernel, ``[3:5]`` to
    the additional high-level kernel, ``[2]`` and ``[5]`` are squared and
    added on the diagonals, and ``[6]`` is the scaling factor ``rho``.

    Returns the ``(n_low + n_high)``-square matrix
    ``[[K_LL, K_LH], [K_HL, K_HH]]``.
    """
    rho = hyp[0, 6]
    shared_params = hyp[0, 0:3]
    extra_params = hyp[0, 3:5]

    x_l = x[0:n_low]
    x_h = x[n_low:n_high + n_low]

    K_LL = kerne1(x_l, x_l, shared_params) + K.eye(n_low) * K.pow(hyp[0, 2], 2)
    K_LH = rho * kerne1(x_l, x_h, shared_params)
    K_HL = rho * kerne1(x_h, x_l, shared_params)
    K_HH = (K.pow(rho, 2) * kerne1(x_h, x_h, shared_params)
            + kerne1(x_h, x_h, extra_params)
            + K.eye(n_high) * K.pow(hyp[0, 5], 2))

    top_row = K.concatenate([K_LL, K_LH], axis=-1)
    bottom_row = K.concatenate([K_HL, K_HH], axis=-1)
    return K.concatenate([top_row, bottom_row], axis=0)
def tangent_distance(signals, protos, subspaces, squared=False, epsilon=K.epsilon()):
    """Distance between signals and prototypes after removing a subspace.

    The difference ``signals - protos`` is multiplied by the projector
    ``I - S S^T`` and its 2-norm over the atom axes is returned.

    Notes from the original author:
        * ``subspaces`` is always assumed as transposed and must be
          orthogonal.
        * shape(signals): batch x proto_number x channels x dim1 x ... x dimN
        * shape(protos): proto_number x dim1 x ... x dimN
        * shape(subspaces): (optional [proto_number]) x prod(dim1..dimN)
          x prod(projected_atom_shape)

    A 2-D ``subspaces`` is one global tangent space, a 3-D one holds one
    tangent space per prototype.  Any other rank falls through and returns
    ``None``.

    Raises:
        ValueError: if the prototype shape does not match the signals.
    """
    dyn_shape = mixed_shape(signals)
    static_shape = tuple(d if isinstance(d, int) else None for d in dyn_shape)
    basis_rows = K.int_shape(subspaces)[-2]

    expected_proto_shape = (static_shape[1],) + static_shape[3:]
    if not equal_shape(expected_proto_shape, K.int_shape(protos)):
        raise ValueError("The shape of signals[2:] must be equal protos. You provide: signals.shape[2:]="
                         + str(expected_proto_shape)
                         + " != protos.shape=" + str(K.int_shape(protos)))

    with K.name_scope('tangent_distance'):
        atom_axes = list(range(3, len(dyn_shape)))
        # Swap axes 1 and 2 so that protos broadcasts over the trailing axes.
        signals = K.permute_dimensions(signals, [0, 2, 1] + atom_axes)
        diff = signals - protos

        # Axes 0 and 2 of the signals merged, atom axes flattened.
        flat_shape = (dyn_shape[0] * dyn_shape[2], dyn_shape[1], -1)
        subspace_rank = K.ndim(subspaces)

        # global tangent space
        if subspace_rank == 2:
            with K.name_scope('projector'):
                projector = K.eye(basis_rows) - K.dot(subspaces, K.transpose(subspaces))

            with K.name_scope('tangentspace_projections'):
                diff = K.reshape(diff, flat_shape)
                projected_diff = K.dot(diff, projector)
                projected_diff = K.reshape(
                    projected_diff,
                    (dyn_shape[0], dyn_shape[2], dyn_shape[1]) + dyn_shape[3:])

            diss = p_norm(projected_diff, order_p=2, axis=atom_axes,
                          squared=squared, keepdims=False, epsilon=epsilon)
            return K.permute_dimensions(diss, [0, 2, 1])

        # local tangent spaces
        elif subspace_rank == 3:
            with K.name_scope('projector'):
                projector = K.eye(basis_rows) - K.batch_dot(subspaces, subspaces, [2, 2])

            with K.name_scope('tangentspace_projections'):
                diff = K.reshape(diff, flat_shape)
                # Bring axis 1 to the front so it acts as the batch axis.
                diff = K.permute_dimensions(diff, [1, 0, 2])
                projected_diff = K.batch_dot(diff, projector)
                projected_diff = K.reshape(
                    projected_diff,
                    (dyn_shape[1], dyn_shape[0], dyn_shape[2]) + dyn_shape[3:])

            diss = p_norm(projected_diff, order_p=2, axis=atom_axes,
                          squared=squared, keepdims=False, epsilon=epsilon)
            return K.permute_dimensions(diss, [1, 0, 2])
def build(self, shape_input):
    """Create the weights and derived tensors of the RNN cell.

    With ``M = shape_input[1] - 1`` (the number of stations):

    * ``self.mu``: non-negative weight of shape ``(M, 1)``, initialized
      uniformly at random in ``[0.01, 10]``.
    * ``data_p``: non-negative weight of shape ``(M, M - 1)``, same
      initialization, normalized so that every row sums to one.
    * ``self.P``: ``M x M`` matrix whose off-diagonal entries are the
      normalized ``data_p`` values (row-major) and whose diagonal is zero.
    * ``self.odot``: ``(P - I) * mu``.
    """
    M = shape_input[1] - 1
    self.I = k_back.eye(M)

    init_mu = RandomUniform(minval=0.01, maxval=10)
    init_pfd = RandomUniform(minval=0.01, maxval=10)
    self.mu = self.add_weight(
        'mu', shape=(M, 1), initializer=init_mu, constraint=NonNeg())
    data_p = self.add_weight(
        'data_p', shape=(M, M - 1), initializer=init_pfd, constraint=NonNeg())

    row_normalized = data_p / k_back.sum(data_p, axis=1, keepdims=True)
    flat_rows = k_back.flatten(row_normalized)[None, :]

    # In a row-major M x M layout the diagonal sits at multiples of M + 1;
    # every other flat position receives one entry of data_p.
    off_diagonal_slots = [j for j in range(M * M) if j % (M + 1) != 0]
    scatter = k_back.one_hot(off_diagonal_slots, M * M)

    self.P = k_back.reshape(flat_rows @ scatter, (M, M))
    self.odot = (self.P - self.I) * self.mu
    self.is_built = True
def call(self, xw, mask=None):
    """Combine weighted inputs into a mean, log-variance and scaled factor.

    ``xw`` is the pair ``(x, weights)``; the original comment describes ``x``
    as ``mu_i / sigma2_i``.  A covariance is built from ``self.chol`` and
    ``exp(self.D)``; its inverse, scaled by the summed weights and added to
    the identity, gives one precision matrix per sample.  Each precision is
    inverted and symmetrized, and the result is upper-Cholesky factorized.

    Returns ``[mu, logvar, chol]`` where ``logvar`` is twice the log of the
    factor's diagonal and ``chol`` is the factor divided by that diagonal.
    ``mask`` is not used.
    """
    x, weights = xw
    gamma = K.sum(x * weights, axis=1)
    N = K.sum(weights, axis=1, keepdims=True)

    var_i = K.exp(self.D)
    cov_i = K.dot(self.chol.T * var_i, self.chol)
    prec_i = K.expand_dims(K2.matrix_inverse(cov_i), axis=0)
    identity = K.expand_dims(K.eye(self.units, dtype=float_keras()), axis=0)
    prec = identity + N * prec_i

    cov = K.map_fn(lambda p: K2.matrix_inverse(p), prec)
    # Average with the transpose to enforce symmetry.
    cov = 0.5 * (cov + K.permute_dimensions(cov, [0, 2, 1]))
    mu = K.batch_dot(gamma, cov)

    chol = K.map_fn(lambda c: K2.cholesky(c, lower=False), cov)
    sigma = K.map_fn(lambda f: K2.diag(f), chol)
    chol = chol / K.expand_dims(sigma, axis=-1)
    logvar = 2 * K.log(sigma)
    return [mu, logvar, chol]
def masked_softmax(input_layer, n_nodes, batch_size):
    """Row-wise softmax restricted to the lower triangle (Theano backend).

    Entries on or below the main diagonal of each ``(n_nodes - 1, n_nodes)``
    matrix are kept, entries strictly above it are replaced by -100, and 2 is
    added on the main diagonal.  The softmax is then taken over the last
    axis.

    Parameters
    ----------
    input_layer: tensor that broadcasts against ``(n_nodes - 1, n_nodes)``
        and holds ``batch_size`` such matrices.
    n_nodes: int, number of columns; each matrix has ``n_nodes - 1`` rows.
    batch_size: int, number of matrices in ``input_layer``.

    Returns
    -------
    output_layer: tensor of shape ``(batch_size, n_nodes - 1, n_nodes)``.
    """
    n_rows = n_nodes - 1
    tensor = K.theano.tensor

    keep_lower = tensor.tril(K.ones((n_rows, n_nodes)))
    push_upper = tensor.triu(-100. * K.ones((n_rows, n_nodes)), 1)

    masked = keep_lower * input_layer + push_upper
    masked = masked + 2 * K.eye(n_nodes)[0:n_rows, 0:n_nodes]

    # Softmax works on 2-D input: fold the batch into the rows and back.
    flat = K.reshape(masked, (batch_size * n_rows, n_nodes))
    return K.reshape(K.softmax(flat), (batch_size, n_rows, n_nodes))
def call(self, inputs, mask=None):
    """Multi-relational graph convolution with max aggregation.

    ``inputs[0]`` holds the node features and ``inputs[1:]`` the adjacency
    matrices.  Optional extra adjacencies are appended (shifted identity,
    transposes of all current ones, identity); for each of the first
    ``num_adjacency_matrices`` entries ``A_i (H W_i)`` is computed and the
    element-wise maximum over them is returned after bias and activation.
    ``mask`` is not used.

    Shapes per the original comments: features ``(None, num_nodes,
    num_features)``, adjacencies ``(None, num_nodes, num_nodes)``.
    """
    features = inputs[0]
    A = inputs[1:]

    # Adding to a zeroed A[0] broadcasts the identity over the batch axis.
    eye = (A[0] * K.zeros(self.num_nodes, dtype='float32')
           + K.eye(self.num_nodes, dtype='float32'))

    if self.consecutive_links:
        # NOTE(review): rolls along axis 0, which is the batch axis of the
        # batched ``eye`` -- confirm this is intended rather than a node axis.
        A.append(tf.manip.roll(eye, shift=1, axis=0))

    if self.backward_links:
        # The transposes are built before any of them is appended, so only
        # the matrices present at this point are mirrored.
        A.extend([K.permute_dimensions(adj, [0, 2, 1]) for adj in A])

    if self.self_links:
        A.append(eye)

    per_adjacency = []
    for i in range(self.num_adjacency_matrices):
        if self.edge_weighting:
            # NOTE(review): the scaling accumulates across iterations because
            # ``features`` is rebound -- confirm this is intended.
            features *= self.W_edges[i]
        transformed = K.dot(features, self.W[i])
        per_adjacency.append(K.batch_dot(A[i], transformed))

    stacked = K.stack(per_adjacency, axis=1)
    output = K.max(stacked, axis=1)

    if self.bias:
        output += self.b
    return self.activation(output)
def FeatureTransformNet(ipts):
    """Build the 64x64 feature-transform sub-network and apply it to ``ipts``.

    Three 1x1 convolutions, a max-pool over all ``points`` and three dense
    layers predict 64 * 64 values; a flattened 64x64 identity (fed through a
    constant ``Input`` tensor) is added and the sum is reshaped to
    ``(64, 64)``.  The model summary is printed as a side effect.

    ``ipts`` is a keras tensor; ``points`` comes from the enclosing scope.

    Returns:
        ``(transform, bias)``: the model output for ``ipts`` and the identity
        input tensor.
    """
    ipt = Input(shape=(points, 1, 64), name="FeatureTransformNet_Input")

    net = ipt
    for n_filters in (64, 128, 1024):
        net = Conv2D(filters=n_filters, kernel_size=(1, 1), activation="relu")(net)

    max_pool = MaxPool2D(pool_size=(points, 1))(net)
    net = Flatten()(max_pool)
    for n_units in (512, 256):
        net = Dense(units=n_units, activation="relu")(net)
    net = Dense(units=64 * 64)(net)

    # Constant identity matrix, flattened so it can be added to the output.
    bias = Input(tensor=K.eye(64, dtype="float32"), name="FeatureTransformNet_Bias")
    flat_identity = Lambda(function=lambda x: K.expand_dims(x, axis=0))(bias)
    flat_identity = Flatten()(flat_identity)

    added = Lambda(function=lambda t: t[0] + t[1])([net, flat_identity])
    result = Reshape(target_shape=(64, 64), name="FeatureTransformNet_Output")(added)

    model = Model(inputs=[ipt, bias], outputs=[result])
    print("Feature transform net:")
    model.summary()
    return model([ipts, bias]), bias
def triplet_batch_hard_loss(y_true, y_pred):
    """Batch-hard triplet loss on a mini-batch of embeddings.

    ``y_pred`` holds the embeddings (B, D per the original comment) and
    ``y_true`` the sample IDs, not one-hot encoded.  ``y_true`` is summed
    over axis 1 to obtain one label per sample, so it must be at least 2-D
    here -- presumably (B, 1); confirm against the data pipeline.

    For every anchor the furthest sample with the same label and the closest
    sample with a different label are selected and handed to
    ``final_loss_tensor``.  ``batch_size`` comes from the enclosing scope.
    """
    labels = K.sum(y_true, axis=1)

    # All-pairs Euclidean distances; epsilon keeps the sqrt away from zero.
    pairwise_diff = K.expand_dims(y_pred, axis=1) - K.expand_dims(y_pred, axis=0)
    dist_mat = K.sqrt(K.sum(K.square(pairwise_diff), axis=-1) + K.epsilon())

    same_identity_mask = K.equal(K.expand_dims(labels, axis=1),
                                 K.expand_dims(labels, axis=0))
    # TODO: make this backend-agnostic somehow
    negative_mask = T.bitwise_not(same_identity_mask)
    # The diagonal of the same-identity mask is always set; XOR with the
    # identity clears it, so a sample is never paired with itself.
    positive_mask = T.bitwise_xor(same_identity_mask,
                                  K.eye(batch_size, dtype='bool'))

    furthest_positive = K.max(dist_mat * positive_mask, axis=1)
    # Same-identity entries get a large offset so the min skips them.
    closest_negative = K.min(dist_mat * negative_mask + 1e6 * same_identity_mask,
                             axis=1)

    return final_loss_tensor(furthest_positive, closest_negative)
def f(target, score):
    """Pairwise structured embedding loss on a batch of score vectors.

    For every same-class pair, the log of the summed ``exp(margin - dist)``
    over both members' different-class neighbours is added to the pair's
    distance; the squared positive part is summed and divided by twice the
    number of same-class pairs.  ``margin`` comes from the enclosing scope.
    """
    n_rows = score.shape[0]

    # Pair mask: -1 for different class, 1 for same class, 0 on the diagonal.
    mask = (2 * K.equal(0, target - K.reshape(target, (-1, 1))) - 1)
    mask = (mask - K.eye(n_rows))

    # Pairwise distances from ||a||^2 + ||b||^2 - 2 a.b, clipped at zero.
    sq_norm = (score**2).sum(axis=-1)
    sq_norm = K.tile(sq_norm, (sq_norm.shape[0], 1))
    dist = K.sqrt(K.maximum(0, (sq_norm + sq_norm.T - 2 * score.dot(score.T))))

    # Negative component (points from different class should be far).
    neg_sum = K.sum((K.exp(margin - dist) * K.equal(mask, -1)), axis=-1)
    neg_sum = K.tile(neg_sum, (n_rows, 1))
    l_n = K.log(neg_sum + K.transpose(neg_sum))
    l_n = l_n * K.equal(mask, 1)

    # Positive component (points from same class should be close).
    l_p = dist * K.equal(mask, 1)

    loss = K.sum((K.maximum(0, l_n + l_p)**2))
    n_pos = K.sum(K.equal(mask, 1))
    loss /= (2 * n_pos)
    return loss
def call(self, input, **kargs):
    """Append minibatch-discrimination features to ``input``.

    Every sample is projected with ``self.kernel``; the L1 distances between
    the projections of all pairs of samples are turned into similarities
    ``exp(-distance)`` and summed per sample.  With ``self.init_arg`` set,
    data-dependent initialization updates for ``log_weight_scale`` and
    ``bias`` are stored in ``self.init_updates``; otherwise ``self.bias`` is
    added.

    The batch size must be statically known (``K.int_shape(input)[0]``).

    Returns ``input`` (flattened per sample if it had more than two
    dimensions) concatenated with the new features along axis 1.
    """
    if K.ndim(input) > 2:
        # Flatten every sample into a feature vector while keeping the batch
        # axis.  ``K.flatten`` would collapse the batch axis as well and
        # leave a 1-D tensor that the axis-1 contraction below cannot use.
        input = K.batch_flatten(input)

    actv = K.batch_dot(input, self.kernel, [[1], [0]])

    # L1 distance between every pair of samples; the large constant on the
    # diagonal removes each sample's distance to itself.
    abs_dif = (K.sum(K.abs(
        K.expand_dims(actv)
        - K.expand_dims(K.permute_dimensions(actv, [1, 2, 0]), 0)), axis=2)
        + 1e6 * K.expand_dims(K.eye(K.int_shape(input)[0]), 1))

    if self.init_arg:
        mean_min_abs_dif = 0.5 * K.mean(K.min(abs_dif, axis=2), axis=0)
        abs_dif /= K.expand_dims(K.expand_dims(mean_min_abs_dif, 0))
        self.init_updates = [
            (self.log_weight_scale,
             self.log_weight_scale - K.expand_dims(K.log(mean_min_abs_dif)))
        ]

    f = K.sum(K.exp(-abs_dif), axis=2)

    if self.init_arg:
        # Centre the features and store the offset as the initial bias.
        mf = K.mean(f, axis=0)
        f -= K.expand_dims(mf, 0)
        self.init_updates += [(self.bias, -mf)]
    else:
        f += K.expand_dims(self.bias, 0)

    return K.concatenate([input, f], axis=1)
def trace_loss(transition_matrices, num_labels, beta):
    """Build the trace loss on dynamic transition matrices.

    Follows Luo et al., "Learning with Noise: Enhance Distantly Supervised
    Relation Extraction with Dynamic Transition Matrix", ACL 2017 (formula 5
    or 6, depending on how the loss is attached to the model).

    Args:
        transition_matrices: tensor produced by the
            DynamicTransitionMatrixGeneration layer.
        num_labels: number of rows/columns of each transition matrix.
        beta: scalar weight of the loss.  The negative trace is used, so a
            large positive beta pushes the matrices towards the identity and
            a negative beta towards the off-diagonals (noisy settings).

    Returns:
        A Keras loss function ``(y_true, y_pred) -> tensor`` that ignores its
        arguments.
    """
    identity = K.eye(num_labels)

    def trace_loss_function(y_true, y_pred):
        # Masking with the identity and summing yields the trace.  The sum
        # runs over all matrices at once, which is fine for formula 5 since
        # beta is shared; per-instance betas (formula 6) need separately
        # compiled models with their own instance of this loss.
        summed_traces = K.sum(transition_matrices * identity)
        return beta * -summed_traces

    return trace_loss_function
def __call__(self, x):
    """Soft penalty on the pairwise cosine similarity of weight vectors.

    With ``axis == 'last'`` the vectors are the columns of ``x`` reshaped to
    ``(-1, last_dim)``; with ``axis == 'first'`` they are the rows of ``x``
    reshaped to ``(first_dim, -1)``.  Vectors are scaled to unit length and
    ``gamma * sum(log(1 + exp(lam * (cos - 1))))`` is taken over all
    off-diagonal pairs.

    Returns ``None`` for any other ``axis`` value (unchanged behaviour).
    """
    xshape = K.int_shape(x)
    # Compare strings by value: ``is`` tests object identity, which is only
    # true by accident of interning and raises a SyntaxWarning on 3.8+.
    if self.axis == 'last':
        n_vectors = xshape[-1]
        x = K.reshape(x, (-1, n_vectors))
        x = x / K.sqrt(K.sum(K.square(x), axis=0, keepdims=True))
        xx = K.dot(K.transpose(x), x)
    elif self.axis == 'first':
        n_vectors = xshape[0]
        x = K.reshape(x, (n_vectors, -1))
        x = x / K.sqrt(K.sum(K.square(x), axis=1, keepdims=True))
        xx = K.dot(x, K.transpose(x))
    else:
        return None

    # The (1 - eye) mask drops each vector's similarity with itself.
    return self.gamma * K.sum(
        K.log(1.0 + K.exp(self.lam * (xx - 1.0))) * (1.0 - K.eye(n_vectors)))
def ipca_model_shap(dense2, predict, n_concept, input_size, concept_matrix):
    """Return the model used to compute SHAP values for concepts.

    The input features are projected onto the span of the masked concept
    vectors (least-squares projection with a 1e-10 ridge term) and then fed
    through ``dense2`` and ``predict``.  The concept mask is averaged over
    the batch before it is applied.

    The model is compiled with an SGD learning rate of 0, its summary is
    printed, and ``concept_matrix`` is loaded into ``layers[-7]``
    (presumably the ``Weight`` layer -- confirm if the graph changes).
    """
    pool1f_input = Input(shape=(input_size,), name='cluster1')
    concept_mask = Input(shape=(n_concept,), name='mask')

    proj_weight = Weight((input_size, n_concept))(pool1f_input)
    batch_mean_mask = Lambda(
        lambda m: K.mean(m, axis=0, keepdims=True))(concept_mask)
    masked_weight = Lambda(
        lambda pair: pair[0] * pair[1])([proj_weight, batch_mean_mask])

    # Tiny ridge term that keeps the Gram matrix invertible.
    eye = K.eye(n_concept) * 1e-10
    weight_pinv_t = Lambda(
        lambda w: K.dot(w, tf.linalg.inv(K.dot(K.transpose(w), w) + eye)))(
            masked_weight)

    proj_recon = Lambda(
        lambda t: K.dot(K.dot(t[0], t[2]), K.transpose(t[1])))(
            [pool1f_input, masked_weight, weight_pinv_t])

    softmax_pr = predict(dense2(proj_recon))

    finetuned_model_pr = Model(
        inputs=[pool1f_input, concept_mask], outputs=softmax_pr)
    finetuned_model_pr.compile(
        loss='categorical_crossentropy',
        optimizer=SGD(lr=0.000),
        metrics=['accuracy'])
    finetuned_model_pr.summary()
    finetuned_model_pr.layers[-7].set_weights([concept_matrix])
    return finetuned_model_pr
def build(self, input_shape):
    """Create the LU-factorized kernel unless one already exists.

    ``self.initializer(input_shape)`` supplies the factors ``p``, ``l``,
    ``u``, the sign and log-magnitude of u's diagonal, and the masks for
    ``l`` and ``u``.  ``p`` and the diagonal signs are stored as
    non-trainable weights, the rest as trainable ones.  The kernel is then
    assembled as ``p @ (l * l_mask + I) @ (u * u_mask + diag(sign *
    exp(log_abs)))``.
    """
    if self.kernel is not None:
        return

    (p, l, u, u_diag_sign, u_diag_abs_log,
     l_mask, u_mask) = self.initializer(input_shape)

    def constant_weight(name, value, trainable):
        # Weight that starts at exactly ``value``.
        return self.add_weight(name=name,
                               shape=value.shape,
                               initializer=lambda _: value,
                               trainable=trainable)

    self.kernel_p = constant_weight('kernel_p', p, False)
    self.kernel_l = constant_weight('kernel_l', l, True)
    self.kernel_u = constant_weight('kernel_u', u, True)
    self.kernel_u_diag_sign = constant_weight(
        'kernel_u_diag_sign', u_diag_sign, False)
    self.kernel_u_diag_abs_log = constant_weight(
        'kernel_u_diag_abs_log', u_diag_abs_log, True)

    # Masked L plus the identity on its diagonal.
    self.kernel_l = self.kernel_l * l_mask + K.eye(input_shape[-1])
    # Masked U plus a diagonal rebuilt from fixed signs and learned log-sizes.
    u_diagonal = K.tf.diag(
        self.kernel_u_diag_sign * K.exp(self.kernel_u_diag_abs_log))
    self.kernel_u = self.kernel_u * u_mask + u_diagonal

    self.kernel = K.dot(K.dot(self.kernel_p, self.kernel_l), self.kernel_u)
def uncorrelated_feature(self, x):
    """Return the sum of squared off-diagonal entries of ``self.covariance``.

    With an encoding dimension of one or less there are no off-diagonal
    entries and ``0.0`` is returned.  ``x`` is not used.
    """
    if self.encoding_dim <= 1:
        return 0.0

    # Masking with the identity keeps only the diagonal of the covariance.
    diagonal_part = tf.math.multiply(self.covariance,
                                     K.eye(self.encoding_dim))
    return K.sum(K.square(self.covariance - diagonal_part))
def categorical_accuracy(y_true, y_pred):
    """Return the (optionally class-weighted) categorical accuracy tensor.

    Args:
        y_true: one-hot ground truth labels
        y_pred: predicted class scores, classes on the last axis

    Returns:
        a scalar tensor: weighted number of correct predictions divided by
        the weighted number of ground-truth samples

    ``weights`` comes from the enclosing scope; ``None`` means every class
    has weight 1.
    """
    num_classes = K.int_shape(y_pred)[-1]
    _weights = weights if weights is not None else np.ones(num_classes)

    # Collapse the one-hot / score tensors into flat label vectors.
    true_labels = K.flatten(K.argmax(y_true, axis=-1))
    pred_labels = K.flatten(K.argmax(y_pred, axis=-1))

    # The confusion matrix holds integers; cast so it can be multiplied by
    # the floating point identity mask.
    confusion = K.cast(
        confusion_matrix(true_labels, pred_labels, num_classes=num_classes),
        K.floatx())

    # Diagonal entries are the correct predictions per class.
    correct = _weights * K.sum(confusion * K.eye(num_classes), axis=-1)
    # Row sums are the number of ground-truth samples per class.
    total = _weights * K.sum(confusion, axis=-1)
    return K.sum(correct) / K.sum(total)
def trace(tensors, keepdims=False):
    """Trace of the square matrices stored in the last two axes of ``tensors``.

    Args:
        tensors: tensor whose last two dimensions form square matrices; at
            least one of the two must be statically known.
        keepdims: if true, two trailing axes of size one are appended to the
            result.

    Raises:
        ValueError: if the last two dimensions differ or are both undefined.
    """
    with K.name_scope('trace'):
        shape = mixed_shape(tensors)
        int_shape = K.int_shape(tensors)
        n_rows, n_cols = int_shape[-2], int_shape[-1]

        if not equal_int_shape([n_cols], [n_rows]):
            raise ValueError(
                "The matrix dimension (the two last dimensions) of the tensor must be squared. "
                "You provide: " + str(int_shape[-2:]) + ".")

        if n_cols is None and n_rows is None:
            raise ValueError(
                'At least one dimension of the matrix must be defined. You provide: '
                + str(int_shape))

        # K.eye() doesn't accept placeholders, so take whichever of the two
        # dimensions is specified.
        matrix_dim = shape[-2] if n_cols is None else shape[-1]

        # Masking with the identity and summing both axes gives the trace.
        t = K.sum(tensors * K.eye(matrix_dim), axis=[-1, -2])
        if keepdims:
            t = K.expand_dims(K.expand_dims(t, -1), -1)
        return t
def ortho_reg(weight_matrix):
    """Orthogonal regularization for the aspect embedding matrix.

    Rows are scaled to (approximately) unit length and the squared deviation
    of their Gram matrix from the identity is summed, weighted by
    ``args.ortho_reg``.
    """
    # Epsilon guards against division by zero for all-zero rows.
    row_norms = K.cast(
        K.epsilon() + K.sqrt(K.sum(K.square(weight_matrix), axis=-1, keepdims=True)),
        K.floatx())
    w_n = weight_matrix / row_norms

    gram = K.dot(w_n, K.transpose(w_n))
    n_rows = w_n.shape[0].value
    reg = K.sum(K.square(gram - K.eye(n_rows)))
    return args.ortho_reg * reg
def loss(y_true, y_pred):
    """Hinge on the off-diagonal entries of a batch of 50x50 matrices.

    Returns ``l2 * max((m - y_pred) * mask, 0)`` element-wise, where the mask
    is zero on the diagonal and one elsewhere, repeated for 50 batch items.
    ``y_true`` is unused; ``m`` and ``l2`` come from the enclosing scope.
    """
    off_diagonal = K.ones((50, 50)) - K.eye(50)
    # Add a leading axis and repeat the mask once per batch item.
    batched_mask = K.repeat_elements(K.expand_dims(off_diagonal, axis=0), 50, 0)
    hinge = K.maximum((m - y_pred) * batched_mask, 0.)
    return l2 * hinge
def call(self, x):
    """Multi-perspective matching of ``inp_a`` against its best match in ``inp_b``.

    ``x`` is the list ``[inp_a, inp_b]``.  A hard (one-hot arg-max) alignment
    is computed from the cosine similarities of the two inputs and used to
    pick vectors from the normalized ``inp_b``.  For each of the
    ``output_dim`` perspectives both sides are re-weighted by ``self.W[i]``,
    normalized and compared; only the diagonal of the resulting similarity
    matrix is kept.

    Returns the perspective tensor twice, as ``[persp, persp]``.
    """
    assert isinstance(x, list)
    inp_a, inp_b = x
    len_a = K.int_shape(inp_a)[1]

    unit_a = K.l2_normalize(inp_a, -1)
    unit_b = K.l2_normalize(inp_b, -1)

    alpha = K.batch_dot(unit_b, unit_a, axes=[2, 2])
    alpha = K.l2_normalize(alpha, 1)
    # Hard alignment: one-hot of the arg-max along axis 1.
    alpha = K.one_hot(K.argmax(alpha, 1), len_a)
    hmax = K.batch_dot(alpha, unit_b, axes=[1, 1])

    # Identity mask that keeps only the diagonal of the similarity matrix.
    kcon = K.eye(len_a, dtype='float32')

    m = []
    for i in range(self.output_dim):
        weighted_a = K.l2_normalize(inp_a * self.W[i], -1)
        weighted_hmax = K.l2_normalize(hmax * self.W[i], -1)
        similarity = K.batch_dot(weighted_hmax, weighted_a, axes=[2, 2])
        m.append(K.sum(similarity * kcon, -1, keepdims=True))

    persp = K.concatenate(m, 2) if self.output_dim > 1 else m[0]
    return [persp, persp]
def gaussian(x):
    """Turn ``[mean, flattened A]`` into ``[mean, flattened (A A^T + I)]``.

    The last dimension of ``x`` must be ``n * (n + 1)``: the first ``n``
    entries are the mean and the remaining ``n * n`` entries a row-wise
    flattened matrix ``A``.  Adding the identity to ``A A^T`` makes the
    result positive definite.  The output has the same shape as ``x``.
    """
    static_shape = K.int_shape(x)
    last_dim = static_shape[-1]
    # Positive root of n * (n + 1) = last_dim.
    n = int((sqrt(4 * last_dim + 1) - 1) / 2)

    # Collapse all leading axes into one.
    x_flat = K.reshape(x, (-1, last_dim))
    mean = K.reshape(x_flat[:, :n], (-1, n))
    cov_a = K.reshape(x_flat[:, n:], (-1, n, n))

    # Contracting axis 1 of A^T with axis 2 of A yields A A^T per batch item.
    gram = K.batch_dot(K.permute_dimensions(cov_a, (0, 2, 1)), cov_a, axes=[1, 2])
    precision = gram + K.expand_dims(K.eye(n), axis=0)

    merged = K.concatenate([mean, K.reshape(precision, (-1, n * n))], axis=-1)
    # Restore the original (dynamic) shape.
    return K.reshape(merged, K.shape(x))
def call(self, x):
    """Multi-perspective matching of ``inp_a`` against its best match in ``inp_b``.

    ``x`` is the list ``[inp_a, inp_b]``.  A hard (one-hot arg-max) alignment
    is computed from the cosine similarities of the two inputs and used to
    pick vectors from the normalized ``inp_b``.  For each of the
    ``output_dim`` perspectives both sides are re-weighted by ``self.W[i]``,
    normalized and compared; only the diagonal of the resulting similarity
    matrix is kept.

    Returns the perspective tensor twice, as ``[persp, persp]``.
    """
    assert isinstance(x, list)
    inp_a, inp_b = x

    outp_a = K.l2_normalize(inp_a, -1)
    outp_b = K.l2_normalize(inp_b, -1)
    alpha = K.batch_dot(outp_b, outp_a, axes=[2, 2])
    alpha = K.l2_normalize(alpha, 1)
    # Hard alignment: one-hot of the arg-max along axis 1.
    alpha = K.one_hot(K.argmax(alpha, 1), K.int_shape(inp_a)[1])
    hmax = K.batch_dot(alpha, outp_b, axes=[1, 1])

    # Identity mask that keeps only the diagonal of the similarity matrix.
    kcon = K.eye(K.int_shape(inp_a)[1], dtype='float32')

    m = []
    for i in range(self.output_dim):
        outp_a = K.l2_normalize(inp_a * self.W[i], -1)
        outp_hmax = K.l2_normalize(hmax * self.W[i], -1)
        outp = K.batch_dot(outp_hmax, outp_a, axes=[2, 2])
        m.append(K.sum(outp * kcon, -1, keepdims=True))

    if self.output_dim > 1:
        persp = K.concatenate(m, 2)
    else:
        # Unwrap the single perspective: returning the list ``m`` itself
        # would hand a Python list, not a tensor, to the next layer.
        persp = m[0]
    return [persp, persp]
def call(self, u_vecs):
    """Capsule layer forward pass with iterative routing.

    The input capsules are transformed with ``self.W`` (shared 1-D
    convolution or a locally connected one), reshaped to
    ``(batch, num_capsule, input_num_capsule, dim_capsule)`` and routed for
    ``self.routings`` iterations.  The activation of the last routing output
    is returned.
    """
    if self.share_weights:
        u_hat_vecs = K.conv1d(u_vecs, self.W)
    else:
        u_hat_vecs = K.local_conv1d(u_vecs, self.W, [1], [1])

    batch_size = K.shape(u_vecs)[0]
    input_num_capsule = K.shape(u_vecs)[1]
    u_hat_vecs = K.reshape(
        u_hat_vecs,
        (batch_size, input_num_capsule, self.num_capsule, self.dim_capsule))
    u_hat_vecs = K.permute_dimensions(u_hat_vecs, (0, 2, 1, 3))

    # Routing logits start at zero.
    b = K.zeros_like(u_hat_vecs[:, :, :, 0])
    # NOTE(review): multiplying the coupling coefficients by an identity of
    # size num_capsule only broadcasts when input_num_capsule matches --
    # confirm this is intended.
    d = K.eye(self.num_capsule)
    on_theano = K.backend() == 'theano'
    last_round = self.routings - 1

    for i in range(self.routings):
        c = softmax(b, 1) * d
        o = K.batch_dot(c, u_hat_vecs, [2, 2])
        if on_theano:
            o = K.sum(o, axis=1)
        if i < last_round:
            # Agreement between the normalized output and the predictions
            # becomes the logits of the next round.
            o = K.l2_normalize(o, -1)
            b = K.batch_dot(o, u_hat_vecs, [2, 3])
            if on_theano:
                b = K.sum(b, axis=1)

    return self.activation(o)
def test_make_soft(_log, train_with_soft_target_stdev, _config):
    """Run ``make_soft`` on a small identity-based target and log two rows.

    If no soft-target standard deviation is configured, it is set to 1 in
    ``_config`` first.  The first and the last row of the softened output are
    written to ``_log``.
    """
    if train_with_soft_target_stdev is None:
        _config['train_with_soft_target_stdev'] = 1

    batch, steps, classes = 2, 129, 256
    # Take batch * steps rows so that the element count matches the target
    # shape; the previous slice of only ``steps`` rows held half the elements
    # needed for a (2, 129, 256) reshape and could not succeed.
    # NOTE(review): assumes two samples of 129 steps were intended -- confirm.
    y_true = K.reshape(K.eye(512)[:batch * steps, :classes],
                       (batch, steps, classes))
    y_soft = make_soft(y_true)

    f = K.function([], y_soft)
    _log.info('Output of soft:')
    f1 = f([])
    _log.info(f1[0, 0])
    _log.info(f1[-1, -1])
def __loss(y_true, y_pred):
    """Categorical cross-entropy plus an orthogonality penalty across layers.

    For every (forward, backward) pair in ``layers`` the columns
    ``2 * rnn_units : 3 * rnn_units`` of the cell's second trainable weight
    are flattened (presumably one gate block of the recurrent kernel --
    confirm against the cell type).  The flattened kernels of each direction
    are stacked and the squared deviation of their Gram matrix from the
    identity is added, weighted by ``lmbd``.  ``layers``, ``rnn_units`` and
    ``lmbd`` come from the enclosing scope.
    """
    gate_columns = slice(rnn_units * 2, rnn_units * 3)

    def flat_gate_kernel(rnn):
        kernel = rnn.cell.trainable_weights[1][:, gate_columns]
        return K.reshape(kernel, (rnn_units * rnn_units,))

    def orthogonality_penalty(phi):
        gram = K.dot(phi, K.transpose(phi))
        return K.sum(K.square(gram - K.eye(len(layers))))

    phi_forward = K.stack([flat_gate_kernel(fwd) for (fwd, _) in layers])
    phi_backward = K.stack([flat_gate_kernel(bwd) for (_, bwd) in layers])

    loss_sim_forward = orthogonality_penalty(phi_forward)
    loss_sim_backward = orthogonality_penalty(phi_backward)
    loss_cat = keras.losses.categorical_crossentropy(y_true, y_pred)
    return loss_cat + lmbd * (loss_sim_forward + loss_sim_backward)
def __call__(self, x):
    """Soft penalty on the pairwise cosine similarity of weight vectors.

    With ``axis == 'last'`` the vectors are the columns of ``x`` reshaped to
    ``(-1, last_dim)``; with ``axis == 'first'`` they are the rows of ``x``
    reshaped to ``(first_dim, -1)``.  Vectors are scaled to unit length and
    ``gamma * sum(log(1 + exp(lam * (cos - 1))))`` is taken over all
    off-diagonal pairs.

    Returns ``None`` for any other ``axis`` value (unchanged behaviour).
    """
    xshape = K.int_shape(x)
    # Compare strings by value: ``is`` tests object identity, which is only
    # true by accident of interning and raises a SyntaxWarning on 3.8+.
    if self.axis == 'last':
        n_vectors = xshape[-1]
        x = K.reshape(x, (-1, n_vectors))
        x = x / K.sqrt(K.sum(K.square(x), axis=0, keepdims=True))
        xx = K.dot(K.transpose(x), x)
    elif self.axis == 'first':
        n_vectors = xshape[0]
        x = K.reshape(x, (n_vectors, -1))
        x = x / K.sqrt(K.sum(K.square(x), axis=1, keepdims=True))
        xx = K.dot(x, K.transpose(x))
    else:
        return None

    # The (1 - eye) mask drops each vector's similarity with itself.
    return self.gamma * K.sum(
        K.log(1.0 + K.exp(self.lam * (xx - 1.0))) * (1.0 - K.eye(n_vectors)))