def __call__(self, loss):
    if not hasattr(self, 'layer'):
        raise Exception('Need to call `set_layer` on '
                        'ActivityRegularizer instance '
                        'before calling the instance.')
    output = self.layer.output
    regularized_loss = loss + self.l1 * K.sum(K.mean(K.abs(output), axis=0))
    regularized_loss += self.l2 * K.sum(K.mean(K.square(output), axis=0))
    return K.in_train_phase(regularized_loss, loss)
def contrastive_loss(y, d):
    """Contrastive loss from Hadsell et al. '06
    http://yann.lecun.com/exdb/publis/pdf/hadsell-chopra-lecun-06.pdf
    """
    margin = 1
    return K.mean(y * K.square(d) +
                  (1 - y) * K.square(K.maximum(margin - d, 0)))
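# Usage sketch (my assumption, not from the original source): this loss
# expects y = binary label (1 = similar pair) and d = a precomputed distance,
# so it is typically the loss of a siamese model whose single output is that
# distance. The toy shapes and shared Dense "embedding" are placeholders.
def _contrastive_loss_demo():
    from keras.layers import Input, Dense, Lambda
    from keras.models import Model

    def euclidean_distance(vects):
        a, b = vects
        return K.sqrt(K.maximum(
            K.sum(K.square(a - b), axis=1, keepdims=True), K.epsilon()))

    base = Dense(8, activation='relu')  # stand-in for a shared embedding net
    input_a, input_b = Input(shape=(16,)), Input(shape=(16,))
    distance = Lambda(euclidean_distance)([base(input_a), base(input_b)])
    siamese = Model([input_a, input_b], distance)
    siamese.compile(loss=contrastive_loss, optimizer='adam')
    return siamese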
def contrastive_loss_over_distance(labels, distances):
    '''
    :param labels: 1D tensor containing 0 or 1 for each example
    :param distances: 1D tensor of precomputed pairwise distances
    :return: scalar contrastive loss
    '''
    margin = 1
    # loss = K.mean((distances + K.maximum(margin - shifted_distances, 0)))
    # The K.eval prints below are debugging output for each intermediate step.
    print(K.eval(distances))
    right = margin - distances
    print(K.eval(right))
    right = K.maximum(right, 0)
    print(K.eval(right))
    right = K.square(right)
    print(K.eval(right))
    print('')
    print(K.eval(distances))
    left = distances
    print(K.eval(left))
    left = K.square(left)
    print(K.eval(left))
    left = labels * left
    print(K.eval(left))
    right = (1 - labels) * right
    print(K.eval(right))
    loss = K.mean(left + right)
    print(K.eval(loss))
    # loss = K.mean(distances - shifted_distances)
    return loss
def euclideanSqDistance(inputs):
    # Raising a bare string is invalid in Python 3; raise a proper exception.
    if len(inputs) != 2:
        raise ValueError('euclideanSqDistance expects exactly two tensors')
    output = K.mean(K.square(inputs[1] - inputs[0]), axis=-1)
    output = K.expand_dims(output, 1)
    return output
def build_model(self):
    """Build CBOW model."""
    from numpy.random import seed
    seed(1)
    from tensorflow import set_random_seed
    set_random_seed(2)

    # Sequential equivalent kept for reference:
    # cbow = Sequential()
    # cbow.add(Embedding(input_dim=self.vocabulary, output_dim=self.embedding_dim,
    #                    input_length=self.window * 2))
    # cbow.add(Lambda(lambda x: K.mean(x, axis=1), output_shape=(self.embedding_dim,)))
    # cbow.add(Dense(self.vocabulary, activation='softmax'))
    # cbow.compile(loss=self.loss, optimizer=self.optimizer)

    # Embedding expects integer word indices, so the input shape is just the
    # context size; the original passed (window * 2, vocabulary), which does
    # not match what Embedding consumes.
    inputs = Input(shape=(self.window * 2,))
    cbow = Embedding(input_dim=self.vocabulary,
                     output_dim=self.embedding_dim,
                     input_length=self.window * 2)(inputs)
    cbow = Lambda(lambda x: K.mean(x, axis=1),
                  output_shape=(self.embedding_dim,))(cbow)
    self.encoder = cbow
    outputs = Dense(self.vocabulary, activation='softmax')(cbow)
    # `compile` and `summary` are Model methods, not tensor methods, so the
    # functional graph must be wrapped in a Model first.
    model = Model(inputs, outputs)
    model.compile(loss=self.loss, optimizer=self.optimizer)
    model.summary()
    self.model = model
def spatial_attention(input_feature):
    kernel_size = 7

    if K.image_data_format() == "channels_first":
        channel = input_feature._keras_shape[1]
        cbam_feature = Permute((2, 3, 1))(input_feature)
    else:
        channel = input_feature._keras_shape[-1]
        cbam_feature = input_feature

    avg_pool = Lambda(lambda x: K.mean(x, axis=3, keepdims=True))(cbam_feature)
    assert avg_pool._keras_shape[-1] == 1
    max_pool = Lambda(lambda x: K.max(x, axis=3, keepdims=True))(cbam_feature)
    assert max_pool._keras_shape[-1] == 1
    concat = Concatenate(axis=3)([avg_pool, max_pool])
    assert concat._keras_shape[-1] == 2
    cbam_feature = Conv2D(filters=1,
                          kernel_size=kernel_size,
                          strides=1,
                          padding='same',
                          activation='sigmoid',
                          kernel_initializer='he_normal',
                          use_bias=False)(concat)
    assert cbam_feature._keras_shape[-1] == 1

    if K.image_data_format() == "channels_first":
        cbam_feature = Permute((3, 1, 2))(cbam_feature)

    return multiply([input_feature, cbam_feature])
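# Usage sketch (assumed): `spatial_attention` is the spatial half of a CBAM
# block and plugs directly into a channels-last conv stack. The input size
# below is an arbitrary placeholder, and the Keras layer imports used by
# `spatial_attention` itself are assumed to be in scope.
def _spatial_attention_demo():
    from keras.layers import Input, Conv2D
    from keras.models import Model

    inp = Input(shape=(32, 32, 3))
    x = Conv2D(16, 3, padding='same', activation='relu')(inp)
    x = spatial_attention(x)  # reweights each spatial location of `x`
    return Model(inp, x)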
def contrastive_loss_old(labels, dists):
    label_first = labels[0:1, :]
    other_labels = labels[1:, :]
    labels_shifted = K.concatenate([labels, other_labels, label_first],
                                   axis=0)  # [ l1 ........ ln | l2 ... ln-1 ln ]
    labels_orig = K.concatenate([labels, labels],
                                axis=0)     # [ l1 ........ ln | l1 ... ln-2 ln ]
    zeros = K.zeros_like(labels_orig)       # [ 0  ........ 0  | 0  ... 0    0  ]
    h = K.cast(K.equal(labels_orig - labels_shifted, zeros),
               dtype='float32')             # [ 1  1 ...... 1  | 0  ... 1    0  ]
    # h: ALL ONES | MOST ZEROS
    # h[i] = 1 where labels_orig[i] == labels_shifted[i]
    #        (i-th image correlated with (i+1)-th image, i.e. same artwork)
    # h[i] = 0 where labels_orig[i] != labels_shifted[i]

    first_dist = dists[0:1]
    other_dists = dists[1:]
    shifted_dists = K.concatenate([dists, other_dists, first_dist],
                                  axis=0)   # [ d1 ........ dn | d1 ... dn-2 dn ]

    # equation: Lcon = (1/2N) SUM[ h(i) d(i)^2 + (1 - h(i)) max(1 - d(i), 0)^2 ]
    Z = K.zeros_like(shifted_dists)
    max_z_sd = K.max(K.stack([1 - shifted_dists, Z]), axis=0, keepdims=False)
    # max_z_sd = K.sqrt(K.cast(K.shape(shifted_dists)[0], dtype='float32')) - shifted_dists

    first_operand = h * K.square(shifted_dists)
    second_operand = (1 - h) * K.square(max_z_sd)
    tensor_sum = first_operand + second_operand
    # renamed from `sum`, which shadows the Python builtin
    normalized = K.sum(tensor_sum, axis=0) / K.cast(K.shape(shifted_dists)[0],
                                                    dtype='float32')
    return K.mean(normalized)
def bilstm_attention_model(units=(64,), dropout=(0.5,), hidden_dims=17):
    batch_input_shape = (batch_size, input_shape[0], input_shape[1])
    model_input = Input(shape=input_shape, batch_shape=batch_input_shape)
    previous_layer = model_input
    # Every BiLSTM layer returns sequences, since the attention block below
    # needs the full time axis (the original if/else branches were identical).
    for u in units:
        previous_layer = Bidirectional(
            CuDNNLSTM(u, return_sequences=True, stateful=True))(previous_layer)

    attention = Dense(1, activation='tanh')(previous_layer)
    attention = Flatten()(attention)
    attention = Activation('softmax')(attention)
    attention = RepeatVector(units[-1] * 2)(attention)
    attention = Permute([2, 1])(attention)

    # `merge(..., mode='mul')` was removed in Keras 2; `multiply` replaces it.
    sent_representation = multiply([previous_layer, attention])
    sent_representation = Lambda(lambda x: K.mean(x, axis=-2),
                                 output_shape=(units[-1] * 2,))(sent_representation)

    z = Dense(hidden_dims, activation='relu')(sent_representation)
    model_output = Dense(1, activation='sigmoid')(z)
    model = Model(model_input, model_output)
    model.compile(loss='binary_crossentropy', optimizer='adam',
                  metrics=['accuracy'])
    return model
def custom_cross_entropy(self, y_true, y_pred):
    # y_true carries the payoffs in its last dimension
    y_true, payoffs = splitter(y_true)

    if self.method == 'lay':
        tp_weight = K.abs(payoffs)
        fp_weight = K.abs(payoffs)
        tn_weight = 1
        fn_weight = 0.95
    elif self.method == 'back':
        tp_weight = K.abs(payoffs)  # opportunity cost
        tn_weight = 0               # opportunity cost
        fp_weight = 1               # cost
        fn_weight = K.abs(payoffs)  # cost

    loss = -K.mean(
        fn_weight * y_true * K.log(y_pred + _EPSILON) +          # fn cost (not backing when it should)
        fp_weight * (1 - y_true) * K.log(1 - y_pred + _EPSILON)  # fp cost (backing the wrong one)
        # + tp_weight * y_true * K.log(1 - y_pred + _EPSILON)      # tp (correctly backing)
        # + tn_weight * (1 - y_true) * K.log(y_pred + _EPSILON)    # tn (correctly not backing)
    )
    return loss
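# `splitter` is used by several of these functions but never defined in the
# snippets. A plausible reconstruction (an assumption, not the original
# helper): the per-sample payoff is packed into the last column of y_true.
def splitter(y_true):
    labels = y_true[:, :-1]   # the actual class labels
    payoffs = y_true[:, -1:]  # payoff carried alongside the labels
    return labels, payoffs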
def ln(self, x, slc):
    # sample-wise (layer) normalization
    m = K.mean(x, axis=-1, keepdims=True)
    std = K.sqrt(K.var(x, axis=-1, keepdims=True) + self.epsilon)
    x_normed = (x - m) / (std + self.epsilon)
    x_normed = self.gammas[slc] * x_normed + self.betas[slc]
    return x_normed
def contrastive_loss(y_true, y_pred):
    '''Contrastive loss from Hadsell et al. '06
    http://yann.lecun.com/exdb/publis/pdf/hadsell-chopra-lecun-06.pdf
    '''
    margin = 1
    square_pred = K.square(y_pred)
    margin_square = K.square(K.maximum(margin - y_pred, 0))
    return K.mean(y_true * square_pred + (1 - y_true) * margin_square)
def jaccard_coef(y_true_values, y_predictions):
    # __author__ = Vladimir Iglovikov
    intersection = K.sum(y_true_values * y_predictions, axis=[0, -1, -2])
    sum_ = K.sum(y_true_values + y_predictions, axis=[0, -1, -2])
    jac = (intersection + smooth) / (sum_ - intersection + smooth)
    return K.mean(jac)
def jaccard_coef_int(y_true_values, y_predictions):
    # __author__ = Vladimir Iglovikov
    y_pred_pos = K.round(K.clip(y_predictions, 0, 1))
    intersection = K.sum(y_true_values * y_pred_pos, axis=[0, -1, -2])
    sum_ = K.sum(y_true_values + y_pred_pos, axis=[0, -1, -2])
    jac = (intersection + smooth) / (sum_ - intersection + smooth)
    return K.mean(jac)
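# Both Jaccard helpers above reference a module-level `smooth` constant that
# the snippets never define. A tiny value (1e-12 is assumed here) keeps the
# ratio finite when a class is absent from both masks. Typical usage, with
# `model` as a placeholder segmentation network:
smooth = 1e-12
# model.compile(optimizer='adam', loss='binary_crossentropy',
#               metrics=[jaccard_coef, jaccard_coef_int])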
def triplet_loss(y_true, y_pred):
    y_pred = K.flatten(y_pred)
    y_true = K.flatten(y_true)
    pos = y_pred[::2]   # even entries: positive scores
    neg = y_pred[1::2]  # odd entries: negative scores
    margin = y_true[::2] - y_true[1::2]
    delta = K.maximum(margin + neg - pos, 0)
    return K.mean(delta, axis=-1)
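# Sanity check (inferred from the slicing above, not from the source): this
# triplet loss assumes positive and negative scores are interleaved row-wise,
# with per-pair margins encoded the same way in y_true.
def _triplet_loss_demo():
    import numpy as np
    y_pred = K.constant(np.array([0.9, 0.2, 0.8, 0.5], dtype='float32'))
    y_true = K.constant(np.array([1.0, 0.7, 1.0, 0.7], dtype='float32'))
    return K.eval(triplet_loss(y_true, y_pred))  # -> 0.0 (both margins met)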
def contrastive_loss_contr_data(labels, output):
    # `other_output` and `margin` are presumably taken from the enclosing
    # scope (this loss appears to be defined as a closure over the second
    # branch's output).
    distances = K.sqrt(K.sum(K.square(output - other_output), axis=0))
    # loss = K.mean((distances + K.maximum(margin - shifted_distances, 0)))
    # loss = K.mean(K.square(distances) + K.square(K.maximum(margin - shifted_distances, 0)))
    # loss = K.mean(distances - shifted_distances)
    loss = K.mean(labels * K.square(distances) +
                  (1 - labels) * K.square(K.maximum(margin - distances, 0)))
    return loss
def margin_loss(y_true, y_pred):
    """
    Margin loss for Eq. (4). This loss should also work when y_true[i, :]
    contains more than one `1`, but that case has not been tested.
    :param y_true: [None, n_classes]
    :param y_pred: [None, num_capsule]
    :return: a scalar loss value.
    """
    L = y_true * K.square(K.maximum(0., 0.9 - y_pred)) + \
        0.5 * (1 - y_true) * K.square(K.maximum(0., y_pred - 0.1))
    return K.mean(K.sum(L, 1))
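# Quick numeric check (my addition): with one-hot targets, a confident
# correct prediction contributes nothing, while a prediction inside the
# [0.1, 0.9] margins is penalized.
def _margin_loss_demo():
    import numpy as np
    y_true = K.constant(np.array([[1., 0.], [0., 1.]], dtype='float32'))
    y_pred = K.constant(np.array([[0.95, 0.05], [0.2, 0.8]], dtype='float32'))
    return K.eval(margin_loss(y_true, y_pred))  # -> 0.0075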
def categorical_squared_hinge(y_true, y_pred):
    """
    Hinge loss in the SVM style (the 0.5 * W^2 regularization term is handled
    elsewhere).
    """
    y_true = 2. * y_true - 1  # map labels from {0, 1} to {-1, 1}; note that SVM class labels are -1 and 1
    hinge = K.maximum(1. - y_true * y_pred, 0.)  # hinge loss, as in Keras' built-in hinge
    # hinge = K.square(hinge)  # "Deep Learning using Linear Support Vector Machines" squares this term
    summed = K.sum(hinge, 1, keepdims=False)  # axis=len(y_true.get_shape()) - 1
    return K.mean(summed, axis=-1)
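# Quick numeric check (my addition): note that with the squaring commented
# out above, the function computes a plain (linear) hinge despite its name.
def _squared_hinge_demo():
    import numpy as np
    y_true = K.constant(np.array([[1., 0., 0.]], dtype='float32'))
    y_pred = K.constant(np.array([[0.8, -0.3, -0.5]], dtype='float32'))
    return K.eval(categorical_squared_hinge(y_true, y_pred))  # -> 1.4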
def attention_3d_block(inputs):
    # inputs.shape = (batch_size, time_steps, input_dim)
    input_dim = int(inputs.shape[2])
    a = Permute((2, 1))(inputs)
    # a = Reshape((input_dim, TIME_STEPS))(a)  # not strictly needed; it just makes the dimensions explicit
    a = Dense(n_nodes, activation='softmax', name='weighting')(a)
    if SINGLE_ATTENTION_VECTOR:
        a = Lambda(lambda x: K.mean(x, axis=1), name='dim_reduction')(a)
        a = RepeatVector(input_dim)(a)
    a_probs = Permute((2, 1), name='attention_vec')(a)
    output_attention_mul = multiply([inputs, a_probs], name='attention_mul')
    return output_attention_mul
def attention_3d_block(inputs):
    # inputs.shape = (batch_size, time_steps, input_dim)
    #              = (batch_size, max_len, embed_dim)
    input_dim = int(inputs.shape[2])
    a = Permute((2, 1))(inputs)
    a = Reshape((input_dim, MAX_LEN))(a)  # not strictly needed; it just makes the dimensions explicit
    a = Dense(MAX_LEN, activation='softmax')(a)
    if SINGLE_ATTENTION_VECTOR:
        a = Lambda(lambda x: K.mean(x, axis=1), name='dim_reduction')(a)
        a = RepeatVector(input_dim)(a)
    a_probs = Permute((2, 1), name='attention_vec')(a)
    # `merge(..., mode='mul')` was removed in Keras 2; `multiply` replaces it.
    output_attention_mul = multiply([inputs, a_probs], name='attention_mul')
    return output_attention_mul
def triplet_loss(y_true, y_pred):
    y_pred = K.l2_normalize(y_pred, axis=1)
    batch = BAT_SIZE
    # print(batch)
    ref1 = y_pred[0:batch, :]
    pos1 = y_pred[batch:batch + batch, :]
    neg1 = y_pred[batch + batch:3 * batch, :]
    dis_pos = K.sum(K.square(ref1 - pos1), axis=1, keepdims=True)
    dis_neg = K.sum(K.square(ref1 - neg1), axis=1, keepdims=True)
    dis_pos = K.sqrt(dis_pos)
    dis_neg = K.sqrt(dis_neg)
    a1 = 0.6
    d1 = dis_pos + K.maximum(0.0, dis_pos - dis_neg + a1)
    return K.mean(d1)
def attention_3d_block(inputs):
    # inputs.shape = (batch_size, time_steps, input_dim)
    input_dim = int(inputs.shape[2])
    time_steps = int(inputs.shape[1])
    a = Permute((2, 1))(inputs)
    a = Reshape((input_dim, time_steps))(a)  # not strictly needed; it just makes the dimensions explicit
    a = Dense(time_steps, activation='softmax')(a)
    a = Lambda(lambda x: K.mean(x, axis=1))(a)
    a = RepeatVector(input_dim)(a)
    a_probs = Permute((2, 1))(a)
    output_attention_mul = multiply([inputs, a_probs])
    return output_attention_mul
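# Usage sketch (assumed): the attention block wraps an RNN's sequence output;
# all sizes below are placeholders, and the layer imports used by
# `attention_3d_block` itself are assumed to be in scope.
def _attention_demo():
    from keras.layers import Input, LSTM, Dense, Flatten
    from keras.models import Model

    inp = Input(shape=(10, 8))                 # (time_steps, input_dim)
    seq = LSTM(8, return_sequences=True)(inp)
    att = attention_3d_block(seq)              # same shape, attention-weighted
    out = Dense(1, activation='sigmoid')(Flatten()(att))
    return Model(inp, out)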
def contrastive_loss_over_distance(labels, distances):
    '''
    :param labels: 1D tensor containing 0 or 1 for each example
    :param distances: 1D tensor of precomputed pairwise distances
    :return: scalar contrastive loss (`margin` comes from the enclosing scope)
    '''
    # loss = K.mean((distances + K.maximum(margin - shifted_distances, 0)))
    # loss = K.mean(distances - shifted_distances)
    loss = K.mean(labels * K.square(distances) +
                  (1 - labels) * K.square(K.maximum(margin - distances, 0)))
    # loss_pos = labels * K.square(distances)
    # loss_neg = (1 - labels) * K.square(K.maximum(margin - distances, 0))
    return loss
def avg_batch_mse_loss(y_true, y_pred):
    # batch_size = K.int_shape(y_pred)[-1]
    # loss = 0
    # for i in range(0, batch_size):
    #     loss += mean_squared_error(y_pred[i], y_pred[i - 1])
    # loss /= batch_size
    # The maximum distance in a euclidean space with domain [0, 1] in each
    # dimension is the square root of the number of dimensions
    # (1 for 1D, 1.414... for 2D, 1.73 for 3D, 2 for 4D, ...).
    y_pred_first_row = y_pred[0:1, :]
    y_pred_other_rows = y_pred[1:, :]
    y_pred_shifted = K.concatenate([y_pred_other_rows, y_pred_first_row], axis=0)
    return -K.mean(K.square(y_pred - y_pred_shifted))
def contrastive_loss_2(labels, im_outputs):
    # `text_outputs` and `margin` are presumably taken from the enclosing scope.
    distances = K.sqrt(K.sum(K.square(im_outputs - text_outputs), axis=-1))
    first_text = text_outputs[0:1, :]
    last_texts = text_outputs[1:, :]
    shifted_texts = K.concatenate([last_texts, first_text], axis=0)
    shifted_distances = K.sqrt(
        K.sum(K.square(im_outputs - shifted_texts), axis=-1))
    # loss = K.mean((distances + K.maximum(margin - shifted_distances, 0)))
    loss = K.mean(K.square(distances) +
                  K.square(K.maximum(margin - shifted_distances, 0)))
    # loss = K.mean(distances - shifted_distances)
    return loss
def attention_3d_block(inputs, time_steps: int, single_attention_vector: bool):
    # inputs.shape = (batch_size, time_steps, input_dim)
    print(inputs.shape)
    input_dim = int(inputs.shape[2])
    print(input_dim)
    a = Permute((2, 1))(inputs)
    a = Reshape((input_dim, time_steps))(a)  # not strictly needed; it just makes the dimensions explicit
    a = Dense(time_steps, activation='softmax')(a)
    if single_attention_vector:
        a = Lambda(lambda x: K.mean(x, axis=1), name='dim_reduction')(a)
        a = RepeatVector(input_dim)(a)
    a_probs = Permute((2, 1), name='attention_vec')(a)
    # `merge(..., mode='mul')` was removed in Keras 2; `multiply` replaces it.
    output_attention_mul = multiply([inputs, a_probs], name='attention_mul')
    return output_attention_mul
def profit(y_true, y_pred):
    y_true, payoffs = splitter(y_true)
    profit = K.round(y_pred) * payoffs
    return K.mean(profit, axis=-1)
def acc(y_true, y_pred):
    y_true, payoffs = splitter(y_true)
    return K.mean(K.equal(y_true, K.round(y_pred)), axis=-1)
def sampleLoss(true_y, pred_y):
    z_mean = pred_y[:, :, 0]
    z_log_sigma = pred_y[:, :, 1]
    return -0.5 * K.mean(
        1 + z_log_sigma - K.square(z_mean) - K.exp(z_log_sigma), axis=-1)
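# Context sketch (assumed): this is the KL-divergence term of a VAE, with the
# mean and log-variance stacked on the last axis of the prediction tensor.
# With both at zero the latent matches the N(0, I) prior and the term is 0.
def _sample_loss_demo():
    import numpy as np
    pred = np.zeros((2, 4, 2), dtype='float32')  # z_mean = z_log_sigma = 0
    return K.eval(sampleLoss(None, K.constant(pred)))  # -> [0., 0.]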
def mse_on_sub_out_loss(y_true, y_pred):
    # NB: mean_squared_error(a, b) = K.mean(K.square(a - b))
    # We use this loss over the (im_emb - tx_emb) output of the network to
    # compute MSE(im_emb, tx_emb).
    # return mean_squared_error(y_true, y_pred)
    return K.mean(K.square(y_pred))
def binary_accuracy(y_true, y_pred):
    return K.mean(K.equal(y_true, K.round(y_pred)), axis=-1)
def cos_distance(y_true, y_pred):
    y_true = K.l2_normalize(y_true, axis=-1)
    y_pred = K.l2_normalize(y_pred, axis=-1)
    return K.mean(1 - K.sum(y_true * y_pred, axis=-1))
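# Quick numeric check (my addition): identical directions give distance 0,
# orthogonal directions give distance 1.
def _cos_distance_demo():
    import numpy as np
    a = K.constant(np.array([[1., 0.]], dtype='float32'))
    b = K.constant(np.array([[0., 1.]], dtype='float32'))
    return K.eval(cos_distance(a, b))  # -> 1.0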