def my_loss(y_true, y_pred):
    r"""Homemade weighted loss function.

    Uses the standard Mean Absolute Error (MAE), but weights it based
    on how far the true value is from 0.

    Formula
    -------
    .. math::

        \overline{\left|y_{true} - y_{pred}\right|} \cdot
        \overline{\left(2 - \frac{y_{true} - y_{true_{min}}}
                                 {y_{true_{max}} - y_{true_{min}}}\right)}

    where

    .. math:: y_{true} = \text{array of true values}
    .. math:: y_{pred} = \text{array of predictions from Keras}
    .. math:: y_{true_{min}} = \text{minimum value in true values}
    .. math:: y_{true_{max}} = \text{maximum value in true values}

    Parameters
    ----------
    y_true : array
        The ground truth as provided by Keras.
    y_pred : array
        The predictions as provided by Keras.

    Returns
    -------
    weighted_loss : array
        The weighted loss.
    """
    absErr = K.mean(K.abs(y_pred - y_true), axis=-1)
    num = y_true - K.min(y_true, axis=0, keepdims=True)
    den = (K.max(y_true, axis=0, keepdims=True)
           - K.min(y_true, axis=0, keepdims=True))
    factor = 2 - (num / den)
    weighted_loss = absErr * K.mean(factor)
    return weighted_loss

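# A minimal usage sketch for my_loss, assuming the usual backend import
# (`from tensorflow.keras import backend as K`) is in scope for the loss;
# the toy model and random data below are hypothetical.
import numpy as np
from tensorflow import keras

model = keras.Sequential([keras.layers.Dense(1, input_shape=(4,))])
model.compile(optimizer='adam', loss=my_loss)  # custom loss passed directly
x_toy = np.random.rand(32, 4).astype('float32')
y_toy = np.random.rand(32, 1).astype('float32')
model.fit(x_toy, y_toy, epochs=1, verbose=0)
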
def softmax(x, axis=1):
    """Softmax activation function.

    # Arguments
        x: Tensor.
        axis: Integer, axis along which the softmax normalization
            is applied.

    # Returns
        Tensor, output of softmax transformation.

    # Raises
        ValueError: In case `dim(x) == 1`.
    """
    ndim = K.ndim(x)
    if ndim == 2:
        return K.softmax(x)
    elif ndim > 2:
        e = K.exp(x - K.max(x, axis=axis, keepdims=True))
        s = K.sum(e, axis=axis, keepdims=True)
        return e / s
    else:
        raise ValueError('Cannot apply softmax to a tensor that is 1D')

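# A quick numerical check of the softmax helper above, assuming
# `from tensorflow.keras import backend as K`; subtracting the per-axis
# max keeps it stable even for very large logits.
import numpy as np

logits = K.constant(np.array([[[1.0, 2.0, 3.0],
                               [1001.0, 1002.0, 1003.0]]]))
probs = softmax(logits, axis=-1)
print(K.eval(probs))  # both rows give the same distribution; rows sum to 1
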
def call(cls, y_true, y_pred):
    """Call the L-inf norm loss function.

    Parameters
    ----------
    y_true: tensor or variable
        The ground truth value
    y_pred: tensor or variable
        The predicted value

    Returns
    -------
    tensor
        The loss value
    """
    diff = K.abs(y_true - y_pred)
    max_loss = K.max(diff, axis=(1, 2), keepdims=True)
    loss = K.mean(max_loss, axis=-1)
    return loss

def update_branch():
    """Update the moving average when is_ema_training is True."""
    # Set the qnoise factor to 0 to update the EMA using the
    # unquantized input.
    prev_qnoise_factor = tf.identity(self.quantizer.qnoise_factor)
    self.quantizer.update_qnoise_factor(tf.constant(0.0))

    # Update the EMA. act_x is the input after the activation function,
    # but before the quantizer; this is achieved by the qnoise_factor
    # of 0 set above.
    act_x = self.quantizer(x)
    new_min = tf.squeeze(K.min(act_x, axis=axis, keepdims=True))
    K.moving_average_update(self.ema_min, new_min, self.ema_decay)
    new_max = tf.squeeze(K.max(act_x, axis=axis, keepdims=True))
    K.moving_average_update(self.ema_max, new_max, self.ema_decay)

    # Reset the qnoise factor to the previous value.
    self.quantizer.update_qnoise_factor(prev_qnoise_factor)

def _wavelet_hs_branch(input_data, scale_length, nfilters,
                       kernel_initializer, nlayers, se_block,
                       skip_connection):
    hs = input_data
    hs = Conv2D(nfilters, (scale_length, 1), activation='relu',
                padding='same', kernel_initializer=kernel_initializer,
                name='wavelet_hs_conv_0')(hs)
    hs = BatchNormalization(name='wavelet_hs_bn_0')(hs)
    hs = AveragePooling2D((2, 1), padding='same',
                          name='wavelet_hs_avgpool_0')(hs)  # (?, /2, 10000)
    hs = Conv2D(nfilters, (scale_length // 2, 1), activation='relu',
                padding='same', kernel_initializer=kernel_initializer,
                name='wavelet_hs_conv_1')(hs)
    hs = BatchNormalization(name='wavelet_hs_bn_1')(hs)
    hs = AveragePooling2D((2, 1), padding='same',
                          name='wavelet_hs_avgpool_1')(hs)  # (?, /4, 10000)

    for i in range(nlayers):
        shortcut = hs
        hs = Conv2D(nfilters, 3, activation='relu', padding='same',
                    kernel_initializer=kernel_initializer,
                    name='wavelet_hs_conv_{}'.format(i + 2))(hs)
        hs = BatchNormalization(name='wavelet_hs_bn_{}'.format(i + 2))(hs)
        if se_block:
            hs = squeeze_excite_block_2d(hs)
        if i != 0 and skip_connection:
            hs = Add(name='wavelet_skip_merge_{}'.format(i + 2))(
                [hs, shortcut])
        hs = MaxPooling2D((2, 2), padding='same',
                          name='wavelet_maxpool_{}'.format(i + 2))(hs)

    hs = Lambda(lambda hs: K.max(hs, axis=1))(hs)
    return hs

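# A hedged sketch of wiring the branch into a model; the input shape
# (64 wavelet scales x 10000 samples x 1 channel) and the hyperparameter
# values below are assumptions inferred from the shape comments above.
from tensorflow.keras.layers import Input

scaleogram = Input(shape=(64, 10000, 1))
features = _wavelet_hs_branch(scaleogram, scale_length=64, nfilters=32,
                              kernel_initializer='he_normal', nlayers=3,
                              se_block=False, skip_connection=True)
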
def call(self, y_true, y_pred):
    y_true, y_pred = K.batch_flatten(y_true), K.batch_flatten(y_pred)
    col_shape = K.shape(y_true)[1]
    error = y_pred - y_true
    abs_error = K.abs(error)

    # delta is chosen here as a fraction of the largest absolute error
    delta = 0.2 * K.max(abs_error, axis=1)
    delta = K.expand_dims(delta)  # delta is now (batch, 1)
    # tile delta to the same shape as y_pred/y_true
    delta = tf.tile(delta, [1, col_shape])

    # small error term is abs_error; big error formula is:
    big_error = K.square(error) + K.square(delta)
    big_error = tf.divide(big_error, (2 * delta) + K.epsilon())

    # berHu: absolute error below delta, quadratic term above it.
    # Compare |error| (not the signed error) so that large negative
    # errors are also penalized quadratically.
    berhu = tf.where(tf.greater(abs_error, delta), big_error, abs_error)
    return tf.reduce_mean(berhu)

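# A NumPy reference of the berHu rule for one flattened sample, to make
# the branching above concrete (a sketch of the math, not the class API):
import numpy as np

error = np.array([-0.5, 0.1, 0.9])
abs_error = np.abs(error)
delta = 0.2 * abs_error.max()                     # here 0.18
big_error = (np.square(error) + delta ** 2) / (2 * delta)
berhu = np.where(abs_error > delta, big_error, abs_error)
print(berhu.mean())
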
def attack_statistics(x_true, x_adv):
    # NumPy inputs: compute the statistics eagerly.
    if isinstance(x_true, np.ndarray):
        L1 = np.mean(np.sum(np.abs(x_adv - x_true), axis=(-1, -2, -3)))
        L2 = np.mean(
            np.sqrt(np.sum(np.square(x_true - x_adv), axis=(-1, -2, -3))))
        Linf = np.mean(np.max(np.abs(x_true - x_adv), axis=(-1, -2, -3)))
        eps = 1 / 256
        mod_perc = 100 * np.mean(
            np.greater(np.abs(x_true - x_adv), eps).astype('float'))
        return {'L1': L1, 'L2': L2, 'Linf': Linf, '%pix': mod_perc}

    # Tensor inputs: calculate average L1, L2, Linf norms
    # as well as % of pixels modified.
    L1 = tf.reduce_mean(K.sum(K.abs(x_adv - x_true), axis=(-1, -2, -3)))
    L2 = tf.reduce_mean(
        K.sqrt(K.sum(K.square(x_adv - x_true), axis=(-1, -2, -3))))
    Linf = tf.reduce_mean(K.max(K.abs(x_true - x_adv), axis=(-1, -2, -3)))
    eps = tf.constant(1 / 256, shape=x_true.shape.as_list()[1:])
    mod_perc = 100 * tf.reduce_mean(
        K.cast(K.greater(K.abs(x_true - x_adv), eps), dtype='float'))
    return {'L1': L1, 'L2': L2, 'Linf': Linf, '%pix': mod_perc}

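# Example call on NumPy batches of NHWC images in [0, 1]; the adversarial
# batch here is just clipped noise for illustration.
import numpy as np

x_clean = np.random.rand(8, 32, 32, 3)
x_noisy = np.clip(x_clean + np.random.normal(0, 0.01, x_clean.shape), 0, 1)
print(attack_statistics(x_clean, x_noisy))
# -> {'L1': ..., 'L2': ..., 'Linf': ..., '%pix': ...}
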
def call(self, inputs, mask=None, **kwargs):
    if isinstance(inputs, list):
        query, key, value = inputs
    else:
        query = key = value = inputs
    if isinstance(mask, list):
        mask = mask[1]
    feature_dim = K.shape(query)[-1]
    e = K.batch_dot(query, key, axes=2) / K.sqrt(
        K.cast(feature_dim, dtype=K.floatx()))
    e = K.exp(e - K.max(e, axis=-1, keepdims=True))  # prepare softmax
    if mask is not None:
        e *= K.cast(K.expand_dims(mask, axis=-2), K.floatx())
    a = e / (K.sum(e, axis=-1, keepdims=True)
             + K.epsilon())  # finish softmax
    v = K.batch_dot(a, value)
    if self.return_attention:
        return [v, a]
    return v

def call(self, x, mask=None):
    # computes a probability distribution over the timesteps
    # uses the 'max trick' for numerical stability
    # reshape is done to avoid an issue with TensorFlow
    # and 1-dimensional weights
    logits = K.dot(x, self.W)
    x_shape = K.shape(x)
    logits = K.reshape(logits, (x_shape[0], x_shape[1]))
    ai = K.exp(logits - K.max(logits, axis=-1, keepdims=True))

    # masked timesteps have zero weight
    if mask is not None:
        mask = K.cast(mask, K.floatx())
        ai = ai * mask
    att_weights = ai / (K.sum(ai, axis=1, keepdims=True) + K.epsilon())
    weighted_input = x * K.expand_dims(att_weights)
    result = K.sum(weighted_input, axis=1)
    if self.return_attention:
        return [result, att_weights]
    return result

def spatial_attention(channel_refined_feature):
    # channel_refined_feature: (None, piece_len, 3, 64)
    maxpool_spatial = kl.Lambda(lambda x: k.max(x, axis=3, keepdims=True))(
        channel_refined_feature)  # (None, piece_len, 3, 1)
    avgpool_spatial = kl.Lambda(lambda x: k.mean(x, axis=3, keepdims=True))(
        channel_refined_feature)  # (None, piece_len, 3, 1)
    max_avg_pool_spatial = kl.Concatenate(axis=3)(
        [maxpool_spatial, avgpool_spatial])  # (None, piece_len, 3, 2)
    spatial_attention_feature = kl.Conv2D(
        filters=1, kernel_size=(2, 2), padding="same",
        activation='relu')(max_avg_pool_spatial)  # (None, piece_len, 3, 1)
    multiply_channel_spatial = kl.Multiply()(
        [channel_refined_feature, spatial_attention_feature]
    )  # (None, piece_len, 3, 64)
    return multiply_channel_spatial

def call(self, inputs, mask=None):
    atoms, bonds, edges = inputs

    # For each atom, look up the features of its neighbours.
    neighbour_atom_features = neighbour_lookup(atoms, edges,
                                               maskvalue=-np.inf,
                                               include_self=True)

    # Take max along the `degree` axis (2) to get the max of
    # neighbours and self.
    max_features = K.max(neighbour_atom_features, axis=2)

    # Count each atom's neighbours (padded edges are -1); the boolean
    # is cast to floatx so it can be summed and multiplied.
    atom_degrees = K.sum(K.cast(K.not_equal(edges, -1), K.floatx()),
                         axis=-1, keepdims=True)
    general_atom_mask = K.cast(K.not_equal(atom_degrees, 0), K.floatx())
    return max_features * general_atom_mask

def call(self, inputs, mask=None, **kwargs):
    key, query, value = inputs
    if isinstance(mask, list):
        mask = mask[1]
    feature_dim = K.shape(query)[-1]
    e = K.batch_dot(query, key, axes=2) / K.sqrt(
        K.cast(feature_dim, dtype=K.floatx()))
    e = K.exp(e - K.max(e, axis=-1, keepdims=True))
    if mask is not None:
        e *= K.cast(K.expand_dims(mask, axis=-2), K.floatx())
    a = e / (K.sum(e, axis=-1, keepdims=True) + K.epsilon())
    v = K.batch_dot(a, value)
    return v

def call(self, logits):
    # logits: [BATCH_SIZE, d]
    logits_ = K.expand_dims(logits, -2)  # [BATCH_SIZE, 1, d]

    batch_size = tf.shape(logits_)[0]
    d = tf.shape(logits_)[2]
    uniform = tf.random.uniform(
        shape=(batch_size, self.k, d),
        minval=np.finfo(tf.float32.as_numpy_dtype).tiny,
        maxval=1.0)
    gumbel = -K.log(-K.log(uniform))
    noisy_logits = (gumbel + logits_) / self.tau0
    samples = K.softmax(noisy_logits)
    samples = K.max(samples, axis=1)

    # Explanation stage output: keep the k largest logits.
    threshold = tf.expand_dims(
        tf.nn.top_k(logits, self.k, sorted=True)[0][:, -1], -1)
    discrete_logits = tf.cast(tf.greater_equal(logits, threshold),
                              tf.float32)

    return K.in_train_phase(samples, discrete_logits)

def get_box(y_pred, score_threshold, iou_threshold):
    grid = config.image_size // config.scale
    coord_x = tf.cast(
        tf.reshape(tf.tile(tf.range(grid), [grid]),
                   (1, grid, grid, 1, 1)), tf.float32)
    coord_y = tf.transpose(coord_x, (0, 2, 1, 3, 4))
    coords = tf.tile(tf.concat([coord_x, coord_y], -1),
                     [1, 1, 1, len(config.anchors) // 2, 1])
    dims = backend.cast_to_floatx(backend.int_shape(y_pred)[1:3])
    dims = backend.reshape(dims, (1, 1, 1, 1, 2))
    anchors = config.anchors
    anchors = anchors.reshape(len(anchors) // 2, 2)

    # Decode the raw network outputs into box centers, sizes,
    # confidences and class probabilities.
    pred_xy = backend.sigmoid(y_pred[:, :, :, :, 0:2])
    pred_xy = (pred_xy + coords) / dims
    pred_wh = backend.exp(y_pred[:, :, :, :, 2:4])
    pred_wh = (pred_wh * anchors) / dims
    box_conf = backend.sigmoid(y_pred[:, :, :, :, 4:5])
    box_class_prob = backend.softmax(y_pred[:, :, :, :, 5:])

    # Work on a single image (batch index 0).
    pred_xy = pred_xy[0, ...]
    pred_wh = pred_wh[0, ...]
    box_conf = box_conf[0, ...]
    box_class_prob = box_class_prob[0, ...]

    # Convert center/size to corner coordinates.
    box_xy1 = pred_xy - 0.5 * pred_wh
    box_xy2 = pred_xy + 0.5 * pred_wh
    _boxes = backend.concatenate((box_xy1, box_xy2), -1)

    # Filter by class score, then apply non-max suppression.
    box_scores = box_conf * box_class_prob
    box_classes = backend.argmax(box_scores, -1)
    box_class_scores = backend.max(box_scores, -1)
    prediction_mask = box_class_scores >= score_threshold
    _boxes = tf.boolean_mask(_boxes, prediction_mask)
    scores = tf.boolean_mask(box_class_scores, prediction_mask)
    _classes = tf.boolean_mask(box_classes, prediction_mask)
    _boxes = _boxes * config.image_size

    selected_idx = tf.image.non_max_suppression(
        _boxes, scores, config.max_boxes, iou_threshold)
    return (backend.gather(_boxes, selected_idx),
            backend.gather(_classes, selected_idx))

def _soft_alignment(self, inputs):
    """Compute the soft alignment between the elements of two sentences.

    Args:
        inputs: A list of two elements, the first is a tensor of
            attention weights, the second is the encoded sentence on
            which to compute the alignments.

    Returns:
        A tensor containing the alignments.
    """
    attention = inputs[0]
    sentence = inputs[1]

    # Subtract the max. from the attention weights to avoid overflows.
    exp = K.exp(attention - K.max(attention, axis=-1, keepdims=True))
    exp_sum = K.sum(exp, axis=-1, keepdims=True)
    softmax = exp / exp_sum

    return K.batch_dot(softmax, sentence)

def amplitude_to_decibel(x, amin=1e-10, dynamic_range=80.0):
    """[K] Convert (linear) amplitude to decibel (log10(x)).

    Parameters
    ----------
    x: Keras *batch* tensor or variable. It has to be batch because of
        the sample-wise `K.max()`.

    amin: minimum amplitude. Amplitudes smaller than `amin` are set
        to this value.

    dynamic_range: dynamic range in decibel.
    """
    log_spec = 10 * K.log(K.maximum(x, amin)) / np.log(10).astype(K.floatx())
    if K.ndim(x) > 1:
        axis = tuple(range(K.ndim(x))[1:])
    else:
        axis = None

    log_spec = log_spec - K.max(log_spec, axis=axis, keepdims=True)  # [-?, 0]
    log_spec = K.maximum(log_spec, -1 * dynamic_range)  # [-80, 0]
    return log_spec

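# A small demo of amplitude_to_decibel, assuming
# `from tensorflow.keras import backend as K`; every sample in the batch
# is shifted so its own maximum lands at 0 dB, then floored at -80 dB.
import numpy as np

spec = K.constant(np.random.rand(2, 64, 100) + 1e-6)
spec_db = amplitude_to_decibel(spec)
print(K.eval(K.max(spec_db)))  # ~0.0: per-sample maxima map to 0 dB
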
def encoder(self, text_embed, return_sequence):
    # We shift the document to the right to obtain the left-side contexts.
    l_embedding = Lambda(
        lambda x: K.concatenate(
            [K.zeros(shape=(K.shape(x)[0], 1, K.shape(x)[-1])),
             x[:, :-1]], axis=1))(text_embed)
    # We shift the document to the left to obtain the right-side contexts.
    r_embedding = Lambda(
        lambda x: K.concatenate(
            [K.zeros(shape=(K.shape(x)[0], 1, K.shape(x)[-1])),
             x[:, 1:]], axis=1))(text_embed)

    # Use LSTM RNNs instead of vanilla RNNs as described in the paper.
    forward = LSTM(300, return_sequences=True)(l_embedding)  # equation (1)
    backward = LSTM(300, return_sequences=True,
                    go_backwards=True)(r_embedding)  # equation (2)
    # Keras returns the output sequences in reverse order.
    backward = Lambda(lambda x: K.reverse(x, axes=1))(backward)
    together = concatenate([forward, text_embed, backward],
                           axis=2)  # equation (3)

    # Use Conv1D instead of TimeDistributed and Dense.
    semantic = Conv1D(300, kernel_size=1,
                      activation="tanh")(together)  # equation (4)
    if return_sequence:
        return semantic
    sentence_embed = Lambda(
        lambda x: K.max(x, axis=1))(semantic)  # equation (5)
    return sentence_embed

def obj_loss(self, y_true, y_pred):
    b_o = calculate_ious(y_true, y_pred, use_iou=self.readjust_obj_score)
    b_o_pred = y_pred[..., 4]

    num_true_labels = (self.grid_size[0] * self.grid_size[1]
                       * self.nb_anchors)
    y_true_p = K.reshape(y_true[..., :4],
                         shape=(self.batch_size, 1, 1, 1,
                                num_true_labels, 4))
    iou_scores_buff = calculate_ious(y_true_p,
                                     K.expand_dims(y_pred, axis=4))
    best_ious = K.max(iou_scores_buff, axis=4)

    indicator_noobj = (K.cast(best_ious < self.iou_filter, np.float32)
                       * (1 - y_true[..., 4]) * self.lambda_noobj)
    indicator_obj = y_true[..., 4] * self.lambda_obj
    indicator_o = indicator_obj + indicator_noobj

    loss_obj = K.sum(K.square(b_o - b_o_pred) * indicator_o)
    return loss_obj / 2

def call(self, inputs, mask=None, **kwargs):
    input_len = K.shape(inputs)[1]

    if self.attention_type == SeqSelfAttention.ATTENTION_TYPE_ADD:
        e = self._call_additive_emission(inputs)
    elif self.attention_type == SeqSelfAttention.ATTENTION_TYPE_MUL:
        e = self._call_multiplicative_emission(inputs)

    if self.attention_activation is not None:
        e = self.attention_activation(e)
    e = K.exp(e - K.max(e, axis=-1, keepdims=True))
    if self.attention_width is not None:
        if self.history_only:
            lower = K.arange(input_len) - (self.attention_width - 1)
        else:
            lower = K.arange(input_len) - self.attention_width // 2
        lower = K.expand_dims(lower, axis=-1)
        upper = lower + self.attention_width
        indices = K.tile(K.expand_dims(K.arange(input_len), axis=0),
                         [input_len, 1])
        e = (e * K.cast(lower <= indices, K.floatx())
             * K.cast(indices < upper, K.floatx()))
    if mask is not None:
        mask = K.cast(mask, K.floatx())
        mask = K.expand_dims(mask)
        e = K.permute_dimensions(
            K.permute_dimensions(e * mask, (0, 2, 1)) * mask, (0, 2, 1))

    # a_{t} = \text{softmax}(e_t)
    s = K.sum(e, axis=-1, keepdims=True)
    a = e / (s + K.epsilon())

    # l_t = \sum_{t'} a_{t, t'} x_{t'}
    v = K.batch_dot(a, inputs)
    if self.attention_regularizer_weight > 0.0:
        self.add_loss(self._attention_regularizer(a))
    if self.return_attention:
        return [v, a]
    return v

def _compute_target_mask(self, inputs, mask=None):
    input_shape = K.shape(inputs)
    input_type = K.dtype(inputs)
    mask_threshold = K.constant(1e8, dtype=input_type)

    channel_num = int(inputs.shape[-1])
    channel_dim = K.prod(input_shape[:-1])
    masked_inputs = inputs
    if mask is not None:
        masked_inputs = K.switch(
            K.cast(mask, K.floatx()) > 0.5,
            masked_inputs,
            K.ones_like(masked_inputs, dtype=input_type) * mask_threshold,
        )
    norm = K.abs(masked_inputs)
    channeled_norm = K.transpose(
        K.reshape(norm, (channel_dim, channel_num)))
    weight_num = K.sum(
        K.reshape(K.cast(masked_inputs < mask_threshold, K.floatx()),
                  (channel_dim, channel_num)),
        axis=0,
    )
    indices = K.stack(
        [
            K.arange(channel_num, dtype='int32'),
            K.cast(self.target_rate * weight_num, dtype='int32') - 1,
        ],
        axis=-1,
    )
    threshold = -tf.gather_nd(
        tf.nn.top_k(-channeled_norm, k=K.max(indices[:, 1]) + 1).values,
        indices)
    threshold = K.reshape(tf.tile(threshold, [channel_dim]), input_shape)
    target_mask = K.switch(
        norm <= threshold,
        K.ones_like(inputs, dtype=K.floatx()),
        K.zeros_like(inputs, dtype=K.floatx()),
    )
    return target_mask

def __call__(self, p):
    """Returns the average Shannon entropy of the distribution(s) p."""
    # Calculate impurity.
    if self.entr:
        impurity = K.sum(p * -K.log(p + self.epsilon)
                         / K.log(K.constant(2)), axis=-1)
    else:
        # Gini impurity: 1 - sum(p^2), which equals sum(p * (1 - p)).
        impurity = K.sum(p * (1 - p), axis=-1)
    concentration = K.max(p, axis=-1)

    # Calculate batch similarity.
    ppt = K.dot(p, K.transpose(p))
    similarity = K.mean(K.sum(ppt) - K.sum(tf.linalg.diag_part(ppt)))

    penalty = (self.gamma * K.mean(impurity)
               + self.theta * K.mean(concentration)
               + self.omega * similarity)
    return penalty

def call(self, inputs):
    if not self.norm_method:
        outputs = inputs

    elif self.norm_method == 'whole_image':
        axes = [3, 4] if self.channel_axis == 1 else [2, 3]
        outputs = inputs - K.mean(inputs, axis=axes, keepdims=True)
        outputs = outputs / (K.std(inputs, axis=axes, keepdims=True)
                             + K.epsilon())

    elif self.norm_method == 'std':
        outputs = inputs - self._average_filter(inputs)
        outputs = outputs / self._window_std_filter(outputs)

    elif self.norm_method == 'max':
        outputs = inputs / K.max(inputs)
        outputs = outputs - self._average_filter(outputs)

    else:
        raise NotImplementedError(
            '"{}" is not a valid norm_method'.format(self.norm_method))

    return outputs

def obj_loss(self, y_true, y_pred):
    # TODO: this part should be reviewed
    obj_conf_true = y_true[..., 4]
    obj_conf_pred = y_pred[..., 4]

    num_true_labels = (self.grid_size[0] * self.grid_size[1]
                       * self.nb_anchors)
    y_true_coords = K.reshape(y_true[..., :4],
                              shape=(self.batch_size, 1, 1, 1,
                                     num_true_labels, 4))
    iou_scores_buff = calculate_ious(y_true_coords,
                                     K.expand_dims(y_pred, axis=4))
    best_ious = K.max(iou_scores_buff, axis=4)

    indicator_noobj = (K.cast(best_ious < self.iou_filter, np.float32)
                       * (1 - y_true[..., 4]) * self.lambda_noobj)
    indicator_obj = y_true[..., 4] * self.lambda_obj
    indicator_obj_noobj = indicator_obj + indicator_noobj

    loss_obj = K.sum(
        K.square(obj_conf_true - obj_conf_pred) * indicator_obj_noobj)
    return loss_obj

def edges_depth_loss_function(y_true, y_pred, theta=0.1,
                              maxDepthVal=1000.0 / 10.0):
    # Edges
    dy_true, dx_true = tf.image.image_gradients(y_true)
    dy_pred, dx_pred = tf.image.image_gradients(y_pred)

    # Gradient magnitude of the true depth map.
    grad_magn = K.sqrt(K.pow(dx_true, 2) + K.pow(dy_true, 2))

    # Mask to separate the high-frequency and low-frequency components.
    mask = (grad_magn - K.min(grad_magn)) / (K.max(grad_magn)
                                             - K.min(grad_magn))

    # High-frequency and low-frequency depth maps.
    hf_y_true = (1 - mask) * y_true
    lf_y_true = mask * y_true
    hf_y_pred = (1 - mask) * y_pred
    lf_y_pred = mask * y_pred

    # MAE of the low-frequency component.
    low_freq_loss = K.mean(K.abs(lf_y_pred - lf_y_true), axis=-1)

    # Gradients of the high-frequency depth.
    dy_true_hf, dx_true_hf = tf.image.image_gradients(hf_y_true)
    dy_pred_hf, dx_pred_hf = tf.image.image_gradients(hf_y_pred)

    # MAE of the high-frequency gradients.
    high_freq_loss = K.mean(K.abs(dy_pred_hf - dy_true_hf)
                            + K.abs(dx_pred_hf - dx_true_hf), axis=-1)

    # Weights
    w1 = 1.0
    w2 = 2.0
    return (w1 * K.mean(high_freq_loss)) + (w2 * K.mean(low_freq_loss))

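# A minimal sketch wiring the loss into a model; `depth_model` is a
# hypothetical Keras model whose output matches the 4D [batch, h, w, 1]
# depth targets that tf.image.image_gradients expects.
depth_model.compile(optimizer='adam', loss=edges_depth_loss_function)
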
def update_model(self):
    loss = []
    for e in range(self.train_epoch):
        (batch_G_idx, batch_S, batch_v,
         batch_R) = self.graph_handler.genenrate_train_sample()
        for i, S, v, R in zip(batch_G_idx, batch_S, batch_v, batch_R):
            S, future_S = S[0], S[1]
            G = self.graph_handler.get_instance(i)
            A = G.get_adjacency_matrix()
            F = G.get_feature()
            W = G.get_weight()
            R = tf.convert_to_tensor(R, dtype=tf.float32)
            # Q-learning target: immediate reward plus the discounted
            # maximum Q-value over the actions available in future_S.
            Q = tf.convert_to_tensor([[0.]], dtype=tf.float32)
            Q += R + self.discount * K.max(
                self.model_on_graph(
                    ops.calculate_available_node(future_S),
                    future_S, F, W, A))
            loss.append(self.model_on_graph.update([v], S, F, W, A, Q))
    return np.mean(loss)

def call(self, x, **kwargs):
    debug_print("call")
    # filters = K.zeros(shape=(N_filt, Filt_dim))

    # Compute the filters.
    output_list = []
    for i in range(self.N_filt):
        low_pass1 = (2 * self.filt_beg_freq[i]
                     * sinc(self.filt_beg_freq[i] * self.freq_scale,
                            self.t_right))
        low_pass2 = (2 * self.filt_end_freq[i]
                     * sinc(self.filt_end_freq[i] * self.freq_scale,
                            self.t_right))
        band_pass = low_pass2 - low_pass1
        band_pass = band_pass / K.max(band_pass)
        output_list.append(band_pass * self.window)
    filters = K.stack(output_list)  # (80, 251)
    filters = K.transpose(filters)  # (251, 80)
    # (251, 1, 80); in TF: (filter_width, in_channels, out_channels),
    # in PyTorch: (out_channels, in_channels, filter_width).
    filters = K.reshape(filters, (self.Filt_dim, 1, self.N_filt))

    """Given an input tensor of shape [batch, in_width, in_channels] if
    data_format is "NWC", or [batch, in_channels, in_width] if
    data_format is "NCW", and a filter / kernel tensor of shape
    [filter_width, in_channels, out_channels], this op reshapes the
    arguments to pass them to conv2d to perform the equivalent
    convolution operation. Internally, this op reshapes the input
    tensors and invokes tf.nn.conv2d. For example, if data_format does
    not start with "NC", a tensor of shape [batch, in_width,
    in_channels] is reshaped to [batch, 1, in_width, in_channels], and
    the filter is reshaped to [1, filter_width, in_channels,
    out_channels]. The result is then reshaped back to [batch,
    out_width, out_channels] (where out_width is a function of the
    stride and padding as in conv2d) and returned to the caller.
    """

    # Do the convolution.
    debug_print("call")
    debug_print(" x", x)
    debug_print(" filters", filters)
    out = K.conv1d(x, kernel=filters)
    debug_print(" out", out)

    return out

def hop_and_maxpooling(self, convolution_output):
    '''Input: batch size * speech length * number of filters'''
    number_of_frames = int((self.audio_sample_length - self.fft_length)
                           / self.fft_shift)
    number_of_sample = K.shape(convolution_output)[0]
    non_linearity = []
    convolution_output_relu = K.relu(convolution_output)
    for i in range(0, self.F):
        filtered_x = convolution_output_relu[:, :, i]
        start_point = 0
        stop_point = self.fft_length
        for j in range(0, number_of_frames):
            filtered_x_window = filtered_x[:, start_point:stop_point]
            max_value = K.log(K.max(filtered_x_window, axis=1) + 0.01)
            non_linearity.append(max_value)
            start_point = start_point + self.fft_shift
            stop_point = stop_point + self.fft_shift
    return K.reshape(K.cast(non_linearity, dtype='float32'),
                     (number_of_sample, self.F * number_of_frames))

def generate_filters(self):
    # filters = K.zeros(shape=(N_filt, Filt_dim))

    # Get beginning and end frequencies of the filters.
    min_freq = 50.0
    min_band = 50.0
    filt_beg_freq = K.abs(self.filt_b1) + min_freq / self.freq_scale
    filt_end_freq = filt_beg_freq + (K.abs(self.filt_band)
                                     + min_band / self.freq_scale)

    # Filter window (Hamming).
    n = np.linspace(0, self.Filt_dim, self.Filt_dim)
    window = 0.54 - 0.46 * K.cos(2 * math.pi * n / self.Filt_dim)
    window = K.cast(window, "float32")
    # window = K.variable(window)  # TODO what is this?

    t_right_linspace = np.linspace(1, (self.Filt_dim - 1) / 2,
                                   int((self.Filt_dim - 1) / 2))
    # t_right = K.variable(t_right_linspace / self.fs)
    # The commented line above doesn't work in TF eager mode.
    t_right = t_right_linspace / self.fs

    # Compute the filters.
    output_list = []
    for i in range(self.N_filt):
        low_pass1 = 2 * filt_beg_freq[i] * sinc(
            filt_beg_freq[i] * self.freq_scale, t_right)
        low_pass2 = 2 * filt_end_freq[i] * sinc(
            filt_end_freq[i] * self.freq_scale, t_right)
        band_pass = low_pass2 - low_pass1
        band_pass = band_pass / K.max(band_pass)
        output_list.append(band_pass * window)
    filters = K.stack(output_list)  # (80, 251)
    filters = K.transpose(filters)  # (251, 80)
    # (251, 1, 80); in TF: (filter_width, in_channels, out_channels),
    # in PyTorch: (out_channels, in_channels, filter_width).
    filters = K.reshape(filters, (self.Filt_dim, 1, self.N_filt))
    return filters

def binary_crossentropy_with_ranking(y_true, y_pred):
    '''Trying to combine ranking loss with numeric precision.'''
    # First get the log loss like normal.
    logloss = K.mean(K.binary_crossentropy(y_true, y_pred), axis=-1)

    # Next, build a rank loss.

    # Clip the probabilities to keep stability.
    y_pred_clipped = K.clip(y_pred, K.epsilon(), 1 - K.epsilon())

    # Translate into the raw scores before the logit.
    y_pred_score = K.log(y_pred_clipped / (1 - y_pred_clipped))

    # Determine what the maximum score for a zero outcome is.
    y_pred_score_zerooutcome_max = K.max(
        tf.boolean_mask(y_pred_score, (y_true < 1)))

    # Determine how much each score is above or below it.
    rankloss = y_pred_score - y_pred_score_zerooutcome_max

    # Only keep losses for positive outcomes.
    rankloss = tf.boolean_mask(rankloss, tf.equal(y_true, 1))

    # Only keep losses where the score is below the max.
    rankloss = K.square(K.clip(rankloss, -100, 0))

    # Average the loss for just the positive outcomes; the +1 guards
    # against division by zero when the batch has no positives.
    rankloss = K.sum(rankloss, axis=-1) / (
        K.sum(K.cast(y_true > 0, tf.float32)) + 1)

    return (rankloss + 1) * logloss  # - an alternative to try

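# Usage sketch for the ranking-aware crossentropy; the binary classifier
# below is hypothetical, and y_true is expected to contain 0/1 floats so
# the `y_true < 1` / `y_true > 0` masks select the two classes.
from tensorflow import keras

clf = keras.Sequential([
    keras.layers.Dense(1, activation='sigmoid', input_shape=(10,)),
])
clf.compile(optimizer='adam', loss=binary_crossentropy_with_ranking)
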
def call(self, inputs, mask=None, **kwargs):
    if isinstance(inputs, list):
        query, key, value = inputs
    else:
        query = key = value = inputs
    if isinstance(mask, list):
        mask = mask[1]
    feature_dim = K.shape(query)[-1]
    e = K.batch_dot(query, key, axes=2) / K.sqrt(
        K.cast(feature_dim, dtype=K.floatx()))
    e = K.exp(e - K.max(e, axis=-1, keepdims=True))
    if self.history_only:
        query_len, key_len = K.shape(query)[1], K.shape(key)[1]
        indices = K.expand_dims(K.arange(0, key_len), axis=0)
        upper = K.expand_dims(K.arange(0, query_len), axis=-1)
        e *= K.expand_dims(K.cast(indices <= upper, K.floatx()), axis=0)
    if mask is not None:
        e *= K.cast(K.expand_dims(mask, axis=-2), K.floatx())
    a = e / (K.sum(e, axis=-1, keepdims=True) + K.epsilon())
    v = K.batch_dot(a, value)
    if self.return_attention:
        return [v, a]
    return v