def customLoss(yTrue, yPred):
    # `weights` is expected to be defined in the enclosing scope.
    target = yTrue
    # Normalize the predictions along the class axis.
    yPred /= tf.reduce_sum(yPred, axis=len(yPred.get_shape()) - 1, keepdims=True)
    # Manual computation of crossentropy.
    epsilon = _to_tensor(tf.keras.backend.epsilon(), yPred.dtype.base_dtype)
    yPred = tf.clip_by_value(yPred, epsilon, 1. - epsilon)
    yPred = tf.log(yPred)
    # Apply the weights, class by class.
    mask = K.cast(K.expand_dims(weights, axis=-1), dtype='float32')
    num_classes = int(yPred.get_shape()[1])
    yPred_stack = []
    for i in range(num_classes):
        mask_i = K.cast(K.expand_dims(mask[i], axis=-1), dtype='float32')
        yPred_i = K.cast(K.expand_dims(yPred[:, i], axis=-1), dtype='float32')
        yPred_stack.append(K.dot(yPred_i, mask_i))
    output = tf.reshape(tf.stack(yPred_stack, axis=1, name='stack'),
                        [-1, num_classes])
    return -tf.reduce_sum(target * output,
                          axis=len(output.get_shape()) - 1)
def custom_weighted_binary_crossentropy(targets, logits, pos_weight=weight_array, name=None):
    # Assumes `weight_array` and `label_num` from the enclosing scope, plus:
    #   from tensorflow.python.framework import ops
    #   from tensorflow.python.ops import math_ops, nn_ops
    # Transform the probabilities back to logits.
    _epsilon = tfb._to_tensor(tfb.epsilon(), logits.dtype.base_dtype)
    logits = tf.clip_by_value(logits, _epsilon, 1 - _epsilon)
    logits = tf.log(logits / (1 - logits))
    # Compute the weighted loss, adapted from
    # tf.nn.weighted_cross_entropy_with_logits.
    with ops.name_scope(name, "logistic_loss", [logits, targets]) as name:
        logits = ops.convert_to_tensor(logits, name="logits")
        targets = ops.convert_to_tensor(targets, name="targets")
        try:
            targets.get_shape().merge_with(logits.get_shape())
        except ValueError:
            raise ValueError(
                "logits and targets must have the same shape (%s vs %s)" %
                (logits.get_shape(), targets.get_shape()))
        loss = []
        for i in range(0, label_num - 1):
            # Numerically stable formulation of the weighted sigmoid crossentropy.
            log_weight = 1 + (pos_weight[i] - 1) * targets[i]
            loss_i = math_ops.add(
                (1 - targets[i]) * logits[i],
                log_weight * (math_ops.log1p(math_ops.exp(-math_ops.abs(logits[i]))) +
                              nn_ops.relu(-logits[i])),
                name=name)
            loss.append(loss_i)
        return tf.reduce_mean(loss)
def K_sparse_categorical_crossentropy(target, output, from_logits=False, axis=-1):
    output_dimensions = list(range(len(output.get_shape())))
    if axis != -1 and axis not in output_dimensions:
        raise ValueError('{}{}{}'.format(
            'Unexpected channels axis {}. '.format(axis),
            'Expected to be -1 or one of the axes of `output`, ',
            'which has {} dimensions.'.format(len(output.get_shape()))))
    # If the channels are not in the last axis, move them to be there:
    if axis != -1 and axis != output_dimensions[-1]:
        permutation = output_dimensions[:axis] + output_dimensions[axis + 1:]
        permutation += [axis]
        output = tf.transpose(output, perm=permutation)
    # Note: tf.nn.sparse_softmax_cross_entropy_with_logits
    # expects logits, Keras expects probabilities.
    if not from_logits:
        _epsilon = _to_tensor(epsilon(), output.dtype.base_dtype)
        output = tf.clip_by_value(output, _epsilon, 1 - _epsilon)
        output = tf.log(output)
    output_shape = output.get_shape()
    targets = cast(flatten(target), 'int64')
    logits = tf.reshape(output, [-1, tf.shape(output)[-1]])
    res = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=targets,
                                                         logits=logits)
    if len(output_shape) >= 3:
        # If our output includes timestep or spatial dimensions,
        # we need to reshape the flat result back.
        return tf.reshape(res, tf.shape(output)[:-1])
    else:
        return res
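# A minimal smoke test for the sparse loss above — a sketch assuming TF 1.x
# graph semantics and that the Keras backend helpers (`_to_tensor`, `epsilon`,
# `cast`, `flatten`) are in scope; shapes and labels are illustrative only.
probs = tf.nn.softmax(tf.random_normal([4, 10]))    # (batch, classes) probabilities
labels = tf.constant([1, 3, 0, 7], dtype=tf.int64)  # sparse integer targets
per_example_loss = K_sparse_categorical_crossentropy(labels, probs)  # shape (4,)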
def weighted_binary_crossentropy2(target, output):
    """
    Weighted binary crossentropy between an output tensor and a target tensor.
    POS_WEIGHT is used as a multiplier for the positive targets.

    Combination of the following functions:
    * keras.losses.binary_crossentropy
    * keras.backend.tensorflow_backend.binary_crossentropy
    * tf.nn.weighted_cross_entropy_with_logits

    Reference: https://stackoverflow.com/a/47313183/979377
    """
    POS_WEIGHT = 10  # multiplier for positive targets, needs to be tuned
    # Transform the probabilities back to logits.
    _epsilon = tfb._to_tensor(tfb.epsilon(), output.dtype.base_dtype)
    output = tf.clip_by_value(output, _epsilon, 1 - _epsilon)
    output = tf.log(output / (1 - output))
    # Compute the weighted loss.
    loss = tf.nn.weighted_cross_entropy_with_logits(targets=target,
                                                    logits=output,
                                                    pos_weight=POS_WEIGHT)
    return tf.reduce_mean(loss, axis=-1)
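# A usage sketch for the weighted loss above, assuming a Keras model whose
# final layer is a sigmoid (the loss expects probabilities, which it converts
# back to logits itself); the architecture and data names are illustrative.
from keras.models import Sequential
from keras.layers import Dense

model = Sequential([
    Dense(64, activation='relu', input_shape=(100,)),
    Dense(1, activation='sigmoid'),
])
model.compile(optimizer='adam', loss=weighted_binary_crossentropy2)
# model.fit(x_train, y_train, ...)  # binary y_train; positives weighted 10x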
def new_loss(y_true, y_pred):
    # Clip the predictions for numerical stability, then apply a focal-style
    # binary crossentropy that down-weights confident (easy) examples.
    _epsilon = _to_tensor(epsilon(), y_pred.dtype.base_dtype)
    output = K.tf.clip_by_value(y_pred, _epsilon, 1 - _epsilon)
    return - y_true * K.pow(1 - output, 2) * K.log(output) \
           - (1 - y_true) * K.pow(output, 2) * K.log(1 - output)
def weighted_binary_crossentropy(target, output):
    POS_WEIGHT = 10
    # Transform the probabilities back to logits.
    _epsilon = tfb._to_tensor(tfb.epsilon(), output.dtype.base_dtype)
    output = tf.clip_by_value(output, _epsilon, 1 - _epsilon)
    output = tf.log(output / (1 - output))
    loss = tf.nn.weighted_cross_entropy_with_logits(targets=target,
                                                    logits=output,
                                                    pos_weight=POS_WEIGHT)
    return tf.reduce_mean(loss, axis=-1)
def entropy_categorical_crossentropy(target, output):
    output /= tf.reduce_sum(output,
                            axis=len(output.get_shape()) - 1,
                            keepdims=True)
    _epsilon = _to_tensor(epsilon(), output.dtype.base_dtype)
    output = tf.clip_by_value(output, _epsilon, 1. - _epsilon)
    # Categorical crossentropy with a small entropy-style penalty on the output.
    return - tf.reduce_sum((target - .001 * output) * tf.log(output),
                           axis=len(output.get_shape()) - 1)
def cat_cross_inv(y_true, y_pred):
    # Like the normal categorical crossentropy, but the labels are reversed,
    # so this measures how wrong the network was.
    y_true = 1 - y_true
    axis = -1
    y_pred /= tf.reduce_sum(y_pred, axis, True)
    # Manual computation of crossentropy.
    _epsilon = _to_tensor(epsilon(), y_pred.dtype.base_dtype)
    y_pred = tf.clip_by_value(y_pred, _epsilon, 1. - _epsilon)
    return -tf.reduce_sum(y_true * tf.log(y_pred), axis)
def clip(x, min_value, max_value):
    """Element-wise value clipping.

    If min_value > max_value, the clipping range is [min_value, min_value].

    # Arguments
        x: Tensor or variable.
        min_value: Tensor, float, int, or None.
            If min_value is None, defaults to -infinity.
        max_value: Tensor, float, int, or None.
            If max_value is None, defaults to infinity.

    # Returns
        A tensor.
    """
    if max_value is None:
        max_value = np.inf
    if min_value is None:
        min_value = -np.inf
    min_value = _to_tensor(min_value, x.dtype.base_dtype)
    max_value = _to_tensor(max_value, x.dtype.base_dtype)
    max_value = tf.maximum(min_value, max_value)
    return tf.clip_by_value(x, min_value, max_value)
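# An illustrative check of `clip` (TF 1.x session assumed): values below
# min_value saturate at min_value, values above max_value at max_value,
# and a None bound falls back to +/- infinity.
x = tf.constant([-2.0, 0.5, 3.0])
with tf.Session() as sess:
    print(sess.run(clip(x, 0.0, 1.0)))   # [0.  0.5 1. ]
    print(sess.run(clip(x, None, 1.0)))  # [-2.   0.5  1. ]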
def bootstrapped_crossentropy(y_true, y_pred, bootstrap_type='hard', alpha=0.95):
    target_tensor = y_true
    prediction_tensor = y_pred
    _epsilon = _to_tensor(K.epsilon(), prediction_tensor.dtype.base_dtype)
    prediction_tensor = K.tf.clip_by_value(prediction_tensor, _epsilon, 1 - _epsilon)
    prediction_tensor = K.tf.log(prediction_tensor / (1 - prediction_tensor))
    if bootstrap_type == 'soft':
        # Soft bootstrapping: mix the targets with the predicted probabilities.
        bootstrap_target_tensor = alpha * target_tensor + (
            1.0 - alpha) * K.tf.sigmoid(prediction_tensor)
    else:
        # Hard bootstrapping: mix the targets with thresholded predictions.
        bootstrap_target_tensor = alpha * target_tensor + (1.0 - alpha) * K.tf.cast(
            K.tf.sigmoid(prediction_tensor) > 0.5, K.tf.float32)
    return K.mean(K.tf.nn.sigmoid_cross_entropy_with_logits(
        labels=bootstrap_target_tensor, logits=prediction_tensor))
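# Keras losses take only (y_true, y_pred), so the extra bootstrap arguments
# need a closure before compiling; a sketch, assuming a sigmoid output layer.
def soft_bootstrapped_bce(y_true, y_pred):
    return bootstrapped_crossentropy(y_true, y_pred,
                                     bootstrap_type='soft', alpha=0.95)
# model.compile(optimizer='adam', loss=soft_bootstrapped_bce)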
def softmax_cross_entropy(target, output, from_logits=True, axis=-1,
                          normalize=False):
    """Compute softmax cross entropy loss for a sparse target.

    Args:
        target (tensor): Target labels. If 2D, shape is (w, h).
        output (tensor): Logits or probabilities. If 2D, shape is (w, h, ch).
        from_logits (bool, optional): Whether `output` is logits rather than
            softmax probabilities. Defaults to True.
        axis (int, optional): The channels axis. Defaults to -1.
        normalize (bool, optional): Normalize the loss across all instances.
            Defaults to False.
    """
    _check_dtype(target, 'int32')
    _check_dtype(output, 'float32')
    output_dimensions = list(range(len(output.get_shape())))
    if axis != -1 and axis not in output_dimensions:
        raise ValueError('{}{}{}'.format(
            'Unexpected channels axis {}. '.format(axis),
            'Expected to be -1 or one of the axes of `output`, ',
            'which has {} dimensions.'.format(len(output.get_shape()))))
    # Move the channels to the last axis:
    if axis != -1 and axis != output_dimensions[-1]:
        permutation = output_dimensions[:axis] + output_dimensions[axis + 1:]
        permutation += [axis]
        output = tf.transpose(output, perm=permutation)
    # Convert probabilities to logits; log-probabilities behave as logits
    # here because softmax(log(p)) == p when p sums to 1.
    if not from_logits:
        _epsilon = _to_tensor(epsilon(), output.dtype.base_dtype)
        output = tf.clip_by_value(output, _epsilon, 1 - _epsilon)
        output = tf.log(output)
    logits = output
    # Sparse softmax cross entropy expects labels shaped like the logits
    # minus the channels axis.
    targets = cast(tf.reshape(target, tf.shape(output)[:-1]), 'int32')
    res = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=targets,
                                                         logits=logits)
    if normalize:
        return tf.reduce_mean(res)
    else:
        return tf.reduce_sum(tf.reduce_mean(res, axis=0))  # only batch axis
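# A shape-level sketch of calling the segmentation loss above: an int32 label
# map against per-pixel class logits (from_logits=True skips the clip/log
# conversion); all sizes are made up for the example.
logits = tf.random_normal([8, 64, 64, 5])       # (batch, h, w, classes), float32
labels = tf.zeros([8, 64, 64], dtype=tf.int32)  # per-pixel class ids
seg_loss = softmax_cross_entropy(labels, logits, from_logits=True)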
def weighted_binary_crossentropy(output, target, pos_weight, from_logits=False):
    '''Binary crossentropy between an output tensor and a target tensor.'''
    # Note: tf.nn.weighted_cross_entropy_with_logits
    # expects logits, Keras expects probabilities.
    if not from_logits:
        # Transform the probabilities back to logits.
        epsilon = _to_tensor(_EPSILON, output.dtype.base_dtype)
        output = tf.clip_by_value(output, epsilon, 1 - epsilon)
        output = tf.log(output / (1 - output))
    return tf.nn.weighted_cross_entropy_with_logits(targets=target,
                                                    logits=output,
                                                    pos_weight=pos_weight)
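# An illustrative invocation of the parameterized loss above (TF 1.x),
# up-weighting positives 5x; note its (output, target) argument order.
# The tensors here are placeholders for the example only.
y_pred = tf.constant([[0.9, 0.2], [0.4, 0.8]])
y_true = tf.constant([[1.0, 0.0], [0.0, 1.0]])
wbce = weighted_binary_crossentropy(y_pred, y_true, pos_weight=5.0)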
def custom_categorical_crossentropy(target, output, from_logits=False, delta=1e-7):
    if not from_logits:
        output /= tf.reduce_sum(output,
                                axis=len(output.get_shape()) - 1,
                                keepdims=True)
        _epsilon = _to_tensor(epsilon(), output.dtype.base_dtype)
        output = tf.clip_by_value(output, _epsilon, 1. - _epsilon)
        return -tf.reduce_sum(target * tf.log(output + delta),
                              axis=len(output.get_shape()) - 1)
    else:
        return tf.nn.softmax_cross_entropy_with_logits(labels=target,
                                                       logits=output)
def loss_bce(y_true, y_pred):
    # Bootstrapped binary cross entropy (hard bootstrapping).
    from keras.backend.tensorflow_backend import _to_tensor
    target_tensor = y_true
    prediction_tensor = y_pred
    _epsilon = _to_tensor(K.epsilon(), prediction_tensor.dtype.base_dtype)
    prediction_tensor = K.tf.clip_by_value(prediction_tensor, _epsilon, 1 - _epsilon)
    prediction_tensor = K.tf.log(prediction_tensor / (1 - prediction_tensor))
    alpha = 0.95
    # Soft bootstrap alternative:
    # bootstrap_target_tensor = alpha * target_tensor + (1.0 - alpha) * K.tf.sigmoid(prediction_tensor)
    # Hard bootstrap:
    bootstrap_target_tensor = alpha * target_tensor + (1.0 - alpha) * K.tf.cast(
        K.tf.sigmoid(prediction_tensor) > 0.5, K.tf.float32)
    return K.mean(
        K.tf.nn.sigmoid_cross_entropy_with_logits(
            labels=bootstrap_target_tensor, logits=prediction_tensor))
def my_weighted_binary_crossentropy(output, target, from_logits=False):
    '''
    Note: this implementation is based on the TensorFlow implementation.
    Binary crossentropy between an output tensor and a target tensor.
    '''
    # Note: tf.nn.weighted_cross_entropy_with_logits
    # expects logits, Keras expects probabilities.
    if not from_logits:
        # Transform back to logits; avoid numerical instability
        # with _EPSILON clipping.
        epsilon = TB._to_tensor(_EPSILON, output.dtype.base_dtype)
        output = tf.clip_by_value(output, epsilon, 1 - epsilon)
        output = tf.log(output / (1 - output))
    # `tf_count` (expected in the enclosing scope) counts how often a value
    # occurs in a tensor.
    zeros = tf_count(target, 0)
    non_zeros = tf_count(target, 1)
    # pos_weight: ratio of negative to positive targets.
    weight = tf.div(tf.cast(zeros, tf.float32),
                    tf.cast(non_zeros, tf.float32))
    value = tf.nn.weighted_cross_entropy_with_logits(targets=target,
                                                     logits=output,
                                                     pos_weight=weight)
    return value
def compute_output_shape(self, input_shape, symbolic=False):
    """
    output.ndim = input.ndim; the sizes of the last two dimensions are
    modified by the convolution.

    symbolic = True if requesting a symbolic expression for the output shape.
    """
    input_seq_length = input_shape[-2]
    output_seq_length = conv_utils.conv_output_length(
        input_seq_length,
        self.kernel_size[0],
        padding=self.padding,
        stride=self.strides[0],
        dilation=self.dilation_rate[0])
    # Handle the different cases of calls to this method ...
    if K.backend() == 'theano':
        # In Theano a tensor's shape is a tuple.
        output_shape = tuple(input_shape[:-2]) + (output_seq_length, self.filters)
    elif K.backend() == 'tensorflow':
        if symbolic:
            # In TensorFlow a tensor's shape is a tensor.
            out_shape_sfx = _to_tensor([output_seq_length, self.filters], 'int32')
            output_shape = K.concatenate([input_shape[:-2], out_shape_sfx], axis=0)
        else:
            if isinstance(input_shape, (list, tuple)):
                output_shape = tuple(input_shape[:-2]) + (output_seq_length, self.filters)
            else:
                # Not clear if this case ever happens.
                output_shape = tuple([None] * K.ndim(input_shape))
    else:
        raise NotImplementedError("Backend '{}' not supported".format(K.backend()))
    return output_shape
def weighted_binary_crossentropy(target, output): """ Weighted binary crossentropy between an output tensor and a target tensor. POS_WEIGHT is used as a multiplier for the positive targets. Combination of the following functions: * keras.losses.binary_crossentropy * keras.backend.tensorflow_backend.binary_crossentropy * tf.nn.weighted_cross_entropy_with_logits """ # transform back to logits _epsilon = tfb._to_tensor(tfb.epsilon(), output.dtype.base_dtype) output = tf.clip_by_value(output, _epsilon, 1 - _epsilon) output = tf.log(output / (1 - output)) # compute weighted loss loss = tf.nn.weighted_cross_entropy_with_logits(targets=target, logits=output, pos_weight=true_weight) return tf.reduce_mean(loss, axis=-1)
def binary_crossentropy_custom_tf(target, output, from_logits=True):
    """Binary crossentropy between an output tensor and a target tensor.

    # Arguments
        target: A tensor with the same shape as `output`.
        output: A tensor.
        from_logits: Whether `output` is expected to be a logits tensor.
            By default, we consider that `output` encodes logits.

    # Returns
        A tensor.
    """
    # Note: tf.nn.sigmoid_cross_entropy_with_logits
    # expects logits, Keras expects probabilities.
    if not from_logits:
        # Transform the probabilities back to logits.
        _epsilon = tfb._to_tensor(tfb.epsilon(), output.dtype.base_dtype)
        output = tf.clip_by_value(output, _epsilon, 1 - _epsilon)
        output = tf.log(output / (1 - output))
    return tf.nn.sigmoid_cross_entropy_with_logits(labels=target, logits=output)
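# A sketch of pairing the loss above with a linear-output model, so the raw
# logits flow straight into tf.nn.sigmoid_cross_entropy_with_logits and the
# clip/log round-trip is skipped entirely; layer sizes are illustrative.
from keras.models import Sequential
from keras.layers import Dense

logit_model = Sequential([
    Dense(32, activation='relu', input_shape=(20,)),
    Dense(1),  # no sigmoid: the outputs are logits
])
logit_model.compile(optimizer='adam', loss=binary_crossentropy_custom_tf)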
# Inside a loop over the sentences of the document: map each word to its
# index (out-of-vocabulary words get `oov_idx`).
idxs = [word_to_index_han[elt] if elt in word_to_index_han else oov_idx
        for elt in words]
sents_idxs.append(idxs)

# Pad (or truncate) every sentence to the same length.
max_sent_size = min(max([len(s) for s in sents_idxs]), max_sent_size_overall)
sents_idxs_padded = [s + [padding_idx] * (max_sent_size - len(s))
                     if len(s) < max_sent_size else s[:max_sent_size]
                     for s in sents_idxs]

reshaped_sentences = np.reshape(np.array(sents_idxs_padded),
                                (1, len(sents), max_sent_size))
# A layer, unlike a model, requires a TensorFlow tensor as input.
reshaped_sentences_tensor = _to_tensor(reshaped_sentences, dtype='float32')

print('== attention over words ==')
sents_att_coeffs = TimeDistributed(get_sent_att_coeffs)(reshaped_sentences_tensor)
word_coeffs = sents_att_coeffs.eval(session=sess)
word_coeffs = np.reshape(word_coeffs, (len(sents), max_sent_size))

doc_att_tensor = get_doc_attention_coeffs(reshaped_sentences_tensor)
doc_att = doc_att_tensor.eval(session=sess)[0]

res_tensor = han(reshaped_sentences_tensor)
res = res_tensor.eval(session=sess)
print(doc_att)

my_wcs = []
my_values_array = []
def run_layer(keras_layer, x):
    # Add a batch dimension, run the layer's call on the resulting tensor,
    # fetch the value, and squeeze the batch dimension back out.
    return get_value(
        keras_layer.call(_to_tensor(np.expand_dims(x, 0), FLOAT_DTYPE))).squeeze()
def my_bce(target, output):
    epsilon = _to_tensor(_EPSILON, output.dtype.base_dtype)
    output = tf.clip_by_value(output, epsilon, 1. - epsilon)
    # Crossentropy over all but the last target channel.
    return -tf.reduce_sum(target[:, :, :, :-1] * tf.log(output), axis=-1)
my_review_text = [[index_to_word[idx] for idx in sent if idx in index_to_word]
                  for sent in my_review.tolist()[0]]

# = = = attention over sentences in the document
sent_coeffs = get_sent_attention_coeffs.predict(my_review)
sent_coeffs = sent_coeffs[0, :, :]
for elt in zip(sent_coeffs[:, 0].tolist(),
               [' '.join(elt) for elt in my_review_text]):
    print(round(elt[0] * 100, 2), elt[1])

# = = = attention over words in each sentence
# A layer, unlike a model, requires a TensorFlow tensor as input.
my_review_tensor = _to_tensor(my_review, dtype='float32')
# Apply the 'get_word_att_coeffs' model over all the sentences in
# 'my_review_tensor' and store the results as 'word_coeffs'.
word_coeffs = TimeDistributed(get_word_att_coeffs)(my_review_tensor)
# shape = (1, 7, 30, 1): (batch size, nb of sents in doc, nb of words per sent, coeff)
word_coeffs = K.eval(word_coeffs)
# shape = (7, 30): coeff for each word in each sentence
word_coeffs = word_coeffs[0, :, :, 0]
# Re-weigh the word coefficients according to sentence importance.
word_coeffs = sent_coeffs * word_coeffs
word_coeffs = np.round((word_coeffs * 100).astype(np.float64), 2)
def clip_negatives(w, min_value=eps()):
    # Clip the weights into [min_value, 3000] so they stay strictly positive.
    max_value = _to_tensor(3000, w.dtype.base_dtype)
    zero = _to_tensor(min_value, w.dtype.base_dtype)
    return tf.clip_by_value(w, zero, max_value)
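# A quick illustrative call (TF 1.x assumed, with `eps()` from the same scope
# as above), e.g. to keep a weight tensor strictly positive before a log.
w = tf.constant([-1.0, 0.5, 5000.0])
w_clipped = clip_negatives(w)  # -> [eps(), 0.5, 3000.0]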