def stage3(x, label):
  x = stage(x, "s3")
  logits = math_ops.reduce_sum(x, axis=[1])
  loss = math_ops.reduce_mean(
      nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=label))
  return loss
def stage4(x, label):
  with variable_scope.variable_scope("stage4", use_resource=True):
    logits = math_ops.reduce_sum(x, axis=[-1])
    loss = math_ops.reduce_mean(
        nn.sparse_softmax_cross_entropy_with_logits(logits=logits,
                                                    labels=label))
    return loss
def sparse_softmax_cross_entropy(logits, labels, weights=1.0, scope=None):
  """Cross-entropy loss using `tf.nn.sparse_softmax_cross_entropy_with_logits`.

  `weights` acts as a coefficient for the loss. If a scalar is provided,
  then the loss is simply scaled by the given value. If `weights` is a
  tensor of size [`batch_size`], then the loss weights apply to each
  corresponding sample.

  Args:
    logits: [batch_size, num_classes] logits outputs of the network.
    labels: [batch_size, 1] or [batch_size] labels of dtype `int32` or `int64`
      in the range `[0, num_classes)`.
    weights: Coefficients for the loss. The tensor must be a scalar or a tensor
      of shape [batch_size] or [batch_size, 1].
    scope: the scope for the operations performed in computing the loss.

  Returns:
    A scalar `Tensor` representing the mean loss value.

  Raises:
    ValueError: If the shapes of `logits`, `labels`, and `weights` are
      incompatible, or if `weights` is None.
  """
  with ops.name_scope(scope, "sparse_softmax_cross_entropy_loss",
                      [logits, labels, weights]) as scope:
    labels = array_ops.reshape(labels, shape=[array_ops.shape(labels)[0]])
    losses = nn.sparse_softmax_cross_entropy_with_logits(labels=labels,
                                                         logits=logits,
                                                         name="xentropy")
    return compute_weighted_loss(losses, weights, scope=scope)
def sparse_softmax_cross_entropy(labels, logits, weights=1.0, scope=None,
                                 loss_collection=ops.GraphKeys.LOSSES,
                                 reduction=Reduction.SUM_BY_NONZERO_WEIGHTS):
  """Cross-entropy loss using `tf.nn.sparse_softmax_cross_entropy_with_logits`.

  `weights` acts as a coefficient for the loss. If a scalar is provided,
  then the loss is simply scaled by the given value. If `weights` is a
  tensor of shape `[batch_size]`, then the loss weights apply to each
  corresponding sample.

  Args:
    labels: `Tensor` of shape `[d_0, d_1, ..., d_{r-1}]` (where `r` is rank of
      `labels` and result) and dtype `int32` or `int64`. Each entry in `labels`
      must be an index in `[0, num_classes)`. Other values will raise an
      exception when this op is run on CPU, and return `NaN` for corresponding
      loss and gradient rows on GPU.
    logits: Unscaled log probabilities of shape
      `[d_0, d_1, ..., d_{r-1}, num_classes]` and dtype `float16`, `float32` or
      `float64`.
    weights: Coefficients for the loss. This must be scalar or broadcastable to
      `labels` (i.e. same rank and each dimension is either 1 or the same).
    scope: the scope for the operations performed in computing the loss.
    loss_collection: collection to which the loss will be added.
    reduction: Type of reduction to apply to loss.

  Returns:
    Weighted loss `Tensor` of the same type as `logits`. If `reduction` is
    `NONE`, this has the same shape as `labels`; otherwise, it is scalar.

  Raises:
    ValueError: If the shapes of `logits`, `labels`, and `weights` are
      incompatible, or if any of them are None.

  @compatibility(eager)
  The `loss_collection` argument is ignored when executing eagerly. Consider
  holding on to the return value or collecting losses via a `tf.keras.Model`.
  @end_compatibility
  """
  if labels is None:
    raise ValueError("labels must not be None.")
  if logits is None:
    raise ValueError("logits must not be None.")
  with ops.name_scope(scope, "sparse_softmax_cross_entropy_loss",
                      (logits, labels, weights)) as scope:
    # As documented above in Args, labels contain class IDs and logits contains
    # 1 probability per class ID, so we expect rank(logits) - rank(labels) == 1;
    # therefore, expected_rank_diff=1.
    labels, logits, weights = _remove_squeezable_dimensions(
        labels, logits, weights, expected_rank_diff=1)
    losses = nn.sparse_softmax_cross_entropy_with_logits(labels=labels,
                                                         logits=logits,
                                                         name="xentropy")
    return compute_weighted_loss(
        losses, weights, scope, loss_collection, reduction=reduction)
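# --- Usage sketch (not from the original sources) -------------------------
# A minimal, hedged example of calling the public wrapper documented above
# through tf.compat.v1.losses in graph mode. The placeholder shapes, the
# 10-class output and the feed values are illustrative assumptions.
import numpy as np
import tensorflow.compat.v1 as tf

tf.disable_eager_execution()

logits = tf.placeholder(tf.float32, [None, 10])   # [batch_size, num_classes]
labels = tf.placeholder(tf.int32, [None])         # class IDs in [0, 10)
weights = tf.placeholder(tf.float32, [None])      # per-sample loss weights

# Scalar loss; the default reduction is SUM_BY_NONZERO_WEIGHTS.
loss = tf.losses.sparse_softmax_cross_entropy(
    labels=labels, logits=logits, weights=weights)

with tf.Session() as sess:
  value = sess.run(loss, feed_dict={
      logits: np.random.randn(4, 10).astype(np.float32),
      labels: np.array([1, 0, 3, 9], dtype=np.int32),
      weights: np.ones(4, dtype=np.float32),
  })
# --------------------------------------------------------------------------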
def _softmax_cross_entropy_loss(logits, target):
  check_shape_op = control_flow_ops.Assert(
      math_ops.less_equal(array_ops.rank(target), 2),
      ["target's shape should be either [batch_size, 1] or [batch_size]"])
  with ops.control_dependencies([check_shape_op]):
    target = array_ops.reshape(target, shape=[array_ops.shape(target)[0]])
  return nn.sparse_softmax_cross_entropy_with_logits(logits, target)
def fwd_fn(idx, label):
  np.random.seed(1)
  embedding_shape = (dataset_size, embedding_size)
  embedding_initializer = np.random.normal(0, 1, embedding_shape).astype(
      np.float32)
  weights_shape = (embedding_size, embedding_size)
  weights_initializer = np.random.normal(0, 1, weights_shape).astype(np.float32)
  with variable_scope.variable_scope("part1", use_resource=True):
    embedding = variable_scope.get_variable(
        "c",
        dtype=np.float32,
        initializer=embedding_initializer,
        trainable=True)
    weight = variable_scope.get_variable("w0",
                                         dtype=np.float32,
                                         initializer=weights_initializer,
                                         trainable=True)
    x = embedding_ops.embedding_lookup(embedding, idx)
    x = math_ops.matmul(x, weight)
    logits = math_ops.reduce_sum(x, axis=[-1])
    loss = math_ops.reduce_mean(
        nn.sparse_softmax_cross_entropy_with_logits(logits=logits,
                                                    labels=label))
    return loss
def stage3(x, label):
  with variable_scope.variable_scope("stage3", use_resource=True):
    x = math_ops.reduce_mean(x, axis=[1, 2])
    x = fc(x, 100)
    loss = math_ops.reduce_mean(
        nn.sparse_softmax_cross_entropy_with_logits(logits=x, labels=label))
    return loss
def xent(y_true, y_pred):
  y_true = keras.backend.cast(
      keras.backend.reshape(y_true, (-1,)), dtypes.int32)
  return nn.sparse_softmax_cross_entropy_with_logits(
      labels=y_true, logits=y_pred)
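# --- Usage sketch (not from the original sources) -------------------------
# A hedged example of how a per-example custom loss like `xent` above could
# be passed to Keras. The toy model, feature size (20) and 10-class logits
# output are illustrative assumptions; Keras averages the returned vector.
import tensorflow as tf
from tensorflow import keras

model = keras.Sequential([
    keras.layers.Dense(64, activation="relu", input_shape=(20,)),
    keras.layers.Dense(10),  # raw logits, no softmax activation
])
model.compile(optimizer="adam", loss=xent)
# --------------------------------------------------------------------------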
def model():
  print("building model ...")
  with tf.variable_scope('train'):
    print("building model ...")
    X_pl = tf.placeholder(tf.float32, [None, num_features])
    X_expand = tf.expand_dims(X_pl, axis=2)
    print("X_pl", X_pl.get_shape())
    t_pl = tf.placeholder(tf.int32, [None,])
    print("t_pl", t_pl.get_shape())
    is_training_pl = tf.placeholder(tf.bool)
    cell_fw = tf.nn.rnn_cell.GRUCell(205)
    cell_bw = tf.nn.rnn_cell.GRUCell(205)
    seq_len = tf.reduce_sum(tf.ones(tf.shape(X_pl), dtype=tf.int32), axis=1)
    _, enc_states = tf.nn.bidirectional_dynamic_rnn(cell_fw=cell_fw,
                                                    cell_bw=cell_bw,
                                                    inputs=X_expand,
                                                    sequence_length=seq_len,
                                                    dtype=tf.float32)
    enc_states = tf.concat(1, enc_states)
    enc_states_drop = dropout(enc_states, is_training=is_training_pl)
    l1 = fully_connected(enc_states_drop, 200, activation_fn=None)
    l1 = batch_norm(l1, is_training=is_training_pl)
    l1_relu = relu(l1)
    l1_dropout = dropout(l1_relu, is_training=is_training_pl)
    l2 = fully_connected(l1_dropout, 200, activation_fn=None)
    l2 = batch_norm(l2, is_training=is_training_pl)
    l2_relu = relu(l2)
    l_out = fully_connected(l2_relu, num_outputs=num_classes,
                            activation_fn=None)
    l_out_softmax = tf.nn.softmax(l_out)
    tf.contrib.layers.summarize_variables()

  with tf.variable_scope('metrics'):
    loss = sparse_softmax_cross_entropy_with_logits(l_out, t_pl)
    print("loss", loss.get_shape())
    loss = tf.reduce_mean(loss)
    print("loss", loss.get_shape())
    tf.summary.scalar('train/loss', loss)
    argmax = tf.to_int32(tf.argmax(l_out, 1))
    print("argmax", argmax.get_shape())
    correct = tf.to_float(tf.equal(argmax, t_pl))
    print("correct,", correct.get_shape())
    accuracy = tf.reduce_mean(correct)
    print("accuracy", accuracy.get_shape())

  with tf.variable_scope('optimizer'):
    print("building optimizer ...")
    global_step = tf.Variable(0, name='global_step', trainable=False)
    optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
    grads_and_vars = optimizer.compute_gradients(loss)
    gradients, variables = zip(*grads_and_vars)
    clipped_gradients, global_norm = (
        tf.clip_by_global_norm(gradients, clip_norm))
    clipped_grads_and_vars = zip(clipped_gradients, variables)
    tf.summary.scalar('train/global_gradient_norm', global_norm)
    train_op = optimizer.apply_gradients(clipped_grads_and_vars,
                                         global_step=global_step)

  return (X_pl, t_pl, is_training_pl, l_out, l_out_softmax, loss, accuracy,
          train_op, global_step)
def _softmax_cross_entropy_loss(logits, labels):
  # Check that we got integer for classification.
  if not labels.dtype.is_integer:
    raise ValueError("Labels dtype should be integer "
                     "Instead got %s." % labels.dtype)
  # sparse_softmax_cross_entropy_with_logits requires [batch_size] labels.
  if len(labels.get_shape()) == 2:
    labels = array_ops.squeeze(labels, squeeze_dims=[1])
  return nn.sparse_softmax_cross_entropy_with_logits(logits, labels)
def model():
  print("building model ...")
  with tf.variable_scope('train'):
    print("building model ...")
    X_pl = tf.placeholder(tf.float32, [None, num_features])
    X_expand = tf.expand_dims(X_pl, axis=2)
    print("X_pl", X_pl.get_shape())
    t_pl = tf.placeholder(tf.int32, [None,])
    print("t_pl", t_pl.get_shape())
    is_training_pl = tf.placeholder(tf.bool)
    cell_fw = tf.nn.rnn_cell.GRUCell(100)
    cell_bw = tf.nn.rnn_cell.GRUCell(100)
    seq_len = tf.reduce_sum(tf.ones(tf.shape(X_pl), dtype=tf.int32), axis=1)
    _, enc_states = tf.nn.bidirectional_dynamic_rnn(cell_fw=cell_fw,
                                                    cell_bw=cell_bw,
                                                    inputs=X_expand,
                                                    sequence_length=seq_len,
                                                    dtype=tf.float32)
    enc_states = tf.concat(1, enc_states)
    enc_states_drop = dropout(enc_states, is_training=is_training_pl)
    l1 = fully_connected(enc_states_drop, 100, activation_fn=None)
    l1 = batch_norm(l1, is_training=is_training_pl)
    l1_relu = relu(l1)
    l1_dropout = dropout(l1_relu, is_training=is_training_pl)
    l2 = fully_connected(l1_dropout, 100, activation_fn=None)
    l2 = batch_norm(l2, is_training=is_training_pl)
    l2_relu = relu(l2)
    l_out = fully_connected(l2_relu, num_outputs=num_classes,
                            activation_fn=None)
    l_out_softmax = tf.nn.softmax(l_out)
    tf.contrib.layers.summarize_variables()

  with tf.variable_scope('metrics'):
    loss = sparse_softmax_cross_entropy_with_logits(l_out, t_pl)
    print("loss", loss.get_shape())
    loss = tf.reduce_mean(loss)
    print("loss", loss.get_shape())
    tf.summary.scalar('train/loss', loss)
    argmax = tf.to_int32(tf.argmax(l_out, 1))
    print("argmax", argmax.get_shape())
    correct = tf.to_float(tf.equal(argmax, t_pl))
    print("correct,", correct.get_shape())
    accuracy = tf.reduce_mean(correct)
    print("accuracy", accuracy.get_shape())

  with tf.variable_scope('optimizer'):
    print("building optimizer ...")
    global_step = tf.Variable(0, name='global_step', trainable=False)
    optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
    grads_and_vars = optimizer.compute_gradients(loss)
    gradients, variables = zip(*grads_and_vars)
    clipped_gradients, global_norm = (
        tf.clip_by_global_norm(gradients, clip_norm))
    clipped_grads_and_vars = zip(clipped_gradients, variables)
    tf.summary.scalar('train/global_gradient_norm', global_norm)
    train_op = optimizer.apply_gradients(clipped_grads_and_vars,
                                         global_step=global_step)

  return (X_pl, t_pl, is_training_pl, l_out, l_out_softmax, loss, accuracy,
          train_op, global_step)
def body(x, labels):
  x = cached_func(x, "1")
  x = cached_func(x, "2")
  loss = math_ops.reduce_mean(
      nn.sparse_softmax_cross_entropy_with_logits(logits=x, labels=labels))
  train_op = gradient_descent.GradientDescentOptimizer(0.001).minimize(loss)
  return x, train_op
def _softmax_cross_entropy_loss(logits, target):
  # Check that we got integer for classification.
  if not target.dtype.is_integer:
    raise ValueError("Target's dtype should be integer "
                     "Instead got %s." % target.dtype)
  # sparse_softmax_cross_entropy_with_logits requires [batch_size] target.
  if len(target.get_shape()) == 2:
    target = array_ops.squeeze(target, squeeze_dims=[1])
  loss_vec = nn.sparse_softmax_cross_entropy_with_logits(logits, target)
  return loss_vec
def _softmax_cross_entropy_loss(logits, target):
  # Check that we got int32/int64 for classification.
  if not target.dtype.is_compatible_with(dtypes.int64) and not target.dtype.is_compatible_with(dtypes.int32):
    raise ValueError("Target's dtype should be int32, int64 or compatible. "
                     "Instead got %s." % target.dtype)
  # sparse_softmax_cross_entropy_with_logits requires [batch_size] target.
  if len(target.get_shape()) == 2:
    target = array_ops.squeeze(target, squeeze_dims=[1])
  loss_vec = nn.sparse_softmax_cross_entropy_with_logits(logits, target)
  return loss_vec
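# --- Shape-contract sketch (not from the original sources) ----------------
# The helpers above squeeze [batch_size, 1] targets because
# tf.nn.sparse_softmax_cross_entropy_with_logits expects labels with one
# dimension less than logits. The 4x3 example values are illustrative.
import numpy as np
import tensorflow.compat.v1 as tf

logits = tf.constant(np.random.randn(4, 3), dtype=tf.float32)  # [batch, classes]
labels_2d = tf.constant([[0], [2], [1], [2]], dtype=tf.int32)  # [batch, 1]
labels_1d = tf.squeeze(labels_2d, axis=[1])                    # [batch]

per_example = tf.nn.sparse_softmax_cross_entropy_with_logits(
    labels=labels_1d, logits=logits)  # shape [4], one loss per example
# --------------------------------------------------------------------------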
def model():
  tf.set_random_seed(1)
  print("building model ...")
  with tf.variable_scope('train'):
    print("building model ...")
    X_pl = tf.placeholder(tf.float32, [None, num_features])
    print("X_pl", X_pl.get_shape())
    t_pl = tf.placeholder(tf.int32, [None,])
    print("t_pl", t_pl.get_shape())
    is_training_pl = tf.placeholder(tf.bool)
    X_bn = batch_norm(X_pl, is_training=is_training_pl)
    print("X_bn", X_bn.get_shape())
    l1 = fully_connected(X_pl, num_outputs=100,
                         activation_fn=relu)  # , normalizer_fn=batch_norm)
    print("l1", l1.get_shape())
    l1_drop = dropout(l1, is_training=is_training_pl)
    print("l1_drop", l1_drop.get_shape())
    l_out = fully_connected(l1_drop, num_outputs=num_classes,
                            activation_fn=None)
    print("l_out", l_out.get_shape())
    l_out_softmax = tf.nn.softmax(l_out)
    tf.contrib.layers.summarize_variables()

  with tf.variable_scope('metrics'):
    loss = sparse_softmax_cross_entropy_with_logits(l_out, t_pl)
    print("loss", loss.get_shape())
    loss = tf.reduce_mean(loss)
    print("loss", loss.get_shape())
    tf.summary.scalar('train/loss', loss)
    argmax = tf.to_int32(tf.argmax(l_out, 1))
    print("argmax", argmax.get_shape())
    correct = tf.to_float(tf.equal(argmax, t_pl))
    print("correct,", correct.get_shape())
    accuracy = tf.reduce_mean(correct)
    print("accuracy", accuracy.get_shape())

  with tf.variable_scope('optimizer'):
    print("building optimizer ...")
    global_step = tf.Variable(0, name='global_step', trainable=False)
    optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
    grads_and_vars = optimizer.compute_gradients(loss)
    gradients, variables = zip(*grads_and_vars)
    clipped_gradients, global_norm = (
        tf.clip_by_global_norm(gradients, clip_norm))
    clipped_grads_and_vars = zip(clipped_gradients, variables)
    tf.summary.scalar('train/global_gradient_norm', global_norm)
    train_op = optimizer.apply_gradients(clipped_grads_and_vars,
                                         global_step=global_step)

  return (X_pl, t_pl, is_training_pl, l_out, l_out_softmax, loss, accuracy,
          train_op, global_step)
def body(a, b, labels):
  a, b = _test_multi_conv_wrapper(convs)(a, b)
  a, b = _test_multi_conv_wrapper(convs)(a, b)
  a = math_ops.reduce_mean(a, axis=[1, 2]) + math_ops.reduce_mean(
      b, axis=[1, 2])
  loss = math_ops.reduce_mean(
      nn.sparse_softmax_cross_entropy_with_logits(logits=a, labels=labels))
  train_op = gradient_descent.GradientDescentOptimizer(0.001).minimize(loss)
  return a, train_op
def _softmax_cross_entropy_loss(logits, target):
  # Check that we got int32/int64 for classification.
  if (not target.dtype.is_compatible_with(dtypes.int64) and
      not target.dtype.is_compatible_with(dtypes.int32)):
    raise ValueError("Target's dtype should be int32, int64 or compatible. "
                     "Instead got %s." % target.dtype)
  # sparse_softmax_cross_entropy_with_logits requires [batch_size] target.
  if len(target.get_shape()) == 2:
    target = array_ops.squeeze(target, squeeze_dims=[1])
  loss_vec = nn.sparse_softmax_cross_entropy_with_logits(logits, target)
  return loss_vec
def graph(x, label):
  x = conv(x, 3, 1, 16)
  x = nn.relu(x)
  x = conv(x, 3, 1, 100)
  x = nn.relu(x)
  x = math_ops.reduce_mean(x, axis=[1, 2])
  loss = math_ops.reduce_mean(
      nn.sparse_softmax_cross_entropy_with_logits(logits=x, labels=label))
  opt = gradient_descent.GradientDescentOptimizer(0.01).minimize(loss)
  return loss, opt
def graph(img, label):
  x = conv(img, 7, 2, 16)
  x = nn.relu(x)
  x = max_pool(x, ksize=3, stride=2)
  x = block("b", 2, 64, 1, x)
  x = math_ops.reduce_mean(x, axis=[1, 2])
  x = fc(x, 100)
  loss = math_ops.reduce_mean(
      nn.sparse_softmax_cross_entropy_with_logits(logits=x, labels=label))
  opt = gradient_descent.GradientDescentOptimizer(0.01).minimize(loss)
  return loss, opt
def sparse_softmax_cross_entropy(
    labels, logits, weights=1.0, scope=None,
    loss_collection=ops.GraphKeys.LOSSES,
    reduction=Reduction.WEIGHTED_SUM_BY_NONZERO_WEIGHTS):
  """Cross-entropy loss using `tf.nn.sparse_softmax_cross_entropy_with_logits`.

  `weights` acts as a coefficient for the loss. If a scalar is provided,
  then the loss is simply scaled by the given value. If `weights` is a
  tensor of shape [`batch_size`], then the loss weights apply to each
  corresponding sample.

  Args:
    labels: `Tensor` of shape `[d_0, d_1, ..., d_{r-1}]` (where `r` is rank of
      `labels` and result) and dtype `int32` or `int64`. Each entry in `labels`
      must be an index in `[0, num_classes)`. Other values will raise an
      exception when this op is run on CPU, and return `NaN` for corresponding
      loss and gradient rows on GPU.
    logits: Unscaled log probabilities of shape
      `[d_0, d_1, ..., d_{r-1}, num_classes]` and dtype `float32` or `float64`.
    weights: Coefficients for the loss. This must be scalar or of same rank as
      `labels`.
    scope: the scope for the operations performed in computing the loss.
    loss_collection: collection to which the loss will be added.
    reduction: Type of reduction to apply to loss.

  Returns:
    A scalar `Tensor` that returns the weighted loss.

  Raises:
    ValueError: If the shapes of logits, labels, and weight are incompatible, or
      if `weights` is None.
  """
  with ops.name_scope(scope, "sparse_softmax_cross_entropy_loss",
                      (logits, labels, weights)) as scope:
    # As documented above in Args, labels contain class IDs and logits contains
    # 1 probability per class ID, so we expect rank(logits) - rank(labels) == 1;
    # therefore, expected_rank_diff=1.
    labels, logits, weights = _remove_squeezable_dimensions(
        labels, logits, weights, expected_rank_diff=1)
    losses = nn.sparse_softmax_cross_entropy_with_logits(labels=labels,
                                                         logits=logits,
                                                         name="xentropy")
    return compute_weighted_loss(losses, weights, scope, loss_collection,
                                 reduction=reduction)
def stage5(x, label):
  # Reuse the weight here.
  with variable_scope.variable_scope("vs", use_resource=True, reuse=True):
    weight = variable_scope.get_variable(
        "w0",
        shape=[4, 4],
        dtype=np.float32,
        initializer=init_ops.ones_initializer())
    x = math_ops.matmul(x, weight)
    logits = math_ops.reduce_mean(x, axis=[1])
    loss = math_ops.reduce_mean(
        nn.sparse_softmax_cross_entropy_with_logits(logits=logits,
                                                    labels=label))
    return loss
def fwd_fn(idx, label):
  with variable_scope.variable_scope("part1", use_resource=True):
    embedding = variable_scope.get_variable(
        "c",
        shape=[10, 1216],
        dtype=np.float32,
        initializer=init_ops.constant_initializer(10.01),
        trainable=True)
    x = embedding_ops.embedding_lookup(embedding, idx)
    logits = math_ops.reduce_sum(x, axis=[-1])
    loss = math_ops.reduce_mean(
        nn.sparse_softmax_cross_entropy_with_logits(logits=logits,
                                                    labels=label))
    return loss
def deprecated_flipped_sparse_softmax_cross_entropy_with_logits(logits,
                                                                labels,
                                                                name=None):
  """Computes sparse softmax cross entropy between `logits` and `labels`.

  This function differs from tf.nn.sparse_softmax_cross_entropy_with_logits
  only in the argument order.

  Measures the probability error in discrete classification tasks in which the
  classes are mutually exclusive (each entry is in exactly one class). For
  example, each CIFAR-10 image is labeled with one and only one label: an image
  can be a dog or a truck, but not both.

  **NOTE:** For this operation, the probability of a given label is considered
  exclusive. That is, soft classes are not allowed, and the `labels` vector
  must provide a single specific index for the true class for each row of
  `logits` (each minibatch entry). For soft softmax classification with a
  probability distribution for each entry, see
  `softmax_cross_entropy_with_logits`.

  **WARNING:** This op expects unscaled logits, since it performs a softmax on
  `logits` internally for efficiency. Do not call this op with the output of
  `softmax`, as it will produce incorrect results.

  A common use case is to have logits of shape `[batch_size, num_classes]` and
  labels of shape `[batch_size]`. But higher dimensions are supported.

  Args:
    logits: Unscaled log probabilities of rank `r` and shape
      `[d_0, d_1, ..., d_{r-2}, num_classes]` and dtype `float32` or `float64`.
    labels: `Tensor` of shape `[d_0, d_1, ..., d_{r-2}]` and dtype `int32` or
      `int64`. Each entry in `labels` must be an index in `[0, num_classes)`.
      Other values will raise an exception when this op is run on CPU, and
      return `NaN` for corresponding loss and gradient rows on GPU.
    name: A name for the operation (optional).

  Returns:
    A `Tensor` of the same shape as `labels` and of the same type as `logits`
    with the softmax cross entropy loss.

  Raises:
    ValueError: If logits are scalars (need to have rank >= 1) or if the rank
      of the labels is not equal to the rank of the logits minus one.
  """
  return nn.sparse_softmax_cross_entropy_with_logits(
      labels=labels, logits=logits, name=name)
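# --- Argument-order sketch (not from the original sources) ----------------
# A small, hedged illustration that the flipped wrapper above only changes
# the positional argument order relative to the nn op; the tensor values and
# single-example batch are made up.
import tensorflow.compat.v1 as tf

logits = tf.constant([[2.0, 0.5, -1.0]])    # [1, num_classes]
labels = tf.constant([0], dtype=tf.int32)   # [1]

flipped = deprecated_flipped_sparse_softmax_cross_entropy_with_logits(
    logits, labels)                         # positional: logits first
regular = tf.nn.sparse_softmax_cross_entropy_with_logits(
    labels=labels, logits=logits)           # keyword form; same result
# --------------------------------------------------------------------------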
def deprecated_flipped_sparse_softmax_cross_entropy_with_logits(
    logits, labels, name=None):
  """Computes sparse softmax cross entropy between `logits` and `labels`.

  This function differs from tf.nn.sparse_softmax_cross_entropy_with_logits
  only in the argument order.

  Measures the probability error in discrete classification tasks in which the
  classes are mutually exclusive (each entry is in exactly one class). For
  example, each CIFAR-10 image is labeled with one and only one label: an image
  can be a dog or a truck, but not both.

  **NOTE:** For this operation, the probability of a given label is considered
  exclusive. That is, soft classes are not allowed, and the `labels` vector
  must provide a single specific index for the true class for each row of
  `logits` (each minibatch entry). For soft softmax classification with a
  probability distribution for each entry, see
  `softmax_cross_entropy_with_logits`.

  **WARNING:** This op expects unscaled logits, since it performs a softmax on
  `logits` internally for efficiency. Do not call this op with the output of
  `softmax`, as it will produce incorrect results.

  A common use case is to have logits of shape `[batch_size, num_classes]` and
  labels of shape `[batch_size]`. But higher dimensions are supported.

  Args:
    logits: Unscaled log probabilities of rank `r` and shape
      `[d_0, d_1, ..., d_{r-2}, num_classes]` and dtype `float32` or `float64`.
    labels: `Tensor` of shape `[d_0, d_1, ..., d_{r-2}]` and dtype `int32` or
      `int64`. Each entry in `labels` must be an index in `[0, num_classes)`.
      Other values will raise an exception when this op is run on CPU, and
      return `NaN` for corresponding loss and gradient rows on GPU.
    name: A name for the operation (optional).

  Returns:
    A `Tensor` of the same shape as `labels` and of the same type as `logits`
    with the softmax cross entropy loss.

  Raises:
    ValueError: If logits are scalars (need to have rank >= 1) or if the rank
      of the labels is not equal to the rank of the logits minus one.
  """
  return nn.sparse_softmax_cross_entropy_with_logits(logits=logits,
                                                     labels=labels,
                                                     name=name)
def fwd_fn(img, label):
  with variable_scope.variable_scope("part1", use_resource=True):
    x = conv(img, 7, 2, 8)
    x = nn.relu(x)
    x = max_pool(x, ksize=3, stride=2)
  with variable_scope.variable_scope("part2", use_resource=True):
    x = block("b", 2, 32, 1, x)
  with variable_scope.variable_scope("part3", use_resource=True):
    x = math_ops.reduce_mean(x, axis=[1, 2])
    x = fc(x, 100)
    loss = math_ops.reduce_mean(
        nn.sparse_softmax_cross_entropy_with_logits(logits=x, labels=label))
    return loss
def _loss(self, logits, target, weight_tensor):
  if self._n_classes < 2:
    loss_vec = math_ops.square(logits - math_ops.to_float(target))
  elif self._n_classes == 2:
    loss_vec = nn.sigmoid_cross_entropy_with_logits(logits,
                                                    math_ops.to_float(target))
  else:
    loss_vec = nn.sparse_softmax_cross_entropy_with_logits(
        logits, array_ops.reshape(target, [-1]))

  if weight_tensor is None:
    return math_ops.reduce_mean(loss_vec, name="loss")
  else:
    loss_vec = array_ops.reshape(loss_vec, shape=(-1,))
    loss_vec = math_ops.mul(loss_vec,
                            array_ops.reshape(weight_tensor, shape=(-1,)))
    return math_ops.div(
        math_ops.reduce_sum(loss_vec),
        math_ops.to_float(math_ops.reduce_sum(weight_tensor)),
        name="loss")
def sparse_categorical_crossentropy(target, output, from_logits=False, axis=-1):
  """Categorical crossentropy with integer targets.

  Arguments:
      target: An integer tensor.
      output: A tensor resulting from a softmax (unless `from_logits` is True,
          in which case `output` is expected to be the logits).
      from_logits: Boolean, whether `output` is the result of a softmax, or is
          a tensor of logits.
      axis: Int specifying the channels axis. `axis=-1` corresponds to data
          format `channels_last`, and `axis=1` corresponds to data format
          `channels_first`.

  Returns:
      Output tensor.

  Raises:
      ValueError: if `axis` is neither -1 nor one of the axes of `output`.
  """
  if not from_logits:
    if (isinstance(output, (ops.EagerTensor, variables_module.Variable)) or
        output.op.type != 'Softmax'):
      epsilon_ = _constant_to_tensor(epsilon(), output.dtype.base_dtype)
      output = clip_ops.clip_by_value(output, epsilon_, 1 - epsilon_)
      output = math_ops.log(output)
    else:
      # When softmax activation function is used for output operation, we
      # use logits from the softmax function directly to compute loss in order
      # to prevent collapsing zero when training.
      # See b/117284466
      assert len(output.op.inputs) == 1
      output = output.op.inputs[0]

  rank = len(output.shape)
  axis = axis % rank
  if axis != rank - 1:
    permutation = list(range(axis)) + list(range(axis + 1, rank)) + [axis]
    output = array_ops.transpose(output, perm=permutation)

  output_shape = output.shape
  targets = cast(flatten(target), 'int64')
  logits = array_ops.reshape(output, [-1, int(output_shape[-1])])
  res = nn.sparse_softmax_cross_entropy_with_logits(
      labels=targets, logits=logits)
  if len(output_shape) >= 3:
    # If our output includes timesteps or spatial dimensions we need to reshape
    return array_ops.reshape(res, array_ops.shape(output)[:-1])
  else:
    return res
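# --- Usage sketch (not from the original sources) -------------------------
# A hedged example of calling the backend function above through the public
# tf.keras.backend symbol; the two-example, 3-class tensors are illustrative.
import tensorflow as tf

target = tf.constant([1, 2], dtype=tf.int64)              # integer class IDs
probs = tf.constant([[0.1, 0.8, 0.1],
                     [0.2, 0.2, 0.6]], dtype=tf.float32)  # softmax output

per_example = tf.keras.backend.sparse_categorical_crossentropy(target, probs)
# With raw logits instead of probabilities, pass from_logits=True.
# --------------------------------------------------------------------------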
def sparse_softmax_cross_entropy(labels, logits, weights=1.0, scope=None,
                                 loss_collection=ops.GraphKeys.LOSSES):
  """Cross-entropy loss using `tf.nn.sparse_softmax_cross_entropy_with_logits`.

  `weights` acts as a coefficient for the loss. If a scalar is provided,
  then the loss is simply scaled by the given value. If `weights` is a
  tensor of shape [`batch_size`], then the loss weights apply to each
  corresponding sample.

  WARNING: `weights` also supports dimensions of 1, but the broadcasting does
  not work as advertised, you'll wind up with weighted sum instead of weighted
  mean for any but the last dimension. This will be cleaned up soon, so please
  do not rely on the current behavior for anything but the shapes documented
  for `weights` below.

  Args:
    labels: [batch_size, 1] or [batch_size] target labels of dtype `int32` or
      `int64` in the range `[0, num_classes)`.
    logits: [batch_size, num_classes] logits outputs of the network.
    weights: Coefficients for the loss. This must be of shape `[batch_size]` or
      `[batch_size, 1]`.
    scope: the scope for the operations performed in computing the loss.
    loss_collection: collection to which the loss will be added.

  Returns:
    A scalar `Tensor` representing the mean loss value.

  Raises:
    ValueError: If the shapes of logits, labels, and weight are incompatible, or
      if `weights` is None.
  """
  with ops.name_scope(scope, "sparse_softmax_cross_entropy_loss",
                      [logits, labels, weights]) as scope:
    labels = array_ops.reshape(labels, shape=[array_ops.shape(labels)[0]])
    losses = nn.sparse_softmax_cross_entropy_with_logits(labels=labels,
                                                         logits=logits,
                                                         name="xentropy")
    # Reshape losses to [batch_size, 1] to be consistent with weights.
    losses = array_ops.reshape(losses, shape=[array_ops.shape(losses)[0], 1])
    return compute_weighted_loss(losses, weights, scope, loss_collection)
def graph(x, label):
  a = fc(x, 48)
  a = nn.relu(a)
  b = fc(x, 48)
  b = nn.relu(b)
  x = a + b
  a = fc(x, 100)
  a = nn.relu(a)
  b = fc(x, 100)
  b = nn.relu(b)
  x = a + b
  loss = math_ops.reduce_mean(
      nn.sparse_softmax_cross_entropy_with_logits(logits=x, labels=label))
  opt = gradient_descent.GradientDescentOptimizer(0.01).minimize(loss)
  return loss, opt
def _loss_vec(self, logits, target):
  if self._n_classes == 2:
    # sigmoid_cross_entropy_with_logits requires [batch_size, 1] target.
    if len(target.get_shape()) == 1:
      target = array_ops.expand_dims(target, dim=[1])
    loss_vec = nn.sigmoid_cross_entropy_with_logits(
        logits, math_ops.to_float(target))
  else:
    # Check that we got int32/int64 for classification.
    if (not target.dtype.is_compatible_with(dtypes.int64) and
        not target.dtype.is_compatible_with(dtypes.int32)):
      raise ValueError("Target's dtype should be int32, int64 or compatible. "
                       "Instead got %s." % target.dtype)
    # sparse_softmax_cross_entropy_with_logits requires [batch_size] target.
    if len(target.get_shape()) == 2:
      target = array_ops.squeeze(target, squeeze_dims=[1])
    loss_vec = nn.sparse_softmax_cross_entropy_with_logits(logits, target)
  return loss_vec
def body(a, b, c, labels):
  with variable_scope.variable_scope("vs", use_resource=True):
    w0 = variable_scope.get_variable(
        "w0",
        shape=[64, 64],
        dtype=np.float32,
        initializer=init_ops.ones_initializer())
    w1 = variable_scope.get_variable(
        "w1",
        shape=[64, 64],
        dtype=np.float32,
        initializer=init_ops.ones_initializer())
    a = func(a, w0, b)
    a = a - func(a, w1, c)
    loss = math_ops.reduce_mean(
        nn.sparse_softmax_cross_entropy_with_logits(logits=a, labels=labels))
    train_op = gradient_descent.GradientDescentOptimizer(0.001).minimize(loss)
    return a, train_op
def forward(self, y_true, y_pred):
  dims = list(range(len(y_pred.get_shape())))
  if self.axis != -1 and self.axis not in dims:
    raise ValueError("Axis out of y_pred's dimensions")
  if self.axis != -1 and self.axis != dims[-1]:
    perm = dims[:self.axis] + dims[self.axis + 1:]
    perm += [self.axis]
    y_pred = array_ops.transpose(y_pred, perm=perm)

  if not self.from_logits:
    if (isinstance(y_pred, (ops.EagerTensor, variables.Variable)) or
        y_pred.op.type != 'Softmax'):
      y_pred = clip_ops.clip_by_value(t=y_pred,
                                      clip_value_min=epsilon,
                                      clip_value_max=1 - epsilon)
      y_pred = math_ops.log(y_pred)
    else:
      # When softmax activation function is used for output operation, we
      # use logits from the softmax function directly to compute loss in order
      # to prevent collapsing zero when training.
      # See b/117284466
      assert len(y_pred.op.inputs) == 1
      y_pred = y_pred.op.inputs[0]

  rank = len(y_pred.shape)
  self.axis = self.axis % rank
  if self.axis != rank - 1:
    permutation = list(range(self.axis)) + list(
        range(self.axis + 1, rank)) + [self.axis]
    y_pred = array_ops.transpose(y_pred, perm=permutation)

  shape = y_pred.shape
  y_true = F.int64(array_ops.reshape(y_true, [-1]))
  logits = array_ops.reshape(y_pred, [-1, int(shape[-1])])
  res = nn.sparse_softmax_cross_entropy_with_logits(labels=y_true,
                                                    logits=logits)
  if len(shape) >= 3:
    return array_ops.reshape(res, array_ops.shape(y_pred)[:-1])
  else:
    return res
# Up sample 1. Upscale to 128 x 128 x 24
wt1 = tf.Variable(tf.truncated_normal([3, 3, 24, 32]))
convt_1 = sigmoid(
    conv2d_transpose(conv_0,
                     filter=wt1,
                     output_shape=[batch_size, 128, 128, 24],
                     strides=[1, 1, 1, 1]))
print("Deconvolution 1:", convt_1)

# Up sample 2. Upscale to 256 x 256 x 2
wt2 = tf.Variable(tf.truncated_normal([3, 3, 2, 24]))
convt_2 = sigmoid(
    conv2d_transpose(convt_1,
                     filter=wt2,
                     output_shape=[batch_size, IMAGE_HEIGHT, IMAGE_WIDTH, 2],
                     strides=[1, 2, 2, 1]))
print("Deconvolution 2:", convt_2)

end1 = time.time()
print("Checkpoint model", end1 - start)

# Loss computation
logits = tf.reshape(convt_2, [-1, num_labels])
reshaped_labels = tf.reshape(labels, [-1])
cross_entropy = sparse_softmax_cross_entropy_with_logits(
    logits=logits, labels=reshaped_labels)
loss = tf.reduce_mean(cross_entropy)

optimizer = tf.train.GradientDescentOptimizer(0.5).minimize(loss)

end2 = time.time()
print("Checkpoint eval", end2 - end1)
def adaptive_softmax_loss(inputs,
                          labels,
                          cutoff,
                          project_factor=4,
                          initializer=None,
                          name=None):
  """Computes and returns the adaptive softmax loss (an improvement of
  hierarchical softmax).

  See [Efficient softmax approximation for GPUs](https://arxiv.org/pdf/1609.04309v2.pdf).

  This is a faster way to train a softmax classifier over a huge number of
  classes, and can be used for **both training and prediction**. For example,
  it can be used for training a Language Model with a very huge vocabulary,
  and the trained language model can be used in speech recognition, text
  generation, and machine translation very efficiently.

  Args:
    inputs: A `Tensor` of shape `[batch_size, dim]`. The forward activations of
      the input network.
    labels: `Tensor` of shape `[d_0, d_1, ..., d_{r-2}]` and dtype `int32` or
      `int64`. Each entry in `labels` must be an index in `[0, num_classes)`.
    cutoff: A list indicating the limits of the different clusters.
    project_factor: A floating point value greater or equal to 1.0. The
      projection factor between two neighboring clusters.
    initializer: Initializer for adaptive softmax variables (optional).
    name: A name for the operation (optional).

  Returns:
    loss: A `batch_size` 1-D tensor of the adaptive softmax cross entropy loss.
    training_losses: A list of 1-D tensors of adaptive softmax loss for each
      cluster, which can be used for calculating the gradients and back
      propagation when training.
  """
  input_dim = int(inputs.get_shape()[1])
  sample_num = int(inputs.get_shape()[0])
  cluster_num = len(cutoff) - 1
  with ops.name_scope(name, "AdaptiveSoftmax"):
    if initializer is None:
      stdv = math.sqrt(1. / input_dim)
      initializer = init_ops.random_uniform_initializer(-stdv * 0.8,
                                                        stdv * 0.8)

    head_dim = cutoff[0] + cluster_num
    head_w = variable_scope.get_variable("adaptive_softmax_head_w",
                                         [input_dim, head_dim],
                                         initializer=initializer)

    tail_project_factor = project_factor
    tail_w = []
    for i in range(cluster_num):
      project_dim = max(1, input_dim // tail_project_factor)
      tail_dim = cutoff[i + 1] - cutoff[i]
      tail_w.append([
          variable_scope.get_variable(
              "adaptive_softmax_tail{}_proj_w".format(i + 1),
              [input_dim, project_dim], initializer=initializer),
          variable_scope.get_variable(
              "adaptive_softmax_tail{}_w".format(i + 1),
              [project_dim, tail_dim], initializer=initializer)
      ])
      tail_project_factor *= project_factor

    # Get tail masks and update head labels
    training_losses = []
    loss = array_ops.zeros([sample_num], dtype=dtypes.float32)
    head_labels = labels
    for i in range(cluster_num):
      mask = math_ops.logical_and(math_ops.greater_equal(labels, cutoff[i]),
                                  math_ops.less(labels, cutoff[i + 1]))

      # Update head labels
      head_labels = tf.where(mask,
                             array_ops.constant([cutoff[0] + i] * sample_num),
                             head_labels)

      # Compute tail loss
      tail_inputs = array_ops.boolean_mask(inputs, mask)
      tail_logits = math_ops.matmul(
          math_ops.matmul(tail_inputs, tail_w[i][0]), tail_w[i][1])
      tail_labels = array_ops.boolean_mask(labels - cutoff[i], mask)
      tail_loss = nn.sparse_softmax_cross_entropy_with_logits(
          labels=tail_labels, logits=tail_logits)
      training_losses.append(tail_loss)
      aligned_tail_loss = sparse_tensor.SparseTensor(
          array_ops.squeeze(array_ops.where(mask)), tail_loss, [sample_num])
      loss += sparse_ops.sparse_tensor_to_dense(aligned_tail_loss)

    # Compute head loss
    head_logits = math_ops.matmul(inputs, head_w)
    head_loss = nn.sparse_softmax_cross_entropy_with_logits(
        logits=head_logits, labels=head_labels)
    loss += head_loss
    training_losses.append(head_loss)

    return loss, training_losses
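# --- Usage sketch (not from the original sources) -------------------------
# A hedged sketch of calling adaptive_softmax_loss above in a TF 1.x graph.
# The batch size (32), hidden size (64), vocabulary size (10000) and cutoff
# values are illustrative assumptions: cutoff = [2000, 8000, 10000] puts IDs
# [0, 2000) in the head and creates two tail clusters, [2000, 8000) and
# [8000, 10000).
import tensorflow.compat.v1 as tf

hidden = tf.placeholder(tf.float32, [32, 64])   # [batch_size, dim]
word_ids = tf.placeholder(tf.int32, [32])       # target IDs in [0, 10000)

loss, training_losses = adaptive_softmax_loss(
    hidden, word_ids, cutoff=[2000, 8000, 10000])

# Train on the per-cluster losses; `loss` is the per-example total for eval.
optimizer = tf.train.AdagradOptimizer(0.1)
train_op = optimizer.minimize(
    tf.add_n([tf.reduce_sum(l) for l in training_losses]))
# --------------------------------------------------------------------------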