示例#1
0
 def stage3(x, label):
     """Return the mean sparse softmax cross-entropy loss for this stage.

     `x` is passed through `stage(x, "s3")`, the result is summed over axis 1
     to form the logits, and the logits are scored against `label`.
     """
     activations = stage(x, "s3")
     class_scores = math_ops.reduce_sum(activations, axis=[1])
     per_example_loss = nn.sparse_softmax_cross_entropy_with_logits(
         labels=label, logits=class_scores)
     return math_ops.reduce_mean(per_example_loss)
示例#2
0
 def stage4(x, label):
     """Return the mean sparse softmax cross-entropy loss of `x` vs `label`.

     The logits are `x` summed over its last axis. All ops are created inside
     the resource-variable scope "stage4".
     """
     with variable_scope.variable_scope("stage4", use_resource=True):
         class_scores = math_ops.reduce_sum(x, axis=[-1])
         per_example_loss = nn.sparse_softmax_cross_entropy_with_logits(
             labels=label, logits=class_scores)
         return math_ops.reduce_mean(per_example_loss)
示例#3
0
def sparse_softmax_cross_entropy(logits, labels, weights=1.0, scope=None):
  """Computes a weighted sparse softmax cross-entropy loss.

  Per-example losses come from
  `tf.nn.sparse_softmax_cross_entropy_with_logits` and are then combined by
  `compute_weighted_loss`. A scalar `weights` simply scales the loss; a
  `weights` tensor of size [`batch_size`] weights each sample individually.

  Args:
    logits: [batch_size, num_classes] logits outputs of the network.
    labels: [batch_size, 1] or [batch_size] labels of dtype `int32` or `int64`
      in the range `[0, num_classes)`. They are reshaped to a vector whose
      length is the size of their first dimension.
    weights: Coefficients for the loss. The tensor must be a scalar or a tensor
      of shape [batch_size] or [batch_size, 1].
    scope: the scope for the operations performed in computing the loss.

  Returns:
    A scalar `Tensor` representing the mean loss value.

  Raises:
    ValueError: If the shapes of `logits`, `labels`, and `weights` are
      incompatible, or if `weights` is None.
  """
  op_inputs = [logits, labels, weights]
  with ops.name_scope(scope, "sparse_softmax_cross_entropy_loss",
                      op_inputs) as loss_scope:
    # Flatten the labels to a vector as long as their leading dimension.
    leading_dim = array_ops.shape(labels)[0]
    flat_labels = array_ops.reshape(labels, shape=[leading_dim])
    per_example_losses = nn.sparse_softmax_cross_entropy_with_logits(
        logits=logits, labels=flat_labels, name="xentropy")
    return compute_weighted_loss(per_example_losses, weights, scope=loss_scope)
示例#4
0
def sparse_softmax_cross_entropy(labels,
                                 logits,
                                 weights=1.0,
                                 scope=None,
                                 loss_collection=ops.GraphKeys.LOSSES,
                                 reduction=Reduction.SUM_BY_NONZERO_WEIGHTS):
    """Weighted loss from `tf.nn.sparse_softmax_cross_entropy_with_logits`.

    `weights` acts as a coefficient for the loss. A scalar scales the whole
    loss; a tensor of shape `[batch_size]` applies one weight per sample.

    Args:
      labels: `Tensor` of shape `[d_0, d_1, ..., d_{r-1}]` (where `r` is rank
        of `labels` and result) and dtype `int32` or `int64`. Each entry in
        `labels` must be an index in `[0, num_classes)`. Other values will
        raise an exception when this op is run on CPU, and return `NaN` for
        corresponding loss and gradient rows on GPU.
      logits: Unscaled log probabilities of shape
        `[d_0, d_1, ..., d_{r-1}, num_classes]` and dtype `float16`, `float32`
        or `float64`.
      weights: Coefficients for the loss. This must be scalar or broadcastable
        to `labels` (i.e. same rank and each dimension is either 1 or the
        same).
      scope: the scope for the operations performed in computing the loss.
      loss_collection: collection to which the loss will be added.
      reduction: Type of reduction to apply to loss.

    Returns:
      Weighted loss `Tensor` of the same type as `logits`. If `reduction` is
      `NONE`, this has the same shape as `labels`; otherwise, it is scalar.

    Raises:
      ValueError: If the shapes of `logits`, `labels`, and `weights` are
        incompatible, or if any of them are None.

    @compatibility(eager)
    The `loss_collection` argument is ignored when executing eagerly. Consider
    holding on to the return value or collecting losses via a `tf.keras.Model`.
    @end_compatibility
    """
    # Reject missing inputs up front, labels first.
    if labels is None:
        raise ValueError("labels must not be None.")
    if logits is None:
        raise ValueError("logits must not be None.")

    scope_values = (logits, labels, weights)
    with ops.name_scope(scope, "sparse_softmax_cross_entropy_loss",
                        scope_values) as loss_scope:
        # Labels hold class IDs and logits hold one score per class ID, so
        # logits are expected to have exactly one more dimension than labels;
        # hence expected_rank_diff=1.
        labels, logits, weights = _remove_squeezable_dimensions(
            labels, logits, weights, expected_rank_diff=1)
        per_example_losses = nn.sparse_softmax_cross_entropy_with_logits(
            logits=logits, labels=labels, name="xentropy")
        return compute_weighted_loss(per_example_losses, weights, loss_scope,
                                     loss_collection, reduction=reduction)
示例#5
0
def _softmax_cross_entropy_loss(logits, target):
    """Return the per-example sparse softmax cross-entropy of `logits`.

    An assertion op checks that `target` has rank at most 2; the reshape is
    created under a control dependency on that assertion. `target` is then
    reshaped to a vector whose length is the size of its first dimension.

    Args:
      logits: logits passed to `nn.sparse_softmax_cross_entropy_with_logits`.
      target: class-index labels, expected to be shaped [batch_size, 1] or
        [batch_size] (per the assertion message).

    Returns:
      The result of `nn.sparse_softmax_cross_entropy_with_logits`.
    """
    check_shape_op = control_flow_ops.Assert(
        math_ops.less_equal(array_ops.rank(target), 2),
        ["target's shape should be either [batch_size, 1] or [batch_size]"])
    with ops.control_dependencies([check_shape_op]):
        target = array_ops.reshape(target, shape=[array_ops.shape(target)[0]])
    # Pass by keyword: the positional order of `logits`/`labels` differs
    # between TensorFlow releases, and TF >= 1.0 rejects positional calls.
    return nn.sparse_softmax_cross_entropy_with_logits(logits=logits,
                                                       labels=target)
示例#6
0
    def fwd_fn(idx, label):
      """Embedding lookup + matmul forward pass; returns the mean loss.

      Both variables are initialised from NumPy normal samples drawn after
      seeding with 1. `dataset_size` and `embedding_size` come from the
      enclosing scope.
      """
      # The embedding table is drawn before the weight matrix; keep that order
      # so the seeded values stay the same.
      np.random.seed(1)
      embedding_init = np.random.normal(
          0, 1, (dataset_size, embedding_size)).astype(np.float32)
      weights_init = np.random.normal(
          0, 1, (embedding_size, embedding_size)).astype(np.float32)

      with variable_scope.variable_scope("part1", use_resource=True):
        embedding = variable_scope.get_variable(
            "c", dtype=np.float32, initializer=embedding_init, trainable=True)
        weight = variable_scope.get_variable(
            "w0", dtype=np.float32, initializer=weights_init, trainable=True)

      looked_up = embedding_ops.embedding_lookup(embedding, idx)
      projected = math_ops.matmul(looked_up, weight)

      class_scores = math_ops.reduce_sum(projected, axis=[-1])
      per_example_loss = nn.sparse_softmax_cross_entropy_with_logits(
          labels=label, logits=class_scores)
      return math_ops.reduce_mean(per_example_loss)
示例#7
0
def sparse_softmax_cross_entropy(logits, labels, weights=1.0, scope=None):
    """Computes a weighted sparse softmax cross-entropy loss.

    Per-example losses come from
    `tf.nn.sparse_softmax_cross_entropy_with_logits` and are then combined by
    `compute_weighted_loss`. A scalar `weights` simply scales the loss; a
    `weights` tensor of size [`batch_size`] weights each sample individually.

    Args:
      logits: [batch_size, num_classes] logits outputs of the network.
      labels: [batch_size, 1] or [batch_size] labels of dtype `int32` or
        `int64` in the range `[0, num_classes)`. They are reshaped to a vector
        whose length is the size of their first dimension.
      weights: Coefficients for the loss. The tensor must be a scalar or a
        tensor of shape [batch_size] or [batch_size, 1].
      scope: the scope for the operations performed in computing the loss.

    Returns:
      A scalar `Tensor` representing the mean loss value.

    Raises:
      ValueError: If the shapes of `logits`, `labels`, and `weights` are
        incompatible, or if `weights` is None.
    """
    op_inputs = [logits, labels, weights]
    with ops.name_scope(scope, "sparse_softmax_cross_entropy_loss",
                        op_inputs) as loss_scope:
        # Flatten the labels to a vector as long as their leading dimension.
        leading_dim = array_ops.shape(labels)[0]
        flat_labels = array_ops.reshape(labels, shape=[leading_dim])
        per_example_losses = nn.sparse_softmax_cross_entropy_with_logits(
            logits=logits, labels=flat_labels, name="xentropy")
        return compute_weighted_loss(per_example_losses,
                                     weights,
                                     scope=loss_scope)
示例#8
0
def _softmax_cross_entropy_loss(logits, target):
  """Returns the per-example sparse softmax cross-entropy of `logits`.

  An assertion op checks that `target` has rank at most 2; the reshape is
  created under a control dependency on that assertion. `target` is then
  reshaped to a vector whose length is the size of its first dimension.

  Args:
    logits: logits passed to `nn.sparse_softmax_cross_entropy_with_logits`.
    target: class-index labels, expected to be shaped [batch_size, 1] or
      [batch_size] (per the assertion message).

  Returns:
    The result of `nn.sparse_softmax_cross_entropy_with_logits`.
  """
  check_shape_op = control_flow_ops.Assert(
      math_ops.less_equal(array_ops.rank(target), 2),
      ["target's shape should be either [batch_size, 1] or [batch_size]"])
  with ops.control_dependencies([check_shape_op]):
    target = array_ops.reshape(target, shape=[array_ops.shape(target)[0]])
  # Pass by keyword: the positional order of `logits`/`labels` differs between
  # TensorFlow releases, and TF >= 1.0 rejects positional calls.
  return nn.sparse_softmax_cross_entropy_with_logits(logits=logits,
                                                     labels=target)
示例#9
0
 def stage3(x, label):
     """Return the mean sparse softmax cross-entropy loss for this stage.

     Inside the resource-variable scope "stage3", `x` is averaged over axes 1
     and 2, passed through `fc(..., 100)`, and scored against `label`.
     """
     with variable_scope.variable_scope("stage3", use_resource=True):
         pooled = math_ops.reduce_mean(x, axis=[1, 2])
         class_scores = fc(pooled, 100)
         per_example_loss = nn.sparse_softmax_cross_entropy_with_logits(
             labels=label, logits=class_scores)
         return math_ops.reduce_mean(per_example_loss)
示例#10
0
def xent(y_true, y_pred):
  """Sparse softmax cross-entropy between `y_true` and logits `y_pred`.

  `y_true` is flattened to one dimension and cast to int32 before it is used
  as the labels.
  """
  flat_labels = keras.backend.reshape(y_true, (-1,))
  int_labels = keras.backend.cast(flat_labels, dtypes.int32)
  return nn.sparse_softmax_cross_entropy_with_logits(logits=y_pred,
                                                     labels=int_labels)
示例#11
0
def model():
    """Build the bidirectional-GRU classifier graph and its training ops.

    Reads the module-level names `num_features`, `num_classes`,
    `learning_rate` and `clip_norm`, and prints the static shapes of several
    tensors while building.

    Returns:
      A tuple `(X_pl, t_pl, is_training_pl, l_out, l_out_softmax, loss,
      accuracy, train_op, global_step)`: the input, target and training-flag
      placeholders, the output logits and their softmax, the mean loss, the
      mean accuracy, the op applying the clipped Adam gradients, and the
      global step variable.
    """
    print("building model ...")
    with tf.variable_scope('train'):
        print("building model ...")
        X_pl = tf.placeholder(tf.float32, [None, num_features])
        # Append a trailing axis: [batch, num_features] -> [batch, num_features, 1].
        X_expand = tf.expand_dims(X_pl, axis=2)
        print("X_pl", X_pl.get_shape())
        t_pl = tf.placeholder(tf.int32, [None,])
        print("t_pl", t_pl.get_shape())
        is_training_pl = tf.placeholder(tf.bool)
        cell_fw = tf.nn.rnn_cell.GRUCell(205)
        cell_bw = tf.nn.rnn_cell.GRUCell(205)
        # Summing ones over axis 1 gives every row the same length: the number
        # of columns of X_pl.
        seq_len = tf.reduce_sum(tf.ones(tf.shape(X_pl), dtype=tf.int32), axis=1)
        _, enc_states = tf.nn.bidirectional_dynamic_rnn(cell_fw=cell_fw,
            cell_bw=cell_bw, inputs=X_expand, sequence_length=seq_len,
            dtype=tf.float32)
        # NOTE(review): (dim, values) is the pre-1.0 `tf.concat` argument
        # order; later TensorFlow releases take (values, axis) -- confirm the
        # targeted TensorFlow version before changing this.
        enc_states = tf.concat(1, enc_states)
        enc_states_drop = dropout(enc_states, is_training=is_training_pl)
        l1 = fully_connected(enc_states_drop, 200, activation_fn=None)
        l1 = batch_norm(l1, is_training=is_training_pl)
        l1_relu = relu(l1)
        l1_dropout = dropout(l1_relu, is_training=is_training_pl)
        l2 = fully_connected(l1_dropout, 200, activation_fn=None)
        l2 = batch_norm(l2, is_training=is_training_pl)
        l2_relu = relu(l2)
        l_out = fully_connected(l2_relu, num_outputs=num_classes, activation_fn=None)
        l_out_softmax = tf.nn.softmax(l_out)
        tf.contrib.layers.summarize_variables()

    with tf.variable_scope('metrics'):
        # Pass by keyword: the positional order of `logits`/`labels` differs
        # between TensorFlow releases, and TF >= 1.0 rejects positional calls.
        loss = sparse_softmax_cross_entropy_with_logits(logits=l_out,
                                                        labels=t_pl)
        print("loss", loss.get_shape())
        loss = tf.reduce_mean(loss)
        print("loss", loss.get_shape())
        tf.summary.scalar('train/loss', loss)
        argmax = tf.to_int32(tf.argmax(l_out, 1))
        print("argmax", argmax.get_shape())
        correct = tf.to_float(tf.equal(argmax, t_pl))
        print("correct,", correct.get_shape())
        accuracy = tf.reduce_mean(correct)
        print("accuracy", accuracy.get_shape())

    with tf.variable_scope('optimizer'):
        print("building optimizer ...")
        global_step = tf.Variable(0, name='global_step', trainable=False)
        optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
        grads_and_vars = optimizer.compute_gradients(loss)
        # Clip all gradients jointly by their global norm, then re-pair them
        # with their variables.
        gradients, variables = zip(*grads_and_vars)
        clipped_gradients, global_norm = (
            tf.clip_by_global_norm(gradients, clip_norm))
        clipped_grads_and_vars = zip(clipped_gradients, variables)

        tf.summary.scalar('train/global_gradient_norm', global_norm)

        train_op = optimizer.apply_gradients(clipped_grads_and_vars, global_step=global_step)

    return X_pl, t_pl, is_training_pl, l_out, l_out_softmax, loss, accuracy, train_op, global_step
示例#12
0
def _softmax_cross_entropy_loss(logits, labels):
    """Return the per-example sparse softmax cross-entropy of `logits`.

    Args:
      logits: logits passed to `nn.sparse_softmax_cross_entropy_with_logits`.
      labels: integer class-index labels; a rank-2 `labels` has its axis 1
        squeezed away first.

    Returns:
      The result of `nn.sparse_softmax_cross_entropy_with_logits`.

    Raises:
      ValueError: If `labels.dtype` is not an integer dtype.
    """
    # Check that we got integer for classification.
    if not labels.dtype.is_integer:
        raise ValueError("Labels dtype should be integer "
                         "Instead got %s." % labels.dtype)
    # sparse_softmax_cross_entropy_with_logits requires [batch_size] labels.
    if len(labels.get_shape()) == 2:
        labels = array_ops.squeeze(labels, squeeze_dims=[1])
    # Pass by keyword: the positional order of `logits`/`labels` differs
    # between TensorFlow releases, and TF >= 1.0 rejects positional calls.
    return nn.sparse_softmax_cross_entropy_with_logits(logits=logits,
                                                       labels=labels)
示例#13
0
def _softmax_cross_entropy_loss(logits, labels):
  """Returns the per-example sparse softmax cross-entropy of `logits`.

  Args:
    logits: logits passed to `nn.sparse_softmax_cross_entropy_with_logits`.
    labels: integer class-index labels; a rank-2 `labels` has its axis 1
      squeezed away first.

  Returns:
    The result of `nn.sparse_softmax_cross_entropy_with_logits`.

  Raises:
    ValueError: If `labels.dtype` is not an integer dtype.
  """
  # Check that we got integer for classification.
  if not labels.dtype.is_integer:
    raise ValueError("Labels dtype should be integer "
                     "Instead got %s." % labels.dtype)
  # sparse_softmax_cross_entropy_with_logits requires [batch_size] labels.
  if len(labels.get_shape()) == 2:
    labels = array_ops.squeeze(labels, squeeze_dims=[1])
  # Pass by keyword: the positional order of `logits`/`labels` differs between
  # TensorFlow releases, and TF >= 1.0 rejects positional calls.
  return nn.sparse_softmax_cross_entropy_with_logits(logits=logits,
                                                     labels=labels)
示例#14
0
def model():
    """Build the bidirectional-GRU classifier graph and its training ops.

    Reads the module-level names `num_features`, `num_classes`,
    `learning_rate` and `clip_norm`, and prints the static shapes of several
    tensors while building.

    Returns:
      A tuple `(X_pl, t_pl, is_training_pl, l_out, l_out_softmax, loss,
      accuracy, train_op, global_step)`: the input, target and training-flag
      placeholders, the output logits and their softmax, the mean loss, the
      mean accuracy, the op applying the clipped Adam gradients, and the
      global step variable.
    """
    print("building model ...")
    with tf.variable_scope('train'):
        print("building model ...")
        X_pl = tf.placeholder(tf.float32, [None, num_features])
        # Append a trailing axis: [batch, num_features] -> [batch, num_features, 1].
        X_expand = tf.expand_dims(X_pl, axis=2)
        print("X_pl", X_pl.get_shape())
        t_pl = tf.placeholder(tf.int32, [None,])
        print("t_pl", t_pl.get_shape())
        is_training_pl = tf.placeholder(tf.bool)
        cell_fw = tf.nn.rnn_cell.GRUCell(100)
        cell_bw = tf.nn.rnn_cell.GRUCell(100)
        # Summing ones over axis 1 gives every row the same length: the number
        # of columns of X_pl.
        seq_len = tf.reduce_sum(tf.ones(tf.shape(X_pl), dtype=tf.int32), axis=1)
        _, enc_states = tf.nn.bidirectional_dynamic_rnn(cell_fw=cell_fw,
            cell_bw=cell_bw, inputs=X_expand, sequence_length=seq_len,
            dtype=tf.float32)
        # NOTE(review): (dim, values) is the pre-1.0 `tf.concat` argument
        # order; later TensorFlow releases take (values, axis) -- confirm the
        # targeted TensorFlow version before changing this.
        enc_states = tf.concat(1, enc_states)
        enc_states_drop = dropout(enc_states, is_training=is_training_pl)
        l1 = fully_connected(enc_states_drop, 100, activation_fn=None)
        l1 = batch_norm(l1, is_training=is_training_pl)
        l1_relu = relu(l1)
        l1_dropout = dropout(l1_relu, is_training=is_training_pl)
        l2 = fully_connected(l1_dropout, 100, activation_fn=None)
        l2 = batch_norm(l2, is_training=is_training_pl)
        l2_relu = relu(l2)
        l_out = fully_connected(l2_relu, num_outputs=num_classes, activation_fn=None)
        l_out_softmax = tf.nn.softmax(l_out)
        tf.contrib.layers.summarize_variables()

    with tf.variable_scope('metrics'):
        # Pass by keyword: the positional order of `logits`/`labels` differs
        # between TensorFlow releases, and TF >= 1.0 rejects positional calls.
        loss = sparse_softmax_cross_entropy_with_logits(logits=l_out,
                                                        labels=t_pl)
        print("loss", loss.get_shape())
        loss = tf.reduce_mean(loss)
        print("loss", loss.get_shape())
        tf.summary.scalar('train/loss', loss)
        argmax = tf.to_int32(tf.argmax(l_out, 1))
        print("argmax", argmax.get_shape())
        correct = tf.to_float(tf.equal(argmax, t_pl))
        print("correct,", correct.get_shape())
        accuracy = tf.reduce_mean(correct)
        print("accuracy", accuracy.get_shape())

    with tf.variable_scope('optimizer'):
        print("building optimizer ...")
        global_step = tf.Variable(0, name='global_step', trainable=False)
        optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
        grads_and_vars = optimizer.compute_gradients(loss)
        # Clip all gradients jointly by their global norm, then re-pair them
        # with their variables.
        gradients, variables = zip(*grads_and_vars)
        clipped_gradients, global_norm = (
            tf.clip_by_global_norm(gradients, clip_norm))
        clipped_grads_and_vars = zip(clipped_gradients, variables)

        tf.summary.scalar('train/global_gradient_norm', global_norm)

        train_op = optimizer.apply_gradients(clipped_grads_and_vars, global_step=global_step)

    return X_pl, t_pl, is_training_pl, l_out, l_out_softmax, loss, accuracy, train_op, global_step
示例#15
0
 def body(x, labels):
     """Apply `cached_func` twice, then build one SGD step on the mean loss.

     Returns the transformed `x` together with the op that minimizes the mean
     sparse softmax cross-entropy of `x` against `labels` using gradient
     descent with learning rate 0.001.
     """
     for tag in ("1", "2"):
         x = cached_func(x, tag)
     per_example_loss = nn.sparse_softmax_cross_entropy_with_logits(
         labels=labels, logits=x)
     mean_loss = math_ops.reduce_mean(per_example_loss)
     optimizer = gradient_descent.GradientDescentOptimizer(0.001)
     train_op = optimizer.minimize(mean_loss)
     return x, train_op
示例#16
0
def _softmax_cross_entropy_loss(logits, target):
  """Returns the per-example sparse softmax cross-entropy of `logits`.

  Args:
    logits: logits passed to `nn.sparse_softmax_cross_entropy_with_logits`.
    target: integer class-index labels; a rank-2 `target` has its axis 1
      squeezed away first.

  Returns:
    The result of `nn.sparse_softmax_cross_entropy_with_logits`.

  Raises:
    ValueError: If `target.dtype` is not an integer dtype.
  """
  # Check that we got integer for classification.
  if not target.dtype.is_integer:
    raise ValueError("Target's dtype should be integer "
                     "Instead got %s." % target.dtype)
  # sparse_softmax_cross_entropy_with_logits requires [batch_size] target.
  if len(target.get_shape()) == 2:
    target = array_ops.squeeze(target, squeeze_dims=[1])
  # Pass by keyword: the positional order of `logits`/`labels` differs between
  # TensorFlow releases, and TF >= 1.0 rejects positional calls.
  loss_vec = nn.sparse_softmax_cross_entropy_with_logits(logits=logits,
                                                         labels=target)
  return loss_vec
示例#17
0
def _softmax_cross_entropy_loss(logits, target):
    """Return the per-example sparse softmax cross-entropy of `logits`.

    Args:
      logits: logits passed to `nn.sparse_softmax_cross_entropy_with_logits`.
      target: class-index labels whose dtype is compatible with int32 or
        int64; a rank-2 `target` has its axis 1 squeezed away first.

    Returns:
      The result of `nn.sparse_softmax_cross_entropy_with_logits`.

    Raises:
      ValueError: If `target.dtype` is compatible with neither int64 nor
        int32.
    """
    # Check that we got int32/int64 for classification.
    if (not target.dtype.is_compatible_with(dtypes.int64) and
            not target.dtype.is_compatible_with(dtypes.int32)):
        raise ValueError("Target's dtype should be int32, int64 or compatible. "
                         "Instead got %s." % target.dtype)
    # sparse_softmax_cross_entropy_with_logits requires [batch_size] target.
    if len(target.get_shape()) == 2:
        target = array_ops.squeeze(target, squeeze_dims=[1])
    # Pass by keyword: the positional order of `logits`/`labels` differs
    # between TensorFlow releases, and TF >= 1.0 rejects positional calls.
    loss_vec = nn.sparse_softmax_cross_entropy_with_logits(logits=logits,
                                                           labels=target)
    return loss_vec
示例#18
0
文件: head.py 项目: sigmasharp/w266
def _softmax_cross_entropy_loss(logits, target):
  """Returns the per-example sparse softmax cross-entropy of `logits`.

  Args:
    logits: logits passed to `nn.sparse_softmax_cross_entropy_with_logits`.
    target: integer class-index labels; a rank-2 `target` has its axis 1
      squeezed away first.

  Returns:
    The result of `nn.sparse_softmax_cross_entropy_with_logits`.

  Raises:
    ValueError: If `target.dtype` is not an integer dtype.
  """
  # Check that we got integer for classification.
  if not target.dtype.is_integer:
    raise ValueError("Target's dtype should be integer "
                     "Instead got %s." % target.dtype)
  # sparse_softmax_cross_entropy_with_logits requires [batch_size] target.
  if len(target.get_shape()) == 2:
    target = array_ops.squeeze(target, squeeze_dims=[1])
  # Pass by keyword: the positional order of `logits`/`labels` differs between
  # TensorFlow releases, and TF >= 1.0 rejects positional calls.
  loss_vec = nn.sparse_softmax_cross_entropy_with_logits(logits=logits,
                                                         labels=target)
  return loss_vec
示例#19
0
def sparse_softmax_cross_entropy(
    labels, logits, weights=1.0, scope=None,
    loss_collection=ops.GraphKeys.LOSSES,
    reduction=Reduction.SUM_BY_NONZERO_WEIGHTS):
  """Weighted loss from `tf.nn.sparse_softmax_cross_entropy_with_logits`.

  `weights` acts as a coefficient for the loss. A scalar scales the whole
  loss; a tensor of shape `[batch_size]` applies one weight per sample.

  Args:
    labels: `Tensor` of shape `[d_0, d_1, ..., d_{r-1}]` (where `r` is rank of
      `labels` and result) and dtype `int32` or `int64`. Each entry in `labels`
      must be an index in `[0, num_classes)`. Other values will raise an
      exception when this op is run on CPU, and return `NaN` for corresponding
      loss and gradient rows on GPU.
    logits: Unscaled log probabilities of shape
      `[d_0, d_1, ..., d_{r-1}, num_classes]` and dtype `float16`, `float32` or
      `float64`.
    weights: Coefficients for the loss. This must be scalar or broadcastable to
      `labels` (i.e. same rank and each dimension is either 1 or the same).
    scope: the scope for the operations performed in computing the loss.
    loss_collection: collection to which the loss will be added.
    reduction: Type of reduction to apply to loss.

  Returns:
    Weighted loss `Tensor` of the same type as `logits`. If `reduction` is
    `NONE`, this has the same shape as `labels`; otherwise, it is scalar.

  Raises:
    ValueError: If the shapes of `logits`, `labels`, and `weights` are
      incompatible, or if any of them are None.

  @compatibility(eager)
  The `loss_collection` argument is ignored when executing eagerly. Consider
  holding on to the return value or collecting losses via a `tf.keras.Model`.
  @end_compatibility
  """
  # Reject missing inputs up front, labels first.
  if labels is None:
    raise ValueError("labels must not be None.")
  if logits is None:
    raise ValueError("logits must not be None.")

  scope_values = (logits, labels, weights)
  with ops.name_scope(scope, "sparse_softmax_cross_entropy_loss",
                      scope_values) as loss_scope:
    # Labels hold class IDs and logits hold one score per class ID, so logits
    # are expected to have exactly one more dimension than labels; hence
    # expected_rank_diff=1.
    labels, logits, weights = _remove_squeezable_dimensions(
        labels, logits, weights, expected_rank_diff=1)
    per_example_losses = nn.sparse_softmax_cross_entropy_with_logits(
        logits=logits, labels=labels, name="xentropy")
    return compute_weighted_loss(
        per_example_losses, weights, loss_scope, loss_collection,
        reduction=reduction)
示例#20
0
def model():
    """Build a one-hidden-layer classifier graph and its training ops.

    Reads the module-level names `num_features`, `num_classes`,
    `learning_rate` and `clip_norm`, sets the graph random seed to 1, and
    prints the static shapes of several tensors while building.

    Returns:
      A tuple `(X_pl, t_pl, is_training_pl, l_out, l_out_softmax, loss,
      accuracy, train_op, global_step)`: the input, target and training-flag
      placeholders, the output logits and their softmax, the mean loss, the
      mean accuracy, the op applying the clipped Adam gradients, and the
      global step variable.
    """
    tf.set_random_seed(1)
    print("building model ...")
    with tf.variable_scope('train'):
        print("building model ...")
        X_pl = tf.placeholder(tf.float32, [None, num_features])
        print("X_pl", X_pl.get_shape())
        t_pl = tf.placeholder(tf.int32, [
            None,
        ])
        print("t_pl", t_pl.get_shape())
        is_training_pl = tf.placeholder(tf.bool)
        X_bn = batch_norm(X_pl, is_training=is_training_pl)
        print("X_bn", X_bn.get_shape())
        # NOTE(review): the hidden layer reads the raw `X_pl`; `X_bn` is only
        # printed above and never consumed -- confirm whether that is intended.
        l1 = fully_connected(X_pl, num_outputs=100, activation_fn=relu)
        print("l1", l1.get_shape())
        l1_drop = dropout(l1, is_training=is_training_pl)
        print("l1_drop", l1_drop.get_shape())
        l_out = fully_connected(l1_drop,
                                num_outputs=num_classes,
                                activation_fn=None)
        print("l_out", l_out.get_shape())
        l_out_softmax = tf.nn.softmax(l_out)
        tf.contrib.layers.summarize_variables()

    with tf.variable_scope('metrics'):
        # Pass by keyword: the positional order of `logits`/`labels` differs
        # between TensorFlow releases, and TF >= 1.0 rejects positional calls.
        loss = sparse_softmax_cross_entropy_with_logits(logits=l_out,
                                                        labels=t_pl)
        print("loss", loss.get_shape())
        loss = tf.reduce_mean(loss)
        print("loss", loss.get_shape())
        tf.summary.scalar('train/loss', loss)
        argmax = tf.to_int32(tf.argmax(l_out, 1))
        print("argmax", argmax.get_shape())
        correct = tf.to_float(tf.equal(argmax, t_pl))
        print("correct,", correct.get_shape())
        accuracy = tf.reduce_mean(correct)
        print("accuracy", accuracy.get_shape())

    with tf.variable_scope('optimizer'):
        print("building optimizer ...")
        global_step = tf.Variable(0, name='global_step', trainable=False)
        optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
        grads_and_vars = optimizer.compute_gradients(loss)
        # Clip all gradients jointly by their global norm, then re-pair them
        # with their variables.
        gradients, variables = zip(*grads_and_vars)
        clipped_gradients, global_norm = (tf.clip_by_global_norm(
            gradients, clip_norm))
        clipped_grads_and_vars = zip(clipped_gradients, variables)

        tf.summary.scalar('train/global_gradient_norm', global_norm)

        train_op = optimizer.apply_gradients(clipped_grads_and_vars,
                                             global_step=global_step)

    return X_pl, t_pl, is_training_pl, l_out, l_out_softmax, loss, accuracy, train_op, global_step
示例#21
0
 def body(a, b, labels):
     """Run the multi-conv wrapper twice and build one SGD step on the loss.

     After two passes of `_test_multi_conv_wrapper(convs)`, `a` and `b` are
     each averaged over axes 1 and 2 and added together to form the logits.
     Returns those logits and the op that minimizes the mean sparse softmax
     cross-entropy against `labels` with learning rate 0.001.
     """
     for _ in range(2):
         a, b = _test_multi_conv_wrapper(convs)(a, b)
     pooled_a = math_ops.reduce_mean(a, axis=[1, 2])
     pooled_b = math_ops.reduce_mean(b, axis=[1, 2])
     a = pooled_a + pooled_b
     per_example_loss = nn.sparse_softmax_cross_entropy_with_logits(
         labels=labels, logits=a)
     mean_loss = math_ops.reduce_mean(per_example_loss)
     optimizer = gradient_descent.GradientDescentOptimizer(0.001)
     train_op = optimizer.minimize(mean_loss)
     return a, train_op
示例#22
0
def _softmax_cross_entropy_loss(logits, target):
  """Returns the per-example sparse softmax cross-entropy of `logits`.

  Args:
    logits: logits passed to `nn.sparse_softmax_cross_entropy_with_logits`.
    target: class-index labels whose dtype is compatible with int32 or int64;
      a rank-2 `target` has its axis 1 squeezed away first.

  Returns:
    The result of `nn.sparse_softmax_cross_entropy_with_logits`.

  Raises:
    ValueError: If `target.dtype` is compatible with neither int64 nor int32.
  """
  # Check that we got int32/int64 for classification.
  if (not target.dtype.is_compatible_with(dtypes.int64) and
      not target.dtype.is_compatible_with(dtypes.int32)):
    raise ValueError("Target's dtype should be int32, int64 or compatible. "
                     "Instead got %s." % target.dtype)
  # sparse_softmax_cross_entropy_with_logits requires [batch_size] target.
  if len(target.get_shape()) == 2:
    target = array_ops.squeeze(target, squeeze_dims=[1])
  # Pass by keyword: the positional order of `logits`/`labels` differs between
  # TensorFlow releases, and TF >= 1.0 rejects positional calls.
  loss_vec = nn.sparse_softmax_cross_entropy_with_logits(logits=logits,
                                                         labels=target)
  return loss_vec
示例#23
0
    def graph(x, label):
      """Two conv+ReLU layers, a mean over axes 1 and 2, loss and SGD step.

      Returns the mean sparse softmax cross-entropy loss against `label` and
      the op that minimizes it with gradient descent (learning rate 0.01).
      """
      hidden = nn.relu(conv(x, 3, 1, 16))
      hidden = nn.relu(conv(hidden, 3, 1, 100))

      class_scores = math_ops.reduce_mean(hidden, axis=[1, 2])
      per_example_loss = nn.sparse_softmax_cross_entropy_with_logits(
          labels=label, logits=class_scores)
      loss = math_ops.reduce_mean(per_example_loss)

      optimizer = gradient_descent.GradientDescentOptimizer(0.01)
      opt = optimizer.minimize(loss)
      return loss, opt
示例#24
0
    def graph(img, label):
      """Conv stem, one `block`, mean over axes 1 and 2, `fc`, loss, SGD step.

      Returns the mean sparse softmax cross-entropy loss against `label` and
      the op that minimizes it with gradient descent (learning rate 0.01).
      """
      stem = nn.relu(conv(img, 7, 2, 16))
      stem = max_pool(stem, ksize=3, stride=2)

      features = block("b", 2, 64, 1, stem)

      pooled = math_ops.reduce_mean(features, axis=[1, 2])
      class_scores = fc(pooled, 100)
      per_example_loss = nn.sparse_softmax_cross_entropy_with_logits(
          labels=label, logits=class_scores)
      loss = math_ops.reduce_mean(per_example_loss)

      optimizer = gradient_descent.GradientDescentOptimizer(0.01)
      opt = optimizer.minimize(loss)
      return loss, opt
示例#25
0
def sparse_softmax_cross_entropy(
        labels,
        logits,
        weights=1.0,
        scope=None,
        loss_collection=ops.GraphKeys.LOSSES,
        reduction=Reduction.WEIGHTED_SUM_BY_NONZERO_WEIGHTS):
    """Weighted loss from `tf.nn.sparse_softmax_cross_entropy_with_logits`.

    `weights` acts as a coefficient for the loss. A scalar scales the whole
    loss; a tensor of shape [`batch_size`] applies one weight per sample.

    Args:
      labels: `Tensor` of shape `[d_0, d_1, ..., d_{r-1}]` (where `r` is rank
        of `labels` and result) and dtype `int32` or `int64`. Each entry in
        `labels` must be an index in `[0, num_classes)`. Other values will
        raise an exception when this op is run on CPU, and return `NaN` for
        corresponding loss and gradient rows on GPU.
      logits: Unscaled log probabilities of shape
        `[d_0, d_1, ..., d_{r-1}, num_classes]` and dtype `float32` or
        `float64`.
      weights: Coefficients for the loss. This must be scalar or of same rank
        as `labels`.
      scope: the scope for the operations performed in computing the loss.
      loss_collection: collection to which the loss will be added.
      reduction: Type of reduction to apply to loss.

    Returns:
      A scalar `Tensor` that returns the weighted loss.

    Raises:
      ValueError: If the shapes of logits, labels, and weight are
        incompatible, or if `weights` is None.
    """
    scope_values = (logits, labels, weights)
    with ops.name_scope(scope, "sparse_softmax_cross_entropy_loss",
                        scope_values) as loss_scope:
        # Labels hold class IDs and logits hold one score per class ID, so
        # logits are expected to have exactly one more dimension than labels;
        # hence expected_rank_diff=1.
        labels, logits, weights = _remove_squeezable_dimensions(
            labels, logits, weights, expected_rank_diff=1)
        per_example_losses = nn.sparse_softmax_cross_entropy_with_logits(
            logits=logits, labels=labels, name="xentropy")
        return compute_weighted_loss(per_example_losses, weights, loss_scope,
                                     loss_collection, reduction=reduction)
示例#26
0
 def stage5(x, label):
   """Return the mean sparse softmax cross-entropy loss using the shared "w0".

   The 4x4 variable "w0" is fetched from scope "vs" with `reuse=True`, `x` is
   multiplied by it, and the product averaged over axis 1 forms the logits.
   """
   # Reuse the weight here.
   with variable_scope.variable_scope("vs", use_resource=True, reuse=True):
     ones_init = init_ops.ones_initializer()
     weight = variable_scope.get_variable(
         "w0", shape=[4, 4], dtype=np.float32, initializer=ones_init)
     projected = math_ops.matmul(x, weight)
     class_scores = math_ops.reduce_mean(projected, axis=[1])
     per_example_loss = nn.sparse_softmax_cross_entropy_with_logits(
         labels=label, logits=class_scores)
     return math_ops.reduce_mean(per_example_loss)
示例#27
0
def model():
    """Build a one-hidden-layer classifier graph and its training ops.

    Reads the module-level names `num_features`, `num_classes`,
    `learning_rate` and `clip_norm`, sets the graph random seed to 1, and
    prints the static shapes of several tensors while building.

    Returns:
      A tuple `(X_pl, t_pl, is_training_pl, l_out, l_out_softmax, loss,
      accuracy, train_op, global_step)`: the input, target and training-flag
      placeholders, the output logits and their softmax, the mean loss, the
      mean accuracy, the op applying the clipped Adam gradients, and the
      global step variable.
    """
    tf.set_random_seed(1)
    print("building model ...")
    with tf.variable_scope('train'):
        print("building model ...")
        X_pl = tf.placeholder(tf.float32, [None, num_features])
        print("X_pl", X_pl.get_shape())
        t_pl = tf.placeholder(tf.int32, [None,])
        print("t_pl", t_pl.get_shape())
        is_training_pl = tf.placeholder(tf.bool)
        X_bn = batch_norm(X_pl, is_training=is_training_pl)
        print("X_bn", X_bn.get_shape())
        # NOTE(review): the hidden layer reads the raw `X_pl`; `X_bn` is only
        # printed above and never consumed -- confirm whether that is intended.
        l1 = fully_connected(X_pl, num_outputs=100, activation_fn=relu)
        print("l1", l1.get_shape())
        l1_drop = dropout(l1, is_training=is_training_pl)
        print("l1_drop", l1_drop.get_shape())
        l_out = fully_connected(l1_drop, num_outputs=num_classes, activation_fn=None)
        print("l_out", l_out.get_shape())
        l_out_softmax = tf.nn.softmax(l_out)
        tf.contrib.layers.summarize_variables()

    with tf.variable_scope('metrics'):
        # Pass by keyword: the positional order of `logits`/`labels` differs
        # between TensorFlow releases, and TF >= 1.0 rejects positional calls.
        loss = sparse_softmax_cross_entropy_with_logits(logits=l_out,
                                                        labels=t_pl)
        print("loss", loss.get_shape())
        loss = tf.reduce_mean(loss)
        print("loss", loss.get_shape())
        tf.summary.scalar('train/loss', loss)
        argmax = tf.to_int32(tf.argmax(l_out, 1))
        print("argmax", argmax.get_shape())
        correct = tf.to_float(tf.equal(argmax, t_pl))
        print("correct,", correct.get_shape())
        accuracy = tf.reduce_mean(correct)
        print("accuracy", accuracy.get_shape())

    with tf.variable_scope('optimizer'):
        print("building optimizer ...")
        global_step = tf.Variable(0, name='global_step', trainable=False)
        optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
        grads_and_vars = optimizer.compute_gradients(loss)
        # Clip all gradients jointly by their global norm, then re-pair them
        # with their variables.
        gradients, variables = zip(*grads_and_vars)
        clipped_gradients, global_norm = (
            tf.clip_by_global_norm(gradients, clip_norm))
        clipped_grads_and_vars = zip(clipped_gradients, variables)

        tf.summary.scalar('train/global_gradient_norm', global_norm)

        train_op = optimizer.apply_gradients(clipped_grads_and_vars, global_step=global_step)

    return X_pl, t_pl, is_training_pl, l_out, l_out_softmax, loss, accuracy, train_op, global_step
示例#28
0
    def fwd_fn(idx, label):
      """Embedding-lookup forward pass; returns the mean loss.

      Rows of the [10, 1216] variable "c" (constant-initialised to 10.01) are
      looked up by `idx`, summed over the last axis to form the logits, and
      scored against `label`.
      """
      with variable_scope.variable_scope("part1", use_resource=True):
        constant_init = init_ops.constant_initializer(10.01)
        embedding = variable_scope.get_variable(
            "c",
            shape=[10, 1216],
            dtype=np.float32,
            initializer=constant_init,
            trainable=True)
      looked_up = embedding_ops.embedding_lookup(embedding, idx)

      class_scores = math_ops.reduce_sum(looked_up, axis=[-1])
      per_example_loss = nn.sparse_softmax_cross_entropy_with_logits(
          labels=label, logits=class_scores)
      return math_ops.reduce_mean(per_example_loss)
示例#29
0
def deprecated_flipped_sparse_softmax_cross_entropy_with_logits(logits,
                                                                labels,
                                                                name=None):
  """Computes sparse softmax cross entropy between `logits` and `labels`.

  This function differs from tf.nn.sparse_softmax_cross_entropy_with_logits
  only in the argument order: `logits` comes first here.

  Measures the probability error in discrete classification tasks in which the
  classes are mutually exclusive (each entry is in exactly one class).  For
  example, each CIFAR-10 image is labeled with one and only one label: an image
  can be a dog or a truck, but not both.

  **NOTE:**  For this operation, the probability of a given label is considered
  exclusive.  That is, soft classes are not allowed, and the `labels` vector
  must provide a single specific index for the true class for each row of
  `logits` (each minibatch entry).  For soft softmax classification with
  a probability distribution for each entry, see
  `softmax_cross_entropy_with_logits`.

  **WARNING:** This op expects unscaled logits, since it performs a softmax
  on `logits` internally for efficiency.  Do not call this op with the
  output of `softmax`, as it will produce incorrect results.

  A common use case is to have logits of shape `[batch_size, num_classes]` and
  labels of shape `[batch_size]`. But higher dimensions are supported.

  Args:
    logits: Unscaled log probabilities of rank `r` and shape
      `[d_0, d_1, ..., d_{r-2}, num_classes]` and dtype `float32` or `float64`.
    labels: `Tensor` of shape `[d_0, d_1, ..., d_{r-2}]` and dtype `int32` or
      `int64`. Each entry in `labels` must be an index in `[0, num_classes)`.
      Other values will raise an exception when this op is run on CPU, and
      return `NaN` for the corresponding loss and gradient rows on GPU.
    name: A name for the operation (optional).

  Returns:
    A `Tensor` of the same shape as `labels` and of the same type as `logits`
    with the softmax cross entropy loss.

  Raises:
    ValueError: If logits are scalars (need to have rank >= 1) or if the rank
      of the labels is not equal to the rank of the logits minus one.
  """
  # Forward by keyword so the flipped positional order cannot leak through.
  losses = nn.sparse_softmax_cross_entropy_with_logits(
      logits=logits, labels=labels, name=name)
  return losses
示例#30
0
def deprecated_flipped_sparse_softmax_cross_entropy_with_logits(
        logits, labels, name=None):
    """Computes sparse softmax cross entropy between `logits` and `labels`.

    This function differs from tf.nn.sparse_softmax_cross_entropy_with_logits
    only in the argument order: `logits` comes first here.

    Measures the probability error in discrete classification tasks in which
    the classes are mutually exclusive (each entry is in exactly one class).
    For example, each CIFAR-10 image is labeled with one and only one label: an
    image can be a dog or a truck, but not both.

    **NOTE:**  For this operation, the probability of a given label is
    considered exclusive.  That is, soft classes are not allowed, and the
    `labels` vector must provide a single specific index for the true class for
    each row of `logits` (each minibatch entry).  For soft softmax
    classification with a probability distribution for each entry, see
    `softmax_cross_entropy_with_logits`.

    **WARNING:** This op expects unscaled logits, since it performs a softmax
    on `logits` internally for efficiency.  Do not call this op with the
    output of `softmax`, as it will produce incorrect results.

    A common use case is to have logits of shape `[batch_size, num_classes]`
    and labels of shape `[batch_size]`. But higher dimensions are supported.

    Args:
      logits: Unscaled log probabilities of rank `r` and shape
        `[d_0, d_1, ..., d_{r-2}, num_classes]` and dtype `float32` or
        `float64`.
      labels: `Tensor` of shape `[d_0, d_1, ..., d_{r-2}]` and dtype `int32` or
        `int64`. Each entry in `labels` must be an index in `[0, num_classes)`.
        Other values will raise an exception when this op is run on CPU, and
        return `NaN` for the corresponding loss and gradient rows on GPU.
      name: A name for the operation (optional).

    Returns:
      A `Tensor` of the same shape as `labels` and of the same type as `logits`
      with the softmax cross entropy loss.

    Raises:
      ValueError: If logits are scalars (need to have rank >= 1) or if the rank
        of the labels is not equal to the rank of the logits minus one.
    """
    # Forward by keyword so the flipped positional order cannot leak through.
    losses = nn.sparse_softmax_cross_entropy_with_logits(
        labels=labels, logits=logits, name=name)
    return losses
示例#31
0
    def fwd_fn(img, label):
      """Runs `img` through three variable-scoped stages and returns the mean loss.

      The `conv`, `max_pool`, `block` and `fc` helpers are defined outside this
      function; their argument meanings are not visible here.
      """
      with variable_scope.variable_scope("part1", use_resource=True):
        stem = max_pool(nn.relu(conv(img, 7, 2, 8)), ksize=3, stride=2)

      with variable_scope.variable_scope("part2", use_resource=True):
        features = block("b", 2, 32, 1, stem)

      with variable_scope.variable_scope("part3", use_resource=True):
        # Average over axes 1 and 2 before the final fully connected layer.
        pooled = math_ops.reduce_mean(features, axis=[1, 2])
        logits = fc(pooled, 100)
        per_example = nn.sparse_softmax_cross_entropy_with_logits(
            logits=logits, labels=label)
        return math_ops.reduce_mean(per_example)
示例#32
0
    def _loss(self, logits, target, weight_tensor):
        """Builds the scalar training loss, optionally weighted per example.

        The per-example loss depends on `self._n_classes`: squared error when
        it is below 2, sigmoid cross entropy when it equals 2, and sparse
        softmax cross entropy (on the flattened target) otherwise.

        Args:
            logits: Model output tensor.
            target: Target tensor; cast to float for the first two cases.
            weight_tensor: Optional per-example weights, or None.

        Returns:
            A tensor named "loss": the plain mean when `weight_tensor` is None,
            otherwise the weighted sum divided by the sum of the weights.
        """
        if self._n_classes < 2:
            per_example = math_ops.square(logits - math_ops.to_float(target))
        elif self._n_classes == 2:
            float_target = math_ops.to_float(target)
            per_example = nn.sigmoid_cross_entropy_with_logits(logits, float_target)
        else:
            flat_target = array_ops.reshape(target, [-1])
            per_example = nn.sparse_softmax_cross_entropy_with_logits(logits, flat_target)

        if weight_tensor is None:
            return math_ops.reduce_mean(per_example, name="loss")

        # Flatten both operands so the element-wise product lines up.
        flat_loss = array_ops.reshape(per_example, shape=(-1,))
        flat_weights = array_ops.reshape(weight_tensor, shape=(-1,))
        weighted = math_ops.mul(flat_loss, flat_weights)
        weighted_total = math_ops.reduce_sum(weighted)
        weight_total = math_ops.to_float(math_ops.reduce_sum(weight_tensor))
        return math_ops.div(weighted_total, weight_total, name="loss")
示例#33
0
def sparse_categorical_crossentropy(target, output, from_logits=False, axis=-1):
  """Categorical crossentropy with integer targets.

  Arguments:
      target: An integer tensor.
      output: A tensor resulting from a softmax
          (unless `from_logits` is True, in which
          case `output` is expected to be the logits).
      from_logits: Boolean, whether `output` is the
          result of a softmax, or is a tensor of logits.
      axis: Int specifying the channels axis. `axis=-1` corresponds to data
          format `channels_last`, and `axis=1` corresponds to data format
          `channels_first`.

  Returns:
      Output tensor.

  Raises:
      ValueError: if `axis` is neither -1 nor one of the axes of `output`.
  """
  if not from_logits:
    came_from_softmax = (
        not isinstance(output, (ops.EagerTensor, variables_module.Variable))
        and output.op.type == 'Softmax')
    if came_from_softmax:
      # When softmax activation function is used for output operation, we
      # use logits from the softmax function directly to compute loss in order
      # to prevent collapsing zero when training.
      # See b/117284466
      assert len(output.op.inputs) == 1
      output = output.op.inputs[0]
    else:
      # Probabilities are clipped away from 0 and 1 before taking the log.
      epsilon_ = _constant_to_tensor(epsilon(), output.dtype.base_dtype)
      output = clip_ops.clip_by_value(output, epsilon_, 1 - epsilon_)
      output = math_ops.log(output)

  ndims = len(output.shape)
  axis = axis % ndims
  if axis != ndims - 1:
    # Move the class axis to the end, keeping the other axes in order.
    perm = [d for d in range(ndims) if d != axis] + [axis]
    output = array_ops.transpose(output, perm=perm)

  static_shape = output.shape
  flat_targets = cast(flatten(target), 'int64')
  flat_logits = array_ops.reshape(output, [-1, int(static_shape[-1])])
  losses = nn.sparse_softmax_cross_entropy_with_logits(
      labels=flat_targets, logits=flat_logits)
  if len(static_shape) < 3:
    return losses
  # If our output includes timesteps or spatial dimensions we need to reshape
  return array_ops.reshape(losses, array_ops.shape(output)[:-1])
示例#34
0
def sparse_softmax_cross_entropy(labels,
                                 logits,
                                 weights=1.0,
                                 scope=None,
                                 loss_collection=ops.GraphKeys.LOSSES):
    """Cross-entropy loss using `tf.nn.sparse_softmax_cross_entropy_with_logits`.

    `weights` acts as a coefficient for the loss. If a scalar is provided,
    then the loss is simply scaled by the given value. If `weights` is a
    tensor of shape [`batch_size`], then the loss weights apply to each
    corresponding sample.

    WARNING: `weights` also supports dimensions of 1, but the broadcasting does
    not work as advertised, you'll wind up with weighted sum instead of
    weighted mean for any but the last dimension. This will be cleaned up soon,
    so please do not rely on the current behavior for anything but the shapes
    documented for `weights` below.

    Args:
      labels: [batch_size, 1] or [batch_size] target labels of dtype `int32`
        or `int64` in the range `[0, num_classes)`.
      logits: [batch_size, num_classes] logits outputs of the network.
      weights: Coefficients for the loss. This must be of shape `[batch_size]`
        or `[batch_size, 1]`.
      scope: the scope for the operations performed in computing the loss.
      loss_collection: collection to which the loss will be added.

    Returns:
      A scalar `Tensor` representing the mean loss value.

    Raises:
      ValueError: If the shapes of logits, labels, and weight are
        incompatible, or if `weights` is None.
    """
    with ops.name_scope(scope, "sparse_softmax_cross_entropy_loss",
                        [logits, labels, weights]) as loss_scope:
        # Flatten labels to [batch_size] using the dynamic batch dimension.
        batch_size = array_ops.shape(labels)[0]
        flat_labels = array_ops.reshape(labels, shape=[batch_size])

        per_example = nn.sparse_softmax_cross_entropy_with_logits(
            labels=flat_labels, logits=logits, name="xentropy")
        # Reshape losses to [batch_size, 1] to be consistent with weights.
        num_losses = array_ops.shape(per_example)[0]
        column_losses = array_ops.reshape(per_example, shape=[num_losses, 1])
        return compute_weighted_loss(column_losses, weights, loss_scope,
                                     loss_collection)
示例#35
0
    def graph(x, label):
      """Builds two residual-style double-branch layers, a loss and an SGD step.

      Each stage applies `fc` followed by ReLU twice to the same input and sums
      the two branches; the stage widths are 48 and then 100.

      Returns:
        A `(loss, opt)` pair: the mean sparse softmax cross-entropy and the op
        returned by `GradientDescentOptimizer(0.01).minimize`.
      """
      for width in (48, 100):
        left = nn.relu(fc(x, width))
        right = nn.relu(fc(x, width))
        x = left + right

      per_example = nn.sparse_softmax_cross_entropy_with_logits(
          logits=x, labels=label)
      loss = math_ops.reduce_mean(per_example)

      sgd = gradient_descent.GradientDescentOptimizer(0.01)
      return loss, sgd.minimize(loss)
示例#36
0
def sparse_softmax_cross_entropy(
    labels, logits, weights=1.0, scope=None,
    loss_collection=ops.GraphKeys.LOSSES,
    reduction=Reduction.WEIGHTED_SUM_BY_NONZERO_WEIGHTS):
  """Cross-entropy loss using `tf.nn.sparse_softmax_cross_entropy_with_logits`.

  `weights` acts as a coefficient for the loss. If a scalar is provided,
  then the loss is simply scaled by the given value. If `weights` is a
  tensor of shape [`batch_size`], then the loss weights apply to each
  corresponding sample.

  Args:
    labels: `Tensor` of shape `[d_0, d_1, ..., d_{r-1}]` (where `r` is rank of
      `labels` and result) and dtype `int32` or `int64`. Each entry in `labels`
      must be an index in `[0, num_classes)`. Other values will raise an
      exception when this op is run on CPU, and return `NaN` for corresponding
      loss and gradient rows on GPU.
    logits: Unscaled log probabilities of shape
      `[d_0, d_1, ..., d_{r-1}, num_classes]` and dtype `float32` or `float64`.
    weights: Coefficients for the loss. This must be scalar or of same rank as
      `labels`.
    scope: the scope for the operations performed in computing the loss.
    loss_collection: collection to which the loss will be added.
    reduction: Type of reduction to apply to loss.

  Returns:
    A scalar `Tensor` that returns the weighted loss.

  Raises:
    ValueError: If the shapes of logits, labels, and weight are incompatible, or
      if `weights` is None.
  """
  with ops.name_scope(scope, "sparse_softmax_cross_entropy_loss",
                      (logits, labels, weights)) as loss_scope:
    # Labels hold class IDs and logits hold one score per class ID, so
    # rank(logits) - rank(labels) is expected to be 1.
    squeezed = _remove_squeezable_dimensions(
        labels, logits, weights, expected_rank_diff=1)
    labels, logits, weights = squeezed
    per_example = nn.sparse_softmax_cross_entropy_with_logits(
        labels=labels, logits=logits, name="xentropy")
    return compute_weighted_loss(
        per_example, weights, loss_scope, loss_collection,
        reduction=reduction)
示例#37
0
 def _loss_vec(self, logits, target):
   """Returns the per-example loss tensor for binary or multi-class targets.

   With `self._n_classes == 2` sigmoid cross entropy is used; otherwise the
   target must have an integer-compatible dtype and sparse softmax cross
   entropy is used.

   Raises:
     ValueError: In the multi-class case, if the target dtype is compatible
       with neither int64 nor int32.
   """
   if self._n_classes == 2:
     # sigmoid_cross_entropy_with_logits requires [batch_size, 1] target.
     target_rank = len(target.get_shape())
     if target_rank == 1:
       target = array_ops.expand_dims(target, dim=[1])
     return nn.sigmoid_cross_entropy_with_logits(
         logits, math_ops.to_float(target))

   # Check that we got int32/int64 for classification.
   is_integer = (target.dtype.is_compatible_with(dtypes.int64) or
                 target.dtype.is_compatible_with(dtypes.int32))
   if not is_integer:
     raise ValueError("Target's dtype should be int32, int64 or compatible. "
                      "Instead got %s." % target.dtype)
   # sparse_softmax_cross_entropy_with_logits requires [batch_size] target.
   if len(target.get_shape()) == 2:
     target = array_ops.squeeze(target, squeeze_dims=[1])
   return nn.sparse_softmax_cross_entropy_with_logits(logits, target)
示例#38
0
 def body(a, b, c, labels):
     """Applies `func` twice with two all-ones 64x64 weights and builds an SGD step.

     `func` is defined outside this function. The loss only feeds the
     optimizer; it is not part of the return value.

     Returns:
         A pair of the final activation tensor and the op returned by
         `GradientDescentOptimizer(0.001).minimize`.
     """
     with variable_scope.variable_scope("vs", use_resource=True):
         w0, w1 = [
             variable_scope.get_variable(
                 var_name,
                 shape=[64, 64],
                 dtype=np.float32,
                 initializer=init_ops.ones_initializer())
             for var_name in ("w0", "w1")
         ]
     first = func(a, w0, b)
     out = first - func(first, w1, c)
     per_example = nn.sparse_softmax_cross_entropy_with_logits(
         logits=out, labels=labels)
     loss = math_ops.reduce_mean(per_example)
     sgd = gradient_descent.GradientDescentOptimizer(0.001)
     train_op = sgd.minimize(loss)
     return out, train_op
  def _loss(self, logits, target, weight_tensor):
    """Builds the scalar training loss, optionally weighted per example.

    The per-example loss is squared error when `self._n_classes` is below 2,
    sigmoid cross entropy when it equals 2, and sparse softmax cross entropy
    on the flattened target otherwise.

    Args:
      logits: Model output tensor.
      target: Target tensor; cast to float for the first two cases.
      weight_tensor: Optional per-example weights, or None.

    Returns:
      A tensor named "loss": the plain mean when `weight_tensor` is None,
      otherwise the weighted sum divided by the sum of the weights.
    """
    if self._n_classes < 2:
      example_loss = math_ops.square(logits - math_ops.to_float(target))
    elif self._n_classes == 2:
      as_float = math_ops.to_float(target)
      example_loss = nn.sigmoid_cross_entropy_with_logits(logits, as_float)
    else:
      class_ids = array_ops.reshape(target, [-1])
      example_loss = nn.sparse_softmax_cross_entropy_with_logits(
          logits, class_ids)

    if weight_tensor is None:
      return math_ops.reduce_mean(example_loss, name="loss")

    # Flatten both operands so the element-wise product lines up.
    flat_loss = array_ops.reshape(example_loss, shape=(-1,))
    flat_weights = array_ops.reshape(weight_tensor, shape=(-1,))
    weighted = math_ops.mul(flat_loss, flat_weights)
    numerator = math_ops.reduce_sum(weighted)
    denominator = math_ops.to_float(math_ops.reduce_sum(weight_tensor))
    return math_ops.div(numerator, denominator, name="loss")
示例#40
0
def sparse_softmax_cross_entropy(labels, logits, weights=1.0, scope=None,
                                 loss_collection=ops.GraphKeys.LOSSES):
  """Cross-entropy loss using `tf.nn.sparse_softmax_cross_entropy_with_logits`.

  `weights` acts as a coefficient for the loss. If a scalar is provided,
  then the loss is simply scaled by the given value. If `weights` is a
  tensor of shape [`batch_size`], then the loss weights apply to each
  corresponding sample.

  WARNING: `weights` also supports dimensions of 1, but the broadcasting does
  not work as advertised, you'll wind up with weighted sum instead of weighted
  mean for any but the last dimension. This will be cleaned up soon, so please
  do not rely on the current behavior for anything but the shapes documented for
  `weights` below.

  Args:
    labels: [batch_size, 1] or [batch_size] target labels of dtype `int32` or
      `int64` in the range `[0, num_classes)`.
    logits: [batch_size, num_classes] logits outputs of the network.
    weights: Coefficients for the loss. This must be of shape `[batch_size]` or
      `[batch_size, 1]`.
    scope: the scope for the operations performed in computing the loss.
    loss_collection: collection to which the loss will be added.

  Returns:
    A scalar `Tensor` representing the mean loss value.

  Raises:
    ValueError: If the shapes of logits, labels, and weight are incompatible, or
      if `weights` is None.
  """
  with ops.name_scope(scope, "sparse_softmax_cross_entropy_loss",
                      [logits, labels, weights]) as loss_scope:
    # Flatten labels to [batch_size] using the dynamic batch dimension.
    label_count = array_ops.shape(labels)[0]
    flat_labels = array_ops.reshape(labels, shape=[label_count])

    xentropy = nn.sparse_softmax_cross_entropy_with_logits(
        labels=flat_labels, logits=logits, name="xentropy")
    # Reshape losses to [batch_size, 1] to be consistent with weights.
    loss_count = array_ops.shape(xentropy)[0]
    xentropy = array_ops.reshape(xentropy, shape=[loss_count, 1])
    return compute_weighted_loss(xentropy, weights, loss_scope,
                                 loss_collection)
示例#41
0
 def forward(self, y_true, y_pred):
     """Computes sparse softmax cross-entropy with classes along `self.axis`.

     Reads `self.axis` (class axis of `y_pred`) and `self.from_logits`.
     When `from_logits` is false, `y_pred` is treated as probabilities: it is
     either clipped and log-transformed, or, if it is the direct output of a
     `Softmax` op, replaced by that op's input logits.

     The class axis is moved to the last position exactly once. Transposing
     both before and after the logits extraction would apply the same
     permutation twice and put the class axis back where it started. The
     normalised axis is kept in a local so `self.axis` is never overwritten.

     Args:
         y_true: Integer class targets; flattened to 1-D and cast via
             `F.int64`.
         y_pred: Predictions (probabilities or logits).

     Returns:
         The per-element loss tensor; reshaped to `y_pred`'s leading
         dimensions when `y_pred` has rank 3 or more.

     Raises:
         ValueError: If `self.axis` is neither -1 nor a valid non-negative
             axis of `y_pred`.
     """
     rank = len(y_pred.get_shape())
     if self.axis != -1 and self.axis not in range(rank):
         raise ValueError("Axis out of y_pred's dimensions")
     if not self.from_logits:
         if isinstance(y_pred, (ops.EagerTensor, variables.Variable))\
                 or y_pred.op.type != 'Softmax':
             # NOTE(review): `epsilon` comes from outside this block and is
             # used as a plain number here - confirm it is not a function.
             y_pred = clip_ops.clip_by_value(t=y_pred,
                                             clip_value_min=epsilon,
                                             clip_value_max=1 - epsilon)
             y_pred = math_ops.log(y_pred)
         else:
             # When softmax activation function is used for output operation, we
             # use logits from the softmax function directly to compute loss in order
             # to prevent collapsing zero when training.
             # See b/117284466
             assert len(y_pred.op.inputs) == 1
             y_pred = y_pred.op.inputs[0]
     # Normalise -1 to the last axis without mutating instance state.
     axis = self.axis % rank
     if axis != rank - 1:
         permutation = list(range(axis)) + list(
             range(axis + 1, rank)) + [axis]
         y_pred = array_ops.transpose(y_pred, perm=permutation)
     shape = y_pred.shape
     y_true = F.int64(array_ops.reshape(y_true, [-1]))
     logits = array_ops.reshape(y_pred, [-1, int(shape[-1])])
     res = nn.sparse_softmax_cross_entropy_with_logits(labels=y_true,
                                                       logits=logits)
     if len(shape) >= 3:
         # Restore the leading (batch/time/spatial) dimensions.
         return array_ops.reshape(res, array_ops.shape(y_pred)[:-1])
     else:
         return res
示例#42
0
# Up sample 1: transposed convolution producing a 128 x 128 x 24 output.
# The filter's third dimension (24) matches the output channel count.
# NOTE(review): strides are all 1 here, so this layer presumably does not
# change the spatial size of conv_0 - confirm conv_0 is already 128 x 128.
wt1 = tf.Variable(tf.truncated_normal([3, 3, 24, 32]))
convt_1 = sigmoid(
    conv2d_transpose(conv_0,
                     filter=wt1,
                     output_shape=[batch_size, 128, 128, 24],
                     strides=[1, 1, 1, 1]))
print("Deconvolution 1:", convt_1)

# Up sample 2: stride-2 transposed convolution producing an
# IMAGE_HEIGHT x IMAGE_WIDTH x 2 output (256 x 256 x 2 per the original note).
wt2 = tf.Variable(tf.truncated_normal([3, 3, 2, 24]))
convt_2 = sigmoid(
    conv2d_transpose(convt_1,
                     filter=wt2,
                     output_shape=[batch_size, IMAGE_HEIGHT, IMAGE_WIDTH, 2],
                     strides=[1, 2, 2, 1]))
print("Deconvolution 2:", convt_2)
# Time elapsed since `start` (set earlier in the script) to build the model.
end1 = time.time()
print("Checkpoint model", end1 - start)

# Loss computation: flatten predictions to [-1, num_labels] and labels to 1-D,
# then average the per-element sparse softmax cross-entropy.
# NOTE(review): convt_2 has already been passed through `sigmoid`, while
# sparse_softmax_cross_entropy_with_logits expects unscaled logits - confirm
# this is intended.
logits = tf.reshape(convt_2, [-1, num_labels])
reshaped_labels = tf.reshape(labels, [-1])
cross_entropy = sparse_softmax_cross_entropy_with_logits(
    logits=logits, labels=reshaped_labels)
loss = tf.reduce_mean(cross_entropy)

# Plain gradient descent with learning rate 0.5.
optimizer = tf.train.GradientDescentOptimizer(0.5).minimize(loss)
# Time spent building the loss and optimizer ops.
end2 = time.time()
print("Checkpoint eval", end2 - end1)
示例#43
0
def adaptive_softmax_loss(inputs,
                          labels,
                          cutoff,
                          project_factor=4,
                          initializer=None,
                          name=None):
  """Computes and returns the adaptive softmax loss (an improvement of
  hierarchical softmax).

  See [Efficient softmax approximation for GPUs](https://arxiv.org/pdf/1609.04309v2.pdf).

  This is a faster way to train a softmax classifier over a huge number of
  classes, and can be used for **both training and prediction**. For example, it
  can be used for training a Language Model with a very huge vocabulary, and
  the trained language model can be used in speech recognition, text generation,
  and machine translation very efficiently.

  Both dimensions of `inputs` must be statically known, because they are
  converted to Python ints when the graph is built.

  Args:
    inputs: A `Tensor` of shape `[batch_size, dim]`.  The forward
      activations of the input network.
    labels: `Tensor` of dtype `int32` or `int64`. Each entry in `labels` must
      be an index in `[0, num_classes)`. Rows of `inputs` are selected with a
      mask derived from `labels`, so a 1-D `[batch_size]` tensor is expected.
    cutoff: A list indicating the limits of the different clusters.
    project_factor: A floating point value greater or equal to 1.0. The projection
      factor between two neighboring clusters.
    initializer: Initializer for adaptive softmax variables (optional). Defaults
      to a uniform initializer scaled by `sqrt(1 / dim)`.
    name: A name for the operation (optional).

  Returns:
    loss: A `batch_size` 1-D tensor of the adaptive softmax cross entropy loss.
    training_losses: A list of 1-D tensors of adaptive softmax loss for each
      cluster, which can be used for calculating the gradients and back
      propagation when training. The head loss is the last entry.
  """
  input_dim = int(inputs.get_shape()[1])
  sample_num = int(inputs.get_shape()[0])
  cluster_num = len(cutoff) - 1
  with ops.name_scope(name, "AdaptiveSoftmax"):
    if initializer is None:
      stdv = math.sqrt(1. / input_dim)
      initializer = init_ops.random_uniform_initializer(-stdv * 0.8, stdv * 0.8)

    # The head predicts the cutoff[0] head classes plus one slot per tail
    # cluster.
    head_dim = cutoff[0] + cluster_num
    head_w = variable_scope.get_variable("adaptive_softmax_head_w",
                             [input_dim, head_dim], initializer=initializer)

    # Each tail cluster gets a down-projection followed by its own output
    # layer; the projection shrinks by `project_factor` per cluster.
    tail_project_factor = project_factor
    tail_w = []
    for i in range(cluster_num):
      project_dim = max(1, input_dim // tail_project_factor)
      tail_dim = cutoff[i + 1] - cutoff[i]
      tail_w.append([
        variable_scope.get_variable("adaptive_softmax_tail{}_proj_w".format(i+1),
                        [input_dim, project_dim], initializer=initializer),
        variable_scope.get_variable("adaptive_softmax_tail{}_w".format(i+1),
                        [project_dim, tail_dim], initializer=initializer)
      ])
      tail_project_factor *= project_factor

    # Get tail masks and update head labels
    training_losses = []
    loss = array_ops.zeros([sample_num], dtype=dtypes.float32)
    head_labels = labels
    for i in range(cluster_num):
      mask = math_ops.logical_and(math_ops.greater_equal(labels, cutoff[i]),
                                  math_ops.less(labels, cutoff[i + 1]))

      # Update head labels: samples in tail cluster i are relabelled with the
      # head slot reserved for that cluster. `array_ops.where` is used here
      # for consistency with the rest of the function.
      head_labels = array_ops.where(mask, array_ops.constant([cutoff[0] + i] *
                            sample_num), head_labels)

      # Compute tail loss
      tail_inputs = array_ops.boolean_mask(inputs, mask)
      tail_logits = math_ops.matmul(math_ops.matmul(tail_inputs, tail_w[i][0]),
                                    tail_w[i][1])
      tail_labels = array_ops.boolean_mask(labels - cutoff[i], mask)
      tail_loss = nn.sparse_softmax_cross_entropy_with_logits(labels=tail_labels, logits=tail_logits)
      training_losses.append(tail_loss)
      # Scatter the tail loss back to the positions of the masked samples so
      # it can be added to the dense per-sample loss.
      aligned_tail_loss = sparse_tensor.SparseTensor(
        array_ops.squeeze(array_ops.where(mask)), tail_loss, [sample_num])
      loss += sparse_ops.sparse_tensor_to_dense(aligned_tail_loss)

    # Compute head loss
    head_logits = math_ops.matmul(inputs, head_w)
    head_loss = nn.sparse_softmax_cross_entropy_with_logits(logits=head_logits, labels=head_labels)
    loss += head_loss
    training_losses.append(head_loss)

    return loss, training_losses