Example #1
    def _get_train_ops(self, features, targets):
        """Method that builds model graph and returns trainer ops.

    Expected to be overridden by sub-classes that require custom support.
    This implementation uses the `model_fn` passed as a parameter to the
    constructor to build the model.

    Args:
      features: `Tensor` or `dict` of `Tensor` objects.
      targets: `Tensor` or `dict` of `Tensor` objects.

    Returns:
      Tuple of train `Operation` and loss `Tensor`.
    """
        _, loss = self._model_fn(features, targets, ModeKeys.TRAIN)
        # TODO(ipolosukhin): Move this to TensorFlowEstimator when
        # moving out training.
        if isinstance(self.learning_rate, types.FunctionType):
            learning_rate = self.learning_rate(
                contrib_framework.get_global_step())
        else:
            learning_rate = self.learning_rate
        if isinstance(self.optimizer, types.FunctionType):
            optimizer = self.optimizer(learning_rate)
        else:
            optimizer = self.optimizer
        train_op = layers.optimize_loss(loss,
                                        contrib_framework.get_global_step(),
                                        learning_rate=learning_rate,
                                        optimizer=optimizer,
                                        clip_gradients=self.clip_gradients)
        # Add update ops.
        train_op = control_flow_ops.group(train_op,
                                          *ops.get_collection('update_ops'))
        return train_op, loss
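The method above accepts either plain values or callables for `learning_rate` and `optimizer`. A minimal sketch of the two callable forms and how they get resolved, assuming TF 1.x graph mode (the concrete decay schedule and optimizer are illustrative choices, not this estimator's defaults):

import tensorflow as tf

# learning_rate given as a function of the global step
def decaying_rate(global_step):
    return tf.train.exponential_decay(0.1, global_step, decay_steps=1000, decay_rate=0.96)

# optimizer given as a function of the (already resolved) learning rate
def momentum_optimizer(learning_rate):
    return tf.train.MomentumOptimizer(learning_rate, momentum=0.9)

# _get_train_ops above resolves the callables roughly like this:
global_step = tf.train.get_or_create_global_step()
lr = decaying_rate(global_step)
opt = momentum_optimizer(lr)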
Example #2
  def _get_train_ops(self, features, targets):
    """Method that builds model graph and returns trainer ops.

    Expected to be overridden by sub-classes that require custom support.
    This implementation uses the `model_fn` passed as a parameter to the
    constructor to build the model.

    Args:
      features: `Tensor` or `dict` of `Tensor` objects.
      targets: `Tensor` or `dict` of `Tensor` objects.

    Returns:
      Tuple of train `Operation` and loss `Tensor`.
    """
    _, loss = self._model_fn(features, targets, ModeKeys.TRAIN)
    # TODO(ipolosukhin): Move this to TensorFlowEstimator when
    # moving out training.
    if isinstance(self.learning_rate, types.FunctionType):
      learning_rate = self.learning_rate(contrib_framework.get_global_step())
    else:
      learning_rate = self.learning_rate
    if isinstance(self.optimizer, types.FunctionType):
      optimizer = self.optimizer(learning_rate)
    else:
      optimizer = self.optimizer
    train_op = layers.optimize_loss(
        loss,
        contrib_framework.get_global_step(),
        learning_rate=learning_rate,
        optimizer=optimizer,
        clip_gradients=self.clip_gradients)
    # Add update ops.
    train_op = control_flow_ops.group(
        train_op, *ops.get_collection('update_ops'))
    return train_op, loss
Example #3
 def before_run(self, run_context):
   loss = (self.loss_op if self.loss_op is not None else
           run_context.session.graph.get_operation_by_name(
               LOSS_NAME).outputs[0])
   return session_run_hook.SessionRunArgs(
       {'global_step': contrib_framework.get_global_step(),
        'current_loss': loss})
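Fetches requested in `before_run` come back in `after_run` via `run_values.results`. A minimal companion sketch of a complete hook with the same structure (the `'loss'` operation name and the logging message are assumptions, not this project's values):

import tensorflow as tf

class LossLoggerHook(tf.train.SessionRunHook):
    """Hedged sketch: fetch the loss in before_run, log it in after_run."""

    def __init__(self, loss_op=None):
        self.loss_op = loss_op

    def before_run(self, run_context):
        loss = (self.loss_op if self.loss_op is not None else
                run_context.session.graph.get_operation_by_name('loss').outputs[0])
        return tf.train.SessionRunArgs(
            {'global_step': tf.train.get_global_step(), 'current_loss': loss})

    def after_run(self, run_context, run_values):
        # run_values.results holds the evaluated fetches requested above.
        results = run_values.results
        tf.logging.info('step %d, loss %f',
                        results['global_step'], results['current_loss'])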
Example #4
def lenet5_model(X,
                 y,
                 mode,
                 image_size=(-1, INPUT_IMAGE_SIZE, INPUT_IMAGE_SIZE, 1),
                 pool_size=(1, 2, 2, 1)):
    X = tf.pad(tf.reshape(X, image_size), [[0, 0], [2, 2], [2, 2], [0, 0]],
               mode="CONSTANT")
    print("x ", X.shape)
    print("y ", y.shape)

    layer1 = lenet5_layer(X, 6, [5, 5], pool_size)
    print("layer1 ", layer1.shape)
    layer2 = lenet5_layer(layer1, 16, [5, 5], pool_size)
    print("layer2 ", layer2.shape)
    layer3 = layers.conv2d(layer2,
                           num_outputs=120,
                           kernel_size=[5, 5],
                           activation_fn=tf.nn.softmax,
                           padding='VALID')
    print("layer3 ", layer3.shape)
    result = dense_layer(layer3, [84, 10], keep_prob=0.5)
    result = tf.reshape(result, [-1, 10])
    print("result ", result.shape)
    prediction, loss = learn.models.logistic_regression_zero_init(result, y)
    train_op = layers.optimize_loss(loss,
                                    framework.get_global_step(),
                                    optimizer='Adagrad',
                                    learning_rate=0.1)
    return prediction, loss, train_op
Example #5
def conv_model(X, Y_, mode):
    XX = tf.reshape(X, [-1, 28, 28, 1])
    biasInit = tf.constant_initializer(0.1, dtype=tf.float32)
    Y1 = layers.conv2d(XX,
                       num_outputs=6,
                       kernel_size=[6, 6],
                       biases_initializer=biasInit)
    Y2 = layers.conv2d(Y1,
                       num_outputs=12,
                       kernel_size=[5, 5],
                       stride=2,
                       biases_initializer=biasInit)
    Y3 = layers.conv2d(Y2,
                       num_outputs=24,
                       kernel_size=[4, 4],
                       stride=2,
                       biases_initializer=biasInit)
    Y4 = layers.flatten(Y3)
    Y5 = layers.relu(Y4, 200, biases_initializer=biasInit)
    Ylogits = layers.linear(Y5, 10)
    predict = tf.nn.softmax(Ylogits)
    classes = tf.cast(tf.argmax(predict, 1), tf.uint8)
    loss = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(Ylogits, tf.one_hot(Y_,
                                                                    10))) * 100
    train_op = layers.optimize_loss(loss, framework.get_global_step(), 0.001,
                                    "Adam")
    return {"predictions": predict, "classes": classes}, loss, train_op
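model_fn functions of this (predictions, loss, train_op) form were typically wrapped in tf.contrib.learn's Estimator. A hedged usage sketch (the MNIST arrays and model_dir are placeholders, not part of the snippet above):

from tensorflow.contrib import learn

# Hedged sketch: wiring conv_model into a contrib.learn Estimator (TF 1.x).
classifier = learn.Estimator(model_fn=conv_model, model_dir='/tmp/conv_model')
# train_images: float array shaped [N, 784]; train_labels: int array shaped [N]
classifier.fit(x=train_images, y=train_labels, batch_size=100, steps=10000)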
Example #6
  def model_fn(features, labels, mode):
    """Builds generic graph for training or eval."""

    # TODO logits = A tensor representing the pre-softmax likelihood of
    # each digit.
    tensors = {}
    # Add to the Graph the Ops for loss calculation.
    if mode == ModeKeys.INFER:
      # TODO tensors['digit'] = Tensor representing the predicted digit for 'features'
      # Since 'labels' is None we can't calculate a loss
      loss_op = None
    else:
      # TODO loss_op = Operation to calculate loss
      tensors['loss'] = loss_op
      tf.scalar_summary('loss', loss_op)

    # Add to the Graph the Ops for accuracy calculation.
    if mode == ModeKeys.EVAL:
      # TODO accuracy_op = Calculate the accuracy of the inferred digits given 'labels'
      tensors['accuracy'] = accuracy_op
      tf.scalar_summary('training/hptuning/metric', accuracy_op)

    # Add to the Graph the Ops that calculate and apply gradients.
    if mode == ModeKeys.TRAIN:
      global_step = framework.get_global_step()
      # TODO train_op = the gradient descent optimizer with the given learning rate
      # that minimizes the loss
    else:
      train_op = None

    return tensors, loss_op, train_op
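For reference, a hedged sketch of the lines the TODOs ask for, mirroring the completed variant that appears later in this listing (Example #34); `inference`, `loss`, `evaluation` and `args` are the same helpers that variant uses:

# Sketch of the TODO completions (taken from the completed variant, Example #34):
logits = inference(features, args.hidden1, args.hidden2)   # pre-softmax scores
tensors['digit'] = tf.argmax(tf.nn.softmax(logits), 1)     # INFER branch
loss_op = loss(logits, labels)                             # loss calculation
accuracy_op = evaluation(logits, labels)                   # EVAL branch
train_op = tf.train.GradientDescentOptimizer(
    args.learning_rate).minimize(loss_op, global_step=global_step)  # TRAIN branch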
Example #7
def softmax_model(X, Y_, mode):
    Ylogits = layers.linear(X, 10)
    predict = tf.nn.softmax(Ylogits)
    classes = tf.cast(tf.argmax(predict, 1), tf.uint8)
    loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(Ylogits, tf.one_hot(Y_, 10)))*100
    train_op = layers.optimize_loss(loss, framework.get_global_step(), 0.003, "Adam")
    return {"predictions":predict, "classes": classes}, loss, train_op
Example #8
  def _get_train_ops(self, features, targets):
    """Method that builds model graph and returns trainer ops.

    Args:
      features: `Tensor` or `dict` of `Tensor` objects.
      targets: `Tensor` or `dict` of `Tensor` objects.

    Returns:
      Tuple of train `Operation` and loss `Tensor`.
    """
    features, _, spec = data_ops.ParseDataTensorOrDict(features)
    labels = data_ops.ParseLabelTensorOrDict(targets)
    _assert_float32(features)
    _assert_float32(labels)

    graph_builder = self.graph_builder_class(
        self.params, device_assigner=self.device_assigner,
        **self.construction_args)

    epoch = None
    if self.data_feeder:
      epoch = self.data_feeder.make_epoch_variable()

    train = control_flow_ops.group(
        graph_builder.training_graph(
            features, labels, data_spec=spec, epoch=epoch,
            **self.training_args),
        state_ops.assign_add(contrib_framework.get_global_step(), 1))

    self.training_loss = graph_builder.training_loss(features, targets)

    return train, self.training_loss
Example #9
def model_fn(features, labels, mode, params):
    scores = predict_scores(features)

    if mode == ModeKeys.INFER:
        return EstimatorSpec(mode, predictions=scores)

    positive_scores = lookup_positives(scores, labels['click_position'])
    logits = create_diffs(positive_scores, scores)
    lbls = create_label(labels['click_position'])
    ele_loss = elementwise_loss(lbls, logits, labels['normal_mask']) * lbls
    loss = reduce_sum(ele_loss)
    true_lbl = true_label(features, labels)

    if mode == ModeKeys.EVAL:
        return EstimatorSpec(mode,
                             loss=loss,
                             eval_metric_ops={
                                 'acc':
                                 mean(
                                     accuracy(
                                         argmax(noise_label(labels), axis=1),
                                         argmax(to_one_hot(scores), axis=1)))
                             })
    else:
        optimizer = AdamOptimizer(learning_rate=params['learning_rate'])
        train_op = optimizer.minimize(loss, global_step=get_global_step())

        return EstimatorSpec(mode, loss=loss, train_op=train_op)
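This model_fn follows the core tf.estimator contract of returning an EstimatorSpec per mode. A minimal sketch of how it might be driven, assuming an input_fn exists and that params carries the learning rate the code reads (both are assumptions for illustration):

import tensorflow as tf

# Hedged sketch: constructing and training an Estimator around the model_fn above.
estimator = tf.estimator.Estimator(
    model_fn=model_fn,
    params={'learning_rate': 0.001},   # read via params['learning_rate'] above
    model_dir='/tmp/ranking_model')

# train_input_fn is assumed to yield (features, labels) with the keys the
# model_fn expects ('click_position', 'normal_mask', ...).
estimator.train(input_fn=train_input_fn, steps=1000)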
Example #10
  def _get_train_ops(self, features, targets):
    """Method that builds model graph and returns trainer ops.

    Args:
      features: `Tensor` or `dict` of `Tensor` objects.
      targets: `Tensor` or `dict` of `Tensor` objects.

    Returns:
      Tuple of train `Operation` and loss `Tensor`.
    """
    features, spec = data_ops.ParseDataTensorOrDict(features)
    labels = data_ops.ParseLabelTensorOrDict(targets)

    graph_builder = self.graph_builder_class(
        self.params, device_assigner=self.device_assigner,
        **self.construction_args)

    epoch = None
    if self.data_feeder:
      epoch = self.data_feeder.make_epoch_variable()

    train = control_flow_ops.group(
        graph_builder.training_graph(
            features, labels, data_spec=spec, epoch=epoch,
            **self.training_args),
        state_ops.assign_add(contrib_framework.get_global_step(), 1))

    self.training_loss = graph_builder.training_loss()

    return train, self.training_loss
Example #11
 def before_run(self, run_context):
   loss = (self.loss_op if self.loss_op is not None else
           run_context.session.graph.get_operation_by_name(
               LOSS_NAME).outputs[0])
   return session_run_hook.SessionRunArgs(
       {'global_step': contrib_framework.get_global_step(),
        'current_loss': loss})
Example #12
def auto_encoder(x_1, x_2, x_mask_1, x_mask_2, y, dropout, opt):
    x_1_emb, W_emb = embedding(x_1, opt)  # batch L emb
    x_2_emb = tf.nn.embedding_lookup(W_emb, x_2)

    x_1_emb = tf.nn.dropout(x_1_emb, dropout)  # batch L emb
    x_2_emb = tf.nn.dropout(x_2_emb, dropout)  # batch L emb

    biasInit = tf.constant_initializer(0.001, dtype=tf.float32)
    x_1_emb = layers.fully_connected(tf.squeeze(x_1_emb), num_outputs=opt.embed_size, biases_initializer=biasInit, activation_fn=tf.nn.relu, scope='trans', reuse=None)  # batch L emb
    x_2_emb = layers.fully_connected(tf.squeeze(x_2_emb), num_outputs=opt.embed_size, biases_initializer=biasInit, activation_fn=tf.nn.relu, scope='trans', reuse=True)

    x_1_emb = tf.expand_dims(x_1_emb, 3)  # batch L emb 1
    x_2_emb = tf.expand_dims(x_2_emb, 3)

    if opt.encoder == 'aver':
        H_enc_1 = aver_emb_encoder(x_1_emb, x_mask_1)
        H_enc_2 = aver_emb_encoder(x_2_emb, x_mask_2)

    elif opt.encoder == 'max':
        H_enc_1 = max_emb_encoder(x_1_emb, x_mask_1, opt)
        H_enc_2 = max_emb_encoder(x_2_emb, x_mask_2, opt)

    elif opt.encoder == 'concat':
        H_enc_1 = concat_emb_encoder(x_1_emb, x_mask_1, opt)
        H_enc_2 = concat_emb_encoder(x_2_emb, x_mask_2, opt)

    # discriminative loss term
    if opt.combine_enc == 'mult':
        H_enc = tf.multiply(H_enc_1, H_enc_2)  # batch * n_gan

    if opt.combine_enc == 'concat':
        H_enc = tf.concat([H_enc_1, H_enc_2], 1)

    if opt.combine_enc == 'sub':
        H_enc = tf.subtract(H_enc_1, H_enc_2)

    if opt.combine_enc == 'mix':
        H_1 = tf.multiply(H_enc_1, H_enc_2)
        H_2 = tf.concat([H_enc_1, H_enc_2], 1)
        H_3 = tf.subtract(H_enc_1, H_enc_2)
        H_enc = tf.concat([H_1, H_2, H_3], 1)

    # calculate the accuracy
    logits = discriminator_2layer(H_enc, opt, dropout, prefix='classify_', num_outputs=opt.category, is_reuse=None)
    prob = tf.nn.softmax(logits)

    correct_prediction = tf.equal(tf.argmax(prob, 1), tf.argmax(y, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
    loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=y, logits=logits))

    train_op = layers.optimize_loss(
        loss,
        framework.get_global_step(),
        optimizer='Adam',
        # variables=d_vars,
        learning_rate=opt.lr)

    return accuracy, loss, train_op, W_emb
Example #13
    def _model_fn(features, labels, mode):
        """Function that returns predictions, training loss, and training op."""
        weights = None
        if weights_name and weights_name in features:
            weights = features.pop(weights_name)

        graph_builder = graph_builder_class(params,
                                            device_assigner=device_assigner)
        inference = {}
        if (mode == model_fn_lib.ModeKeys.EVAL
                or mode == model_fn_lib.ModeKeys.INFER):
            inference[eval_metrics.INFERENCE_PROB_NAME] = (
                graph_builder.inference_graph(features))

            if not params.regression:
                inference[eval_metrics.INFERENCE_PRED_NAME] = math_ops.argmax(
                    inference[eval_metrics.INFERENCE_PROB_NAME], 1)

        # labels might be None if we're doing prediction (which brings up the
        # question of why we force everything to adhere to a single model_fn).
        loss_deps = []
        training_graph = None
        if labels is not None and mode == model_fn_lib.ModeKeys.TRAIN:
            training_graph = control_flow_ops.group(
                graph_builder.training_graph(features,
                                             labels,
                                             input_weights=weights,
                                             num_trainers=num_trainers,
                                             trainer_id=trainer_id),
                state_ops.assign_add(contrib_framework.get_global_step(), 1))
            loss_deps.append(training_graph)

        training_loss = None
        if (mode == model_fn_lib.ModeKeys.EVAL
                or mode == model_fn_lib.ModeKeys.TRAIN):
            with ops.control_dependencies(loss_deps):
                training_loss = graph_builder.training_loss(features,
                                                            labels,
                                                            name=LOSS_NAME)
            if report_feature_importances and mode == model_fn_lib.ModeKeys.EVAL:
                training_loss = logging_ops.Print(
                    training_loss, [graph_builder.feature_importances()],
                    summarize=1000)
        # Put weights back in
        if weights is not None:
            features[weights_name] = weights

        training_hooks = []
        if early_stopping_rounds:
            training_hooks.append(TensorForestLossHook(early_stopping_rounds))

        return model_fn_lib.ModelFnOps(mode=mode,
                                       predictions=inference,
                                       loss=training_loss,
                                       train_op=training_graph,
                                       training_hooks=training_hooks)
Example #14
def conv_model_train_op(loss, mode):
    return layers.optimize_loss(
        loss,
        framework.get_global_step(),
        learning_rate=0.003,
        optimizer="Adam",
        # to remove learning rate decay, comment the next line
        learning_rate_decay_fn=lambda lr, step: 0.0001 + tf.train.exponential_decay(
            lr, step, -2000, math.e)) if mode == learn.ModeKeys.TRAIN else None
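The decay lambda above is dense; an equivalent named function, purely a readability rewrite under the assumption that the -2000 decay_steps and math.e decay_rate are intentional:

import math
import tensorflow as tf

def _decay_fn(learning_rate, global_step):
    # exponential_decay computes lr * decay_rate ** (global_step / decay_steps);
    # with decay_rate=e and decay_steps=-2000 this is lr * exp(-global_step / 2000),
    # decaying toward the 0.0001 floor.
    return 0.0001 + tf.train.exponential_decay(
        learning_rate, global_step, decay_steps=-2000, decay_rate=math.e)

# equivalent to the lambda above: learning_rate_decay_fn=_decay_fn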
Example #15
def my_model(features, target):
    target = tf.one_hot(target, 3, 1, 0)

    logits, loss = learn.models.logistic_regression(features, target)

    train_op = layers.optimize_loss(loss,
                                    framework.get_global_step(),
                                    optimizer='Adagrad',
                                    learning_rate=0.01)
    return tf.argmax(logits, 1), loss, train_op
Example #16
 def _model_fn(features, targets, mode):
   ops.get_default_graph().add_to_collection('IS_TRAINING', mode == 'train')
   if self.class_weight is not None:
     constant_op.constant(self.class_weight, name='class_weight')
   predictions, loss = model_fn(features, targets)
   if isinstance(self.learning_rate, types.FunctionType):
     learning_rate = self.learning_rate(contrib_framework.get_global_step())
   else:
     learning_rate = self.learning_rate
   if isinstance(self.optimizer, types.FunctionType):
     optimizer = self.optimizer(learning_rate)
   else:
     optimizer = self.optimizer
   train_op = layers.optimize_loss(
       loss,
       contrib_framework.get_global_step(),
       learning_rate=learning_rate,
       optimizer=optimizer,
       clip_gradients=self.clip_gradients)
   return predictions, loss, train_op
Example #17
 def _build_model(self, data, target):
     ids = tensorflow.split(1, self.n_ids, data)
     node_vectors = [
         learn.ops.categorical_variable(ids[i], self.vocabulary_sizes[i], self.layer_size, str(i))
         for i in range(self.n_ids)
     ]
     activation_in = tensorflow.squeeze(tensorflow.concat(2, node_vectors), [1])
     activation_out = layers.stack(activation_in, layers.fully_connected, self.hidden_units_formation)
     prediction, loss = learn.models.linear_regression(activation_out, target)
     train_op = layers.optimize_loss(loss, framework.get_global_step(), self.learning_rate, "SGD")
     return prediction, loss, train_op
Example #18
 def _loss_to_train_op(self, loss):
     """Map `loss` to a training op."""
     with ops.name_scope('loss_to_train_op'):
         trainable_variables = ops.get_default_graph().get_collection(
             ops.GraphKeys.TRAINABLE_VARIABLES)
         global_step = contrib_framework.get_global_step()
         gradients = self._optimizer.compute_gradients(
             loss=loss, var_list=trainable_variables)
         processed_gradients = self._process_gradients(gradients)
         return self._optimizer.apply_gradients(processed_gradients,
                                                global_step=global_step)
Example #19
  def _loss_to_train_op(self, loss):
   """Map `loss` to a training op."""
   with ops.name_scope('loss_to_train_op'):
     trainable_variables = ops.get_default_graph().get_collection(
         ops.GraphKeys.TRAINABLE_VARIABLES)
     global_step = contrib_framework.get_global_step()
     gradients = self._optimizer.compute_gradients(
         loss=loss, var_list=trainable_variables)
     processed_gradients = self._process_gradients(gradients)
     return self._optimizer.apply_gradients(
         processed_gradients, global_step=global_step)
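`_process_gradients` is defined elsewhere in that project; a hedged sketch of the kind of transformation such a hook typically applies (global-norm clipping here is an illustrative assumption, not the project's actual implementation):

import tensorflow as tf

def _process_gradients(gradient_pairs, clip_norm=5.0):
    # gradient_pairs: list of (gradient, variable) tuples from compute_gradients.
    # Clip all gradients jointly by their global norm, keeping the pairing intact.
    gradients, variables = zip(*gradient_pairs)
    clipped, _ = tf.clip_by_global_norm(gradients, clip_norm)
    return list(zip(clipped, variables))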
Example #20
  def _model_fn(features, labels, mode):
    """Function that returns predictions, training loss, and training op."""
    weights = None
    if weights_name and weights_name in features:
      weights = features.pop(weights_name)

    graph_builder = graph_builder_class(params, device_assigner=device_assigner)
    inference = {}
    if (mode == model_fn_lib.ModeKeys.EVAL or
        mode == model_fn_lib.ModeKeys.INFER):
      inference[eval_metrics.INFERENCE_PROB_NAME] = (
          graph_builder.inference_graph(features))

      if not params.regression:
        inference[eval_metrics.INFERENCE_PRED_NAME] = math_ops.argmax(
            inference[eval_metrics.INFERENCE_PROB_NAME], 1)

    # labels might be None if we're doing prediction (which brings up the
    # question of why we force everything to adhere to a single model_fn).
    loss_deps = []
    training_graph = None
    if labels is not None and mode == model_fn_lib.ModeKeys.TRAIN:
      training_graph = control_flow_ops.group(
          graph_builder.training_graph(
              features, labels, input_weights=weights,
              num_trainers=num_trainers,
              trainer_id=trainer_id),
          state_ops.assign_add(contrib_framework.get_global_step(), 1))
      loss_deps.append(training_graph)

    training_loss = None
    if (mode == model_fn_lib.ModeKeys.EVAL or
        mode == model_fn_lib.ModeKeys.TRAIN):
      with ops.control_dependencies(loss_deps):
        training_loss = graph_builder.training_loss(
            features, labels, name=LOSS_NAME)
      if report_feature_importances and mode == model_fn_lib.ModeKeys.EVAL:
        training_loss = logging_ops.Print(training_loss,
                                          [graph_builder.feature_importances()],
                                          summarize=1000)
    # Put weights back in
    if weights is not None:
      features[weights_name] = weights

    training_hooks = []
    if early_stopping_rounds:
      training_hooks.append(TensorForestLossHook(early_stopping_rounds))

    return model_fn_lib.ModelFnOps(
        mode=mode,
        predictions=inference,
        loss=training_loss,
        train_op=training_graph,
        training_hooks=training_hooks)
Example #21
File: base.py  Project: jkk544/tensorflow
 def _model_fn(features, targets, mode):
   """Model function."""
   ops.get_default_graph().add_to_collection('IS_TRAINING', mode == 'train')
   if self.class_weight is not None:
     constant_op.constant(self.class_weight, name='class_weight')
   predictions, loss = model_fn(features, targets)
   if isinstance(self.learning_rate, types.FunctionType):
     learning_rate = self.learning_rate(contrib_framework.get_global_step())
   else:
     learning_rate = self.learning_rate
   if isinstance(self.optimizer, types.FunctionType):
     optimizer = self.optimizer(learning_rate)
   else:
     optimizer = self.optimizer
   train_op = layers.optimize_loss(
       loss,
       contrib_framework.get_global_step(),
       learning_rate=learning_rate,
       optimizer=optimizer,
       clip_gradients=self.clip_gradients)
   return predictions, loss, train_op
Example #22
 def before_run(self, run_context):
     return session_run_hook.SessionRunArgs({
         'global_step':
         contrib_framework.get_global_step(),
         'current_loss':
         run_context.session.graph.get_operation_by_name(
             'rf_training_loss').outputs[0],
         'confusion_matrix_print':
         run_context.session.graph.get_operation_by_name(
             'confusion_matrix_print').outputs[0],
         'regression_ornot':
         run_context.session.graph.get_operation_by_name(
             'regression_ornot').outputs[0],
     })
Example #23
def conv_model(X, Y_, mode):
    XX = tf.reshape(X, [-1, 28, 28, 1])
    biasInit = tf.constant_initializer(0.1, dtype=tf.float32)
    Y1 = layers.conv2d(XX,  num_outputs=6,  kernel_size=[6, 6], biases_initializer=biasInit)
    Y2 = layers.conv2d(Y1, num_outputs=12, kernel_size=[5, 5], stride=2, biases_initializer=biasInit)
    Y3 = layers.conv2d(Y2, num_outputs=24, kernel_size=[4, 4], stride=2, biases_initializer=biasInit)
    Y4 = layers.flatten(Y3)
    Y5 = layers.relu(Y4, 200, biases_initializer=biasInit)
    Ylogits = layers.linear(Y5, 10)
    predict = tf.nn.softmax(Ylogits)
    classes = tf.cast(tf.argmax(predict, 1), tf.uint8)
    loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(Ylogits, tf.one_hot(Y_, 10)))*100
    train_op = layers.optimize_loss(loss, framework.get_global_step(), 0.001, "Adam")
    return {"predictions":predict, "classes": classes}, loss, train_op
Example #24
    def _model_fn(features, labels, mode):
        """Function that returns predictions, training loss, and training op."""
        weights = None
        keys = None
        if weights_name and weights_name in features:
            weights = features.pop(weights_name)
        if keys_name and keys_name in features:
            keys = features.pop(keys_name)

        graph_builder = graph_builder_class(params,
                                            device_assigner=device_assigner)
        inference = {}
        if (mode == model_fn_lib.ModeKeys.EVAL
                or mode == model_fn_lib.ModeKeys.INFER):
            inference[eval_metrics.INFERENCE_PROB_NAME] = (
                graph_builder.inference_graph(features))

            if not params.regression:
                inference[eval_metrics.INFERENCE_PRED_NAME] = math_ops.argmax(
                    inference[eval_metrics.INFERENCE_PROB_NAME], 1)
            if keys:
                inference[KEYS_NAME] = keys

        # labels might be None if we're doing prediction (which brings up the
        # question of why we force everything to adhere to a single model_fn).
        loss_deps = []
        training_graph = None
        if labels is not None and mode == model_fn_lib.ModeKeys.TRAIN:
            training_graph = control_flow_ops.group(
                graph_builder.training_graph(features,
                                             labels,
                                             input_weights=weights,
                                             num_trainers=num_trainers,
                                             trainer_id=trainer_id),
                state_ops.assign_add(contrib_framework.get_global_step(), 1))
            loss_deps.append(training_graph)

        training_loss = None
        if (mode == model_fn_lib.ModeKeys.EVAL
                or mode == model_fn_lib.ModeKeys.TRAIN):
            with ops.control_dependencies(loss_deps):
                training_loss = graph_builder.training_loss(features,
                                                            labels,
                                                            name=LOSS_NAME)
        # Put weights back in
        if weights is not None:
            features[weights_name] = weights
        return (inference, training_loss, training_graph)
Example #25
 def _build_model(self, data, target):
     ids = tensorflow.split(1, self.n_ids, data)
     node_vectors = [
         learn.ops.categorical_variable(ids[i], self.vocabulary_sizes[i],
                                        self.layer_size, str(i))
         for i in range(self.n_ids)
     ]
     activation_in = tensorflow.squeeze(tensorflow.concat(2, node_vectors),
                                        [1])
     activation_out = layers.stack(activation_in, layers.fully_connected,
                                   self.hidden_units_formation)
     prediction, loss = learn.models.linear_regression(
         activation_out, target)
     train_op = layers.optimize_loss(loss, framework.get_global_step(),
                                     self.learning_rate, 'SGD')
     return prediction, loss, train_op
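Note the pre-1.0 argument order in tensorflow.split/concat above; under TF 1.x the equivalent calls would be the following (a hedged modernization of those two lines, behavior otherwise unchanged):

ids = tensorflow.split(data, self.n_ids, axis=1)
activation_in = tensorflow.squeeze(tensorflow.concat(node_vectors, axis=2), [1])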
Example #26
  def _model_fn(features, labels, mode):
    """Function that returns predictions, training loss, and training op."""
    weights = None
    keys = None
    if weights_name and weights_name in features:
      weights = features.pop(weights_name)
    if keys_name and keys_name in features:
      keys = features.pop(keys_name)

    graph_builder = graph_builder_class(params, device_assigner=device_assigner)
    inference = {}
    if (mode == model_fn_lib.ModeKeys.EVAL or
        mode == model_fn_lib.ModeKeys.INFER):
      inference[eval_metrics.INFERENCE_PROB_NAME] = (
          graph_builder.inference_graph(features))

      if not params.regression:
        inference[eval_metrics.INFERENCE_PRED_NAME] = math_ops.argmax(
            inference[eval_metrics.INFERENCE_PROB_NAME], 1)
      if keys:
        inference[KEYS_NAME] = keys

    # labels might be None if we're doing prediction (which brings up the
    # question of why we force everything to adhere to a single model_fn).
    loss_deps = []
    training_graph = None
    if labels is not None and mode == model_fn_lib.ModeKeys.TRAIN:
      training_graph = control_flow_ops.group(
          graph_builder.training_graph(
              features, labels, input_weights=weights,
              num_trainers=num_trainers,
              trainer_id=trainer_id),
          state_ops.assign_add(contrib_framework.get_global_step(), 1))
      loss_deps.append(training_graph)

    training_loss = None
    if (mode == model_fn_lib.ModeKeys.EVAL or
        mode == model_fn_lib.ModeKeys.TRAIN):
      with ops.control_dependencies(loss_deps):
        training_loss = graph_builder.training_loss(
            features, labels, name=LOSS_NAME)
    # Put weights back in
    if weights is not None:
      features[weights_name] = weights
    return (inference, training_loss, training_graph)
Example #27
    def _model_fn(features, labels):
        """Function that returns predictions, training loss, and training op."""
        weights = None
        keys = None
        if weights_name and weights_name in features:
            weights = features.pop(weights_name)
        if keys_name and keys_name in features:
            keys = features.pop(keys_name)
        processed_features, spec = data_ops.ParseDataTensorOrDict(features)
        _assert_float32(processed_features)
        if labels is not None:
            labels = data_ops.ParseLabelTensorOrDict(labels)
            _assert_float32(labels)

        graph_builder = graph_builder_class(params,
                                            device_assigner=device_assigner)
        inference = {
            eval_metrics.INFERENCE_PROB_NAME:
            graph_builder.inference_graph(processed_features, data_spec=spec)
        }
        if not params.regression:
            inference[eval_metrics.INFERENCE_PRED_NAME] = math_ops.argmax(
                inference[eval_metrics.INFERENCE_PROB_NAME], 1)
        if keys:
            inference[KEYS_NAME] = keys

        # labels might be None if we're doing prediction (which brings up the
        # question of why we force everything to adhere to a single model_fn).
        training_loss = None
        training_graph = None
        if labels is not None:
            training_loss = graph_builder.training_loss(processed_features,
                                                        labels,
                                                        data_spec=spec,
                                                        name=LOSS_NAME)
            training_graph = control_flow_ops.group(
                graph_builder.training_graph(processed_features,
                                             labels,
                                             data_spec=spec,
                                             input_weights=weights),
                state_ops.assign_add(contrib_framework.get_global_step(), 1))
        # Put weights back in
        if weights is not None:
            features[weights_name] = weights
        return (inference, training_loss, training_graph)
Example #28
def emb_classifier(x, x_mask, y, dropout, opt):
    # print x.get_shape()  # batch L
    x_emb, W_emb = embedding(x, opt)  # batch L emb
    x_emb = tf.expand_dims(x_emb, 3)  # batch L emb 1
    x_emb = tf.nn.dropout(x_emb, dropout)  # batch L emb 1

    x_mask = tf.expand_dims(x_mask, axis=-1)
    x_mask = tf.expand_dims(x_mask, axis=-1)  # batch L 1 1

    x_sum = tf.multiply(x_emb, x_mask)  # batch L emb 1
    H_enc = tf.reduce_sum(x_sum, axis=1, keep_dims=True)  # batch 1 emb 1
    H_enc = tf.squeeze(H_enc)  # batch emb
    x_mask_sum = tf.reduce_sum(x_mask, axis=1, keep_dims=True)  # batch 1 1 1
    x_mask_sum = tf.squeeze(x_mask_sum, [2, 3])  # batch 1
    H_enc_1 = H_enc / x_mask_sum  # batch emb

    H_enc_2 = tf.nn.max_pool(x_emb, [1, opt.maxlen, 1, 1], [1, 1, 1, 1],
                             'VALID')
    H_enc_2 = tf.squeeze(H_enc_2)

    H_enc = tf.concat([H_enc_1, H_enc_2], 1)

    H_enc = tf.squeeze(H_enc)
    logits = discriminator_2layer(H_enc,
                                  opt,
                                  dropout,
                                  prefix='classify_',
                                  num_outputs=10,
                                  is_reuse=None)  # batch * 10
    prob = tf.nn.softmax(logits)

    correct_prediction = tf.equal(tf.argmax(prob, 1), tf.argmax(y, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

    loss = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(labels=y, logits=logits))

    train_op = layers.optimize_loss(loss,
                                    framework.get_global_step(),
                                    optimizer='Adam',
                                    learning_rate=opt.lr)

    return accuracy, loss, train_op, W_emb
Example #29
def cons_disc(x_1, x_2, y, opt, l_temp=1):
    # print x.get_shape()  # batch L

    res = {}
    
    logits, H_1, H_2, H_1_1, H_2_1 = pair_discriminator(x_1, x_2, opt, l_temp)

    corr1 = correlation_cost(H_1_1)
    corr2 = correlation_cost(H_2_1)

    res['logits'] = logits
    res['y_pred'] = (logits > 0)
    # res['H_1'] = H_1
    # res['H_2'] = H_2
    res['H_1'] = H_1_1
    res['H_2'] = H_2_1
    res['corr'] = tf.sqrt((corr1 + corr2)/2)
    
    if opt.model == 'D':
        y_pred = logits
        loss = tf.reduce_mean(y * tf.log(y_pred))
    else:
        loss = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(labels=y, logits=logits))

    # encourage binary and disentangled codes
    loss = (loss
            + opt.reg * tf.reduce_mean(tf.square(tf.ones_like(H_1_1) - H_1_1) * tf.square(H_1_1))
            + opt.reg * tf.reduce_mean(tf.square(tf.ones_like(H_2_1) - H_2_1) * tf.square(H_2_1)))
    if opt.reg_corr != 0:
        loss += opt.reg_corr * (corr1 + corr2)

    tf.summary.scalar('loss', loss)

    train_op = layers.optimize_loss(
        loss,
        framework.get_global_step(),
        optimizer='Adam',
        learning_rate=opt.lr)

    return res, loss, train_op
Example #30
  def _model_fn(features, labels):
    """Function that returns predictions, training loss, and training op."""
    weights = None
    keys = None
    if weights_name and weights_name in features:
      weights = features.pop(weights_name)
    if keys_name and keys_name in features:
      keys = features.pop(keys_name)
    processed_features, spec = data_ops.ParseDataTensorOrDict(features)
    _assert_float32(processed_features)
    if labels is not None:
      labels = data_ops.ParseLabelTensorOrDict(labels)
      _assert_float32(labels)

    graph_builder = graph_builder_class(params, device_assigner=device_assigner)
    inference = {eval_metrics.INFERENCE_PROB_NAME:
                 graph_builder.inference_graph(processed_features,
                                               data_spec=spec)}
    if not params.regression:
      inference[eval_metrics.INFERENCE_PRED_NAME] = math_ops.argmax(
          inference[eval_metrics.INFERENCE_PROB_NAME], 1)
    if keys:
      inference[KEYS_NAME] = keys

    # labels might be None if we're doing prediction (which brings up the
    # question of why we force everything to adhere to a single model_fn).
    training_loss = None
    training_graph = None
    if labels is not None:
      training_loss = graph_builder.training_loss(processed_features, labels,
                                                  data_spec=spec,
                                                  name=LOSS_NAME)
      training_graph = control_flow_ops.group(
          graph_builder.training_graph(
              processed_features, labels, data_spec=spec,
              input_weights=weights),
          state_ops.assign_add(contrib_framework.get_global_step(), 1))
    # Put weights back in
    if weights is not None:
      features[weights_name] = weights
    return (inference, training_loss, training_graph)
Example #31
  def _get_train_ops(self, features, targets):
    """Method that builds model graph and returns trainer ops.

    Expected to be overridden by sub-classes that require custom support.
    This implementation uses the `model_fn` passed as a parameter to the
    constructor to build the model.

    Args:
      features: `Tensor` or `dict` of `Tensor` objects.
      targets: `Tensor` or `dict` of `Tensor` objects.

    Returns:
      Tuple of train `Operation` and loss `Tensor`.
    """
    _, loss = self._model_fn(features, targets, ModeKeys.TRAIN)
    train_op = layers.optimize_loss(
        loss,
        contrib_framework.get_global_step(),
        learning_rate=self.learning_rate,
        optimizer=self.optimizer,
        clip_gradients=self.clip_gradients)
    return train_op, loss
Example #32
File: s2s.py  Project: dreasysnail/CoCon
def conditional_s2s(src, tgt, z, opt, opt_t=None, is_reuse_generator=None):
    if not opt_t:
        opt_t = opt
    res = {}
    if opt.use_tgt_z:
        W_norm_d = embedding_only(opt, prefix='d_', is_reuse=None)
        z, _ = encoder(tgt, W_norm_d, opt, l_temp=1, prefix='d_', is_reuse=None, is_prob=None, is_padded=False)
    syn_sent, syn_one_hot, H_dec, sup_loss, sample_loss, sup_loss_all = s2s(
        z, src, tgt, opt, is_reuse=is_reuse_generator, prefix='g_')
    
    if opt.global_feature:
        z_hat, _ = encoder(syn_one_hot, W_norm_d, opt, l_temp=1, prefix='d_', is_reuse=True, is_prob=True, is_padded=False)
        z_loss = tf.reduce_sum(tf.square(z - z_hat)) / opt.batch_size / opt.n_hid
        res['z'] = z
        res['z_hat'] = z_hat
        res['z_loss'] = z_loss

    res['syn_sent'] = syn_sent
    g_cost = sup_loss + (z_loss * opt.lambda_z if opt.global_feature else 0)
    tf.summary.scalar('sup_loss', sup_loss)
    if opt.global_feature:
        tf.summary.scalar('z_loss', z_loss)
    summaries = [
        "learning_rate",
        "loss",
    ]
    t_vars = tf.trainable_variables()
    g_vars = [var for var in t_vars if 'g_' in var.name]
    train_op_g = layers.optimize_loss(
        g_cost,
        framework.get_global_step(),
        optimizer=opt.optimizer,
        clip_gradients=(lambda grad: _clip_gradients_seperate_norm(grad, opt.clip_grad)) if opt.clip_grad else None,
        variables=g_vars,
        learning_rate=opt.lr_g,
        summaries=summaries)
    return res, g_cost, train_op_g
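optimize_loss also accepts a callable for clip_gradients, as used above; per its docstring (Example #37 below), the callable takes a list of (gradient, variable) pairs and returns the same structure. A hedged sketch of what a per-tensor variant such as _clip_gradients_seperate_norm might look like (the project's real implementation may differ):

import tensorflow as tf

def clip_gradients_separate_norm(gradient_pairs, clip_norm=1.0):
    # Same contract as optimize_loss's clip_gradients callable:
    # takes a list of (gradient, variable) pairs and returns the same structure,
    # here clipping each gradient by its own norm rather than the global norm.
    return [(tf.clip_by_norm(grad, clip_norm) if grad is not None else None, var)
            for grad, var in gradient_pairs]

# e.g. clip_gradients=lambda grads: clip_gradients_separate_norm(grads, opt.clip_grad)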
Example #33
  def _get_train_ops(self, features, targets):
    """Method that builds model graph and returns trainer ops.

    Args:
      features: `Tensor` or `dict` of `Tensor` objects.
      targets: `Tensor` or `dict` of `Tensor` objects.

    Returns:
      Tuple of train `Operation` and loss `Tensor`.
    """
    features, _, weights, spec = data_ops.ParseDataTensorOrDict(features)
    labels = data_ops.ParseLabelTensorOrDict(targets)
    features, labels = self._feature_engineering_fn(features, labels)
    _assert_float32(features)
    _assert_float32(labels)

    if weights is not None:
      if 'input_weights' in self.training_args:
        logging.warning('Replacing input_weights in training_args.')
      self.training_args['input_weights'] = weights

    graph_builder = self.graph_builder_class(
        self.params, device_assigner=self.device_assigner,
        **self.construction_args)

    epoch = None
    if self.data_feeder:
      epoch = self.data_feeder.make_epoch_variable()

    train = control_flow_ops.group(
        graph_builder.training_graph(
            features, labels, data_spec=spec, epoch=epoch,
            **self.training_args),
        state_ops.assign_add(contrib_framework.get_global_step(), 1))

    self.training_loss = graph_builder.training_loss(features, targets)

    return train, self.training_loss
Example #34
    def model_fn(features, labels, mode):
        """Builds generic graph for training or eval."""

        # Build a Graph that computes predictions from the inference model.
        logits = inference(features, args.hidden1, args.hidden2)

        tensors = {}
        # Add to the Graph the Ops for loss calculation.
        if mode == ModeKeys.INFER:
            softmax = tf.nn.softmax(logits)
            tensors['digit'] = tf.argmax(softmax, 1)
            loss_op = None
        else:
            loss_op = loss(logits, labels)
            tensors['loss'] = loss_op
            tf.scalar_summary('loss', loss_op)

        if mode == ModeKeys.EVAL:
            # Add to the Graph the Ops for accuracy calculation.
            accuracy_op = evaluation(logits, labels)
            tensors['accuracy'] = accuracy_op
            tf.scalar_summary('training/hptuning/metric', accuracy_op)

        # Add to the Graph the Ops that calculate and apply gradients.
        if mode == ModeKeys.TRAIN:
            global_step = framework.get_global_step()
            # Create the gradient descent optimizer with the given learning rate.
            optimizer = tf.train.GradientDescentOptimizer(args.learning_rate)
            # Create a variable to track the global step.
            # Use the optimizer to apply the gradients that minimize the loss
            # (and also increment the global step counter) as a single training step.
            train_op = optimizer.minimize(loss_op, global_step=global_step)
            # Add streaming means.
        else:
            train_op = None

        return tensors, loss_op, train_op
Example #35
  def model_fn(features, labels, mode):
    """Builds generic graph for training or eval."""

    # Build a Graph that computes predictions from the inference model.
    logits = inference(features, args.hidden1, args.hidden2)

    tensors = {}
    # Add to the Graph the Ops for loss calculation.
    if mode == ModeKeys.INFER:
      tensors['digit'] = tf.argmax(logits, 1)
      loss_op = None
    else:
      loss_op = loss(logits, labels)
      tensors['loss'] = loss_op
      tf.scalar_summary('loss', loss_op)

    if mode == ModeKeys.EVAL:
      # Add to the Graph the Ops for accuracy calculation.
      accuracy_op = evaluation(logits, labels)
      tensors['accuracy'] = accuracy_op
      tf.scalar_summary('training/hptuning/metric', accuracy_op)

    # Add to the Graph the Ops that calculate and apply gradients.
    if mode == ModeKeys.TRAIN:
      global_step = framework.get_global_step()
      # Create the gradient descent optimizer with the given learning rate.
      optimizer = tf.train.GradientDescentOptimizer(args.learning_rate)
      # Create a variable to track the global step.
      # Use the optimizer to apply the gradients that minimize the loss
      # (and also increment the global step counter) as a single training step.
      train_op = optimizer.minimize(loss_op, global_step=global_step)
      # Add streaming means.
    else:
      train_op = None

    return tensors, loss_op, train_op
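The helpers `inference`, `loss`, and `evaluation` referenced in the two examples above come from the project's MNIST model module. Hedged sketches of what such helpers conventionally compute, in the style of the classic TensorFlow MNIST tutorial (assumptions about this project, not quoted from it):

import tensorflow as tf

def loss(logits, labels):
    # Mean cross-entropy between integer class labels and class logits.
    return tf.reduce_mean(
        tf.nn.sparse_softmax_cross_entropy_with_logits(labels=labels, logits=logits))

def evaluation(logits, labels):
    # Number of examples whose true label is the top prediction.
    correct = tf.nn.in_top_k(logits, labels, 1)
    return tf.reduce_sum(tf.cast(correct, tf.int32))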
Example #36
        def model_fn(features, labels, mode):
            """BaselineModel model_fn.

            Args:
              features: `Tensor` or `dict` of `Tensor`.
              labels: A `dict` of `Tensor` objects. Expected to have a key/value pair
                for the key self.label_column_name, "IPS_example_weights_with_label",
                and "IPS_example_weights_without_label".
                IPS stands for inverse propensity score, wherein each example is
                assigned a weight inversely proportional to its propensity of
                appearing in the training distribution. Concretely, ips-weight = 1/p(x),
                where p(x) is the probability of x in the training distribution.
                In "IPS_without_label", each example is given a weight equal to the
                inverse propensity score of its subgroup, e.g. 1/p("Black Female").
                In "IPS_with_label", each example is assigned a weight equal to the
                inverse propensity score of its subgroup and class membership, e.g.
                1/p("Black Female", "class 0").
              mode: Defines whether this is training, evaluation or prediction. See
                `ModeKeys`. Currently PREDICT mode is not implemented.

            Returns:
              An instance of `tf.estimator.EstimatorSpec`, which encapsulates the
              `mode`, `predictions`, `loss` and the `train_op`. Note that here
              `predictions` is either a `Tensor` or a `dict` of `Tensor` objects,
              representing the prediction of the binary classification model.
              `loss` is a scalar containing the loss of the step and `train_op` is
              the op for training.
            """

            # Instantiates a tensor with true class labels
            class_labels = labels[self._label_column_name]

            ips_example_weights_with_label = labels[
                IPS_WITH_LABEL_TARGET_COLUMN_NAME]
            ips_example_weights_without_label = labels[
                IPS_WITHOUT_LABEL_TARGET_COLUMN_NAME]

            tf.logging.info('model_fn for mode: {}'.format(mode))

            with tf.name_scope('model'):
                input_layer = tf.feature_column.input_layer(
                    features, self._feature_columns)
                layer = input_layer
                for unit in self._hidden_units:
                    layer = tf.layers.Dense(unit,
                                            activation=self._activation)(layer)
                logits = tf.layers.Dense(1)(layer)
                sigmoid_output = tf.nn.sigmoid(logits, name='sigmoid')
                class_predictions = tf.cast(tf.greater(sigmoid_output, 0.5), tf.float32)  # pylint: disable=line-too-long
                tf.summary.histogram('class_predictions', class_predictions)

            if self._reweighting_type == 'IPS_with_label':
                example_weights = ips_example_weights_with_label
            elif self._reweighting_type == 'IPS_without_label':
                example_weights = ips_example_weights_without_label

            # Initializes Loss Functions
            loss = self._loss(class_labels, logits, example_weights)

            # Sets up dictionaries used for computing performance metrics
            predictions = {
                (self._label_column_name, 'class_ids'):
                tf.reshape(class_predictions, [-1]),
                (self._label_column_name, 'logistic'):
                tf.reshape(sigmoid_output, [-1])
            }

            class_id_kwargs = {
                'labels': class_labels,
                'predictions': class_predictions
            }
            logistics_kwargs = {
                'labels': class_labels,
                'predictions': sigmoid_output
            }

            # EVAL Mode
            if mode == tf_estimator.ModeKeys.EVAL:
                with tf.name_scope('eval_metrics'):
                    eval_metric_ops = {
                        'accuracy':
                        tf.metrics.accuracy(**class_id_kwargs),
                        'precision':
                        tf.metrics.precision(**class_id_kwargs),
                        'recall':
                        tf.metrics.recall(**class_id_kwargs),
                        'fp':
                        tf.metrics.false_positives(**class_id_kwargs),
                        'fn':
                        tf.metrics.false_negatives(**class_id_kwargs),
                        'tp':
                        tf.metrics.true_positives(**class_id_kwargs),
                        'tn':
                        tf.metrics.true_negatives(**class_id_kwargs),
                        'fpr':
                        contrib_metrics.streaming_false_positive_rate(
                            **class_id_kwargs),  # pylint: disable=line-too-long
                        'fnr':
                        contrib_metrics.streaming_false_negative_rate(
                            **class_id_kwargs),  # pylint: disable=line-too-long
                        'auc':
                        tf.metrics.auc(curve='ROC', **logistics_kwargs),
                        'aucpr':
                        tf.metrics.auc(curve='PR', **logistics_kwargs)
                    }

                    # EstimatorSpec object for evaluation
                    estimator_spec = tf_estimator.EstimatorSpec(
                        mode=mode,
                        predictions=predictions,
                        loss=loss,
                        eval_metric_ops=eval_metric_ops)

            # TRAIN Mode
            if mode == tf_estimator.ModeKeys.TRAIN:
                train_op_primary = contrib_layers.optimize_loss(
                    loss=loss,
                    learning_rate=self._learning_rate,
                    global_step=contrib_framework.get_global_step(),
                    optimizer=self._optimizer)

                estimator_spec = tf_estimator.EstimatorSpec(
                    mode=mode,
                    predictions=predictions,
                    loss=loss,
                    train_op=train_op_primary)

            return estimator_spec
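The IPS weights consumed above are computed upstream of this model_fn. A small illustrative sketch of the formula from the docstring, ips-weight = 1/p(x), using a hypothetical helper and subgroup labels (not part of the project):

import collections

def inverse_propensity_weights(subgroups):
    """Hedged sketch: weight each example by 1 / p(its subgroup)."""
    counts = collections.Counter(subgroups)
    total = float(len(subgroups))
    return [total / counts[s] for s in subgroups]

# e.g. four examples, three in one subgroup and one in another:
# inverse_propensity_weights(['a', 'a', 'a', 'b']) -> [1.33..., 1.33..., 1.33..., 4.0]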
Example #37
def optimize_loss(loss,
                  global_step,
                  learning_rate,
                  optimizer,
                  gradient_noise_scale=None,
                  gradient_multipliers=None,
                  clip_gradients=None,
                  learning_rate_decay_fn=None,
                  update_ops=None,
                  variables=None,
                  name=None,
                  summaries=None,
                  colocate_gradients_with_ops=False):
    """Given loss and parameters for optimizer, returns a training op.

  Various ways of passing optimizers include:

  - string, name of the optimizer like 'SGD', 'Adam', see OPTIMIZER_CLS_NAMES
      for full list. E.g. `optimize_loss(..., optimizer='Adam')`.
  - function, takes learning rate `Tensor` as argument and must return
      `Optimizer` instance. E.g. `optimize_loss(...,
      optimizer=lambda lr: tf.train.MomentumOptimizer(lr, momentum=0.5))`.
    Alternatively, if `learning_rate` is `None`, the function takes no
    arguments. E.g. `optimize_loss(..., learning_rate=None,
      optimizer=lambda: tf.train.MomentumOptimizer(0.5, momentum=0.5))`.
  - class, subclass of `Optimizer` that takes only one required argument -
      learning rate, such as AdamOptimizer, AdagradOptimizer.
      E.g. `optimize_loss(..., optimizer=tf.train.AdagradOptimizer)`.
  - object, instance of subclass of `Optimizer`.
      E.g., `optimize_loss(..., optimizer=tf.train.AdagradOptimizer(0.5))`.

  Args:
    loss: Scalar `Tensor`.
    global_step: Scalar int `Tensor`, step counter for each update. If not
                 supplied, it will be fetched from the default graph (see
                 `tf.contrib.framework.get_global_step` for details). If it's
                 not been created, no step will be incremented with each weight
                 update. `learning_rate_decay_fn` requires `global_step`.
    learning_rate: float or `Tensor`, magnitude of update per each training
                   step. Can be `None`.
    optimizer: string, class or optimizer instance, used as trainer.
               string should be name of optimizer, like 'SGD',
                 'Adam', 'Adagrad'. Full list in OPTIMIZER_CLS_NAMES constant.
               class should be sub-class of `tf.Optimizer` that implements
                 `compute_gradients` and `apply_gradients` functions.
               optimizer instance should be instantiation of `tf.Optimizer`
                 sub-class and have `compute_gradients` and `apply_gradients`
                 functions.
    gradient_noise_scale: float or None, adds 0-mean normal noise scaled by this
                          value.
    gradient_multipliers: dict of variables or variable names to floats.
                          If present, gradients for specified
                          variables will be multiplied by given constant.
    clip_gradients: float, callable or `None`. If a float is provided, global
      clipping is applied to prevent the norm of the gradients from exceeding
      this value. Alternatively, a callable can be provided, e.g. adaptive_clipping.
      This callable takes a `list` of `(gradients, variables)` `tuple`s and
      returns the same thing with the gradients modified.
    learning_rate_decay_fn: function, takes `learning_rate` and `global_step`
                            `Tensor`s, returns `Tensor`.
                            Can be used to implement any learning rate decay
                            functions.
                            For example: `tf.train.exponential_decay`.
                            Ignored if `learning_rate` is not supplied.
    update_ops: list of update `Operation`s to execute at each step. If `None`,
                uses elements of UPDATE_OPS collection. The order of execution
                between `update_ops` and `loss` is non-deterministic.
    variables: list of variables to optimize or
               `None` to use all trainable variables.
    name: The name for this operation is used to scope operations and summaries.
    summaries: List of internal quantities to visualize on tensorboard. If not
               set only the loss and the learning rate will be reported. The
               complete list is in OPTIMIZER_SUMMARIES.
    colocate_gradients_with_ops: If True, try colocating gradients with the
                                 corresponding op.

  Returns:
    Training op.

  Raises:
    ValueError: if:
        * `loss` is an invalid type or shape.
        * `global_step` is an invalid type or shape.
        * `learning_rate` is an invalid type or value.
        * `optimizer` is wrong type.
        * `clip_gradients` is not float or callable.
        * `learning_rate` and `learning_rate_decay_fn` are supplied, but no
          `global_step` is available.
  """
    loss = ops.convert_to_tensor(loss)
    contrib_framework.assert_scalar(loss)
    if global_step is None:
        global_step = contrib_framework.get_global_step()
    else:
        contrib_framework.assert_global_step(global_step)
    with vs.variable_scope(name, "OptimizeLoss", [loss, global_step]):
        # Update ops take UPDATE_OPS collection if not provided.
        if update_ops is None:
            update_ops = set(ops.get_collection(ops.GraphKeys.UPDATE_OPS))
        # Make sure update ops are run before computing loss.
        if update_ops:
            loss = control_flow_ops.with_dependencies(list(update_ops), loss)

        # Learning rate variable, with possible decay.
        lr = None
        if learning_rate is not None:
            if (isinstance(learning_rate, ops.Tensor)
                    and learning_rate.get_shape().ndims == 0):
                lr = learning_rate
            elif isinstance(learning_rate, float):
                if learning_rate < 0.0:
                    raise ValueError("Invalid learning_rate %s.",
                                     learning_rate)
                lr = vs.get_variable(
                    "learning_rate", [],
                    trainable=False,
                    initializer=init_ops.constant_initializer(learning_rate))
            else:
                raise ValueError(
                    "Learning rate should be 0d Tensor or float. "
                    "Got %s of type %s" %
                    (str(learning_rate), str(type(learning_rate))))
        if summaries is None:
            summaries = ["loss", "learning_rate"]
        else:
            for summ in summaries:
                if summ not in OPTIMIZER_SUMMARIES:
                    raise ValueError(
                        "Summaries should be one of [%s], you provided %s." %
                        (", ".join(OPTIMIZER_SUMMARIES), summ))
        if learning_rate is not None and learning_rate_decay_fn is not None:
            if global_step is None:
                raise ValueError(
                    "global_step is required for learning_rate_decay_fn.")
            lr = learning_rate_decay_fn(lr, global_step)
            if "learning_rate" in summaries:
                summary.scalar("learning_rate", lr)

        # Create optimizer, given specified parameters.
        if isinstance(optimizer, six.string_types):
            if lr is None:
                raise ValueError(
                    "Learning rate is None, but should be specified if "
                    "optimizer is string (%s)." % optimizer)
            if optimizer not in OPTIMIZER_CLS_NAMES:
                raise ValueError(
                    "Optimizer name should be one of [%s], you provided %s." %
                    (", ".join(OPTIMIZER_CLS_NAMES), optimizer))
            opt = OPTIMIZER_CLS_NAMES[optimizer](learning_rate=lr)
        elif (isinstance(optimizer, type)
              and issubclass(optimizer, optimizer_.Optimizer)):
            if lr is None:
                raise ValueError(
                    "Learning rate is None, but should be specified if "
                    "optimizer is class (%s)." % optimizer)
            opt = optimizer(learning_rate=lr)
        elif isinstance(optimizer, optimizer_.Optimizer):
            opt = optimizer
        elif callable(optimizer):
            if learning_rate is not None:
                opt = optimizer(lr)
            else:
                opt = optimizer()
            if not isinstance(opt, optimizer_.Optimizer):
                raise ValueError(
                    "Unrecognized optimizer: function should return "
                    "subclass of Optimizer. Got %s." % str(opt))
        else:
            raise ValueError(
                "Unrecognized optimizer: should be string, "
                "subclass of Optimizer, instance of "
                "subclass of Optimizer or function with one argument. "
                "Got %s." % str(optimizer))

        # All trainable variables, if specific variables are not specified.
        if variables is None:
            variables = vars_.trainable_variables()

        # Compute gradients.
        gradients = opt.compute_gradients(
            loss,
            variables,
            colocate_gradients_with_ops=colocate_gradients_with_ops)

        # Optionally add gradient noise.
        if gradient_noise_scale is not None:
            gradients = _add_scaled_noise_to_gradients(gradients,
                                                       gradient_noise_scale)

        # Multiply some gradients.
        if gradient_multipliers is not None:
            gradients = _multiply_gradients(gradients, gradient_multipliers)

        if "gradient_norm" in summaries:
            summary.scalar("global_norm/gradient_norm",
                           clip_ops.global_norm(list(zip(*gradients))[0]))

        # Optionally clip gradients by global norm.
        if isinstance(clip_gradients, float):
            gradients = _clip_gradients_by_norm(gradients, clip_gradients)
        elif callable(clip_gradients):
            gradients = clip_gradients(gradients)
        elif clip_gradients is not None:
            raise ValueError("Unknown type %s for clip_gradients" %
                             type(clip_gradients))

        # Add scalar summary for loss.
        if "loss" in summaries:
            summary.scalar("loss", loss)

        # Add histograms for variables, gradients and gradient norms.
        for gradient, variable in gradients:
            if isinstance(gradient, ops.IndexedSlices):
                grad_values = gradient.values
            else:
                grad_values = gradient

            if grad_values is not None:
                var_name = variable.name.replace(":", "_")
                if "gradients" in summaries:
                    summary.histogram("gradients/%s" % var_name, grad_values)
                if "gradient_norm" in summaries:
                    summary.scalar("gradient_norm/%s" % var_name,
                                   clip_ops.global_norm([grad_values]))

        if clip_gradients is not None and "gradient_norm" in summaries:
            summary.scalar("global_norm/clipped_gradient_norm",
                           clip_ops.global_norm(list(zip(*gradients))[0]))

        # Create gradient updates.
        grad_updates = opt.apply_gradients(gradients,
                                           global_step=global_step,
                                           name="train")

        # Ensure the train_tensor computes grad_updates.
        train_tensor = control_flow_ops.with_dependencies([grad_updates], loss)

        return train_tensor
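
A minimal usage sketch for the `optimize_loss` implementation above (a sketch only, assuming TensorFlow 1.x, where this function is exported as `tf.contrib.layers.optimize_loss`; the toy regression graph and all constants are illustrative):

import tensorflow as tf

# Toy graph: a linear model with a squared-error loss.
x = tf.placeholder(tf.float32, [None, 3])
y = tf.placeholder(tf.float32, [None, 1])
w = tf.get_variable('w', [3, 1])
loss = tf.reduce_mean(tf.square(tf.matmul(x, w) - y))

global_step = tf.train.get_or_create_global_step()
train_op = tf.contrib.layers.optimize_loss(
    loss,
    global_step,
    learning_rate=0.1,
    optimizer='SGD',       # string looked up in OPTIMIZER_CLS_NAMES
    clip_gradients=5.0)    # float -> clip gradients by global norm
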
示例#38
0
    def model_fn(features, labels, mode):
      """BaselineModel model_fn.

      Args:
        features: `Tensor` or `dict` of `Tensor`.
        labels: A `dict` of `Tensor` Objects. Expects to have a key/value pair
          for the key self.label_column_name.
        mode: Defines whether this is training, evaluation or prediction. See
          `ModeKeys`. Currently PREDICT mode is not implemented.

      Returns:
        An instance of `tf.estimator.EstimatorSpec`, which encapsulates the
        `mode`, `predictions`, `loss` and the `train_op`. Note that here
        `predictions` is either a `Tensor` or a `dict` of `Tensor` objects,
        representing the prediction of the binary classification model.
        `loss` is a scalar containing the loss of the step and `train_op` is the
        op for training.
      """

      # Instantiates a tensor with true class labels
      class_labels = labels[self._label_column_name]

      tf.logging.info('model_fn for mode: {}'.format(mode))

      with tf.name_scope('model'):
        input_layer = tf.feature_column.input_layer(features,
                                                    self._feature_columns)
        layer = input_layer
        for unit in self._hidden_units:
          layer = tf.layers.Dense(unit, activation=self._activation)(layer)
        logits = tf.layers.Dense(1)(layer)
        sigmoid_output = tf.nn.sigmoid(logits, name='sigmoid')
        class_predictions = tf.cast(tf.greater(sigmoid_output, 0.5), tf.float32)
        tf.summary.histogram('class_predictions', class_predictions)

      # Initializes Loss Functions
      loss = self._loss(class_labels, logits)
      # Sets up dictionaries used for computing performance metrics
      predictions = {
          (self._label_column_name, 'class_ids'):
              tf.reshape(class_predictions, [-1]),
          (self._label_column_name, 'logistic'):
              tf.reshape(sigmoid_output, [-1])
      }

      class_id_kwargs = {
          'labels': class_labels,
          'predictions': class_predictions
      }
      logistics_kwargs = {'labels': class_labels, 'predictions': sigmoid_output}

      # EVAL Mode
      if mode == tf.estimator.ModeKeys.EVAL:
        with tf.name_scope('eval_metrics'):
          eval_metric_ops = {
              'accuracy': tf.metrics.accuracy(**class_id_kwargs),
              'precision': tf.metrics.precision(**class_id_kwargs),
              'recall': tf.metrics.recall(**class_id_kwargs),
              'fp': tf.metrics.false_positives(**class_id_kwargs),
              'fn': tf.metrics.false_negatives(**class_id_kwargs),
              'tp': tf.metrics.true_positives(**class_id_kwargs),
              'tn': tf.metrics.true_negatives(**class_id_kwargs),
              'fpr': contrib_metrics.streaming_false_positive_rate(**class_id_kwargs),  # pylint: disable=line-too-long
              'fnr': contrib_metrics.streaming_false_negative_rate(**class_id_kwargs),  # pylint: disable=line-too-long
              'auc': tf.metrics.auc(curve='ROC', **logistics_kwargs),
              'aucpr': tf.metrics.auc(curve='PR', **logistics_kwargs)
          }

          # EstimatorSpec object for evaluation
          estimator_spec = tf.estimator.EstimatorSpec(
              mode=mode,
              predictions=predictions,
              loss=loss,
              eval_metric_ops=eval_metric_ops)

      # TRAIN Mode
      if mode == tf.estimator.ModeKeys.TRAIN:
        train_op_primary = contrib_layers.optimize_loss(
            loss=loss,
            learning_rate=self._learning_rate,
            global_step=contrib_framework.get_global_step(),
            optimizer=self._optimizer)

        estimator_spec = tf.estimator.EstimatorSpec(
            mode=mode,
            predictions=predictions,
            loss=loss,
            train_op=train_op_primary)

      return estimator_spec
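
A hedged sketch (not part of the original example) of how a `model_fn` like the one above is typically handed to `tf.estimator.Estimator` in TF 1.x; the label key 'label', the 'age' feature, and the random data are illustrative stand-ins:

import numpy as np
import tensorflow as tf

# model_fn is assumed to be the closure defined above, with
# self._label_column_name == 'label' and a numeric 'age' feature column.
estimator = tf.estimator.Estimator(model_fn=model_fn, model_dir='/tmp/baseline')

train_input_fn = tf.estimator.inputs.numpy_input_fn(
    x={'age': np.random.rand(128, 1).astype(np.float32)},
    y={'label': np.random.randint(0, 2, size=(128, 1)).astype(np.float32)},
    batch_size=32,
    num_epochs=None,
    shuffle=True)

estimator.train(train_input_fn, steps=100)
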
示例#39
0
        def model_fn(features, labels, mode):
            """robustModel model_fn.

      Args:
        features: `dict` of `Tensor`.
        labels: A `dict` of `Tensor` Objects. Expects to have a key/value pair
          for the key self.label_column_name.
        mode: Defines whether this is training, evaluation or prediction. See
          `ModeKeys`. Currently PREDICT mode is not implemented.

      Returns:
        An instance of `tf.estimator.EstimatorSpec`, which encapsulates the
        `mode`, `predictions`, `loss` and the `train_op`. Note that here
        `predictions` is either a `Tensor` or a `dict` of `Tensor` objects,
        representing the prediction of the binary classification model.
        `loss` is a scalar containing the loss of the step and `train_op` is the
        op for training.

      Raises:
        ValueError: if protected_column_names not in feature_columns
      """
            for col in self._protected_column_names:
                if col not in features.keys():
                    raise ValueError(
                        'Protected column <{}> should be in features.'.format(
                            col))

            # Instantiates a tensor with true class labels
            class_labels = labels[self._label_column_name]

            # Initialize a global step variable used for alternate training
            current_step = self._get_or_create_global_step_var()

            tf.logging.info('model_fn for mode: {}'.format(mode))

            with tf.name_scope('primary_NN'):
                with tf.variable_scope('primary'):
                    input_layer = tf.feature_column.input_layer(
                        features, self._feature_columns)
                    layer = input_layer
                    for unit in self._primary_hidden_units:
                        layer = tf.layers.Dense(
                            unit, activation=self._activation)(layer)
                    logits = tf.layers.Dense(1)(layer)
                    sigmoid_output = tf.nn.sigmoid(logits, name='sigmoid')
                    class_predictions = tf.cast(tf.greater(sigmoid_output, 0.5), tf.float32)  # pylint: disable=line-too-long
                    tf.summary.histogram('class_predictions',
                                         class_predictions)

            with tf.name_scope('adversary_NN'):
                with tf.variable_scope('adversary'):
                    # Filters and keeps only protected features and feature columns.
                    adversarial_features, adversary_feature_columns = self._get_adversary_features_and_feature_columns(features)  # pylint: disable=line-too-long

                    adv_input_layer = tf.feature_column.input_layer(
                        adversarial_features, adversary_feature_columns)

                    adv_layer = adv_input_layer
                    for adv_unit in self._adversary_hidden_units:
                        adv_layer = tf.layers.Dense(adv_unit)(adv_layer)
                    adv_output_layer = tf.layers.Dense(
                        1, use_bias=True)(adv_layer)

                    example_weights = tf.cond(
                        tf.greater(current_step, self._pretrain_steps),
                        true_fn=lambda: self._compute_example_weights(
                            adv_output_layer),
                        false_fn=lambda: tf.ones_like(class_labels))

            # Initializes Loss Functions
            primary_loss = self._primary_loss(class_labels, logits,
                                              example_weights)
            adversary_loss = self._adversary_loss(class_labels, logits,
                                                  example_weights)

            # Sets up dictionaries used for computing performance metrics
            predictions = {
                (self._label_column_name, 'class_ids'):
                tf.reshape(class_predictions, [-1]),
                (self._label_column_name, 'logistic'):
                tf.reshape(sigmoid_output, [-1]),
                ('example_weights'):
                tf.reshape(example_weights, [-1])
            }

            class_id_kwargs = {
                'labels': class_labels,
                'predictions': class_predictions
            }
            logistics_kwargs = {
                'labels': class_labels,
                'predictions': sigmoid_output
            }

            # EVAL Mode
            if mode == tf.estimator.ModeKeys.EVAL:
                with tf.name_scope('eval_metrics'):
                    eval_metric_ops = {
                        'accuracy':
                        tf.metrics.accuracy(**class_id_kwargs),
                        'precision':
                        tf.metrics.precision(**class_id_kwargs),
                        'recall':
                        tf.metrics.recall(**class_id_kwargs),
                        'fp':
                        tf.metrics.false_positives(**class_id_kwargs),
                        'fn':
                        tf.metrics.false_negatives(**class_id_kwargs),
                        'tp':
                        tf.metrics.true_positives(**class_id_kwargs),
                        'tn':
                        tf.metrics.true_negatives(**class_id_kwargs),
                        'fpr':
                        contrib_metrics.streaming_false_positive_rate(
                            **class_id_kwargs),  # pylint: disable=line-too-long
                        'fnr':
                        contrib_metrics.streaming_false_negative_rate(
                            **class_id_kwargs),  # pylint: disable=line-too-long
                        'auc':
                        tf.metrics.auc(curve='ROC', **logistics_kwargs),
                        'aucpr':
                        tf.metrics.auc(curve='PR', **logistics_kwargs)
                    }

                    # EstimatorSpec object for evaluation
                    estimator_spec = tf.estimator.EstimatorSpec(
                        mode=mode,
                        predictions=predictions,
                        loss=primary_loss,
                        eval_metric_ops=eval_metric_ops)

            # TRAIN Mode
            if mode == tf.estimator.ModeKeys.TRAIN:
                # Filters trainable variables for each task
                all_trainable_vars = tf.trainable_variables()
                primary_trainable_vars = [
                    v for v in all_trainable_vars if 'primary' in v.op.name
                ]
                adversary_trainable_vars = [
                    v for v in all_trainable_vars if 'adversary' in v.op.name
                ]

                # TRAIN_OP for adversary DNN
                train_op_adversary = contrib_layers.optimize_loss(
                    loss=adversary_loss,
                    variables=adversary_trainable_vars,
                    global_step=contrib_framework.get_global_step(),
                    learning_rate=self._adversary_learning_rate,
                    optimizer=self._optimizer)

                # TRAIN_OP for primary DNN
                train_op_primary = contrib_layers.optimize_loss(
                    loss=primary_loss,
                    variables=primary_trainable_vars,
                    global_step=contrib_framework.get_global_step(),
                    learning_rate=self._primary_learning_rate,
                    optimizer=self._optimizer)

                # Up to `pretrain_steps`, only the primary model is trained.
                # Beyond `pretrain_steps`, training alternates between primary
                # and adversary.
                estimator_spec = tf.estimator.EstimatorSpec(
                    mode=mode,
                    predictions=predictions,
                    loss=primary_loss + adversary_loss,
                    train_op=tf.cond(
                        tf.greater(current_step, self._pretrain_steps),
                        true_fn=lambda: tf.group(
                            [train_op_primary, train_op_adversary]),  # pylint: disable=line-too-long
                        false_fn=lambda: tf.group([train_op_primary])))

            return estimator_spec
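
The alternate-training logic above can be isolated into a small sketch: up to `pretrain_steps` only the primary train op runs, after that both ops run together. This is illustrative only; the no-op train ops and the constant stand in for the real ones:

import tensorflow as tf

pretrain_steps = 1000  # stand-in for self._pretrain_steps
current_step = tf.train.get_or_create_global_step()

# Stand-ins for the real primary/adversary train ops.
train_op_primary = tf.no_op(name='train_primary')
train_op_adversary = tf.no_op(name='train_adversary')

train_op = tf.cond(
    tf.greater(current_step, pretrain_steps),
    true_fn=lambda: tf.group(train_op_primary, train_op_adversary),
    false_fn=lambda: tf.group(train_op_primary))
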
示例#40
0
def random_forest_model_fn(features, labels, mode, params, config):
    """Function that returns predictions, training loss, and training op."""
    labels_tensor = labels
    if isinstance(labels, dict) and len(labels) == 1:
        labels_tensor = list(labels.values())[0]

    weights_name = params["weights_name"]
    keys_name = params["keys_name"]
    num_classes = tf.identity(params['num_classes'], name='num_classes')
    params_toGraphs = tensor_forest.ForestHParams(
        num_classes=params['num_classes'],
        num_features=params['num_features'],
        num_trees=params['num_trees'],
        max_nodes=params['max_nodes'],
        regression=params['regression'],
        split_after_samples=params['split_after_samples'])
    # Note the fill() call, defined around line 90 of:
    # https://github.com/tensorflow/tensorflow/blob/r1.2/tensorflow/contrib
    # /tensor_forest/python/tensor_forest.py
    params_toGraphs = params_toGraphs.fill()
    graph_builder_class = tensor_forest.RandomForestGraphs

    early_stopping_rounds = params["early_stopping_rounds"]
    num_trainers = 1
    trainer_id = 0
    report_feature_importances = False
    model_dir = None
    local_eval = False
    device_assigner = None
    weights = None
    if weights_name and weights_name in features:
        weights = features.pop(weights_name)

    keys = None
    if keys_name and keys_name in features:
        keys = features.pop(keys_name)

    # If we're doing eval, optionally ignore device_assigner.
    # Also ignore device assigner if we're exporting (mode == INFER)
    dev_assn = device_assigner
    if (mode == model_fn_lib.ModeKeys.INFER
            or (local_eval and mode == model_fn_lib.ModeKeys.EVAL)):
        dev_assn = None

    graph_builder = graph_builder_class(params_toGraphs,
                                        device_assigner=dev_assn)
    inference = {}
    predictions = {}
    output_alternatives = None
    # if (mode == model_fn_lib.ModeKeys.EVAL or
    #             mode == model_fn_lib.ModeKeys.INFER):
    if True:
        inference[eval_metrics.INFERENCE_PROB_NAME] = (
            graph_builder.inference_graph(features))

        if params_toGraphs.regression:
            predictions = {None: inference[eval_metrics.INFERENCE_PROB_NAME]}
            output_alternatives = {
                None: (constants.ProblemType.LINEAR_REGRESSION, predictions)
            }
        else:
            inference[eval_metrics.INFERENCE_PRED_NAME] = math_ops.argmax(
                inference[eval_metrics.INFERENCE_PROB_NAME], 1)

            predictions = {
                prediction_key.PredictionKey.PROBABILITIES:
                inference[eval_metrics.INFERENCE_PROB_NAME],
                prediction_key.PredictionKey.CLASSES:
                inference[eval_metrics.INFERENCE_PRED_NAME]
            }
            output_alternatives = {
                None: (constants.ProblemType.CLASSIFICATION, predictions)
            }

        if report_feature_importances:
            inference[eval_metrics.FEATURE_IMPORTANCE_NAME] = (
                graph_builder.feature_importances())

        if keys is not None:
            inference[keys_name] = keys

    # labels might be None if we're doing prediction (which brings up the
    # question of why we force everything to adhere to a single model_fn).
    loss_deps = []
    training_graph = None
    training_hooks = []
    scaffold = None
    if labels is not None and mode == model_fn_lib.ModeKeys.TRAIN:
        training_graph = control_flow_ops.group(
            graph_builder.training_graph(features,
                                         labels,
                                         input_weights=weights,
                                         num_trainers=num_trainers,
                                         trainer_id=trainer_id),
            state_ops.assign_add(contrib_framework.get_global_step(), 1))
        loss_deps.append(training_graph)
        if hasattr(graph_builder, 'finalize_training'):
            finalize_listener = EveryCheckpointPreSaveListener(
                graph_builder.finalize_training())
            scaffold = monitored_session.Scaffold()
            training_hooks.append(
                basic_session_run_hooks.CheckpointSaverHook(
                    model_dir,
                    save_secs=600,
                    save_steps=None,
                    scaffold=scaffold,
                    listeners=[finalize_listener]))

    training_loss = None
    if (mode == model_fn_lib.ModeKeys.EVAL
            or mode == model_fn_lib.ModeKeys.TRAIN):
        with ops.control_dependencies(loss_deps):
            training_loss = graph_builder.training_loss(
                features, labels, name='rf_training_loss')

    # Named so it can be passed into the hook.
    if not params['regression']:
        confusion_matrix_print = confusion_matrix(
            labels=labels_tensor,
            predictions=predictions['classes'],
            num_classes=num_classes,
        )

        confusion_matrix_print = tf.identity(confusion_matrix_print,
                                             name='confusion_matrix_print')
    else:
        confusion_matrix_print = tf.identity(0, name='confusion_matrix_print')

    regression_ornot = tf.identity(params['regression'],
                                   name='regression_ornot')
    # Put weights back in
    if weights is not None:
        features[weights_name] = weights

    if early_stopping_rounds:
        training_hooks.append(TensorForestLossHook(early_stopping_rounds))

    metrics = {}
    # metrics[metric_key.MetricKey.AUC] = metrics_lib.streaming_auc(
    #     labels=labels_tensor,
    #     predictions=inference[eval_metrics.INFERENCE_PRED_NAME]
    # )
    if not params_toGraphs.regression:
        metrics['eval_confusion_matrix'] = confusion_matrix(
            labels=labels_tensor,
            predictions=predictions['classes'],
            num_classes=params['num_classes'],
        )

    return model_fn_lib.ModelFnOps(mode=mode,
                                   predictions=inference,
                                   loss=training_loss,
                                   train_op=training_graph,
                                   training_hooks=training_hooks,
                                   scaffold=scaffold,
                                   eval_metric_ops=metrics,
                                   output_alternatives=output_alternatives)
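
For reference, a sketch of the shape of the `params` dict that `random_forest_model_fn` above expects; the concrete values are illustrative only:

params = {
    'num_classes': 3,
    'num_features': 10,
    'num_trees': 50,
    'max_nodes': 1000,
    'regression': False,
    'split_after_samples': 250,
    'weights_name': None,
    'keys_name': None,
    'early_stopping_rounds': 100,
}
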
示例#41
0
def dnn_sampled_softmax_classifier_model_fn(features, target_indices,
                                            mode, params):
  """model_fn that uses candidate sampling.

  Args:
    features: Single Tensor or dict of Tensor (depends on data passed to `fit`)
    target_indices: A single Tensor of shape [batch_size, n_labels] containing
      the target indices.
    mode: Represents if this is training, evaluation or prediction. See `ModeKeys`.
    params: A dict of hyperparameters that are listed below.
      hidden_units- List of hidden units per layer. All layers are fully
        connected. Ex. `[64, 32]` means first layer has 64 nodes and second one
        has 32.
      feature_columns- An iterable containing all the feature columns used by
        the model. All items in the set should be instances of classes derived
        from `FeatureColumn`.
      n_classes- number of target classes. It must be greater than 2.
      n_samples- number of sample target classes. Needs to be tuned - A good
        starting point could be 2% of n_classes.
      n_labels- number of labels in each example.
      top_k- The number of classes to predict.
      optimizer- An instance of `tf.Optimizer` used to train the model. If
        `None`, will use an Adagrad optimizer.
      dropout- When not `None`, the probability we will drop out a given
        coordinate.
      gradient_clip_norm- A float > 0. If provided, gradients are
        clipped to their global norm with this clipping ratio. See
        tf.clip_by_global_norm for more details.
      num_ps_replicas- The number of parameter server replicas.

  Returns:
    predictions: A single Tensor or a dict of Tensors.
    loss: A scalar containing the loss of the step.
    train_op: The op for training.
  """

  hidden_units = params["hidden_units"]
  feature_columns = params["feature_columns"]
  n_classes = params["n_classes"]
  n_samples = params["n_samples"]
  n_labels = params["n_labels"]
  top_k = params["top_k"]
  optimizer = params["optimizer"]
  dropout = params["dropout"]
  gradient_clip_norm = params["gradient_clip_norm"]
  num_ps_replicas = params["num_ps_replicas"]

  parent_scope = "dnn_ss"

  # Setup the input layer partitioner.
  input_layer_partitioner = (
      partitioned_variables.min_max_variable_partitioner(
          max_partitions=num_ps_replicas,
          min_slice_size=64 << 20))

  # Create the input layer.
  with variable_scope.variable_scope(
      parent_scope + "/input_from_feature_columns",
      features.values(),
      partitioner=input_layer_partitioner) as scope:
    net = layers.input_from_feature_columns(
        features,
        feature_columns,
        weight_collections=[parent_scope],
        scope=scope)

  # Setup the hidden layer partitioner.
  hidden_layer_partitioner = (
      partitioned_variables.min_max_variable_partitioner(
          max_partitions=num_ps_replicas))

  final_hidden_layer_dim = None
  # Create hidden layers using fully_connected.
  for layer_id, num_hidden_units in enumerate(hidden_units):
    with variable_scope.variable_scope(
        parent_scope + "/hiddenlayer_%d" % layer_id, [net],
        partitioner=hidden_layer_partitioner) as scope:
      net = layers.fully_connected(net,
                                   num_hidden_units,
                                   variables_collections=[parent_scope],
                                   scope=scope)
      final_hidden_layer_dim = num_hidden_units
      # Add dropout if it is enabled.
      if dropout is not None and mode == estimator.ModeKeys.TRAIN:
        net = layers.dropout(net, keep_prob=(1.0 - dropout))

  # Create the weights and biases for the logit layer.
  with variable_scope.variable_scope(
      parent_scope + "/logits", [net],
      partitioner=hidden_layer_partitioner) as scope:
    dtype = net.dtype.base_dtype
    weights_shape = [n_classes, final_hidden_layer_dim]
    weights = variables.model_variable(
        "weights",
        shape=weights_shape,
        dtype=dtype,
        initializer=initializers.xavier_initializer(),
        trainable=True,
        collections=[parent_scope])
    biases = variables.model_variable(
        "biases",
        shape=[n_classes,],
        dtype=dtype,
        initializer=init_ops.zeros_initializer,
        trainable=True,
        collections=[parent_scope])

  if mode == estimator.ModeKeys.TRAIN:
    # Call the candidate sampling APIs and calculate the loss.
    sampled_values = nn.learned_unigram_candidate_sampler(
        true_classes=math_ops.to_int64(target_indices),
        num_true=n_labels,
        num_sampled=n_samples,
        unique=True,
        range_max=n_classes)

    sampled_softmax_loss = nn.sampled_softmax_loss(
        weights=weights,
        biases=biases,
        inputs=net,
        labels=math_ops.to_int64(target_indices),
        num_sampled=n_samples,
        num_classes=n_classes,
        num_true=n_labels,
        sampled_values=sampled_values)

    loss = math_ops.reduce_mean(sampled_softmax_loss, name="loss")

    train_op = optimizers.optimize_loss(
        loss=loss, global_step=contrib_framework.get_global_step(),
        learning_rate=_DEFAULT_LEARNING_RATE,
        optimizer=_get_optimizer(optimizer), clip_gradients=gradient_clip_norm,
        name=parent_scope)
    return None, loss, train_op

  elif mode == estimator.ModeKeys.EVAL:
    logits = nn.bias_add(standard_ops.matmul(net, array_ops.transpose(weights)),
                         biases)
    predictions = {}
    predictions[_PROBABILITIES] = nn.softmax(logits)
    predictions[_CLASSES] = math_ops.argmax(logits, 1)
    _, predictions[_TOP_K] = nn.top_k(logits, top_k)

    # Since the targets have multiple labels, setup the target probabilities
    # as 1.0/n_labels for each of the labels.
    target_one_hot = array_ops.one_hot(
        indices=target_indices,
        depth=n_classes,
        on_value=1.0 / n_labels)
    target_one_hot = math_ops.reduce_sum(
        input_tensor=target_one_hot,
        reduction_indices=[1])

    loss = math_ops.reduce_mean(
        nn.softmax_cross_entropy_with_logits(logits, target_one_hot))

    return predictions, loss, None

  elif mode == estimator.ModeKeys.INFER:
    logits = nn.bias_add(standard_ops.matmul(net, array_ops.transpose(weights)),
                         biases)
    predictions = {}
    predictions[_PROBABILITIES] = nn.softmax(logits)
    predictions[_CLASSES] = math_ops.argmax(logits, 1)
    _, predictions[_TOP_K] = nn.top_k(logits, top_k)

    return predictions, None, None
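
The TRAIN branch above centres on `tf.nn.sampled_softmax_loss`; a self-contained sketch of that call (TF 1.x, with illustrative sizes and random stand-in tensors):

import tensorflow as tf

n_classes, n_samples, n_labels = 10000, 200, 2
batch_size, hidden_dim = 32, 64

net = tf.random_normal([batch_size, hidden_dim])  # last hidden layer output
target_indices = tf.random_uniform(
    [batch_size, n_labels], maxval=n_classes, dtype=tf.int64)

weights = tf.get_variable('ss_weights', [n_classes, hidden_dim])
biases = tf.get_variable('ss_biases', [n_classes])

loss = tf.reduce_mean(tf.nn.sampled_softmax_loss(
    weights=weights,
    biases=biases,
    labels=target_indices,
    inputs=net,
    num_sampled=n_samples,
    num_classes=n_classes,
    num_true=n_labels))
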
示例#42
0
  def _model_fn(features, labels, mode):
    """Function that returns predictions, training loss, and training op."""
    weights = None
    if weights_name and weights_name in features:
      weights = features.pop(weights_name)

    keys = None
    if keys_name and keys_name in features:
      keys = features.pop(keys_name)

    # If we're doing eval, optionally ignore device_assigner.
    # Also ignore device assigner if we're exporting (mode == INFER)
    dev_assn = device_assigner
    if (mode == model_fn_lib.ModeKeys.INFER or
        (local_eval and mode == model_fn_lib.ModeKeys.EVAL)):
      dev_assn = None

    graph_builder = graph_builder_class(params,
                                        device_assigner=dev_assn)
    inference = {}
    output_alternatives = None
    if (mode == model_fn_lib.ModeKeys.EVAL or
        mode == model_fn_lib.ModeKeys.INFER):
      inference[eval_metrics.INFERENCE_PROB_NAME] = (
          graph_builder.inference_graph(features))

      if params.regression:
        predictions = {
            None: inference[eval_metrics.INFERENCE_PROB_NAME]}
        output_alternatives = {
            None: (constants.ProblemType.LINEAR_REGRESSION, predictions)}
      else:
        inference[eval_metrics.INFERENCE_PRED_NAME] = math_ops.argmax(
            inference[eval_metrics.INFERENCE_PROB_NAME], 1)

        predictions = {
            prediction_key.PredictionKey.PROBABILITIES:
                inference[eval_metrics.INFERENCE_PROB_NAME],
            prediction_key.PredictionKey.CLASSES:
                inference[eval_metrics.INFERENCE_PRED_NAME]}
        output_alternatives = {
            None: (constants.ProblemType.CLASSIFICATION, predictions)}

      if keys is not None:
        inference[keys_name] = keys

    # labels might be None if we're doing prediction (which brings up the
    # question of why we force everything to adhere to a single model_fn).
    loss_deps = []
    training_graph = None
    training_hooks = []
    scaffold = None
    if labels is not None and mode == model_fn_lib.ModeKeys.TRAIN:
      training_graph = control_flow_ops.group(
          graph_builder.training_graph(
              features, labels, input_weights=weights,
              num_trainers=num_trainers,
              trainer_id=trainer_id),
          state_ops.assign_add(contrib_framework.get_global_step(), 1))
      loss_deps.append(training_graph)

    training_loss = None
    if (mode == model_fn_lib.ModeKeys.EVAL or
        mode == model_fn_lib.ModeKeys.TRAIN):
      with ops.control_dependencies(loss_deps):
        training_loss = graph_builder.training_loss(
            features, labels, name=LOSS_NAME)

    # Put weights back in
    if weights is not None:
      features[weights_name] = weights

    if early_stopping_rounds:
      training_hooks.append(TensorForestLossHook(early_stopping_rounds,
                                                 loss_op=training_loss))

    if report_feature_importances:
      training_hooks.append(TensorForestRunOpAtEndHook(
          {'feature_importances': graph_builder.feature_importances()}))

    return model_fn_lib.ModelFnOps(
        mode=mode,
        predictions=inference,
        loss=training_loss,
        train_op=training_graph,
        training_hooks=training_hooks,
        scaffold=scaffold,
        output_alternatives=output_alternatives)
示例#43
0
    def _model_fn(features, labels, mode):
        """Function that returns predictions, training loss, and training op."""
        if (isinstance(features, ops.Tensor)
                or isinstance(features, sparse_tensor.SparseTensor)):
            features = {'features': features}
        if feature_columns:
            features = features.copy()
            features.update(
                layers.transform_features(features, feature_columns))

        weights = None
        if weights_name and weights_name in features:
            weights = features.pop(weights_name)

        keys = None
        if keys_name and keys_name in features:
            keys = features.pop(keys_name)

        # If we're doing eval, optionally ignore device_assigner.
        # Also ignore device assigner if we're exporting (mode == INFER)
        dev_assn = device_assigner
        if (mode == model_fn_lib.ModeKeys.INFER
                or (local_eval and mode == model_fn_lib.ModeKeys.EVAL)):
            dev_assn = None

        graph_builder = graph_builder_class(params, device_assigner=dev_assn)

        logits, tree_paths, regression_variance = graph_builder.inference_graph(
            features)

        summary.scalar('average_tree_size', graph_builder.average_size())
        # For binary classification problems, convert probabilities to logits.
        # Includes hack to get around the fact that a probability might be 0 or 1.
        if not params.regression and params.num_classes == 2:
            class_1_probs = array_ops.slice(logits, [0, 1], [-1, 1])
            logits = math_ops.log(
                math_ops.maximum(
                    class_1_probs /
                    math_ops.maximum(1.0 - class_1_probs, EPSILON), EPSILON))

        # labels might be None if we're doing prediction (which brings up the
        # question of why we force everything to adhere to a single model_fn).
        training_graph = None
        training_hooks = []
        if labels is not None and mode == model_fn_lib.ModeKeys.TRAIN:
            with ops.control_dependencies([logits.op]):
                training_graph = control_flow_ops.group(
                    graph_builder.training_graph(features,
                                                 labels,
                                                 input_weights=weights,
                                                 num_trainers=num_trainers,
                                                 trainer_id=trainer_id),
                    state_ops.assign_add(contrib_framework.get_global_step(),
                                         1))

        # Put weights back in
        if weights is not None:
            features[weights_name] = weights

        # TensorForest's training graph isn't calculated directly from the loss
        # like many other models.
        def _train_fn(unused_loss):
            return training_graph

        model_ops = model_head.create_model_fn_ops(features=features,
                                                   labels=labels,
                                                   mode=mode,
                                                   train_op_fn=_train_fn,
                                                   logits=logits,
                                                   scope=head_scope)

        # Ops are run in lexicographic order of their keys. Run the resource
        # clean-up op last.
        all_handles = graph_builder.get_all_resource_handles()
        ops_at_end = {
            '9: clean up resources':
            control_flow_ops.group(*[
                resource_variable_ops.destroy_resource_op(handle)
                for handle in all_handles
            ])
        }

        if report_feature_importances:
            ops_at_end['1: feature_importances'] = (
                graph_builder.feature_importances())

        training_hooks.append(TensorForestRunOpAtEndHook(ops_at_end))

        if early_stopping_rounds:
            training_hooks.append(
                TensorForestLossHook(
                    early_stopping_rounds,
                    early_stopping_loss_threshold=early_stopping_loss_threshold,
                    loss_op=model_ops.loss))

        model_ops.training_hooks.extend(training_hooks)

        if keys is not None:
            model_ops.predictions[keys_name] = keys

        if params.inference_tree_paths:
            model_ops.predictions[TREE_PATHS_PREDICTION_KEY] = tree_paths

        if params.regression:
            model_ops.predictions[
                VARIANCE_PREDICTION_KEY] = regression_variance

        return model_ops
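
The probability-to-logit conversion used above, with its epsilon clamp to avoid log(0) when a probability is exactly 0 or 1, can be seen in isolation in this sketch; `EPSILON` stands in for the constant defined elsewhere in the original module:

import tensorflow as tf

EPSILON = 1e-6  # stand-in; the original module defines its own constant

# Rows are [p(class 0), p(class 1)] predicted by the forest.
probs = tf.constant([[0.9, 0.1],
                     [0.0, 1.0]])
class_1_probs = tf.slice(probs, [0, 1], [-1, 1])
logits = tf.log(
    tf.maximum(class_1_probs / tf.maximum(1.0 - class_1_probs, EPSILON),
               EPSILON))
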
示例#44
0
    def _model_fn(features, labels, mode):
        """Function that returns predictions, training loss, and training op."""
        weights = None
        if weights_name and weights_name in features:
            weights = features.pop(weights_name)

        keys = None
        if keys_name and keys_name in features:
            keys = features.pop(keys_name)

        # If we're doing eval, optionally ignore device_assigner.
        # Also ignore device assigner if we're exporting (mode == INFER)
        dev_assn = device_assigner
        if (mode == model_fn_lib.ModeKeys.INFER
                or (local_eval and mode == model_fn_lib.ModeKeys.EVAL)):
            dev_assn = None

        graph_builder = graph_builder_class(params, device_assigner=dev_assn)
        inference = {}
        output_alternatives = None
        if (mode == model_fn_lib.ModeKeys.EVAL
                or mode == model_fn_lib.ModeKeys.INFER):
            inference[eval_metrics.INFERENCE_PROB_NAME] = (
                graph_builder.inference_graph(features))

            if params.regression:
                predictions = {
                    None: inference[eval_metrics.INFERENCE_PROB_NAME]
                }
                output_alternatives = {
                    None:
                    (constants.ProblemType.LINEAR_REGRESSION, predictions)
                }
            else:
                inference[eval_metrics.INFERENCE_PRED_NAME] = math_ops.argmax(
                    inference[eval_metrics.INFERENCE_PROB_NAME], 1)

                predictions = {
                    prediction_key.PredictionKey.PROBABILITIES:
                    inference[eval_metrics.INFERENCE_PROB_NAME],
                    prediction_key.PredictionKey.CLASSES:
                    inference[eval_metrics.INFERENCE_PRED_NAME]
                }
                output_alternatives = {
                    None: (constants.ProblemType.CLASSIFICATION, predictions)
                }

            if keys is not None:
                inference[keys_name] = keys

        # labels might be None if we're doing prediction (which brings up the
        # question of why we force everything to adhere to a single model_fn).
        loss_deps = []
        training_graph = None
        training_hooks = []
        scaffold = None
        if labels is not None and mode == model_fn_lib.ModeKeys.TRAIN:
            training_graph = control_flow_ops.group(
                graph_builder.training_graph(features,
                                             labels,
                                             input_weights=weights,
                                             num_trainers=num_trainers,
                                             trainer_id=trainer_id),
                state_ops.assign_add(contrib_framework.get_global_step(), 1))
            loss_deps.append(training_graph)
            if hasattr(graph_builder, 'finalize_training'):
                finalize_listener = EveryCheckpointPreSaveListener(
                    graph_builder.finalize_training())
                scaffold = monitored_session.Scaffold()
                training_hooks.append(
                    basic_session_run_hooks.CheckpointSaverHook(
                        model_dir,
                        save_secs=600,
                        save_steps=None,
                        scaffold=scaffold,
                        listeners=[finalize_listener]))

        training_loss = None
        if (mode == model_fn_lib.ModeKeys.EVAL
                or mode == model_fn_lib.ModeKeys.TRAIN):
            with ops.control_dependencies(loss_deps):
                training_loss = graph_builder.training_loss(features,
                                                            labels,
                                                            name=LOSS_NAME)

        # Put weights back in
        if weights is not None:
            features[weights_name] = weights

        if early_stopping_rounds:
            training_hooks.append(TensorForestLossHook(early_stopping_rounds))

        if report_feature_importances:
            training_hooks.append(
                TensorForestRunOpAtEndHook({
                    'feature_importances':
                    graph_builder.feature_importances()
                }))

        return model_fn_lib.ModelFnOps(mode=mode,
                                       predictions=inference,
                                       loss=training_loss,
                                       train_op=training_graph,
                                       training_hooks=training_hooks,
                                       scaffold=scaffold,
                                       output_alternatives=output_alternatives)
示例#45
0
  def _model_fn(features, labels, mode):
    """Function that returns predictions, training loss, and training op."""
    weights = None
    if weights_name and weights_name in features:
      weights = features.pop(weights_name)

    # If we're doing eval, optionally ignore device_assigner.
    # Also ignore device assigner if we're exporting (mode == INFER)
    dev_assn = device_assigner
    if (mode == model_fn_lib.ModeKeys.INFER or
        (local_eval and mode == model_fn_lib.ModeKeys.EVAL)):
      dev_assn = None

    graph_builder = graph_builder_class(params,
                                        device_assigner=dev_assn)
    inference = {}
    if (mode == model_fn_lib.ModeKeys.EVAL or
        mode == model_fn_lib.ModeKeys.INFER):
      inference[eval_metrics.INFERENCE_PROB_NAME] = (
          graph_builder.inference_graph(features))

      if not params.regression:
        inference[eval_metrics.INFERENCE_PRED_NAME] = math_ops.argmax(
            inference[eval_metrics.INFERENCE_PROB_NAME], 1)

      if report_feature_importances:
        inference[eval_metrics.FEATURE_IMPORTANCE_NAME] = (
            graph_builder.feature_importances())

    # labels might be None if we're doing prediction (which brings up the
    # question of why we force everything to adhere to a single model_fn).
    loss_deps = []
    training_graph = None
    training_hooks = []
    scaffold = None
    if labels is not None and mode == model_fn_lib.ModeKeys.TRAIN:
      training_graph = control_flow_ops.group(
          graph_builder.training_graph(
              features, labels, input_weights=weights,
              num_trainers=num_trainers,
              trainer_id=trainer_id),
          state_ops.assign_add(contrib_framework.get_global_step(), 1))
      loss_deps.append(training_graph)
      if hasattr(graph_builder, 'finalize_training'):
        finalize_listener = EveryCheckpointPreSaveListener(
            graph_builder.finalize_training())
        scaffold = monitored_session.Scaffold()
        training_hooks.append(
            basic_session_run_hooks.CheckpointSaverHook(
                model_dir, save_secs=600, save_steps=None,
                scaffold=scaffold,
                listeners=[finalize_listener]))

    training_loss = None
    if (mode == model_fn_lib.ModeKeys.EVAL or
        mode == model_fn_lib.ModeKeys.TRAIN):
      with ops.control_dependencies(loss_deps):
        training_loss = graph_builder.training_loss(
            features, labels, name=LOSS_NAME)

    # Put weights back in
    if weights is not None:
      features[weights_name] = weights

    if early_stopping_rounds:
      training_hooks.append(TensorForestLossHook(early_stopping_rounds))

    return model_fn_lib.ModelFnOps(
        mode=mode,
        predictions=inference,
        loss=training_loss,
        train_op=training_graph,
        training_hooks=training_hooks,
        scaffold=scaffold)
示例#46
0
  def _model_fn(features, labels, mode):
    """Function that returns predictions, training loss, and training op."""
    if (isinstance(features, ops.Tensor) or
        isinstance(features, sparse_tensor.SparseTensor)):
      features = {'features': features}
    weights = None
    if weights_name and weights_name in features:
      weights = features.pop(weights_name)

    keys = None
    if keys_name and keys_name in features:
      keys = features.pop(keys_name)

    # If we're doing eval, optionally ignore device_assigner.
    # Also ignore device assigner if we're exporting (mode == INFER)
    dev_assn = device_assigner
    if (mode == model_fn_lib.ModeKeys.INFER or
        (local_eval and mode == model_fn_lib.ModeKeys.EVAL)):
      dev_assn = None

    graph_builder = graph_builder_class(params,
                                        device_assigner=dev_assn)

    logits, tree_paths, regression_variance = graph_builder.inference_graph(
        features)

    summary.scalar('average_tree_size', graph_builder.average_size())
    # For binary classification problems, convert probabilities to logits.
    # Includes hack to get around the fact that a probability might be 0 or 1.
    if not params.regression and params.num_classes == 2:
      class_1_probs = array_ops.slice(logits, [0, 1], [-1, 1])
      logits = math_ops.log(
          math_ops.maximum(class_1_probs / math_ops.maximum(
              1.0 - class_1_probs, EPSILON), EPSILON))

    # labels might be None if we're doing prediction (which brings up the
    # question of why we force everything to adhere to a single model_fn).
    training_graph = None
    training_hooks = []
    if labels is not None and mode == model_fn_lib.ModeKeys.TRAIN:
      with ops.control_dependencies([logits.op]):
        training_graph = control_flow_ops.group(
            graph_builder.training_graph(
                features, labels, input_weights=weights,
                num_trainers=num_trainers,
                trainer_id=trainer_id),
            state_ops.assign_add(contrib_framework.get_global_step(), 1))

    # Put weights back in
    if weights is not None:
      features[weights_name] = weights

    # TensorForest's training graph isn't calculated directly from the loss
    # like many other models.
    def _train_fn(unused_loss):
      return training_graph

    model_ops = model_head.create_model_fn_ops(
        features=features,
        labels=labels,
        mode=mode,
        train_op_fn=_train_fn,
        logits=logits,
        scope=head_scope)

    # Ops are run in lexicographic order of their keys. Run the resource
    # clean-up op last.
    all_handles = graph_builder.get_all_resource_handles()
    ops_at_end = {
        '9: clean up resources': control_flow_ops.group(
            *[resource_variable_ops.destroy_resource_op(handle)
              for handle in all_handles])}

    if report_feature_importances:
      ops_at_end['1: feature_importances'] = (
          graph_builder.feature_importances())

    training_hooks.append(TensorForestRunOpAtEndHook(ops_at_end))

    if early_stopping_rounds:
      training_hooks.append(
          TensorForestLossHook(
              early_stopping_rounds,
              early_stopping_loss_threshold=early_stopping_loss_threshold,
              loss_op=model_ops.loss))

    model_ops.training_hooks.extend(training_hooks)

    if keys is not None:
      model_ops.predictions[keys_name] = keys

    if params.inference_tree_paths:
      model_ops.predictions[TREE_PATHS_PREDICTION_KEY] = tree_paths

    if params.regression:
      model_ops.predictions[VARIANCE_PREDICTION_KEY] = regression_variance

    return model_ops
示例#47
0
def optimize_loss(loss,
                  global_step,
                  learning_rate,
                  optimizer,
                  gradient_noise_scale=None,
                  gradient_multipliers=None,
                  clip_gradients=None,
                  learning_rate_decay_fn=None,
                  update_ops=None,
                  variables=None,
                  name=None,
                  summaries=None,
                  colocate_gradients_with_ops=False,
                  increment_global_step=True):
  """Given loss and parameters for optimizer, returns a training op.

  Various ways of passing optimizers include:

  - string, name of the optimizer like 'SGD', 'Adam', see OPTIMIZER_CLS_NAMES
      for full list. E.g. `optimize_loss(..., optimizer='Adam')`.
  - function, takes learning rate `Tensor` as argument and must return
      `Optimizer` instance. E.g. `optimize_loss(...,
      optimizer=lambda lr: tf.train.MomentumOptimizer(lr, momentum=0.5))`.
    Alternatively, if `learning_rate` is `None`, the function takes no
    arguments. E.g. `optimize_loss(..., learning_rate=None,
      optimizer=lambda: tf.train.MomentumOptimizer(0.5, momentum=0.5))`.
  - class, subclass of `Optimizer` that takes only one required argument -
      learning rate, such as AdamOptimizer, AdagradOptimizer.
      E.g. `optimize_loss(..., optimizer=tf.train.AdagradOptimizer)`.
  - object, instance of subclass of `Optimizer`.
      E.g., `optimize_loss(..., optimizer=tf.train.AdagradOptimizer(0.5))`.

  Args:
    loss: Scalar `Tensor`.
    global_step: Scalar int `Tensor`, step counter to update on each step
                 unless `increment_global_step` is `False`. If not supplied,
                 it will be fetched from the default graph (see
                 `tf.train.get_global_step` for details). If it's
                 not been created, no step will be incremented with each weight
                 update. `learning_rate_decay_fn` requires `global_step`.
    learning_rate: float or `Tensor`, magnitude of update per each training
                   step. Can be `None`.
    optimizer: string, class or optimizer instance, used as trainer.
               string should be name of optimizer, like 'SGD',
                 'Adam', 'Adagrad'. Full list in OPTIMIZER_CLS_NAMES constant.
               class should be sub-class of `tf.Optimizer` that implements
                 `compute_gradients` and `apply_gradients` functions.
               optimizer instance should be instantiation of `tf.Optimizer`
                 sub-class and have `compute_gradients` and `apply_gradients`
                 functions.
    gradient_noise_scale: float or None, adds 0-mean normal noise scaled by this
                          value.
    gradient_multipliers: dict of variables or variable names to floats.
                          If present, gradients for specified
                          variables will be multiplied by given constant.
    clip_gradients: float, callable or `None`. If a float is provided, global
      clipping is applied to prevent the norm of the gradients from exceeding
      this value. Alternatively, a callable can be provided, e.g.
      adaptive_clipping. This callable takes a `list` of
      `(gradients, variables)` `tuple`s and returns the same thing with the
      gradients modified.
    learning_rate_decay_fn: function, takes `learning_rate` and `global_step`
                            `Tensor`s, returns `Tensor`.
                            Can be used to implement any learning rate decay
                            functions.
                            For example: `tf.train.exponential_decay`.
                            Ignored if `learning_rate` is not supplied.
    update_ops: list of update `Operation`s to execute at each step. If `None`,
                uses elements of UPDATE_OPS collection. The order of execution
                between `update_ops` and `loss` is non-deterministic.
    variables: list of variables to optimize or
               `None` to use all trainable variables.
    name: The name for this operation is used to scope operations and summaries.
    summaries: List of internal quantities to visualize on tensorboard. If not
               set only the loss and the learning rate will be reported. The
               complete list is in OPTIMIZER_SUMMARIES.
    colocate_gradients_with_ops: If True, try colocating gradients with the
                                 corresponding op.
    increment_global_step: Whether to increment `global_step`. If your model
      calls `optimize_loss` multiple times per training step (e.g. to optimize
      different parts of the model), use this arg to avoid incrementing
      `global_step` more times than necessary.

  Returns:
    Training op.

  Raises:
    ValueError: if:
        * `loss` is an invalid type or shape.
        * `global_step` is an invalid type or shape.
        * `learning_rate` is an invalid type or value.
        * `optimizer` is wrong type.
        * `clip_gradients` is not float or callable.
        * `learning_rate` and `learning_rate_decay_fn` are supplied, but no
          `global_step` is available.
        * `gradients` is empty
  """
  loss = ops.convert_to_tensor(loss)
  contrib_framework.assert_scalar(loss)
  if global_step is None:
    global_step = contrib_framework.get_global_step()
  else:
    contrib_framework.assert_global_step(global_step)
  with vs.variable_scope(name, "OptimizeLoss", [loss, global_step]):
    # Update ops take UPDATE_OPS collection if not provided.
    if update_ops is None:
      update_ops = set(ops.get_collection(ops.GraphKeys.UPDATE_OPS))
    # Make sure update ops are run before computing loss.
    if update_ops:
      loss = control_flow_ops.with_dependencies(list(update_ops), loss)

    # Learning rate variable, with possible decay.
    lr = None
    if learning_rate is not None:
      if (isinstance(learning_rate, ops.Tensor) and
          learning_rate.get_shape().ndims == 0):
        lr = learning_rate
      elif isinstance(learning_rate, float):
        if learning_rate < 0.0:
          raise ValueError("Invalid learning_rate %s.", learning_rate)
        lr = vs.get_variable(
            "learning_rate", [],
            trainable=False,
            initializer=init_ops.constant_initializer(learning_rate))
      else:
        raise ValueError("Learning rate should be 0d Tensor or float. "
                         "Got %s of type %s" % (str(learning_rate),
                                                str(type(learning_rate))))
    if summaries is None:
      summaries = ["loss", "learning_rate"]
    else:
      for summ in summaries:
        if summ not in OPTIMIZER_SUMMARIES:
          raise ValueError("Summaries should be one of [%s], you provided %s." %
                           (", ".join(OPTIMIZER_SUMMARIES), summ))
    if learning_rate is not None and learning_rate_decay_fn is not None:
      if global_step is None:
        raise ValueError("global_step is required for learning_rate_decay_fn.")
      lr = learning_rate_decay_fn(lr, global_step)
      if "learning_rate" in summaries:
        summary.scalar("learning_rate", lr)

    # Create optimizer, given specified parameters.
    if isinstance(optimizer, six.string_types):
      if lr is None:
        raise ValueError("Learning rate is None, but should be specified if "
                         "optimizer is string (%s)." % optimizer)
      if optimizer not in OPTIMIZER_CLS_NAMES:
        raise ValueError(
            "Optimizer name should be one of [%s], you provided %s." %
            (", ".join(OPTIMIZER_CLS_NAMES), optimizer))
      opt = OPTIMIZER_CLS_NAMES[optimizer](learning_rate=lr)
    elif (isinstance(optimizer, type) and
          issubclass(optimizer, optimizer_.Optimizer)):
      if lr is None:
        raise ValueError("Learning rate is None, but should be specified if "
                         "optimizer is class (%s)." % optimizer)
      opt = optimizer(learning_rate=lr)
    elif isinstance(optimizer, optimizer_.Optimizer):
      opt = optimizer
    elif callable(optimizer):
      if learning_rate is not None:
        opt = optimizer(lr)
      else:
        opt = optimizer()
      if not isinstance(opt, optimizer_.Optimizer):
        raise ValueError("Unrecognized optimizer: function should return "
                         "subclass of Optimizer. Got %s." % str(opt))
    else:
      raise ValueError("Unrecognized optimizer: should be string, "
                       "subclass of Optimizer, instance of "
                       "subclass of Optimizer or function with one argument. "
                       "Got %s." % str(optimizer))

    # All trainable variables, if specific variables are not specified.
    if variables is None:
      variables = vars_.trainable_variables()

    # Compute gradients.
    gradients = opt.compute_gradients(
        loss,
        variables,
        colocate_gradients_with_ops=colocate_gradients_with_ops)

    # Optionally add gradient noise.
    if gradient_noise_scale is not None:
      gradients = _add_scaled_noise_to_gradients(gradients,
                                                 gradient_noise_scale)

    # Multiply some gradients.
    if gradient_multipliers is not None:
      gradients = _multiply_gradients(gradients, gradient_multipliers)
      if not gradients:
        raise ValueError(
            "Empty list of (gradient, var) pairs encountered. This is most "
            "likely to be caused by an improper value of gradient_multipliers.")

    if "gradient_norm" in summaries:
      summary.scalar("global_norm/gradient_norm",
                     clip_ops.global_norm(list(zip(*gradients))[0]))

    # Optionally clip gradients by global norm.
    if isinstance(clip_gradients, float):
      gradients = _clip_gradients_by_norm(gradients, clip_gradients)
    elif callable(clip_gradients):
      gradients = clip_gradients(gradients)
    elif clip_gradients is not None:
      raise ValueError(
          "Unknown type %s for clip_gradients" % type(clip_gradients))

    # Add scalar summary for loss.
    if "loss" in summaries:
      summary.scalar("loss", loss)

    # Add histograms for variables, gradients and gradient norms.
    for gradient, variable in gradients:
      if isinstance(gradient, ops.IndexedSlices):
        grad_values = gradient.values
      else:
        grad_values = gradient

      if grad_values is not None:
        var_name = variable.name.replace(":", "_")
        if "gradients" in summaries:
          summary.histogram("gradients/%s" % var_name, grad_values)
        if "gradient_norm" in summaries:
          summary.scalar("gradient_norm/%s" % var_name,
                         clip_ops.global_norm([grad_values]))

    if clip_gradients is not None and "gradient_norm" in summaries:
      summary.scalar("global_norm/clipped_gradient_norm",
                     clip_ops.global_norm(list(zip(*gradients))[0]))

    # Create gradient updates.
    grad_updates = opt.apply_gradients(
        gradients,
        global_step=global_step if increment_global_step else None,
        name="train")

    # Ensure the train_tensor computes grad_updates.
    train_tensor = control_flow_ops.with_dependencies([grad_updates], loss)

    return train_tensor
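
A hedged usage sketch of the fuller signature above, combining a callable optimizer with `learning_rate_decay_fn` (TF 1.x, where this function lives at `tf.contrib.layers.optimize_loss`; the toy loss and all constants are illustrative):

import tensorflow as tf

w = tf.get_variable('w', [5])
loss = tf.reduce_mean(tf.square(w))
global_step = tf.train.get_or_create_global_step()

train_op = tf.contrib.layers.optimize_loss(
    loss,
    global_step,
    learning_rate=0.1,
    optimizer=lambda lr: tf.train.MomentumOptimizer(lr, momentum=0.9),
    learning_rate_decay_fn=lambda lr, step: tf.train.exponential_decay(
        lr, step, decay_steps=1000, decay_rate=0.96),
    summaries=['loss', 'learning_rate', 'gradient_norm'],
    increment_global_step=True)
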
示例#48
0
File: task.py  Project: spwcd/QTML
def conv_model_train_op(loss, mode):
    if mode != learn.ModeKeys.TRAIN:
        return None
    return layers.optimize_loss(
        loss,
        framework.get_global_step(),
        learning_rate=0.003,
        optimizer="Adam",
        # to remove learning rate decay, comment out the next line
        learning_rate_decay_fn=lambda lr, step: 0.0001 + tf.train.exponential_decay(
            lr, step, -2000, math.e))
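
A hedged sketch (not in the original task.py) of how `conv_model_train_op` above could slot into a `tf.contrib.learn` model_fn; the tiny dense model and the prediction key are stand-ins for the real conv net:

import tensorflow as tf
from tensorflow.contrib import learn

def conv_model(features, labels, mode):
    # Stand-in model: a single dense layer instead of the real conv net.
    logits = tf.layers.dense(tf.layers.flatten(features), 10)
    loss = tf.losses.sparse_softmax_cross_entropy(labels=labels, logits=logits)
    return learn.ModelFnOps(
        mode=mode,
        predictions={'classes': tf.argmax(logits, 1)},
        loss=loss,
        train_op=conv_model_train_op(loss, mode))
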