# Example 1
    def _testExampleWeight(self, n_classes):
        """Trains with a per-example weight column and validates the checkpoint."""

        def train_input_fn():
            tokens = sparse_tensor.SparseTensor(
                values=['the', 'cat', 'sat', 'dog', 'barked'],
                indices=[[0, 0], [0, 1], [0, 2], [1, 0], [1, 1]],
                dense_shape=[2, 3])
            features = {'tokens': tokens, 'w': [[1], [2]]}
            labels = [[1], [0]]
            return features, labels

        token_column = seq_fc.sequence_categorical_column_with_hash_bucket(
            'tokens', hash_bucket_size=10)
        embedded_tokens = fc.embedding_column(token_column, dimension=2)
        input_units = 2
        cell_units = [4, 2]

        est = rnn.RNNClassifier(num_units=cell_units,
                                sequence_feature_columns=[embedded_tokens],
                                n_classes=n_classes,
                                weight_column='w',
                                model_dir=self._model_dir)

        # Train for a few steps, and validate final checkpoint.
        num_steps = 10
        est.train(input_fn=train_input_fn, steps=num_steps)
        self._assert_checkpoint(n_classes, input_units, cell_units, num_steps)
# Example 2
    def testMultiClassFromCheckpoint(self):
        """Resumes multi-class training from a hand-crafted checkpoint."""
        initial_global_step = 100
        create_checkpoint(rnn_weights=[[.1, -.2], [.2, -.3], [.3, -.4]],
                          rnn_biases=[.2, .5],
                          logits_weights=[[-1., 0.5, 0.2], [1., -0.3, 0.1]],
                          logits_biases=[0.3, 0.4, 0.5],
                          global_step=initial_global_step,
                          model_dir=self._model_dir)

        def train_input_fn():
            price = sparse_tensor.SparseTensor(
                values=[10., 5., 2., 7.],
                indices=[[0, 0], [0, 1], [1, 0], [1, 1]],
                dense_shape=[2, 2])
            return {'price': price}, [[0], [1]]

        # Uses same checkpoint and examples as testMultiClassEvaluationMetrics.
        # See that test for loss calculation.
        mock_optimizer = self._mock_optimizer(expected_loss=2.662932)

        price_columns = [seq_fc.sequence_numeric_column('price', shape=(1, ))]
        est = rnn.RNNClassifier(
            num_units=[2],
            sequence_feature_columns=price_columns,
            n_classes=3,
            optimizer=mock_optimizer,
            model_dir=self._model_dir)
        # The mocked optimizer must be invoked exactly once by train().
        self.assertEqual(0, mock_optimizer.minimize.call_count)
        est.train(input_fn=train_input_fn, steps=10)
        self.assertEqual(1, mock_optimizer.minimize.call_count)
# Example 3
    def testFromScratchWithCustomRNNCellFn(self):
        """Trains from scratch using a user-supplied rnn_cell_fn."""

        def train_input_fn():
            tokens = sparse_tensor.SparseTensor(
                values=['the', 'cat', 'sat'],
                indices=[[0, 0], [0, 1], [0, 2]],
                dense_shape=[1, 3])
            return {'tokens': tokens}, [[1]]

        token_column = seq_fc.sequence_categorical_column_with_hash_bucket(
            'tokens', hash_bucket_size=10)
        embedded_tokens = fc.embedding_column(token_column, dimension=2)
        input_units = 2
        cell_units = [4, 2]
        n_classes = 2

        def rnn_cell_fn(mode):
            del mode  # unused
            # Stack one BasicRNNCell per entry in cell_units.
            stacked = [rnn_cell.BasicRNNCell(num_units=n) for n in cell_units]
            return rnn_cell.MultiRNNCell(stacked)

        est = rnn.RNNClassifier(sequence_feature_columns=[embedded_tokens],
                                rnn_cell_fn=rnn_cell_fn,
                                n_classes=n_classes,
                                model_dir=self._model_dir)

        # Train for a few steps, and validate final checkpoint.
        num_steps = 10
        est.train(input_fn=train_input_fn, steps=num_steps)
        self._assert_checkpoint(n_classes, input_units, cell_units, num_steps)
# Example 4
    def testConflictingRNNCellFn(self):
        """rnn_cell_fn must not be combined with num_units or cell_type."""
        col = seq_fc.sequence_categorical_column_with_hash_bucket(
            'tokens', hash_bucket_size=10)
        embed = fc.embedding_column(col, dimension=2)
        cell_units = [4, 2]

        # Both conflicting keyword arguments must raise the same error.
        for conflicting_kwargs in ({'num_units': cell_units},
                                   {'cell_type': 'lstm'}):
            with self.assertRaisesRegexp(
                    ValueError,
                    'num_units and cell_type must not be specified when using rnn_cell_fn'
            ):
                rnn.RNNClassifier(sequence_feature_columns=[embed],
                                  rnn_cell_fn=lambda x: x,
                                  **conflicting_kwargs)
  def testMultiClassEvaluationMetrics(self):
    """Evaluates a 3-class model restored from a fixed checkpoint."""
    global_step = 100
    create_checkpoint(
        rnn_weights=[[.1, -.2], [.2, -.3], [.3, -.4]],
        rnn_biases=[.2, .5],
        logits_weights=[[-1., 0.5, 0.2], [1., -0.3, 0.1]],
        logits_biases=[0.3, 0.4, 0.5],
        global_step=global_step,
        model_dir=self._model_dir)

    def eval_input_fn():
      price = sparse_tensor.SparseTensor(
          values=[10., 5., 2., 7.],
          indices=[[0, 0], [0, 1], [1, 0], [1, 1]],
          dense_shape=[2, 2])
      return {'price': price}, [[0], [1]]

    est = rnn.RNNClassifier(
        num_units=[2],
        sequence_feature_columns=[
            seq_fc.sequence_numeric_column('price', shape=(1,))],
        n_classes=3,
        model_dir=self._model_dir)
    eval_metrics = est.evaluate(eval_input_fn, steps=1)

    # Uses identical numbers to testMultiExampleMultiDim.
    # See that test for logits calculation.
    # logits = [[-0.603282, 0.777708, 0.569756],
    #           [-1.247356, 1.017018, 0.574481]]
    # logits_exp = exp(logits)
    #            = [[0.547013, 2.176468, 1.767836],
    #               [0.287263, 2.764937, 1.776208]]
    # softmax_probabilities = logits_exp / logits_exp.sum()
    #                       = [[0.121793, 0.484596, 0.393611],
    #                          [0.059494, 0.572639, 0.367866]]
    # loss = -1. * log(softmax[label])
    #      = [[2.105432], [0.557500]]
    # sum_over_batch_size = (2.105432 + 0.557500)/2
    expected_metrics = {
        ops.GraphKeys.GLOBAL_STEP: global_step,
        metric_keys.MetricKeys.LOSS: 1.331465,
        metric_keys.MetricKeys.LOSS_MEAN: 1.331466,
        metric_keys.MetricKeys.ACCURACY: 0.5,
    }

    self.assertAllClose(
        sorted_key_dict(expected_metrics), sorted_key_dict(eval_metrics))
  def testBinaryClassPredictions(self):
    """Checks every binary-class prediction output against hand-computed values."""
    create_checkpoint(
        rnn_weights=[[.1, -.2], [.2, -.3], [.3, -.4]],
        rnn_biases=[.2, .5],
        logits_weights=[[-1.], [1.]],
        logits_biases=[0.3],
        global_step=0,
        model_dir=self._model_dir)

    def predict_input_fn():
      price = sparse_tensor.SparseTensor(
          values=[10., 5.],
          indices=[[0, 0], [0, 1]],
          dense_shape=[1, 2])
      return {'price': price}

    est = rnn.RNNClassifier(
        num_units=[2],
        sequence_feature_columns=[
            seq_fc.sequence_numeric_column('price', shape=(1,))],
        n_classes=2,
        label_vocabulary=['class_0', 'class_1'],
        model_dir=self._model_dir)
    # Uses identical numbers to testOneDimLogits.
    # See that test for logits calculation.
    # logits = [-0.603282]
    # logistic = exp(-0.6033) / (1 + exp(-0.6033)) = [0.353593]
    # probabilities = [0.646407, 0.353593]
    # class_ids = argmax(probabilities) = [0]
    predictions = next(est.predict(predict_input_fn))
    keys = prediction_keys.PredictionKeys
    self.assertAllClose([-0.603282], predictions[keys.LOGITS])
    self.assertAllClose([0.353593], predictions[keys.LOGISTIC])
    self.assertAllClose([0.646407, 0.353593], predictions[keys.PROBABILITIES])
    self.assertAllClose([0], predictions[keys.CLASS_IDS])
    self.assertEqual([b'class_0'], predictions[keys.CLASSES])
  def testMultiClassPredictions(self):
    """Checks every multi-class prediction output against hand-computed values."""
    create_checkpoint(
        rnn_weights=[[.1, -.2], [.2, -.3], [.3, -.4]],
        rnn_biases=[.2, .5],
        logits_weights=[[-1., 0.5, 0.2], [1., -0.3, 0.1]],
        logits_biases=[0.3, 0.4, 0.5],
        global_step=0,
        model_dir=self._model_dir)

    def predict_input_fn():
      price = sparse_tensor.SparseTensor(
          values=[10., 5.],
          indices=[[0, 0], [0, 1]],
          dense_shape=[1, 2])
      return {'price': price}

    est = rnn.RNNClassifier(
        num_units=[2],
        sequence_feature_columns=[
            seq_fc.sequence_numeric_column('price', shape=(1,))],
        n_classes=3,
        label_vocabulary=['class_0', 'class_1', 'class_2'],
        model_dir=self._model_dir)
    # Uses identical numbers to testMultiDimLogits.
    # See that test for logits calculation.
    # logits = [-0.603282, 0.777708, 0.569756]
    # logits_exp = exp(logits) = [0.547013, 2.176468, 1.767836]
    # softmax_probabilities = logits_exp / logits_exp.sum()
    #                       = [0.121793, 0.484596, 0.393611]
    # class_ids = argmax(probabilities) = [1]
    predictions = next(est.predict(predict_input_fn))
    keys = prediction_keys.PredictionKeys
    self.assertAllClose([-0.603282, 0.777708, 0.569756],
                        predictions[keys.LOGITS])
    self.assertAllClose([0.121793, 0.484596, 0.393611],
                        predictions[keys.PROBABILITIES])
    self.assertAllClose([1], predictions[keys.CLASS_IDS])
    self.assertEqual([b'class_1'], predictions[keys.CLASSES])
# Example 8
    def _test_complete_flow(self, train_input_fn, eval_input_fn,
                            predict_input_fn, n_classes, batch_size):
        """Exercises train -> evaluate -> predict -> export on a fresh model."""
        token_column = seq_fc.sequence_categorical_column_with_hash_bucket(
            'tokens', hash_bucket_size=10)
        feature_columns = [fc.embedding_column(token_column, dimension=2)]

        cell_units = [4, 2]
        est = rnn.RNNClassifier(num_units=cell_units,
                                sequence_feature_columns=feature_columns,
                                n_classes=n_classes,
                                model_dir=self._model_dir)

        # TRAIN
        num_steps = 10
        est.train(train_input_fn, steps=num_steps)

        # EVALUATE
        scores = est.evaluate(eval_input_fn)
        self.assertEqual(num_steps, scores[ops.GraphKeys.GLOBAL_STEP])
        self.assertIn('loss', six.iterkeys(scores))

        # PREDICT
        predicted_proba = np.array([
            pred[prediction_keys.PredictionKeys.PROBABILITIES]
            for pred in est.predict(predict_input_fn)
        ])
        self.assertAllEqual((batch_size, n_classes), predicted_proba.shape)

        # EXPORT
        feature_spec = {
            'tokens': parsing_ops.VarLenFeature(dtypes.string),
            'label': parsing_ops.FixedLenFeature([1], dtypes.int64),
        }
        serving_input_receiver_fn = export.build_parsing_serving_input_receiver_fn(
            feature_spec)
        export_dir = est.export_savedmodel(tempfile.mkdtemp(),
                                           serving_input_receiver_fn)
        self.assertTrue(gfile.Exists(export_dir))
# Example 9
    def testBinaryClassEvaluationMetrics(self):
        """Checks all binary-classification eval metrics from a fixed checkpoint."""
        global_step = 100
        create_checkpoint(rnn_weights=[[.1, -.2], [.2, -.3], [.3, -.4]],
                          rnn_biases=[.2, .5],
                          logits_weights=[[-1.], [1.]],
                          logits_biases=[0.3],
                          global_step=global_step,
                          model_dir=self._model_dir)

        def eval_input_fn():
            price = sparse_tensor.SparseTensor(
                values=[10., 5., 2.],
                indices=[[0, 0], [0, 1], [1, 0]],
                dense_shape=[2, 2])
            return {'price': price}, [[0], [1]]

        est = rnn.RNNClassifier(
            num_units=[2],
            sequence_feature_columns=[
                seq_fc.sequence_numeric_column('price', shape=(1, ))],
            n_classes=2,
            model_dir=self._model_dir)
        eval_metrics = est.evaluate(eval_input_fn, steps=1)

        # Uses identical numbers to testMultiExamplesWithDifferentLength.
        # See that test for logits calculation.
        # logits = [[-0.603282], [0.019719]]
        # probability = exp(logits) / (1 + exp(logits)) = [[0.353593], [0.504930]]
        # loss = -label * ln(p) - (1 - label) * ln(1 - p)
        #      = [[0.436326], [0.683335]]
        keys = metric_keys.MetricKeys
        expected_metrics = {
            ops.GraphKeys.GLOBAL_STEP: global_step,
            keys.LOSS: 1.119661,
            keys.LOSS_MEAN: 0.559831,
            keys.ACCURACY: 1.0,
            keys.PREDICTION_MEAN: 0.429262,
            keys.LABEL_MEAN: 0.5,
            keys.ACCURACY_BASELINE: 0.5,
            # With default threshold of 0.5, the model is a perfect classifier.
            keys.RECALL: 1.0,
            keys.PRECISION: 1.0,
            # Positive example is scored above negative, so AUC = 1.0.
            keys.AUC: 1.0,
            keys.AUC_PR: 1.0,
        }
        self.assertAllClose(sorted_key_dict(expected_metrics),
                            sorted_key_dict(eval_metrics))
def _rnn_classifier_fn(feature_columns, n_classes, cell_units, model_dir):
  """Builds an RNNClassifier from the given columns, class count, and units."""
  return rnn.RNNClassifier(
      sequence_feature_columns=feature_columns,
      num_units=cell_units,
      n_classes=n_classes,
      model_dir=model_dir)