def _testExampleWeight(self, n_classes):
  """Trains an RNNClassifier with a per-example weight column ('w')."""

  def train_input_fn():
    features = {
        'tokens':
            sparse_tensor.SparseTensor(
                values=['the', 'cat', 'sat', 'dog', 'barked'],
                indices=[[0, 0], [0, 1], [0, 2], [1, 0], [1, 1]],
                dense_shape=[2, 3]),
        'w': [[1], [2]],
    }
    labels = [[1], [0]]
    return features, labels

  token_column = seq_fc.sequence_categorical_column_with_hash_bucket(
      'tokens', hash_bucket_size=10)
  token_embedding = fc.embedding_column(token_column, dimension=2)
  input_units = 2
  cell_units = [4, 2]
  classifier = rnn.RNNClassifier(
      num_units=cell_units,
      sequence_feature_columns=[token_embedding],
      n_classes=n_classes,
      weight_column='w',
      model_dir=self._model_dir)

  # Train briefly, then verify the final checkpoint has the expected state.
  num_steps = 10
  classifier.train(input_fn=train_input_fn, steps=num_steps)
  self._assert_checkpoint(n_classes, input_units, cell_units, num_steps)
def testMultiClassFromCheckpoint(self):
  """Resumes 3-class training from a checkpoint and checks the optimizer runs.

  A checkpoint is written with fixed weights at global_step=100; training is
  then expected to invoke the (mock) optimizer exactly once per `train` call
  with the hand-computed loss below.
  """
  initial_global_step = 100
  create_checkpoint(
      rnn_weights=[[.1, -.2], [.2, -.3], [.3, -.4]],
      rnn_biases=[.2, .5],
      logits_weights=[[-1., 0.5, 0.2], [1., -0.3, 0.1]],
      logits_biases=[0.3, 0.4, 0.5],
      global_step=initial_global_step,
      model_dir=self._model_dir)

  def train_input_fn():
    # Two sequences of scalar 'price' values; labels 0 and 1.
    return {
        'price':
            sparse_tensor.SparseTensor(
                values=[10., 5., 2., 7.],
                indices=[[0, 0], [0, 1], [1, 0], [1, 1]],
                dense_shape=[2, 2]),
    }, [[0], [1]]

  # Uses same checkpoint and examples as testMultiClassEvaluationMetrics.
  # See that test for loss calculation.
  # 2.662932 = 2.105432 + 0.557500 (summed per-example loss; the eval test
  # reports the same quantity averaged over the batch).
  mock_optimizer = self._mock_optimizer(expected_loss=2.662932)
  sequence_feature_columns = [
      seq_fc.sequence_numeric_column('price', shape=(1,))
  ]
  est = rnn.RNNClassifier(
      num_units=[2],
      sequence_feature_columns=sequence_feature_columns,
      n_classes=3,
      optimizer=mock_optimizer,
      model_dir=self._model_dir)
  # The optimizer must not have been touched before training starts.
  self.assertEqual(0, mock_optimizer.minimize.call_count)
  est.train(input_fn=train_input_fn, steps=10)
  # minimize() is called once when the train op is built.
  self.assertEqual(1, mock_optimizer.minimize.call_count)
def testFromScratchWithCustomRNNCellFn(self):
  """Trains from scratch using a user-supplied rnn_cell_fn."""

  def train_input_fn():
    return {
        'tokens':
            sparse_tensor.SparseTensor(
                values=['the', 'cat', 'sat'],
                indices=[[0, 0], [0, 1], [0, 2]],
                dense_shape=[1, 3]),
    }, [[1]]

  hashed_tokens = seq_fc.sequence_categorical_column_with_hash_bucket(
      'tokens', hash_bucket_size=10)
  embedded_tokens = fc.embedding_column(hashed_tokens, dimension=2)
  input_units = 2
  cell_units = [4, 2]
  n_classes = 2

  def rnn_cell_fn(mode):
    del mode  # unused
    return rnn_cell.MultiRNNCell(
        [rnn_cell.BasicRNNCell(num_units=units) for units in cell_units])

  classifier = rnn.RNNClassifier(
      sequence_feature_columns=[embedded_tokens],
      rnn_cell_fn=rnn_cell_fn,
      n_classes=n_classes,
      model_dir=self._model_dir)

  # Train for a few steps, and validate final checkpoint.
  num_steps = 10
  classifier.train(input_fn=train_input_fn, steps=num_steps)
  self._assert_checkpoint(n_classes, input_units, cell_units, num_steps)
def testConflictingRNNCellFn(self):
  """Constructor rejects rnn_cell_fn combined with num_units or cell_type."""
  hashed = seq_fc.sequence_categorical_column_with_hash_bucket(
      'tokens', hash_bucket_size=10)
  embedded = fc.embedding_column(hashed, dimension=2)
  expected_msg = (
      'num_units and cell_type must not be specified when using rnn_cell_fn')

  # Each kwarg conflicts with rnn_cell_fn and must raise the same error.
  for conflicting_kwargs in ({'num_units': [4, 2]}, {'cell_type': 'lstm'}):
    with self.assertRaisesRegexp(ValueError, expected_msg):
      rnn.RNNClassifier(
          sequence_feature_columns=[embedded],
          rnn_cell_fn=lambda x: x,
          **conflicting_kwargs)
def testMultiClassEvaluationMetrics(self):
  """Checks eval metrics of a 3-class model restored from fixed weights."""
  global_step = 100
  create_checkpoint(
      rnn_weights=[[.1, -.2], [.2, -.3], [.3, -.4]],
      rnn_biases=[.2, .5],
      logits_weights=[[-1., 0.5, 0.2], [1., -0.3, 0.1]],
      logits_biases=[0.3, 0.4, 0.5],
      global_step=global_step,
      model_dir=self._model_dir)

  def eval_input_fn():
    # Two sequences of scalar 'price' values; labels 0 and 1.
    return {
        'price':
            sparse_tensor.SparseTensor(
                values=[10., 5., 2., 7.],
                indices=[[0, 0], [0, 1], [1, 0], [1, 1]],
                dense_shape=[2, 2]),
    }, [[0], [1]]

  sequence_feature_columns = [
      seq_fc.sequence_numeric_column('price', shape=(1,))]
  est = rnn.RNNClassifier(
      num_units=[2],
      sequence_feature_columns=sequence_feature_columns,
      n_classes=3,
      model_dir=self._model_dir)
  eval_metrics = est.evaluate(eval_input_fn, steps=1)

  # Uses identical numbers to testMultiExampleMultiDim.
  # See that test for logits calculation.
  # logits = [[-0.603282, 0.777708, 0.569756],
  #           [-1.247356, 1.017018, 0.574481]]
  # logits_exp = exp(logits)
  #            = [[0.547013, 2.176468, 1.767836],
  #               [0.287263, 2.764937, 1.776208]]
  # softmax_probabilities = logits_exp / logits_exp.sum()
  #                       = [[0.121793, 0.484596, 0.393611],
  #                          [0.059494, 0.572639, 0.367866]]
  # loss = -1. * log(softmax[label])
  #      = [[2.105432], [0.557500]]
  # sum_over_batch_size = (2.105432 + 0.557500)/2
  expected_metrics = {
      ops.GraphKeys.GLOBAL_STEP: global_step,
      metric_keys.MetricKeys.LOSS: 1.331465,
      metric_keys.MetricKeys.LOSS_MEAN: 1.331466,
      metric_keys.MetricKeys.ACCURACY: 0.5,
  }
  self.assertAllClose(
      sorted_key_dict(expected_metrics), sorted_key_dict(eval_metrics))
def testBinaryClassPredictions(self):
  """Checks every binary prediction key against hand-computed values."""
  create_checkpoint(
      rnn_weights=[[.1, -.2], [.2, -.3], [.3, -.4]],
      rnn_biases=[.2, .5],
      logits_weights=[[-1.], [1.]],
      logits_biases=[0.3],
      global_step=0,
      model_dir=self._model_dir)

  def predict_input_fn():
    return {
        'price':
            sparse_tensor.SparseTensor(
                values=[10., 5.],
                indices=[[0, 0], [0, 1]],
                dense_shape=[1, 2]),
    }

  price_column = seq_fc.sequence_numeric_column('price', shape=(1,))
  classifier = rnn.RNNClassifier(
      num_units=[2],
      sequence_feature_columns=[price_column],
      n_classes=2,
      label_vocabulary=['class_0', 'class_1'],
      model_dir=self._model_dir)

  # Uses identical numbers to testOneDimLogits.
  # See that test for logits calculation.
  # logits = [-0.603282]
  # logistic = exp(-0.6033) / (1 + exp(-0.6033)) = [0.353593]
  # probabilities = [0.646407, 0.353593]
  # class_ids = argmax(probabilities) = [0]
  predictions = next(classifier.predict(predict_input_fn))
  keys = prediction_keys.PredictionKeys
  self.assertAllClose([-0.603282], predictions[keys.LOGITS])
  self.assertAllClose([0.353593], predictions[keys.LOGISTIC])
  self.assertAllClose([0.646407, 0.353593], predictions[keys.PROBABILITIES])
  self.assertAllClose([0], predictions[keys.CLASS_IDS])
  self.assertEqual([b'class_0'], predictions[keys.CLASSES])
def testMultiClassPredictions(self):
  """Checks every multi-class prediction key against hand-computed values."""
  create_checkpoint(
      rnn_weights=[[.1, -.2], [.2, -.3], [.3, -.4]],
      rnn_biases=[.2, .5],
      logits_weights=[[-1., 0.5, 0.2], [1., -0.3, 0.1]],
      logits_biases=[0.3, 0.4, 0.5],
      global_step=0,
      model_dir=self._model_dir)

  def predict_input_fn():
    return {
        'price':
            sparse_tensor.SparseTensor(
                values=[10., 5.],
                indices=[[0, 0], [0, 1]],
                dense_shape=[1, 2]),
    }

  price_column = seq_fc.sequence_numeric_column('price', shape=(1,))
  classifier = rnn.RNNClassifier(
      num_units=[2],
      sequence_feature_columns=[price_column],
      n_classes=3,
      label_vocabulary=['class_0', 'class_1', 'class_2'],
      model_dir=self._model_dir)

  # Uses identical numbers to testMultiDimLogits.
  # See that test for logits calculation.
  # logits = [-0.603282, 0.777708, 0.569756]
  # logits_exp = exp(logits) = [0.547013, 2.176468, 1.767836]
  # softmax_probabilities = logits_exp / logits_exp.sum()
  #                       = [0.121793, 0.484596, 0.393611]
  # class_ids = argmax(probabilities) = [1]
  predictions = next(classifier.predict(predict_input_fn))
  keys = prediction_keys.PredictionKeys
  self.assertAllClose(
      [-0.603282, 0.777708, 0.569756], predictions[keys.LOGITS])
  self.assertAllClose(
      [0.121793, 0.484596, 0.393611], predictions[keys.PROBABILITIES])
  self.assertAllClose([1], predictions[keys.CLASS_IDS])
  self.assertEqual([b'class_1'], predictions[keys.CLASSES])
def _test_complete_flow(self, train_input_fn, eval_input_fn, predict_input_fn,
                        n_classes, batch_size):
  """Exercises train -> evaluate -> predict -> export on one classifier."""
  hashed = seq_fc.sequence_categorical_column_with_hash_bucket(
      'tokens', hash_bucket_size=10)
  feature_columns = [fc.embedding_column(hashed, dimension=2)]
  cell_units = [4, 2]
  classifier = rnn.RNNClassifier(
      num_units=cell_units,
      sequence_feature_columns=feature_columns,
      n_classes=n_classes,
      model_dir=self._model_dir)

  # TRAIN
  num_steps = 10
  classifier.train(train_input_fn, steps=num_steps)

  # EVALUATE
  scores = classifier.evaluate(eval_input_fn)
  self.assertEqual(num_steps, scores[ops.GraphKeys.GLOBAL_STEP])
  self.assertIn('loss', six.iterkeys(scores))

  # PREDICT
  predicted_proba = np.array([
      prediction[prediction_keys.PredictionKeys.PROBABILITIES]
      for prediction in classifier.predict(predict_input_fn)
  ])
  self.assertAllEqual((batch_size, n_classes), predicted_proba.shape)

  # EXPORT
  feature_spec = {
      'tokens': parsing_ops.VarLenFeature(dtypes.string),
      'label': parsing_ops.FixedLenFeature([1], dtypes.int64),
  }
  serving_input_receiver_fn = export.build_parsing_serving_input_receiver_fn(
      feature_spec)
  export_dir = classifier.export_savedmodel(tempfile.mkdtemp(),
                                            serving_input_receiver_fn)
  self.assertTrue(gfile.Exists(export_dir))
def testBinaryClassEvaluationMetrics(self):
  """Checks eval metrics of a binary model restored from fixed weights."""
  global_step = 100
  create_checkpoint(
      rnn_weights=[[.1, -.2], [.2, -.3], [.3, -.4]],
      rnn_biases=[.2, .5],
      logits_weights=[[-1.], [1.]],
      logits_biases=[0.3],
      global_step=global_step,
      model_dir=self._model_dir)

  def eval_input_fn():
    # Sequences of lengths 2 and 1 (second row has one entry); labels 0, 1.
    return {
        'price':
            sparse_tensor.SparseTensor(
                values=[10., 5., 2.],
                indices=[[0, 0], [0, 1], [1, 0]],
                dense_shape=[2, 2]),
    }, [[0], [1]]

  sequence_feature_columns = [
      seq_fc.sequence_numeric_column('price', shape=(1,))
  ]
  est = rnn.RNNClassifier(
      num_units=[2],
      sequence_feature_columns=sequence_feature_columns,
      n_classes=2,
      model_dir=self._model_dir)
  eval_metrics = est.evaluate(eval_input_fn, steps=1)

  # Uses identical numbers to testMultiExamplesWithDifferentLength.
  # See that test for logits calculation.
  # logits = [[-0.603282], [0.019719]]
  # probability = exp(logits) / (1 + exp(logits)) = [[0.353593], [0.504930]]
  # loss = -label * ln(p) - (1 - label) * ln(1 - p)
  #      = [[0.436326], [0.683335]]
  expected_metrics = {
      ops.GraphKeys.GLOBAL_STEP: global_step,
      # LOSS is the summed loss; LOSS_MEAN averages it over the 2 examples.
      metric_keys.MetricKeys.LOSS: 1.119661,
      metric_keys.MetricKeys.LOSS_MEAN: 0.559831,
      metric_keys.MetricKeys.ACCURACY: 1.0,
      metric_keys.MetricKeys.PREDICTION_MEAN: 0.429262,
      metric_keys.MetricKeys.LABEL_MEAN: 0.5,
      metric_keys.MetricKeys.ACCURACY_BASELINE: 0.5,
      # With default threshold of 0.5, the model is a perfect classifier.
      metric_keys.MetricKeys.RECALL: 1.0,
      metric_keys.MetricKeys.PRECISION: 1.0,
      # Positive example is scored above negative, so AUC = 1.0.
      metric_keys.MetricKeys.AUC: 1.0,
      metric_keys.MetricKeys.AUC_PR: 1.0,
  }
  self.assertAllClose(
      sorted_key_dict(expected_metrics), sorted_key_dict(eval_metrics))
def _rnn_classifier_fn(feature_columns, n_classes, cell_units, model_dir):
  """Factory returning an RNNClassifier wired up for these tests."""
  classifier = rnn.RNNClassifier(
      num_units=cell_units,
      sequence_feature_columns=feature_columns,
      n_classes=n_classes,
      model_dir=model_dir)
  return classifier