def test_weight_column_should_not_be_used_as_feature(self):
  """Reusing a feature column's key as weight_column must raise ValueError."""
  # assertRaisesRegex: the `assertRaisesRegexp` spelling is a deprecated
  # alias that was removed in Python 3.12.
  with self.assertRaisesRegex(ValueError,
                              'weight_column should not be used as feature'):
    parsing_utils.classifier_parse_example_spec(
        feature_columns=[fc.numeric_column('a')],
        label_key='b',
        weight_column=fc.numeric_column('a'))
def test_weight_column_should_be_a_numeric_column(self):
  """A non-numeric-column weight_column must raise ValueError."""
  # assertRaisesRegex: the `assertRaisesRegexp` spelling is a deprecated
  # alias that was removed in Python 3.12.
  with self.assertRaisesRegex(ValueError, 'tf.feature_column.numeric_column'):
    parsing_utils.classifier_parse_example_spec(
        feature_columns=[fc.numeric_column('a')],
        label_key='b',
        weight_column='NotANumericColumn')
def test_weight_column_should_not_be_used_as_feature(self):
  """Passing a weight_column that duplicates a feature column raises."""
  # Use assertRaisesRegex rather than the deprecated assertRaisesRegexp
  # alias (removed in Python 3.12).
  with self.assertRaisesRegex(ValueError,
                              'weight_column should not be used as feature'):
    parsing_utils.classifier_parse_example_spec(
        feature_columns=[fc.numeric_column('a')],
        label_key='b',
        weight_column=fc.numeric_column('a'))
def test_weight_column_should_be_a_numeric_column(self):
  """Passing a plain string as weight_column raises ValueError."""
  # Use assertRaisesRegex rather than the deprecated assertRaisesRegexp
  # alias (removed in Python 3.12).
  with self.assertRaisesRegex(ValueError, 'tf.feature_column.numeric_column'):
    parsing_utils.classifier_parse_example_spec(
        feature_columns=[fc.numeric_column('a')],
        label_key='b',
        weight_column='NotANumericColumn')
def _test_complete_flow(self, feature_columns, train_input_fn, eval_input_fn,
                        predict_input_fn, n_classes, batch_size):
  """Exercises train -> evaluate -> predict -> export for one estimator."""
  estimator = self._create_estimator_fn(feature_columns, n_classes, [4, 2],
                                        self._model_dir)

  # Train for a fixed number of steps.
  step_count = 10
  estimator.train(train_input_fn, steps=step_count)

  # Evaluate: the global step should equal the number of training steps,
  # and a loss metric should be reported.
  metrics = estimator.evaluate(eval_input_fn)
  self.assertEqual(step_count, metrics[ops.GraphKeys.GLOBAL_STEP])
  self.assertIn('loss', six.iterkeys(metrics))

  # Predict: class probabilities should come back as (batch_size, n_classes).
  probabilities = np.array([
      prediction[prediction_keys.PredictionKeys.PROBABILITIES]
      for prediction in estimator.predict(predict_input_fn)
  ])
  self.assertAllEqual((batch_size, n_classes), probabilities.shape)

  # Export a SavedModel that parses serialized tf.Examples, and check that
  # the export directory was actually created.
  feature_spec = parsing_utils.classifier_parse_example_spec(
      feature_columns, label_key='label', label_dtype=dtypes.int64)
  serving_input_receiver_fn = export.build_parsing_serving_input_receiver_fn(
      feature_spec)
  export_dir = estimator.export_savedmodel(tempfile.mkdtemp(),
                                           serving_input_receiver_fn)
  self.assertTrue(gfile.Exists(export_dir))
def _test_complete_flow(self, feature_columns, train_input_fn, eval_input_fn,
                        predict_input_fn, n_classes, batch_size):
  """Runs the full estimator lifecycle: train, evaluate, predict, export."""
  cell_units = [4, 2]
  num_steps = 10
  est = self._create_estimator_fn(feature_columns, n_classes, cell_units,
                                  self._model_dir)

  # --- Train ---
  est.train(train_input_fn, steps=num_steps)

  # --- Evaluate: global step advanced by num_steps and loss is reported ---
  scores = est.evaluate(eval_input_fn)
  self.assertEqual(num_steps, scores[ops.GraphKeys.GLOBAL_STEP])
  self.assertIn('loss', six.iterkeys(scores))

  # --- Predict: collect per-example probabilities and verify their shape ---
  predicted_rows = []
  for prediction in est.predict(predict_input_fn):
    predicted_rows.append(
        prediction[prediction_keys.PredictionKeys.PROBABILITIES])
  predicted_proba = np.array(predicted_rows)
  self.assertAllEqual((batch_size, n_classes), predicted_proba.shape)

  # --- Export: build a parsing serving input fn and export a SavedModel ---
  feature_spec = parsing_utils.classifier_parse_example_spec(
      feature_columns, label_key='label', label_dtype=dtypes.int64)
  serving_input_receiver_fn = export.build_parsing_serving_input_receiver_fn(
      feature_spec)
  export_dir = est.export_savedmodel(tempfile.mkdtemp(),
                                     serving_input_receiver_fn)
  self.assertTrue(gfile.Exists(export_dir))
def test_defaults(self):
  """Default spec: length-1 float32 features and a length-1 int64 label."""
  actual_spec = parsing_utils.classifier_parse_example_spec(
      feature_columns=[fc.numeric_column('a')], label_key='b')
  expected_spec = {
      'a': parsing_ops.FixedLenFeature((1,), dtype=dtypes.float32),
      'b': parsing_ops.FixedLenFeature((1,), dtype=dtypes.int64),
  }
  self.assertDictEqual(expected_spec, actual_spec)
def test_defaults(self):
  """With no optional args, the feature is float32 and the label int64."""
  expected = {
      'a': parsing_ops.FixedLenFeature((1,), dtype=dtypes.float32),
      'b': parsing_ops.FixedLenFeature((1,), dtype=dtypes.int64),
  }
  result = parsing_utils.classifier_parse_example_spec(
      feature_columns=[fc.numeric_column('a')], label_key='b')
  self.assertDictEqual(expected, result)
def testParseExampleInputFn(self):
  """Tests complete flow with input_fn constructed from parse_example."""
  n_classes = 3
  batch_size = 10
  words = [b'dog', b'cat', b'bird', b'the', b'a', b'sat', b'flew', b'slept']

  # NamedTemporaryFile closes the OS-level file descriptor when the `with`
  # block exits; `tempfile.mkstemp()` would leak the returned fd.
  with tempfile.NamedTemporaryFile(delete=False) as tmp:
    examples_file = tmp.name
  writer = python_io.TFRecordWriter(examples_file)
  for _ in range(batch_size):
    # Each record holds a random-length sentence of random words plus a
    # random class label.
    sequence_length = random.randint(1, len(words))
    sentence = random.sample(words, sequence_length)
    label = random.randint(0, n_classes - 1)
    example = example_pb2.Example(features=feature_pb2.Features(
        feature={
            'tokens':
                feature_pb2.Feature(bytes_list=feature_pb2.BytesList(
                    value=sentence)),
            'label':
                feature_pb2.Feature(int64_list=feature_pb2.Int64List(
                    value=[label])),
        }))
    writer.write(example.SerializeToString())
  writer.close()

  # Embed hashed token ids so variable-length sentences become dense input.
  col = seq_fc.sequence_categorical_column_with_hash_bucket(
      'tokens', hash_bucket_size=10)
  embed = fc.embedding_column(col, dimension=2)
  feature_columns = [embed]
  feature_spec = parsing_utils.classifier_parse_example_spec(
      feature_columns, label_key='label', label_dtype=dtypes.int64)

  def _train_input_fn():
    # Unbounded epochs for training; pop 'label' out of the feature dict.
    dataset = readers.make_batched_features_dataset(
        examples_file, batch_size, feature_spec)
    return dataset.map(lambda features: (features, features.pop('label')))

  def _eval_input_fn():
    # Single epoch for evaluation.
    dataset = readers.make_batched_features_dataset(
        examples_file, batch_size, feature_spec, num_epochs=1)
    return dataset.map(lambda features: (features, features.pop('label')))

  def _predict_input_fn():
    # Single epoch, features only — the label is dropped for prediction.
    dataset = readers.make_batched_features_dataset(
        examples_file, batch_size, feature_spec, num_epochs=1)

    def features_fn(features):
      features.pop('label')
      return features

    return dataset.map(features_fn)

  self._test_complete_flow(
      feature_columns=feature_columns,
      train_input_fn=_train_input_fn,
      eval_input_fn=_eval_input_fn,
      predict_input_fn=_predict_input_fn,
      n_classes=n_classes,
      batch_size=batch_size)
def testParseExampleInputFn(self):
  """Tests complete flow with input_fn constructed from parse_example."""
  n_classes = 3
  batch_size = 10
  words = [b'dog', b'cat', b'bird', b'the', b'a', b'sat', b'flew', b'slept']

  # Create the temp file via NamedTemporaryFile so the underlying file
  # descriptor is closed; tempfile.mkstemp() would leak it.
  with tempfile.NamedTemporaryFile(delete=False) as tmp:
    examples_file = tmp.name

  # Write batch_size tf.Examples, each with a random sentence and label.
  writer = python_io.TFRecordWriter(examples_file)
  for _ in range(batch_size):
    sequence_length = random.randint(1, len(words))
    sentence = random.sample(words, sequence_length)
    label = random.randint(0, n_classes - 1)
    example = example_pb2.Example(features=feature_pb2.Features(
        feature={
            'tokens':
                feature_pb2.Feature(bytes_list=feature_pb2.BytesList(
                    value=sentence)),
            'label':
                feature_pb2.Feature(int64_list=feature_pb2.Int64List(
                    value=[label])),
        }))
    writer.write(example.SerializeToString())
  writer.close()

  # Feature columns: hash tokens into 10 buckets, then embed in 2 dims.
  col = seq_fc.sequence_categorical_column_with_hash_bucket(
      'tokens', hash_bucket_size=10)
  embed = fc.embedding_column(col, dimension=2)
  feature_columns = [embed]
  feature_spec = parsing_utils.classifier_parse_example_spec(
      feature_columns, label_key='label', label_dtype=dtypes.int64)

  def _train_input_fn():
    # Repeats indefinitely; returns (features, label) pairs.
    dataset = readers.make_batched_features_dataset(
        examples_file, batch_size, feature_spec)
    return dataset.map(lambda features: (features, features.pop('label')))

  def _eval_input_fn():
    # One pass over the data for evaluation.
    dataset = readers.make_batched_features_dataset(
        examples_file, batch_size, feature_spec, num_epochs=1)
    return dataset.map(lambda features: (features, features.pop('label')))

  def _predict_input_fn():
    # One pass; strip the label so prediction sees features only.
    dataset = readers.make_batched_features_dataset(
        examples_file, batch_size, feature_spec, num_epochs=1)

    def features_fn(features):
      features.pop('label')
      return features

    return dataset.map(features_fn)

  self._test_complete_flow(
      feature_columns=feature_columns,
      train_input_fn=_train_input_fn,
      eval_input_fn=_eval_input_fn,
      predict_input_fn=_predict_input_fn,
      n_classes=n_classes,
      batch_size=batch_size)