def testIrisInputFn(self):
    """Full fit/evaluate/predict round-trip on iris driven by an input_fn."""
    dataset = base.load_iris()
    est = estimator.Estimator(model_fn=logistic_model_no_mode_fn)
    est.fit(input_fn=iris_input_fn, steps=100)
    _ = est.evaluate(input_fn=iris_input_fn, steps=1)
    preds = list(est.predict(x=dataset.data))
    # One prediction per iris example.
    self.assertEqual(len(preds), dataset.target.shape[0])
def testIrisDNN(self):
    """A fitted DNN classifier exposes weight/bias variables of expected shapes."""
    dataset = base.load_iris()
    columns = [feature_column.real_valued_column("", dimension=4)]
    clf = dnn.DNNClassifier(
        feature_columns=columns,
        hidden_units=[10, 20, 10],
        n_classes=3,
        config=run_config.RunConfig(tf_random_seed=1))
    clf.fit(dataset.data, dataset.target, max_steps=200)
    names = clf.get_variable_names()
    # Weight matrices connect layers of sizes 4 -> 10 -> 20 -> 10 -> 3.
    self.assertEqual(
        clf.get_variable_value("dnn/hiddenlayer_0/weights").shape, (4, 10))
    self.assertEqual(
        clf.get_variable_value("dnn/hiddenlayer_1/weights").shape, (10, 20))
    self.assertEqual(
        clf.get_variable_value("dnn/hiddenlayer_2/weights").shape, (20, 10))
    self.assertEqual(
        clf.get_variable_value("dnn/logits/weights").shape, (10, 3))
    # Every layer also has a bias variable.
    for layer in ("hiddenlayer_0", "hiddenlayer_1", "hiddenlayer_2", "logits"):
        self.assertIn("dnn/" + layer + "/biases", names)
def testAdditionalOutputs(self):
    """Tests multi-class classification using matrix data as input."""
    hparams = tensor_forest.ForestHParams(
        num_trees=1, max_nodes=100, num_classes=3, num_features=4,
        split_after_samples=20, inference_tree_paths=True)
    est = random_forest.CoreTensorForestEstimator(
        hparams.fill(), keys_column='keys', include_all_in_serving=True)
    dataset = base.load_iris()
    features = dataset.data.astype(np.float32)
    targets = dataset.target.astype(np.int32)
    input_fn = numpy_io.numpy_input_fn(
        x={'x': features,
           'keys': np.arange(len(dataset.data)).reshape(150, 1)},
        y=targets,
        batch_size=10,
        num_epochs=1,
        shuffle=False)
    est.train(input_fn=input_fn, steps=100)
    outputs = list(est.predict(input_fn=input_fn))
    # Each prediction must carry the key column, tree paths and variance.
    for out in outputs:
        self.assertTrue('keys' in out)
        self.assertTrue('tree_paths' in out)
        self.assertTrue('prediction_variance' in out)
def _input_fn():
    """Return iris as ({'feature': float32 tensor}, int32 labels of shape [150])."""
    dataset = base.load_iris()
    feature_dict = {
        'feature': constant_op.constant(dataset.data, dtype=dtypes.float32)
    }
    labels = constant_op.constant(
        dataset.target, shape=[150], dtype=dtypes.int32)
    return feature_dict, labels
def iris_input_fn():
    """Return iris (features, labels) tensors reshaped for training."""
    dataset = base.load_iris()
    feats = tf.reshape(tf.constant(dataset.data), [-1, _IRIS_INPUT_DIM])
    targets = tf.reshape(tf.constant(dataset.target), [-1])
    return feats, targets
def testWithFeatureColumns(self):
    """Trains a forest through a numeric feature column and checks metrics."""
    head_fn = head_lib._multi_class_head_with_softmax_cross_entropy_loss(
        n_classes=3,
        loss_reduction=losses.Reduction.SUM_OVER_NONZERO_WEIGHTS)
    hparams = tensor_forest.ForestHParams(
        num_trees=3, max_nodes=1000, num_classes=3, num_features=4,
        split_after_samples=20, inference_tree_paths=True)
    est = random_forest.CoreTensorForestEstimator(
        hparams.fill(),
        head=head_fn,
        feature_columns=[core_feature_column.numeric_column('x')])
    dataset = base.load_iris()
    input_fn = numpy_io.numpy_input_fn(
        x={'x': dataset.data.astype(np.float32)},
        y=dataset.target.astype(np.int32),
        batch_size=150,
        num_epochs=None,
        shuffle=False)
    est.train(input_fn=input_fn, steps=100)
    metrics = est.evaluate(input_fn=input_fn, steps=1)
    self.assertEqual(1.0, metrics['accuracy'])
    self.assertAllClose(0.55144483, metrics['loss'])
def testIrisAllDictionaryInput(self):
    """End-to-end fit/evaluate/predict with dict-valued x and y."""
    dataset = base.load_iris()
    est = estimator.Estimator(model_fn=logistic_model_no_mode_fn)
    x_dict = {'input': dataset.data}
    y_dict = {'labels': dataset.target}
    est.fit(x_dict, y_dict, steps=100)
    scores = est.evaluate(
        x=x_dict,
        y=y_dict,
        metrics={('accuracy', 'class'): metric_ops.streaming_accuracy})
    preds = list(est.predict(x=x_dict))
    preds_class_only = list(est.predict(x=x_dict, outputs=['class']))
    self.assertEqual(len(preds), dataset.target.shape[0])
    classes = np.array([p['class'] for p in preds])
    # Class output agrees whether or not outputs= filters the prediction dict.
    self.assertAllClose(classes,
                        np.array([p['class'] for p in preds_class_only]))
    probs = np.array([p['prob'] for p in preds])
    self.assertAllClose(classes, np.argmax(probs, axis=1))
    sklearn_score = _sklearn.accuracy_score(dataset.target, classes)
    self.assertAllClose(sklearn_score, scores['accuracy'])
    self.assertTrue('global_step' in scores)
    self.assertEqual(scores['global_step'], 100)
def testIrisAllDictionaryInput(self):
    """Dictionary-form x/y flow through fit, evaluate and predict unchanged."""
    dataset = base.load_iris()
    est = estimator.Estimator(model_fn=logistic_model_no_mode_fn)
    inputs = {'input': dataset.data}
    targets = {'labels': dataset.target}
    est.fit(inputs, targets, steps=100)
    scores = est.evaluate(
        x=inputs,
        y=targets,
        metrics={('accuracy', 'class'): metric_ops.streaming_accuracy})
    all_preds = list(est.predict(x=inputs))
    class_preds = list(est.predict(x=inputs, outputs=['class']))
    self.assertEqual(len(all_preds), dataset.target.shape[0])
    predicted_classes = np.array([p['class'] for p in all_preds])
    self.assertAllClose(predicted_classes,
                        np.array([p['class'] for p in class_preds]))
    # 'class' must be the argmax over the probability vector.
    self.assertAllClose(
        predicted_classes,
        np.argmax(np.array([p['prob'] for p in all_preds]), axis=1))
    reference = _sklearn.accuracy_score(dataset.target, predicted_classes)
    self.assertAllClose(reference, scores['accuracy'])
    self.assertTrue('global_step' in scores)
    self.assertEqual(scores['global_step'], 100)
def iris_input_multiclass_fn():
    """Return iris features dict plus int32 labels shaped (150, 1)."""
    dataset = base.load_iris()
    feature_dict = {
        'feature': constant_op.constant(dataset.data, dtype=dtypes.float32)
    }
    labels = constant_op.constant(
        dataset.target, shape=(150, 1), dtype=dtypes.int32)
    return feature_dict, labels
def testAdditionalOutputs(self):
    """Tests multi-class classification using matrix data as input."""
    forest_params = tensor_forest.ForestHParams(
        num_trees=1, max_nodes=100, num_classes=3, num_features=4,
        split_after_samples=20, inference_tree_paths=True)
    model = random_forest.CoreTensorForestEstimator(
        forest_params.fill(), keys_column='keys', include_all_in_serving=True)
    dataset = base.load_iris()
    x_data = dataset.data.astype(np.float32)
    y_data = dataset.target.astype(np.int32)
    # A per-example key column rides along with the features.
    key_column = np.arange(len(dataset.data)).reshape(150, 1)
    input_fn = numpy_io.numpy_input_fn(
        x={'x': x_data, 'keys': key_column},
        y=y_data,
        batch_size=10,
        num_epochs=1,
        shuffle=False)
    model.train(input_fn=input_fn, steps=100)
    for prediction in model.predict(input_fn=input_fn):
        self.assertTrue('keys' in prediction)
        self.assertTrue('tree_paths' in prediction)
        self.assertTrue('prediction_variance' in prediction)
def testWithFeatureColumns(self):
    """Feature-column-driven forest reaches known accuracy/loss on iris."""
    softmax_head = head_lib._multi_class_head_with_softmax_cross_entropy_loss(
        n_classes=3,
        loss_reduction=losses.Reduction.SUM_OVER_NONZERO_WEIGHTS)
    forest_params = tensor_forest.ForestHParams(
        num_trees=3, max_nodes=1000, num_classes=3, num_features=4,
        split_after_samples=20, inference_tree_paths=True)
    model = random_forest.CoreTensorForestEstimator(
        forest_params.fill(),
        head=softmax_head,
        feature_columns=[core_feature_column.numeric_column('x')])
    dataset = base.load_iris()
    x_data = {'x': dataset.data.astype(np.float32)}
    y_data = dataset.target.astype(np.int32)
    input_fn = numpy_io.numpy_input_fn(
        x=x_data, y=y_data, batch_size=150, num_epochs=None, shuffle=False)
    model.train(input_fn=input_fn, steps=100)
    results = model.evaluate(input_fn=input_fn, steps=1)
    self.assertEqual(1.0, results['accuracy'])
    self.assertAllClose(0.55144483, results['loss'])
def testIrisIteratorPlainInt(self):
    """fit() accepts iterator features and a generator of plain-int labels."""
    dataset = base.load_iris()
    est = estimator.Estimator(model_fn=logistic_model_no_mode_fn)
    feature_iter = itertools.islice(dataset.data, 100)
    label_gen = (label for label in dataset.target)
    est.fit(feature_iter, label_gen, steps=100)
    _ = est.evaluate(input_fn=iris_input_fn, steps=1)
    _ = six.next(est.predict(x=dataset.data))['class']
def _iris_data_input_fn():
    """Iris restricted to classes {0, 1}: a binary logistic-regression task."""
    # Converts iris data to a logistic regression problem.
    dataset = base.load_iris()
    binary_ids = np.where((dataset.target == 0) | (dataset.target == 1))
    feats = constant_op.constant(dataset.data[binary_ids], dtype=dtypes.float32)
    targets = constant_op.constant(
        dataset.target[binary_ids], dtype=dtypes.float32)
    # Turn labels into a column vector: [n] -> [n, 1].
    targets = array_ops.reshape(targets, targets.get_shape().concatenate(1))
    return feats, targets
def iris_input_fn_labels_dict():
    """Iris input_fn whose labels are wrapped in a {'labels': ...} dict."""
    dataset = base.load_iris()
    feats = array_ops.reshape(
        constant_op.constant(dataset.data), [-1, _IRIS_INPUT_DIM])
    label_dict = {
        'labels': array_ops.reshape(constant_op.constant(dataset.target), [-1])
    }
    return feats, label_dict
def iris_input_fn_labels_dict():
    """Return iris features plus labels packed under the 'labels' dict key."""
    dataset = base.load_iris()
    flat_targets = array_ops.reshape(constant_op.constant(dataset.target), [-1])
    features = array_ops.reshape(
        constant_op.constant(dataset.data), [-1, _IRIS_INPUT_DIM])
    return features, {'labels': flat_targets}
def testIrisIterator(self):
    """fit() accepts islice iterators for both features and labels."""
    dataset = base.load_iris()
    est = estimator.Estimator(model_fn=logistic_model_no_mode_fn)
    est.fit(itertools.islice(dataset.data, 100),
            itertools.islice(dataset.target, 100),
            steps=100)
    _ = est.evaluate(input_fn=iris_input_fn, steps=1)
    preds = list(est.predict(x=dataset.data))
    self.assertEqual(len(preds), dataset.target.shape[0])
def testMultiClass_NpMatrixData(self):
    """Tests multi-class classification using numpy matrix data as input."""
    dataset = base.load_iris()
    clf = debug.DebugClassifier(n_classes=3)
    clf.fit(x=dataset.data, y=dataset.target, steps=200)
    metrics = clf.evaluate(x=dataset.data, y=dataset.target, steps=1)
    self._assertInRange(0.0, 1.0, metrics['accuracy'])
def testDNNDropout0(self):
    """A dropout probability of exactly 0.0 must be accepted by fit()."""
    dataset = base.load_iris()
    columns = [feature_column.real_valued_column("", dimension=4)]
    clf = dnn.DNNClassifier(
        feature_columns=columns,
        hidden_units=[10, 20, 10],
        n_classes=3,
        dropout=0.0,
        config=run_config.RunConfig(tf_random_seed=1))
    clf.fit(dataset.data, dataset.target, max_steps=200)
def _get_classification_input_fns():
    """Build (train_input_fn, predict_input_fn) over the iris dataset."""
    dataset = base.load_iris()
    feats = dataset.data.astype(np.float32)
    targets = dataset.target.astype(np.int32)
    train_input_fn = numpy_io.numpy_input_fn(
        x=feats, y=targets, batch_size=150, num_epochs=None, shuffle=False)
    # Prediction runs a single epoch over just the first example.
    predict_input_fn = numpy_io.numpy_input_fn(
        x=feats[:1], y=None, batch_size=1, num_epochs=1, shuffle=False)
    return train_input_fn, predict_input_fn
def MLP_iris():
    """Train a 4-128-3 sigmoid MLP on iris with a simple early-stopping rule."""
    # load the iris data.
    iris = load_iris()
    # Fixed seed so the shuffled train/validation split is reproducible.
    np.random.seed(0)
    random_index = np.random.permutation(150)
    iris_data = iris.data[random_index]
    iris_target = iris.target[random_index]
    # One-hot encode the three class labels.
    iris_target_onehot = np.zeros((150, 3))
    iris_target_onehot[np.arange(150), iris_target] = 1
    accuracy_list = []
    # build computation graph
    x = tf.placeholder("float", shape=[None, 4], name='x')
    y_target = tf.placeholder("float", shape=[None, 3], name='y_target')
    # NOTE(review): all weights start at zero, which keeps hidden units
    # symmetric during training -- confirm this initialization is intentional.
    W1 = tf.Variable(tf.zeros([4, 128]), name='W1')
    b1 = tf.Variable(tf.zeros([128]), name='b1')
    h1 = tf.sigmoid(tf.matmul(x, W1) + b1, name='h1')
    W2 = tf.Variable(tf.zeros([128, 3]), name='W2')
    b2 = tf.Variable(tf.zeros([3]), name='b2')
    y = tf.nn.softmax(tf.matmul(h1, W2) + b2, name='y')
    # NOTE(review): tf.log(y) is -inf if any softmax output hits exactly 0;
    # no epsilon is added here.
    cross_entropy = -tf.reduce_sum(y_target * tf.log(y), name='cross_entropy')
    train_step = tf.train.GradientDescentOptimizer(0.01).minimize(cross_entropy)
    correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(y_target, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))
    # allow_growth avoids reserving all GPU memory up front.
    sess = tf.Session(config=tf.ConfigProto(gpu_options=tf.GPUOptions(allow_growth=True)))
    sess.run(tf.global_variables_initializer())
    for i in range(500):
        # First 100 shuffled rows train; the remaining 50 validate.
        sess.run(train_step, feed_dict={x: iris_data[0:100], y_target: iris_target_onehot[0:100]})
        train_accuracy = sess.run(accuracy, feed_dict={x: iris_data[0:100], y_target: iris_target_onehot[0:100]})
        validation_accuracy = sess.run(accuracy, feed_dict={x: iris_data[100:], y_target: iris_target_onehot[100:]})
        print ( "step %d, training accuracy: %.3f / validation accuracy: %.3f" % (i, train_accuracy, validation_accuracy))
        accuracy_list.append(validation_accuracy)
        # Early stopping: after 50 steps, stop once validation accuracy no
        # longer beats the mean of the most recent half of its history by
        # more than 0.01.
        if i >= 50:
            if validation_accuracy - np.mean(accuracy_list[int(round(len(accuracy_list) / 2)):]) <= 0.01:
                break
    sess.close()
def testClassificationTrainingLoss(self):
    """Tests multi-class classification using matrix data as input."""
    forest_params = tensor_forest.ForestHParams(
        num_trees=3, max_nodes=1000, num_classes=3, num_features=4)
    model = random_forest.TensorForestEstimator(
        forest_params, graph_builder_class=(tensor_forest.TrainingLossForest))
    dataset = base.load_iris()
    feats = dataset.data.astype(np.float32)
    targets = dataset.target.astype(np.float32)
    hooks = [random_forest.TensorForestLossHook(10)]
    model.fit(x=feats, y=targets, steps=100, monitors=hooks)
    model.evaluate(x=feats, y=targets, steps=10)
def testIrisInputFnLabelsDict(self):
    """evaluate() resolves dict labels via MetricSpec's label_key."""
    dataset = base.load_iris()
    est = estimator.Estimator(model_fn=logistic_model_no_mode_fn)
    est.fit(input_fn=iris_input_fn_labels_dict, steps=100)
    accuracy_spec = metric_spec.MetricSpec(
        metric_fn=metric_ops.streaming_accuracy,
        prediction_key='class',
        label_key='labels')
    _ = est.evaluate(
        input_fn=iris_input_fn_labels_dict,
        steps=1,
        metrics={'accuracy': accuracy_spec})
    preds = list(est.predict(x=dataset.data))
    self.assertEqual(len(preds), dataset.target.shape[0])
def testClassification(self):
    """Tests multi-class classification using matrix data as input."""
    forest_params = tensor_forest.ForestHParams(
        num_trees=3, max_nodes=1000, num_classes=3, num_features=4,
        split_after_samples=20)
    model = random_forest.TensorForestEstimator(forest_params.fill())
    dataset = base.load_iris()
    feats = dataset.data.astype(np.float32)
    targets = dataset.target.astype(np.float32)
    model.fit(x=feats, y=targets, steps=100, batch_size=50)
    model.evaluate(x=feats, y=targets, steps=10)
def testMultiClass_NpMatrixData(self):
    """Tests multi-class classification using numpy matrix data as input."""
    dataset = base.load_iris()
    columns = [feature_column.real_valued_column('', dimension=4)]
    clf = dnn.DNNClassifier(
        n_classes=3,
        feature_columns=columns,
        hidden_units=[3, 3],
        config=run_config.RunConfig(tf_random_seed=1))
    clf.fit(x=dataset.data, y=dataset.target, steps=200)
    metrics = clf.evaluate(x=dataset.data, y=dataset.target, steps=1)
    self._assertInRange(0.0, 1.0, metrics['accuracy'])
def testIrisIterator(self):
    """SKCompat fit/score/predict all accept iterator-style inputs."""
    dataset = base.load_iris()
    est = estimator.Estimator(model_fn=logistic_model_no_mode_fn)
    estimator.SKCompat(est).fit(
        itertools.islice(dataset.data, 100),
        itertools.islice(dataset.target, 100),
        steps=20)
    eval_result = est.evaluate(input_fn=iris_input_fn, steps=1)
    # Fresh iterators for scoring: the fit() ones are already exhausted.
    score_result = estimator.SKCompat(est).score(
        itertools.islice(dataset.data, 100),
        itertools.islice(dataset.target, 100))
    print(score_result)
    self.assertItemsEqual(eval_result.keys(), score_result.keys())
    self.assertItemsEqual(['global_step', 'loss'], score_result.keys())
    preds = estimator.SKCompat(est).predict(x=dataset.data)['class']
    self.assertEqual(len(preds), dataset.target.shape[0])
def testIrisInputFnLabelsDict(self):
    """Dict-shaped labels are matched to predictions through label_key."""
    dataset = base.load_iris()
    est = estimator.Estimator(model_fn=logistic_model_no_mode_fn)
    est.fit(input_fn=iris_input_fn_labels_dict, steps=100)
    _ = est.evaluate(
        input_fn=iris_input_fn_labels_dict,
        steps=1,
        metrics={
            'accuracy':
                metric_spec.MetricSpec(
                    metric_fn=metric_ops.streaming_accuracy,
                    prediction_key='class',
                    label_key='labels')
        })
    predicted = list(est.predict(x=dataset.data))
    self.assertEqual(len(predicted), dataset.target.shape[0])
def testIrisDNN(self):
    """weights_/bias_ of a fitted DNN classifier have the expected shapes."""
    dataset = base.load_iris()
    columns = [feature_column.real_valued_column("", dimension=4)]
    clf = dnn.DNNClassifier(
        feature_columns=columns,
        hidden_units=[10, 20, 10],
        n_classes=3,
        config=run_config.RunConfig(tf_random_seed=1))
    clf.fit(dataset.data, dataset.target, max_steps=200)
    weights = clf.weights_
    # Layer widths: 4 -> 10 -> 20 -> 10 -> 3.
    for idx, shape in enumerate([(4, 10), (10, 20), (20, 10), (10, 3)]):
        self.assertEqual(weights[idx].shape, shape)
    self.assertEqual(len(clf.bias_), 4)
def testClassification(self):
    """Tests multi-class classification using matrix data as input."""
    forest_params = tensor_forest.ForestHParams(
        num_trees=3, max_nodes=1000, num_classes=3, num_features=4,
        split_after_samples=20)
    clf = random_forest.TensorForestEstimator(forest_params.fill())
    dataset = base.load_iris()
    x_data = dataset.data.astype(np.float32)
    y_data = dataset.target.astype(np.int32)
    clf.fit(x=x_data, y=y_data, steps=100, batch_size=50)
    clf.evaluate(x=x_data, y=y_data, steps=10)
def benchmarkMultiClass(self):
    """Benchmark a combined linear+DNN classifier on 3-class iris."""
    dataset = base.load_iris()
    cont_feature = feature_column.real_valued_column('feature', dimension=4)
    bucketized_feature = feature_column.bucketized_column(
        cont_feature, test_data.get_quantile_based_buckets(dataset.data, 10))
    clf = dnn_linear_combined.DNNLinearCombinedClassifier(
        n_classes=3,
        linear_feature_columns=(bucketized_feature,),
        dnn_feature_columns=(cont_feature,),
        dnn_hidden_units=(3, 3))
    input_fn = test_data.iris_input_multiclass_fn
    # fit() returns the classifier, so train then evaluate on the same model.
    clf.fit(input_fn=input_fn, steps=_ITERS)
    metrics = clf.evaluate(input_fn=input_fn, steps=100)
    self._assertCommonMetrics(metrics)
def _get_classification_input_fns():
    """Return iris input_fns: one for training, one single-example predict."""
    dataset = base.load_iris()
    x_data = dataset.data.astype(np.float32)
    y_data = dataset.target.astype(np.int32)
    train_input_fn = numpy_io.numpy_input_fn(
        x=x_data,
        y=y_data,
        batch_size=150,
        num_epochs=None,
        shuffle=False)
    predict_input_fn = numpy_io.numpy_input_fn(
        x=x_data[:1],
        y=None,
        batch_size=1,
        num_epochs=1,
        shuffle=False)
    return train_input_fn, predict_input_fn
def benchmarkMultiClass(self):
    """Benchmark DNNLinearCombinedClassifier on the 3-class iris problem."""
    dataset = base.load_iris()
    dense_column = feature_column.real_valued_column('feature', dimension=4)
    buckets = test_data.get_quantile_based_buckets(dataset.data, 10)
    bucket_column = feature_column.bucketized_column(dense_column, buckets)
    model = dnn_linear_combined.DNNLinearCombinedClassifier(
        n_classes=3,
        linear_feature_columns=(bucket_column,),
        dnn_feature_columns=(dense_column,),
        dnn_hidden_units=(3, 3))
    input_fn = test_data.iris_input_multiclass_fn
    # fit() returns self; chain straight into evaluation.
    metrics = model.fit(input_fn=input_fn, steps=_ITERS).evaluate(
        input_fn=input_fn, steps=100)
    self._assertCommonMetrics(metrics)
def testIrisAll(self):
    """SKCompat full round-trip: fit, score, predict, outputs filter."""
    dataset = base.load_iris()
    est = estimator.SKCompat(
        estimator.Estimator(model_fn=logistic_model_no_mode_fn))
    est.fit(dataset.data, dataset.target, steps=100)
    scores = est.score(
        x=dataset.data,
        y=dataset.target,
        metrics={('accuracy', 'class'): metric_ops.streaming_accuracy})
    preds = est.predict(x=dataset.data)
    class_only = est.predict(x=dataset.data, outputs=['class'])['class']
    self.assertEqual(preds['prob'].shape[0], dataset.target.shape[0])
    self.assertAllClose(preds['class'], class_only)
    # The class output must be the argmax of the probability output.
    self.assertAllClose(preds['class'], np.argmax(preds['prob'], axis=1))
    sklearn_score = _sklearn.accuracy_score(dataset.target, preds['class'])
    self.assertAllClose(scores['accuracy'], sklearn_score)
    self.assertTrue('global_step' in scores)
    self.assertEqual(100, scores['global_step'])
def prepare_iris_data_for_logistic_regression():
    """Restrict iris to classes 0 and 1 so it becomes a binary problem."""
    dataset = base.load_iris()
    binary_ids = np.where((dataset.target == 0) | (dataset.target == 1))
    return base.Dataset(data=dataset.data[binary_ids],
                        target=dataset.target[binary_ids])
# -*- coding: utf-8 -*- from tensorflow.contrib.learn.python.learn.datasets import base iris_data, iris_label = base.load_iris() house_data, house_label = base.load_boston()
def MLP_iris():
    """Train a 4-128-3 sigmoid MLP on iris with a simple early-stopping rule."""
    # load the iris data.
    iris = load_iris()
    # Fixed seed so the shuffled train/validation split is reproducible.
    np.random.seed(0)
    random_index = np.random.permutation(150)
    iris_data = iris.data[random_index]
    iris_target = iris.target[random_index]
    # One-hot encode the three class labels.
    iris_target_onehot = np.zeros((150, 3))
    iris_target_onehot[np.arange(150), iris_target] = 1
    accuracy_list = []
    # build computation graph
    x = tf.placeholder("float", shape=[None, 4], name='x')
    y_target = tf.placeholder("float", shape=[None, 3], name='y_target')
    # NOTE(review): all weights start at zero, which keeps hidden units
    # symmetric during training -- confirm this initialization is intentional.
    W1 = tf.Variable(tf.zeros([4, 128]), name='W1')
    b1 = tf.Variable(tf.zeros([128]), name='b1')
    h1 = tf.sigmoid(tf.matmul(x, W1) + b1, name='h1')
    W2 = tf.Variable(tf.zeros([128, 3]), name='W2')
    b2 = tf.Variable(tf.zeros([3]), name='b2')
    y = tf.nn.softmax(tf.matmul(h1, W2) + b2, name='y')
    # NOTE(review): tf.log(y) is -inf if any softmax output hits exactly 0;
    # no epsilon is added here.
    cross_entropy = -tf.reduce_sum(y_target * tf.log(y), name='cross_entropy')
    train_step = tf.train.GradientDescentOptimizer(0.01).minimize(
        cross_entropy)
    correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(y_target, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))
    # allow_growth avoids reserving all GPU memory up front.
    sess = tf.Session(config=tf.ConfigProto(gpu_options=tf.GPUOptions(
        allow_growth=True)))
    sess.run(tf.global_variables_initializer())
    for i in range(500):
        # First 100 shuffled rows train; the remaining 50 validate.
        sess.run(train_step, feed_dict={
            x: iris_data[0:100],
            y_target: iris_target_onehot[0:100]
        })
        train_accuracy = sess.run(accuracy, feed_dict={
            x: iris_data[0:100],
            y_target: iris_target_onehot[0:100]
        })
        validation_accuracy = sess.run(accuracy, feed_dict={
            x: iris_data[100:],
            y_target: iris_target_onehot[100:]
        })
        print("step %d, training accuracy: %.3f / validation accuracy: %.3f" %
              (i, train_accuracy, validation_accuracy))
        accuracy_list.append(validation_accuracy)
        # Early stopping: after 50 steps, stop once validation accuracy no
        # longer beats the mean of the most recent half of its history by
        # more than 0.01.
        if i >= 50:
            if validation_accuracy - np.mean(
                    accuracy_list[int(round(len(accuracy_list) / 2)):]) <= 0.01:
                break
    sess.close()
def testIrisTruncatedIterator(self):
    """fit() tolerates a feature iterator shorter than the label generator."""
    dataset = base.load_iris()
    est = estimator.Estimator(model_fn=logistic_model_no_mode_fn)
    # Features stop after 50 examples; labels come as single-element lists.
    feature_iter = itertools.islice(dataset.data, 50)
    label_gen = ([np.int32(label)] for label in dataset.target)
    est.fit(feature_iter, label_gen, steps=100)