def _get_eval_ops(self, features, targets, metrics):
    features, _, spec = data_ops.ParseDataTensorOrDict(features)
    labels = data_ops.ParseLabelTensorOrDict(targets)
    _assert_float32(features)
    _assert_float32(labels)
    graph_builder = self.graph_builder_class(
        self.params, device_assigner=self.device_assigner, training=False,
        **self.construction_args)
    probabilities = graph_builder.inference_graph(features, data_spec=spec)

    # One-hot the labels.
    if not self.params.regression:
        labels = math_ops.to_int64(array_ops.one_hot(math_ops.to_int64(
            array_ops.squeeze(labels)), self.params.num_classes, 1, 0))

    if metrics is None:
        metrics = {self.accuracy_metric:
                   eval_metrics.get_metric(self.accuracy_metric)}

    result = {}
    for name, metric in six.iteritems(metrics):
        result[name] = metric(probabilities, labels)
    return result

def rf_train(x_train, y_train, x_test, y_test):
    # Random forest that optimizes a training loss (TrainingLossForest),
    # wrapped in SKCompat for the scikit-learn style fit/score interface.
    params = tensor_forest.ForestHParams(num_classes=10,
                                         num_features=784,
                                         num_trees=100,
                                         max_nodes=10000)
    graph_builder_class = tensor_forest.TrainingLossForest
    est = estimator.SKCompat(
        random_forest.TensorForestEstimator(
            params, graph_builder_class=graph_builder_class,
            model_dir="./models"))
    est.fit(x=x_train, y=y_train, batch_size=128)

    metric_name = "accuracy"
    metric = {
        metric_name: metric_spec.MetricSpec(
            eval_metrics.get_metric(metric_name),
            prediction_key=eval_metrics.get_prediction_key(metric_name))
    }
    results = est.score(x=x_test, y=y_test, batch_size=128, metrics=metric)
    for key in sorted(results):
        print("%s: %s" % (key, results[key]))

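A hypothetical call to rf_train, reusing the MNIST loading pattern from the later examples; the input_data module and the 'MNIST_data' directory are assumptions, not part of the original snippet:

mnist = input_data.read_data_sets('MNIST_data', one_hot=False)
rf_train(mnist.train.images, mnist.train.labels,
         mnist.test.images, mnist.test.labels)
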
def train_and_eval(config):
    """Train and evaluate the model."""
    print('model directory = %s' % config.model_output)
    num_features = 1e3
    model = train_rf(num_features, config)

    # Early stopping if the forest is no longer growing.
    monitor = random_forest.TensorForestLossHook(config.early_stopping_rounds)

    # TFLearn doesn't support tfrecords; extract them by hand for now.
    img, label, feat = get_records(
        os.path.join(config.tfrecord_dir, 'train.tfrecords'))
    model.fit(x=feat, y=label, batch_size=config.batch_size,
              monitors=[monitor])

    metric_name = 'accuracy'
    metric = {metric_name: metric_spec.MetricSpec(
        eval_metrics.get_metric(metric_name),
        prediction_key=eval_metrics.get_prediction_key(metric_name))}

    test_img, test_label, test_feat = get_records(
        os.path.join(config.tfrecord_dir, 'val.tfrecords'))
    # Evaluate on the extracted features to match the training input.
    results = model.evaluate(x=test_feat, y=test_label,
                             batch_size=config.batch_size, metrics=metric)
    return results

def train_and_eval():
    """Train and evaluate the model."""
    model_dir = 'data/model'
    print('model directory = %s' % model_dir)
    est = build_estimator(model_dir)
    mnist = input_data.read_data_sets('MNIST_data', one_hot=False)
    with tf.device('/gpu:0'):
        est.fit(x=mnist.train.images, y=mnist.train.labels,
                batch_size=100, steps=10)
    # results2 = est.predict(x=mnist.test.images, y=mnist.test.labels, batch_size=100)
    # print(results2)
    metric_name = 'accuracy'
    metric = {
        metric_name: metric_spec.MetricSpec(
            eval_metrics.get_metric(metric_name),
            prediction_key=eval_metrics.get_prediction_key(metric_name))
    }
    # Pass the metric dict so the accuracy MetricSpec is actually evaluated.
    results = est.score(x=mnist.test.images, y=mnist.test.labels,
                        batch_size=100, metrics=metric)
    for key in sorted(results):
        print('%s: %s' % (key, results[key]))

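Several of the MNIST examples above and below call a build_estimator(model_dir) helper that is not reproduced here. A minimal sketch of what such a helper typically looks like, mirroring the ForestHParams/TensorForestEstimator construction shown in the other snippets; the MNIST-sized dimensions (784 features, 10 classes) and the tree counts are illustrative assumptions, and the real helpers in these projects may pick different hyperparameters or pass a graph_builder_class:

def build_estimator(model_dir):
    # Hypothetical helper: builds a TensorForestEstimator the way the
    # surrounding snippets use it. Parameter values are illustrative only.
    params = tensor_forest.ForestHParams(
        num_classes=10, num_features=784, num_trees=100, max_nodes=10000)
    return random_forest.TensorForestEstimator(params, model_dir=model_dir)
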
def train_and_eval():
    """Train and evaluate the model."""
    model_dir = tempfile.mkdtemp() if not FLAGS.model_dir else FLAGS.model_dir
    print('model directory = %s' % model_dir)
    est = build_estimator(model_dir)
    mnist = input_data.read_data_sets(FLAGS.data_dir, one_hot=False)
    est.fit(x=mnist.train.images, y=mnist.train.labels,
            batch_size=FLAGS.batch_size)
    metric_name = 'accuracy'
    metric = {
        metric_name: metric_spec.MetricSpec(
            eval_metrics.get_metric(metric_name),
            prediction_key=eval_metrics.get_prediction_key(metric_name))
    }
    results = est.score(x=mnist.test.images, y=mnist.test.labels,
                        batch_size=FLAGS.batch_size, metrics=metric)
    for key in sorted(results):
        print('%s: %s' % (key, results[key]))

def train_and_eval():
    """Train and evaluate the model."""
    model_dir = tempfile.mkdtemp() if not FLAGS.model_dir else FLAGS.model_dir
    print('model directory = %s' % model_dir)
    estimator = build_estimator(model_dir)

    # TensorForest's loss hook allows training to terminate early if the
    # forest is no longer growing.
    early_stopping_rounds = 100
    monitor = random_forest.TensorForestLossHook(early_stopping_rounds)

    mnist = input_data.read_data_sets(FLAGS.data_dir, one_hot=False)
    estimator.fit(x=mnist.train.images, y=mnist.train.labels,
                  batch_size=FLAGS.batch_size, monitors=[monitor])

    metric_name = 'accuracy'
    metric = {metric_name: metric_spec.MetricSpec(
        eval_metrics.get_metric(metric_name),
        prediction_key=eval_metrics.get_prediction_key(metric_name))}

    results = estimator.evaluate(x=mnist.test.images, y=mnist.test.labels,
                                 batch_size=FLAGS.batch_size, metrics=metric)
    for key in sorted(results):
        print('%s: %s' % (key, results[key]))

def train_and_eval():
    """Train and evaluate the model."""
    model_dir = tempfile.mkdtemp() if not FLAGS.model_dir else FLAGS.model_dir
    print('model directory = %s' % model_dir)
    est = build_estimator(model_dir)
    mnist = input_data.read_data_sets(FLAGS.data_dir, one_hot=False)

    train_input_fn = numpy_io.numpy_input_fn(
        x={'images': mnist.train.images},
        y=mnist.train.labels.astype(numpy.int32),
        batch_size=FLAGS.batch_size,
        num_epochs=None,
        shuffle=True)
    est.fit(input_fn=train_input_fn, steps=None)

    metric_name = 'accuracy'
    metric = {
        metric_name: metric_spec.MetricSpec(
            eval_metrics.get_metric(metric_name),
            prediction_key=eval_metrics.get_prediction_key(metric_name))
    }

    test_input_fn = numpy_io.numpy_input_fn(
        x={'images': mnist.test.images},
        y=mnist.test.labels.astype(numpy.int32),
        num_epochs=1,
        batch_size=FLAGS.batch_size,
        shuffle=False)
    results = est.evaluate(input_fn=test_input_fn, metrics=metric)
    for key in sorted(results):
        print('%s: %s' % (key, results[key]))

def eval(est):
    """Evaluate the model."""
    mnist = input_data.read_data_sets(FLAGS.data_dir, one_hot=False)
    metric_name = 'accuracy'
    metric = {
        metric_name: metric_spec.MetricSpec(
            eval_metrics.get_metric(metric_name),
            prediction_key=eval_metrics.get_prediction_key(metric_name))
    }
    test_input_fn = numpy_io.numpy_input_fn(
        x={'images': mnist.test.images},
        y=mnist.test.labels.astype(numpy.int32),
        num_epochs=1,
        batch_size=FLAGS.batch_size,
        shuffle=False)
    results = est.evaluate(input_fn=test_input_fn, metrics=metric)
    for key in sorted(results):
        print('%s: %s' % (key, results[key]))

def evaluate(self, data: np.ndarray, labels: np.ndarray):
    """Predicts and directly evaluates the results.

    Examples:
        To evaluate the prediction of the decision forest use:

        >>> results = forest.evaluate(data, labels)
        >>> for key in sorted(results):
        ...     print('%s: %s' % (key, results[key]))

    Args:
        data (np.ndarray): The data to predict. ``data.shape`` is ``(n, f)``
            with ``n`` observations and ``f`` features per observation.
        labels (np.ndarray): The labels of the ``data``. ``labels[i]`` returns
            the label of observation ``i``. ``labels.shape`` is ``(n, 1)``
            with ``n`` observations and the associated labels.

    Returns:
        dict: A dict of evaluation metrics.
    """
    if self.estimator is None:
        raise ValueError('Estimator not set')

    metrics = {
        'accuracy': metric_spec.MetricSpec(
            eval_metrics.get_metric('accuracy'),
            prediction_key=eval_metrics.get_prediction_key('accuracy')
        )
    }

    if self.report_feature_importances:
        metrics['feature_importance'] = metric_spec.MetricSpec(
            lambda x: x,
            prediction_key=eval_metrics.FEATURE_IMPORTANCE_NAME
        )

    results = self.estimator.score(x=data, y=labels,
                                   batch_size=self.batch_size,
                                   metrics=metrics)
    return results

def train_and_eval(conf=None):
    global config
    # If an argument is provided, set config to this value - used for calling
    # the method from outside of the file. If no argument is passed, the
    # arguments passed on the command line, as interpreted by the parser,
    # are used.
    if conf:
        config = conf
    else:
        config = {
            'train_data': train_data,
            'train_labels': train_labels,
            'test_data': test_data,
            'test_labels': test_labels,
            'num_classes': num_classes,
            'num_features': num_features,
            'num_trees': num_trees,
            'max_nodes': max_nodes,
            'train_steps': train_steps,
            'batch_size': batch_size,
            'bagging_fraction': bagging_fraction,
            'feature_bagging_fraction': feature_bagging_fraction,
            'model_dir': model_dir,
            'delete_models': delete_models,
            'data_dir': data_dir,
            'use_training_loss': use_training_loss
        }

    # Convert the config dict into an object, for acceptance in the following lines.
    config = objectview(config)

    # If a specific directory to store the generated model is specified in the
    # arguments, use that; otherwise, use a temporary directory.
    model_dir = tempfile.mkdtemp() if not config.model_dir else config.model_dir

    # Load the training data and cast it to float32.
    if not config.train_data:
        sys.exit('Usage: --train_data <csv file>')
    train_data = loc_genfromtxt(config.train_data)
    train_data = train_data.astype(np.float32)

    if not config.train_labels:
        sys.exit('Usage: --train_labels <csv file>')
    train_labels = loc_genfromtxt(config.train_labels)
    train_labels = train_labels.astype(np.float32)

    # Auto-detect the number of features in the training data.
    # print('train_data has number of features/columns = ' + str(train_data.shape[1]))
    config.num_features = train_data.shape[1]

    # Get a random forest estimator object.
    est = build_estimator(model_dir)

    # Fit the random forest model using the training data.
    est.fit(x=train_data, y=train_labels, batch_size=config.batch_size)

    # Load the test data and cast it to float32.
    if not config.test_data:
        sys.exit('Usage: --test_data <csv file>')
    test_data = loc_genfromtxt(config.test_data)
    test_data = test_data.astype(np.float32)

    if not config.test_labels:
        sys.exit('Usage: --test_labels <csv file>')
    test_labels = loc_genfromtxt(config.test_labels)
    test_labels = test_labels.astype(np.float32)

    # Define the metric to be 'accuracy'.
    metric_name = 'accuracy'
    metric = {
        metric_name: metric_spec.MetricSpec(
            eval_metrics.get_metric(metric_name),
            prediction_key=eval_metrics.get_prediction_key(metric_name))
    }

    # Calculate the score using the test data.
    results = est.score(x=test_data, y=test_labels,
                        batch_size=config.batch_size, metrics=metric)

    # Print each value followed by a comma, except the last value, which gets
    # a line feed only.
    i = 1
    length = len(sorted(results))
    for key in sorted(results):
        if i == length:
            print(str(results[key]))
        else:
            print(str(results[key]) + ',', end="")
        i = i + 1

    # If the flag is set, delete the model dir to free up space / avoid
    # running out of memory.
    if config.delete_models:
        call(['rm', '-r', model_dir])

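The snippet above relies on an objectview helper and a loc_genfromtxt CSV loader that are not shown. objectview is conventionally a one-line wrapper that exposes dict keys as attributes; a minimal sketch, assuming that is all the original helper does:

class objectview(object):
    # Hypothetical stand-in for the helper used above: exposes the keys of a
    # dict as attributes, so config['batch_size'] becomes config.batch_size.
    def __init__(self, d):
        self.__dict__ = d
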
def train_and_eval(wisdmFilename='../data/wisdm.txt'):
    wisdm = read_data_sets(csv=wisdmFilename)
    all_data = wisdm.data      # all_data is a Datasplit tuple in wisdm.py
    all_labels = wisdm.labels  # all_labels is a Datasplit tuple in wisdm.py
    nclasses = wisdm.n_classes
    nfeatures = wisdm.n_features
    print(nclasses, ' classes from ', nfeatures, 'features')

    if FLAGS.estimator == 'tensorflow':
        # Train and evaluate the TensorForest model.
        model_dir = FLAGS.model_dir or tempfile.mkdtemp()
        print('model directory = %s' % model_dir)
        tf_start = time.time()
        est = build_estimator(model_dir, nclasses, nfeatures)
        est.fit(x=all_data.train, y=all_labels.train,
                batch_size=FLAGS.batch_size)
        print('Done Fitting\n')
        metric_name = 'accuracy'
        mspec = metric_spec.MetricSpec(
            eval_metrics.get_metric(metric_name),
            prediction_key=eval_metrics.get_prediction_key(metric_name))
        metric = {metric_name: mspec}
        results = est.score(
            x=all_data.test,
            y=all_labels.test,
            # batch_size=FLAGS.batch_size,
            metrics=metric)
        tf_end = time.time()
        for key in sorted(results):
            print('%s: %s' % (key, results[key]))
        print('tf time:', tf_end - tf_start)

    elif FLAGS.estimator == 'sklearn':
        print('--------- Next: sklearn RandomForestClassifier ---------')
        skrf_start = time.time()
        param_grid = [{
            'n_estimators': [10, 30, 90],
            'max_features': [15, 25, 35, 43]
        }, {
            'bootstrap': [False],
            'n_estimators': [10, 30, 40],
            'max_features': [16, 24, 43]
        }]
        fc = RandomForestClassifier()
        grid_search = GridSearchCV(fc, param_grid, cv=10, scoring='accuracy')
        grid_search.fit(
            np.concatenate([all_data.train, all_data.validation]),
            np.concatenate([all_labels.train, all_labels.validation]))
        skrf_end = time.time()
        print('Best params', grid_search.best_params_)
        print('skRF time:', skrf_end - skrf_start)
        # grid_scores_ was removed in scikit-learn 0.20; use cv_results_ on
        # newer versions.
        for params, mean, std in grid_search.grid_scores_:
            print(mean, std, params)
        s = grid_search.score(X=all_data.test, y=all_labels.test)
        print('Test score:', s)

# Random forest parameters (num_classes, features, labels, and train_input_fn
# are defined elsewhere in the original script).
num_features = 59
num_trees = 4
max_nodes = 1000

hparams = tensor_forest.ForestHParams(num_classes=num_classes,
                                      num_features=num_features,
                                      num_trees=num_trees,
                                      max_nodes=max_nodes).fill()
classifier = random_forest.TensorForestEstimator(hparams)
classifier.fit(input_fn=train_input_fn, steps=None)

# Verify results
metric_name = 'accuracy'
metric = {
    metric_name: metric_spec.MetricSpec(
        eval_metrics.get_metric(metric_name),
        prediction_key=eval_metrics.get_prediction_key(metric_name))
}
# Evaluate on a single, unshuffled pass; with num_epochs=None and shuffle=True
# the evaluation loop would never terminate.
test_input_fn = tf.estimator.inputs.numpy_input_fn(x={'x': features[0:10]},
                                                   y=labels[0:10],
                                                   num_epochs=1,
                                                   shuffle=False)
results = classifier.evaluate(input_fn=test_input_fn, metrics=metric)
for key in sorted(results):
    print('%s: %s' % (key, results[key]))

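The last snippet assumes that num_classes, features, labels, and train_input_fn are defined earlier in the original script. A minimal sketch of how they could be set up with the same tf.estimator.inputs.numpy_input_fn API the snippet uses for evaluation; the placeholder arrays, the number of classes, and the 'x' feature key are assumptions for illustration only:

import numpy as np
import tensorflow as tf

num_classes = 2  # assumed; the original script defines this elsewhere

# Placeholder data for illustration: 100 observations with 59 features each
# and integer class labels. The real script loads these from its data source.
features = np.random.rand(100, 59).astype(np.float32)
labels = np.random.randint(0, num_classes, size=100).astype(np.int32)

# Training input function: shuffle and repeat indefinitely; the number of
# training steps is controlled by the fit() call above.
train_input_fn = tf.estimator.inputs.numpy_input_fn(
    x={'x': features}, y=labels, batch_size=32, num_epochs=None, shuffle=True)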