Example No. 1
def _get_default_metrics(problem_type, prediction_type, sequence_length):
  """Returns default `MetricSpec`s for `problem_type` and `prediction_type`.

  Args:
    problem_type: `ProblemType.CLASSIFICATION` or `ProblemType.REGRESSION`.
    prediction_type: `PredictionType.SINGLE_VALUE` or
      `PredictionType.MULTIPLE_VALUE`.
    sequence_length: A `Tensor` with shape `[batch_size]` and dtype `int32`
      containing the length of each sequence in the batch. If `None`, sequences
      are assumed to be unpadded.
  Returns:
    A `dict` mapping strings to `MetricSpec`s.
  """
  default_metrics = {}
  if problem_type == ProblemType.CLASSIFICATION:
    # Multi value classification
    if prediction_type == PredictionType.MULTIPLE_VALUE:
      default_metrics['accuracy'] = metric_spec.MetricSpec(
          metric_fn=_mask_multivalue(
              sequence_length, metrics.streaming_accuracy),
          prediction_key=RNNKeys.PREDICTIONS_KEY)
    # Single value classification
    elif prediction_type == PredictionType.SINGLE_VALUE:
      default_metrics['accuracy'] = metric_spec.MetricSpec(
          metric_fn=metrics.streaming_accuracy,
          prediction_key=RNNKeys.PREDICTIONS_KEY)
  elif problem_type == ProblemType.REGRESSION:
    # Multi value regression
    if prediction_type == PredictionType.MULTIPLE_VALUE:
      pass
    # Single value regression
    elif prediction_type == PredictionType.SINGLE_VALUE:
      pass
  return default_metrics
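The `_mask_multivalue` helper used above is not shown in this snippet. A minimal sketch of what such a wrapper could look like, assuming predictions and labels of shape [batch_size, padded_length], is given below; it is an illustrative guess, not the tf.contrib.learn implementation.

# Hypothetical sketch of a masking wrapper like `_mask_multivalue`; it assumes
# predictions and labels of shape [batch_size, padded_length] and is not the
# actual tf.contrib.learn implementation.
import tensorflow as tf


def _mask_multivalue_sketch(sequence_length, metric_fn):
  """Wraps `metric_fn` so positions past each sequence's length are ignored."""
  def _metric_fn(predictions, labels, weights=None):
    del weights  # This sketch derives its mask from `sequence_length` alone.
    mask = tf.sequence_mask(sequence_length, maxlen=tf.shape(labels)[1])
    return metric_fn(tf.boolean_mask(predictions, mask),
                     tf.boolean_mask(labels, mask))
  return _metric_fn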
Example No. 2
  def evaluate(self, x=None, y=None, input_fn=None, feed_fn=None,
               batch_size=None, steps=None, metrics=None, name=None,
               range_k=None):
    # pylint: disable=g-doc-args,g-doc-return-or-yield
    """See evaluable.Evaluable for a description of the Args.

    range_k: A list of values of k at which to compute precision and recall.
      For example, range_k=[1, 5] will calculate precision@1, precision@5,
      recall@1 and recall@5.
    """
    # Set up the default metrics if none are specified: precision@1 and
    # recall@1, plus precision@top_k and recall@top_k if top_k is greater
    # than 1.
    if not metrics:
      metrics = {}
      if range_k is None:
        if self._top_k > 1:
          range_k = [1, self._top_k]
        else:
          range_k = [1]
      for k in range_k:
        metrics.update({
            "precision_at_%d" % k: metric_spec.MetricSpec(
                metric_fn=functools.partial(
                    metric_ops.streaming_sparse_precision_at_k, k=k),
                prediction_key=_PROBABILITIES,)})
        metrics.update({
            "recall_at_%d" % k: metric_spec.MetricSpec(
                metric_fn=functools.partial(
                    metric_ops.streaming_sparse_recall_at_k, k=k),
                prediction_key=_PROBABILITIES,)})

    return self._estimator.evaluate(x=x, y=y, input_fn=input_fn,
                                    feed_fn=feed_fn, batch_size=batch_size,
                                    steps=steps, metrics=metrics, name=name)
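The `functools.partial` pattern above fixes `k` up front so that `MetricSpec` can call the metric with only predictions and labels. A self-contained TF 1.x illustration of that binding follows; the probabilities and labels are toy values invented for this sketch.

import functools
import tensorflow as tf

# Toy illustration of the partial-binding pattern: k is fixed up front, and the
# resulting callable takes only (predictions, labels). Probabilities and labels
# below are made-up values, not outputs of the estimator above.
precision_at_2 = functools.partial(
    tf.contrib.metrics.streaming_sparse_precision_at_k, k=2)

probabilities = tf.constant([[0.10, 0.60, 0.30],
                             [0.80, 0.15, 0.05]])
labels = tf.constant([[1], [2]], dtype=tf.int64)
value_op, update_op = precision_at_2(probabilities, labels)

with tf.Session() as sess:
    sess.run(tf.local_variables_initializer())
    sess.run(update_op)
    print(sess.run(value_op))  # 1 relevant class among 4 retrieved -> 0.25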
Example No. 3
 def evaluate(self,
              x=None,
              y=None,
              input_fn=None,
              feed_fn=None,
              batch_size=None,
              steps=None,
              metrics=None,
              name=None):
     """See evaluable.Evaluable."""
     if metrics is None:
         metrics = {}
     metrics.update({
         "accuracy":
         metric_spec.MetricSpec(metric_fn=metrics_lib.streaming_accuracy,
                                prediction_key=_CLASSES,
                                weight_key=self._weight_column_name)
     })
     if self._n_classes == 2:
         metrics.update({
             "auc":
             metric_spec.MetricSpec(metric_fn=metrics_lib.streaming_auc,
                                    prediction_key=_LOGISTIC,
                                    weight_key=self._weight_column_name)
         })
     return self._estimator.evaluate(x=x,
                                     y=y,
                                     input_fn=input_fn,
                                     feed_fn=feed_fn,
                                     batch_size=batch_size,
                                     steps=steps,
                                     metrics=metrics,
                                     name=name)
Example No. 4
  def evaluate(self, x=None, y=None, input_fn=None, feed_fn=None,
               batch_size=None, steps=None, metrics=None, name=None):
    """See evaluable.Evaluable."""
    if not metrics:
      metrics = {}
      metrics["accuracy"] = metric_spec.MetricSpec(
          metric_fn=metrics_lib.streaming_accuracy,
          prediction_key=linear._CLASSES)
    additional_metrics = (
        target_column.get_default_binary_metrics_for_eval([0.5]))
    additional_metrics = {
        name: metric_spec.MetricSpec(metric_fn=metric,
                                     prediction_key=linear._LOGISTIC)
        for name, metric in additional_metrics.items()
    }
    metrics.update(additional_metrics)

    # TODO(b/31229024): Remove this loop
    for metric_name, metric in metrics.items():
      if isinstance(metric, metric_spec.MetricSpec):
        continue

      if isinstance(metric_name, tuple):
        if len(metric_name) != 2:
          raise ValueError("Ignoring metric %s. It returned a tuple with len  "
                           "%s, expected 2." % (metric_name, len(metric_name)))

        valid_keys = {linear._CLASSES, linear._LOGISTIC, linear._PROBABILITIES}
        if metric_name[1] not in valid_keys:
          raise ValueError("Ignoring metric %s. The 2nd element of its name "
                           "should be in %s" % (metric_name, valid_keys))
      metrics[metric_name] = linear._wrap_metric(metric)
    return self._estimator.evaluate(x=x, y=y, input_fn=input_fn,
                                    feed_fn=feed_fn, batch_size=batch_size,
                                    steps=steps, metrics=metrics, name=name)
Example No. 5
def train_and_eval():
    """Train and evaluate the model."""

    model_dir = 'data/model'
    print('model directory = %s' % model_dir)

    est = build_estimator(model_dir)

    mnist = input_data.read_data_sets('MNIST_data', one_hot=False)

    with tf.device('/gpu:0'):
        est.fit(x=mnist.train.images,
                y=mnist.train.labels,
                batch_size=100,
                steps=10)

        # results2=est.predict(x=mnist.test.images, y=mnist.test.labels, batch_size=100)
        # print(results2)

        metric_name = 'accuracy'
        metric = {
            metric_name:
            metric_spec.MetricSpec(
                eval_metrics.get_metric(metric_name),
                prediction_key=eval_metrics.get_prediction_key(metric_name))
        }

        results = est.score(x=mnist.test.images,
                            y=mnist.test.labels,
                            batch_size=100,
                            metrics=metric)
        for key in sorted(results):
            print('%s: %s' % (key, results[key]))
Example No. 6
def rf_train(x_train, y_train, x_test, y_test):
    params = tensor_forest.ForestHParams(num_classes=10,
                                         num_features=784,
                                         num_trees=100,
                                         max_nodes=10000)

    graph_builder_class = tensor_forest.TrainingLossForest

    est = estimator.SKCompat(
        random_forest.TensorForestEstimator(
            params,
            graph_builder_class=graph_builder_class,
            model_dir="./models"))

    est.fit(x=x_train, y=y_train, batch_size=128)

    metric_name = "accuracy"

    metric = {
        metric_name:
        metric_spec.MetricSpec(
            eval_metrics.get_metric(metric_name),
            prediction_key=eval_metrics.get_prediction_key(metric_name))
    }

    results = est.score(x=x_test, y=y_test, batch_size=128, metrics=metric)

    for key in sorted(results):
        print("%s: %s" % (key, results[key]))
Example No. 7
def train_and_eval(config):
    """Train and evaluate the model."""
    print('model directory = %s' % config.model_output)

    num_features = 1e3
    model = train_rf(num_features, config)

    # Early stopping if the forest is no longer growing.
    monitor = random_forest.TensorForestLossHook(config.early_stopping_rounds)

    # TFLearn doesn't support tfrecords; extract them by hand for now
    img, label, feat = get_records(
        os.path.join(config.tfrecord_dir, 'train.tfrecords'))
    model.fit(
        x=feat, y=label,
        batch_size=config.batch_size, monitors=[monitor])

    metric_name = 'accuracy'
    metric = {metric_name: metric_spec.MetricSpec(
                eval_metrics.get_metric(metric_name),
                prediction_key=eval_metrics.get_prediction_key(metric_name))}

    test_img, test_label, test_feat = get_records(
        os.path.join(config.tfrecord_dir, 'val.tfrecords'))
    results = model.evaluate(
        x=test_img, y=test_label,
        batch_size=config.batch_size, metrics=metric)
    return results
Example No. 8
 def _add_binary_metric(key, metric_fn):
     metrics[_head_prefixed(self._head_name,
                            key)] = (metric_spec.MetricSpec(
                                metric_fn,
                                prediction_key.PredictionKey.LOGISTIC,
                                self._label_name,
                                self._weight_column_name))
Example No. 9
def train_and_eval():
    """Train and evaluate the model."""
    model_dir = tempfile.mkdtemp() if not FLAGS.model_dir else FLAGS.model_dir
    print('model directory = %s' % model_dir)

    est = build_estimator(model_dir)

    mnist = input_data.read_data_sets(FLAGS.data_dir, one_hot=False)

    est.fit(x=mnist.train.images,
            y=mnist.train.labels,
            batch_size=FLAGS.batch_size)

    metric_name = 'accuracy'
    metric = {
        metric_name:
        metric_spec.MetricSpec(
            eval_metrics.get_metric(metric_name),
            prediction_key=eval_metrics.get_prediction_key(metric_name))
    }

    results = est.score(x=mnist.test.images,
                        y=mnist.test.labels,
                        batch_size=FLAGS.batch_size,
                        metrics=metric)
    for key in sorted(results):
        print('%s: %s' % (key, results[key]))
Example No. 10
def train_and_eval():
  """Train and evaluate the model."""
  model_dir = tempfile.mkdtemp() if not FLAGS.model_dir else FLAGS.model_dir
  print('model directory = %s' % model_dir)

  estimator = build_estimator(model_dir)

  # TensorForest's loss hook allows training to terminate early if the
  # forest is no longer growing.
  early_stopping_rounds = 100
  monitor = random_forest.TensorForestLossHook(early_stopping_rounds)

  mnist = input_data.read_data_sets(FLAGS.data_dir, one_hot=False)

  estimator.fit(x=mnist.train.images, y=mnist.train.labels,
                batch_size=FLAGS.batch_size, monitors=[monitor])

  metric_name = 'accuracy'
  metric = {metric_name:
            metric_spec.MetricSpec(
                eval_metrics.get_metric(metric_name),
                prediction_key=eval_metrics.get_prediction_key(metric_name))}

  results = estimator.evaluate(x=mnist.test.images, y=mnist.test.labels,
                               batch_size=FLAGS.batch_size,
                               metrics=metric)
  for key in sorted(results):
    print('%s: %s' % (key, results[key]))
Example No. 11
def train_and_eval():
    """Train and evaluate the model."""
    model_dir = tempfile.mkdtemp() if not FLAGS.model_dir else FLAGS.model_dir
    print('model directory = %s' % model_dir)

    est = build_estimator(model_dir)

    mnist = input_data.read_data_sets(FLAGS.data_dir, one_hot=False)

    train_input_fn = numpy_io.numpy_input_fn(x={'images': mnist.train.images},
                                             y=mnist.train.labels.astype(
                                                 numpy.int32),
                                             batch_size=FLAGS.batch_size,
                                             num_epochs=None,
                                             shuffle=True)
    est.fit(input_fn=train_input_fn, steps=None)

    metric_name = 'accuracy'
    metric = {
        metric_name:
        metric_spec.MetricSpec(
            eval_metrics.get_metric(metric_name),
            prediction_key=eval_metrics.get_prediction_key(metric_name))
    }

    test_input_fn = numpy_io.numpy_input_fn(x={'images': mnist.test.images},
                                            y=mnist.test.labels.astype(
                                                numpy.int32),
                                            num_epochs=1,
                                            batch_size=FLAGS.batch_size,
                                            shuffle=False)

    results = est.evaluate(input_fn=test_input_fn, metrics=metric)
    for key in sorted(results):
        print('%s: %s' % (key, results[key]))
Example No. 12
 def _get_eval_metrics(model_type):
     """Returns a dict of 'string' to 'MetricSpec' objects."""
     classes_prediction_key = "classes"
     if model_type == RANDOM_FOREST:
         classes_prediction_key = "predictions"
     eval_metrics = {}
     eval_metrics["accuracy"] = metric_spec.MetricSpec(
         prediction_key=classes_prediction_key,
         metric_fn=tf.contrib.metrics.streaming_accuracy)
     eval_metrics["precision"] = metric_spec.MetricSpec(
         prediction_key=classes_prediction_key,
         metric_fn=tf.contrib.metrics.streaming_precision)
     eval_metrics["recall"] = metric_spec.MetricSpec(
         prediction_key=classes_prediction_key,
         metric_fn=tf.contrib.metrics.streaming_recall)
     return eval_metrics
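A `MetricSpec`'s `prediction_key` selects which entry of the model's predictions dict the metric receives, which is why the dict above switches between "classes" and "predictions" per model type. The sketch below wires one such spec to hand-built toy tensors; the constant values and dict keys are assumptions made for illustration.

import tensorflow as tf

# Toy illustration: `prediction_key` picks the "classes" tensor out of the
# predictions dict before the metric runs. Keys and values are assumptions
# made for this sketch.
spec = tf.contrib.learn.MetricSpec(
    metric_fn=tf.contrib.metrics.streaming_accuracy,
    prediction_key="classes")

labels = tf.constant([1, 0, 1])
predictions = {
    "classes": tf.constant([1, 1, 1]),
    "probabilities": tf.constant([[0.2, 0.8], [0.4, 0.6], [0.3, 0.7]]),
}
value_op, update_op = spec.create_metric_ops(
    inputs={}, labels=labels, predictions=predictions)

with tf.Session() as sess:
    sess.run(tf.local_variables_initializer())
    sess.run(update_op)
    print(sess.run(value_op))  # 2 of 3 predictions match -> ~0.667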
Example No. 13
def _weighted_average_loss_metric_spec(loss_fn, prediction_key, label_key,
                                       weight_key):
    def _streaming_weighted_average_loss(predictions, target, weights=None):
        loss_unweighted = loss_fn(predictions, target)
        _, weighted_average_loss = _loss(loss_unweighted,
                                         weights,
                                         name="eval_loss")
        return metrics_lib.streaming_mean(weighted_average_loss)

    return metric_spec.MetricSpec(_streaming_weighted_average_loss,
                                  prediction_key, label_key, weight_key)
Example No. 14
    def _default_metrics(self):
        """Returns a dict of `MetricSpec` objects keyed by name."""
        metrics = {
            _head_prefixed(self._head_name, metric_key.MetricKey.LOSS):
            _weighted_average_loss_metric_spec(
                self._eval_loss_fn, prediction_key.PredictionKey.LOGITS,
                self._label_name, self._weight_column_name)
        }

        # TODO(b/29366811): This currently results in both an "accuracy" and an
        # "accuracy/threshold_0.500000_mean" metric for binary classification.
        metrics[_head_prefixed(
            self._head_name,
            metric_key.MetricKey.ACCURACY)] = (metric_spec.MetricSpec(
                metrics_lib.streaming_accuracy,
                prediction_key.PredictionKey.CLASSES, self._label_name,
                self._weight_column_name))
        if self.logits_dimension == 1:

            def _add_binary_metric(key, metric_fn):
                metrics[_head_prefixed(
                    self._head_name, key)] = (metric_spec.MetricSpec(
                        metric_fn, prediction_key.PredictionKey.LOGISTIC,
                        self._label_name, self._weight_column_name))

            _add_binary_metric(metric_key.MetricKey.PREDICTION_MEAN,
                               _predictions_streaming_mean)
            _add_binary_metric(metric_key.MetricKey.LABEL_MEAN,
                               _labels_streaming_mean)

            # Also include the streaming mean of the label as an accuracy baseline, as
            # a reminder to users.
            _add_binary_metric(metric_key.MetricKey.ACCURACY_BASELINE,
                               _labels_streaming_mean)

            _add_binary_metric(metric_key.MetricKey.AUC, _streaming_auc)

            for threshold in self._thresholds:
                _add_binary_metric(
                    metric_key.MetricKey.ACCURACY_MEAN % threshold,
                    _accuracy_at_threshold(threshold))
                # Precision for positive examples.
                _add_binary_metric(
                    metric_key.MetricKey.PRECISION_MEAN % threshold,
                    _streaming_at_threshold(
                        metrics_lib.streaming_precision_at_thresholds,
                        threshold),
                )
                # Recall for positive examples.
                _add_binary_metric(
                    metric_key.MetricKey.RECALL_MEAN % threshold,
                    _streaming_at_threshold(
                        metrics_lib.streaming_recall_at_thresholds, threshold))
        return metrics
Example No. 15
 def _default_metric(self):
   metrics = {_head_prefixed(self._head_name, MetricKey.LOSS):
              _weighted_average_loss_metric_spec(self._eval_loss_fn,
                                                 PredictionKey.LOGITS,
                                                 self._label_name,
                                                 self._weight_column_name)}
   metrics[_head_prefixed(self._head_name, MetricKey.ACCURACY)] = (
       metric_spec.MetricSpec(metrics_lib.streaming_accuracy,
                              PredictionKey.CLASSES, self._label_name,
                              self._weight_column_name))
   # TODO(sibyl-vie3Poto): add more metrics relevant for svms.
   return metrics
Example No. 16
def _weighted_average_loss_metric_spec(loss_fn, prediction_key,
                                       label_key, weight_key):
  def _streaming_weighted_average_loss(predictions, labels, weights=None):
    loss_unweighted = loss_fn(predictions, labels)
    if weights is not None:
      weights = math_ops.to_float(weights)
    _, weighted_average_loss = _loss(loss_unweighted,
                                     weights,
                                     name="eval_loss")
    return metrics_lib.streaming_mean(weighted_average_loss)
  return metric_spec.MetricSpec(_streaming_weighted_average_loss,
                                prediction_key, label_key, weight_key)
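The `_loss` helper referenced above is internal and not reproduced here. As a rough, self-contained analogue of the same pattern, the sketch below builds a weighted streaming-loss `MetricSpec` from a plain squared-error loss using only public TF 1.x ops; the squared-error choice and the names are assumptions for illustration, not the loss used by the surrounding head code.

import tensorflow as tf


def weighted_average_loss_spec(prediction_key, label_key, weight_key):
  """Simplified stand-in for `_weighted_average_loss_metric_spec` above."""
  def _streaming_loss(predictions, labels, weights=None):
    # Element-wise squared error, averaged as a streaming (weighted) mean.
    loss = tf.square(predictions - labels)
    return tf.contrib.metrics.streaming_mean(loss, weights=weights)
  return tf.contrib.learn.MetricSpec(
      _streaming_loss, prediction_key, label_key, weight_key)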
Example No. 17
    def evaluate(self, data: np.ndarray, labels: np.ndarray):
        """Predicts and directly evaluates the results.

        Examples:
            To evaluate the prediction of the decision forest use:

            >>> results = forest.evaluate(data, labels)
            >>> for key in sorted(results):
            ...     print('%s: %s' % (key, results[key]))

        Args:
            data (np.ndarray): The data to predict. ``data.shape`` is ``(n, f)`` with ``n`` observations and
                ``f`` features per observation.
            labels (np.ndarray): The labels of the ``data``. ``labels[i]`` returns the label of observation ``i``.
                ``labels.shape`` is ``(n, 1)`` with ``n`` observations and their associated labels.
        Returns:
            dict: A dict of evaluation metrics.
        """

        if self.estimator is None:
            raise ValueError('Estimator not set')

        metrics = {
            'accuracy': metric_spec.MetricSpec(
                eval_metrics.get_metric('accuracy'),
                prediction_key=eval_metrics.get_prediction_key('accuracy')
            )
        }

        if self.report_feature_importances:
            metrics['feature_importance'] = metric_spec.MetricSpec(
                lambda x: x,
                prediction_key=eval_metrics.FEATURE_IMPORTANCE_NAME
            )

        results = self.estimator.score(x=data, y=labels, batch_size=self.batch_size,
                                       metrics=metrics)

        return results
Example No. 18
 def testIrisInputFnLabelsDict(self):
     iris = base.load_iris()
     est = estimator.Estimator(model_fn=logistic_model_no_mode_fn)
     est.fit(input_fn=iris_input_fn_labels_dict, steps=100)
     _ = est.evaluate(input_fn=iris_input_fn_labels_dict,
                      steps=1,
                      metrics={
                          'accuracy':
                          metric_spec.MetricSpec(
                              metric_fn=metric_ops.streaming_accuracy,
                              prediction_key='class',
                              label_key='labels')
                      })
     predictions = list(est.predict(x=iris.data))
     self.assertEqual(len(predictions), iris.target.shape[0])
Example No. 19
 def testIrisInputFnTargetIsDict(self):
     iris = tf.contrib.learn.datasets.load_iris()
     est = tf.contrib.learn.Estimator(model_fn=logistic_model_no_mode_fn)
     est.fit(input_fn=iris_input_fn_target_dict, steps=100)
     _ = est.evaluate(
         input_fn=iris_input_fn_target_dict,
         steps=1,
         metrics={
             'accuracy':
             metric_spec.MetricSpec(
                 metric_fn=tf.contrib.metrics.streaming_accuracy,
                 prediction_key='class',
                 label_key='target')
         })
     predictions = list(est.predict(x=iris.data))
     self.assertEqual(len(predictions), iris.target.shape[0])
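The `iris_input_fn_target_dict` helper is not shown in this snippet. A hypothetical sketch of an input_fn that returns dict-valued labels keyed by 'target', matching `label_key='target'` above (this is a guess, not the actual test helper):

import tensorflow as tf


def iris_input_fn_target_dict():
    # Hypothetical input_fn: the labels are wrapped in a dict so that
    # MetricSpec(label_key='target') can select them by key.
    iris = tf.contrib.learn.datasets.load_iris()
    features = tf.constant(iris.data, dtype=tf.float32)
    labels = {'target': tf.constant(iris.target, dtype=tf.int32)}
    return features, labels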
Example No. 20
def main(arffindex):

    farffs = open("./arffFileList.txt", 'r')
    arfflist = farffs.readlines()
    farffs.close()
    filename = "/media/ktg/New Volume/AndroidMalPaper/Arff/"
    filename += arfflist[arffindex][:-1]
    print(filename)

    with tf.device('/gpu:0'):

        X_train, X_test, Y_train, Y_test, featNos = dataloading(
            filename, False, False)
        print('data loading: done')

        ks = list()
        for i in range(X_train.shape[1]):
            ks.append(str(i))
        cname = "feats"
        sparceCol = tf.contrib.layers.sparse_column_with_keys(
            column_name=cname, keys=ks)
        est = SVM(example_id_column=str(arffindex),
                  feature_columns=[sparceCol])
        est.fit(x=X_train,
                y=np.reshape(Y_train, (Y_train.shape[0], 1)),
                batch_size=BATCH_SIZE)

        metric_name = 'accuracy'
        metric = {
            metric_name:
            metric_spec.MetricSpec(
                eval_metrics.get_metric(metric_name),
                prediction_key=eval_metrics.get_prediction_key(metric_name))
        }
        results = est.evaluate(x=X_test,
                               y=np.reshape(Y_test, (Y_test.shape[0], 1)),
                               batch_size=FLAGS.batch_size,
                               metrics=metric)

        for key in sorted(results):
            print('%s: %s' % (key, results[key]))
        exit()
Example No. 21
    def testFeatureEngineeringFn(self):
        def input_fn():
            return {
                "x": constant_op.constant([1.])
            }, {
                "y": constant_op.constant([11.])
            }

        def feature_engineering_fn(features, labels):
            _, _ = features, labels
            return {
                "transformed_x": constant_op.constant([9.])
            }, {
                "transformed_y": constant_op.constant([99.])
            }

        def model_fn(features, labels):
            # dummy variable:
            _ = variables_lib.Variable([0.])
            _ = labels
            predictions = features["transformed_x"]
            loss = constant_op.constant([2.])
            update_global_step = variables.get_global_step().assign_add(1)
            return predictions, loss, update_global_step

        estimator = estimator_lib.Estimator(
            model_fn=model_fn, feature_engineering_fn=feature_engineering_fn)
        estimator.fit(input_fn=input_fn, steps=1)
        prediction = next(
            estimator.predict(input_fn=input_fn, as_iterable=True))
        # predictions = transformed_x (9)
        self.assertEqual(9., prediction)
        metrics = estimator.evaluate(
            input_fn=input_fn,
            steps=1,
            metrics={
                "label":
                metric_spec.MetricSpec(lambda predictions, labels: labels)
            })
        # labels = transformed_y (99)
        self.assertEqual(99., metrics["label"])
Example No. 22
def _get_default_metrics(problem_type, sequence_length):
    """Returns default `MetricSpec`s for `problem_type`.

    Args:
      problem_type: `ProblemType.CLASSIFICATION` or
        `ProblemType.LINEAR_REGRESSION`.
      sequence_length: A `Tensor` with shape `[batch_size]` and dtype `int32`
        containing the length of each sequence in the batch. If `None`,
        sequences are assumed to be unpadded.
    Returns:
      A `dict` mapping strings to `MetricSpec`s.
    """
    default_metrics = {}
    if problem_type == constants.ProblemType.CLASSIFICATION:
        default_metrics['accuracy'] = metric_spec.MetricSpec(
            metric_fn=_mask_multivalue(sequence_length,
                                       metrics.streaming_accuracy),
            prediction_key=prediction_key.PredictionKey.CLASSES)
    elif problem_type == constants.ProblemType.LINEAR_REGRESSION:
        pass
    return default_metrics
Example No. 23
    def _default_metrics(self):
        """Returns a dict of `MetricSpec` objects keyed by name."""
        metrics = {
            _head_prefixed(self._head_name, metric_key.MetricKey.LOSS):
            _weighted_average_loss_metric_spec(
                self._loss_fn, prediction_key.PredictionKey.LOGITS,
                self._label_name, self._weight_column_name)
        }

        # TODO(b/29366811): This currently results in both an "accuracy" and an
        # "accuracy/threshold_0.500000_mean" metric for binary classification.
        metrics[_head_prefixed(
            self._head_name,
            metric_key.MetricKey.ACCURACY)] = (metric_spec.MetricSpec(
                metrics_lib.streaming_accuracy,
                prediction_key.PredictionKey.CLASSES, self._label_name,
                self._weight_column_name))

        # TODO(b/32953199): Add multiclass metrics.

        return metrics
Example No. 24
def eval(est):
  """Evaluate the model."""
  mnist = input_data.read_data_sets(FLAGS.data_dir, one_hot=False)
  metric_name = 'accuracy'
  metric = {
      metric_name:
          metric_spec.MetricSpec(
              eval_metrics.get_metric(metric_name),
              prediction_key=eval_metrics.get_prediction_key(metric_name))
  }

  test_input_fn = numpy_io.numpy_input_fn(
      x={'images': mnist.test.images},
      y=mnist.test.labels.astype(numpy.int32),
      num_epochs=1,
      batch_size=FLAGS.batch_size,
      shuffle=False)

  results = est.evaluate(input_fn=test_input_fn, metrics=metric)
  for key in sorted(results):
    print('%s: %s' % (key, results[key]))
Example No. 25
    def testFeatureEngineeringFnWithSameName(self):
        def input_fn():
            return {
                "x": constant_op.constant(["9."])
            }, {
                "y": constant_op.constant(["99."])
            }

        def feature_engineering_fn(features, labels):
            # Github #12205: raise a TypeError if called twice.
            _ = string_ops.string_split(features["x"])
            features["x"] = constant_op.constant([9.])
            labels["y"] = constant_op.constant([99.])
            return features, labels

        def model_fn(features, labels):
            # dummy variable:
            _ = variables_lib.Variable([0.])
            _ = labels
            predictions = features["x"]
            loss = constant_op.constant([2.])
            update_global_step = variables.get_global_step().assign_add(1)
            return predictions, loss, update_global_step

        estimator = estimator_lib.Estimator(
            model_fn=model_fn, feature_engineering_fn=feature_engineering_fn)
        estimator.fit(input_fn=input_fn, steps=1)
        prediction = next(
            estimator.predict(input_fn=input_fn, as_iterable=True))
        # predictions = transformed_x (9)
        self.assertEqual(9., prediction)
        metrics = estimator.evaluate(
            input_fn=input_fn,
            steps=1,
            metrics={
                "label":
                metric_spec.MetricSpec(lambda predictions, labels: labels)
            })
        # labels = transformed_y (99)
        self.assertEqual(99., metrics["label"])
Example No. 26
        else:
            train_op = None

            print("Training not build ")

        predictions = {
            "probabilities": tf.nn.softmax(result, name="softmax_tensor"),
            "loss": loss
        }
        return predictions, loss, train_op

    # return model_fn_lib.ModelFnOps( mode=mode, predictions=predictions, loss=loss, train_op=train_op)

    return cnn_model


def metrics_wr(values, omit):
    weights = tf.ones(shape=())

    return metric_ops.streaming_mean(values, weights)


METRICS = {
    'loss':
    metric_spec.MetricSpec(metric_fn=metrics_wr,
                           prediction_key='loss',
                           weight_key=None,
                           label_key=None)
}
Example No. 27
def custom_model_help(model_fn,
                      input_data,
                      project_id,
                      job_id,
                      user_ID,
                      result_dir,
                      result_sds,
                      est_params=None,
                      fit_params=None,
                      eval_params=None,
                      logging_flag=True):

    tf.logging.set_verbosity(tf.logging.INFO)
    # pass result staging data set for logger to save results
    logger = logging.getLogger('tensorflow')
    logger.setLevel(logging.DEBUG)
    if logging_flag:
        # add logger only when logging flag set to true
        logger.addHandler(mh)

    # init training logger
    training_logger = logger_service.TrainingLogger(
        fit_params['args']['steps'], project_id, job_id, user_ID, result_sds)

    mh.training_logger = training_logger

    # input_data is a single whole data set; split it into training and test sets
    # X_train, X_test, y_train, y_test = train_test_split(
    #     input_data['df_features'], input_data['df_labels'],
    #     test_size=0.20,
    #     random_state=42)

    # input_data has already been split into training and test sets
    X_train, X_test, y_train, y_test = \
        input_data['x_tr'], input_data['x_te'], \
        input_data['y_tr'], input_data['y_te']

    train_input_fn = get_input_fn(model_name=input_data['model_name'],
                                  df_features=X_train,
                                  df_labels=y_train)
    eval_input_fn = get_input_fn(model_name=input_data['model_name'],
                                 df_features=X_test,
                                 df_labels=y_test)
    if ((input_data['model_name'] in ['Linear Classifier', 'Random Forest'] and
         est_params['args']['num_classes'] == 2) or
            input_data['model_name'] == 'SVM'):
        validation_metrics = {
            "acc":
            metric_spec.MetricSpec(
                metric_fn=tf.contrib.metrics.streaming_accuracy,
                prediction_key=None),
            "precision":
            metric_spec.MetricSpec(
                metric_fn=tf.contrib.metrics.streaming_precision,
                prediction_key=None),
            "recall":
            metric_spec.MetricSpec(
                metric_fn=tf.contrib.metrics.streaming_recall,
                prediction_key=None),
            "confusion_matrix":
            metric_spec.MetricSpec(
                metric_fn=tf.contrib.metrics.confusion_matrix,
                prediction_key=None)
        }
    else:
        validation_metrics = {}

    val_monitor = ValidationMonitor(input_fn=eval_input_fn,
                                    eval_steps=1,
                                    every_n_steps=100,
                                    metrics=validation_metrics,
                                    name='val')

    tra_monitor = ValidationMonitor(input_fn=train_input_fn,
                                    eval_steps=1,
                                    every_n_steps=100,
                                    metrics=validation_metrics,
                                    name='tra')

    # init model
    estimator = tf.contrib.learn.Estimator(
        model_fn=model_fn,
        model_dir=None,
        config=tf.contrib.learn.RunConfig(save_checkpoints_steps=100),
        params=est_params['args'])

    # fit
    # result = {}
    # evaluation_times = max(fit_params['args']['steps'] / 100, 1)
    # while evaluation_times > 0:
    #     fit_params['args']['steps'] = 100
    #     estimator.fit(input_fn=train_input_fn, monitors=[
    # validation_monitor], **fit_params['args'])
    #     # evaluate
    #     metrics = estimator.evaluate(input_fn=eval_input_fn,
    #                                  **eval_params['args'])
    #     result.update({
    #         'eval_metrics': metrics
    #     })
    #     evaluation_times -= 1

    # fit
    estimator.fit(input_fn=train_input_fn,
                  monitors=[val_monitor, tra_monitor],
                  **fit_params['args'])
    # evaluate
    metrics = estimator.evaluate(input_fn=eval_input_fn,
                                 metrics=validation_metrics,
                                 **eval_params['args'])

    result = {}
    result.update({'eval_metrics': metrics})

    # predict
    predict_feature = input_data.get('predict', None)
    if predict_feature:
        predictions = estimator.predict(predict_feature, as_iterable=True)
        result['predictions'] = predictions

    if logging_flag:
        # export saved model
        features = {
            k: constant_op.constant(X_train[k].values,
                                    shape=[X_train.shape[0], 1],
                                    dtype=dtypes.float32)
            for k in X_train.columns
        }
        serving_input_fn = input_fn_utils.build_default_serving_input_fn(
            features)
        saved_model_path = estimator.export_savedmodel(
            os.path.abspath(result_dir), serving_input_fn)

        # add saved_model_path to result staging data set
        staging_data_set_business.update(
            result_sds.id, saved_model_path=saved_model_path.decode('ascii'))
    return result
Example No. 28
            # Create a variable to track the global step.
            # Use the optimizer to apply the gradients that minimize the loss
            # (and also increment the global step counter) as a single training step.
            train_op = optimizer.minimize(loss_op, global_step=global_step)
            # Add streaming means.
        else:
            train_op = None

        return tensors, loss_op, train_op

    return model_fn


METRICS = {
    'loss':
    metric_spec.MetricSpec(metric_fn=metric_ops.streaming_mean,
                           prediction_key='loss'),
    'accuracy':
    metric_spec.MetricSpec(metric_fn=metric_ops.streaming_mean,
                           prediction_key='accuracy')
}


def inference(images, hidden1_units, hidden2_units):
    """Build the MNIST model up to where it may be used for inference.

    Args:
      images: Images placeholder, from inputs().
      hidden1_units: Size of the first hidden layer.
      hidden2_units: Size of the second hidden layer.
    Returns:
      softmax_linear: Output tensor with the computed logits.
Example No. 29
def train_and_eval(conf=None):
    global config

    # if an argument is provided, set config to this value - used for calling the method from outside of the file.
    # if no argument passed, then the arguments passed on the command line, as interpreted by the parser, are used.
    if conf:
        config = conf
    else:
        config = {
            'train_data': train_data,
            'train_labels': train_labels,
            'test_data': test_data,
            'test_labels': test_labels,
            'num_classes': num_classes,
            'num_features': num_features,
            'num_trees': num_trees,
            'max_nodes': max_nodes,
            'train_steps': train_steps,
            'batch_size': batch_size,
            'bagging_fraction': bagging_fraction,
            'feature_bagging_fraction': feature_bagging_fraction,
            'model_dir': model_dir,
            'delete_models': delete_models,
            'data_dir': data_dir,
            'use_training_loss': use_training_loss
        }

    # convert config dict into an object, for acceptance in the following lines
    config = objectview(config)

    # if a specific directory to store the generated model is specified in the arguments, use that
    # otherwise, use a temporary directory
    model_dir = tempfile.mkdtemp() if not config.model_dir else config.model_dir

    # load the training data and cast it to float32
    if not config.train_data:
        sys.exit('Usage: --train_data <csv file>')
    train_data = loc_genfromtxt(config.train_data)
    train_data = train_data.astype(np.float32)

    if not config.train_labels:
        sys.exit('Usage: --train_labels <csv file>')
    train_labels = loc_genfromtxt(config.train_labels)
    train_labels = train_labels.astype(np.float32)

    # auto-detect number of features in training data
    # print('train_data has number of features/columns = ' + str(train_data.shape[1]))
    config.num_features = train_data.shape[1]

    # get a random forest estimator object
    est = build_estimator(model_dir)

    # fit the random forest model using the training data
    est.fit(x=train_data, y=train_labels, batch_size=config.batch_size)

    # load the test data and cast it to float32
    if not config.test_data:
        sys.exit('Usage: --test_data <csv file>')
    test_data = loc_genfromtxt(config.test_data)
    test_data = test_data.astype(np.float32)

    if not config.test_labels:
        sys.exit('Usage: --test_labels <csv file>')
    test_labels = loc_genfromtxt(config.test_labels)
    test_labels = test_labels.astype(np.float32)

    # define the metric to be 'accuracy'
    metric_name = 'accuracy'
    metric = {
        metric_name:
        metric_spec.MetricSpec(
            eval_metrics.get_metric(metric_name),
            prediction_key=eval_metrics.get_prediction_key(metric_name))
    }

    # calculate the score using the test
    results = est.score(x=test_data,
                        y=test_labels,
                        batch_size=config.batch_size,
                        metrics=metric)

    # print each value followed by a comma (no space or newline), except the
    # last value, which is followed by a newline only
    i = 1
    length = len(sorted(results))
    for key in sorted(results):
        if i == length:
            print(str(results[key]))
        else:
            print(str(results[key]) + ',', end="")
        i = i + 1

    # if flag set, delete model dir in order to free up space / avoid out of memory
    if config.delete_models:
        call(['rm', '-r', model_dir])
Example No. 30
 def _add_binary_metric(metric_key, metric_fn):
     metrics[_head_prefixed(self._head_name,
                            metric_key)] = (metric_spec.MetricSpec(
                                metric_fn, PredictionKey.LOGISTIC,
                                self._label_name))