예제 #1
0
def evaluate_metrics(inputs, sess, dataset, metrics):
    """Evaluate ``metrics`` on ``dataset`` through a TFNet created from ``sess``.

    :param inputs: input tensors of the model; the first one provides the graph
        and the size of its leading dimension is passed on as
        ``real_batch_size``.
    :param sess: session handed to ``TFNet.from_session``.
    :param dataset: object exposing ``batch_per_thread``, ``batch_size`` and
        ``get_num_partitions()``; it is forwarded to ``TFNet.evaluate``.
    :param metrics: metric definitions forwarded to
        ``TFModel._process_metrics``.
    :return: dict mapping each evaluation result's ``method`` to its
        ``result``.
    """
    # A positive batch_per_thread takes precedence: the total batch is then
    # one per-thread batch for every partition of the dataset.
    batch_size = (
        dataset.batch_per_thread * dataset.get_num_partitions()
        if dataset.batch_per_thread > 0
        else dataset.batch_size
    )

    first_input = inputs[0]
    leading_dim = tf.shape(first_input)[0]

    metric_outputs, eval_methods = TFModel._process_metrics(
        first_input.graph, metrics=metrics, real_batch_size=leading_dim)

    net = TFNet.from_session(sess, inputs=inputs, outputs=metric_outputs)

    return {
        outcome.method: outcome.result
        for outcome in net.evaluate(dataset, batch_size, eval_methods)
    }
예제 #2
0
    def _evaluate_distributed(self, dataset):
        """Evaluate ``self.model`` on ``dataset`` through a TFNet.

        The TFNet is built from the session returned by ``K.get_session()``
        using the model's inputs and outputs, and one evaluation method is
        created per entry of ``self.metrics_names``.

        :param dataset: object exposing ``batch_per_thread``, ``batch_size``
            and ``get_num_partitions()``; it is forwarded to
            ``TFNet.evaluate``.
        :return: list with the ``result`` of every evaluation result, in the
            order returned by ``TFNet.evaluate``.
        """
        tfnet = TFNet.from_session(K.get_session(),
                                   inputs=self.model.inputs,
                                   outputs=self.model.outputs)

        # Only a strictly positive batch_per_thread is usable: a value of 0
        # would otherwise produce a total batch size of 0. Any non-positive
        # value therefore falls back to the dataset's batch_size.
        if dataset.batch_per_thread > 0:
            batch_size = dataset.batch_per_thread * dataset.get_num_partitions()
        else:
            batch_size = dataset.batch_size

        eval_methods = [to_bigdl_metric(m, self.model.loss)
                        for m in self.metrics_names]

        results = tfnet.evaluate(dataset, batch_size, eval_methods)
        return [r.result for r in results]
예제 #3
0
    def __init__(self, sess, outputs, inputs=None, dataset=None):
        '''
        TFPredictor takes a list of TensorFlow tensors as the model outputs and
        feed all the elements in TFDatasets to produce those outputs and returns
        a Spark RDD with each of its elements representing the model prediction
        for the corresponding input elements.

        :param sess: the current TensorFlow Session, you should first use this session
        to load the trained variables then pass into TFPredictor
        :param outputs: the output tensors of the TensorFlow model
        :param inputs: the input tensors of the model. If None, both the inputs
        and the dataset are obtained from
        TFPredictor._get_datasets_and_inputs(outputs) and any `dataset`
        argument is ignored.
        :param dataset: the dataset to predict on; required when `inputs` is given.
        It must have a positive batch_per_thread.
        :raises ValueError: if no dataset is available or its batch_per_thread
        is not positive.
        '''
        if inputs is None:
            dataset, inputs = TFPredictor._get_datasets_and_inputs(outputs)

        # Validate before constructing the TFNet so that an unusable dataset
        # is reported without first building the network, and a missing
        # dataset gives a clear error instead of an AttributeError on None.
        if dataset is None:
            raise ValueError("A TFDataset is required for prediction " +
                             "but dataset is None")
        if dataset.batch_per_thread <= 0:
            raise ValueError("You should set batch_per_thread on TFDataset " +
                             "instead of batch_size for prediction")

        self.sess = sess
        self.dataset = dataset
        self.inputs = inputs
        self.tfnet = TFNet.from_session(sess, self.inputs, outputs)
예제 #4
0
    def predict(self, input_fn, predict_keys=None, checkpoint_path=None):
        """Outputs predictions for given features.

        :param input_fn: A function that constructs the features.
              * A `TFDataset` object, each elements of which is a tuple `(features, None)`.
              * A `tf.data.Dataset` object: Outputs of `Dataset` object must have
                same constraints as below.
              * features: A `tf.Tensor` or a dictionary of string feature name to
                `Tensor`. features are consumed by `model_fn`. They should satisfy
                the expectation of `model_fn` from inputs.
              * A tuple, in which case the first item is extracted as features.

        :param predict_keys: list of keys selecting which entries of a dict-valued
            `predictions` to compute. If `None` or empty, every entry is returned.
            It has no effect on the `TFDataset` path when `predictions` is not a
            dict.

        :param checkpoint_path: Path of a specific checkpoint to predict. If `None`, the
            latest checkpoint in `model_dir` is used.  If there are no checkpoints
            in `model_dir`, prediction is run with newly initialized `Variables`
            instead of ones restored from checkpoint.


        Return:
          Evaluated values of `predictions` tensors. When `input_fn` yields a
          `TFDataset` this is the (mapped) result of `TFNet.predict`; otherwise
          it is a list built from the underlying estimator's `predict`.

        """
        import tensorflow as tf

        with tf.Graph().as_default():
            result = self.estimator._call_input_fn(
                input_fn, tf.estimator.ModeKeys.PREDICT)
            if isinstance(result, TFDataset):
                spec = self._call_model_fn(result.feature_tensors, None,
                                           tf.estimator.ModeKeys.PREDICT,
                                           self.config)

                # An explicitly requested checkpoint must win; the latest
                # checkpoint is only a fallback when none was given.
                if not checkpoint_path:
                    checkpoint_path = self.estimator.latest_checkpoint()

                predictions_is_dict = isinstance(spec.predictions, dict)
                # Use one rule for "keys were requested" both when choosing
                # the output tensors and when labelling the results, so an
                # empty predict_keys behaves like None in both places.
                keys_requested = predictions_is_dict and bool(predict_keys)

                with tf.Session() as sess:
                    if checkpoint_path:
                        saver = tf.train.Saver()
                        saver.restore(sess, checkpoint_path)
                    else:
                        sess.run(tf.global_variables_initializer())
                    inputs = nest.flatten(result._original_tensors[0])
                    if keys_requested:
                        outputs = [
                            spec.predictions[key] for key in predict_keys
                        ]
                    else:
                        outputs = nest.flatten(spec.predictions)
                    tfnet = TFNet.from_session(sess,
                                               inputs=inputs,
                                               outputs=outputs)
                    predictions = tfnet.predict(result.get_prediction_data(),
                                                mini_batch=True)

                    # If predictions is a dict, add back the keys and results is a dict as well.
                    if predictions_is_dict:
                        # Given a list of outputs; return a dict of outputs.
                        def zip_key(outs, keys):
                            if isinstance(outs, list):
                                error_msg = "output length is " \
                                    + "{} but keys length is {}".format(len(outs), len(keys))
                                assert len(outs) == len(keys), error_msg
                            else:
                                # A single (non-list) output is wrapped so it
                                # can be paired with its key.
                                outs = [outs]
                            return dict(zip(keys, outs))

                        # Without requested keys the outputs came from
                        # nest.flatten(spec.predictions); sorted keys are
                        # paired with them positionally.
                        pred_keys = predict_keys if keys_requested \
                            else sorted(spec.predictions.keys())
                        predictions = predictions.map(
                            lambda res: zip_key(res, pred_keys))
                    return predictions

        # Not a TFDataset: delegate to the wrapped estimator, forwarding the
        # requested keys as well as the checkpoint.
        return list(
            self.estimator.predict(input_fn,
                                   predict_keys=predict_keys,
                                   checkpoint_path=checkpoint_path))
예제 #5
0
    def evaluate(self,
                 input_fn,
                 eval_methods,
                 steps=None,
                 checkpoint_path=None):
        """Evaluates the model given evaluation data `input_fn`.

        :param input_fn: A function that constructs the input data for evaluation. The
            function should construct and return one of the following:
            * A `TFDataset` object, each elements of which is a tuple `(features, labels)`.
            * A `tf.data.Dataset` object: Outputs of `Dataset` object must be a tuple
            `(features, labels)` with same constraints as below.
            * A tuple `(features, labels)`: Where `features` is a `tf.Tensor` or a dictionary
            of string feature name to `Tensor` and `labels` is a `Tensor` or a
            dictionary of string label name to `Tensor`. Both `features` and
            `labels` are consumed by `model_fn`. They should satisfy the expectation
            of `model_fn` from inputs.
        :param eval_methods: a list of strings to specify the evaluation metrics to
                            be used in this model
        :param steps: Number of steps for which to evaluate model. Only used when
            `input_fn` does not produce a `TFDataset` and evaluation is delegated
            to the underlying estimator.
        :param checkpoint_path: Path of a specific checkpoint to evaluate. If `None`, the
            latest checkpoint in `model_dir` is used.  If there are no checkpoints
            in `model_dir`, evaluation is run with newly initialized `Variables`
            instead of ones restored from checkpoint.

        Returns:
          A dict of evaluation results. On the `TFDataset` path it maps each
          result's `method` to its `result`; otherwise it is whatever the
          underlying estimator's `evaluate` returns.

        Raises:
          ValueError: if any entry of `eval_methods` is not a string.
          Exception: if `predictions` is not a dict and flattens to more than
            one output.
        """
        if not all(
                isinstance(metric, six.string_types)
                for metric in eval_methods):
            raise ValueError("All metrics should be string types")
        from tensorflow_estimator.python.estimator.canned import prediction_keys
        with tf.Graph().as_default():
            result = self.estimator._call_input_fn(input_fn,
                                                   tf.estimator.ModeKeys.EVAL)
            if isinstance(result, TFDataset):
                # model_fn is invoked in PREDICT mode: only spec.predictions is
                # used below, the metrics themselves are computed by
                # tfnet.evaluate.
                spec = self._call_model_fn(result.feature_tensors,
                                           result.label_tensors,
                                           tf.estimator.ModeKeys.PREDICT,
                                           self.config)

                # An explicitly requested checkpoint must win; the latest
                # checkpoint is only a fallback when none was given.
                if not checkpoint_path:
                    checkpoint_path = self.estimator.latest_checkpoint()

                with tf.Session() as sess:
                    if checkpoint_path:
                        saver = tf.train.Saver()
                        saver.restore(sess, checkpoint_path)
                    else:
                        sess.run(tf.global_variables_initializer())
                    inputs = nest.flatten(result._original_tensors[0])
                    if isinstance(spec.predictions, dict):
                        # "mae" is evaluated on the PREDICTIONS entry, every
                        # other metric on the LOGITS entry.
                        if "mae" in eval_methods:
                            output_key = prediction_keys.PredictionKeys.PREDICTIONS
                        else:
                            output_key = prediction_keys.PredictionKeys.LOGITS
                        outputs = [spec.predictions[output_key]]
                    else:
                        outputs = nest.flatten(spec.predictions)
                        if len(outputs) > 1:
                            raise Exception(
                                "Evaluate on more than one output is not " +
                                "supported now")
                    tfnet = TFNet.from_session(sess,
                                               inputs=inputs,
                                               outputs=outputs)

                    # Only a strictly positive batch_per_thread is usable: a
                    # value of 0 would otherwise give a total batch size of 0,
                    # so any non-positive value falls back to batch_size.
                    if result.batch_per_thread > 0:
                        batch_size = (result.batch_per_thread *
                                      result.get_num_partitions())
                    else:
                        batch_size = result.batch_size

                    bigdl_methods = [
                        self._to_bigdl_metric(m) for m in eval_methods
                    ]
                    results = tfnet.evaluate(result, batch_size, bigdl_methods)
                    return {r.method: r.result for r in results}

        # Not a TFDataset: delegate to the wrapped estimator.
        return self.estimator.evaluate(input_fn,
                                       steps,
                                       checkpoint_path=checkpoint_path)