def evaluate_metrics(inputs, sess, dataset, metrics):
    """Evaluate `metrics` over `dataset` using a TFNet exported from `sess`.

    :param inputs: input tensors of the model. The first one supplies both
        the graph and the runtime batch dimension handed to
        `TFModel._process_metrics`.
    :param sess: session passed to `TFNet.from_session`.
    :param dataset: dataset to evaluate on. When its `batch_per_thread` is
        positive the batch size is `batch_per_thread` times the number of
        partitions; otherwise `dataset.batch_size` is used.
    :param metrics: metric definitions forwarded to
        `TFModel._process_metrics`.
    :return: a dict mapping each result's `method` to its `result`.
    """
    total_batch = (
        dataset.batch_per_thread * dataset.get_num_partitions()
        if dataset.batch_per_thread > 0
        else dataset.batch_size
    )

    # Leading dimension of the first input, resolved when the graph runs.
    runtime_batch = tf.shape(inputs[0])[0]
    metric_outputs, bigdl_methods = TFModel._process_metrics(
        inputs[0].graph,
        metrics=metrics,
        real_batch_size=runtime_batch,
    )

    net = TFNet.from_session(sess, inputs=inputs, outputs=metric_outputs)
    return {
        entry.method: entry.result
        for entry in net.evaluate(dataset, total_batch, bigdl_methods)
    }
def _evaluate_distributed(self, dataset):
    """Evaluate `self.model` on `dataset` through a TFNet.

    The TFNet is built from the session returned by `K.get_session()` and
    the inputs/outputs of `self.model`.

    :param dataset: dataset to evaluate on. When its `batch_per_thread` is
        positive the batch size is `batch_per_thread` times the number of
        partitions; otherwise `dataset.batch_size` is used.
    :return: a list with the `result` of every evaluation method, in the
        order of `self.metrics_names`' converted metrics as returned by
        `tfnet.evaluate`.
    """
    tfnet = TFNet.from_session(K.get_session(),
                               inputs=self.model.inputs,
                               outputs=self.model.outputs)

    # A non-positive batch_per_thread means "not set". Testing `> 0` (rather
    # than `< 0`) keeps batch_per_thread == 0 from producing a batch size of
    # zero and matches how the other evaluation/prediction helpers decide.
    if dataset.batch_per_thread > 0:
        batch_size = dataset.batch_per_thread * dataset.get_num_partitions()
    else:
        batch_size = dataset.batch_size

    eval_methods = [to_bigdl_metric(m, self.model.loss)
                    for m in self.metrics_names]
    results = tfnet.evaluate(dataset, batch_size, eval_methods)
    return [r.result for r in results]
def __init__(self, sess, outputs, inputs=None, dataset=None):
    '''
    Build a predictor that feeds the elements of a TFDataset through the
    given TensorFlow output tensors. The prediction result is meant to be
    a Spark RDD whose elements are the model predictions for the
    corresponding input elements.

    :param sess: the current TensorFlow Session; load the trained variables
        with this session before handing it to TFPredictor
    :param outputs: the output tensors of the TensorFlow model
    :param inputs: the input tensors of the model. When None, both
        `dataset` and `inputs` are obtained from
        `TFPredictor._get_datasets_and_inputs(outputs)` and any `dataset`
        argument is ignored.
    :param dataset: the dataset to predict on; its `batch_per_thread` must
        be positive.
    :raises ValueError: if `batch_per_thread` of the dataset is not positive.
    '''
    if inputs is None:
        # Recover both the dataset and its input tensors from the outputs.
        dataset, inputs = TFPredictor._get_datasets_and_inputs(outputs)

    self.sess, self.dataset, self.inputs = sess, dataset, inputs
    self.tfnet = TFNet.from_session(sess, self.inputs, outputs)

    batch_per_thread_is_set = self.dataset.batch_per_thread > 0
    if not batch_per_thread_is_set:
        raise ValueError("You should set batch_per_thread on TFDataset " +
                         "instead of batch_size for prediction")
def predict(self, input_fn, predict_keys=None, checkpoint_path=None):
    """Outputs predictions for given features.

    :param input_fn: A function that constructs the features. It should
        return one of:

        * A `TFDataset` object, each element of which is a tuple
          `(features, None)`.
        * A `tf.data.Dataset` object: outputs of the `Dataset` object must
          have the same constraints as below.
        * features: A `tf.Tensor` or a dictionary of string feature name to
          `Tensor`. Features are consumed by `model_fn` and should satisfy
          its expectations.
        * A tuple, in which case the first item is extracted as features.
    :param predict_keys: list of keys to keep when the model's predictions
        are a dict. If `None` or empty, all keys are returned.
    :param checkpoint_path: Path of a specific checkpoint to predict. If
        `None`, the latest checkpoint in `model_dir` is used. If there are
        no checkpoints in `model_dir`, prediction is run with newly
        initialized `Variables` instead of ones restored from checkpoint.

    :return: Evaluated values of `predictions` tensors. For a `TFDataset`
        input this is the object returned by `TFNet.predict` (its elements
        are turned into dicts keyed by prediction name when the model's
        predictions are a dict); otherwise a list of the predictions
        yielded by the underlying estimator.
    """
    import tensorflow as tf

    with tf.Graph().as_default():
        result = self.estimator._call_input_fn(
            input_fn, tf.estimator.ModeKeys.PREDICT)
        if isinstance(result, TFDataset):
            spec = self._call_model_fn(result.feature_tensors,
                                       None,
                                       tf.estimator.ModeKeys.PREDICT,
                                       self.config)

            # Only fall back to the latest checkpoint when the caller did
            # not ask for a specific one; an explicit checkpoint_path must
            # not be silently replaced.
            if not checkpoint_path:
                checkpoint_path = self.estimator.latest_checkpoint()

            with tf.Session() as sess:
                if checkpoint_path:
                    saver = tf.train.Saver()
                    saver.restore(sess, checkpoint_path)
                else:
                    sess.run(tf.global_variables_initializer())

                inputs = nest.flatten(result._original_tensors[0])

                predictions_are_dict = isinstance(spec.predictions, dict)
                # An empty predict_keys is treated like None ("all keys") so
                # the outputs and the keys zipped back below always agree.
                if predictions_are_dict and predict_keys:
                    pred_keys = predict_keys
                    outputs = [spec.predictions[key] for key in pred_keys]
                else:
                    # NOTE(review): relies on nest.flatten ordering dict
                    # values by sorted key, which is what pred_keys mirrors.
                    outputs = nest.flatten(spec.predictions)
                    pred_keys = (sorted(spec.predictions.keys())
                                 if predictions_are_dict else None)

                tfnet = TFNet.from_session(sess, inputs=inputs,
                                           outputs=outputs)
                predictions = tfnet.predict(result.get_prediction_data(),
                                            mini_batch=True)

                # If predictions is a dict, add back the keys so every
                # result is a dict as well.
                if predictions_are_dict:
                    # Given a list of outputs; return a dict of outputs.
                    def zip_key(outs, keys):
                        if isinstance(outs, list):
                            error_msg = "output length is " \
                                + "{} but keys length is {}".format(
                                    len(outs), len(keys))
                            assert len(outs) == len(keys), error_msg
                        else:
                            # A single output is not wrapped in a list.
                            outs = [outs]
                        return {key: out for out, key in zip(outs, keys)}

                    predictions = predictions.map(
                        lambda res: zip_key(res, pred_keys))
                return predictions

    # Not a TFDataset: delegate to the wrapped estimator, forwarding
    # predict_keys so both paths filter the predictions the same way.
    return list(
        self.estimator.predict(input_fn,
                               predict_keys=predict_keys,
                               checkpoint_path=checkpoint_path))
def evaluate(self, input_fn, eval_methods, steps=None, checkpoint_path=None):
    """Evaluates the model given evaluation data `input_fn`.

    :param input_fn: A function that constructs the input data for
        evaluation. The function should construct and return one of the
        following:

        * A `TFDataset` object, each element of which is a tuple
          `(features, labels)`.
        * A `tf.data.Dataset` object: outputs of the `Dataset` object must
          be a tuple `(features, labels)` with the same constraints as
          below.
        * A tuple `(features, labels)`: where `features` is a `tf.Tensor`
          or a dictionary of string feature name to `Tensor` and `labels`
          is a `Tensor` or a dictionary of string label name to `Tensor`.
          Both `features` and `labels` are consumed by `model_fn` and
          should satisfy its expectations.
    :param eval_methods: a list of strings to specify the evaluation
        metrics to be used in this model.
    :param steps: Number of steps for which to evaluate the model. Only
        used when `input_fn` does not produce a `TFDataset`; it is then
        forwarded to the underlying estimator.
    :param checkpoint_path: Path of a specific checkpoint to evaluate. If
        `None`, the latest checkpoint in `model_dir` is used. If there are
        no checkpoints in `model_dir`, evaluation is run with newly
        initialized `Variables` instead of ones restored from checkpoint.

    :return: A dict containing the evaluation metrics keyed by name.
    :raises ValueError: if any entry of `eval_methods` is not a string.
    :raises Exception: if the model's (non-dict) predictions flatten to
        more than one output.
    """
    if not all(isinstance(metric, six.string_types)
               for metric in eval_methods):
        raise ValueError("All metrics should be string types")

    from tensorflow_estimator.python.estimator.canned import prediction_keys

    with tf.Graph().as_default():
        result = self.estimator._call_input_fn(input_fn,
                                               tf.estimator.ModeKeys.EVAL)
        if isinstance(result, TFDataset):
            # The model is built in PREDICT mode: the metrics are computed
            # by TFNet.evaluate from the prediction tensors.
            spec = self._call_model_fn(result.feature_tensors,
                                       result.label_tensors,
                                       tf.estimator.ModeKeys.PREDICT,
                                       self.config)

            # Only fall back to the latest checkpoint when the caller did
            # not ask for a specific one; an explicit checkpoint_path must
            # not be silently replaced.
            if not checkpoint_path:
                checkpoint_path = self.estimator.latest_checkpoint()

            with tf.Session() as sess:
                if checkpoint_path:
                    saver = tf.train.Saver()
                    saver.restore(sess, checkpoint_path)
                else:
                    sess.run(tf.global_variables_initializer())

                inputs = nest.flatten(result._original_tensors[0])
                if isinstance(spec.predictions, dict):
                    # "mae" is evaluated on the PREDICTIONS entry; every
                    # other metric is evaluated on the LOGITS entry.
                    if "mae" in eval_methods:
                        output_key = \
                            prediction_keys.PredictionKeys.PREDICTIONS
                    else:
                        output_key = prediction_keys.PredictionKeys.LOGITS
                    outputs = [spec.predictions[output_key]]
                else:
                    outputs = nest.flatten(spec.predictions)
                    if len(outputs) > 1:
                        raise Exception(
                            "Evaluate on more than one output is not " +
                            "supported now")

                tfnet = TFNet.from_session(sess, inputs=inputs,
                                           outputs=outputs)

                # A non-positive batch_per_thread means "not set"; testing
                # `> 0` keeps batch_per_thread == 0 from producing a batch
                # size of zero.
                if result.batch_per_thread > 0:
                    batch_size = (result.batch_per_thread *
                                  result.get_num_partitions())
                else:
                    batch_size = result.batch_size

                eval_methods = [
                    self._to_bigdl_metric(m) for m in eval_methods
                ]
                results = tfnet.evaluate(result, batch_size, eval_methods)
                return {r.method: r.result for r in results}

    # Not a TFDataset: delegate to the wrapped estimator.
    return self.estimator.evaluate(input_fn, steps,
                                   checkpoint_path=checkpoint_path)