Exemplo n.º 1
0
def create_session(checkpoint_path, n_cpu_threads=-1):
    """Builds a MonitoredSession restored from a checkpoint.

    Args:
      checkpoint_path (string): Either a checkpoint directory (the
                                latest checkpoint inside it is used)
                                or a direct path to a checkpoint file.
      n_cpu_threads (int): Forwarded to ``session_config``. If
                           negative, we assume either GPU decoding or
                           that all CPU cores can be used.
    Returns:
      A TensorFlow MonitoredSession.
    Raises:
      AttributeError: If TensorFlow reports a ``NotFoundError`` while
                      the session is being set up.
    """
    not_found_msg = (
        "Could not find all variables of the computation "
        "graph in the T2T checkpoint file. This means that the "
        "checkpoint does not correspond to the model specified in "
        "SGNMT. Please double-check pred_src_vocab_size, "
        "pred_trg_vocab_size, and all the t2t_* parameters. "
        "Also make sure that the checkpoint exists and is readable")
    try:
        if not os.path.isdir(checkpoint_path):
            # Anything that is not a directory is used verbatim.
            logging.info("%s is not a directory. Interpreting as direct "
                         "path to checkpoint..." % checkpoint_path)
        else:
            checkpoint_path = saver.latest_checkpoint(checkpoint_path)
        creator = training.ChiefSessionCreator(
            checkpoint_filename_with_path=checkpoint_path,
            config=session_config(n_cpu_threads))
        return training.MonitoredSession(session_creator=creator)
    except tf.errors.NotFoundError:
        logging.fatal(not_found_msg)
        raise AttributeError("Could not initialize TF session.")
Exemplo n.º 2
0
 def create_session(self, checkpoint_dir):
     """Opens a MonitoredSession for this predictor.

     The session is restored from the most recent checkpoint that
     ``saver.latest_checkpoint`` reports for ``checkpoint_dir`` and is
     configured with ``self._session_config()``.
     """
     newest_checkpoint = saver.latest_checkpoint(checkpoint_dir)
     creator = training.ChiefSessionCreator(
         checkpoint_filename_with_path=newest_checkpoint,
         config=self._session_config())
     return training.MonitoredSession(session_creator=creator)
Exemplo n.º 3
0
 def _create_session(self):
     """Opens a MonitoredSession used to restore the model.

     The checkpoint is the most recent one that
     ``saver.latest_checkpoint`` reports for ``self.config.output_path``.
     """
     newest_checkpoint = saver.latest_checkpoint(self.config.output_path)
     creator = training.ChiefSessionCreator(
         checkpoint_filename_with_path=newest_checkpoint,
         config=self._session_config())
     return training.MonitoredSession(session_creator=creator)
Exemplo n.º 4
0
  def predict(self, input_fn, predict_keys=None, hooks=None):
    """Yields predictions for given features.

    This is a generator: the graph is built and the session is opened only
    once iteration starts, and the session stays open until the generator is
    exhausted or closed.

    Args:
      input_fn: Input function returning features which is a dictionary of
        string feature name to `Tensor` or `SparseTensor`. If it returns a
        tuple, first item is extracted as features. Prediction continues until
        `input_fn` raises an end-of-input exception (`OutOfRangeError` or
        `StopIteration`).
      predict_keys: list of `str`, name of the keys to predict. It is used if
        the `EstimatorSpec.predictions` is a `dict`. If `predict_keys` is used
        then rest of the predictions will be filtered from the dictionary. If
        `None`, returns all.
      hooks: List of `SessionRunHook` subclass instances. Used for callbacks
        inside the prediction call.

    Yields:
      Evaluated values of `predictions` tensors, one example at a time.

    Raises:
      ValueError: Could not find a trained model in model_dir.
      ValueError: if batch length of predictions are not same.
      ValueError: If there is a conflict between `predict_keys` and
        `predictions`. For example if `predict_keys` is not `None` but
        `EstimatorSpec.predictions` is not a `dict`.
    """
    hooks = list(hooks or [])
    # Check that model has been trained.
    checkpoint_path = saver.latest_checkpoint(self._model_dir)
    if not checkpoint_path:
      raise ValueError('Could not find trained model in model_dir: {}.'.format(
          self._model_dir))

    with ops.Graph().as_default() as g:
      random_seed.set_random_seed(self._config.tf_random_seed)
      training.create_global_step(g)
      features = self._get_features_from_input_fn(input_fn)
      # Build the graph in PREDICT mode. Using the training mode here would
      # make the model function construct its training graph for inference.
      estimator_spec = self._call_model_fn(features, None,
                                           model_fn_lib.ModeKeys.PREDICT)
      predictions = self._extract_keys(estimator_spec.predictions, predict_keys)
      with training.MonitoredSession(
          session_creator=training.ChiefSessionCreator(
              checkpoint_filename_with_path=checkpoint_path,
              scaffold=estimator_spec.scaffold,
              config=config_pb2.ConfigProto(allow_soft_placement=True)),
          hooks=hooks) as mon_sess:
        while not mon_sess.should_stop():
          preds_evaluated = mon_sess.run(predictions)
          if not isinstance(predictions, dict):
            # Single tensor output: iterate over its first dimension.
            for pred in preds_evaluated:
              yield pred
          else:
            # Dict output: slice every value at the same index so that each
            # yielded dict describes one example.
            for i in range(self._extract_batch_length(preds_evaluated)):
              yield {
                  key: value[i]
                  for key, value in six.iteritems(preds_evaluated)
              }
Exemplo n.º 5
0
 def _create_session(self):
     """Opens a MonitoredSession for this predictor.

     The session is restored from the most recent checkpoint reported
     for ``self._checkpoint_dir``. A TensorFlow ``NotFoundError`` raised
     along the way is logged and converted into an ``AttributeError``.
     """
     not_found_msg = (
         "Could not find all variables of the computation "
         "graph in the MoE checkpoint file. This means that the "
         "checkpoint does not correspond to the model specification.")
     try:
         newest_checkpoint = saver.latest_checkpoint(self._checkpoint_dir)
         creator = training.ChiefSessionCreator(
             checkpoint_filename_with_path=newest_checkpoint,
             config=self._session_config())
         return training.MonitoredSession(session_creator=creator)
     except tf.errors.NotFoundError:
         logging.fatal(not_found_msg)
         raise AttributeError("Could not initialize TF session for MoE.")
Exemplo n.º 6
0
 def create_session(self):
     """Opens a MonitoredSession for this predictor.

     The session is restored from the most recent checkpoint reported
     for ``self._checkpoint_dir``. A TensorFlow ``NotFoundError`` raised
     along the way is logged and converted into an ``AttributeError``.
     """
     not_found_msg = (
         "Could not find all variables of the computation "
         "graph in the T2T checkpoint file. This means that the "
         "checkpoint does not correspond to the model specified in "
         "SGNMT. Please double-check pred_src_vocab_size, "
         "pred_trg_vocab_size, and all the t2t_* parameters.")
     try:
         newest_checkpoint = saver.latest_checkpoint(self._checkpoint_dir)
         creator = training.ChiefSessionCreator(
             checkpoint_filename_with_path=newest_checkpoint,
             config=self._session_config())
         return training.MonitoredSession(session_creator=creator)
     except tf.errors.NotFoundError:
         logging.fatal(not_found_msg)
         raise AttributeError("Could not initialize TF session.")
Exemplo n.º 7
0
def create_session():
  """Opens a MonitoredSession for this predictor.

  The checkpoint location is read from ``FLAGS.checkpoint_path``. A
  directory is resolved to its latest checkpoint; any other value is used
  as a direct checkpoint path.

  Raises:
    AttributeError: If ``FLAGS.checkpoint_path`` is unset, or if TensorFlow
      reports a ``NotFoundError`` while the session is being set up.
  """
  if not FLAGS.checkpoint_path:
    raise AttributeError("Please set --checkpoint_path")
  not_found_msg = ("Could not find all variables of the computation "
                   "graph in the T2T checkpoint file. This means that the "
                   "checkpoint does not correspond to the specified model")
  try:
    if not os.path.isdir(FLAGS.checkpoint_path):
      checkpoint_path = FLAGS.checkpoint_path
      tf.logging.info("%s is not a directory. Interpreting as direct "
                      "path to checkpoint..." % checkpoint_path)
    else:
      checkpoint_path = saver.latest_checkpoint(FLAGS.checkpoint_path)
    creator = training.ChiefSessionCreator(
        checkpoint_filename_with_path=checkpoint_path,
        config=session_config())
    return training.MonitoredSession(session_creator=creator)
  except tf.errors.NotFoundError:
    tf.logging.fatal(not_found_msg)
    raise AttributeError("Could not initialize TF session.")
Exemplo n.º 8
0
  def predict(self,
              input_fn,
              predict_keys=None,
              hooks=None,
              checkpoint_path=None,
              yield_single_examples=True):
    """Generates predictions for the features produced by `input_fn`.

    Args:
      input_fn: A function that constructs the features. Prediction continues
        until `input_fn` raises an end-of-input exception (`OutOfRangeError` or
        `StopIteration`). The function should construct and return one of
        the following:

          * A 'tf.data.Dataset' object: Outputs of `Dataset` object must have
            same constraints as below.
          * features: A `Tensor` or a dictionary of string feature name to
            `Tensor`. features are consumed by `model_fn`. They should satisfy
            the expectation of `model_fn` from inputs.
          * A tuple, in which case the first item is extracted as features.

      predict_keys: list of `str`, names of the keys to predict. Only used
        when `EstimatorSpec.predictions` is a `dict`; all other predictions
        are then filtered out. If `None`, everything is returned.
      hooks: List of `SessionRunHook` subclass instances, used for callbacks
        inside the prediction call.
      checkpoint_path: Path of a specific checkpoint to predict from. If
        `None`, the latest checkpoint in `model_dir` is used.
      yield_single_examples: If False, yield each evaluated batch as a whole
        instead of decomposing it into individual elements. Useful when
        `model_fn` returns tensors whose first dimension is not the batch
        size.

    Yields:
      Evaluated values of `predictions` tensors.

    Raises:
      ValueError: Could not find a trained model in model_dir.
      ValueError: if batch length of predictions are not same and
        yield_single_examples is True.
      ValueError: If there is a conflict between `predict_keys` and
        `predictions`. For example if `predict_keys` is not `None` but
        `EstimatorSpec.predictions` is not a `dict`.
    """
    hooks = _check_hooks_type(hooks)
    # Fall back to the newest checkpoint; with neither, the model is untrained.
    checkpoint_path = checkpoint_path or saver.latest_checkpoint(
        self._model_dir)
    if not checkpoint_path:
      raise ValueError('Could not find trained model in model_dir: {}.'.format(
          self._model_dir))

    with ops.Graph().as_default() as graph:
      random_seed.set_random_seed(self._config.tf_random_seed)
      self._create_and_assert_global_step(graph)
      features, input_hooks = self._get_features_from_input_fn(
          input_fn, model_fn_lib.ModeKeys.PREDICT)
      spec = self._call_model_fn(
          features, None, model_fn_lib.ModeKeys.PREDICT, self.config)
      predictions = self._extract_keys(spec.predictions, predict_keys)

      # Hook order: input hooks, caller hooks, then the spec's own hooks.
      session_hooks = (list(input_hooks) + list(hooks) +
                       list(spec.prediction_hooks or []))
      creator = training.ChiefSessionCreator(
          checkpoint_filename_with_path=checkpoint_path,
          master=self._config.master,
          scaffold=spec.scaffold,
          config=self._session_config)

      with training.MonitoredSession(
          session_creator=creator, hooks=session_hooks) as mon_sess:
        while not mon_sess.should_stop():
          batch = mon_sess.run(predictions)
          if not yield_single_examples:
            yield batch
            continue
          if isinstance(predictions, dict):
            batch_length = self._extract_batch_length(batch)
            for row in range(batch_length):
              yield {
                  name: column[row]
                  for name, column in six.iteritems(batch)
              }
          else:
            for single in batch:
              yield single
Exemplo n.º 9
0
def PlaceGraph(metagraph,
               cluster=None,
               allotted_time=3600,
               hparams=None,
               verbose=False):
    """Place the provided metagraph.

    Args:
      metagraph: the metagraph to place.
      cluster: an optional set of hardware resource to optimize the placement
        for. If none is specified, we'll optimize the placement for the
        hardware available on the local machine.
      allotted_time: the maximum amount to time in seconds to spend optimizing
        the placement.
      hparams: hyperparameters used to fine tune the placer. If None, the
        defaults from `hierarchical_controller_hparams()` are used. Note that
        `num_children` is overwritten with 1 on whichever object is used.
      verbose: prints debug information if True.

    Returns:
      The `metagraph` object that was passed in; better placements found
      during the search are exported onto it via `model.export_placement`.
    """
    if cluster is None:
        cluster = gcluster.Cluster()

    # Resolve the default hyperparameters before measuring the original
    # placement: the failure fallback below reads `hparams.failing_signal`
    # and would otherwise dereference None when no hparams were supplied.
    if hparams is None:
        hparams = hierarchical_controller.hierarchical_controller_hparams()

    # Optimize the metagraph to speedup the placement
    rewriter_config = rewriter_config_pb2.RewriterConfig()
    for optimizer_name in ("pruning", "constfold", "arithmetic", "dependency",
                           "pruning"):
        rewriter_config.optimizers.append(optimizer_name)
    optimized_graph = tf_optimizer.OptimizeGraph(rewriter_config,
                                                 metagraph,
                                                 verbose=verbose,
                                                 cluster=cluster)
    # Work on a copy so the optimized graph does not overwrite the input.
    optimized_metagraph = meta_graph_pb2.MetaGraphDef()
    optimized_metagraph.CopyFrom(metagraph)
    optimized_metagraph.graph_def.CopyFrom(optimized_graph)

    item = gitem.Item(optimized_metagraph)

    # Measure the runtime achievable with the original placement.
    try:
        _, original_run_time, _ = cluster.MeasureCosts(item)
        if verbose:
            print("Runtime for original placement: " + str(original_run_time))
    except errors.OpError as e:
        if verbose:
            print("Original placement isn't feasible: " + str(e))
        original_run_time = hparams.failing_signal

    # We run with a single child
    hparams.num_children = 1

    with tf_ops.Graph().as_default():
        # Place all the nodes of the controller on the CPU. We don't want them to
        # fight for accelerator memory with the model to optimize.
        with tf_ops.device("/device:CPU:0"):
            model = hierarchical_controller.HierarchicalController(
                hparams, item, cluster)
            # The return value is not needed here; the call is kept because
            # it builds the controller before the session is created.
            model.build_controller()
            session_creator = training.ChiefSessionCreator()
            with training.MonitoredSession(
                    session_creator=session_creator) as sess:
                start_time = time.time()
                current_time = start_time
                while current_time - start_time < allotted_time:
                    grouping_actions = model.generate_grouping(sess)
                    input_to_seq2seq = model.create_group_embeddings(
                        grouping_actions, verbose=verbose)
                    model.generate_placement(input_to_seq2seq, sess)
                    try:
                        run_time = model.eval_placement(sess, verbose=verbose)
                    except errors.OpError as e:
                        if verbose:
                            print("Failed to run graph:" + str(e))
                        run_time = hparams.failing_signal
                    updated = model.update_reward(sess,
                                                  run_time,
                                                  verbose=verbose)
                    # Only export placements that beat the original runtime.
                    if updated and run_time < original_run_time:
                        if verbose:
                            print("Found better placement, with runtime " +
                                  str(run_time))
                        model.export_placement(metagraph)

                    model.process_reward(sess)

                    current_time = time.time()

    return metagraph
    def custom_predict(self,
                       perturb,
                       ranker,
                       input_fn,
                       predict_keys=None,
                       hooks=None,
                       checkpoint_path=None,
                       yield_single_examples=True):
        """Yields predictions from `ranker` while also fetching extra tensors.

        The graph is built from `ranker`'s input and model functions in
        PREDICT mode. Each session run additionally evaluates several tensors
        stored on `self` and feeds `perturb` into the `self.perturb_on`
        placeholder created here.

        Args:
          perturb: value fed to the boolean `self.perturb_on` placeholder on
            every run.
          ranker: object supplying `_model_dir`, `_config`, `config`,
            `_session_config` and the private graph-building helpers called
            below. Presumably a tf.estimator.Estimator; verify against
            the caller.
          input_fn: input function handed to
            `ranker._get_features_from_input_fn`.
          predict_keys: accepted but not used by this method; the full
            `estimator_spec.predictions` is evaluated.
          hooks: accepted but not used by this method; only the input hooks
            are passed to the session.
          checkpoint_path: checkpoint to restore. If falsy, the latest
            checkpoint in `ranker._model_dir` is looked up. If that is also
            missing, a message is logged and execution continues.
          yield_single_examples: if False, each evaluated batch is yielded
            whole; otherwise it is split into single examples.

        Yields:
          Evaluated values of the prediction tensors.
        """

        if not checkpoint_path:
            checkpoint_path = checkpoint_management.latest_checkpoint(
                ranker._model_dir)
        # Unlike a plain predict, a missing checkpoint is only logged here.
        if not checkpoint_path:
            logging.info(
                'Could not find trained model in model_dir: {}, running '
                'initialization to predict.'.format(ranker._model_dir))
        with tf.Graph().as_default() as g:

            # Boolean switch fed with `perturb` on every session run below.
            self.perturb_on = tf.compat.v1.placeholder(tf.bool)

            random_seed.set_random_seed(ranker._config.tf_random_seed)
            ranker._create_and_assert_global_step(g)
            features, input_hooks = ranker._get_features_from_input_fn(
                input_fn, ModeKeys.PREDICT)
            estimator_spec = ranker._call_model_fn(features, None,
                                                   ModeKeys.PREDICT,
                                                   ranker.config)

            # Call to warm_start has to be after model_fn is called.
            ranker._maybe_warm_start(checkpoint_path)

            predictions = estimator_spec.predictions
            all_hooks = list(input_hooks)
            # No-op: extends with an empty list, so the `hooks` argument is
            # never added.
            all_hooks.extend(list([]))

            self.grad_variable_pair_tensor = calculate_grad_var_pair(self)

            with training.MonitoredSession(
                    session_creator=training.ChiefSessionCreator(
                        checkpoint_filename_with_path=checkpoint_path,
                        master=ranker._config.master,
                        scaffold=estimator_spec.scaffold,
                        config=ranker._session_config),
                    hooks=all_hooks) as mon_sess:
                while not mon_sess.should_stop():
                    # NOTE(review): the self.* tensors fetched here are not
                    # assigned in this method; presumably they are set while
                    # the model function runs. Confirm.
                    [
                        preds_evaluated,
                        temp_query_features_evaluated,
                        temp_answer_features_evaluated,
                        temp_embedded_features_evaluated,
                        temp_labels_evaluated,
                        temp_normalized_features_evaluated,
                        self.grad_variable_pair_evaluated,
                    ] = mon_sess.run([
                        predictions,
                        self.query_features,
                        self.answer_features,
                        self.embedded_features_tensor,
                        self.labels_tensor,
                        self.normalized_features,
                        self.grad_variable_pair_tensor,
                    ], {self.perturb_on: perturb})
                    # Save values for tensors during first nonperturbed evaluation to be
                    # used in next execution.
                    if self.first_eval:
                        self.query_features_evaluated = temp_query_features_evaluated
                        self.answer_features_evaluated = temp_answer_features_evaluated
                        self.embedded_features_evaluated = temp_embedded_features_evaluated
                        self.labels_evaluated = temp_labels_evaluated
                        self.normalized_features_evaluated = temp_normalized_features_evaluated
                        self.first_eval = False

                    if not yield_single_examples:
                        yield preds_evaluated
                    elif not isinstance(predictions, dict):
                        for pred in preds_evaluated:
                            yield pred
                    else:
                        # The batch-length helper is looked up on `self`,
                        # not on `ranker`.
                        for i in range(
                                self._extract_batch_length(preds_evaluated)):
                            yield {
                                key: value[i]
                                for key, value in six.iteritems(
                                    preds_evaluated)
                            }
Exemplo n.º 11
0
    def predict_with_guide(self,
                           input_fn,
                           predict_keys=None,
                           hooks=None,
                           checkpoint_path=None,
                           latest_filename=None,
                           yield_single_examples=True):
        """Yields predictions from a graph driven through placeholders.

        Features and labels are obtained from `input_fn` in EVAL mode, and
        the model function is then called in PREDICT mode on placeholders
        that mirror their dtypes and shapes. A `FeedGuideHook` receives both
        the placeholders and the real tensors.

        Args:
          input_fn: input function returning features (a dict of tensors)
            and labels.
          predict_keys: either a list of prediction keys or None. A list is
            extended with the metric keys of the first model instance; the
            caller's list is not modified. With None, every prediction key
            that is not a metric is used, plus the entries of
            `metrics_eval`.
          hooks: extra session hooks, validated by `_check_hooks_type`.
          checkpoint_path: checkpoint to restore, resolved through
            `self._checkpoint_path`.
          latest_filename: forwarded to `self._checkpoint_path`.
          yield_single_examples: if False, each evaluated batch is yielded
            whole; otherwise it is split into single examples.

        Yields:
          Evaluated values of the selected prediction tensors.

        Raises:
          TypeError: if `predict_keys` is neither None nor a list.
        """
        hooks = estimator_lib._check_hooks_type(hooks)

        checkpoint_path = self._checkpoint_path(checkpoint_path,
                                                latest_filename)

        with ops.Graph().as_default() as g:
            random_seed.set_random_seed(self._config.tf_random_seed)
            self._create_and_assert_global_step(g)
            features, labels, input_hooks = self._get_features_and_labels_from_input_fn(
                input_fn, model_fn_lib.ModeKeys.EVAL)

            # Placeholders mirroring the real input tensors; the model graph
            # is built on these rather than on the input pipeline directly.
            features_ph = {
                key: array_ops.placeholder(value.dtype, value.shape, name=key)
                for key, value in features.items()
            }
            labels_ph = array_ops.placeholder(labels.dtype,
                                              labels.shape,
                                              name="labels")
            feed_guide_hook = FeedGuideHook(features_ph, labels_ph, features,
                                            labels, self.model_dir)

            estimator_spec = self._call_model_fn(features_ph, labels_ph,
                                                 model_fn_lib.ModeKeys.PREDICT,
                                                 self.config)

            model_instance = self.params["model_instances"][0]
            if isinstance(predict_keys, list):
                # Build a new list instead of `+=`, which would extend the
                # caller's list in place and pile up duplicate metric keys
                # when the same list is passed to several calls.
                predict_keys = predict_keys + list(
                    model_instance.metrics_dict.keys())
            elif predict_keys is None:
                # Evaluating volume don't need metrics in model, we use XXXPred to generate 3D predict
                predict_keys = [
                    x for x in estimator_spec.predictions
                    if x not in model_instance.metrics_dict
                ]
                predict_keys.extend(list(model_instance.metrics_eval))
            else:
                raise TypeError(
                    "predict_keys must be None(for 3d eval) or a list(for 2d eval, "
                    "for example [\"Names\", \"Indices\"])")
            predictions = self._extract_keys(estimator_spec.predictions,
                                             predict_keys)
            feed_guide_hook.predictions = predictions

            # Hook order: input hooks, the feed hook, caller hooks, then the
            # spec's own prediction hooks.
            all_hooks = list(input_hooks) + [feed_guide_hook]
            all_hooks.extend(hooks)
            all_hooks.extend(list(estimator_spec.prediction_hooks or []))

            with training.MonitoredSession(
                    session_creator=training.ChiefSessionCreator(
                        checkpoint_filename_with_path=checkpoint_path,
                        master=self._config.master,
                        scaffold=estimator_spec.scaffold,
                        config=self._session_config),
                    hooks=all_hooks) as mon_sess:
                while not mon_sess.should_stop():
                    preds_evaluated = mon_sess.run(predictions)
                    if not yield_single_examples:
                        yield preds_evaluated
                    elif not isinstance(predictions, dict):
                        for pred in preds_evaluated:
                            yield pred
                    else:
                        # Slice every value at the same index so that each
                        # yielded dict describes one example.
                        for i in range(
                                self._extract_batch_length(preds_evaluated)):
                            yield {
                                key: value[i]
                                for key, value in six.iteritems(
                                    preds_evaluated)
                            }