def create_run_config(iterations_per_loop, **kwargs):
  """Returns a RunConfig wrapping a TPUConfig with the given options."""
  return tpu_config.RunConfig(
      master='',
      tpu_config=tpu_config.TPUConfig(
          iterations_per_loop=iterations_per_loop,
          **kwargs),
  )
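
A helper like this is typically handed straight to a TPUEstimator in the surrounding tests. A minimal, hypothetical usage sketch (the model_fn and batch sizes below are placeholders, not part of the original source):

# Hypothetical usage: build a test estimator from the config helper above.
run_config = create_run_config(iterations_per_loop=4, num_shards=2)
estimator = tpu_estimator.TPUEstimator(
    model_fn=my_model_fn,  # placeholder model_fn
    config=run_config,
    use_tpu=False,  # exercise the TPUEstimator code path on CPU in tests
    train_batch_size=8,
    eval_batch_size=8)
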
Example #2
def create_run_config(iterations_per_loop, **kwargs):
  return tpu_config.RunConfig(
      master='',
      tpu_config=tpu_config.TPUConfig(
          iterations_per_loop=iterations_per_loop,
          num_shards=FLAGS.test_num_shards,
          **kwargs),
  )
Example #3
 def test_evaluation_master_defaults_to_master_in_tf_config(self):
     tf_config = {
         'session_master': '_master_123',
     }
     with _set_tf_config_env_variable(tf_config):
         run_config = tpu_config_lib.RunConfig()
         self.assertEqual('_master_123', run_config.master)
         self.assertEqual('_master_123', run_config.evaluation_master)
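
Several of these tests rely on a _set_tf_config_env_variable helper that is not shown in this listing. A minimal sketch of what such a context manager presumably does, assuming it simply serializes the dict into the TF_CONFIG environment variable and restores the previous value afterwards:

import contextlib
import json
import os


@contextlib.contextmanager
def _set_tf_config_env_variable(tf_config):
  """Temporarily exposes `tf_config` through the TF_CONFIG env variable."""
  old_value = os.environ.get('TF_CONFIG')
  os.environ['TF_CONFIG'] = json.dumps(tf_config)
  try:
    yield
  finally:
    if old_value is None:
      del os.environ['TF_CONFIG']
    else:
      os.environ['TF_CONFIG'] = old_value
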
Example #4
def create_run_config(iterations_per_loop, **kwargs):
    if 'num_shards' not in kwargs:
        kwargs['num_shards'] = FLAGS.test_num_shards
    return tpu_config.RunConfig(
        master='',
        tpu_config=tpu_config.TPUConfig(
            iterations_per_loop=iterations_per_loop, **kwargs),
    )
Example #5
 def test_with_tf_config(self):
     tf_config = {
         'service': {
             'tpu_worker_job_name': '_my_new_name',
         }
     }
     with _set_tf_config_env_variable(tf_config):
         config = tpu_config_lib.RunConfig()
         self.assertEqual('_my_new_name', config.tpu_config.tpu_job_name)
Example #6
 def _evaluate_iterations_per_loop_in_seconds(self, value, expected_value,
                                              expected_unit):
     config = tpu_config_lib.RunConfig(tpu_config=tpu_config_lib.TPUConfig(
         iterations_per_loop=value))
     self.assertEqual(config.tpu_config.iterations_per_loop, value)
     d = util_lib.parse_iterations_per_loop(
         config.tpu_config.iterations_per_loop)
     self.assertEqual(expected_value, d.value)
     self.assertEqual(expected_unit, d.unit)
Example #7
def create_run_config(iterations_per_loop, num_shards, num_cores_per_replica,
                      **kwargs):
  return tpu_config.RunConfig(
      master='',
      tpu_config=tpu_config.TPUConfig(
          iterations_per_loop=iterations_per_loop,
          num_shards=num_shards,
          num_cores_per_replica=num_cores_per_replica,
          **kwargs))
Example #8
 def test_user_overwrites_master_in_tf_config(self):
     tf_config = {
         'session_master': '_master_123',
         'eval_session_master': '_eval_master_123'
     }
     with _set_tf_config_env_variable(tf_config):
         run_config = tpu_config_lib.RunConfig(master='_new_master_123')
         self.assertEqual('_new_master_123', run_config.master)
         self.assertEqual('_eval_master_123', run_config.evaluation_master)
Example #9
 def test_respect_evaluation_master_in_tf_config(self):
     tf_config = {
         'cluster': {
             run_config_lib.TaskType.CHIEF: ['host0:0'],
         },
         'task': {
             'type': run_config_lib.TaskType.EVALUATOR,
             'index': 0
         },
     }
     with _set_tf_config_env_variable(tf_config):
         run_config = tpu_config_lib.RunConfig(master='_something')
         self.assertEqual('', run_config.evaluation_master)
Example #10
 def test_no_session_config_set_with_cluster_spec(self):
     tf_config = {
         'cluster': {
             run_config_lib.TaskType.CHIEF: ['host3:3'],
             run_config_lib.TaskType.WORKER: ['host3:4']
         },
         'task': {
             'type': run_config_lib.TaskType.CHIEF,
             'index': 0
         }
     }
     with _set_tf_config_env_variable(tf_config):
         run_config = tpu_config_lib.RunConfig()
         self.assertIsNone(run_config.session_config)
Example #11
 def test_no_session_config_overwrite_with_cluster_spec(self):
   tf_config = {
       'cluster': {
           run_config_lib.TaskType.CHIEF: ['host3:3'],
           run_config_lib.TaskType.WORKER: ['host3:4']
       },
       'task': {
           'type': run_config_lib.TaskType.CHIEF,
           'index': 0
       }
   }
   with _set_tf_config_env_variable(tf_config):
     session_config = config_pb2.ConfigProto(allow_soft_placement=True)
     run_config = tpu_config_lib.RunConfig(session_config=session_config)
     self.assertEqual(session_config, run_config.session_config)
Example #12
def get_estimator(use_tpu,
                  output_dir,
                  feature_columns,
                  batch_size,
                  optimizer_type='adagrad',
                  grad_multiplier_fn=None):
  """Builds a TPUEstimator with an embedding config for the given columns."""
  run_config = tpu_config.RunConfig(
      master='',
      model_dir=output_dir,
      session_config=tf.ConfigProto(
          allow_soft_placement=True, log_device_placement=False),
      tpu_config=tpu_config.TPUConfig(
          iterations_per_loop=1,
          num_shards=FLAGS.test_num_shards,
          per_host_input_for_training=(
              tpu_config.InputPipelineConfig.PER_HOST_V2)),
      save_checkpoints_steps=1)

  if optimizer_type == 'adagrad':
    optimization_parameters = tpu_estimator.AdagradParameters(
        LEARNING_RATE,
        ADADGRAD_INIT_VALUE,
        use_gradient_accumulation=False)
  elif optimizer_type == 'sgd':
    optimization_parameters = tpu_estimator.StochasticGradientDescentParameters(
        LEARNING_RATE)
  else:
    raise ValueError('Unsupported optimizer_type: {}'.format(optimizer_type))

  estimator = tpu_estimator.TPUEstimator(
      model_fn=create_model_fn(feature_columns, optimizer_type),
      use_tpu=use_tpu,
      config=run_config,
      train_batch_size=batch_size,
      eval_batch_size=batch_size,
      embedding_config_spec=tpu_estimator.EmbeddingConfigSpec(
          feature_columns=feature_columns,
          optimization_parameters=optimization_parameters,
          experimental_gradient_multiplier_fn=grad_multiplier_fn))
  return estimator
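
A hypothetical call site for get_estimator; the feature column, key, and output directory below are stand-ins rather than values from the original tests:

# Hypothetical usage with a single identity-based embedding feature column.
def build_test_estimator(use_tpu):
  categorical = tf.feature_column.categorical_column_with_identity(
      key='token_id', num_buckets=100)
  feature_columns = [
      tf.tpu.experimental.embedding_column(categorical, dimension=8)
  ]
  return get_estimator(
      use_tpu=use_tpu,
      output_dir='/tmp/embedding_test',  # placeholder directory
      feature_columns=feature_columns,
      batch_size=16,
      optimizer_type='sgd')
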
Example #13
 def test_default_name(self):
     config = tpu_config_lib.RunConfig()
     self.assertIsNone(config.tpu_config.tpu_job_name)
Example #14
def train(model_dir,
          overwrite=False,
          model=gin.REQUIRED,
          training_steps=gin.REQUIRED,
          unsupervised_data_seed=gin.REQUIRED,
          supervised_data_seed=gin.REQUIRED,
          model_seed=gin.REQUIRED,
          batch_size=gin.REQUIRED,
          num_labelled_samples=gin.REQUIRED,
          train_percentage=gin.REQUIRED,
          name=""):
  """Trains the estimator and exports the snapshot and the gin config.

  The use of this function requires the gin binding 'dataset.name' to be
  specified, as that determines the data set used for training.

  Args:
    model_dir: String with path to directory where model output should be saved.
    overwrite: Boolean indicating whether to overwrite output directory.
    model: GaussianEncoderModel that should be trained and exported.
    training_steps: Integer with number of training steps.
    unsupervised_data_seed: Integer with random seed used for the unsupervised
      data.
    supervised_data_seed: Integer with random seed for supervised data.
    model_seed: Integer with random seed used for the model.
    batch_size: Integer with the batch size.
    num_labelled_samples: Integer with number of labelled observations for
      training.
    train_percentage: Fraction of the labelled data to use for training, in
      (0, 1).
    name: Optional string with name of the model (can be used to name models).
  """
  # We do not use the variable 'name'. Instead, it can be used to name results
  # as it will be part of the saved gin config.
  del name

  # Delete the output directory if necessary.
  if tf.gfile.IsDirectory(model_dir):
    if overwrite:
      tf.gfile.DeleteRecursively(model_dir)
    else:
      raise ValueError("Directory already exists and overwrite is False.")

  # Obtain the dataset.
  dataset = named_data.get_named_ground_truth_data()
  (sampled_observations,
   sampled_factors,
   factor_sizes) = semi_supervised_utils.sample_supervised_data(
       supervised_data_seed, dataset, num_labelled_samples)
  # We instantiate the model class.
  if issubclass(model, semi_supervised_vae.BaseS2VAE):
    model = model(factor_sizes)
  else:
    model = model()

  # We create a TPUEstimator based on the provided model. This is primarily so
  # that we could switch to TPU training in the future. For now, we train
  # locally on GPUs.
  run_config = tpu_config.RunConfig(
      tf_random_seed=model_seed,
      keep_checkpoint_max=1,
      tpu_config=tpu_config.TPUConfig(iterations_per_loop=500))
  tpu_estimator = TPUEstimator(
      use_tpu=False,
      model_fn=model.model_fn,
      model_dir=model_dir,
      train_batch_size=batch_size,
      eval_batch_size=batch_size,
      config=run_config)

  # Set up time to keep track of elapsed time in results.
  experiment_timer = time.time()
  # Do the actual training.
  tpu_estimator.train(
      input_fn=_make_input_fn(dataset, num_labelled_samples,
                              unsupervised_data_seed, sampled_observations,
                              sampled_factors, train_percentage),
      steps=training_steps)
  # Save model as a TFHub module.
  output_shape = named_data.get_named_ground_truth_data().observation_shape
  module_export_path = os.path.join(model_dir, "tfhub")
  gaussian_encoder_model.export_as_tf_hub(model, output_shape,
                                          tpu_estimator.latest_checkpoint(),
                                          module_export_path)

  # Save the results. The result dir will contain all the results and config
  # files that we copied along, as we progress in the pipeline. The idea is that
  # these files will be available for analysis at the end.
  results_dict = tpu_estimator.evaluate(
      input_fn=_make_input_fn(
          dataset,
          num_labelled_samples,
          unsupervised_data_seed,
          sampled_observations,
          sampled_factors,
          train_percentage,
          num_batches=num_labelled_samples,
          validation=True))
  results_dir = os.path.join(model_dir, "results")
  results_dict["elapsed_time"] = time.time() - experiment_timer
  results.update_result_directory(results_dir, "train", results_dict)
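
train is normally configured entirely through gin; a hypothetical direct invocation, in which the dataset binding, output directory, and model class are illustrative only, could look like:

# Hypothetical invocation; the dataset name and model class are examples only.
gin.parse_config(["dataset.name = 'dsprites_full'"])
train(
    model_dir='/tmp/s2_vae',  # placeholder output directory
    overwrite=True,
    model=semi_supervised_vae.BaseS2VAE,  # placeholder model class
    training_steps=1000,
    unsupervised_data_seed=0,
    supervised_data_seed=0,
    model_seed=0,
    batch_size=64,
    num_labelled_samples=100,
    train_percentage=0.9)
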
Example #15
 def _validate_invalid_iterations_per_loop(self, iterations_per_loop):
     with self.assertRaisesRegexp(ValueError, 'must be positive'):
         tpu_config_lib.RunConfig(tpu_config=tpu_config_lib.TPUConfig(
             iterations_per_loop=iterations_per_loop))
Example #16
 def test_default_values(self):
     run_config = tpu_config_lib.RunConfig()
     self.assertEqual('', run_config.master)
     self.assertEqual('', run_config.evaluation_master)
Example #17
 def test_user_provided_master_and_evaluation_master(self):
     run_config = tpu_config_lib.RunConfig(
         master='_master_123', evaluation_master='_eval_master_123')
     self.assertEqual('_master_123', run_config.master)
     self.assertEqual('_eval_master_123', run_config.evaluation_master)
Example #18
 def test_fail_with_invalid_num_shards(self):
     with self.assertRaisesRegexp(ValueError, 'must be positive'):
         tpu_config_lib.RunConfig(tpu_config=tpu_config_lib.TPUConfig(
             num_shards=0))
Example #19
 def test_no_session_config_overwrite_in_local_case(self):
     session_config = config_pb2.ConfigProto(allow_soft_placement=True)
     run_config = tpu_config_lib.RunConfig(session_config=session_config)
     self.assertEqual(session_config, run_config.session_config)
Example #20
 def test_no_session_config_set_in_local_case(self):
     run_config = tpu_config_lib.RunConfig()
     self.assertIsNone(run_config.session_config)
Example #21
 def test_evaluation_master_defaults_to_master(self):
     run_config = tpu_config_lib.RunConfig(master='_master_123')
     self.assertEqual('_master_123', run_config.master)
     self.assertEqual('_master_123', run_config.evaluation_master)
Example #22
  def get_activations_and_sequence_lengths(
      self,
      embedding_weights: List[List[float]],
      sparse_ids: tf.SparseTensorValue,
      batch_size: int,
      max_sequence_length: int,
      dimension: int,
      combiner: Text = 'mean',
  ) -> Tuple[tf.Tensor, tf.Tensor]:
    """Gets the activations and seq lengths for a batch of sparse IDs.

    This method uses TPUEstimator and the Feature Column API to get embedding
    activations for a batch of sparse IDs using a specified set of
    embedding weights.

    Args:
      embedding_weights: The embedding weights as a 2D list of floats.  The
        outer list length is the vocabulary size of the embedding table.  The
        inner list length is the dimension of the embedding weights.
      sparse_ids: The embedding IDs to lookup. This is a 2D SparseTensorValue of
        shape [batch_size, max_sequence_length].
      batch_size: The size of the first dimension of sparse_ids.
      max_sequence_length:  The size of the second dimension of sparse_ids.
      dimension: The embedding dimension size (number of floats for each
        embedding ID).
      combiner: The embedding column combiner (used for multivalent features).

    Returns:
      A tuple containing:
        activations:  The activations for the specified sparse_ids.
          type=float32, shape=[batch_size, max_sequence_length, dimension]
        sequence_lengths: The sequence length of each example.
          type=int64. shape=[batch_size].
    """

    vocab_size = len(embedding_weights)
    categorical_column = (
        tf.feature_column.sequence_categorical_column_with_identity(
            key=self._KEY,
            num_buckets=vocab_size,
        ))

    # Create embedding column initialized with weights provided by caller.
    embedding_column = tf.tpu.experimental.embedding_column(
        categorical_column,
        dimension=dimension,
        max_sequence_length=max_sequence_length,
        initializer=tf.constant_initializer(embedding_weights),
        combiner=combiner,
    )

    # Add an SGD optimizer. This choice is arbitrary for computing activations.
    # It's only required to avoid an undefined gradients error.
    embedding_opt = tf.tpu.experimental.StochasticGradientDescentParameters(.1)
    embedding_config_spec = tpu_estimator.EmbeddingConfigSpec(
        feature_columns=[embedding_column],
        optimization_parameters=embedding_opt,
    )

    def _input_fn(params: Dict[Text, int]) -> tf.data.Dataset:
      """Creates a batched dataset containing the sparse_ids as a feature."""
      # Convert sparse IDs to batched dataset.
      sparse_ids_dataset = tf.data.Dataset.range(1).map(
          lambda x: {self._KEY: tf.SparseTensor.from_value(sparse_ids)})

      # Unbatch and rebatch the dataset based on the batch_size param from
      # TPUEstimator. This is necessary for shape validation performed internal
      # to TPUEstimator.
      return sparse_ids_dataset.unbatch().repeat().batch(params['batch_size'])

    def _host_call(
        concat_activations: tf.Tensor,
        concat_sequence_lengths: tf.Tensor,
    ) -> List[tf.Operation]:
      """Stores the activations and sequence lengths into a summary.

      TPUEstimator will concat the activations and sequence lengths from the
      minibatches on each core along axis=0 and pass them to this host call.
      This host call writes them to a file using the TF summary APIs.

      Args:
        concat_activations: The activations for the global batch. 2D
          Tensor(type=float32, shape=[batch_size, max_sequence_length]).
        concat_sequence_lengths:  The sequence lengths for the global batch. 2D
          Tensor(type=int64, shape=[batch_size, max_sequence_length]).

      Returns:
        A list of summary ops for TPUEstimator to run on the host.
      """
      with contrib_summary.create_file_writer(self._summary_dir).as_default():
        with contrib_summary.always_record_summaries():
          contrib_summary.generic(
              self._SUMMARY_ACTIVATIONS,
              concat_activations,
          )
          contrib_summary.generic(self._SUMMARY_SEQUENCE_LENGTHS,
                                  concat_sequence_lengths)
          return contrib_summary.all_summary_ops()

    def _model_fn(
        features: Dict[Text, tf.Tensor],
        params: Dict[Text, int],
        mode: model_fn_lib.ModeKeys,
    ) -> tpu_estimator.TPUEstimatorSpec:
      """A model which writes activations and sequence lengths to a file.

      This method creates a model to extract the activations and sequence
      lengths on each TPU core and pass them to a host call which writes them
      to a file.

      The model also applies an optimizer to the activations simply to avoid an
      undefined gradients error.

      Args:
        features: A dictionary mapping keys to tensor inputs.
        params: Parameters passed by TPUEstimator.
        mode: Mode can be (TRAIN, EVAL, PREDICT).

      Returns:
        A TPUEstimatorSpec which holds the training_op that TPUEstimator will
        run on TPU and the host_call that TPUEstimator will run on the host.
      """
      del params
      input_layer = tf.keras.experimental.SequenceFeatures([embedding_column])
      activations, sequence_lengths = input_layer(features)
      opt = tf.tpu.CrossShardOptimizer(tf.train.GradientDescentOptimizer(0.1))
      loss = tf.reduce_sum(activations)
      train_op = opt.minimize(loss, global_step=tf.train.get_global_step())

      return tpu_estimator.TPUEstimatorSpec(
          mode=mode,
          loss=loss,
          train_op=train_op,
          host_call=(_host_call, [activations, sequence_lengths]),
      )

    tpu_config = tpu_config_lib.TPUConfig(
        per_host_input_for_training=(
            tpu_config_lib.InputPipelineConfig.PER_HOST_V2),)
    run_config = tpu_config_lib.RunConfig(
        session_config=tf.ConfigProto(isolate_session_state=True),
        tpu_config=tpu_config,
    )
    estimator = tpu_estimator.TPUEstimator(
        model_fn=_model_fn,
        model_dir=self._model_dir,
        use_tpu=True,
        train_batch_size=batch_size,
        eval_batch_size=batch_size,
        config=run_config,
        embedding_config_spec=embedding_config_spec,
    )

    # Train for 1 step and store the activations as summaries.
    estimator.train(_input_fn, steps=1)

    # Read the event summaries and decode the activation tensors.
    output = {}
    for filename in tf.io.gfile.listdir(self._summary_dir):
      filepath = os.path.join(self._summary_dir, filename)
      for event in tf.train.summary_iterator(filepath):
        for v in event.summary.value:
          decoded = tf.io.decode_raw(v.tensor.tensor_content, v.tensor.dtype)
          shape = tf.TensorShape(v.tensor.tensor_shape)
          output[v.tag] = tf.reshape(decoded, shape)
    return (output[self._SUMMARY_ACTIVATIONS],
            output[self._SUMMARY_SEQUENCE_LENGTHS])
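
A hypothetical call to this helper, with illustrative embedding weights and sparse IDs for two short sequences:

# Hypothetical usage: 4-dimensional embeddings for two padded sequences.
weights = [[float(i)] * 4 for i in range(10)]  # vocab_size=10, dimension=4
sparse_ids = tf.SparseTensorValue(
    indices=[[0, 0], [0, 1], [1, 0]],
    values=[3, 7, 5],
    dense_shape=[2, 3])
activations, sequence_lengths = self.get_activations_and_sequence_lengths(
    embedding_weights=weights,
    sparse_ids=sparse_ids,
    batch_size=2,
    max_sequence_length=3,
    dimension=4)
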
Example #23
 def test_fail_with_iterations_per_loop(self):
   with self.assertRaisesRegexp(ValueError, 'must be positive'):
     tpu_config_lib.RunConfig(
         tpu_config=tpu_config_lib.TPUConfig(iterations_per_loop=0))