Exemplo n.º 1
0
  def setUpClass(cls):
    """Empties the TF test temp dir and generates "tiny_algo" data into it."""
    scratch_dir = tf.test.get_temp_dir()
    # Start from a clean slate: remove the directory tree, then recreate it.
    shutil.rmtree(scratch_dir)
    os.mkdir(scratch_dir)
    cls.data_dir = scratch_dir

    # Generate a small test dataset; no separate tmp dir is passed (None).
    tiny_problem = registry.problem("tiny_algo")
    tiny_problem.generate_data(cls.data_dir, None)
Exemplo n.º 2
0
  def setUpClass(cls):
    """Empties the TF test temp dir and generates "tiny_algo" data into it.

    Also sets FLAGS.problems and stores the directory on
    TrainerUtilsTest.data_dir.
    """
    scratch_dir = tf.test.get_temp_dir()
    # Remove whatever is there, then recreate the directory empty.
    shutil.rmtree(scratch_dir)
    os.mkdir(scratch_dir)

    # Generate a small test dataset.
    FLAGS.problems = "tiny_algo"
    TrainerUtilsTest.data_dir = scratch_dir
    test_problem = registry.problem(FLAGS.problems)
    test_problem.generate_data(TrainerUtilsTest.data_dir, None)
Exemplo n.º 3
0
def generate_data():
  """Creates the data/tmp directories and generates data for the problem.

  Directory paths come from FLAGS.data_dir and FLAGS.tmp_dir (with `~`
  expanded); the problem name comes from get_problem_name().
  """
  data_dir = os.path.expanduser(FLAGS.data_dir)
  tmp_dir = os.path.expanduser(FLAGS.tmp_dir)
  for directory in (data_dir, tmp_dir):
    tf.gfile.MakeDirs(directory)

  problem_name = get_problem_name()
  tf.logging.info("Generating data for %s" % problem_name)
  selected_problem = registry.problem(problem_name)
  selected_problem.generate_data(data_dir, tmp_dir)
Exemplo n.º 4
0
  def TestVideoModel(self,
                     in_frames,
                     out_frames,
                     hparams,
                     model,
                     expected_last_dim):
    """Builds `model` on random frames and checks the shape of its logits.

    Args:
      in_frames: number of input frames fed to the model.
      out_frames: number of target frames.
      hparams: hyperparameters object; mutated in place (frame counts,
        `problem` and `problem_hparams` are set on it).
      model: callable invoked as `model(hparams, mode)` to build the model.
      expected_last_dim: expected size of the trailing dimension of the logits.
    """
    # Random pixel values in [0, 255]. `np.random.randint` has an exclusive
    # upper bound, so 256 here matches the inclusive `high=255` of the
    # deprecated `np.random.random_integers`.
    x = np.random.randint(0, 256, size=(8, in_frames, 64, 64, 3))
    y = np.random.randint(0, 256, size=(8, out_frames, 64, 64, 3))

    hparams.video_num_input_frames = in_frames
    hparams.video_num_target_frames = out_frames

    problem = registry.problem("video_stochastic_shapes10k")
    p_hparams = problem.get_hparams(hparams)
    hparams.problem = problem
    hparams.problem_hparams = p_hparams

    with self.test_session() as session:
      features = {
          "inputs": tf.constant(x, dtype=tf.int32),
          "targets": tf.constant(y, dtype=tf.int32),
      }
      # Keep the factory (`model`) and the built instance under separate names.
      model_instance = model(hparams, tf.estimator.ModeKeys.TRAIN)
      logits, _ = model_instance(features)
      session.run(tf.global_variables_initializer())
      res = session.run(logits)
    # Logits are expected to have the targets' shape plus one trailing dim.
    expected_shape = y.shape + (expected_last_dim,)
    self.assertEqual(res.shape, expected_shape)
Exemplo n.º 5
0
def main(_):
  """Runs a gym environment problem to generate data, then logs reward stats."""
  for directory in (FLAGS.data_dir, FLAGS.tmp_dir):
    tf.gfile.MakeDirs(directory)

  # Register the game's problem only when the registry does not list it yet.
  problem_name = "gym_discrete_problem_with_agent_on_%s" % FLAGS.game
  already_registered = problem_name in registry.list_problems()
  if not already_registered:
    gym_env.register_game(FLAGS.game)

  # Generate trajectories.
  tf.logging.info("Running %s environment for %d steps for trajectories.",
                  FLAGS.game, FLAGS.num_env_steps)
  problem = registry.problem(problem_name)
  problem.settable_num_steps = FLAGS.num_env_steps
  problem.settable_eval_phase = FLAGS.eval
  problem.generate_data(FLAGS.data_dir, FLAGS.tmp_dir)

  # Log stats; skipped when no episode finished (avoids dividing by zero).
  if not problem.statistics.number_of_dones:
    return
  mean_reward = (problem.statistics.sum_of_rewards /
                 problem.statistics.number_of_dones)
  tf.logging.info("Mean reward: %.2f, Num dones: %d",
                  mean_reward,
                  problem.statistics.number_of_dones)
Exemplo n.º 6
0
    def __init__(self, translate_host, translate_port, source_lang, target_lang, model_name, problem, t2t_usr_dir, data_dir, preprocess_cmd, postprocess_cmd):
        """Initialize a TransformerTranslator object according to the given
        configuration settings.

        @param translate_host: host name of the model server
        @param translate_port: port of the model server (string or integer)
        @param source_lang: source language (ISO-639-1 ID); not used here
        @param target_lang: target language (ISO-639-1 ID); not used here
        @param model_name: servable name passed to the gRPC request function
        @param problem: name of the registered problem to look up
        @param t2t_usr_dir: user directory imported via usr_dir.import_usr_dir
        @param data_dir: data directory (``~`` is expanded) stored in the hparams
        @param preprocess_cmd: bash command for text preprocessing
        @param postprocess_cmd: bash command for text postprocessing
        """
        # Precompile the server address; format() accepts the port as either
        # a string or an integer.
        self.server = "{}:{}".format(translate_host, translate_port)

        # initialize text processing tools (can be shared among threads)
        self.tokenizer = Tokenizer({'lowercase': True,
                                    'moses_escape': True})
        self.preprocess = preprocess_cmd
        self.postprocess = postprocess_cmd
        # The user directory is imported before the registry lookup below.
        usr_dir.import_usr_dir(t2t_usr_dir)
        self.problem = registry.problem(problem)
        hparams = tf.contrib.training.HParams(
            data_dir=os.path.expanduser(data_dir))
        # Return value is unused; the call is kept for its effect on the problem.
        self.problem.get_hparams(hparams)
        self.request_fn = serving_utils.make_grpc_request_fn(
            servable_name=model_name,
            server=self.server,
            timeout_secs=30)
Exemplo n.º 7
0
def add_problem_hparams(hparams, problem_name):
  """Attaches the named problem and its hparams to `hparams`.

  Sets `hparams.problem` to the registered problem instance and
  `hparams.problem_hparams` to the result of its `get_hparams(hparams)`.
  """
  registered = registry.problem(problem_name)
  # Computed before `hparams` is modified, exactly as callers expect.
  problem_specific = registered.get_hparams(hparams)
  hparams.problem, hparams.problem_hparams = registered, problem_specific
Exemplo n.º 8
0
def add_problem_hparams(hparams, problem_name_or_instance):
  """Attaches a problem and its hparams to `hparams`.

  `problem_name_or_instance` is used as-is when it is a `Problem`; otherwise
  it is looked up in the registry. Sets `hparams.problem` and
  `hparams.problem_hparams`.
  """
  resolved = (
      problem_name_or_instance
      if isinstance(problem_name_or_instance, Problem)
      else registry.problem(problem_name_or_instance))
  # Computed before `hparams` is modified.
  problem_specific = resolved.get_hparams(hparams)
  hparams.problem, hparams.problem_hparams = resolved, problem_specific
Exemplo n.º 9
0
def add_problem_hparams(hparams, problems):
  """Attaches hparams and instances for each "-"-separated problem name.

  After the call, `hparams.problems` holds the per-problem hparams and
  `hparams.problem_instances` the matching problem objects, in input order.
  """
  hparams.problems = []
  hparams.problem_instances = []
  for name in problems.split("-"):
    instance = registry.problem(name)
    # get_hparams runs before either list is extended for this problem.
    instance_hparams = instance.get_hparams(hparams)

    hparams.problem_instances.append(instance)
    hparams.problems.append(instance_hparams)
Exemplo n.º 10
0
def generate_data_for_registered_problem(problem_name):
  """Generates data for a registered problem using the FLAGS directories.

  Raises:
    ValueError: if FLAGS.num_shards is set.
  """
  tf.logging.info("Generating data for %s.", problem_name)
  if FLAGS.num_shards:
    raise ValueError("--num_shards should not be set for registered Problem.")
  registered = registry.problem(problem_name)
  # A negative task id is passed on as None.
  task_id = FLAGS.task_id if FLAGS.task_id >= 0 else None
  data_dir = os.path.expanduser(FLAGS.data_dir)
  tmp_dir = os.path.expanduser(FLAGS.tmp_dir)
  registered.generate_data(data_dir, tmp_dir, task_id=task_id)
Exemplo n.º 11
0
def init():
  """Imports the user dir and binds the module-level `problem`.

  Reads `t2t_usr_dir`, `problem_name` and `data_dir` from the enclosing
  scope.
  """
  global problem
  tf.logging.set_verbosity(tf.logging.INFO)
  tf.logging.info("importing ghsumm/trainer from {}".format(t2t_usr_dir))
  usr_dir.import_usr_dir(t2t_usr_dir)
  print(t2t_usr_dir)
  problem = registry.problem(problem_name)
  expanded_data_dir = os.path.expanduser(data_dir)
  problem_hparams = tf.contrib.training.HParams(data_dir=expanded_data_dir)
  # Return value is unused; the call is kept for its effect on `problem`.
  problem.get_hparams(problem_hparams)
Exemplo n.º 12
0
def score_file(filename):
  """Score each line in a file and return the scores.

  Each line is either just the targets, or "inputs<TAB>targets". The model
  named by FLAGS.model is built in EVAL mode and restored from the latest
  checkpoint recorded in FLAGS.output_dir.

  Args:
    filename: path of the file to score, opened with tf.gfile.Open.

  Returns:
    A list with the value of `losses["training"]` for each line, in file order.

  Raises:
    ValueError: if a line contains more than one tab, if the problem has an
      "inputs" encoder but a line carries no inputs column, or if no
      checkpoint state is found in FLAGS.output_dir.
  """
  # Prepare model.
  hparams = create_hparams()
  encoders = registry.problem(FLAGS.problem).feature_encoders(FLAGS.data_dir)
  has_inputs = "inputs" in encoders

  # Prepare features for feeding into the model.
  if has_inputs:
    inputs_ph = tf.placeholder(dtype=tf.int32)  # Just length dimension.
    batch_inputs = tf.reshape(inputs_ph, [1, -1, 1, 1])  # Make it 4D.
  targets_ph = tf.placeholder(dtype=tf.int32)  # Just length dimension.
  batch_targets = tf.reshape(targets_ph, [1, -1, 1, 1])  # Make it 4D.
  features = {
      "inputs": batch_inputs,
      "targets": batch_targets,
  } if has_inputs else {"targets": batch_targets}

  # Prepare the model and the graph when model runs on features.
  model = registry.model(FLAGS.model)(hparams, tf.estimator.ModeKeys.EVAL)
  _, losses = model(features)
  saver = tf.train.Saver()

  with tf.Session() as sess:
    # Load weights from checkpoint.
    ckpts = tf.train.get_checkpoint_state(FLAGS.output_dir)
    if ckpts is None:
      # get_checkpoint_state returns None when there is nothing to restore.
      raise ValueError("No checkpoint found in %s." % FLAGS.output_dir)
    saver.restore(sess, ckpts.model_checkpoint_path)
    # Run on each line.
    with tf.gfile.Open(filename) as f:
      lines = f.readlines()
    results = []
    for line in lines:
      tab_split = line.split("\t")
      if len(tab_split) > 2:
        raise ValueError("Each line must have at most one tab separator.")
      if len(tab_split) == 1:
        if has_inputs:
          # Without this check `inputs` would be unbound on the first line or
          # silently reused from the previous line.
          raise ValueError(
              "Each line must be 'inputs<TAB>targets' for a problem with "
              "inputs.")
        targets = tab_split[0].strip()
      else:
        targets = tab_split[1].strip()
        inputs = tab_split[0].strip()
      # Run encoders and append EOS symbol.
      targets_numpy = encoders["targets"].encode(
          targets) + [text_encoder.EOS_ID]
      if has_inputs:
        inputs_numpy = encoders["inputs"].encode(inputs) + [text_encoder.EOS_ID]
      # Prepare the feed.
      feed = {
          inputs_ph: inputs_numpy,
          targets_ph: targets_numpy
      } if has_inputs else {targets_ph: targets_numpy}
      # Get the score.
      np_loss = sess.run(losses["training"], feed)
      results.append(np_loss)
  return results
Exemplo n.º 13
0
def main(_):
  """Builds reward confusion matrices from model predictions and prints them.

  Iterates the PREDICT split of FLAGS.problem in fixed-size batches, restores
  the latest checkpoint from FLAGS.output_dir and compares predicted against
  ground-truth "target_reward" values.
  """
  tf.logging.set_verbosity(tf.logging.INFO)
  trainer_lib.set_random_seed(FLAGS.random_seed)
  usr_dir.import_usr_dir(FLAGS.t2t_usr_dir)

  # Create hparams.
  hparams = create_hparams()
  hparams.force_full_predict = True
  batch_size = hparams.batch_size

  # Iterating over dev/test partition of the data.
  # Change the data partition if necessary.
  predict_mode = tf.estimator.ModeKeys.PREDICT
  dataset = registry.problem(FLAGS.problem).dataset(
      predict_mode, shuffle_files=False, hparams=hparams)
  dataset = dataset.apply(tf.contrib.data.batch_and_drop_remainder(batch_size))
  data = dataset.make_one_shot_iterator().get_next()
  # Only features whose key starts with "input" are fed to the model.
  input_data = {k: data[k] for k in data.keys() if k.startswith("input")}

  # Create model.
  model = registry.model(FLAGS.model)(hparams, predict_mode)
  prediction_ops = model.infer(input_data)

  # Confusion matrices, indexed as [ground truth, prediction].
  nr = hparams.problem.num_rewards
  cm_per_frame = np.zeros((nr, nr), dtype=np.uint64)
  cm_next_frame = np.zeros((nr, nr), dtype=np.uint64)

  saver = tf.train.Saver()
  with tf.train.SingularMonitoredSession() as sess:
    # Load latest checkpoint.
    ckpt = tf.train.get_checkpoint_state(FLAGS.output_dir).model_checkpoint_path
    saver.restore(sess.raw_session(), ckpt)

    counter = 0
    while not sess.should_stop():
      counter += 1
      # Progress is printed on every batch.
      print(counter)

      # Predict next frames.
      fetches = [prediction_ops["target_reward"], data["target_reward"]]
      rew_pd, rew_gt = sess.run(fetches)

      for i in range(batch_size):
        # First frame of each example feeds the next-frame matrix.
        cm_next_frame[rew_gt[i, 0, 0], rew_pd[i, 0, 0]] += 1
        for truth, predicted in zip(rew_gt[i], rew_pd[i]):
          cm_per_frame[truth, predicted] += 1

  print_confusion_matrix("Per-frame Confusion Matrix", cm_per_frame)
  print_confusion_matrix("Next-frame Confusion Matrix", cm_next_frame)
Exemplo n.º 14
0
def main(argv):
  """Restores a trained model and runs pruning_utils.sparsify on it.

  The evaluation callback handed to `sparsify` measures accuracy over
  FLAGS.eval_steps batches of the problem's EVAL input.

  Args:
    argv: command-line arguments; when non-empty, everything after the first
      element is passed to t2t_trainer.set_hparams_from_args.
  """
  tf.logging.set_verbosity(tf.logging.INFO)
  trainer_lib.set_random_seed(FLAGS.random_seed)
  usr_dir.import_usr_dir(FLAGS.t2t_usr_dir)
  t2t_trainer.maybe_log_registry_and_exit()

  if FLAGS.generate_data:
    t2t_trainer.generate_data()

  if argv:
    t2t_trainer.set_hparams_from_args(argv[1:])
  hparams = t2t_trainer.create_hparams()
  trainer_lib.add_problem_hparams(hparams, FLAGS.problem)
  pruning_params = create_pruning_params()
  pruning_strategy = create_pruning_strategy(pruning_params.strategy)

  config = t2t_trainer.create_run_config(hparams)
  params = {"batch_size": hparams.batch_size}

  # NOTE(review): an earlier comment here mentioned adding "_rev" to the
  # problem name to avoid image standardization; nothing in this function
  # does that, so the name must already carry it if it is needed.
  problem = registry.problem(FLAGS.problem)
  input_fn = problem.make_estimator_input_fn(tf.estimator.ModeKeys.EVAL,
                                             hparams)
  dataset = input_fn(params, config).repeat()
  features, labels = dataset.make_one_shot_iterator().get_next()

  sess = tf.Session()

  model_fn = t2t_model.T2TModel.make_estimator_model_fn(
      FLAGS.model, hparams, use_tpu=FLAGS.use_tpu)
  spec = model_fn(
      features,
      labels,
      tf.estimator.ModeKeys.EVAL,
      params=hparams,
      config=config)

  # Restore weights from the latest checkpoint in the chosen directory.
  saver = tf.train.Saver()
  checkpoint_path = os.path.expanduser(FLAGS.output_dir or
                                       FLAGS.checkpoint_path)
  saver.restore(sess, tf.train.latest_checkpoint(checkpoint_path))

  def eval_model():
    """Returns the accuracy value after FLAGS.eval_steps update steps."""
    preds = spec.predictions["predictions"]
    preds = tf.argmax(preds, -1, output_type=labels.dtype)
    _, acc_update_op = tf.metrics.accuracy(labels=labels, predictions=preds)
    # tf.local_variables_initializer replaces the deprecated
    # tf.initialize_local_variables; it resets the metric's local variables.
    sess.run(tf.local_variables_initializer())
    for _ in range(FLAGS.eval_steps):
      acc = sess.run(acc_update_op)
    return acc

  pruning_utils.sparsify(sess, eval_model, pruning_strategy, pruning_params)
Exemplo n.º 15
0
  def train_eval_and_decode(self):
    """Does eval and decode after training every eval_freq_in_steps.

    Training runs in chunks of `eval_freq_in_steps` steps up to
    `self._train_spec.max_steps`. After each chunk the estimator is evaluated
    and `self.decode` is run on the EVAL split. In mlperf mode the loop stops
    early once the decode hparams report success, and a RUN_STOP record is
    logged either way.
    """
    eval_steps = self._hparams.eval_freq_in_steps
    # Decided once, from the problem name at entry; the problem itself is
    # swapped back and forth inside the loop.
    packed_dataset = "_packed" in self._hparams.problem.name
    mlperf_log.transformer_print(key=mlperf_log.TRAIN_LOOP)
    for i in range(0, self._train_spec.max_steps, eval_steps):
      mlperf_log.transformer_print(
          key=mlperf_log.TRAIN_EPOCH, value=i // eval_steps)
      # From the second chunk on, the previous iteration left the unpacked
      # problem in place, so switch back to the "_packed" variant to train.
      if packed_dataset and i > 0:
        problem = registry.problem(self._hparams.problem.name + "_packed")
        p_hparams = problem.get_hparams(self._hparams)
        self._hparams.problem = problem
        self._hparams.problem_hparams = p_hparams
      self._estimator.train(
          self._train_spec.input_fn,
          steps=eval_steps,
          hooks=self._train_spec.hooks)
      self._estimator.evaluate(
          self._eval_spec.input_fn,
          steps=self._eval_spec.steps,
          hooks=self._eval_spec.hooks)
      # Decoding below runs against the problem without the "_packed" part of
      # its name.
      if packed_dataset:
        problem = registry.problem(
            self._hparams.problem.name.replace("_packed", ""))
        p_hparams = problem.get_hparams(self._hparams)
        self._hparams.problem = problem
        self._hparams.problem_hparams = p_hparams
      mlperf_log.transformer_print(key=mlperf_log.EVAL_START)
      if self._hparams.mlperf_mode:
        # Record the number of steps trained so far for the decode step.
        self._decode_hparams.mlperf_decode_step = i + eval_steps
      self.decode(dataset_split=tf.estimator.ModeKeys.EVAL)
      d_hparams = self._decode_hparams
      if self._hparams.mlperf_mode and d_hparams.mlperf_success:
        mlperf_log.transformer_print(
            key=mlperf_log.RUN_STOP, value={"success": "true"})
        break

    # Reached both after a normal loop exit and after the early break; the
    # failure record is only written when success was never reported.
    d_hparams = self._decode_hparams
    if self._hparams.mlperf_mode and not d_hparams.mlperf_success:
      mlperf_log.transformer_print(
          key=mlperf_log.RUN_STOP, value={"success": "false"})
Exemplo n.º 16
0
  def get_environment_spec(self):
    """Returns the standard Atari env spec configured as a simulated env."""
    spec = standard_atari_env_spec(self.env_name)
    spec.simulated_env = True
    spec.add_hparam("simulation_random_starts", self.simulation_random_starts)
    spec.add_hparam("intrinsic_reward_scale", self.intrinsic_reward_scale)

    # The problem instance itself (not its name) is stored on the spec.
    frames_problem = registry.problem(self.initial_frames_problem)
    spec.add_hparam("initial_frames_problem", frames_problem)

    spec.add_hparam("video_num_input_frames", self.num_input_frames)
    spec.add_hparam("video_num_target_frames", self.video_num_target_frames)
    return spec
Exemplo n.º 17
0
 def translate(self, inputs):
     """Sends `inputs` to the model server and returns its single prediction.

     Returns a dict with the keys 'inputs', 'outputs' and 'scores'.
     """
     # Look up the registered problem class.
     registered_problem = registry.problem(self.problem)
     # Instantiate the HParams object and let the problem process it.
     problem_hparams = HParams(data_dir=os.path.expanduser(self.data_dir))
     registered_problem.get_hparams(problem_hparams)
     request_fn = self.make_request_fn()
     # Prediction: one input goes in, so exactly one (output, score) pair
     # comes back.
     predictions = serving_utils.predict(
         [inputs], registered_problem, request_fn)
     (output, score), = predictions
     return {'inputs': inputs, 'outputs': output, 'scores': score}
Exemplo n.º 18
0
  def train_eval_and_decode(self):
    """Trains in chunks of eval_freq_in_steps, evaluating and decoding each.

    For problems whose name contains "_packed", training uses the packed
    problem while decoding uses the problem with "_packed" removed from its
    name. In mlperf mode the loop stops once decoding reports success.
    """

    def _use_problem(name):
      """Installs the named problem and its hparams on self._hparams."""
      problem = registry.problem(name)
      p_hparams = problem.get_hparams(self._hparams)
      self._hparams.problem = problem
      self._hparams.problem_hparams = p_hparams

    eval_steps = self._hparams.eval_freq_in_steps
    packed_dataset = "_packed" in self._hparams.problem.name
    mlperf_log.transformer_print(key=mlperf_log.TRAIN_LOOP)
    for i in range(0, self._train_spec.max_steps, eval_steps):
      mlperf_log.transformer_print(
          key=mlperf_log.TRAIN_EPOCH, value=i // eval_steps)
      # The previous iteration left the unpacked problem installed.
      if packed_dataset and i > 0:
        _use_problem(self._hparams.problem.name + "_packed")
      self._estimator.train(
          self._train_spec.input_fn,
          steps=eval_steps,
          hooks=self._train_spec.hooks)
      self._estimator.evaluate(
          self._eval_spec.input_fn,
          steps=self._eval_spec.steps,
          hooks=self._eval_spec.hooks)
      # Decode against the problem without "_packed" in its name.
      if packed_dataset:
        _use_problem(self._hparams.problem.name.replace("_packed", ""))
      mlperf_log.transformer_print(key=mlperf_log.EVAL_START)
      self.decode(dataset_split=tf.estimator.ModeKeys.EVAL)
      decode_hp = self._decode_hparams
      if decode_hp.mlperf_mode and decode_hp.mlperf_success:
        mlperf_log.transformer_print(
            key=mlperf_log.RUN_STOP, value={"success": "true"})
        break

    # Log the failure record when success was never reported.
    decode_hp = self._decode_hparams
    if decode_hp.mlperf_mode and not decode_hp.mlperf_success:
      mlperf_log.transformer_print(
          key=mlperf_log.RUN_STOP, value={"success": "false"})
Exemplo n.º 19
0
def main(_):
    """Computes lengths for the problem in FLAGS.problems and reports them."""
    usr_dir.import_usr_dir(FLAGS.t2t_usr_dir)

    # Resolve the directories handed to the problem.
    data_dir = os.path.expanduser(FLAGS.data_dir)
    tmp_dir = os.path.expanduser(FLAGS.tmp_dir)

    problem_name = FLAGS.problems
    tf.logging.info("Generating data for %s" % problem_name)
    lengths = registry.problem(problem_name).get_length(data_dir, tmp_dir)

    length_statistics(lengths)
Exemplo n.º 20
0
 def __init__(self):
     """Sets up the problem, request function, tokenizers and delimiter regex.

     Configuration is read from FLAGS (t2t_usr_dir, problem, data_dir).
     """
     tf.logging.set_verbosity(tf.logging.INFO)
     validate_flags()
     usr_dir.import_usr_dir(FLAGS.t2t_usr_dir)
     self.problem = registry.problem(FLAGS.problem)
     self.hparams = tf.contrib.training.HParams(
         data_dir=os.path.expanduser(FLAGS.data_dir))
     # Return value is unused; the call is kept for its effect on the problem.
     self.problem.get_hparams(self.hparams)
     self.request_fn = make_request_fn()
     self.tokenizer = MosesTokenizer('en')
     self.moses_detokenizer = MosesDetokenizer('zh')
     # Raw string: the pattern is unchanged, but `\w`, `\.`, `\?` and `\s` are
     # no longer invalid escape sequences in a normal string literal.
     # Matches whitespace that follows "." or "?", except after the two
     # look-behind exclusions (e.g. "x.y." style and "Mr."-like tokens).
     self.delimiter = re.compile(
         r"(?<!\w\.\w.)(?<![A-Z][a-z]\.)(?<=\.|\?)\s")
Exemplo n.º 21
0
 def __init__(self, batch_env):
   """Wraps `batch_env` and builds the discrete autoencoder model in EVAL mode."""
   super(AutoencoderWrapper, self).__init__(batch_env)
   # Non-trainable buffer with one zero-initialised observation per env.
   observ_buffer_shape = (len(self),) + self.observ_shape
   self._observ = tf.Variable(
       tf.zeros(observ_buffer_shape, self.observ_dtype), trainable=False)
   current_scope = tf.get_variable_scope()
   with tf.variable_scope(current_scope, reuse=tf.AUTO_REUSE):
     ae_hparams = autoencoders.autoencoder_discrete_pong()
     dummy_problem = registry.problem("dummy_autoencoder_problem")
     # problem_hparams is filled in before the problem itself is attached.
     ae_hparams.problem_hparams = dummy_problem.get_hparams(ae_hparams)
     ae_hparams.problem = dummy_problem
     self.autoencoder_model = autoencoders.AutoencoderOrderedDiscrete(
         ae_hparams, tf.estimator.ModeKeys.EVAL)
Exemplo n.º 22
0
def query_t2t(input_txt, data_dir, problem_name, server_name, server_address,
              t2t_usr_dir):
    """Queries a served T2T model with one input and returns its prediction.

    Args:
        input_txt: the raw input to send to the model.
        data_dir: data directory stored in the hparams (``~`` is expanded).
        problem_name: name of the registered problem.
        server_name: passed to make_request_fn.
        server_address: passed to make_request_fn.
        t2t_usr_dir: user directory imported before the registry lookup.

    Returns:
        A tuple (output, score) for the single input. The output is also
        printed.
    """
    usr_dir.import_usr_dir(t2t_usr_dir)
    problem = registry.problem(problem_name)
    hparams = tf.contrib.training.HParams(
        data_dir=os.path.expanduser(data_dir))
    problem.get_hparams(hparams)
    request_fn = make_request_fn(server_name, server_address)
    inputs = input_txt
    outputs = serving_utils.predict([inputs], problem, request_fn)
    # predict returns one (output, score) pair per input; take the single
    # pair out of the list before splitting it.
    outputs, = outputs
    output, score = outputs
    print(output)
    return output, score
Exemplo n.º 23
0
  def get_environment_spec(self):
    """Returns a simulated Atari env spec wrapped with IntToBitWrapper."""
    spec = standard_atari_env_spec(self.env_name)
    spec.wrappers = [[tf_atari_wrappers.IntToBitWrapper, {}]]
    spec.simulated_env = True

    # Fixed simulation settings for this problem.
    spec.add_hparam("simulation_random_starts", False)
    spec.add_hparam("intrinsic_reward_scale", 0.0)

    # The problem instance itself (not its name) is stored on the spec.
    frames_problem = registry.problem(self.initial_frames_problem)
    spec.add_hparam("initial_frames_problem", frames_problem)

    spec.add_hparam("video_num_input_frames", self.num_input_frames)
    spec.add_hparam("video_num_target_frames", self.video_num_target_frames)
    return spec
Exemplo n.º 24
0
def get_data_filepatterns(problems, data_dir, mode):
  """Returns one file glob per "-"-separated problem for the given mode.

  TRAIN mode yields "<path>-train*" patterns; any other mode yields
  "<path>-dev*".
  """
  pattern = ("%s-train*"
             if mode == tf.estimator.ModeKeys.TRAIN else "%s-dev*")
  filepatterns = []
  for name in problems.split("-"):
    try:
      base = registry.problem(name).dataset_filename()
    except ValueError:
      # Fall back to the base name parsed out of the problem name.
      base, _, _ = problem_hparams.parse_problem_name(name)
    filepatterns.append(pattern % os.path.join(data_dir, base))
  return filepatterns
Exemplo n.º 25
0
def init():
    """Imports the user dir and binds the module-level encoder globals.

    Reads `t2t_usr_dir`, `problem_name` and `data_dir` from the enclosing
    scope and sets `fname`, `input_encoder` and `output_decoder`.
    """
    global input_encoder, output_decoder, fname
    tf.logging.set_verbosity(tf.logging.INFO)
    tf.logging.info(
        "Trying to import poetry/trainer from {}".format(t2t_usr_dir))
    usr_dir.import_usr_dir(t2t_usr_dir)
    print(t2t_usr_dir)
    registered = registry.problem(problem_name)
    expanded_data_dir = os.path.expanduser(data_dir)
    registered.get_hparams(
        tf.contrib.training.HParams(data_dir=expanded_data_dir))
    # Problems without inputs are encoded through their "targets" feature.
    if registered.has_inputs:
        fname = "inputs"
    else:
        fname = "targets"
    input_encoder = registered.feature_info[fname].encoder
    output_decoder = registered.feature_info["targets"].encoder
Exemplo n.º 26
0
def train_agent(problem_name,
                agent_model_dir,
                event_dir,
                world_model_dir,
                epoch_data_dir,
                hparams,
                epoch=0,
                is_final_epoch=False):
    """Train the PPO agent in the simulated environment.

    PPO hparams are created from `hparams.ppo_params`, overridden by any
    `ppo_`-prefixed values present in `hparams`, and then passed to
    rl_trainer_lib.train under temporarily overridden flags.
    """
    gym_problem = registry.problem(problem_name)
    ppo_hparams = trainer_lib.create_hparams(hparams.ppo_params)

    # Copy "ppo_<name>" overrides from `hparams` onto the PPO hparams.
    overridable = ("epochs_num", "epoch_length", "learning_rate",
                   "num_agents", "optimization_epochs")
    for name in overridable:
        prefixed = "ppo_" + name
        if prefixed in hparams:
            ppo_hparams.set_hparam(name, hparams.get(prefixed))

    # The final epoch uses doubled epoch count and epoch length.
    save_every = hparams.ppo_epochs_num
    if is_final_epoch:
        save_every *= 2
        ppo_hparams.epoch_length *= 2
    ppo_hparams.save_models_every_epochs = save_every
    ppo_hparams.world_model_dir = world_model_dir
    ppo_hparams.add_hparam("force_beginning_resets", True)

    # Adding model hparams for model specific adjustments.
    world_model_hparams = trainer_lib.create_hparams(
        hparams.generative_model_params)
    ppo_hparams.add_hparam("model_hparams", world_model_hparams)

    # Shallow copy so the problem's own spec is not rebound below.
    env_spec = copy.copy(gym_problem.environment_spec)
    env_spec.simulation_random_starts = hparams.simulation_random_starts
    env_spec.intrinsic_reward_scale = hparams.intrinsic_reward_scale
    ppo_hparams.add_hparam("environment_spec", env_spec)

    flag_overrides = {
        "problem": problem_name,
        "model": hparams.generative_model,
        "hparams_set": hparams.generative_model_params,
        "output_dir": world_model_dir,
        "data_dir": epoch_data_dir,
    }
    with temporary_flags(flag_overrides):
        rl_trainer_lib.train(
            ppo_hparams, event_dir, agent_model_dir, epoch=epoch)
Exemplo n.º 27
0
    def get_environment_spec(self):
        """Returns the standard Atari env spec configured as a simulated env."""
        spec = standard_atari_env_spec(self.env_name)
        spec.simulated_env = True

        # Fixed simulation settings for this problem.
        spec.add_hparam("simulation_random_starts", False)
        spec.add_hparam("simulation_flip_first_random_for_beginning", False)
        spec.add_hparam("intrinsic_reward_scale", 0.0)

        # The problem instance itself (not its name) is stored on the spec.
        frames_problem = registry.problem(self.initial_frames_problem)
        spec.add_hparam("initial_frames_problem", frames_problem)

        spec.add_hparam("video_num_input_frames", self.num_input_frames)
        spec.add_hparam("video_num_target_frames",
                        self.video_num_target_frames)
        return spec
Exemplo n.º 28
0
  def testSingleEvalStepRawSession(self):
    """Runs one EVAL step of a T2T model in a raw session and checks shapes."""

    # Mirror the values that would normally come from the command line.
    model_name = "transformer"
    FLAGS.hparams_set = "transformer_test"
    FLAGS.problems = "tiny_algo"
    data_dir = "/tmp"  # Used only when a vocab file or such like is needed.

    # Problem encoders and hparams.
    encoders = registry.problem(FLAGS.problems).feature_encoders(data_dir)
    hparams = trainer_utils.create_hparams(FLAGS.hparams_set, data_dir)
    trainer_utils.add_problem_hparams(hparams, FLAGS.problems)

    # Placeholders carry only the length dimension; reshape makes them 4D.
    inputs_ph = tf.placeholder(dtype=tf.int32)
    batch_inputs = tf.reshape(inputs_ph, [1, -1, 1, 1])
    # In INFER mode targets can be None.
    targets_ph = tf.placeholder(dtype=tf.int32)
    batch_targets = tf.reshape(targets_ph, [1, -1, 1, 1])

    # We run on the first problem here.
    first_problem = hparams.problems[0]
    features = {
        "inputs": batch_inputs,
        "targets": batch_targets,
        "problem_choice": tf.constant(0),
        "input_space_id": tf.constant(first_problem.input_space_id),
        "target_space_id": tf.constant(first_problem.target_space_id)
    }

    # Build the graph in EVAL mode by invoking model_fn.
    estimator_spec = model_builder.model_fn(
        model_name,
        features,
        tf.estimator.ModeKeys.EVAL,
        hparams,
        problem_names=[FLAGS.problems])
    # These are not images, so axes 2 and 3 are squeezed away.
    predictions = tf.squeeze(
        estimator_spec.predictions["predictions"], axis=[2, 3])

    # Having the graph, let's run it on some data.
    with self.test_session() as sess:
      sess.run(tf.global_variables_initializer())
      # Encode from raw strings to numpy arrays using the problem encoders.
      feed = {
          inputs_ph: encoders["inputs"].encode("0 1 0"),
          targets_ph: encoders["targets"].encode("0 1 0"),
      }
      np_predictions = sess.run(predictions, feed)
      # Expected shape is batch x length x vocab_size, where, for us,
      # batch = 1, length = 3, vocab_size = 4.
      self.assertEqual(np_predictions.shape, (1, 3, 4))
Exemplo n.º 29
0
  def testSingleTrainStepCall(self):
    """Calls a T2T model directly in TRAIN mode and checks reuse and shapes.

    The model is called twice on the same features to verify that the second
    call creates no new trainable variables, then the predictions are run on
    a small encoded example.
    """

    # Set model name, hparams, problems as would be set on command line.
    model_name = "transformer"
    FLAGS.hparams_set = "transformer_test"
    FLAGS.problems = "tiny_algo"
    data_dir = "/tmp"  # Used only when a vocab file or such like is needed.

    # Create the problem object, hparams, placeholders, features dict.
    encoders = registry.problem(FLAGS.problems).feature_encoders(data_dir)
    hparams = trainer_utils.create_hparams(FLAGS.hparams_set, data_dir)
    trainer_utils.add_problem_hparams(hparams, FLAGS.problems)

    # Now set a mode and create the model.
    mode = tf.estimator.ModeKeys.TRAIN
    model = registry.model(model_name)(hparams, mode)

    # Create placeholder for features and make them batch-sized.
    inputs_ph = tf.placeholder(dtype=tf.int32)  # Just length dimension.
    batch_inputs = tf.reshape(inputs_ph, [1, -1, 1, 1])  # Make it 4D.
    targets_ph = tf.placeholder(dtype=tf.int32)  # Just length dimension.
    batch_targets = tf.reshape(targets_ph, [1, -1, 1, 1])  # Make it 4D.
    features = {
        "inputs": batch_inputs,
        "targets": batch_targets,
        "target_space_id": tf.constant(hparams.problems[0].target_space_id)
    }

    # Call the model.
    predictions, _ = model(features)
    nvars = len(tf.trainable_variables())
    model(features)  # Call again and check that reuse works.
    self.assertEqual(nvars, len(tf.trainable_variables()))

    # Having the graph, let's run it on some data.
    with self.test_session() as sess:
      sess.run(tf.global_variables_initializer())
      inputs = "0 1 0"
      targets = "0 1 0"
      # Encode from raw string to numpy input array using problem encoders.
      inputs_numpy = encoders["inputs"].encode(inputs)
      targets_numpy = encoders["targets"].encode(targets)
      # Feed the encoded inputs and targets and run session.
      feed = {inputs_ph: inputs_numpy, targets_ph: targets_numpy}
      np_predictions = sess.run(predictions, feed)
      # Check that the result has the expected 5-D shape; the two singleton
      # axes added by the reshape above are not squeezed here.
      #   (presumably batch x length x 1 x 1 x vocab_size, with batch = 1,
      #   length = 3, vocab_size = 4 — confirm against the model.)
      self.assertEqual(np_predictions.shape, (1, 3, 1, 1, 4))
Exemplo n.º 30
0
def decode(estimator, hparams, decode_hp):
    """Decode from estimator. Interactive, from file, or from dataset.

    The mode is chosen from FLAGS: `decode_interactive` first, then
    `decode_from_file`, otherwise the dataset path. The first two modes raise
    ValueError when the estimator is configured for TPU.
    """
    if FLAGS.decode_interactive:
        if estimator.config.use_tpu:
            raise ValueError("TPU can only decode from dataset.")
        decoding.decode_interactively(estimator,
                                      hparams,
                                      decode_hp,
                                      checkpoint_path=FLAGS.checkpoint_path)
    elif FLAGS.decode_from_file:
        if estimator.config.use_tpu:
            raise ValueError("TPU can only decode from dataset.")
        decoding.decode_from_file(estimator,
                                  FLAGS.decode_from_file,
                                  hparams,
                                  decode_hp,
                                  FLAGS.decode_to_file,
                                  checkpoint_path=FLAGS.checkpoint_path)
        # Optionally stamp the output file with the checkpoint's mtime.
        if FLAGS.checkpoint_path and FLAGS.keep_timestamp:
            ckpt_time = os.path.getmtime(FLAGS.checkpoint_path + ".index")
            os.utime(FLAGS.decode_to_file, (ckpt_time, ckpt_time))
    else:

        # Fathom
        # NOTE(review): this call reads FLAGS.problem while the block below
        # reads FLAGS.problems — confirm both flags exist and agree.
        # NOTE(review): output_dir is derived from FLAGS.decode_output_file,
        # whereas decode_to_file above uses FLAGS.decode_to_file — confirm
        # that these are meant to be two different flags.
        predictions = decoding.decode_from_dataset(
            estimator,
            FLAGS.problem,
            hparams,
            decode_hp,
            decode_to_file=FLAGS.decode_to_file,
            dataset_split=dataset_to_t2t_mode(FLAGS.dataset_split),
            return_generator=FLAGS.fathom_output_predictions,
            # save logs/summaries to a directory with the same name as decode_output_file
            # in situations where we are calling decode without write permissions
            # to the model directory
            output_dir=os.path.splitext(FLAGS.decode_output_file)[0])

        # Fathom
        if FLAGS.fathom_output_predictions:
            print('Assuming only one problem...')
            # "-" separates multiple problem names, so its absence means a
            # single problem was given.
            assert '-' not in FLAGS.problems
            # if we already have built problem instance in hparams, no need to create
            # it second time (as it's downloading files from gcs)
            if hasattr(hparams, 'problem'):
                problem = hparams.problem
            else:
                problem = registry.problem(FLAGS.problems)
            problem.output_predictions(predictions=predictions,
                                       num_examples=FLAGS.num_examples)
Exemplo n.º 31
0
def main(_):
    """Query a served model and print each response with its latency.

    Resolves the problem named by FLAGS.problem, then loops: the query text
    comes from FLAGS.inputs_once when that flag is set, otherwise from an
    interactive prompt. Each query goes through serving_utils.predict and the
    output, its score(s) and the elapsed time in milliseconds are printed.
    The loop stops after one pass when FLAGS.inputs_once is set.

    Args:
        _: Unused positional argument.
    """
    tf.logging.set_verbosity(tf.logging.INFO)
    validate_flags()
    usr_dir.import_usr_dir(FLAGS.t2t_usr_dir)
    problem = registry.problem(FLAGS.problem)
    # The returned problem hparams are not used by this function.
    problem.get_hparams(
        hparam.HParams(data_dir=os.path.expanduser(FLAGS.data_dir)))
    request_fn = make_request_fn()

    # Report layout when the score has at least one dimension.
    multi_score_report = """
Input:
{inputs}

Output (Scores [{score}]) (Time [{time}] milliseconds):
{output}
                """
    # Report layout when the score is a scalar.
    single_score_report = """
Input:
{inputs}

Output (Score {score:.3f}) (Time {time} milliseconds):
{output}
                """

    while True:
        query = FLAGS.inputs_once or input(">> ")
        started = datetime.datetime.now()
        responses = serving_utils.predict([query], problem, request_fn)
        finished = datetime.datetime.now()
        elapsed_ms = int((finished - started).total_seconds() * 1000)

        # One query was sent, so exactly one (output, score) pair is expected.
        (output, score), = responses
        if len(score.shape) > 0:  # pylint: disable=g-explicit-length-test
            report = multi_score_report.format(
                inputs=query,
                output=output,
                score=",".join("{:.3f}".format(s) for s in score),
                time=elapsed_ms)
        else:
            report = single_score_report.format(inputs=query,
                                                output=output,
                                                score=score,
                                                time=elapsed_ms)
        print(report)

        if FLAGS.inputs_once:
            break
Exemplo n.º 32
0
  def testSingleEvalStepRawSession(self):
    """Builds an EVAL graph through model_fn and runs it in a plain session."""
    # Mirror what the command-line flags would normally provide.
    FLAGS.hparams_set = "transformer_test"
    FLAGS.problems = "tiny_algo"
    model_name = "transformer"
    data_dir = "/tmp"  # Handed to the encoders and to create_hparams.

    # Problem encoders and hparams (with the problem hparams attached).
    encoders = registry.problem(FLAGS.problems).feature_encoders(data_dir)
    hparams = trainer_utils.create_hparams(FLAGS.hparams_set, data_dir)
    trainer_utils.add_problem_hparams(hparams, FLAGS.problems)

    # Placeholders are fed flat id sequences; reshape them to [1, -1, 1, 1].
    four_d = [1, -1, 1, 1]
    inputs_ph = tf.placeholder(dtype=tf.int32)
    batch_inputs = tf.reshape(inputs_ph, four_d)
    # In INFER mode targets can be None.
    targets_ph = tf.placeholder(dtype=tf.int32)
    batch_targets = tf.reshape(targets_ph, four_d)
    first_problem = hparams.problems[0]
    features = {
        "inputs": batch_inputs,
        "targets": batch_targets,
        "problem_choice": 0,  # Only the first problem is exercised.
        "input_space_id": first_problem.input_space_id,
        "target_space_id": first_problem.target_space_id
    }

    # Build the graph for evaluation.
    estimator_spec = model_builder.model_fn(
        model_name,
        features,
        tf.estimator.ModeKeys.EVAL,
        hparams,
        problem_names=[FLAGS.problems])
    # Axes 2 and 3 are the singleton dims added by the reshape above.
    predictions = tf.squeeze(
        estimator_spec.predictions["predictions"], axis=[2, 3])

    # Run the graph on one encoded example.
    with self.test_session() as sess:
      sess.run(tf.global_variables_initializer())
      raw_inputs = "0 1 0"
      raw_targets = "0 1 0"
      feed = {
          inputs_ph: encoders["inputs"].encode(raw_inputs),
          targets_ph: encoders["targets"].encode(raw_targets),
      }
      np_predictions = sess.run(predictions, feed)
      # Expected shape is batch x length x vocab_size = 1 x 3 x 4.
      self.assertEqual(np_predictions.shape, (1, 3, 4))
Exemplo n.º 33
0
    def testSingleTrainStepCall(self):
        """Calls a TRAIN-mode model twice and checks variable reuse and shape."""
        # Mirror what the command-line flags would normally provide.
        FLAGS.hparams_set = "transformer_test"
        FLAGS.problems = "tiny_algo"
        model_name = "transformer"
        data_dir = "/tmp"  # Handed to the encoders and to create_hparams.

        # Problem encoders and hparams (with the problem hparams attached).
        encoders = registry.problem(FLAGS.problems).feature_encoders(data_dir)
        hparams = trainer_utils.create_hparams(FLAGS.hparams_set, data_dir)
        trainer_utils.add_problem_hparams(hparams, FLAGS.problems)

        # Instantiate the registered model class in training mode.
        model = registry.model(model_name)(hparams,
                                           tf.estimator.ModeKeys.TRAIN)

        # Placeholders are fed flat id sequences; reshape to [1, -1, 1, 1].
        four_d = [1, -1, 1, 1]
        inputs_ph = tf.placeholder(dtype=tf.int32)
        batch_inputs = tf.reshape(inputs_ph, four_d)
        targets_ph = tf.placeholder(dtype=tf.int32)
        batch_targets = tf.reshape(targets_ph, four_d)
        features = {
            "inputs": batch_inputs,
            "targets": batch_targets,
            "target_space_id": tf.constant(hparams.problems[0].target_space_id)
        }

        # A second call must not create any new trainable variables.
        predictions, _ = model(features)
        vars_after_first_call = len(tf.trainable_variables())
        model(features)
        self.assertEqual(vars_after_first_call, len(tf.trainable_variables()))

        # Run the graph on one encoded example.
        with self.test_session() as sess:
            sess.run(tf.global_variables_initializer())
            raw_inputs = "0 1 0"
            raw_targets = "0 1 0"
            feed = {
                inputs_ph: encoders["inputs"].encode(raw_inputs),
                targets_ph: encoders["targets"].encode(raw_targets),
            }
            np_predictions = sess.run(predictions, feed)
            # batch = 1, length = 3, two singleton dims, vocab_size = 4.
            self.assertEqual(np_predictions.shape, (1, 3, 1, 1, 4))
Exemplo n.º 34
0
  def get_predictions(self, num_decodes=2):
    """Returns fake video predictions and the problem they belong to.

    Four samples are drawn from a RandomState seeded with 0; each sample is a
    dict with "inputs", "outputs" and "targets" arrays. The same per-sample
    list object is repeated `num_decodes` times.

    Args:
      num_decodes: How many times the per-sample list is repeated.

    Returns:
      A (predictions, problem) tuple, where problem is the registered
      "video_stochastic_shapes10k" problem.
    """
    rng = np.random.RandomState(0)
    # Draw order (inputs, outputs, targets) determines the generated values.
    input_frames = rng.randint(0, 255, (4, 2, 64, 64, 3))
    output_frames = rng.randint(0, 255, (4, 5, 64, 64, 3))
    target_frames = rng.randint(0, 255, (4, 5, 64, 64, 3))

    per_sample = [
        {"inputs": frames_in, "outputs": frames_out, "targets": frames_tgt}
        for frames_in, frames_out, frames_tgt in zip(
            input_frames, output_frames, target_frames)
    ]
    decodes = [per_sample] * num_decodes
    return decodes, registry.problem("video_stochastic_shapes10k")
Exemplo n.º 35
0
 def train_eval_and_decode(self):
     """Trains in eval_freq_in_steps chunks, evaluating and decoding after each.

     When the configured problem name contains "_packed", the packed problem
     is used for train/evaluate and the name with "_packed" removed is used
     for decoding; the problem on self._hparams is swapped accordingly on
     every round.
     """

     def _install_problem(name):
         # Look up `name` and store it, with its hparams, on self._hparams.
         problem = registry.problem(name)
         p_hparams = problem.get_hparams(self._hparams)
         self._hparams.problem = problem
         self._hparams.problem_hparams = p_hparams

     eval_steps = self._hparams.eval_freq_in_steps
     packed_dataset = "_packed" in self._hparams.problem.name
     for step in range(0, self._train_spec.max_steps, eval_steps):
         # After the first round the unpacked problem is installed (see
         # below), so switch back to the packed one before training again.
         if packed_dataset and step > 0:
             _install_problem(self._hparams.problem.name + "_packed")
         self._estimator.train(self._train_spec.input_fn,
                               steps=eval_steps,
                               hooks=self._train_spec.hooks)
         self._estimator.evaluate(self._eval_spec.input_fn,
                                  steps=self._eval_spec.steps,
                                  hooks=self._eval_spec.hooks)
         if packed_dataset:
             _install_problem(
                 self._hparams.problem.name.replace("_packed", ""))
         self.decode(dataset_split=tf.estimator.ModeKeys.EVAL)
Exemplo n.º 36
0
def evaluate_world_model(simulated_problem_name, problem_name, hparams,
                         world_model_dir, epoch_data_dir, tmp_dir,
                         autoencoder_path=None):
  """Generates simulated-environment data and returns the reward accuracy.

  Args:
    simulated_problem_name: Registry name of the simulated problem.
    problem_name: Registry name of the real-environment problem.
    hparams: Supplies simulated_env_generator_num_steps, generative_model and
      generative_model_params.
    world_model_dir: Passed as the "output_dir" flag during generation.
    epoch_data_dir: Data directory, also passed as the "data_dir" flag.
    tmp_dir: Temporary directory handed to generate_data.
    autoencoder_path: Passed as the "autoencoder_path" flag.

  Returns:
    successful_episode_reward_predictions divided by max(1, dones), both read
    from the simulated problem after generation.
  """
  sim_problem = registry.problem(simulated_problem_name)
  real_problem = registry.problem(problem_name)

  # Configure the simulated problem before generating data.
  sim_problem.settable_num_steps = hparams.simulated_env_generator_num_steps
  sim_problem.real_env_problem = real_problem
  sim_problem.simulation_random_starts = False
  sim_problem.intrinsic_reward_scale = 0.

  flag_overrides = {
      "problem": problem_name,
      "model": hparams.generative_model,
      "hparams_set": hparams.generative_model_params,
      "data_dir": epoch_data_dir,
      "output_dir": world_model_dir,
      "autoencoder_path": autoencoder_path,
  }
  with temporary_flags(flag_overrides):
    sim_problem.generate_data(epoch_data_dir, tmp_dir)

  # Clamp the denominator to at least 1 so zero finished episodes is safe.
  episode_count = max(1., sim_problem.dones)
  return (
      sim_problem.successful_episode_reward_predictions / float(episode_count))
Exemplo n.º 37
0
  def get_environment_spec(self):
    """Returns the simulated Atari env spec with simulation hparams added."""
    env_spec = standard_atari_env_spec(
        self.env_name,
        simulated=True,
        resize_height_factor=self.resize_height_factor,
        resize_width_factor=self.resize_width_factor)

    # Fixed simulation settings.
    fixed_settings = (
        ("simulation_random_starts", True),
        ("simulation_flip_first_random_for_beginning", True),
        ("intrinsic_reward_scale", 0.0),
    )
    for hparam_name, hparam_value in fixed_settings:
      env_spec.add_hparam(hparam_name, hparam_value)

    # Settings taken from this problem instance.
    env_spec.add_hparam("initial_frames_problem",
                        registry.problem(self.initial_frames_problem))
    env_spec.add_hparam("video_num_input_frames", self.num_input_frames)
    env_spec.add_hparam("video_num_target_frames", self.video_num_target_frames)
    return env_spec
Exemplo n.º 38
0
def train_agent(problem_name,
                agent_model_dir,
                event_dir,
                world_model_dir,
                epoch_data_dir,
                hparams,
                autoencoder_path=None,
                epoch=0):
    """Trains the PPO agent inside the simulated environment.

    Args:
        problem_name: Registry name of the gym problem.
        agent_model_dir: Forwarded to rl_trainer_lib.train.
        event_dir: Forwarded to rl_trainer_lib.train.
        world_model_dir: Stored on the PPO hparams and passed as the
            "output_dir" flag.
        epoch_data_dir: Passed as the "data_dir" flag.
        hparams: Outer hparams; the ppo_* and simulation fields are copied
            from it.
        autoencoder_path: Passed as the "autoencoder_path" flag.
        epoch: Forwarded to rl_trainer_lib.train.
    """
    gym_problem = registry.problem(problem_name)
    ppo_hparams = trainer_lib.create_hparams(hparams.ppo_params)

    # Copy the PPO settings from the outer hparams.
    num_epochs = hparams.ppo_epochs_num
    ppo_hparams.epochs_num = num_epochs
    ppo_hparams.simulated_environment = True
    ppo_hparams.simulation_random_starts = hparams.simulation_random_starts
    ppo_hparams.intrinsic_reward_scale = hparams.intrinsic_reward_scale
    ppo_hparams.eval_every_epochs = 50
    ppo_hparams.save_models_every_epochs = num_epochs
    ppo_hparams.epoch_length = hparams.ppo_epoch_length
    ppo_hparams.num_agents = hparams.ppo_num_agents
    ppo_hparams.problem = gym_problem
    ppo_hparams.world_model_dir = world_model_dir
    # A falsy ppo_learning_rate keeps the rate from hparams.ppo_params.
    if hparams.ppo_learning_rate:
        ppo_hparams.learning_rate = hparams.ppo_learning_rate

    # 4x for the StackAndSkipWrapper minus one to always finish for reporting.
    time_limit = (ppo_hparams.epoch_length - 1) * 4
    wrappers = [
        (TimeLimitWrapper, {"timelimit": time_limit}),
        (StackAndSkipWrapper, {"skip": 4}),
    ]
    # The problem's own wrappers go after the two fixed ones.
    wrappers.extend(gym_problem.in_graph_wrappers)
    ppo_hparams.add_hparam("in_graph_wrappers", wrappers)

    flag_overrides = {
        "problem": problem_name,
        "model": hparams.generative_model,
        "hparams_set": hparams.generative_model_params,
        "output_dir": world_model_dir,
        "data_dir": epoch_data_dir,
        "autoencoder_path": autoencoder_path,
    }
    with temporary_flags(flag_overrides):
        rl_trainer_lib.train(ppo_hparams,
                             gym_problem.env_name,
                             event_dir,
                             agent_model_dir,
                             epoch=epoch)
Exemplo n.º 39
0
  def get_environment_spec(self):
    """Returns the Atari env spec set up for simulation with fixed wrappers."""
    env_spec = rl.standard_atari_env_spec(self.env_name)
    env_spec.wrappers = [
        [tf_atari_wrappers.IntToBitWrapper, {}],
        [tf_atari_wrappers.StackWrapper, {"history": 4}],
    ]
    env_spec.simulated_env = True

    # Fixed simulation settings.
    fixed_settings = (
        ("simulation_random_starts", True),
        ("simulation_flip_first_random_for_beginning", True),
        ("intrinsic_reward_scale", 0.0),
    )
    for hparam_name, hparam_value in fixed_settings:
      env_spec.add_hparam(hparam_name, hparam_value)

    # Settings taken from this problem instance.
    env_spec.add_hparam("initial_frames_problem",
                        registry.problem(self.initial_frames_problem))
    env_spec.add_hparam("video_num_input_frames", self.num_input_frames)
    env_spec.add_hparam("video_num_target_frames", self.video_num_target_frames)
    return env_spec
Exemplo n.º 40
0
def add_problem_hparams(hparams, problems):
  """Fills hparams.problems and hparams.problem_instances.

  Args:
    hparams: Hparams object to update in place.
    problems: Problem names joined with "-".

  Returns:
    The same hparams object. For each name, problem_instances receives the
    registry problem (or None when the fallback path was taken) and problems
    receives the matching problem hparams.
  """
  hparams.problems = []
  hparams.problem_instances = []
  for name in problems.split("-"):
    try:
      instance = registry.problem(name)
      instance_hparams = instance.internal_hparams(hparams)
    except ValueError:
      # Fall back to problem_hparams.problem_hparams; no instance is kept.
      instance = None
      instance_hparams = problem_hparams.problem_hparams(name, hparams)

    hparams.problem_instances.append(instance)
    hparams.problems.append(instance_hparams)

  return hparams
Exemplo n.º 41
0
    def __init__(self, FLAGS, server_address='127.0.0.1:9000'):
        """Sets up the problem, hparams and request function for serving.

        Args:
            FLAGS: Flag container; t2t_usr_dir, problem and model are read.
            server_address: Address passed to make_request_fn.
        """
        print('Initializing up2down_class.......')
        self.FLAGS = FLAGS
        self.server_address = server_address
        tf.logging.set_verbosity(tf.logging.ERROR)

        user_dir = self.FLAGS.t2t_usr_dir
        usr_dir.import_usr_dir(user_dir)

        # hparams: not important but necessary, an assertion error will be
        # raised without hparams. data_dir is set to the expanded user dir.
        self.hparams = hparam.HParams(data_dir=os.path.expanduser(user_dir))

        # Problem lookup; the returned problem hparams are not kept here.
        self.problem = registry.problem(self.FLAGS.problem)
        self.problem.get_hparams(self.hparams)

        # Function used to send requests to the model server.
        self.request_fn = self.make_request_fn(self.FLAGS.model,
                                               self.server_address)
Exemplo n.º 42
0
def add_problem_hparams(hparams, problems):
  """Fills hparams.problems and hparams.problem_instances.

  Args:
    hparams: Hparams object to update in place.
    problems: Problem names joined with "-".

  Raises:
    LookupError: A name is not registered; the message lists every
      registered problem name.
  """
  hparams.problems = []
  hparams.problem_instances = []
  for name in problems.split("-"):
    try:
      instance = registry.problem(name)
    except LookupError:
      # Re-raise with the sorted list of registered problems as a bullet list.
      known_names = sorted(registry.list_problems())
      header = "%s not in the set of supported problems:" % name
      raise LookupError("\n  * ".join([header] + known_names))
    instance_hparams = instance.get_hparams(hparams)

    hparams.problem_instances.append(instance)
    hparams.problems.append(instance_hparams)
Exemplo n.º 43
0
    def _add_problem_hparams(self, hparams, problem_name):
        """Add the problem and this object's id settings to `hparams`.

        This method corresponds to create_hparams() in tensor2tensor's
        trainer_lib module, but replaces the problem's feature encoders with
        DummyTextEncoder instances sized from this object's vocabularies.

        The ids pop_id (only when non-negative), max_terminal_id and
        closing_bracket_id (set to pop_id) are added to `hparams`. If a
        name is already present and holds a different value, a warning is
        logged and the existing value is kept.

        Args:
            hparams (Hparams): Model hyper parameters, updated in place.
            problem_name (string): T2T problem name.

        Returns:
            The same hparams object, with `problem` and `problem_hparams`
            set.

        Raises:
            Whatever registry.problem raises for an unknown name (the
            original documentation names LookupError).
        """
        id_settings = []
        if self.pop_id >= 0:
            id_settings.append(("pop_id", self.pop_id))
        id_settings.append(("max_terminal_id", self.max_terminal_id))
        id_settings.append(("closing_bracket_id", self.pop_id))

        for name, expected in id_settings:
            try:
                hparams.add_hparam(name, expected)
            except ValueError:
                # add_hparam rejects names that are already set; keep the
                # existing value and only report a disagreement.
                existing = getattr(hparams, name)
                if existing != expected:
                    logging.warning("T2T %s does not match (%d!=%d)"
                                    % (name, existing, expected))

        problem = registry.problem(problem_name)
        # Install dummy encoders before get_hparams so the problem uses
        # them instead of building its own.
        problem._encoders = {
            "inputs": DummyTextEncoder(vocab_size=self.src_vocab_size),
            "targets": DummyTextEncoder(vocab_size=self.trg_vocab_size)
        }
        p_hparams = problem.get_hparams(hparams)
        hparams.problem = problem
        hparams.problem_hparams = p_hparams
        return hparams
Exemplo n.º 44
0
def generate_real_env_data(problem_name, agent_policy_path, hparams, data_dir,
                           tmp_dir, autoencoder_path=None, eval_phase=False):
  """Runs the agent against the real environment and returns the mean reward.

  Args:
    problem_name: Registry name of the gym problem.
    agent_policy_path: Passed as the "agent_policy_path" flag.
    hparams: Supplies true_env_generator_num_steps.
    data_dir: Output directory; created if missing.
    tmp_dir: Temporary directory handed to generate_data.
    autoencoder_path: Passed as the "autoencoder_path" flag.
    eval_phase: Stored on the problem before generation.

  Returns:
    sum_of_rewards / (1.0 + dones), both read from the problem afterwards.
  """
  tf.gfile.MakeDirs(data_dir)
  flag_overrides = {
      "problem": problem_name,
      "agent_policy_path": agent_policy_path,
      "autoencoder_path": autoencoder_path,
      "only_use_ae_for_policy": True,
  }
  with temporary_flags(flag_overrides):
    env_problem = registry.problem(problem_name)
    env_problem.settable_num_steps = hparams.true_env_generator_num_steps
    env_problem.eval_phase = eval_phase
    env_problem.generate_data(data_dir, tmp_dir)
    # The +1.0 keeps the denominator non-zero when no episode finished.
    episode_divisor = 1.0 + env_problem.dones
    mean_reward = env_problem.sum_of_rewards / episode_divisor

  return mean_reward
Exemplo n.º 45
0
    def _create_hparams(self, src_vocab_size, trg_vocab_size, hparams_set_name,
                        problem_name):
        """Creates hparams object.

        This method corresponds to create_hparams() in tensor2tensor's
        trainer_utils module, but replaces the problem's feature encoders
        with DummyTextEncoder instances.

        The ids max_terminal_id and closing_bracket_id (set to pop_id) are
        added to the hparams. If a name is already present and holds a
        different value, a warning is logged and the existing value is kept.

        Args:
            src_vocab_size (int): Source vocabulary size.
            trg_vocab_size (int): Target vocabulary size.
            hparams_set_name (string): T2T hparams set name.
            problem_name (string): T2T problem name.

        Returns:
            hparams object with `problem_instances` and `problems` set to
            single-element lists.

        Raises:
            Whatever the registry lookups raise for an unknown name (the
            original documentation names LookupError).
        """
        hparams = registry.hparams(hparams_set_name)()
        problem = registry.problem(problem_name)
        # The following hack is necessary to prevent the problem from creating
        # the default TextEncoders, which would fail due to the lack of a
        # vocabulary file.
        problem._encoders = {
            "inputs": DummyTextEncoder(vocab_size=src_vocab_size),
            "targets": DummyTextEncoder(vocab_size=trg_vocab_size)
        }

        id_settings = (
            ("max_terminal_id", self.max_terminal_id),
            ("closing_bracket_id", self.pop_id),
        )
        for name, expected in id_settings:
            try:
                hparams.add_hparam(name, expected)
            except ValueError:
                # add_hparam rejects names that are already set; keep the
                # existing value and only report a disagreement.
                existing = getattr(hparams, name)
                if existing != expected:
                    logging.warning("T2T %s does not match (%d!=%d)" %
                                    (name, existing, expected))

        p_hparams = problem.get_hparams(hparams)
        hparams.problem_instances = [problem]
        hparams.problems = [p_hparams]
        return hparams
Exemplo n.º 46
0
    def _add_problem_hparams(self, hparams, problem_name):
        """Add the problem and this object's id settings to `hparams`.

        This method corresponds to create_hparams() in tensor2tensor's
        trainer_lib module, but replaces the problem's feature encoders with
        DummyTextEncoder instances sized from this object's vocabularies.

        The ids pop_id (only when non-negative), max_terminal_id and
        closing_bracket_id (set to pop_id) are added to `hparams`. If a
        name is already present and holds a different value, a warning is
        logged and the existing value is kept.

        Args:
            hparams (Hparams): Model hyper parameters, updated in place.
            problem_name (string): T2T problem name.

        Returns:
            The same hparams object, with `problem` and `problem_hparams`
            set.

        Raises:
            Whatever registry.problem raises for an unknown name (the
            original documentation names LookupError).
        """
        id_settings = []
        if self.pop_id >= 0:
            id_settings.append(("pop_id", self.pop_id))
        id_settings.append(("max_terminal_id", self.max_terminal_id))
        id_settings.append(("closing_bracket_id", self.pop_id))

        for name, expected in id_settings:
            try:
                hparams.add_hparam(name, expected)
            except ValueError:
                # add_hparam rejects names that are already set; keep the
                # existing value and only report a disagreement.
                existing = getattr(hparams, name)
                if existing != expected:
                    logging.warning("T2T %s does not match (%d!=%d)"
                                    % (name, existing, expected))

        problem = registry.problem(problem_name)
        # Install dummy encoders before get_hparams so the problem uses
        # them instead of building its own.
        problem._encoders = {
            "inputs": DummyTextEncoder(vocab_size=self.src_vocab_size),
            "targets": DummyTextEncoder(vocab_size=self.trg_vocab_size)
        }
        p_hparams = problem.get_hparams(hparams)
        hparams.problem = problem
        hparams.problem_hparams = p_hparams
        return hparams
Exemplo n.º 47
0
def add_problem_hparams(hparams, problems):
  """Fills hparams.problems and hparams.problem_instances.

  Args:
    hparams: Hparams object to update in place.
    problems: Problem names joined with "-".

  Raises:
    LookupError: A name is not registered; the message lists every
      registered problem name.
  """
  hparams.problems = []
  hparams.problem_instances = []
  for single_name in problems.split("-"):
    try:
      problem_obj = registry.problem(single_name)
    except LookupError:
      # Re-raise with the sorted list of registered problems as a bullet list.
      registered = sorted(registry.list_problems())
      first_line = "%s not in the set of supported problems:" % single_name
      raise LookupError("\n  * ".join([first_line] + registered))
    problem_obj_hparams = problem_obj.get_hparams(hparams)

    hparams.problem_instances.append(problem_obj)
    hparams.problems.append(problem_obj_hparams)
Exemplo n.º 48
0
def encode_env_frames(problem_name, ae_problem_name, autoencoder_path,
                      epoch_data_dir):
    """Encode all frames from problem_name and write out as ae_problem_name.

    A split (train or eval) is skipped when any of its output files already
    exists.

    Args:
        problem_name: Problem whose frames are read.
        ae_problem_name: Problem whose file paths receive the encoded frames.
        autoencoder_path: Forwarded to encode_dataset.
        epoch_data_dir: Directory used for both reading and writing.
    """
    with tf.Graph().as_default():
        ae_hparams = trainer_lib.create_hparams("autoencoder_discrete_pong",
                                                problem_name=problem_name)
        problem = ae_hparams.problem
        model = registry.model("autoencoder_ordered_discrete")(
            ae_hparams, tf.estimator.ModeKeys.EVAL)

        ae_problem = registry.problem(ae_problem_name)
        train_out_paths = ae_problem.training_filepaths(
            epoch_data_dir, 10, True)
        eval_out_paths = ae_problem.dev_filepaths(epoch_data_dir, 1, True)

        # Existence is checked for both splits before any encoding starts.
        train_done = any(tf.gfile.Exists(p) for p in train_out_paths)
        eval_done = any(tf.gfile.Exists(p) for p in eval_out_paths)

        splits = (
            (tf.estimator.ModeKeys.TRAIN, train_out_paths, train_done),
            (tf.estimator.ModeKeys.EVAL, eval_out_paths, eval_done),
        )
        for mode, out_paths, already_encoded in splits:
            if already_encoded:
                continue
            dataset = problem.dataset(mode,
                                      epoch_data_dir,
                                      shuffle_files=False,
                                      output_buffer_size=100,
                                      preprocess=False)
            encode_dataset(model, dataset, problem, ae_hparams,
                           autoencoder_path, out_paths)
Exemplo n.º 49
0
    def __init__(self, config):
        """Builds the prediction graph and restores the latest checkpoint.

        Copies settings from `config` into the global FLAGS, builds a
        PREDICT-mode model on an int32 placeholder, opens a session and
        restores the checkpoint found in FLAGS.output_dir.

        Args:
            config: Settings object; GPU_DEVICE, VOCAB_DIR, PROBLEM_NAME,
                MODEL_NAME, HPARAMS_SET, MODEL_DIR, DECODE_HPARAMS,
                BATCH_SIZE, GPU_MEM_FRAC and GPU_MEM_GROWTH are read.
        """
        os.environ["CUDA_VISIBLE_DEVICES"] = config.GPU_DEVICE

        # Push the configuration into the global flags.
        FLAGS.data_dir = config.VOCAB_DIR
        FLAGS.problems = config.PROBLEM_NAME
        FLAGS.model = config.MODEL_NAME
        FLAGS.hparams_set = config.HPARAMS_SET
        FLAGS.output_dir = config.MODEL_DIR
        FLAGS.decode_hparams = config.DECODE_HPARAMS
        self.batch_size = config.BATCH_SIZE
        self.problem_name = FLAGS.problems

        self.hparams = create_hparams()
        self.encoders = registry.problem(FLAGS.problems).feature_encoders(
            FLAGS.data_dir)
        checkpoint_state = tf.train.get_checkpoint_state(FLAGS.output_dir)
        self.ckpt = checkpoint_state.model_checkpoint_path

        # Placeholder of shape (batch, length), reshaped to 4D for the model.
        self.inputs_ph = tf.placeholder(
            shape=(self.batch_size, None), dtype=tf.int32)
        self.batch_inputs = tf.reshape(self.inputs_ph,
                                       [self.batch_size, -1, 1, 1])
        self.features = {"inputs": self.batch_inputs}

        # Build the model and its prediction ops on top of the features.
        tf.logging.info(f"[{file_name}] SessFieldPredict: register T2TModel")
        model_cls = registry.model(FLAGS.model)
        self.model = model_cls(self.hparams, tf.estimator.ModeKeys.PREDICT)
        self.model_spec = self.model.estimator_spec_predict(self.features)
        self.prediction = self.model_spec.predictions

        first_problem = self.hparams.problems[0]
        self.inputs_vocab = first_problem.vocabulary["inputs"]
        self.targets_vocab = first_problem.vocabulary["targets"]

        # Session configuration: GPU memory fraction and growth.
        self.sess_config = tf.ConfigProto(
            gpu_options=tf.GPUOptions(
                per_process_gpu_memory_fraction=config.GPU_MEM_FRAC))
        self.sess_config.gpu_options.allow_growth = config.GPU_MEM_GROWTH

        tf.logging.info(f"[{file_name}] SessFieldPredict: registered")

        # Open the session and load the checkpoint weights.
        self.sess = tf.Session(config=self.sess_config)
        checkpoint_saver = tf.train.Saver()
        tf.logging.info(f"[{file_name}] Decode: model loading ... ")
        checkpoint_saver.restore(self.sess, self.ckpt)
        tf.logging.info(f"[{file_name}] Decode: model loaded.")
Exemplo n.º 50
0
  def __init__(self,
               model_name,
               problem_name,
               hparams_set,
               queries,
               output_dir=None,
               data_dir=None,
               model_dir=None,
               tmp_dir=None,
               export_dir=None,
               decode_hparams="",
               default_tmp=None,
               tfms_path=None,
               mode="train"):
    """Stores the configuration and resolves the model, problem and hparams.

    Any directory argument left as None falls back to `default_tmp`, or to a
    fresh tempfile.mkdtemp() directory when `default_tmp` is also None;
    `export_dir` falls back to that directory plus "/export".

    Args:
      model_name: Registry name of the model.
      problem_name: Registry name of the problem.
      hparams_set: Name of the hparams set.
      queries: Stored as-is on the instance.
      output_dir: Output directory, or None for the fallback.
      data_dir: Data directory, or None for the fallback.
      model_dir: Model directory, or None for the fallback.
      tmp_dir: Temporary directory, or None for the fallback.
      export_dir: Export directory, or None for fallback + "/export".
      decode_hparams: Stored as-is on the instance.
      default_tmp: Fallback directory used instead of creating a new one.
      tfms_path: Stored as tf_model_server_path.
      mode: Assigned to the problem's `mode` attribute.
    """
    self.model_name = model_name
    self.model = registry.model(model_name)

    self.problem_name = problem_name
    self.hparams_set = hparams_set
    self.queries = queries

    self.problem = registry.problem(self.problem_name)
    self.problem.mode = mode

    fallback_dir = default_tmp if default_tmp is not None else tempfile.mkdtemp()

    def _or_fallback(path):
      # Use the explicit path when given, otherwise the shared fallback.
      return fallback_dir if path is None else path

    self.output_dir = _or_fallback(output_dir)
    self.data_dir = _or_fallback(data_dir)
    self.model_dir = _or_fallback(model_dir)
    self.tmp_dir = _or_fallback(tmp_dir)
    if export_dir is None:
      self.export_dir = fallback_dir + "/export"
    else:
      self.export_dir = export_dir

    self.decode_hparams = decode_hparams

    # HACK
    self.tf_model_server_path = tfms_path

    self.train_dataset = None
    self.has_run_datagen = False

    self._lookup_hparams()
Exemplo n.º 51
0
def generate_real_env_data(problem_name, agent_policy_path, hparams, data_dir,
                           tmp_dir, autoencoder_path=None, eval_phase=False):
  """Runs the agent against the real environment and returns the mean reward.

  Args:
    problem_name: Registry name of the gym problem.
    agent_policy_path: Passed as the "agent_policy_path" flag.
    hparams: Supplies true_env_generator_num_steps.
    data_dir: Output directory; created if missing.
    tmp_dir: Temporary directory handed to generate_data.
    autoencoder_path: Passed as the "autoencoder_path" flag.
    eval_phase: Stored on the problem before generation.

  Returns:
    statistics.sum_of_rewards / (1.0 + statistics.number_of_dones), both read
    from the problem afterwards.
  """
  tf.gfile.MakeDirs(data_dir)
  flag_overrides = {
      "problem": problem_name,
      "agent_policy_path": agent_policy_path,
      "autoencoder_path": autoencoder_path,
      "only_use_ae_for_policy": True,
  }
  with temporary_flags(flag_overrides):
    env_problem = registry.problem(problem_name)
    env_problem.settable_num_steps = hparams.true_env_generator_num_steps
    env_problem.eval_phase = eval_phase
    env_problem.generate_data(data_dir, tmp_dir)
    # The +1.0 keeps the denominator non-zero when no episode finished.
    reward_total = env_problem.statistics.sum_of_rewards
    episode_divisor = 1.0 + env_problem.statistics.number_of_dones
    mean_reward = reward_total / episode_divisor

  return mean_reward
 def _testImg2imgTransformer(self, net):
   """Runs `net` in TRAIN mode on random images and checks the logits shape.

   Args:
     net: Model class, called as net(hparams, mode, problem_hparams).
   """
   batch_size = 3
   hparams = image_transformer_2d.img2img_transformer2d_tiny()
   hparams.data_dir = ""
   p_hparams = registry.problem("image_celeba").get_hparams(hparams)
   # randint's upper bound is exclusive, so 256 yields values in [0, 255].
   # The batch dimension uses batch_size so it matches the final assertion.
   inputs = np.random.randint(0, 256, size=(batch_size, 4, 4, 3))
   targets = np.random.randint(0, 256, size=(batch_size, 8, 8, 3))
   with self.test_session() as session:
     features = {
         "inputs": tf.constant(inputs, dtype=tf.int32),
         "targets": tf.constant(targets, dtype=tf.int32),
         "target_space_id": tf.constant(1, dtype=tf.int32),
     }
     model = net(hparams, tf.estimator.ModeKeys.TRAIN, p_hparams)
     logits, _ = model(features)
     session.run(tf.global_variables_initializer())
     res = session.run(logits)
   # Target-shaped logits with a trailing dimension of 256.
   self.assertEqual(res.shape, (batch_size, 8, 8, 3, 256))
Exemplo n.º 53
0
 def testMultiModel(self):
   """Runs MultiModel in TRAIN mode on random data and checks logits shape."""
   # randint's upper bound is exclusive: 256 gives [0, 255], 10 gives [0, 9].
   x = np.random.randint(0, 256, size=(3, 5, 5, 3))
   y = np.random.randint(0, 10, size=(3, 5, 1, 1))
   hparams = multimodel.multimodel_tiny()
   hparams.add_hparam("data_dir", "")
   problem = registry.problem("image_cifar10")
   p_hparams = problem.get_hparams(hparams)
   hparams.problems = [p_hparams]
   with self.test_session() as session:
     features = {
         "inputs": tf.constant(x, dtype=tf.int32),
         "targets": tf.constant(y, dtype=tf.int32),
         "target_space_id": tf.constant(1, dtype=tf.int32),
     }
     model = multimodel.MultiModel(
         hparams, tf.estimator.ModeKeys.TRAIN, p_hparams)
     logits, _ = model(features)
     session.run(tf.global_variables_initializer())
     res = session.run(logits)
   self.assertEqual(res.shape, (3, 1, 1, 1, 10))
Exemplo n.º 54
0
def encode_env_frames(problem_name, ae_problem_name, autoencoder_path,
                      epoch_data_dir):
  """Encode all frames from problem_name and write out as ae_problem_name.

  A split (train or eval) is skipped when any of its output files already
  exists.

  Args:
    problem_name: Problem whose frames are read.
    ae_problem_name: Problem whose file paths receive the encoded frames.
    autoencoder_path: Forwarded to encode_dataset.
    epoch_data_dir: Directory used for both reading and writing.
  """
  with tf.Graph().as_default():
    ae_hparams = trainer_lib.create_hparams("autoencoder_discrete_pong",
                                            problem_name=problem_name)
    problem = ae_hparams.problem
    model = registry.model("autoencoder_ordered_discrete")(
        ae_hparams, tf.estimator.ModeKeys.EVAL)

    ae_problem = registry.problem(ae_problem_name)
    train_targets = ae_problem.training_filepaths(epoch_data_dir, 10, True)
    eval_targets = ae_problem.dev_filepaths(epoch_data_dir, 1, True)

    # Existence is checked for both splits before any encoding starts.
    have_train = any(tf.gfile.Exists(path) for path in train_targets)
    have_eval = any(tf.gfile.Exists(path) for path in eval_targets)

    pending = (
        (have_train, tf.estimator.ModeKeys.TRAIN, train_targets),
        (have_eval, tf.estimator.ModeKeys.EVAL, eval_targets),
    )
    for exists, mode, target_paths in pending:
      if exists:
        continue
      dataset = problem.dataset(mode, epoch_data_dir,
                                shuffle_files=False, output_buffer_size=100,
                                preprocess=False)
      encode_dataset(model, dataset, problem, ae_hparams, autoencoder_path,
                     target_paths)
Exemplo n.º 55
0
def train_agent(problem_name, agent_model_dir,
                event_dir, world_model_dir, epoch_data_dir, hparams,
                autoencoder_path=None, epoch=0):
  """Train the PPO agent in the simulated environment.

  PPO hparams are created from `hparams.ppo_params`, selectively overridden by
  `ppo_`-prefixed entries of `hparams`, and extended with the world-model
  hparams and a copy of the problem's environment spec. Training then runs
  with the relevant flags temporarily pointed at the world model.
  """
  gym_problem = registry.problem(problem_name)
  ppo_hparams = trainer_lib.create_hparams(hparams.ppo_params)

  # Only these PPO settings may be overridden from the outer hparams.
  overridable = ("epochs_num", "epoch_length",
                 "learning_rate", "num_agents",
                 "optimization_epochs")
  for name in overridable:
    prefixed_name = "ppo_" + name
    if prefixed_name in hparams:
      ppo_hparams.set_hparam(name, hparams.get(prefixed_name))

  ppo_hparams.save_models_every_epochs = hparams.ppo_epochs_num
  ppo_hparams.world_model_dir = world_model_dir
  ppo_hparams.add_hparam("force_beginning_resets", True)

  # Expose the generative model's hparams for model specific adjustments.
  ppo_hparams.add_hparam(
      "model_hparams",
      trainer_lib.create_hparams(hparams.generative_model_params))

  # Work on a copy so the attribute assignments below do not rebind
  # attributes on the spec object held by the registered problem.
  environment_spec = copy.copy(gym_problem.environment_spec)
  environment_spec.simulation_random_starts = hparams.simulation_random_starts
  environment_spec.intrinsic_reward_scale = hparams.intrinsic_reward_scale
  ppo_hparams.add_hparam("environment_spec", environment_spec)

  flag_overrides = {
      "problem": problem_name,
      "model": hparams.generative_model,
      "hparams_set": hparams.generative_model_params,
      "output_dir": world_model_dir,
      "data_dir": epoch_data_dir,
      "autoencoder_path": autoencoder_path,
  }
  with temporary_flags(flag_overrides):
    rl_trainer_lib.train(ppo_hparams, event_dir, agent_model_dir, epoch=epoch)
Exemplo n.º 56
0
def generate_data_for_registered_problem(problem_name):
  """Generate the data files for the registered Problem `problem_name`.

  Directories are taken from `FLAGS.data_dir` and `FLAGS.tmp_dir` with `~`
  expanded. When no single task is selected (`FLAGS.task_id < 0`) and the
  problem reports `multiprocess_generate`, one `generate_data_in_process` call
  per task id is mapped over a process pool; otherwise
  `problem.generate_data` is called directly in this process.

  Args:
    problem_name: name under which the Problem is registered.

  Raises:
    ValueError: if `FLAGS.num_shards` is set.
  """
  tf.logging.info("Generating data for %s.", problem_name)
  if FLAGS.num_shards:
    raise ValueError("--num_shards should not be set for registered Problem.")
  problem = registry.problem(problem_name)
  task_id = None if FLAGS.task_id < 0 else FLAGS.task_id
  data_dir = os.path.expanduser(FLAGS.data_dir)
  tmp_dir = os.path.expanduser(FLAGS.tmp_dir)
  if task_id is None and problem.multiprocess_generate:
    if FLAGS.task_id_start != -1:
      assert FLAGS.task_id_end != -1
      task_id_start = FLAGS.task_id_start
      task_id_end = FLAGS.task_id_end
    else:
      task_id_start = 0
      task_id_end = problem.num_generate_tasks
    pool = multiprocessing.Pool(processes=FLAGS.num_concurrent_processes)
    try:
      problem.prepare_to_generate(data_dir, tmp_dir)
      # A distinct loop name keeps the outer `task_id` intact (list
      # comprehension variables leak into the function scope on Python 2).
      args = [(problem_name, data_dir, tmp_dir, worker_task_id)
              for worker_task_id in range(task_id_start, task_id_end)]
      # map() blocks until every task has finished.
      pool.map(generate_data_in_process, args)
    finally:
      # Always release the worker processes. After a successful map() they are
      # idle; after a failure any remaining work is abandoned.
      pool.terminate()
      pool.join()
  else:
    problem.generate_data(data_dir, tmp_dir, task_id)
Exemplo n.º 57
0
def main(_):
  """Query a served model from the command line.

  Each input is encoded with the problem's encoder, sent through the stub, and
  the decoded answer is printed. With `FLAGS.inputs_once` set, that single
  input is processed and the loop ends; otherwise inputs are read
  interactively until the process is interrupted.
  """
  tf.logging.set_verbosity(tf.logging.INFO)
  usr_dir.import_usr_dir(FLAGS.t2t_usr_dir)

  problem = registry.problem(FLAGS.problem)
  hparams = tf.contrib.training.HParams(
      data_dir=os.path.expanduser(FLAGS.data_dir))
  problem.get_hparams(hparams)

  # Problems without an "inputs" feature are queried through "targets".
  if problem.has_inputs:
    fname = "inputs"
  else:
    fname = "targets"
  input_encoder = problem.feature_info[fname].encoder
  output_decoder = problem.feature_info["targets"].encoder

  stub = create_stub()

  report_template = """
Input:
{inputs}

Output:
{outputs}
    """

  while True:
    inputs = FLAGS.inputs_once or input(">> ")

    encoded_ids = encode(inputs, input_encoder)
    predicted_ids = query(stub, encoded_ids, feature_name=fname)
    outputs = decode(predicted_ids, output_decoder)

    print(report_template.format(inputs=inputs, outputs=outputs))
    if FLAGS.inputs_once:
      break
Exemplo n.º 58
0
def main(_):
  """Send inputs to a served model and print each output with its score.

  With `FLAGS.inputs_once` set, that single input is processed and the loop
  ends; otherwise inputs are read interactively from the `>> ` prompt.
  """
  tf.logging.set_verbosity(tf.logging.INFO)
  validate_flags()
  usr_dir.import_usr_dir(FLAGS.t2t_usr_dir)

  problem = registry.problem(FLAGS.problem)
  hparams = tf.contrib.training.HParams(
      data_dir=os.path.expanduser(FLAGS.data_dir))
  problem.get_hparams(hparams)

  request_fn = make_request_fn()

  report_template = """
Input:
{inputs}

Output (Score {score:.3f}):
{output}
    """

  while True:
    if FLAGS.inputs_once:
      inputs = FLAGS.inputs_once
    else:
      inputs = input(">> ")

    # One input is sent, so exactly one (output, score) pair is expected back.
    ((output, score),) = serving_utils.predict([inputs], problem, request_fn)

    print(report_template.format(inputs=inputs, output=output, score=score))
    if FLAGS.inputs_once:
      break
Exemplo n.º 59
0
def evaluate_world_model(simulated_problem_name, problem_name, hparams,
                         world_model_dir, epoch_data_dir, tmp_dir,
                         autoencoder_path=None):
  """Generate simulated environment data and return reward accuracy.

  The accuracy is the simulated problem's count of successful episode reward
  predictions divided by its number of dones (at least 1, to avoid dividing by
  zero). Afterwards the `debug_frames_env` entry in `epoch_data_dir` is
  renamed to `debug_frames_env_eval`.
  """
  gym_simulated_problem = registry.problem(simulated_problem_name)
  gym_simulated_problem.settable_num_steps = (
      hparams.simulated_env_generator_num_steps)

  flag_overrides = {
      "problem": problem_name,
      "model": hparams.generative_model,
      "hparams_set": hparams.generative_model_params,
      "data_dir": epoch_data_dir,
      "output_dir": world_model_dir,
      "autoencoder_path": autoencoder_path,
  }
  with temporary_flags(flag_overrides):
    gym_simulated_problem.generate_data(epoch_data_dir, tmp_dir)

  stats = gym_simulated_problem.statistics
  episode_count = max(1., stats.number_of_dones)
  model_reward_accuracy = (
      stats.successful_episode_reward_predictions / float(episode_count))

  tf.gfile.Rename(os.path.join(epoch_data_dir, "debug_frames_env"),
                  os.path.join(epoch_data_dir, "debug_frames_env_eval"))
  return model_reward_accuracy
Exemplo n.º 60
0
def t2t_score_file(filename):
  """
  Build the T2T model, restore its checkpoint and run it on lines of a file.

  Each line of the file must be of the form ``<inputs>\\t<targets>``.

  NOTE(review): the function returns from inside the loop, so only the first
  line of the file is evaluated, and nothing is returned (``None``) for an
  empty file. This looks like a debugging shortcut -- confirm before relying
  on it.

  :param str filename: path of a text file with tab-separated input/target
    pairs, one pair per line
  :return: ``(sess, tvars, inputs_ph, targets_ph, losses)`` after the first line
  :raises ValueError: if a line does not contain exactly one tab separator
  """
  # Prepare model.
  hparams = create_t2t_hparams()
  encoders = registry.problem(FLAGS_problem).feature_encoders(FLAGS_data_dir)

  # Prepare features for feeding into the model.
  # Both dimensions are dynamic; the feed below supplies one sequence per run.
  inputs_ph = tf.placeholder(dtype=tf.int32, shape=(None, None))
  targets_ph = tf.placeholder(dtype=tf.int32, shape=(None, None))

  features = {
      "inputs": inputs_ph,
      "targets": targets_ph,
  }

  # Prepare the model and the graph when model runs on features.
  model = registry.model(FLAGS_model)(hparams, tf.estimator.ModeKeys.EVAL)
  assert isinstance(model, tensor2tensor.models.transformer.Transformer)
  #       final_output: tensor of logits with shape [batch_size, O, P, body_output_size].
  #       losses: either single loss as a scalar, a list, a tensor (to be averaged)
  #               or a dictionary of losses.
  final_output, losses = model(features)
  assert isinstance(losses, dict)
  saver = tf.train.Saver()

  sess = tf.Session()
  # Load weights from checkpoint.
  ckpts = tf.train.get_checkpoint_state(FLAGS_output_dir)
  ckpt = ckpts.model_checkpoint_path
  saver.restore(sess, ckpt)

  # Run on each line.
  # NOTE(review): `results` is never filled in the visible code.
  results = []
  # The context manager closes the file even on the early return below.
  with open(filename) as lines:
    for line in lines:
      tab_split = line.split("\t")
      if len(tab_split) > 2:
        raise ValueError("Each line must have at most one tab separator.")
      if len(tab_split) != 2:
        # Explicit raise instead of `assert`, which is stripped under -O.
        raise ValueError(
            "Each line must contain inputs and targets separated by a tab.")
      targets = tab_split[1].strip()
      inputs = tab_split[0].strip()
      # Run encoders and append EOS symbol.
      targets_numpy = encoders["targets"].encode(targets) + [text_encoder.EOS_ID]
      inputs_numpy = encoders["inputs"].encode(inputs) + [text_encoder.EOS_ID]
      # Prepare the feed.
      feed = {
          inputs_ph: [inputs_numpy],
          targets_ph: [targets_numpy]
      }

      np_res = sess.run({"losses": losses, "final_output": final_output}, feed_dict=feed)
      pprint(np_res)

      tvars = tf.trainable_variables()

      print('t2t inputs_ph:', inputs_ph, inputs_numpy)
      print('t2t targets_ph:', targets_ph, targets_numpy)

      return sess, tvars, inputs_ph, targets_ph, losses