Example #1
 def test_without_extra_args(self):
   pipeline_def = yaml.load("""
     class: ParallelTextInputPipeline
     params:
       source_files: ["file1"]
       target_files: ["file2"]
       num_epochs: 1
       shuffle: True
   """)
   pipeline = input_pipeline.make_input_pipeline_from_def(
       pipeline_def, tf.contrib.learn.ModeKeys.TRAIN)
   self.assertIsInstance(pipeline, input_pipeline.ParallelTextInputPipeline)
   #pylint: disable=W0212
   self.assertEqual(pipeline.params["source_files"], ["file1"])
   self.assertEqual(pipeline.params["target_files"], ["file2"])
   self.assertEqual(pipeline.params["num_epochs"], 1)
   self.assertEqual(pipeline.params["shuffle"], True)
Example #2
def everySenPre(flags_input_pipeline, model, hooks, model_dir, sess,
                flags_batch_size=32, flags_checkpoint_path=None):
    # NOTE: flags_batch_size and flags_checkpoint_path were referenced but
    # undefined in the original snippet; here they are assumed to be passed
    # in by the caller (the defaults are illustrative).
    flags_input_pipeline = _maybe_load_yaml(flags_input_pipeline)

    input_pipeline_infer = input_pipeline.make_input_pipeline_from_def(
        flags_input_pipeline, mode=tf.contrib.learn.ModeKeys.INFER,
        shuffle=False, num_epochs=1)
    tf.reset_default_graph()
    # Create the graph used for inference
    predictions, _, _ = create_inference_graph(
        model=model,
        input_pipeline=input_pipeline_infer,
        batch_size=flags_batch_size)

    saver = tf.train.Saver()
    checkpoint_path = flags_checkpoint_path
    if not checkpoint_path:
        checkpoint_path = tf.train.latest_checkpoint(model_dir)

    def session_init_op(_scaffold, sess):
        saver.restore(sess, checkpoint_path)
        tf.logging.info("Restored model from %s", checkpoint_path)

    scaffold = tf.train.Scaffold(init_fn=session_init_op)
    session_creator = tf.train.ChiefSessionCreator(scaffold=scaffold)

    sess = tf.train.MonitoredSession(session_creator=session_creator, hooks=hooks)
    # sess.run([]) returns an empty list, so the original two-value unpacking
    # raised a ValueError; fetch the predictions instead.
    result = sess.run(predictions)

    return result, sess
Example #3
  def foo_test_our_dataset(self):
    pipeline_def = yaml.load("""
      class: ParallelTextInputPipeline
      params:
        source_files: "/data/input_tiny.txt"
        target_files: "/data/output_tiny.txt"
        num_epochs: 1
        shuffle: True
    """)

    pipeline = input_pipeline.make_input_pipeline_from_def(
        pipeline_def, tf.contrib.learn.ModeKeys.TRAIN)
    self.assertIsInstance(pipeline, input_pipeline.ParallelTextInputPipeline)
    data_provider = pipeline.make_data_provider()

    features = pipeline.read_from_data_provider(data_provider)

    with self.test_session() as sess:
      # Initialize variables and start queue runners before fetching,
      # otherwise sess.run(features) blocks on the empty input queues.
      sess.run(tf.global_variables_initializer())
      sess.run(tf.local_variables_initializer())

      with tf.contrib.slim.queues.QueueRunners(sess):
        res = sess.run(features)
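
The tensors returned by read_from_data_provider are fed by TF 1.x input queues, so nothing can be fetched until queue runners are started; that is what tf.contrib.slim.queues.QueueRunners does in the test above. Outside slim, a sketch of the same pattern using the standard tf.train queue-runner API:

with tf.Session() as sess:
    sess.run([tf.global_variables_initializer(),
              tf.local_variables_initializer()])
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)
    try:
        res = sess.run(features)
    finally:
        coord.request_stop()
        coord.join(threads)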
Example #4
def create_experiment(output_dir):
    """
  Creates a new Experiment instance.

  Args:
    output_dir: Output directory for model checkpoints and summaries.
  """

    config = run_config.RunConfig(
        tf_random_seed=FLAGS.tf_random_seed,
        save_checkpoints_secs=FLAGS.save_checkpoints_secs,
        save_checkpoints_steps=FLAGS.save_checkpoints_steps,
        keep_checkpoint_max=FLAGS.keep_checkpoint_max,
        keep_checkpoint_every_n_hours=FLAGS.keep_checkpoint_every_n_hours,
        gpu_memory_fraction=FLAGS.gpu_memory_fraction)
    config.tf_config.gpu_options.allow_growth = FLAGS.gpu_allow_growth
    config.tf_config.log_device_placement = FLAGS.log_device_placement

    train_options = training_utils.TrainOptions(
        model_class=FLAGS.model, model_params=FLAGS.model_params)
    # On the main worker, save training options
    if config.is_chief:
        gfile.MakeDirs(output_dir)
        train_options.dump(output_dir)

    bucket_boundaries = None
    if FLAGS.buckets:
        bucket_boundaries = list(map(int, FLAGS.buckets.split(",")))

    # Training data input pipeline
    train_input_pipeline = input_pipeline.make_input_pipeline_from_def(
        def_dict=FLAGS.input_pipeline_train,
        mode=tf.contrib.learn.ModeKeys.TRAIN)

    # Create training input function
    train_input_fn = training_utils.create_input_fn(
        pipeline=train_input_pipeline,
        batch_size=FLAGS.batch_size,
        bucket_boundaries=bucket_boundaries,
        mode=tf.contrib.learn.ModeKeys.TRAIN)

    # Development data input pipeline
    dev_input_pipeline = input_pipeline.make_input_pipeline_from_def(
        def_dict=FLAGS.input_pipeline_dev,
        mode=tf.contrib.learn.ModeKeys.EVAL,
        shuffle=False,
        num_epochs=1)

    # Create eval input function
    eval_input_fn = training_utils.create_input_fn(
        pipeline=dev_input_pipeline,
        batch_size=FLAGS.batch_size,
        allow_smaller_final_batch=True,
        mode=tf.contrib.learn.ModeKeys.EVAL)

    def model_fn(features, labels, params, mode):
        """Builds the model graph"""
        model = _create_from_dict(
            {
                "class": train_options.model_class,
                "params": train_options.model_params
            },
            models,
            mode=mode)
        return model(features, labels, params)

    estimator = tf.contrib.learn.Estimator(model_fn=model_fn,
                                           model_dir=output_dir,
                                           config=config,
                                           params=FLAGS.model_params)

    # Create hooks
    train_hooks = []
    for dict_ in FLAGS.hooks:
        hook = _create_from_dict(dict_,
                                 hooks,
                                 model_dir=estimator.model_dir,
                                 run_config=config)
        train_hooks.append(hook)

    # Create metrics
    eval_metrics = {}
    for dict_ in FLAGS.metrics:
        metric = _create_from_dict(dict_, metric_specs)
        eval_metrics[metric.name] = metric

    experiment = PatchedExperiment(estimator=estimator,
                                   train_input_fn=train_input_fn,
                                   eval_input_fn=eval_input_fn,
                                   min_eval_frequency=FLAGS.eval_every_n_steps,
                                   train_steps=FLAGS.train_steps,
                                   eval_steps=None,
                                   eval_metrics=eval_metrics,
                                   train_monitors=train_hooks)

    return experiment
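
An Experiment built this way is normally handed to tf.contrib.learn's learn_runner, which drives training and evaluation. A minimal sketch; the output_dir path and schedule value are illustrative:

from tensorflow.contrib.learn.python.learn import learn_runner

learn_runner.run(
    experiment_fn=create_experiment,  # called with the output directory
    output_dir="/tmp/model_dir",      # illustrative
    schedule="train_and_evaluate")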
Example #5
for tdict in fl_tasks:
    if not "params" in tdict:
        tdict["params"] = {}
    task_cls = locate(str(tdict["class"])) or getattr(tasks, str(
        tdict["class"]))
    if (str(tdict["class"]) == "DecodeText"):
        task = task_cls(
            tdict["params"], callback_func=_save_prediction_to_dict)
    elif (str(tdict["class"]) == "DumpAttention"):
        task = task_cls(tdict["params"], callback_func=_handle_attention)

    hooks.append(task)

input_pipeline_infer = input_pipeline.make_input_pipeline_from_def(
    fl_input_pipeline,
    mode=tf.contrib.learn.ModeKeys.INFER,
    shuffle=False,
    num_epochs=1)

# Create the graph used for inference
predictions, _, _ = create_inference_graph(
    model=model, input_pipeline=input_pipeline_infer, batch_size=batch_size)

graph = tf.get_default_graph()

# Function to run inference.
def run_inference():
    with graph.as_default():
        # The original snippet was truncated here; this minimal body is an
        # assumption, following the MonitoredSession pattern used in the
        # other examples.
        with tf.train.MonitoredSession(hooks=hooks) as sess:
            # Run until the inputs are exhausted
            while not sess.should_stop():
                sess.run([])
Example #6
def main(_argv):
    """Program entry point.
  """

    # Load flags from config file
    if FLAGS.config_path:
        with gfile.GFile(FLAGS.config_path) as config_file:
            config_flags = yaml.safe_load(config_file)
            for flag_key, flag_value in config_flags.items():
                setattr(FLAGS, flag_key, flag_value)

    if isinstance(FLAGS.tasks, string_types):
        FLAGS.tasks = _maybe_load_yaml(FLAGS.tasks)

    if isinstance(FLAGS.input_pipeline, string_types):
        FLAGS.input_pipeline = _maybe_load_yaml(FLAGS.input_pipeline)

    input_pipeline_infer = input_pipeline.make_input_pipeline_from_def(
        FLAGS.input_pipeline,
        mode=tf.contrib.learn.ModeKeys.INFER,
        shuffle=False,
        num_epochs=1)

    # Create output dir
    pred_dir = os.path.join(FLAGS.model_dir, 'pred')
    if not os.path.exists(pred_dir):
        os.makedirs(pred_dir)

    # Load saved training options
    train_options = training_utils.TrainOptions.load(FLAGS.model_dir)

    # Create the model
    model_cls = locate(train_options.model_class) or \
      getattr(models, train_options.model_class)
    model_params = train_options.model_params
    model_params = _deep_merge_dict(model_params,
                                    _maybe_load_yaml(FLAGS.model_params))
    model = model_cls(params=model_params,
                      mode=tf.contrib.learn.ModeKeys.INFER)

    # Load inference tasks
    hooks = []
    for tdict in FLAGS.tasks:
        if not "params" in tdict:
            tdict["params"] = {}
        task_cls = locate(tdict["class"]) or getattr(tasks, tdict["class"])
        print("******", task_cls)
        task = task_cls(tdict["params"],
                        callback_func=_save_prediction_to_dict)

        hooks.append(task)

    # Create the graph used for inference
    predictions, _, _ = create_inference_graph(
        model=model,
        input_pipeline=input_pipeline_infer,
        batch_size=FLAGS.batch_size)

    saver = tf.train.Saver()
    checkpoint_path = FLAGS.checkpoint_path
    if not checkpoint_path:
        checkpoint_path = tf.train.latest_checkpoint(FLAGS.model_dir)

    def session_init_op(_scaffold, sess):
        saver.restore(sess, checkpoint_path)
        tf.logging.info("Restored model from %s", checkpoint_path)

    scaffold = tf.train.Scaffold(init_fn=session_init_op)
    session_creator = tf.train.ChiefSessionCreator(scaffold=scaffold)
    with tf.train.MonitoredSession(session_creator=session_creator,
                                   hooks=hooks) as sess:

        # Run until the inputs are exhausted
        while not sess.should_stop():
            sess.run([])
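
_maybe_load_yaml is a small helper these scripts share but that is not shown here. A plausible implementation, assumed from how it is called above (it must accept either a YAML string or an already-parsed value):

def _maybe_load_yaml(item):
    # Parse item only if it is a string; pass dicts through unchanged.
    if isinstance(item, string_types):
        return yaml.safe_load(item)
    if isinstance(item, dict):
        return item
    raise ValueError("Expected a YAML string or dict, got {}".format(type(item)))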
Example #7
def main(_argv):
    """Program entry point.
  """

    # Load flags from config file
    if FLAGS.config_path:
        with gfile.GFile(FLAGS.config_path) as config_file:
            config_flags = yaml.safe_load(config_file)
            for flag_key, flag_value in config_flags.items():
                setattr(FLAGS, flag_key, flag_value)

    if isinstance(FLAGS.tasks, string_types):
        FLAGS.tasks = _maybe_load_yaml(FLAGS.tasks)

    if isinstance(FLAGS.input_pipeline, string_types):
        FLAGS.input_pipeline = _maybe_load_yaml(FLAGS.input_pipeline)

    input_pipeline_infer = input_pipeline.make_input_pipeline_from_def(
        FLAGS.input_pipeline,
        mode=tf.contrib.learn.ModeKeys.INFER,
        shuffle=False,
        num_epochs=1)

    # Load saved training options
    train_options = training_utils.TrainOptions.load(FLAGS.model_dir)

    # Create the model
    model_cls = locate(train_options.model_class) or \
      getattr(models, train_options.model_class)
    model_params = train_options.model_params
    model_params = _deep_merge_dict(model_params,
                                    _maybe_load_yaml(FLAGS.model_params))
    model = model_cls(params=model_params,
                      mode=tf.contrib.learn.ModeKeys.INFER)

    # Load inference tasks
    hooks = []
    for tdict in FLAGS.tasks:
        if not "params" in tdict:
            tdict["params"] = {}
        task_cls = locate(tdict["class"]) or getattr(tasks, tdict["class"])
        task = task_cls(tdict["params"])
        hooks.append(task)

    # Create the graph used for inference
    predictions, _, _ = create_inference_graph(
        model=model,
        input_pipeline=input_pipeline_infer,
        batch_size=FLAGS.batch_size)

    saver = tf.train.Saver(tf.global_variables())  # tf.all_variables is deprecated
    checkpoint_path = FLAGS.checkpoint_path
    if not checkpoint_path:
        checkpoint_path = tf.train.latest_checkpoint(FLAGS.model_dir)

    def session_init_op(_scaffold, sess):
        saver.restore(sess, checkpoint_path)
        tf.logging.info("Restored model from %s", checkpoint_path)

        if FLAGS.save_pb_during_infer:
            save_vars = {}
            for v in tf.trainable_variables():
                save_vars[v.value().name] = sess.run(v)
            g2 = tf.Graph()
            with g2.as_default():
                consts = {}
                for k in save_vars.keys():
                    consts[k] = tf.constant(save_vars[k])
                tf.import_graph_def(
                    sess.graph_def,
                    input_map={name: consts[name]
                               for name in consts.keys()})
                tf.train.write_graph(g2.as_graph_def(), FLAGS.save_pb_dir,
                                     'rnn.pb', False)
                tf.train.write_graph(g2.as_graph_def(), FLAGS.save_pb_dir,
                                     'rnn.txt')
            tf.logging.info("Save pb down! %s", FLAGS.save_pb_dir)

    scaffold = tf.train.Scaffold(init_fn=session_init_op)
    session_creator = tf.train.ChiefSessionCreator(scaffold=scaffold)
    with tf.train.MonitoredSession(session_creator=session_creator,
                                   hooks=hooks) as sess:

        # Run until the inputs are exhausted
        while not sess.should_stop():
            sess.run([])
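
The rnn.pb written above is a frozen GraphDef with the trained variables baked in as constants. A minimal sketch of loading it back; the directory name is illustrative and assumed to match FLAGS.save_pb_dir:

graph_def = tf.GraphDef()
with tf.gfile.GFile(os.path.join("save_pb_dir", "rnn.pb"), "rb") as f:  # path illustrative
    graph_def.ParseFromString(f.read())
with tf.Graph().as_default():
    tf.import_graph_def(graph_def, name="")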
Example #8
def main(_argv):
    """Program entry point.
  """

    # Load flags from config file
    if FLAGS.config_path:
        with gfile.GFile(FLAGS.config_path) as config_file:
            config_flags = yaml.safe_load(config_file)
            for flag_key, flag_value in config_flags.items():
                setattr(FLAGS, flag_key, flag_value)

    if isinstance(FLAGS.tasks, string_types):
        FLAGS.tasks = _maybe_load_yaml(FLAGS.tasks)

    if isinstance(FLAGS.input_pipeline, string_types):
        FLAGS.input_pipeline = _maybe_load_yaml(FLAGS.input_pipeline)

    input_pipeline_infer = input_pipeline.make_input_pipeline_from_def(
        FLAGS.input_pipeline,
        mode=tf.contrib.learn.ModeKeys.INFER,
        shuffle=False,
        num_epochs=1)

    # Load saved training options
    train_options = training_utils.TrainOptions.load(FLAGS.model_dir)

    # Create the model
    model_cls = locate(train_options.model_class) or \
      getattr(models, train_options.model_class)
    model_params = train_options.model_params
    model_params = _deep_merge_dict(model_params,
                                    _maybe_load_yaml(FLAGS.model_params))
    model = model_cls(params=model_params,
                      mode=tf.contrib.learn.ModeKeys.INFER)

    # Load inference tasks
    hooks = []
    for tdict in FLAGS.tasks:
        if not "params" in tdict:
            tdict["params"] = {}
        task_cls = locate(tdict["class"]) or getattr(tasks, tdict["class"])
        task = task_cls(tdict["params"])
        hooks.append(task)

    # Create the graph used for inference
    predictions, _, _ = create_inference_graph(
        model=model,
        input_pipeline=input_pipeline_infer,
        batch_size=FLAGS.batch_size)

    # moving_average_variables = []
    #
    # for var in slim.get_model_variables():
    #     if 'resnet_v1_50' in var.name and 'fully_connected' not in var.name:
    #         moving_average_variables.append(var)
    #
    #
    # tf_global_step = slim.get_or_create_global_step()
    #
    # variable_averages = tf.train.ExponentialMovingAverage(
    #     0.99, tf_global_step)
    #
    # #get variable of the models and apply average to the concerned variables
    # variables_to_restore = variable_averages.variables_to_restore(
    #     moving_average_variables)
    #
    #
    # #TODO
    # #current checkpoint have model/att_seq2seq/model/att_seq2seq/encode/.... for the exp moving
    # #instead of model/att_seq2seq/
    # #so need to rename these:
    #
    # def name_in_checkpoint(var):
    #     if 'ExponentialMovingAverage' in var:
    #         return var.replace('model/att_seq2seq/', 'model/att_seq2seq/model/att_seq2seq/')
    #     return var
    #
    #
    # variables_to_restore = {name_in_checkpoint(k): v
    #                         for k,v in variables_to_restore.items()}
    #
    #
    # variables_to_restore[tf_global_step.op.name] = tf_global_step
    #
    # for k,v in variables_to_restore.items():
    #     print(k)
    #     print(v)
    #     print("#############")
    # import sys
    # sys.exit()

    # saver = tf.train.Saver(var_list=variables_to_restore)
    saver = tf.train.Saver()
    checkpoint_path = FLAGS.checkpoint_path
    if not checkpoint_path:
        checkpoint_path = tf.train.latest_checkpoint(FLAGS.model_dir)

    def session_init_op(_scaffold, sess):
        saver.restore(sess, checkpoint_path)
        tf.logging.info("Restored model from %s", checkpoint_path)

    scaffold = tf.train.Scaffold(init_fn=session_init_op)
    session_creator = tf.train.ChiefSessionCreator(scaffold=scaffold)
    with tf.train.MonitoredSession(session_creator=session_creator,
                                   hooks=hooks) as sess:

        # Run until the inputs are exhausted
        while not sess.should_stop():
            sess.run([])
Example #9
def main(_argv):
    """Program entry point.
  """

    # Load flags from config file
    if FLAGS.config_path:
        with gfile.GFile(FLAGS.config_path) as config_file:
            config_flags = yaml.safe_load(config_file)
            for flag_key, flag_value in config_flags.items():
                setattr(FLAGS, flag_key, flag_value)

    if isinstance(FLAGS.tasks, string_types):
        FLAGS.tasks = _maybe_load_yaml(FLAGS.tasks)

    if isinstance(FLAGS.input_pipeline, string_types):
        FLAGS.input_pipeline = _maybe_load_yaml(FLAGS.input_pipeline)

    input_pipeline_infer = input_pipeline.make_input_pipeline_from_def(
        FLAGS.input_pipeline,
        mode=tf.contrib.learn.ModeKeys.INFER,
        shuffle=False,
        num_epochs=1)

    # Load saved training options
    train_options = training_utils.TrainOptions.load(FLAGS.model_dir)

    # Create the model
    model_cls = locate(train_options.model_class) or \
      getattr(models, train_options.model_class)
    model_params = train_options.model_params
    model_params = _deep_merge_dict(model_params,
                                    _maybe_load_yaml(FLAGS.model_params))
    model = model_cls(params=model_params,
                      mode=tf.contrib.learn.ModeKeys.INFER)

    # Load inference tasks
    hooks = []
    for tdict in FLAGS.tasks:
        if not "params" in tdict:
            tdict["params"] = {}
        task_cls = locate(tdict["class"]) or getattr(tasks, tdict["class"])
        task = task_cls(tdict["params"])
        hooks.append(task)

    # Create the graph used for inference
    predictions, _, _ = create_inference_graph(
        model=model,
        input_pipeline=input_pipeline_infer,
        batch_size=FLAGS.batch_size)

    saver = tf.train.Saver()
    checkpoint_path = FLAGS.checkpoint_path
    if not checkpoint_path:
        checkpoint_path = tf.train.latest_checkpoint(FLAGS.model_dir)

    #def session_init_op(_scaffold, sess):
    #  saver.restore(sess, checkpoint_path)
    #  tf.logging.info("Restored model from %s", checkpoint_path)

    #scaffold = tf.train.Scaffold(init_fn=session_init_op)
    #session_creator = tf.train.ChiefSessionCreator(scaffold=scaffold)
    #with tf.train.MonitoredSession(
    #    session_creator=session_creator,
    #    hooks=hooks) as sess:

    #  # Run until the inputs are exhausted
    #  while not sess.should_stop():
    #    sess.run([])

    with tf.Session() as sess:
        print("Restoring checkpoint {} into session".format(checkpoint_path))
        saver.restore(sess, checkpoint_path)
        saved_model_path = os.path.join(FLAGS.model_dir, FLAGS.export_dir)
        if os.path.exists(saved_model_path):
            print("Removing old directory: {}".format(saved_model_path))
            shutil.rmtree(saved_model_path)
        print("Exporting SavedModel")
        builder = tf.saved_model.builder.SavedModelBuilder(saved_model_path)
        builder.add_meta_graph_and_variables(sess, ["fin_biseq2seq"])
        builder.save()
        print("Finished exporting SavedModel")
Example #10
def create_experiment(output_dir):
  """
  Creates a new Experiment instance.

  Args:
    output_dir: Output directory for model checkpoints and summaries.
  """

  config = run_config.RunConfig(
      tf_random_seed=FLAGS.tf_random_seed,
      save_checkpoints_secs=FLAGS.save_checkpoints_secs,
      save_checkpoints_steps=FLAGS.save_checkpoints_steps,
      keep_checkpoint_max=FLAGS.keep_checkpoint_max,
      keep_checkpoint_every_n_hours=FLAGS.keep_checkpoint_every_n_hours,
      gpu_memory_fraction=FLAGS.gpu_memory_fraction)
  config.tf_config.gpu_options.allow_growth = FLAGS.gpu_allow_growth
  config.tf_config.log_device_placement = FLAGS.log_device_placement

  train_options = training_utils.TrainOptions(
      model_class=FLAGS.model,
      model_params=FLAGS.model_params)
  # On the main worker, save training options
  if config.is_chief:
    gfile.MakeDirs(output_dir)
    train_options.dump(output_dir)

  bucket_boundaries = None
  if FLAGS.buckets:
    bucket_boundaries = list(map(int, FLAGS.buckets.split(",")))

  # Training data input pipeline
  train_input_pipeline = input_pipeline.make_input_pipeline_from_def(
      def_dict=FLAGS.input_pipeline_train,
      mode=tf.contrib.learn.ModeKeys.TRAIN)

  # Create training input function
  train_input_fn = training_utils.create_input_fn(
      pipeline=train_input_pipeline,
      batch_size=FLAGS.batch_size,
      bucket_boundaries=bucket_boundaries,
      scope="train_input_fn")

  # Development data input pipeline
  dev_input_pipeline = input_pipeline.make_input_pipeline_from_def(
      def_dict=FLAGS.input_pipeline_dev,
      mode=tf.contrib.learn.ModeKeys.EVAL,
      shuffle=False, num_epochs=1)

  # Create eval input function
  eval_input_fn = training_utils.create_input_fn(
      pipeline=dev_input_pipeline,
      batch_size=FLAGS.batch_size,
      allow_smaller_final_batch=True,
      scope="dev_input_fn")


  def model_fn(features, labels, params, mode):
    """Builds the model graph"""
    model = _create_from_dict({
        "class": train_options.model_class,
        "params": train_options.model_params
    }, models, mode=mode)
    return model(features, labels, params)

  estimator = tf.contrib.learn.Estimator(
      model_fn=model_fn,
      model_dir=output_dir,
      config=config,
      params=FLAGS.model_params)

  # Create hooks
  train_hooks = []
  for dict_ in FLAGS.hooks:
    hook = _create_from_dict(
        dict_, hooks,
        model_dir=estimator.model_dir,
        run_config=config)
    train_hooks.append(hook)

  # Create metrics
  eval_metrics = {}
  for dict_ in FLAGS.metrics:
    metric = _create_from_dict(dict_, metric_specs)
    eval_metrics[metric.name] = metric

  experiment = PatchedExperiment(
      estimator=estimator,
      train_input_fn=train_input_fn,
      eval_input_fn=eval_input_fn,
      min_eval_frequency=FLAGS.eval_every_n_steps,
      train_steps=FLAGS.train_steps,
      eval_steps=None,
      eval_metrics=eval_metrics,
      train_monitors=train_hooks)

  return experiment
Example #11
def main(_argv):
  """Program entry point.
  """

  # Load flags from config file
  if FLAGS.config_path:
    with gfile.GFile(FLAGS.config_path) as config_file:
      config_flags = yaml.safe_load(config_file)
      for flag_key, flag_value in config_flags.items():
        setattr(FLAGS, flag_key, flag_value)

  if isinstance(FLAGS.tasks, string_types):
    FLAGS.tasks = _maybe_load_yaml(FLAGS.tasks)

  if isinstance(FLAGS.input_pipeline, string_types):
    FLAGS.input_pipeline = _maybe_load_yaml(FLAGS.input_pipeline)

  input_pipeline_infer = input_pipeline.make_input_pipeline_from_def(
      FLAGS.input_pipeline, mode=tf.contrib.learn.ModeKeys.INFER,
      shuffle=False, num_epochs=1)

  # Load saved training options
  train_options = training_utils.TrainOptions.load(FLAGS.model_dir)

  # Create the model
  model_cls = locate(train_options.model_class) or \
    getattr(models, train_options.model_class)
  model_params = train_options.model_params
  model_params = _deep_merge_dict(
      model_params, _maybe_load_yaml(FLAGS.model_params))
  model = model_cls(
      params=model_params,
      mode=tf.contrib.learn.ModeKeys.INFER)

  # Load inference tasks
  hooks = []
  for tdict in FLAGS.tasks:
    if not "params" in tdict:
      tdict["params"] = {}
    task_cls = locate(tdict["class"]) or getattr(tasks, tdict["class"])
    task = task_cls(tdict["params"])
    hooks.append(task)

  # Create the graph used for inference
  predictions, _, _ = create_inference_graph(
      model=model,
      input_pipeline=input_pipeline_infer,
      batch_size=FLAGS.batch_size)

  saver = tf.train.Saver()
  checkpoint_path = FLAGS.checkpoint_path
  if not checkpoint_path:
    checkpoint_path = tf.train.latest_checkpoint(FLAGS.model_dir)

  def session_init_op(_scaffold, sess):
    saver.restore(sess, checkpoint_path)
    tf.logging.info("Restored model from %s", checkpoint_path)

  scaffold = tf.train.Scaffold(init_fn=session_init_op)
  session_creator = tf.train.ChiefSessionCreator(scaffold=scaffold)
  with tf.train.MonitoredSession(
      session_creator=session_creator,
      hooks=hooks) as sess:

    # Run until the inputs are exhausted
    while not sess.should_stop():
      sess.run([])
Example #12
def main(_argv):
    """Program entry point.
  """

    # Load flags from config file
    if FLAGS.config_path:
        with gfile.GFile(FLAGS.config_path) as config_file:
            config_flags = yaml.safe_load(config_file)
            for flag_key, flag_value in config_flags.items():
                setattr(FLAGS, flag_key, flag_value)

    if isinstance(FLAGS.tasks, string_types):
        FLAGS.tasks = _maybe_load_yaml(FLAGS.tasks)

    if isinstance(FLAGS.input_pipeline, string_types):
        FLAGS.input_pipeline = _maybe_load_yaml(FLAGS.input_pipeline)

    if isinstance(FLAGS.model_params, string_types):
        FLAGS.model_params = _maybe_load_yaml(FLAGS.model_params)

    if isinstance(FLAGS.models, string_types):
        FLAGS.models = _maybe_load_yaml(FLAGS.models)
        for mdict in FLAGS.models:
            if 'params' not in mdict:
                mdict['params'] = {}

    input_pipeline_infer = input_pipeline.make_input_pipeline_from_def(
        FLAGS.input_pipeline,
        mode=tf.contrib.learn.ModeKeys.INFER,
        shuffle=False,
        num_epochs=1)

    # ---------- Load models first to load model parameters ----------
    model_variables = []

    for mdict in FLAGS.models:
        # Load saved training options
        train_options = training_utils.TrainOptions.load(mdict['dir'])

        # Get the model class
        model_cls = locate(train_options.model_class) or getattr(
            model_clsses, train_options.model_class)

        # Load model params
        model_params = train_options.model_params
        model_params = _deep_merge_dict(model_params, mdict['params'])
        model_params = _deep_merge_dict(model_params, FLAGS.model_params)

        # Create model
        model = model_cls(params=model_params,
                          mode=tf.contrib.learn.ModeKeys.INFER)

        # Create computation graph
        predictions, _, _ = create_inference_graph(
            model=model,
            input_pipeline=input_pipeline_infer,
            batch_size=FLAGS.batch_size)

        # Get path to the checkpoint
        if 'checkpoint_path' in mdict:
            checkpoint_path = mdict['checkpoint_path']
        else:
            checkpoint_path = tf.train.latest_checkpoint(mdict['dir'])

        # Get Saver
        saver = tf.train.Saver()

        # Create session to load values
        with tf.Session() as sess:
            # Load model values from checkpoint
            saver.restore(sess, checkpoint_path)

            # List all variables
            variables = {}
            for var in tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES):
                variables[var.name] = var.eval()

            model_variables.append(variables)

        # Reset graph
        tf.reset_default_graph()

    # Create computation graph for ensemble
    models = []
    vocab_source = None
    vocab_target = None

    for n, (mdict, variables) in enumerate(zip(FLAGS.models, model_variables)):
        # Load saved training options
        train_options = training_utils.TrainOptions.load(mdict['dir'])

        # Get the model class
        model_cls = locate(train_options.model_class) or getattr(
            model_clsses, train_options.model_class)

        # Load model params
        model_params = train_options.model_params
        model_params = _deep_merge_dict(model_params, mdict['params'])
        model_params = _deep_merge_dict(model_params, FLAGS.model_params)

        # Create model
        model = model_cls(params=model_params,
                          mode=tf.contrib.learn.ModeKeys.INFER)

        models.append(model)

        # Predefine variables
        with tf.variable_scope('model{}'.format(n)):
            for name, value in variables.items():
                varname = name.split(':')[0]
                tf.get_variable(varname,
                                shape=value.shape,
                                initializer=tf.constant_initializer(value))

        # Create computation graph
        with tf.variable_scope('model{}'.format(n), reuse=True):
            predictions, _, _ = create_inference_graph(
                model=model,
                input_pipeline=input_pipeline_infer,
                batch_size=FLAGS.batch_size)

        # Get vocab information
        if 'vocab_source' in model_params:
            vocab_source = vocab_source or model_params['vocab_source']
            assert vocab_source == model_params['vocab_source'], \
                'Source vocabularies do not match'
        if 'vocab_target' in model_params:
            vocab_target = vocab_target or model_params['vocab_target']
            assert vocab_target == model_params['vocab_target'], \
                'Target vocabularies do not match'

    # Fill vocab info of model_params
    if vocab_source:
        FLAGS.model_params['vocab_source'] = vocab_source
    if vocab_target:
        FLAGS.model_params['vocab_target'] = vocab_target

    # Create Ensemble Models
    ensemble_model = EnsembleModel(models=models, params=FLAGS.model_params)

    # Create Computation Graph
    predictions, _, _ = create_inference_graph(ensemble_model,
                                               input_pipeline_infer,
                                               FLAGS.batch_size)


    # Load inference tasks
    hooks = []
    for tdict in FLAGS.tasks:
        if not "params" in tdict:
            tdict["params"] = {}
        task_cls = locate(tdict["class"]) or getattr(tasks, tdict["class"])
        task = task_cls(tdict["params"])
        hooks.append(task)

    with tf.train.MonitoredSession(hooks=hooks) as sess:

        # Run until the inputs are exhausted
        while not sess.should_stop():
            sess.run([])
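
Reading each checkpoint into a Python dict, done above with a throwaway tf.Session plus saver.restore, can also be done without building a graph at all. A sketch using the TF 1.x checkpoint reader; note that checkpoint keys lack the ":0" suffix that var.name carries:

reader = tf.train.load_checkpoint(checkpoint_path)
variables = {
    name + ":0": reader.get_tensor(name)
    for name in reader.get_variable_to_shape_map()
}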
Example #13
def create_estimator_and_specs(output_dir):
    session_config = tf.ConfigProto(log_device_placement=True,
                                    allow_soft_placement=True)
    session_config.gpu_options.allow_growth = FLAGS.gpu_allow_growth
    session_config.gpu_options.per_process_gpu_memory_fraction = (
        FLAGS.gpu_memory_fraction)
    config = tf.estimator.RunConfig(
        tf_random_seed=FLAGS.tf_random_seed,
        save_checkpoints_secs=FLAGS.save_checkpoints_secs,
        save_checkpoints_steps=FLAGS.save_checkpoints_steps,
        session_config=session_config,
        keep_checkpoint_max=FLAGS.keep_checkpoint_max,
        keep_checkpoint_every_n_hours=FLAGS.keep_checkpoint_every_n_hours)

    train_options = training_utils.TrainOptions(
        model_class=FLAGS.model, model_params=FLAGS.model_params)
    # On the main worker, save training options
    if config.is_chief:
        gfile.MakeDirs(output_dir)
        train_options.dump(output_dir)

    bucket_boundaries = None
    if FLAGS.buckets:
        bucket_boundaries = list(map(int, FLAGS.buckets.split(",")))

    # Training data input pipeline
    train_input_pipeline = input_pipeline.make_input_pipeline_from_def(
        def_dict=FLAGS.input_pipeline_train,
        mode=tf.contrib.learn.ModeKeys.TRAIN)

    # Create training input function
    train_input_fn = training_utils.create_input_fn(
        pipeline=train_input_pipeline,
        batch_size=FLAGS.batch_size,
        bucket_boundaries=bucket_boundaries,
        scope="train_input_fn")

    # Development data input pipeline
    dev_input_pipeline = input_pipeline.make_input_pipeline_from_def(
        def_dict=FLAGS.input_pipeline_dev,
        mode=tf.contrib.learn.ModeKeys.EVAL,
        shuffle=False,
        num_epochs=1)

    # Create eval input function
    eval_input_fn = training_utils.create_input_fn(
        pipeline=dev_input_pipeline,
        batch_size=FLAGS.batch_size,
        allow_smaller_final_batch=True,
        scope="dev_input_fn")

    def model_fn(features, labels, params, mode):
        """Builds the model graph"""
        model = _create_from_dict(
            {
                "class": train_options.model_class,
                "params": train_options.model_params
            },
            models,
            mode=mode)
        (predictions, loss, train_op) = model(features, labels, params)

        # Create metrics
        eval_metrics = {}
        for dict_ in FLAGS.metrics:
            metric = _create_from_dict(dict_, metric_specs)
            eval_metrics[metric.name] = metric(features, labels, predictions)

        return tf.estimator.EstimatorSpec(mode=mode,
                                          predictions=predictions,
                                          loss=loss,
                                          train_op=train_op,
                                          eval_metric_ops=eval_metrics)

    estimator = tf.estimator.Estimator(model_fn=model_fn,
                                       model_dir=output_dir,
                                       config=config,
                                       params=FLAGS.model_params)

    # Create hooks
    train_hooks = []
    for dict_ in FLAGS.hooks:
        hook = _create_from_dict(dict_,
                                 hooks,
                                 model_dir=estimator.model_dir,
                                 run_config=config)
        train_hooks.append(hook)

    train_spec = tf.estimator.TrainSpec(input_fn=train_input_fn,
                                        max_steps=FLAGS.train_steps,
                                        hooks=train_hooks)
    eval_spec = tf.estimator.EvalSpec(input_fn=eval_input_fn)
    return (estimator, train_spec, eval_spec)
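
The returned triple is intended for tf.estimator.train_and_evaluate, which runs the training/evaluation loop described by the two specs (output_dir as above):

estimator, train_spec, eval_spec = create_estimator_and_specs(output_dir)
tf.estimator.train_and_evaluate(estimator, train_spec, eval_spec)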