Example #1
def main(_argv):
    """The entrypoint for the script"""

    # Parse YAML FLAGS
    FLAGS.hooks = _maybe_load_yaml(FLAGS.hooks)
    FLAGS.metrics = _maybe_load_yaml(FLAGS.metrics)
    FLAGS.model_params = _maybe_load_yaml(FLAGS.model_params)
    FLAGS.input_pipeline_train = _maybe_load_yaml(FLAGS.input_pipeline_train)
    FLAGS.input_pipeline_dev = _maybe_load_yaml(FLAGS.input_pipeline_dev)

    # Load flags from config file
    final_config = {}
    if FLAGS.config_paths:
        for config_path in FLAGS.config_paths.split(","):
            config_path = config_path.strip()
            if not config_path:
                continue
            config_path = os.path.abspath(config_path)
            tf.logging.info("Loading config from %s", config_path)
            with gfile.GFile(config_path.strip()) as config_file:
                config_flags = yaml.load(config_file)
                # Merge the flags and values from all config files into one dict
                final_config = _deep_merge_dict(final_config, config_flags)

    # Log the merged flags and values read from all config files
    tf.logging.info("Final Config:\n%s", yaml.dump(final_config))

    # Merge flags with config values
    # Map the merged config values onto the corresponding FLAGS entries
    for flag_key, flag_value in final_config.items():
        if hasattr(FLAGS, flag_key) and isinstance(getattr(FLAGS, flag_key),
                                                   dict):
            # Merge the values already defined on FLAGS with the new values
            # from the config files
            merged_value = _deep_merge_dict(flag_value,
                                            getattr(FLAGS, flag_key))
            setattr(FLAGS, flag_key, merged_value)
        elif hasattr(FLAGS, flag_key):
            setattr(FLAGS, flag_key, flag_value)
        else:
            tf.logging.warning("Ignoring config flag: %s", flag_key)

    if FLAGS.save_checkpoints_secs is None \
      and FLAGS.save_checkpoints_steps is None:
        FLAGS.save_checkpoints_secs = 600
        tf.logging.info("Setting save_checkpoints_secs to %d",
                        FLAGS.save_checkpoints_secs)

    if not FLAGS.output_dir:
        FLAGS.output_dir = tempfile.mkdtemp()  # Create a temporary directory

    if not FLAGS.input_pipeline_train:
        raise ValueError("You must specify input_pipeline_train")

    if not FLAGS.input_pipeline_dev:
        raise ValueError("You must specify input_pipeline_dev")

    learn_runner.run(experiment_fn=create_experiment,
                     output_dir=FLAGS.output_dir,
                     schedule=FLAGS.schedule)
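
All of these examples lean on two helpers that none of the snippets define:
_maybe_load_yaml and _deep_merge_dict. As a hedged sketch, consistent with how
the examples call them but not necessarily the exact library code, they might
look like this:

import six
import yaml


def _maybe_load_yaml(item):
    """Parse `item` as YAML only if it is a string; pass dicts through."""
    if isinstance(item, six.string_types):
        # The examples call yaml.load(); yaml.safe_load() is the safer choice
        return yaml.load(item)
    elif isinstance(item, dict):
        return item
    raise ValueError("Got {}, expected YAML string or dict".format(type(item)))


def _deep_merge_dict(dict_x, dict_y):
    """Recursively merge dict_y into dict_x; dict_y wins on leaf conflicts."""
    for key, value in dict_y.items():
        if (key in dict_x and isinstance(dict_x[key], dict)
                and isinstance(value, dict)):
            _deep_merge_dict(dict_x[key], value)
        else:
            dict_x[key] = value
    return dict_x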
Example #2
def main(_argv):
    """The entrypoint for the script"""

    # Parse YAML FLAGS
    FLAGS.hooks = _maybe_load_yaml(FLAGS.hooks)
    FLAGS.metrics = _maybe_load_yaml(FLAGS.metrics)
    FLAGS.model_params = _maybe_load_yaml(FLAGS.model_params)
    FLAGS.input_pipeline_train = _maybe_load_yaml(FLAGS.input_pipeline_train)
    FLAGS.input_pipeline_dev = _maybe_load_yaml(FLAGS.input_pipeline_dev)

    # Load flags from config file
    final_config = {}
    if FLAGS.config_paths:
        for config_path in FLAGS.config_paths.split(","):
            config_path = config_path.strip()
            if not config_path:
                continue
            config_path = os.path.abspath(config_path)
            tf.logging.info("Loading config from %s", config_path)
            with gfile.GFile(config_path.strip()) as config_file:
                config_flags = yaml.load(config_file)
                final_config = _deep_merge_dict(final_config, config_flags)

    tf.logging.info("Final Config:\n%s", yaml.dump(final_config))

    # Merge flags with config values
    for flag_key, flag_value in final_config.items():
        if hasattr(FLAGS, flag_key) and isinstance(getattr(FLAGS, flag_key),
                                                   dict):
            merged_value = _deep_merge_dict(flag_value,
                                            getattr(FLAGS, flag_key))
            setattr(FLAGS, flag_key, merged_value)
        elif hasattr(FLAGS, flag_key):
            setattr(FLAGS, flag_key, flag_value)
        else:
            tf.logging.warning("Ignoring config flag: %s", flag_key)

    if FLAGS.save_checkpoints_secs is None \
      and FLAGS.save_checkpoints_steps is None:
        FLAGS.save_checkpoints_secs = 600
        tf.logging.info("Setting save_checkpoints_secs to %d",
                        FLAGS.save_checkpoints_secs)

    if not FLAGS.output_dir:
        FLAGS.output_dir = tempfile.mkdtemp()

    if not FLAGS.input_pipeline_train:
        raise ValueError("You must specify input_pipeline_train")

    if not FLAGS.input_pipeline_dev:
        raise ValueError("You must specify input_pipeline_dev")

    (estimator, train_spec,
     eval_spec) = create_estimator_and_specs(output_dir=FLAGS.output_dir)
    tf.estimator.train_and_evaluate(estimator, train_spec, eval_spec)
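
Example #2 replaces the deprecated learn_runner/Experiment flow with the
tf.estimator train-and-evaluate loop. create_estimator_and_specs is not shown
anywhere in these examples; a hypothetical sketch of the shape it must have
(model_fn, the two input functions, and FLAGS.train_steps are all assumptions):

def create_estimator_and_specs(output_dir):
    """Build an Estimator plus TrainSpec/EvalSpec from the parsed FLAGS."""
    run_config = tf.estimator.RunConfig(
        model_dir=output_dir,
        save_checkpoints_secs=FLAGS.save_checkpoints_secs)
    estimator = tf.estimator.Estimator(
        model_fn=model_fn,  # assumed: built from FLAGS.model_params
        config=run_config)
    train_spec = tf.estimator.TrainSpec(
        input_fn=train_input_fn,  # assumed: from FLAGS.input_pipeline_train
        max_steps=FLAGS.train_steps)  # assumed flag
    eval_spec = tf.estimator.EvalSpec(
        input_fn=eval_input_fn)  # assumed: from FLAGS.input_pipeline_dev
    return estimator, train_spec, eval_spec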
Example #3
def main(_argv):
  """The entrypoint for the script"""

  # Parse YAML FLAGS
  FLAGS.hooks = _maybe_load_yaml(FLAGS.hooks)
  FLAGS.metrics = _maybe_load_yaml(FLAGS.metrics)
  FLAGS.model_params = _maybe_load_yaml(FLAGS.model_params)
  FLAGS.input_pipeline_train = _maybe_load_yaml(FLAGS.input_pipeline_train)
  FLAGS.input_pipeline_dev = _maybe_load_yaml(FLAGS.input_pipeline_dev)

  # Load flags from config file
  final_config = {}
  if FLAGS.config_paths:
    for config_path in FLAGS.config_paths.split(","):
      config_path = config_path.strip()
      if not config_path:
        continue
      config_path = os.path.abspath(config_path)
      tf.logging.info("Loading config from %s", config_path)
      with gfile.GFile(config_path.strip()) as config_file:
        config_flags = yaml.load(config_file)
        final_config = _deep_merge_dict(final_config, config_flags)

  tf.logging.info("Final Config:\n%s", yaml.dump(final_config))

  # Merge flags with config values
  for flag_key, flag_value in final_config.items():
    if hasattr(FLAGS, flag_key) and isinstance(getattr(FLAGS, flag_key), dict):
      merged_value = _deep_merge_dict(flag_value, getattr(FLAGS, flag_key))
      setattr(FLAGS, flag_key, merged_value)
    elif hasattr(FLAGS, flag_key):
      setattr(FLAGS, flag_key, flag_value)
    else:
      tf.logging.warning("Ignoring config flag: %s", flag_key)

  if FLAGS.save_checkpoints_secs is None \
    and FLAGS.save_checkpoints_steps is None:
    FLAGS.save_checkpoints_secs = 600
    tf.logging.info("Setting save_checkpoints_secs to %d",
                    FLAGS.save_checkpoints_secs)

  if not FLAGS.output_dir:
    FLAGS.output_dir = tempfile.mkdtemp()

  if not FLAGS.input_pipeline_train:
    raise ValueError("You must specify input_pipeline_train")

  if not FLAGS.input_pipeline_dev:
    raise ValueError("You must specify input_pipeline_dev")

  learn_runner.run(
      experiment_fn=create_experiment,
      output_dir=FLAGS.output_dir,
      schedule=FLAGS.schedule)
Example #4
def infer(flags_tasks, model_dir, flags_model_params):

    flags_tasks = _maybe_load_yaml(flags_tasks)

    # Load saved training options
    train_options = training_utils.TrainOptions.load(model_dir)

    # Create the model
    model_cls = locate(train_options.model_class) or \
                getattr(models, train_options.model_class)
    model_params = train_options.model_params

    model_params = _deep_merge_dict(
        model_params, _maybe_load_yaml(flags_model_params))
    model = model_cls(
        params=model_params,
        mode=tf.contrib.learn.ModeKeys.INFER)

    # Load inference tasks
    hooks = []
    for tdict in flags_tasks:
        if "params" not in tdict:
            tdict["params"] = {}
        task_cls = locate(tdict["class"]) or getattr(tasks, tdict["class"])
        task = task_cls(tdict["params"])
        hooks.append(task)

    return model, hooks
Example #5
def main(_argv):
  """The entrypoint for the script"""

  # Parse YAML FLAGS
  FLAGS.hooks = _maybe_load_yaml(FLAGS.hooks)
  FLAGS.metrics = _maybe_load_yaml(FLAGS.metrics)
  FLAGS.model_params = _maybe_load_yaml(FLAGS.model_params)
  FLAGS.input_pipeline_train = _maybe_load_yaml(FLAGS.input_pipeline_train)
  FLAGS.input_pipeline_dev = _maybe_load_yaml(FLAGS.input_pipeline_dev)

  # Load flags from config file
  final_config = {}
  if FLAGS.config_paths:
    for config_path in FLAGS.config_paths.split(","):
      config_path = config_path.strip()
      if not config_path:
        continue
      config_path = os.path.abspath(config_path)
      tf.logging.info("Loading config from %s", config_path)
      with gfile.GFile(config_path.strip()) as config_file:
        config_flags = yaml.load(config_file)
        final_config = _deep_merge_dict(final_config, config_flags)

  tf.logging.info("Final Config:\n%s", yaml.dump(final_config))

  # Merge flags with config values
  for flag_key, flag_value in final_config.items():
    if hasattr(FLAGS, flag_key) and isinstance(getattr(FLAGS, flag_key), dict):
      merged_value = _deep_merge_dict(flag_value, getattr(FLAGS, flag_key))
      setattr(FLAGS, flag_key, merged_value)
    else:
      setattr(FLAGS, flag_key, flag_value)

  if FLAGS.save_checkpoints_secs is None \
    and FLAGS.save_checkpoints_steps is None:
    FLAGS.save_checkpoints_secs = 600
    tf.logging.info("Setting save_checkpoints_secs to %d",
                    FLAGS.save_checkpoints_secs)

  if not FLAGS.output_dir:
    FLAGS.output_dir = tempfile.mkdtemp()

  learn_runner.run(
      experiment_fn=create_experiment,
      output_dir=FLAGS.output_dir,
      schedule=FLAGS.schedule)
Example #6
    def __init__(self, _argv):
        if isinstance(FLAGS.tasks, string_types):
            FLAGS.tasks = _maybe_load_yaml(FLAGS.tasks)

        # Load saved training options
        train_options = training_utils.TrainOptions.load(FLAGS.model_dir)

        # Create the model
        model_cls = locate(train_options.model_class) or \
          getattr(models, train_options.model_class)
        model_params = train_options.model_params
        model_params = _deep_merge_dict(model_params,
                                        _maybe_load_yaml(FLAGS.model_params))
        self.model = model_cls(params=model_params,
                               mode=tf.contrib.learn.ModeKeys.INFER)

        # Load inference tasks
        hooks = []
        for tdict in FLAGS.tasks:
            if "params" not in tdict:
                tdict["params"] = {}
            task_cls = locate(tdict["class"]) or getattr(tasks, tdict["class"])
            task = task_cls(tdict["params"])
            hooks.append(task)

        # Create the graph used for inference
        self.input_data = tf.placeholder(tf.string)
        self.tokens = tf.string_split(self.input_data).values
        self.length = tf.size(self.tokens)
        self.features = {
            'source_tokens': tf.expand_dims(self.tokens, 0),
            'source_len': tf.expand_dims(self.length, 0)
        }
        self.predictions, _, _ = self.model(features=self.features,
                                            labels=None,
                                            params=None)
        saver = tf.train.Saver()
        checkpoint_path = FLAGS.checkpoint_path
        if not checkpoint_path:
            checkpoint_path = tf.train.latest_checkpoint(FLAGS.model_dir)

        def session_init_op(_scaffold, sess):
            saver.restore(sess, checkpoint_path)
            tf.logging.info("Restored model from %s", checkpoint_path)

        scaffold = tf.train.Scaffold(init_fn=session_init_op)
        session_creator = tf.train.ChiefSessionCreator(scaffold=scaffold)
        self.sess = tf.train.MonitoredSession(session_creator=session_creator,
                                              hooks=hooks)
        print('Model initialized')
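
A hypothetical usage of the wrapper above; the class name is an assumption,
and the 'predicted_tokens' key is borrowed from Example #7:

predictor = InferenceWrapper(None)  # class name assumed; _argv is unused
output = predictor.sess.run(
    predictor.predictions['predicted_tokens'],
    feed_dict={predictor.input_data: ['15 11 5 15 SEQUENCE_END']})
# output[0] is the token sequence for the single batched input
print(output[0])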
Example #7
def main(_argv):
  """Program entry point.
  """

  # Load flags from config file
  if FLAGS.config_path:
    with gfile.GFile(FLAGS.config_path) as config_file:
      config_flags = yaml.load(config_file)
      for flag_key, flag_value in config_flags.items():
        setattr(FLAGS, flag_key, flag_value)

  if isinstance(FLAGS.tasks, string_types):
    FLAGS.tasks = _maybe_load_yaml(FLAGS.tasks)

  # if isinstance(FLAGS.input_pipeline, string_types):
  #   FLAGS.input_pipeline = _maybe_load_yaml(FLAGS.input_pipeline)

  # input_pipeline_infer = input_pipeline.make_input_pipeline_from_def(
  #     FLAGS.input_pipeline, mode=tf.contrib.learn.ModeKeys.INFER,
  #     shuffle=False, num_epochs=1)

  # Load saved training options
  train_options = training_utils.TrainOptions.load(FLAGS.model_dir)

  # Create the model
  model_cls = locate(train_options.model_class) or \
    getattr(models, train_options.model_class)
  model_params = train_options.model_params
  model_params = _deep_merge_dict(
      model_params, _maybe_load_yaml(FLAGS.model_params))
  model = model_cls(
      params=model_params,
      mode=tf.contrib.learn.ModeKeys.INFER)

  # Load inference tasks
  hooks = []
  print(FLAGS.tasks)
  for tdict in FLAGS.tasks:
    if "params" not in tdict:
      tdict["params"] = {}
    task_cls = locate(tdict["class"]) or getattr(tasks, tdict["class"])
    task = task_cls(tdict["params"])
    hooks.append(task)

  # Create the graph used for inference
  input_data = tf.placeholder(tf.string)
  tokens = tf.string_split(input_data).values
  length = tf.size(tokens)
  # features = {'source_tokens': tf.expand_dims(tokens, 0),
  #             'source_len':    tf.expand_dims(length, 0)}
  features = {'source_tokens': tf.stack([tokens, tokens]),
              'source_len':    tf.stack([length, length])}
  predictions, _, _ = model(features=features, labels=None, params=None)
  # predictions, _, _ = create_inference_graph(
  #     model=model,
  #     input_pipeline=input_pipeline_infer,
  #     batch_size=FLAGS.batch_size)

  saver = tf.train.Saver()
  checkpoint_path = FLAGS.checkpoint_path
  if not checkpoint_path:
    checkpoint_path = tf.train.latest_checkpoint(FLAGS.model_dir)

  def session_init_op(_scaffold, sess):
    saver.restore(sess, checkpoint_path)
    tf.logging.info("Restored model from %s", checkpoint_path)

  scaffold = tf.train.Scaffold(init_fn=session_init_op)
  session_creator = tf.train.ChiefSessionCreator(scaffold=scaffold)
  with tf.train.MonitoredSession(
      session_creator=session_creator,
      hooks=hooks) as sess:

    # Run until the inputs are exhausted
    # while not sess.should_stop():
    #   sess.run([])
    output = sess.run(
        [predictions[u'predicted_tokens']],
        feed_dict={
            input_data:
            [u'15 11 5 15 0 2 2 15 15 0 7 10 4 4 12 12 16 SEQUENCE_END']
        })
    print(output[0][0])
    print(u' '.join(output[0][0][:-1]))
Example #8
    return s


fl_tasks = _maybe_load_yaml(str(input_task_list))
fl_input_pipeline = _maybe_load_yaml(str(input_pipeline_dict))

# Load saved training options
train_options = training_utils.TrainOptions.load(model_dir_input)

# Create the model
model_cls = locate(train_options.model_class) or \
    getattr(models, train_options.model_class)
model_params = train_options.model_params
if ARGS.beam_width != 1:
    model_params["inference.beam_search.beam_width"] = ARGS.beam_width
# Note: this merges model_params with itself, which is a no-op since
# _maybe_load_yaml passes dicts through; the analogous examples merge in a
# separate YAML flag (e.g. flags_model_params) here instead.
model_params = _deep_merge_dict(model_params, _maybe_load_yaml(model_params))
model = model_cls(params=model_params, mode=tf.contrib.learn.ModeKeys.INFER)

print("========model params ==========", model_params)


def _handle_attention(attention_scores):
    print(">>> Saved attention scores")


def _save_prediction_to_dict(output_string):
    # http://bcho.tistory.com/1173
    global decoded_string
    decoded_string = output_string

Example #9
def main(_argv):
    """Program entry point.
  """

    # Load flags from config file
    if FLAGS.config_path:
        with gfile.GFile(FLAGS.config_path) as config_file:
            config_flags = yaml.load(config_file)
            for flag_key, flag_value in config_flags.items():
                setattr(FLAGS, flag_key, flag_value)

    if isinstance(FLAGS.tasks, string_types):
        FLAGS.tasks = _maybe_load_yaml(FLAGS.tasks)

    if isinstance(FLAGS.input_pipeline, string_types):
        FLAGS.input_pipeline = _maybe_load_yaml(FLAGS.input_pipeline)

    input_pipeline_infer = input_pipeline.make_input_pipeline_from_def(
        FLAGS.input_pipeline,
        mode=tf.contrib.learn.ModeKeys.INFER,
        shuffle=False,
        num_epochs=1)

    # Create output dir
    if not os.path.exists(FLAGS.model_dir + '/pred'):
        os.makedirs(FLAGS.model_dir + '/pred')

    # Load saved training options
    train_options = training_utils.TrainOptions.load(FLAGS.model_dir)

    # Create the model
    model_cls = locate(train_options.model_class) or \
      getattr(models, train_options.model_class)
    model_params = train_options.model_params
    model_params = _deep_merge_dict(model_params,
                                    _maybe_load_yaml(FLAGS.model_params))
    model = model_cls(params=model_params,
                      mode=tf.contrib.learn.ModeKeys.INFER)

    # Load inference tasks
    hooks = []
    for tdict in FLAGS.tasks:
        if "params" not in tdict:
            tdict["params"] = {}
        task_cls = locate(tdict["class"]) or getattr(tasks, tdict["class"])
        print("******", task_cls)
        task = task_cls(tdict["params"],
                        callback_func=_save_prediction_to_dict)

        hooks.append(task)

    # Create the graph used for inference
    predictions, _, _ = create_inference_graph(
        model=model,
        input_pipeline=input_pipeline_infer,
        batch_size=FLAGS.batch_size)

    saver = tf.train.Saver()
    checkpoint_path = FLAGS.checkpoint_path
    if not checkpoint_path:
        checkpoint_path = tf.train.latest_checkpoint(FLAGS.model_dir)

    def session_init_op(_scaffold, sess):
        saver.restore(sess, checkpoint_path)
        tf.logging.info("Restored model from %s", checkpoint_path)

    scaffold = tf.train.Scaffold(init_fn=session_init_op)
    session_creator = tf.train.ChiefSessionCreator(scaffold=scaffold)
    with tf.train.MonitoredSession(session_creator=session_creator,
                                   hooks=hooks) as sess:

        # Run until the inputs are exhausted
        while not sess.should_stop():
            sess.run([])
Example #10
def main(_argv):
    """Program entry point.
  """

    # Load flags from config file
    if FLAGS.config_path:
        with gfile.GFile(FLAGS.config_path) as config_file:
            config_flags = yaml.load(config_file)
            for flag_key, flag_value in config_flags.items():
                setattr(FLAGS, flag_key, flag_value)

    if isinstance(FLAGS.tasks, string_types):
        FLAGS.tasks = _maybe_load_yaml(FLAGS.tasks)

    if isinstance(FLAGS.input_pipeline, string_types):
        FLAGS.input_pipeline = _maybe_load_yaml(FLAGS.input_pipeline)

    if isinstance(FLAGS.model_params, string_types):
        FLAGS.model_params = _maybe_load_yaml(FLAGS.model_params)

    if isinstance(FLAGS.models, string_types):
        FLAGS.models = _maybe_load_yaml(FLAGS.models)
        for mdict in FLAGS.models:
            if 'params' not in mdict:
                mdict['params'] = {}

    input_pipeline_infer = input_pipeline.make_input_pipeline_from_def(
        FLAGS.input_pipeline,
        mode=tf.contrib.learn.ModeKeys.INFER,
        shuffle=False,
        num_epochs=1)

    # ---------- Load Models First to Load Model Parameters ----------
    model_variables = []

    for mdict in FLAGS.models:
        # Load saved training options
        train_options = training_utils.TrainOptions.load(mdict['dir'])

        # Get the model class
        model_cls = locate(train_options.model_class) or getattr(
            model_clsses, train_options.model_class)

        # Load model params
        model_params = train_options.model_params
        model_params = _deep_merge_dict(model_params, mdict['params'])
        model_params = _deep_merge_dict(model_params, FLAGS.model_params)

        # Create model
        model = model_cls(params=model_params,
                          mode=tf.contrib.learn.ModeKeys.INFER)

        # Create computation graph
        predictions, _, _ = create_inference_graph(
            model=model,
            input_pipeline=input_pipeline_infer,
            batch_size=FLAGS.batch_size)

        # Get path to the checkpoint
        if 'checkpoint_path' in mdict:
            checkpoint_path = mdict['checkpoint_path']
        else:
            checkpoint_path = tf.train.latest_checkpoint(mdict['dir'])

        # Get Saver
        saver = tf.train.Saver()

        # Create session to load values
        with tf.Session() as sess:
            # Load model values from checkpoint
            saver.restore(sess, checkpoint_path)

            # List all variables
            variables = {}
            for var in tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES):
                variables[var.name] = var.eval()

            model_variables.append(variables)

        # Reset graph
        tf.reset_default_graph()

    # Create computation graph for ensemble
    models = []
    vocab_source = None
    vocab_target = None

    for n, (mdict, variables) in enumerate(zip(FLAGS.models, model_variables)):
        # Load saved training options
        train_options = training_utils.TrainOptions.load(mdict['dir'])

        # Get the model class
        model_cls = locate(train_options.model_class) or getattr(
            model_clsses, train_options.model_class)

        # Load model params
        model_params = train_options.model_params
        model_params = _deep_merge_dict(model_params, mdict['params'])
        model_params = _deep_merge_dict(model_params, FLAGS.model_params)

        # Create model
        model = model_cls(params=model_params,
                          mode=tf.contrib.learn.ModeKeys.INFER)

        models.append(model)

        # Predefine variables
        with tf.variable_scope('model{}'.format(n)):
            for name, value in variables.items():
                varname = name.split(':')[0]
                tf.get_variable(varname,
                                shape=value.shape,
                                initializer=tf.constant_initializer(value))

        # Create computation graph
        with tf.variable_scope('model{}'.format(n), reuse=True):
            predictions, _, _ = create_inference_graph(
                model=model,
                input_pipeline=input_pipeline_infer,
                batch_size=FLAGS.batch_size)

        # Get vocab information
        if 'vocab_source' in model_params:
            vocab_source = vocab_source or model_params['vocab_source']
            assert vocab_source == model_params['vocab_source'], \
                'Vocab Not Match'
        if 'vocab_target' in model_params:
            vocab_target = vocab_target or model_params['vocab_target']
            assert vocab_target == model_params['vocab_target'], \
                'Vocab Not Match'

    # Fill vocab info of model_params
    if vocab_source:
        FLAGS.model_params['vocab_source'] = vocab_source
    if vocab_target:
        FLAGS.model_params['vocab_target'] = vocab_target

    # Create Ensemble Models
    ensemble_model = EnsembleModel(models=models, params=FLAGS.model_params)

    # Create Computation Graph
    predictions, _, _ = create_inference_graph(ensemble_model,
                                               input_pipeline_infer,
                                               FLAGS.batch_size)

    # DEBUG
    #for var in tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES):
    #  print(var.name)

    #exit();

    # Load inference tasks
    hooks = []
    for tdict in FLAGS.tasks:
        if "params" not in tdict:
            tdict["params"] = {}
        task_cls = locate(tdict["class"]) or getattr(tasks, tdict["class"])
        task = task_cls(tdict["params"])
        hooks.append(task)

    with tf.train.MonitoredSession(hooks=hooks) as sess:

        # Run until the inputs are exhausted
        while not sess.should_stop():
            sess.run([])
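
The ensemble example above hinges on one trick: every checkpoint is read into
numpy arrays in a throwaway session, and the values are then re-created as
constant-initialized variables inside a per-model scope so all models can live
in one graph. Because each variable now carries its values in its initializer,
the final MonitoredSession needs no checkpoint restore. A minimal sketch of
that pattern (it assumes the model graph has already been built in the default
graph before the call):

def preload_as_constants(checkpoint_path, scope_name):
    """Copy checkpoint values into constant-initialized variables."""
    saver = tf.train.Saver()
    with tf.Session() as sess:
        saver.restore(sess, checkpoint_path)
        values = {v.name: sess.run(v) for v in tf.global_variables()}
    tf.reset_default_graph()
    with tf.variable_scope(scope_name):
        for name, value in values.items():
            tf.get_variable(name.split(':')[0], shape=value.shape,
                            initializer=tf.constant_initializer(value))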
Example #11
def main(_argv):
    """The entrypoint for the script"""

    # Parse YAML FLAGS
    FLAGS.hooks = _maybe_load_yaml(FLAGS.hooks)
    FLAGS.metrics = _maybe_load_yaml(FLAGS.metrics)
    FLAGS.model_params = _maybe_load_yaml(FLAGS.model_params)
    FLAGS.input_pipeline_train = _maybe_load_yaml(FLAGS.input_pipeline_train)
    FLAGS.input_pipeline_dev = _maybe_load_yaml(FLAGS.input_pipeline_dev)

    # Load flags from config file
    final_config = {}
    if FLAGS.config_paths:
        for config_path in FLAGS.config_paths.split(","):
            config_path = config_path.strip()
            if not config_path:
                continue
            config_path = os.path.abspath(config_path)
            tf.logging.info("Loading config from %s", config_path)
            with gfile.GFile(config_path.strip()) as config_file:
                config_flags = yaml.load(config_file)
                final_config = _deep_merge_dict(final_config, config_flags)

    tf.logging.info("Flags from file:\n%s", yaml.dump(final_config))

    # Merge flags with config values
    for flag_key, flag_value in final_config.items():
        if hasattr(FLAGS, flag_key) and isinstance(getattr(FLAGS, flag_key),
                                                   dict):
            merged_value = _deep_merge_dict(flag_value,
                                            getattr(FLAGS, flag_key))
            setattr(FLAGS, flag_key, merged_value)
        elif hasattr(FLAGS, flag_key):
            setattr(FLAGS, flag_key, flag_value)
        else:
            tf.logging.warning("Ignoring config flag: %s", flag_key)

    if FLAGS.save_checkpoints_secs is None \
      and FLAGS.save_checkpoints_steps is None:
        FLAGS.save_checkpoints_secs = 600
        tf.logging.info("Setting save_checkpoints_secs to %d",
                        FLAGS.save_checkpoints_secs)

    if not FLAGS.output_dir:
        FLAGS.output_dir = tempfile.mkdtemp()

    if not FLAGS.input_pipeline_train:
        raise ValueError("You must specify input_pipeline_train")

    if not FLAGS.input_pipeline_dev:
        raise ValueError("You must specify input_pipeline_dev")

    # Expand the train and dev source/target file lists
    train_source_files = FLAGS.input_pipeline_train["params"]["source_files"]
    train_target_files = FLAGS.input_pipeline_train["params"]["target_files"]
    FLAGS.input_pipeline_train["params"]["source_files"] = expand_paths(
        train_source_files)
    FLAGS.input_pipeline_train["params"]["target_files"] = expand_paths(
        train_target_files)
    dev_source_files = FLAGS.input_pipeline_dev["params"]["source_files"]
    dev_target_files = FLAGS.input_pipeline_dev["params"]["target_files"]
    FLAGS.input_pipeline_dev["params"]["source_files"] = expand_paths(
        dev_source_files)
    FLAGS.input_pipeline_dev["params"]["target_files"] = expand_paths(
        dev_target_files)

    tf.logging.info("now flags:")
    tf.logging.info(FLAGS.__dict__["__flags"])

    # Get and set the distributed training schedule here
    config = get_run_config()
    schedule = get_distributed_schedule(config)
    setattr(FLAGS, "schedule", schedule)
    tf.logging.warn("{}'s schedule: {}".format(config.master, schedule))

    tf.logging.warn("flags: %s", FLAGS.__dict__["__flags"])

    if FLAGS.clear_output_dir:
        if os.path.exists(FLAGS.output_dir):
            shutil.rmtree(FLAGS.output_dir)
            tf.logging.debug("rm output dir:{}".format(FLAGS.output_dir))

    learn_runner.run(experiment_fn=create_experiment,
                     output_dir=FLAGS.output_dir,
                     schedule=FLAGS.schedule)
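
expand_paths does not appear in any other example; a plausible sketch,
assuming it turns glob patterns into concrete file lists (the behavior is
inferred from how it is called above):

import glob


def expand_paths(patterns):
    """Expand each (possibly wildcard) path pattern into matching files."""
    expanded = []
    for pattern in patterns:
        matches = sorted(glob.glob(pattern))
        expanded.extend(matches if matches else [pattern])
    return expanded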
Example #12
def main(_argv):
    """Program entry point.
  """

    # Load flags from config file
    if FLAGS.config_path:
        with gfile.GFile(FLAGS.config_path) as config_file:
            config_flags = yaml.load(config_file)
            for flag_key, flag_value in config_flags.items():
                setattr(FLAGS, flag_key, flag_value)

    if isinstance(FLAGS.tasks, string_types):
        FLAGS.tasks = _maybe_load_yaml(FLAGS.tasks)

    if isinstance(FLAGS.input_pipeline, string_types):
        FLAGS.input_pipeline = _maybe_load_yaml(FLAGS.input_pipeline)

    input_pipeline_infer = input_pipeline.make_input_pipeline_from_def(
        FLAGS.input_pipeline,
        mode=tf.contrib.learn.ModeKeys.INFER,
        shuffle=False,
        num_epochs=1)

    # Load saved training options
    train_options = training_utils.TrainOptions.load(FLAGS.model_dir)

    # Create the model
    model_cls = locate(train_options.model_class) or \
      getattr(models, train_options.model_class)
    model_params = train_options.model_params
    model_params = _deep_merge_dict(model_params,
                                    _maybe_load_yaml(FLAGS.model_params))
    model = model_cls(params=model_params,
                      mode=tf.contrib.learn.ModeKeys.INFER)

    # Load inference tasks
    hooks = []
    for tdict in FLAGS.tasks:
        if "params" not in tdict:
            tdict["params"] = {}
        task_cls = locate(tdict["class"]) or getattr(tasks, tdict["class"])
        task = task_cls(tdict["params"])
        hooks.append(task)

    # Create the graph used for inference
    predictions, _, _ = create_inference_graph(
        model=model,
        input_pipeline=input_pipeline_infer,
        batch_size=FLAGS.batch_size)

    saver = tf.train.Saver(tf.all_variables())
    checkpoint_path = FLAGS.checkpoint_path
    if not checkpoint_path:
        checkpoint_path = tf.train.latest_checkpoint(FLAGS.model_dir)

    def session_init_op(_scaffold, sess):
        saver.restore(sess, checkpoint_path)
        tf.logging.info("Restored model from %s", checkpoint_path)

        if FLAGS.save_pb_during_infer:
            save_vars = {}
            for v in tf.trainable_variables():
                save_vars[v.value().name] = sess.run(v)
            g2 = tf.Graph()
            with g2.as_default():
                consts = {}
                for k in save_vars.keys():
                    consts[k] = tf.constant(save_vars[k])
                tf.import_graph_def(
                    sess.graph_def,
                    input_map={name: consts[name]
                               for name in consts.keys()})
                tf.train.write_graph(g2.as_graph_def(), FLAGS.save_pb_dir,
                                     'rnn.pb', False)
                tf.train.write_graph(g2.as_graph_def(), FLAGS.save_pb_dir,
                                     'rnn.txt')
            tf.logging.info("Saved pb to %s", FLAGS.save_pb_dir)

    scaffold = tf.train.Scaffold(init_fn=session_init_op)
    session_creator = tf.train.ChiefSessionCreator(scaffold=scaffold)
    with tf.train.MonitoredSession(session_creator=session_creator,
                                   hooks=hooks) as sess:

        # Run until the inputs are exhausted
        while not sess.should_stop():
            sess.run([])
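
The constant-copying loop inside session_init_op is a hand-rolled graph
freeze. Under TF 1.x the same .pb files could likely be produced with the
standard helper instead; a sketch, assuming the output node names are known:

from tensorflow.python.framework import graph_util


def freeze_to_pb(sess, output_node_names, pb_dir):
    """Convert variables to constants and write frozen GraphDefs."""
    frozen = graph_util.convert_variables_to_constants(
        sess, sess.graph_def, output_node_names)
    tf.train.write_graph(frozen, pb_dir, 'rnn.pb', as_text=False)
    tf.train.write_graph(frozen, pb_dir, 'rnn.txt', as_text=True)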
Example #13
def main(_argv):
    """The entrypoint for the script"""

    global FLAGS
    if conf.IN_LOCAL_ENV:
        # Parse YAML FLAGS
        FLAGS.hooks = _maybe_load_yaml(FLAGS.hooks)
        FLAGS.metrics = _maybe_load_yaml(FLAGS.metrics)
        FLAGS.model_params = _maybe_load_yaml(FLAGS.model_params)
        FLAGS.input_pipeline_train = _maybe_load_yaml(
            FLAGS.input_pipeline_train)
        FLAGS.input_pipeline_dev = _maybe_load_yaml(FLAGS.input_pipeline_dev)

        # Load flags from config file
        final_config = {}
        if FLAGS.config_paths:
            for config_path in FLAGS.config_paths.split(","):
                config_path = config_path.strip()
                if not config_path:
                    continue
                config_path = os.path.abspath(config_path)
                tf.logging.info("Loading config from %s", config_path)
                with gfile.GFile(config_path.strip()) as config_file:
                    config_flags = yaml.load(config_file)
                    final_config = _deep_merge_dict(final_config, config_flags)

        tf.logging.info("Final Config:\n%s", yaml.dump(final_config))

        # Merge flags with config values
        for flag_key, flag_value in final_config.items():
            if hasattr(FLAGS, flag_key) and isinstance(
                    getattr(FLAGS, flag_key), dict):
                merged_value = _deep_merge_dict(flag_value,
                                                getattr(FLAGS, flag_key))
                setattr(FLAGS, flag_key, merged_value)
            elif hasattr(FLAGS, flag_key):
                setattr(FLAGS, flag_key, flag_value)
            else:
                tf.logging.warning("Ignoring config flag: %s", flag_key)

        if FLAGS.save_checkpoints_secs is None \
          and FLAGS.save_checkpoints_steps is None:
            FLAGS.save_checkpoints_secs = 600
            tf.logging.info("Setting save_checkpoints_secs to %d",
                            FLAGS.save_checkpoints_secs)

        # Save FLAGS via pickle so the non-local run can reload them
        #FLAGS._parse_flags()
        print("-----------------")
        print(FLAGS.__dict__)
        with open("train_flags.pkl", "wb") as f:
            pickle.dump(FLAGS.__dict__['__flags'], f)
    else:
        with open("train_flags.pkl", "rb") as f:
            saved = pickle.load(f)
        for k, v in saved.items():
            if k == "model_output_path":
                print("ignore model_output_path")
                continue
            setattr(FLAGS, k, v)
        print("-----------------")
        print(FLAGS.__dict__)

    if not FLAGS.output_dir:
        FLAGS.output_dir = tempfile.mkdtemp()

    if not FLAGS.input_pipeline_train:
        raise ValueError("You must specify input_pipeline_train")

    if not FLAGS.input_pipeline_dev:
        raise ValueError("You must specify input_pipeline_dev")

    learn_runner.run(experiment_fn=create_experiment,
                     output_dir=FLAGS.output_dir,
                     schedule=FLAGS.schedule)

    if not conf.IN_LOCAL_ENV:
        if not hdfs.path.exists(FLAGS.model_output_path):
            hdfs.mkdir(FLAGS.model_output_path)
            print("make hdfs dir:{}".format(FLAGS.model_output_path))
            print("dir:{} exists:{}".format(
                FLAGS.model_output_path,
                hdfs.path.exists(FLAGS.model_output_path)))
        print("copying files from {} to hdfs:{}".format(
            FLAGS.output_dir, FLAGS.model_output_path))
        output_files = os.listdir(FLAGS.output_dir)
        for f in output_files:
            if f in COPY_FILE_LIST:
                print("copy file:{} to hdfs:{}".format(
                    f, FLAGS.model_output_path))
                pydoop.hdfs.put(os.path.join(FLAGS.output_dir, f),
                                FLAGS.model_output_path)
            else:
                for p in COPY_PATTERN_LIST:
                    if re.match(p, f):
                        print("copy file:{} to hdfs:{}".format(
                            f, FLAGS.model_output_path))
                        pydoop.hdfs.put(os.path.join(FLAGS.output_dir, f),
                                        FLAGS.model_output_path)
Example #14
def main(_argv):
    """Program entry point.
  """

    # Load flags from config file
    if FLAGS.config_path:
        with gfile.GFile(FLAGS.config_path) as config_file:
            config_flags = yaml.load(config_file)
            for flag_key, flag_value in config_flags.items():
                setattr(FLAGS, flag_key, flag_value)

    if isinstance(FLAGS.tasks, string_types):
        FLAGS.tasks = _maybe_load_yaml(FLAGS.tasks)

    if isinstance(FLAGS.input_pipeline, string_types):
        FLAGS.input_pipeline = _maybe_load_yaml(FLAGS.input_pipeline)

    input_pipeline_infer = input_pipeline.make_input_pipeline_from_def(
        FLAGS.input_pipeline,
        mode=tf.contrib.learn.ModeKeys.INFER,
        shuffle=False,
        num_epochs=1)

    # Load saved training options
    train_options = training_utils.TrainOptions.load(FLAGS.model_dir)

    # Create the model
    model_cls = locate(train_options.model_class) or \
      getattr(models, train_options.model_class)
    model_params = train_options.model_params
    model_params = _deep_merge_dict(model_params,
                                    _maybe_load_yaml(FLAGS.model_params))
    model = model_cls(params=model_params,
                      mode=tf.contrib.learn.ModeKeys.INFER)

    # Load inference tasks
    hooks = []
    for tdict in FLAGS.tasks:
        if "params" not in tdict:
            tdict["params"] = {}
        task_cls = locate(tdict["class"]) or getattr(tasks, tdict["class"])
        task = task_cls(tdict["params"])
        hooks.append(task)

    # Create the graph used for inference
    predictions, _, _ = create_inference_graph(
        model=model,
        input_pipeline=input_pipeline_infer,
        batch_size=FLAGS.batch_size)

    # moving_average_variables = []
    #
    # for var in slim.get_model_variables():
    #     if 'resnet_v1_50' in var.name and 'fully_connected' not in var.name:
    #         moving_average_variables.append(var)
    #
    #
    # tf_global_step = slim.get_or_create_global_step()
    #
    # variable_averages = tf.train.ExponentialMovingAverage(
    #     0.99, tf_global_step)
    #
    # #get variable of the models and apply average to the concerned variables
    # variables_to_restore = variable_averages.variables_to_restore(
    #     moving_average_variables)
    #
    #
    # #TODO
    # #current checkpoint have model/att_seq2seq/model/att_seq2seq/encode/.... for the exp moving
    # #instead of model/att_seq2seq/
    # #so need to rename these:
    #
    # def name_in_checkpoint(var):
    #     if 'ExponentialMovingAverage' in var:
    #         return var.replace('model/att_seq2seq/', 'model/att_seq2seq/model/att_seq2seq/')
    #     return var
    #
    #
    # variables_to_restore = {name_in_checkpoint(k): v
    #                         for k,v in variables_to_restore.items()}
    #
    #
    # variables_to_restore[tf_global_step.op.name] = tf_global_step
    #
    # for k,v in variables_to_restore.items():
    #     print(k)
    #     print(v)
    #     print("#############")
    # import sys
    # sys.exit()

    # saver = tf.train.Saver(var_list=variables_to_restore)
    saver = tf.train.Saver()
    checkpoint_path = FLAGS.checkpoint_path
    if not checkpoint_path:
        checkpoint_path = tf.train.latest_checkpoint(FLAGS.model_dir)

    def session_init_op(_scaffold, sess):
        saver.restore(sess, checkpoint_path)
        tf.logging.info("Restored model from %s", checkpoint_path)

    scaffold = tf.train.Scaffold(init_fn=session_init_op)
    session_creator = tf.train.ChiefSessionCreator(scaffold=scaffold)
    with tf.train.MonitoredSession(session_creator=session_creator,
                                   hooks=hooks) as sess:

        # Run until the inputs are exhausted
        while not sess.should_stop():
            sess.run([])
Example #15
def main(_argv):
    """The entrypoint for the script"""
    if not FLAGS.output_dir:
        FLAGS.output_dir = tempfile.mkdtemp()

    add_file_logger(FLAGS.output_dir)
    _dump_flags()

    # Parse YAML FLAGS
    FLAGS.hooks = _maybe_load_yaml(FLAGS.hooks)
    FLAGS.metrics = _maybe_load_yaml(FLAGS.metrics)
    FLAGS.model_params = _maybe_load_yaml(FLAGS.model_params)
    FLAGS.input_pipeline_train = _maybe_load_yaml(FLAGS.input_pipeline_train)
    FLAGS.input_pipeline_dev = _maybe_load_yaml(FLAGS.input_pipeline_dev)

    # Load flags from config file
    final_config = {}
    if FLAGS.config_paths:
        for config_path in FLAGS.config_paths.split(","):
            config_path = config_path.strip()
            if not config_path:
                continue
            config_path = os.path.abspath(config_path)
            tf.logging.info("Loading config from %s", config_path)
            with gfile.GFile(config_path.strip()) as config_file:
                config_flags = yaml.load(config_file)

                # Update config_flags
                if FLAGS.learning_rate:
                    if "model_params" in config_flags:
                        config_flags["model_params"].update(
                            {'optimizer.learning_rate': FLAGS.learning_rate})

                if FLAGS.source_embedding:
                    if "model_params" in config_flags:
                        config_flags["model_params"].update({
                            'embedding.source_embedding':
                            FLAGS.source_embedding
                        })

                if FLAGS.target_embedding:
                    if "model_params" in config_flags:
                        config_flags["model_params"].update({
                            'embedding.target_embedding':
                            FLAGS.target_embedding
                        })
                """
        if FLAGS.optimizer_name:
          if "model_params" in config_flags:
            config_flags["model_params"].update({'optimizer.name': FLAGS.learning_rate})"""

                final_config = _deep_merge_dict(final_config, config_flags)

    tf.logging.info("Final Config:\n%s", yaml.dump(final_config))

    # Merge flags with config values
    for flag_key, flag_value in final_config.items():
        if hasattr(FLAGS, flag_key) and isinstance(getattr(FLAGS, flag_key),
                                                   dict):
            merged_value = _deep_merge_dict(flag_value,
                                            getattr(FLAGS, flag_key))
            setattr(FLAGS, flag_key, merged_value)
        elif hasattr(FLAGS, flag_key):
            setattr(FLAGS, flag_key, flag_value)
        else:
            tf.logging.warning("Ignoring config flag: %s", flag_key)

    if FLAGS.save_checkpoints_secs is None \
      and FLAGS.save_checkpoints_steps is None:
        FLAGS.save_checkpoints_secs = 600
        tf.logging.info("Setting save_checkpoints_secs to %d",
                        FLAGS.save_checkpoints_secs)

    if not FLAGS.input_pipeline_train:
        raise ValueError("You must specify input_pipeline_train")

    if not FLAGS.input_pipeline_dev:
        raise ValueError("You must specify input_pipeline_dev")

    learn_runner.run(experiment_fn=create_experiment,
                     output_dir=FLAGS.output_dir,
                     schedule=FLAGS.schedule)
Example #16
def main(_argv):
  """Program entry point.
  """

  # Load flags from config file
  if FLAGS.config_path:
    with gfile.GFile(FLAGS.config_path) as config_file:
      config_flags = yaml.load(config_file)
      for flag_key, flag_value in config_flags.items():
        setattr(FLAGS, flag_key, flag_value)

  if isinstance(FLAGS.tasks, string_types):
    FLAGS.tasks = _maybe_load_yaml(FLAGS.tasks)

  if isinstance(FLAGS.input_pipeline, string_types):
    FLAGS.input_pipeline = _maybe_load_yaml(FLAGS.input_pipeline)

  input_pipeline_infer = input_pipeline.make_input_pipeline_from_def(
      FLAGS.input_pipeline, mode=tf.contrib.learn.ModeKeys.INFER,
      shuffle=False, num_epochs=1)

  # Load saved training options
  train_options = training_utils.TrainOptions.load(FLAGS.model_dir)

  # Create the model
  model_cls = locate(train_options.model_class) or \
    getattr(models, train_options.model_class)
  model_params = train_options.model_params
  model_params = _deep_merge_dict(
      model_params, _maybe_load_yaml(FLAGS.model_params))
  model = model_cls(
      params=model_params,
      mode=tf.contrib.learn.ModeKeys.INFER)

  # Load inference tasks
  hooks = []
  for tdict in FLAGS.tasks:
    if "params" not in tdict:
      tdict["params"] = {}
    task_cls = locate(tdict["class"]) or getattr(tasks, tdict["class"])
    task = task_cls(tdict["params"])
    hooks.append(task)

  # Create the graph used for inference
  predictions, _, _ = create_inference_graph(
      model=model,
      input_pipeline=input_pipeline_infer,
      batch_size=FLAGS.batch_size)

  saver = tf.train.Saver()
  checkpoint_path = FLAGS.checkpoint_path
  if not checkpoint_path:
    checkpoint_path = tf.train.latest_checkpoint(FLAGS.model_dir)

  def session_init_op(_scaffold, sess):
    saver.restore(sess, checkpoint_path)
    tf.logging.info("Restored model from %s", checkpoint_path)

  scaffold = tf.train.Scaffold(init_fn=session_init_op)
  session_creator = tf.train.ChiefSessionCreator(scaffold=scaffold)
  with tf.train.MonitoredSession(
      session_creator=session_creator,
      hooks=hooks) as sess:

    # Run until the inputs are exhausted
    while not sess.should_stop():
      sess.run([])
Example #17
def main(_argv):
    """Program entry point.
  """

    # Load flags from config file
    if FLAGS.config_path:
        with gfile.GFile(FLAGS.config_path) as config_file:
            config_flags = yaml.load(config_file)
            for flag_key, flag_value in config_flags.items():
                setattr(FLAGS, flag_key, flag_value)

    if isinstance(FLAGS.tasks, string_types):
        FLAGS.tasks = _maybe_load_yaml(FLAGS.tasks)

    if isinstance(FLAGS.input_pipeline, string_types):
        FLAGS.input_pipeline = _maybe_load_yaml(FLAGS.input_pipeline)

    input_pipeline_infer = input_pipeline.make_input_pipeline_from_def(
        FLAGS.input_pipeline,
        mode=tf.contrib.learn.ModeKeys.INFER,
        shuffle=False,
        num_epochs=1)

    # Load saved training options
    train_options = training_utils.TrainOptions.load(FLAGS.model_dir)

    # Create the model
    model_cls = locate(train_options.model_class) or \
      getattr(models, train_options.model_class)
    model_params = train_options.model_params
    model_params = _deep_merge_dict(model_params,
                                    _maybe_load_yaml(FLAGS.model_params))
    model = model_cls(params=model_params,
                      mode=tf.contrib.learn.ModeKeys.INFER)

    # Load inference tasks
    hooks = []
    for tdict in FLAGS.tasks:
        if "params" not in tdict:
            tdict["params"] = {}
        task_cls = locate(tdict["class"]) or getattr(tasks, tdict["class"])
        task = task_cls(tdict["params"])
        hooks.append(task)

    # Create the graph used for inference
    predictions, _, _ = create_inference_graph(
        model=model,
        input_pipeline=input_pipeline_infer,
        batch_size=FLAGS.batch_size)

    saver = tf.train.Saver()
    checkpoint_path = FLAGS.checkpoint_path
    if not checkpoint_path:
        checkpoint_path = tf.train.latest_checkpoint(FLAGS.model_dir)

    #def session_init_op(_scaffold, sess):
    #  saver.restore(sess, checkpoint_path)
    #  tf.logging.info("Restored model from %s", checkpoint_path)

    #scaffold = tf.train.Scaffold(init_fn=session_init_op)
    #session_creator = tf.train.ChiefSessionCreator(scaffold=scaffold)
    #with tf.train.MonitoredSession(
    #    session_creator=session_creator,
    #    hooks=hooks) as sess:

    #  # Run until the inputs are exhausted
    #  while not sess.should_stop():
    #    sess.run([])

    with tf.Session() as sess:
        print("start to restore checkpoint:{} into session".format(
            checkpoint_path))
        saver.restore(sess, checkpoint_path)
        saved_model_path = os.path.join(FLAGS.model_dir, FLAGS.export_dir)
        if os.path.exists(saved_model_path):
            print("remove old directory:{}".format(saved_model_path))
            shutil.rmtree(saved_model_path)
        print("start to export SavedModel")
        builder = tf.saved_model.builder.SavedModelBuilder(saved_model_path)
        builder.add_meta_graph_and_variables(sess, ["fin_biseq2seq"])
        builder.save()
        print("finish exporting SavedModel")
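
A minimal sketch of loading the SavedModel exported above with the TF 1.x
loader API, reusing the same custom tag the example saves with:

with tf.Session(graph=tf.Graph()) as sess:
    tf.saved_model.loader.load(sess, ["fin_biseq2seq"], saved_model_path)
    # Tensors can then be fetched by name from the restored graph, e.g.
    # sess.graph.get_tensor_by_name("<op_name>:0")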