def main(_): tf.logging.set_verbosity(tf.logging.INFO) # pylint: disable=unused-variable model_dir = os.path.expanduser(FLAGS.model_dir) translations_dir = os.path.expanduser(FLAGS.translations_dir) source = os.path.expanduser(FLAGS.source) tf.gfile.MakeDirs(translations_dir) translated_base_file = os.path.join(translations_dir, FLAGS.problems) # Copy flags.txt with the original time, so t2t-bleu can report correct # relative time. flags_path = os.path.join(translations_dir, FLAGS.problems + "-flags.txt") if not os.path.exists(flags_path): shutil.copy2(os.path.join(model_dir, "flags.txt"), flags_path) locals_and_flags = {'FLAGS': FLAGS} for model in bleu_hook.stepfiles_iterator(model_dir, FLAGS.wait_minutes, FLAGS.min_steps): tf.logging.info("Translating " + model.filename) out_file = translated_base_file + "-" + str(model.steps) locals_and_flags.update(locals()) if os.path.exists(out_file): tf.logging.info(out_file + " already exists, so skipping it.") else: tf.logging.info("Translating " + out_file) params = ( "--t2t_usr_dir={FLAGS.t2t_usr_dir} --output_dir={model_dir} " "--data_dir={FLAGS.data_dir} --problems={FLAGS.problems} " "--decode_hparams=beam_size={FLAGS.beam_size},alpha={FLAGS.alpha} " "--model={FLAGS.model} --hparams_set={FLAGS.hparams_set} " "--checkpoint_path={model.filename} --decode_from_file={source} " "--decode_to_file={out_file}").format(**locals_and_flags) command = FLAGS.decoder_command.format(**locals()) tf.logging.info("Running:\n" + command) os.system(command)
def main(_): tf.logging.set_verbosity(tf.logging.INFO) # pylint: disable=unused-variable model_dir = os.path.expanduser(FLAGS.model_dir) translations_dir = os.path.expanduser(FLAGS.translations_dir) source = os.path.expanduser(FLAGS.source) tf.gfile.MakeDirs(translations_dir) translated_base_file = os.path.join(translations_dir, FLAGS.problem) # Copy flags.txt with the original time, so t2t-bleu can report correct # relative time. flags_path = os.path.join(translations_dir, FLAGS.problem + "-flags.txt") if not os.path.exists(flags_path): shutil.copy2(os.path.join(model_dir, "flags.txt"), flags_path) locals_and_flags = {"FLAGS": FLAGS} for model in bleu_hook.stepfiles_iterator(model_dir, FLAGS.wait_minutes, FLAGS.min_steps): tf.logging.info("Translating " + model.filename) out_file = translated_base_file + "-" + str(model.steps) locals_and_flags.update(locals()) if os.path.exists(out_file): tf.logging.info(out_file + " already exists, so skipping it.") else: tf.logging.info("Translating " + out_file) params = ( "--t2t_usr_dir={FLAGS.t2t_usr_dir} --output_dir={model_dir} " "--data_dir={FLAGS.data_dir} --problem={FLAGS.problem} " "--decode_hparams=beam_size={FLAGS.beam_size},alpha={FLAGS.alpha} " "--model={FLAGS.model} --hparams_set={FLAGS.hparams_set} " "--checkpoint_path={model.filename} --decode_from_file={source} " "--decode_to_file={out_file} --keep_timestamp" ).format(**locals_and_flags) command = FLAGS.decoder_command.format(**locals()) tf.logging.info("Running:\n" + command) os.system(command)
def main(_):
  """Maintain a sliding-window average of the last FLAGS.n checkpoints.

  Watches model_dir (via bleu_hook.stepfiles_iterator) and, once FLAGS.n
  checkpoints have been seen, writes an averaged checkpoint into
  FLAGS.output_dir for each subsequent step.  A running per-variable sum is
  kept so producing each new average costs only one extra checkpoint read
  (add the newest, subtract the oldest).

  Fix vs. the previous revision: the flags.txt copy is now guarded by an
  existence check on the source file, so the script no longer crashes on
  model dirs that never wrote a flags.txt (matching the other avg-all
  variant in this file).
  """
  # Prefix log lines with a timestamp so progress over hours is readable.
  tf.logging._handler.setFormatter(  # pylint: disable=protected-access
      logging.Formatter("%(asctime)s:" + logging.BASIC_FORMAT, None))
  tf.logging.set_verbosity(tf.logging.INFO)

  model_dir = os.path.expanduser(FLAGS.model_dir)
  output_dir = os.path.expanduser(FLAGS.output_dir)
  out_base_file = os.path.join(output_dir, "model.ckpt")

  # Copy flags.txt with the original time, so t2t-bleu can report correct
  # relative time.  Only copy when the source actually exists.
  tf.gfile.MakeDirs(FLAGS.output_dir)
  if (not os.path.exists(os.path.join(output_dir, "flags.txt")) and
      os.path.exists(os.path.join(model_dir, "flags.txt"))):
    shutil.copy2(os.path.join(model_dir, "flags.txt"),
                 os.path.join(output_dir, "flags.txt"))

  models_processed = 0
  queue = deque()
  for model in bleu_hook.stepfiles_iterator(model_dir, FLAGS.wait_minutes,
                                            FLAGS.min_steps):
    if models_processed == 0:
      # Initialize the running sums from the first checkpoint's variable list.
      var_list = tf.contrib.framework.list_variables(model.filename)
      avg_values = {}
      for (name, shape) in var_list:
        if not name.startswith("global_step"):
          avg_values[name] = np.zeros(shape)
    models_processed += 1

    tf.logging.info("Loading [%d]: %s" % (models_processed, model.filename))
    reader = tf.contrib.framework.load_checkpoint(model.filename)
    # Add this checkpoint's contribution to the running average.
    for name in avg_values:
      avg_values[name] += reader.get_tensor(name) / FLAGS.n
    queue.append(model)
    if len(queue) < FLAGS.n:
      # Not enough checkpoints for a full window yet.
      continue

    out_file = "%s-%d" % (out_base_file, model.steps)
    tf_vars = []
    tf.logging.info("Averaging %s" % (out_file))
    for (name, value) in six.iteritems(avg_values):
      # TODO(martinpopel): dtype=var_dtypes[name]
      tf_vars.append(tf.get_variable(name, shape=value.shape))
    placeholders = [tf.placeholder(v.dtype, shape=v.shape) for v in tf_vars]
    assign_ops = [tf.assign(v, p) for (v, p) in zip(tf_vars, placeholders)]

    global_step = tf.get_variable(
        "global_step",
        initializer=tf.constant(model.steps, dtype=tf.int64),
        trainable=False)
    saver = tf.train.Saver(tf.global_variables())

    tf.logging.info("Running session for %s" % (out_file))
    with tf.Session() as sess:
      sess.run(tf.global_variables_initializer())
      # Feed the averaged numpy arrays into the graph variables.
      for p, assign_op, (name, value) in zip(
          placeholders, assign_ops, six.iteritems(avg_values)):
        sess.run(assign_op, {p: value})
      tf.logging.info("Storing to %s" % out_file)
      saver.save(sess, out_base_file, global_step=global_step)
      # Preserve the source checkpoint's mtime so t2t-bleu reports correct
      # relative time.
      os.utime(out_file + ".index", (model.mtime, model.mtime))

    # Start from a clean graph for the next averaging round.
    tf.reset_default_graph()
    # Slide the window: subtract the oldest checkpoint's contribution.
    first_model = queue.popleft()
    reader = tf.contrib.framework.load_checkpoint(first_model.filename)
    for name in avg_values:
      avg_values[name] -= reader.get_tensor(name) / FLAGS.n
def main(_):
  """Compute BLEU for one translation file, or watch a directory of them.

  With --translation: prints cased/uncased BLEU to stdout and returns.
  With --translations_dir: evaluates each step-file found in the directory
  (waiting for new ones) and writes BLEU values as TF event summaries to
  FLAGS.event_dir, remembering the last evaluated step across restarts.
  """
  tf.logging.set_verbosity(tf.logging.INFO)
  if FLAGS.translation:
    # One-shot mode: the two flags are mutually exclusive.
    if FLAGS.translations_dir:
      raise ValueError(
          "Cannot specify both --translation and --translations_dir.")
    if FLAGS.bleu_variant in ("uncased", "both"):
      bleu = 100 * bleu_hook.bleu_wrapper(FLAGS.reference, FLAGS.translation,
                                          case_sensitive=False)
      print("BLEU_uncased = %6.2f" % bleu)
    if FLAGS.bleu_variant in ("cased", "both"):
      bleu = 100 * bleu_hook.bleu_wrapper(FLAGS.reference, FLAGS.translation,
                                          case_sensitive=True)
      print("BLEU_cased = %6.2f" % bleu)
    return

  if not FLAGS.translations_dir:
    raise ValueError(
        "Either --translation or --translations_dir must be specified.")
  transl_dir = os.path.expanduser(FLAGS.translations_dir)
  if not os.path.exists(transl_dir):
    # Poll for the directory to appear, up to FLAGS.wait_minutes.
    exit_time = time.time() + FLAGS.wait_minutes * 60
    tf.logging.info("Translation dir %s does not exist, waiting till %s.",
                    transl_dir, time.asctime(time.localtime(exit_time)))
    while not os.path.exists(transl_dir):
      time.sleep(10)
      if time.time() > exit_time:
        raise ValueError("Translation dir %s does not exist" % transl_dir)

  last_step_file = os.path.join(FLAGS.event_dir, "last_evaluated_step.txt")
  if FLAGS.min_steps == -1:
    # min_steps == -1 means "resume where a previous run left off".
    if tf.gfile.Exists(last_step_file):
      with open(last_step_file) as ls_file:
        FLAGS.min_steps = int(ls_file.read())
    else:
      FLAGS.min_steps = 0
  if FLAGS.report_zero is None:
    # Default: emit a step-0 zero-BLEU point only when starting from scratch.
    FLAGS.report_zero = FLAGS.min_steps == 0

  writer = tf.summary.FileWriter(FLAGS.event_dir)
  for transl_file in bleu_hook.stepfiles_iterator(
      transl_dir, FLAGS.wait_minutes, FLAGS.min_steps, path_suffix=""):
    # report_zero handling must be inside the for-loop,
    # so we are sure the transl_dir is already created.
    if FLAGS.report_zero:
      # Anchor the zero point at the oldest file's mtime in the dir.
      all_files = (os.path.join(transl_dir, f) for f in os.listdir(transl_dir))
      start_time = min(
          os.path.getmtime(f) for f in all_files if os.path.isfile(f))
      values = []
      if FLAGS.bleu_variant in ("uncased", "both"):
        values.append(tf.Summary.Value(
            tag="BLEU_uncased" + FLAGS.tag_suffix, simple_value=0))
      if FLAGS.bleu_variant in ("cased", "both"):
        values.append(tf.Summary.Value(
            tag="BLEU_cased" + FLAGS.tag_suffix, simple_value=0))
      writer.add_event(tf.summary.Event(summary=tf.Summary(value=values),
                                        wall_time=start_time, step=0))
      FLAGS.report_zero = False

    filename = transl_file.filename
    tf.logging.info("Evaluating " + filename)
    values = []
    if FLAGS.bleu_variant in ("uncased", "both"):
      bleu = 100 * bleu_hook.bleu_wrapper(FLAGS.reference, filename,
                                          case_sensitive=False)
      values.append(tf.Summary.Value(tag="BLEU_uncased" + FLAGS.tag_suffix,
                                     simple_value=bleu))
      tf.logging.info("%s: BLEU_uncased = %6.2f" % (filename, bleu))
    if FLAGS.bleu_variant in ("cased", "both"):
      bleu = 100 * bleu_hook.bleu_wrapper(FLAGS.reference, filename,
                                          case_sensitive=True)
      values.append(tf.Summary.Value(tag="BLEU_cased" + FLAGS.tag_suffix,
                                     simple_value=bleu))
      tf.logging.info("%s: BLEU_cased = %6.2f" % (transl_file.filename, bleu))
    writer.add_event(tf.summary.Event(
        summary=tf.Summary(value=values), wall_time=transl_file.mtime,
        step=transl_file.steps))
    writer.flush()
    # Record progress so a restarted run can resume from this step.
    with open(last_step_file, "w") as ls_file:
      ls_file.write(str(transl_file.steps) + "\n")
def main(_):
  """Compute BLEU for one translation file, or watch a directory of them.

  With --translation: prints cased/uncased BLEU to stdout and returns.
  With --translations_dir: evaluates each step-file in the directory
  (waiting for new ones) and writes BLEU values as TF event summaries to
  FLAGS.event_dir, remembering the last evaluated step across restarts.
  """
  tf.logging.set_verbosity(tf.logging.INFO)

  # The (variant-name, case_sensitive) pairs, in evaluation order.
  variant_specs = (("uncased", False), ("cased", True))

  if FLAGS.translation:
    # One-shot mode: the two flags are mutually exclusive.
    if FLAGS.translations_dir:
      raise ValueError(
          "Cannot specify both --translation and --translations_dir.")
    for variant, sensitive in variant_specs:
      if FLAGS.bleu_variant in (variant, "both"):
        score = 100 * bleu_hook.bleu_wrapper(
            FLAGS.reference, FLAGS.translation, case_sensitive=sensitive)
        print("BLEU_%s = %6.2f" % (variant, score))
    return

  if not FLAGS.translations_dir:
    raise ValueError(
        "Either --translation or --translations_dir must be specified.")
  transl_dir = os.path.expanduser(FLAGS.translations_dir)
  if not os.path.exists(transl_dir):
    # Poll until the directory shows up, giving up after wait_minutes.
    exit_time = time.time() + FLAGS.wait_minutes * 60
    tf.logging.info("Translation dir %s does not exist, waiting till %s.",
                    transl_dir, time.asctime(time.localtime(exit_time)))
    while not os.path.exists(transl_dir):
      time.sleep(10)
      if time.time() > exit_time:
        raise ValueError("Translation dir %s does not exist" % transl_dir)

  last_step_file = os.path.join(FLAGS.event_dir, "last_evaluated_step.txt")
  if FLAGS.min_steps == -1:
    # min_steps == -1 means "resume where a previous run left off".
    if tf.gfile.Exists(last_step_file):
      with open(last_step_file) as ls_file:
        FLAGS.min_steps = int(ls_file.read())
    else:
      FLAGS.min_steps = 0
  if FLAGS.report_zero is None:
    # Default: emit a step-0 zero-BLEU point only when starting from scratch.
    FLAGS.report_zero = FLAGS.min_steps == 0

  writer = tf.summary.FileWriter(FLAGS.event_dir)
  for transl_file in bleu_hook.stepfiles_iterator(transl_dir,
                                                  FLAGS.wait_minutes,
                                                  FLAGS.min_steps,
                                                  path_suffix=""):
    # report_zero handling must be inside the for-loop,
    # so we are sure the transl_dir is already created.
    if FLAGS.report_zero:
      # Anchor the zero point at the oldest file's mtime in the dir.
      candidates = (os.path.join(transl_dir, name)
                    for name in os.listdir(transl_dir))
      start_time = min(os.path.getmtime(path)
                       for path in candidates if os.path.isfile(path))
      zero_values = []
      for variant, _ in variant_specs:
        if FLAGS.bleu_variant in (variant, "both"):
          zero_values.append(
              tf.Summary.Value(tag="BLEU_" + variant + FLAGS.tag_suffix,
                               simple_value=0))
      writer.add_event(
          tf.summary.Event(summary=tf.Summary(value=zero_values),
                           wall_time=start_time, step=0))
      FLAGS.report_zero = False

    filename = transl_file.filename
    tf.logging.info("Evaluating " + filename)
    values = []
    for variant, sensitive in variant_specs:
      if FLAGS.bleu_variant in (variant, "both"):
        score = 100 * bleu_hook.bleu_wrapper(
            FLAGS.reference, filename, case_sensitive=sensitive)
        values.append(
            tf.Summary.Value(tag="BLEU_" + variant + FLAGS.tag_suffix,
                             simple_value=score))
        tf.logging.info("%s: BLEU_%s = %6.2f" % (filename, variant, score))
    writer.add_event(
        tf.summary.Event(summary=tf.Summary(value=values),
                         wall_time=transl_file.mtime,
                         step=transl_file.steps))
    writer.flush()
    # Record progress so a restarted run can resume from this step.
    with open(last_step_file, "w") as ls_file:
      ls_file.write(str(transl_file.steps) + "\n")
def main(_):
  """Maintain a sliding-window average of the last FLAGS.n checkpoints.

  Watches model_dir (via bleu_hook.stepfiles_iterator) and, once FLAGS.n
  checkpoints have been seen, writes an averaged checkpoint into
  FLAGS.output_dir for each subsequent step.  A running per-variable sum is
  kept so producing each new average costs only one extra checkpoint read
  (add the newest, subtract the oldest).
  """
  tf.logging.set_verbosity(tf.logging.INFO)
  model_dir = os.path.expanduser(FLAGS.model_dir)
  output_dir = os.path.expanduser(FLAGS.output_dir)
  out_base_file = os.path.join(output_dir, "model.ckpt")

  # Copy flags.txt with the original time, so t2t-bleu can report correct
  # relative time.  Only copy when the source actually exists.
  tf.gfile.MakeDirs(FLAGS.output_dir)
  if (not os.path.exists(os.path.join(output_dir, "flags.txt")) and
      os.path.exists(os.path.join(model_dir, "flags.txt"))):
    shutil.copy2(os.path.join(model_dir, "flags.txt"),
                 os.path.join(output_dir, "flags.txt"))

  models_processed = 0
  queue = deque()
  for model in bleu_hook.stepfiles_iterator(model_dir, FLAGS.wait_minutes,
                                            FLAGS.min_steps):
    if models_processed == 0:
      # Initialize the running sums from the first checkpoint's variable list.
      var_list = tf.contrib.framework.list_variables(model.filename)
      avg_values = {}
      for (name, shape) in var_list:
        if not name.startswith("global_step"):
          avg_values[name] = np.zeros(shape)
    models_processed += 1

    tf.logging.info("Loading [%d]: %s" % (models_processed, model.filename))
    reader = tf.contrib.framework.load_checkpoint(model.filename)
    # Add this checkpoint's contribution to the running average.
    for name in avg_values:
      avg_values[name] += reader.get_tensor(name) / FLAGS.n
    queue.append(model)
    if len(queue) < FLAGS.n:
      # Not enough checkpoints for a full window yet.
      continue

    out_file = "%s-%d" % (out_base_file, model.steps)
    tf_vars = []
    tf.logging.info("Averaging %s" % (out_file))
    for (name, value) in six.iteritems(avg_values):
      # TODO(martinpopel): dtype=var_dtypes[name]
      tf_vars.append(tf.get_variable(name, shape=value.shape))
    placeholders = [tf.placeholder(v.dtype, shape=v.shape) for v in tf_vars]
    assign_ops = [tf.assign(v, p) for (v, p) in zip(tf_vars, placeholders)]

    global_step = tf.get_variable(
        "global_step",
        initializer=tf.constant(model.steps, dtype=tf.int64),
        trainable=False)
    saver = tf.train.Saver(tf.global_variables())

    tf.logging.info("Running session for %s" % (out_file))
    with tf.Session() as sess:
      sess.run(tf.global_variables_initializer())
      # Feed the averaged numpy arrays into the graph variables.
      for p, assign_op, (name, value) in zip(
          placeholders, assign_ops, six.iteritems(avg_values)):
        sess.run(assign_op, {p: value})
      tf.logging.info("Storing to %s" % out_file)
      saver.save(sess, out_base_file, global_step=global_step)
      # Preserve the source checkpoint's mtime so t2t-bleu reports correct
      # relative time.
      os.utime(out_file + ".index", (model.mtime, model.mtime))

    # Start from a clean graph for the next averaging round.
    tf.reset_default_graph()
    # Slide the window: subtract the oldest checkpoint's contribution.
    first_model = queue.popleft()
    reader = tf.contrib.framework.load_checkpoint(first_model.filename)
    for name in avg_values:
      avg_values[name] -= reader.get_tensor(name) / FLAGS.n