def main(_): tf.logging.set_verbosity(tf.logging.INFO) # pylint: disable=unused-variable model_dir = os.path.expanduser(FLAGS.model_dir) translations_dir = os.path.expanduser(FLAGS.translations_dir) source = os.path.expanduser(FLAGS.source) tf.gfile.MakeDirs(translations_dir) translated_base_file = os.path.join(translations_dir, FLAGS.problems) # Copy flags.txt with the original time, so t2t-bleu can report correct # relative time. flags_path = os.path.join(translations_dir, FLAGS.problems + "-flags.txt") if not os.path.exists(flags_path): shutil.copy2(os.path.join(model_dir, "flags.txt"), flags_path) locals_and_flags = {'FLAGS': FLAGS} for model in bleu_hook.stepfiles_iterator(model_dir, FLAGS.wait_minutes, FLAGS.min_steps): tf.logging.info("Translating " + model.filename) out_file = translated_base_file + "-" + str(model.steps) locals_and_flags.update(locals()) if os.path.exists(out_file): tf.logging.info(out_file + " already exists, so skipping it.") else: tf.logging.info("Translating " + out_file) params = ( "--t2t_usr_dir={FLAGS.t2t_usr_dir} --output_dir={model_dir} " "--data_dir={FLAGS.data_dir} --problems={FLAGS.problems} " "--decode_hparams=beam_size={FLAGS.beam_size},alpha={FLAGS.alpha} " "--model={FLAGS.model} --hparams_set={FLAGS.hparams_set} " "--checkpoint_path={model.filename} --decode_from_file={source} " "--decode_to_file={out_file}").format(**locals_and_flags) command = FLAGS.decoder_command.format(**locals()) tf.logging.info("Running:\n" + command) os.system(command)
def main(_): tf.logging.set_verbosity(tf.logging.INFO) # pylint: disable=unused-variable model_dir = os.path.expanduser(FLAGS.model_dir) translations_dir = os.path.expanduser(FLAGS.translations_dir) source = os.path.expanduser(FLAGS.source) tf.gfile.MakeDirs(translations_dir) translated_base_file = os.path.join(translations_dir, FLAGS.problem) # Copy flags.txt with the original time, so t2t-bleu can report correct # relative time. flags_path = os.path.join(translations_dir, FLAGS.problem + "-flags.txt") if not os.path.exists(flags_path): shutil.copy2(os.path.join(model_dir, "flags.txt"), flags_path) locals_and_flags = {"FLAGS": FLAGS} for model in bleu_hook.stepfiles_iterator(model_dir, FLAGS.wait_minutes, FLAGS.min_steps): tf.logging.info("Translating " + model.filename) out_file = translated_base_file + "-" + str(model.steps) locals_and_flags.update(locals()) if os.path.exists(out_file): tf.logging.info(out_file + " already exists, so skipping it.") else: tf.logging.info("Translating " + out_file) params = ( "--t2t_usr_dir={FLAGS.t2t_usr_dir} --output_dir={model_dir} " "--data_dir={FLAGS.data_dir} --problem={FLAGS.problem} " "--decode_hparams=beam_size={FLAGS.beam_size},alpha={FLAGS.alpha} " "--model={FLAGS.model} --hparams_set={FLAGS.hparams_set} " "--checkpoint_path={model.filename} --decode_from_file={source} " "--decode_to_file={out_file} --keep_timestamp" ).format(**locals_and_flags) command = FLAGS.decoder_command.format(**locals()) tf.logging.info("Running:\n" + command) os.system(command)
def main(_):
  """Maintain a sliding-window average of the last FLAGS.n checkpoints.

  Watches model_dir (via bleu_hook.stepfiles_iterator) and, once FLAGS.n
  checkpoints have been seen, writes an averaged checkpoint into
  FLAGS.output_dir for each subsequent step.  A running per-variable sum is
  kept so producing each new average costs only one extra checkpoint read
  (add the newest, subtract the oldest).

  Fix vs. the previous revision: the flags.txt copy is now guarded by an
  existence check on the source file, so the script no longer crashes on
  model dirs that never wrote a flags.txt (matching the other avg-all
  variant in this file).
  """
  # Prefix log lines with a timestamp so progress over hours is readable.
  tf.logging._handler.setFormatter(  # pylint: disable=protected-access
      logging.Formatter("%(asctime)s:" + logging.BASIC_FORMAT, None))
  tf.logging.set_verbosity(tf.logging.INFO)

  model_dir = os.path.expanduser(FLAGS.model_dir)
  output_dir = os.path.expanduser(FLAGS.output_dir)
  out_base_file = os.path.join(output_dir, "model.ckpt")

  # Copy flags.txt with the original time, so t2t-bleu can report correct
  # relative time.  Only copy when the source actually exists.
  tf.gfile.MakeDirs(FLAGS.output_dir)
  if (not os.path.exists(os.path.join(output_dir, "flags.txt")) and
      os.path.exists(os.path.join(model_dir, "flags.txt"))):
    shutil.copy2(os.path.join(model_dir, "flags.txt"),
                 os.path.join(output_dir, "flags.txt"))

  models_processed = 0
  queue = deque()
  for model in bleu_hook.stepfiles_iterator(model_dir, FLAGS.wait_minutes,
                                            FLAGS.min_steps):
    if models_processed == 0:
      # Initialize the running sums from the first checkpoint's variable list.
      var_list = tf.contrib.framework.list_variables(model.filename)
      avg_values = {}
      for (name, shape) in var_list:
        if not name.startswith("global_step"):
          avg_values[name] = np.zeros(shape)
    models_processed += 1

    tf.logging.info("Loading [%d]: %s" % (models_processed, model.filename))
    reader = tf.contrib.framework.load_checkpoint(model.filename)
    # Add this checkpoint's contribution to the running average.
    for name in avg_values:
      avg_values[name] += reader.get_tensor(name) / FLAGS.n
    queue.append(model)
    if len(queue) < FLAGS.n:
      # Not enough checkpoints for a full window yet.
      continue

    out_file = "%s-%d" % (out_base_file, model.steps)
    tf_vars = []
    tf.logging.info("Averaging %s" % (out_file))
    for (name, value) in six.iteritems(avg_values):
      # TODO(martinpopel): dtype=var_dtypes[name]
      tf_vars.append(tf.get_variable(name, shape=value.shape))
    placeholders = [tf.placeholder(v.dtype, shape=v.shape) for v in tf_vars]
    assign_ops = [tf.assign(v, p) for (v, p) in zip(tf_vars, placeholders)]

    global_step = tf.get_variable(
        "global_step",
        initializer=tf.constant(model.steps, dtype=tf.int64),
        trainable=False)
    saver = tf.train.Saver(tf.global_variables())

    tf.logging.info("Running session for %s" % (out_file))
    with tf.Session() as sess:
      sess.run(tf.global_variables_initializer())
      # Feed the averaged numpy arrays into the graph variables.
      for p, assign_op, (name, value) in zip(
          placeholders, assign_ops, six.iteritems(avg_values)):
        sess.run(assign_op, {p: value})
      tf.logging.info("Storing to %s" % out_file)
      saver.save(sess, out_base_file, global_step=global_step)
      # Preserve the source checkpoint's mtime so t2t-bleu reports correct
      # relative time.
      os.utime(out_file + ".index", (model.mtime, model.mtime))

    # Start from a clean graph for the next averaging round.
    tf.reset_default_graph()
    # Slide the window: subtract the oldest checkpoint's contribution.
    first_model = queue.popleft()
    reader = tf.contrib.framework.load_checkpoint(first_model.filename)
    for name in avg_values:
      avg_values[name] -= reader.get_tensor(name) / FLAGS.n
def main(_):
  """Compute BLEU for one translation file, or watch a directory of them.

  With --translation: prints cased/uncased BLEU to stdout and returns.
  With --translations_dir: evaluates each step-file found in the directory
  (waiting for new ones) and writes BLEU values as TF event summaries to
  FLAGS.event_dir, remembering the last evaluated step across restarts.
  """
  tf.logging.set_verbosity(tf.logging.INFO)
  if FLAGS.translation:
    # One-shot mode: the two flags are mutually exclusive.
    if FLAGS.translations_dir:
      raise ValueError(
          "Cannot specify both --translation and --translations_dir.")
    if FLAGS.bleu_variant in ("uncased", "both"):
      bleu = 100 * bleu_hook.bleu_wrapper(FLAGS.reference, FLAGS.translation,
                                          case_sensitive=False)
      print("BLEU_uncased = %6.2f" % bleu)
    if FLAGS.bleu_variant in ("cased", "both"):
      bleu = 100 * bleu_hook.bleu_wrapper(FLAGS.reference, FLAGS.translation,
                                          case_sensitive=True)
      print("BLEU_cased = %6.2f" % bleu)
    return

  if not FLAGS.translations_dir:
    raise ValueError(
        "Either --translation or --translations_dir must be specified.")
  transl_dir = os.path.expanduser(FLAGS.translations_dir)
  if not os.path.exists(transl_dir):
    # Poll for the directory to appear, up to FLAGS.wait_minutes.
    exit_time = time.time() + FLAGS.wait_minutes * 60
    tf.logging.info("Translation dir %s does not exist, waiting till %s.",
                    transl_dir, time.asctime(time.localtime(exit_time)))
    while not os.path.exists(transl_dir):
      time.sleep(10)
      if time.time() > exit_time:
        raise ValueError("Translation dir %s does not exist" % transl_dir)

  last_step_file = os.path.join(FLAGS.event_dir, "last_evaluated_step.txt")
  if FLAGS.min_steps == -1:
    # min_steps == -1 means "resume where a previous run left off".
    if tf.gfile.Exists(last_step_file):
      with open(last_step_file) as ls_file:
        FLAGS.min_steps = int(ls_file.read())
    else:
      FLAGS.min_steps = 0
  if FLAGS.report_zero is None:
    # Default: emit a step-0 zero-BLEU point only when starting from scratch.
    FLAGS.report_zero = FLAGS.min_steps == 0

  writer = tf.summary.FileWriter(FLAGS.event_dir)
  for transl_file in bleu_hook.stepfiles_iterator(
      transl_dir, FLAGS.wait_minutes, FLAGS.min_steps, path_suffix=""):
    # report_zero handling must be inside the for-loop,
    # so we are sure the transl_dir is already created.
    if FLAGS.report_zero:
      # Anchor the zero point at the oldest file's mtime in the dir.
      all_files = (os.path.join(transl_dir, f) for f in os.listdir(transl_dir))
      start_time = min(
          os.path.getmtime(f) for f in all_files if os.path.isfile(f))
      values = []
      if FLAGS.bleu_variant in ("uncased", "both"):
        values.append(tf.Summary.Value(
            tag="BLEU_uncased" + FLAGS.tag_suffix, simple_value=0))
      if FLAGS.bleu_variant in ("cased", "both"):
        values.append(tf.Summary.Value(
            tag="BLEU_cased" + FLAGS.tag_suffix, simple_value=0))
      writer.add_event(tf.summary.Event(summary=tf.Summary(value=values),
                                        wall_time=start_time, step=0))
      FLAGS.report_zero = False

    filename = transl_file.filename
    tf.logging.info("Evaluating " + filename)
    values = []
    if FLAGS.bleu_variant in ("uncased", "both"):
      bleu = 100 * bleu_hook.bleu_wrapper(FLAGS.reference, filename,
                                          case_sensitive=False)
      values.append(tf.Summary.Value(tag="BLEU_uncased" + FLAGS.tag_suffix,
                                     simple_value=bleu))
      tf.logging.info("%s: BLEU_uncased = %6.2f" % (filename, bleu))
    if FLAGS.bleu_variant in ("cased", "both"):
      bleu = 100 * bleu_hook.bleu_wrapper(FLAGS.reference, filename,
                                          case_sensitive=True)
      values.append(tf.Summary.Value(tag="BLEU_cased" + FLAGS.tag_suffix,
                                     simple_value=bleu))
      tf.logging.info("%s: BLEU_cased = %6.2f" % (transl_file.filename, bleu))
    writer.add_event(tf.summary.Event(
        summary=tf.Summary(value=values), wall_time=transl_file.mtime,
        step=transl_file.steps))
    writer.flush()
    # Record progress so a restarted run can resume from this step.
    with open(last_step_file, "w") as ls_file:
      ls_file.write(str(transl_file.steps) + "\n")
def main(_):
  """Compute BLEU for one translation file, or watch a directory of them.

  With --translation: prints cased/uncased BLEU to stdout and returns.
  With --translations_dir: evaluates each step-file in the directory
  (waiting for new ones) and writes BLEU values as TF event summaries to
  FLAGS.event_dir, remembering the last evaluated step across restarts.
  """
  tf.logging.set_verbosity(tf.logging.INFO)

  # The (variant-name, case_sensitive) pairs, in evaluation order.
  variant_specs = (("uncased", False), ("cased", True))

  if FLAGS.translation:
    # One-shot mode: the two flags are mutually exclusive.
    if FLAGS.translations_dir:
      raise ValueError(
          "Cannot specify both --translation and --translations_dir.")
    for variant, sensitive in variant_specs:
      if FLAGS.bleu_variant in (variant, "both"):
        score = 100 * bleu_hook.bleu_wrapper(
            FLAGS.reference, FLAGS.translation, case_sensitive=sensitive)
        print("BLEU_%s = %6.2f" % (variant, score))
    return

  if not FLAGS.translations_dir:
    raise ValueError(
        "Either --translation or --translations_dir must be specified.")
  transl_dir = os.path.expanduser(FLAGS.translations_dir)
  if not os.path.exists(transl_dir):
    # Poll until the directory shows up, giving up after wait_minutes.
    exit_time = time.time() + FLAGS.wait_minutes * 60
    tf.logging.info("Translation dir %s does not exist, waiting till %s.",
                    transl_dir, time.asctime(time.localtime(exit_time)))
    while not os.path.exists(transl_dir):
      time.sleep(10)
      if time.time() > exit_time:
        raise ValueError("Translation dir %s does not exist" % transl_dir)

  last_step_file = os.path.join(FLAGS.event_dir, "last_evaluated_step.txt")
  if FLAGS.min_steps == -1:
    # min_steps == -1 means "resume where a previous run left off".
    if tf.gfile.Exists(last_step_file):
      with open(last_step_file) as ls_file:
        FLAGS.min_steps = int(ls_file.read())
    else:
      FLAGS.min_steps = 0
  if FLAGS.report_zero is None:
    # Default: emit a step-0 zero-BLEU point only when starting from scratch.
    FLAGS.report_zero = FLAGS.min_steps == 0

  writer = tf.summary.FileWriter(FLAGS.event_dir)
  for transl_file in bleu_hook.stepfiles_iterator(transl_dir,
                                                  FLAGS.wait_minutes,
                                                  FLAGS.min_steps,
                                                  path_suffix=""):
    # report_zero handling must be inside the for-loop,
    # so we are sure the transl_dir is already created.
    if FLAGS.report_zero:
      # Anchor the zero point at the oldest file's mtime in the dir.
      candidates = (os.path.join(transl_dir, name)
                    for name in os.listdir(transl_dir))
      start_time = min(os.path.getmtime(path)
                       for path in candidates if os.path.isfile(path))
      zero_values = []
      for variant, _ in variant_specs:
        if FLAGS.bleu_variant in (variant, "both"):
          zero_values.append(
              tf.Summary.Value(tag="BLEU_" + variant + FLAGS.tag_suffix,
                               simple_value=0))
      writer.add_event(
          tf.summary.Event(summary=tf.Summary(value=zero_values),
                           wall_time=start_time, step=0))
      FLAGS.report_zero = False

    filename = transl_file.filename
    tf.logging.info("Evaluating " + filename)
    values = []
    for variant, sensitive in variant_specs:
      if FLAGS.bleu_variant in (variant, "both"):
        score = 100 * bleu_hook.bleu_wrapper(
            FLAGS.reference, filename, case_sensitive=sensitive)
        values.append(
            tf.Summary.Value(tag="BLEU_" + variant + FLAGS.tag_suffix,
                             simple_value=score))
        tf.logging.info("%s: BLEU_%s = %6.2f" % (filename, variant, score))
    writer.add_event(
        tf.summary.Event(summary=tf.Summary(value=values),
                         wall_time=transl_file.mtime,
                         step=transl_file.steps))
    writer.flush()
    # Record progress so a restarted run can resume from this step.
    with open(last_step_file, "w") as ls_file:
      ls_file.write(str(transl_file.steps) + "\n")
def main(_):
  """Maintain a sliding-window average of the last FLAGS.n checkpoints.

  Watches model_dir (via bleu_hook.stepfiles_iterator) and, once FLAGS.n
  checkpoints have been seen, writes an averaged checkpoint into
  FLAGS.output_dir for each subsequent step.  A running per-variable sum is
  kept so producing each new average costs only one extra checkpoint read
  (add the newest, subtract the oldest).
  """
  tf.logging.set_verbosity(tf.logging.INFO)
  model_dir = os.path.expanduser(FLAGS.model_dir)
  output_dir = os.path.expanduser(FLAGS.output_dir)
  out_base_file = os.path.join(output_dir, "model.ckpt")

  # Copy flags.txt with the original time, so t2t-bleu can report correct
  # relative time.  Only copy when the source actually exists.
  tf.gfile.MakeDirs(FLAGS.output_dir)
  if (not os.path.exists(os.path.join(output_dir, "flags.txt")) and
      os.path.exists(os.path.join(model_dir, "flags.txt"))):
    shutil.copy2(os.path.join(model_dir, "flags.txt"),
                 os.path.join(output_dir, "flags.txt"))

  models_processed = 0
  queue = deque()
  for model in bleu_hook.stepfiles_iterator(model_dir, FLAGS.wait_minutes,
                                            FLAGS.min_steps):
    if models_processed == 0:
      # Initialize the running sums from the first checkpoint's variable list.
      var_list = tf.contrib.framework.list_variables(model.filename)
      avg_values = {}
      for (name, shape) in var_list:
        if not name.startswith("global_step"):
          avg_values[name] = np.zeros(shape)
    models_processed += 1

    tf.logging.info("Loading [%d]: %s" % (models_processed, model.filename))
    reader = tf.contrib.framework.load_checkpoint(model.filename)
    # Add this checkpoint's contribution to the running average.
    for name in avg_values:
      avg_values[name] += reader.get_tensor(name) / FLAGS.n
    queue.append(model)
    if len(queue) < FLAGS.n:
      # Not enough checkpoints for a full window yet.
      continue

    out_file = "%s-%d" % (out_base_file, model.steps)
    tf_vars = []
    tf.logging.info("Averaging %s" % (out_file))
    for (name, value) in six.iteritems(avg_values):
      # TODO(martinpopel): dtype=var_dtypes[name]
      tf_vars.append(tf.get_variable(name, shape=value.shape))
    placeholders = [tf.placeholder(v.dtype, shape=v.shape) for v in tf_vars]
    assign_ops = [tf.assign(v, p) for (v, p) in zip(tf_vars, placeholders)]

    global_step = tf.get_variable(
        "global_step",
        initializer=tf.constant(model.steps, dtype=tf.int64),
        trainable=False)
    saver = tf.train.Saver(tf.global_variables())

    tf.logging.info("Running session for %s" % (out_file))
    with tf.Session() as sess:
      sess.run(tf.global_variables_initializer())
      # Feed the averaged numpy arrays into the graph variables.
      for p, assign_op, (name, value) in zip(
          placeholders, assign_ops, six.iteritems(avg_values)):
        sess.run(assign_op, {p: value})
      tf.logging.info("Storing to %s" % out_file)
      saver.save(sess, out_base_file, global_step=global_step)
      # Preserve the source checkpoint's mtime so t2t-bleu reports correct
      # relative time.
      os.utime(out_file + ".index", (model.mtime, model.mtime))

    # Start from a clean graph for the next averaging round.
    tf.reset_default_graph()
    # Slide the window: subtract the oldest checkpoint's contribution.
    first_model = queue.popleft()
    reader = tf.contrib.framework.load_checkpoint(first_model.filename)
    for name in avg_values:
      avg_values[name] -= reader.get_tensor(name) / FLAGS.n