Example #1
def main(_):
    # With SeqIO summaries, each task's events live in their own
    # subdirectory of summary_dir; otherwise read summary_dir directly.
    if FLAGS.seqio_summaries:
        subdirs = tf.io.gfile.listdir(FLAGS.summary_dir)
        summary_dirs = [os.path.join(FLAGS.summary_dir, d) for d in subdirs]
    else:
        summary_dirs = [FLAGS.summary_dir]

    # Merge the metrics from each summary directory into one dict.
    scores = None
    for d in summary_dirs:
        events = eval_utils.parse_events_files(d, FLAGS.seqio_summaries)
        if FLAGS.perplexity_eval:
            # Use the raw parsed events as the scores.
            task_metrics = events
        else:
            task_metrics = eval_utils.get_eval_metric_values(
                events,
                task_name=(os.path.basename(d)
                           if FLAGS.seqio_summaries else None))
        if scores:
            scores.update(task_metrics)
        else:
            scores = task_metrics

    if not scores:
        logging.info("No evaluation events found in %s", FLAGS.summary_dir)
        return

    df = eval_utils.scores_to_df(scores)
    df = eval_utils.compute_avg_glue(df)
    df = eval_utils.sort_columns(df)
    eval_utils.log_csv(df, output_file=FLAGS.out_file)
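
These main functions rely on module-level names the snippets do not show: FLAGS, os, logging, tf, and eval_utils. Below is a minimal sketch of the kind of preamble they assume, using absl flags and the t5.evaluation.eval_utils helpers; the flag names match the FLAGS.* attributes used above, but the defaults, help strings, and the exact TensorFlow import are assumptions, not the original file.

# Assumed preamble for the main() examples above; illustrative only.
import os

from absl import app
from absl import flags
from absl import logging
from t5.evaluation import eval_utils
import tensorflow.compat.v1 as tf

FLAGS = flags.FLAGS

flags.DEFINE_string("summary_dir", None, "Directory containing event files.")
flags.DEFINE_string("out_file", None, "Where to write the CSV of metrics.")
flags.DEFINE_bool("seqio_summaries", False,
                  "Whether the summaries were written per task by SeqIO.")
flags.DEFINE_bool("perplexity_eval", False,
                  "Report raw event values (e.g. loss) instead of eval metrics.")


if __name__ == "__main__":
    app.run(main)  # `main` as defined in the examples above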

Example #2
def main(_):
    events = eval_utils.parse_events_files(FLAGS.summary_dir)
    scores = eval_utils.get_eval_metric_values(events)
    if not scores:
        logging.info("No evaluation events found in %s", FLAGS.summary_dir)
        return
    scores = eval_utils.compute_avg_glue(scores)
    eval_utils.log_csv(scores, output_file=FLAGS.out_file)

Example #3
def test_get_eval_metric_values(self):
    events = {
        "eval/foo_task/accuracy": [(20, 1.), (30, 2.)],
        "eval/bar_task/sequence_accuracy": [(10, 3.)],
        "loss": [(40, 3.)],
    }
    eval_values = eval_utils.get_eval_metric_values(events)
    # Only "eval/" tags survive, with the prefix stripped; "loss" is dropped.
    self.assertDictEqual(
        eval_values, {
            "foo_task/accuracy": [(20, 1.), (30, 2.)],
            "bar_task/sequence_accuracy": [(10, 3.)],
        })
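
The assertion above pins down the contract of get_eval_metric_values when no task_name is passed: keep only the tags under the "eval/" prefix, strip that prefix, and drop everything else (such as "loss"). A minimal sketch of that behavior, for illustration only, not the actual eval_utils implementation:

# Illustrative sketch of the behavior asserted by the test; not the t5 code.
def get_eval_metric_values_sketch(events):
    eval_values = {}
    for tag, step_values in events.items():
        if tag.startswith("eval/"):
            # "eval/foo_task/accuracy" -> "foo_task/accuracy"
            eval_values[tag[len("eval/"):]] = step_values
    return eval_values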

Example #4
def main(_):
    events = eval_utils.parse_events_files(FLAGS.summary_dir)
    if FLAGS.perplexity_eval:
        scores = events
    else:
        scores = eval_utils.get_eval_metric_values(events)
    if not scores:
        logging.info("No evaluation events found in %s", FLAGS.summary_dir)
        return
    df = eval_utils.scores_to_df(scores)
    df = eval_utils.compute_avg_glue(df)
    df = eval_utils.sort_columns(df)
    eval_utils.log_csv(df, output_file=FLAGS.out_file)
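
Example #4 runs the same pipeline as Example #1 without the per-task SeqIO handling: parse events into a tag-to-values mapping, optionally reduce it to eval metrics, convert to a DataFrame, and log a CSV. Below is a hedged sketch of that flow with a hard-coded scores dict shaped like the one asserted in the test above; the values and the output path are purely illustrative, and it assumes these eval_utils helpers accept a metric -> [(step, value), ...] mapping.

from t5.evaluation import eval_utils

# Scores shaped like the dict asserted in test_get_eval_metric_values.
scores = {
    "foo_task/accuracy": [(20, 1.), (30, 2.)],
    "bar_task/sequence_accuracy": [(10, 3.)],
}

# Same post-processing chain as the main() functions above.
df = eval_utils.scores_to_df(scores)
df = eval_utils.compute_avg_glue(df)
df = eval_utils.sort_columns(df)
eval_utils.log_csv(df, output_file="/tmp/metrics.csv")  # hypothetical output path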