Example #1
0
 def write_outputs(self, tasks, trial, split):
     """Write model prediction to disk.

     Runs the estimator in predict mode over the given tasks/split and
     pickles the per-example outputs (logits when available, otherwise raw
     predictions) to the configured output paths.
     """
     utils.log("Writing out predictions for", tasks, split)
     distill_input_fn, _, _ = self._preprocessor.prepare_predict(
         tasks, split)
     predictions = self._estimator.predict(input_fn=distill_input_fn,
                                           yield_single_examples=True)
     # Maps task name -> example id -> model logits (or predictions).
     outputs_by_task = collections.defaultdict(dict)
     for prediction in predictions:
         # task_id == len(self._tasks) presumably marks a padding/dummy
         # example produced by the input pipeline; skip those.
         if prediction["task_id"] == len(self._tasks):
             continue
         nested = utils.nest_dict(prediction, self._config.task_names)
         task_name = self._config.task_names[nested["task_id"]]
         task_values = nested[task_name]
         if "logits" in task_values:
             outputs_by_task[task_name][task_values["eid"]] = (
                 task_values["logits"])
         else:
             outputs_by_task[task_name][task_values["eid"]] = (
                 task_values["predictions"])
     for task_name, task_outputs in outputs_by_task.items():
         utils.log("Pickling predictions for {:} {:} examples ({:})".format(
             len(task_outputs), task_name, split))
         if split == "train":
             # Only the first n_writes_distill trials write distillation
             # outputs.
             if trial <= self._config.n_writes_distill:
                 utils.write_pickle(
                     task_outputs,
                     self._config.distill_outputs(task_name, trial))
         elif trial <= self._config.n_writes_test:
             utils.write_pickle(
                 task_outputs,
                 self._config.test_outputs(task_name, split, trial))
 def serialize_examples(self, examples, is_training, output_file):
     """Convert a set of `InputExample`s to a TFRecord file."""
     n_total = len(examples)
     with tf.python_io.TFRecordWriter(output_file) as writer:
         for index, example in enumerate(examples):
             # Periodic progress logging for large datasets.
             if index % 50000 == 0:
                 utils.log("Writing example {:} of {:}".format(
                     index, n_total))
             tf_example = self._example_to_tf_example(example, is_training)
             writer.write(tf_example.SerializeToString())
Example #3
0
def write_results(config, results):
  """Write out evaluate metrics to disk."""
  utils.log("Writing results to", config.results_txt)
  utils.mkdir(config.results_txt.rsplit("/", 1)[0])
  utils.write_pickle(results, config.results_pkl)
  # Build one line per (trial, task): "task: metric1: x.xx - metric2: y.yy".
  lines = []
  for trial_results in results:
    for task_name, task_results in trial_results.items():
      metrics_str = " - ".join(
          "{:}: {:.2f}".format(metric, score)
          for metric, score in task_results.items())
      lines.append(task_name + ": " + metrics_str + "\n")
  with tf.gfile.GFile(config.results_txt, "w") as f:
    f.write("".join(lines))
Example #4
0
 def log(self, run_values=None):
     """Log training progress: step counter, speed, elapsed time, and ETA.

     Args:
       run_values: optional session run values whose `results` dict
         (tag -> scalar) is appended to the log line.
     """
     msg = '{:}/{:} = {:.1f}%'.format(
         self._iter_count, self._n_steps,
         100.0 * self._iter_count / self._n_steps)
     time_elapsed = time.time() - self._start_time
     # Guard against ZeroDivisionError when logging before the first
     # completed step.
     time_per_step = time_elapsed / max(1, self._iter_count)
     # GPS = global steps per second.
     msg += ', GPS: {:.1f}'.format(1 / time_per_step)
     msg += ', ELAP: ' + secs_to_str(time_elapsed)
     msg += ', ETA: ' + secs_to_str(
         (self._n_steps - self._iter_count) * time_per_step)
     if run_values is not None:
         for tag, value in run_values.results.items():
             msg += ' - ' + str(tag) + (': {:.4f}'.format(value))
     utils.log(msg)
    def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
        """The `model_fn` for TPUEstimator.

        Builds the multitask model, initializes weights from the pre-trained
        checkpoint (unless config.debug), and returns a TPUEstimatorSpec for
        either training or prediction. `config`, `tasks`, `task_weights`, and
        `num_train_steps` are captured from the enclosing scope.
        """
        utils.log("Building model")

        is_training = (mode == tf.estimator.ModeKeys.TRAIN)
        model = MultitaskModel(config, tasks, task_weights, is_training,
                               features, num_train_steps)

        # Load pre-trained weights from checkpoint
        tvars = tf.trainable_variables()
        scaffold_fn = None
        if not config.debug:
            assignment_map, _ = modeling.get_assignment_map_from_checkpoint(
                tvars, config.init_checkpoint)
            if config.use_tpu:

                # On TPU the checkpoint restore is deferred into the scaffold
                # function instead of running at graph-construction time.
                def tpu_scaffold():
                    tf.train.init_from_checkpoint(config.init_checkpoint,
                                                  assignment_map)
                    return tf.train.Scaffold()

                scaffold_fn = tpu_scaffold
            else:
                tf.train.init_from_checkpoint(config.init_checkpoint,
                                              assignment_map)

        # Run training or prediction
        if mode == tf.estimator.ModeKeys.TRAIN:
            train_op = optimization.create_optimizer(config, model.loss,
                                                     num_train_steps)
            output_spec = tf.contrib.tpu.TPUEstimatorSpec(
                mode=mode,
                loss=model.loss,
                train_op=train_op,
                scaffold_fn=scaffold_fn,
                training_hooks=[
                    # The loss tensor is only fed to the hook off-TPU —
                    # presumably it cannot be fetched per-step on TPU; verify.
                    training_utils.ETAHook(
                        config,
                        {} if config.use_tpu else dict(loss=model.loss),
                        num_train_steps)
                ])
        else:
            # Only TRAIN and PREDICT are supported; EVAL is not handled.
            assert mode == tf.estimator.ModeKeys.PREDICT
            output_spec = tf.contrib.tpu.TPUEstimatorSpec(
                mode=mode,
                predictions=utils.flatten_dict(model.outputs),
                scaffold_fn=scaffold_fn)

        utils.log("Building complete")
        return output_spec
Example #6
0
def main(_):
    """Train and evaluate `config.num_trials` models with different seeds.

    Expects the last three command-line arguments to be: the top-level output
    directory, the model name, and a JSON string of hyperparameter overrides.
    """
    topdir, model_name, hparams = sys.argv[-3:]  # pylint: disable=unbalanced-tuple-unpacking
    config = configure.Config(topdir, model_name, **json.loads(hparams))

    # Setup for training
    tasks = task_builder.get_tasks(config)
    results = []
    trial = 1
    utils.rmkdir(config.checkpoints_dir)

    # Train and evaluate num_trials models with different random seeds
    while trial <= config.num_trials:
        # Recompute the heading each iteration so the logged trial number is
        # current (previously it was computed once and always showed trial 1).
        heading_info = "model={:}, trial {:}/{:}".format(
            config.model_name, trial, config.num_trials)
        heading = lambda msg: utils.heading(msg + ": " + heading_info)  # pylint: disable=cell-var-from-loop

        heading("Start training")
        model_runner = ModelRunner(config, tasks)
        model_runner.train()
        utils.log()

        heading("Run evaluation")
        results.append(model_runner.evaluate())
        write_results(config, results)

        if ((config.write_distill_outputs and trial <= config.n_writes_distill)
                or
            (config.write_test_outputs and trial <= config.n_writes_test)):
            heading("Write outputs")
            for task in tasks:
                if config.write_distill_outputs:
                    model_runner.write_outputs([task], trial, "train")
                if config.write_test_outputs:
                    for split in task.get_test_splits():
                        model_runner.write_outputs([task], trial, split)

        # Fresh checkpoint directory for the next trial's random seed.
        utils.rmkdir(config.checkpoints_dir)
        trial += 1
Example #7
0
    def _serialize_dataset(self, tasks, is_training, split):
        """Writes out tfrecord examples for the specified tasks.

        Reuses previously-written tfrecords when the config allows it;
        otherwise gathers each task's examples, truncates them to a whole
        number of batches, serializes them, and writes sidecar metadata.

        Args:
          tasks: list of task objects providing `name` and `get_examples`.
          is_training: whether this dataset is for training (controls batch
            size, shuffling, and the returned step count).
          split: dataset split name (e.g. "train").

        Returns:
          Tuple of (input_fn, number of steps, dict of task name -> size).
        """
        dataset_name = "_".join(sorted([task.name for task in tasks]))
        dataset_name += "_" + split
        if self._config.distill:
            dataset_name += "_distill"
        dataset_prefix = os.path.join(self._config.preprocessed_data_dir,
                                      dataset_name)
        tfrecords_path = dataset_prefix + ".tfrecord"
        metadata_path = dataset_prefix + ".metadata"
        batch_size = (self._config.train_batch_size
                      if is_training else self._config.eval_batch_size)

        utils.log("Loading dataset", dataset_name)
        n_examples = None
        sizes = {}
        if (self._config.use_tfrecords_if_existing
                and tf.gfile.Exists(metadata_path)):
            utils.log("Using already-written tfrecords")
            metadata = utils.load_json(metadata_path)
            n_examples = metadata["n_examples"]
            sizes = metadata["sizes"]

        if n_examples is None:
            utils.log("Existing tfrecords not found so creating")
            examples = []
            for task in tasks:
                task_examples = task.get_examples(split)
                sizes[task.name] = len(task_examples)
                examples += task_examples
            # Drop trailing examples so the dataset is an exact multiple of
            # the batch size, rather than padding with dummy examples.
            last_index = len(examples) - (len(examples) % batch_size)
            examples = examples[:last_index]
            if is_training:
                random.shuffle(examples)
            n_examples = len(examples)
            assert n_examples % batch_size == 0
            utils.mkdir(tfrecords_path.rsplit("/", 1)[0])
            self.serialize_examples(examples, is_training, tfrecords_path)
            utils.write_json({
                "n_examples": n_examples,
                "sizes": sizes
            }, metadata_path)

        input_fn = self._input_fn_builder(tfrecords_path, is_training)
        if is_training:
            steps = int(n_examples // batch_size *
                        self._config.num_train_epochs)
        else:
            steps = n_examples // batch_size

        return input_fn, steps, sizes
Example #8
0
 def _evaluate_task(self, task):
     """Evaluate the current model on the dev set."""
     utils.log("Evaluating", task.name)
     eval_input_fn, _, _ = self._preprocessor.prepare_eval(task)
     predictions = self._estimator.predict(input_fn=eval_input_fn,
                                           yield_single_examples=True)
     scorer = task.get_scorer()
     for prediction in predictions:
         # task_id == len(self._tasks) presumably marks a padding example
         # from the input pipeline; skip it.
         if prediction["task_id"] == len(self._tasks):
             continue
         nested = utils.nest_dict(prediction, self._config.task_names)
         scorer.update(nested[task.name])
     utils.log(task.name + ": " + scorer.results_str())
     utils.log()
     return dict(scorer.get_results())
Example #9
0
 def train(self):
     """Train the estimator for the configured number of steps."""
     message = "Training for {:} steps".format(self.train_steps)
     utils.log(message)
     self._estimator.train(
         input_fn=self._train_input_fn, max_steps=self.train_steps)