Example #1 (predict): run a model over a dataset and dump its predictions to a JSON file.
def predict(override_cfg, model_dir):
  """Run model over a dataset and dump predictions to json file."""
  assert FLAGS.predict_path
  cfg = _load_config(model_dir)
  cfg = utils.merge(cfg, override_cfg)
  input_fn = data.get_input_fn(
      split=cfg.dataset.eval_split,
      max_length=None,
      repeat=False,
      shuffle=False,
      cache=False,
      limit=None,
      data_path=cfg.dataset.data_path,
      vocab_path=cfg.dataset.vocab_path,
      is_tpu=False,
      use_generator=True,
      is_training=False)
  estimator = model.get_estimator(**cfg)
  predictions = dict()
  for i, prediction in enumerate(estimator.predict(input_fn)):
    predictions[prediction["id"]] = prediction["answer"]
    if i % 100 == 0:
      tf.logging.info("Prediction %s | %s: %s" % (i, prediction["id"],
                                                  prediction["answer"]))

  # Dump results to a file
  with tf.gfile.GFile(FLAGS.predict_path, "w") as f:
    json.dump(predictions, f)
Example #2 (evaluate): evaluate a model, either once or continuously as new checkpoints appear.
def evaluate(override_cfg, model_dir, continuous=True):
  """Run training and evaluation."""
  tf.logging.info("model_dir = " + model_dir)
  try:
    cfg = _load_config(model_dir)
  except tf.errors.NotFoundError:
    tf.logging.info("Model directory does not exist yet. Creating new config.")
    cfg = model.build_config(model_dir=model_dir, data_path=FLAGS.data_path)
  tf.logging.info(cfg)
  tf.logging.info(override_cfg)
  cfg = utils.merge(cfg, override_cfg)

  cfg.tpu.enable = False
  cfg.dataset.max_length = None

  # Construct inputs and estimator
  _, eval_input = data.build_dataset(cfg.dataset, is_tpu=cfg.tpu.enable)
  estimator = model.get_estimator(**cfg)
  if continuous:
    checkpoints_iterator = tf.contrib.training.checkpoints_iterator(
        cfg.model_dir)
    eval_metrics = None
    for ckpt_path in checkpoints_iterator:
      eval_metrics = estimator.evaluate(
          input_fn=eval_input, checkpoint_path=ckpt_path)
      tf.logging.info(pprint.pformat(eval_metrics))
    return eval_metrics
  else:
    eval_metrics = estimator.evaluate(input_fn=eval_input)
    return eval_metrics
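
One caveat for the continuous branch above: without a timeout, tf.contrib.training.checkpoints_iterator blocks forever once training stops writing checkpoints. A hedged variant (the helper name and the 600-second idle window are illustrative, not part of the original code) that exits after a period with no new checkpoint:

import pprint

import tensorflow as tf


def evaluate_until_idle(estimator, eval_input, model_dir, idle_secs=600):
  """Evaluate every new checkpoint, stopping after idle_secs without one."""
  eval_metrics = None
  for ckpt_path in tf.contrib.training.checkpoints_iterator(
      model_dir,
      timeout=idle_secs,         # wait at most idle_secs for a new checkpoint
      timeout_fn=lambda: True):  # returning True signals no more checkpoints
    eval_metrics = estimator.evaluate(
        input_fn=eval_input, checkpoint_path=ckpt_path)
    tf.logging.info(pprint.pformat(eval_metrics))
  return eval_metrics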
Example #3 (train_run): fit an estimator and evaluate it as a binary classifier on the train and test splits.
def train_run(estimator_params, x_train_prep, y_train, x_test_prep, y_test,
              temp_dir):
    """Fit the estimator and evaluate it as a binary classifier."""
    temp_dir.mkdir(parents=True, exist_ok=True)
    _logger.info("Fitting the estimator")
    estimator, estimator_tags = get_estimator(**estimator_params)
    estimator.fit(x_train_prep, y_train)

    estimator_metrics, estimator_artifacts = evaluate_binary_classifier(
        model=estimator,
        data={
            'train': {
                'x': x_train_prep,
                'y': y_train
            },
            'test': {
                'x': x_test_prep,
                'y': y_test
            }
        },
        temp_dir=temp_dir)
    return estimator, estimator_tags, estimator_metrics, estimator_artifacts
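
A hedged usage sketch for train_run, using synthetic scikit-learn data; the estimator_params key below is an assumption, since the signature of this project's get_estimator is not shown:

from pathlib import Path

from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split

# Synthetic, already-preprocessed features; the *_prep names mirror the
# signature of train_run above.
x_prep, y = make_classification(n_samples=1000, n_features=20, random_state=0)
x_train_prep, x_test_prep, y_train, y_test = train_test_split(
    x_prep, y, test_size=0.2, random_state=0)

estimator, tags, metrics, artifacts = train_run(
    estimator_params={'estimator_name': 'logistic_regression'},  # assumed key
    x_train_prep=x_train_prep,
    y_train=y_train,
    x_test_prep=x_test_prep,
    y_test=y_test,
    temp_dir=Path('/tmp/train_run_example'))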
Example #4 (train_and_eval): train for a fixed number of epochs, optionally evaluating after each one.
def train_and_eval(cfg, do_eval=True, report_fn=None):
  """Run training (and evaluation if on a GPU)."""
  tf.logging.info("cfg.model_dir = " + cfg.model_dir)
  # Save out config to model directory
  assert FLAGS.mode == "train"
  tf.gfile.MakeDirs(cfg.model_dir)
  with tf.gfile.GFile(os.path.join(cfg.model_dir, "config.json"), "w") as f:
    json.dump(cfg, f)

  if not cfg.dataset.num_repeats and not cfg.steps_per_epoch:
    raise ValueError("Must have a fixed num repeats or epoch step size.")

  # Construct inputs and estimator
  train_input, eval_input = data.build_dataset(
      cfg.dataset, is_tpu=cfg.tpu.enable)
  estimator = model.get_estimator(**cfg)

  if do_eval:
    eval_metrics = None
    for i in range(cfg.num_epochs):
      tf.logging.info("Starting epoch %s/%s" % (i + 1, cfg.num_epochs))
      train_metrics = estimator.train(
          input_fn=train_input, steps=cfg.steps_per_epoch or None)
      tf.logging.info(pprint.pformat(train_metrics))
      eval_metrics = estimator.evaluate(input_fn=eval_input)
      tf.logging.info(pprint.pformat(eval_metrics))
      if report_fn:
        report_fn(eval_metrics)
    return eval_metrics
  else:
    for i in range(cfg.num_epochs):
      tf.logging.info("Starting epoch %s/%s" % (i + 1, cfg.num_epochs))
      train_metrics = estimator.train(
          input_fn=train_input, steps=cfg.steps_per_epoch)
      tf.logging.info(pprint.pformat(train_metrics))
    return dict()
Example #5 (main): CLI entry point for a customer lifetime value (CLV) model, wiring an estimator into tf.estimator.train_and_evaluate.
def main(argv=None):
    """Run the CLV model."""
    argv = sys.argv if argv is None else argv
    args = create_parser().parse_args(args=argv[1:])

    # Set logging mode
    tf.logging.set_verbosity(tf.logging.INFO)

    # execute non-estimator models
    if args.model_type in PROBABILISTIC_MODEL_TYPES:
        run_btyd(args.model_type, args.data_src, args.threshold_date,
                 args.predict_end)
        return

    if args.hypertune:
        # if tuning, join the trial number to the output path
        config = json.loads(os.environ.get('TF_CONFIG', '{}'))
        trial = config.get('task', {}).get('trial', '')
        model_dir = os.path.join(args.job_dir, trial)
    else:
        model_dir = args.job_dir

    print('Running training with model {}'.format(args.model_type))

    # data path
    data_folder = '{}/'.format(args.data_src)

    # Calculate train steps and checkpoint steps based on approximate
    # training set size, batch size, and requested number of training
    # epochs.
    train_steps = int((args.train_size / args.batch_size) * args.num_epochs)
    checkpoint_steps = int(
        (args.train_size / args.batch_size) * (args.num_epochs / NUM_EVAL))

    # create RunConfig
    config = tf.estimator.RunConfig(save_checkpoints_steps=checkpoint_steps)

    hidden_units = [int(n) for n in args.hidden_units.split()]

    # Hyperparameters
    params = tf.contrib.training.HParams(
        num_epochs=args.num_epochs,
        train_steps=train_steps,
        batch_size=args.batch_size,
        hidden_units=hidden_units,
        learning_rate=args.learning_rate,
        ignore_crosses=args.ignore_crosses,
        buffer_size=args.buffer_size,
        learning_rate_decay=(args.learning_rate_decay == 'True'),
        l1_regularization=args.l1_regularization,
        l2_regularization=args.l2_regularization,
        optimizer=args.optimizer,
        dropout=(None if args.dropout == 0.0 else args.dropout),
        checkpoint_steps=checkpoint_steps)

    print(params)
    print('')
    print('Dataset Size:', args.train_size)
    print('Batch Size:', args.batch_size)
    print('Steps per Epoch:', args.train_size / args.batch_size)
    print('Total Train Steps:', train_steps)
    print('Required Evaluation Steps:', NUM_EVAL)
    print('Perform evaluation step after each', args.num_epochs / NUM_EVAL,
          'epochs')
    print('Save Checkpoint After', checkpoint_steps, 'steps')
    print('**********************************************')

    # Create the relevant estimator (canned or custom)
    estimator = get_estimator(estimator_name=args.model_type,
                              config=config,
                              params=params,
                              model_dir=model_dir)

    # Creates the training and eval specs by reading the relevant datasets
    # Note that TrainSpec needs max_steps otherwise it runs forever.
    train_spec = tf.estimator.TrainSpec(
        input_fn=lambda: read_train(data_folder, params),
        max_steps=train_steps)

    eval_spec = tf.estimator.EvalSpec(
        input_fn=lambda: read_eval(data_folder, params),
        exporters=[
            tf.estimator.LatestExporter(
                name='estimate',
                serving_input_receiver_fn=csv_serving_input_fn,
                exports_to_keep=1,
                as_text=True)
        ],
        steps=1000,
        throttle_secs=1,
        start_delay_secs=1)

    if not args.resume:
        print('Removing previous trained model...')
        shutil.rmtree(model_dir, ignore_errors=True)
    else:
        print('Resuming training...')

    # Runs the training and evaluation using the chosen estimator.
    # Saves model data into export/estimate/1234567890/...
    tf.estimator.train_and_evaluate(estimator, train_spec, eval_spec)
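
The EvalSpec above exports via a csv_serving_input_fn that is not shown here. A minimal sketch of what such a TF 1.x serving input function typically looks like, assuming hypothetical feature columns (the names and defaults below are placeholders, not the CLV pipeline's real schema):

import tensorflow as tf

# Hypothetical feature columns and defaults; the real pipeline defines its
# own CSV schema, so treat these names purely as placeholders.
_CSV_COLUMNS = ['recency', 'frequency', 'monetary']
_CSV_DEFAULTS = [[0.0], [0.0], [0.0]]


def csv_serving_input_fn():
    """Parse serialized CSV rows into feature tensors for serving."""
    csv_rows = tf.placeholder(dtype=tf.string, shape=[None], name='csv_rows')
    columns = tf.decode_csv(csv_rows, record_defaults=_CSV_DEFAULTS)
    features = dict(zip(_CSV_COLUMNS, columns))
    return tf.estimator.export.ServingInputReceiver(
        features, receiver_tensors={'csv_rows': csv_rows})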