Example #1
def run_training(session, config=FLAGS):
    save_config(config.summaries_dir, config)

    train_files = get_tfrecord_files(config)
    batch_number = len(train_files) // config.batch_size
    logging.info('Total number of batches: %d' % batch_number)

    params = tf.contrib.training.HParams(
        learning_rate=config.learning_rate,
        pkeep_conv=0.75,
        device=config.device,
        epoch=config.epoch,
        batch_size=config.batch_size,
        min_eval_frequency=500,
        train_steps=None,  # Use train feeder until it's empty
        eval_steps=1,  # Use 1 step of evaluation feeder
        train_files=train_files
    )
    run_config = tf.contrib.learn.RunConfig(model_dir=config.checkpoint_dir)
    learn_runner.run(
        experiment_fn=experiment_fn,  # First-class function
        run_config=run_config,  # RunConfig
        schedule="train",  # What to run
        hparams=params  # HParams
    )
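For context: when learn_runner.run is given a run_config, as above, it calls the experiment function with that config and the hparams, and expects a tf.contrib.learn.Experiment back. Below is a minimal sketch of that contract, assuming the TF 1.x contrib APIs; the toy estimator and input function are invented here purely for illustration.

import tensorflow as tf
from tensorflow.contrib.learn.python.learn import learn_runner

def _toy_input_fn():
    # Tiny constant dataset: one numeric feature, one numeric label.
    features = {'x': tf.constant([[1.0], [2.0], [3.0], [4.0]])}
    labels = tf.constant([[2.0], [4.0], [6.0], [8.0]])
    return features, labels

def experiment_fn(run_config, hparams):
    # learn_runner calls this with the RunConfig and HParams it received
    # and requires a tf.contrib.learn.Experiment in return.
    estimator = tf.contrib.learn.LinearRegressor(
        feature_columns=[tf.contrib.layers.real_valued_column('x')],
        config=run_config)
    return tf.contrib.learn.Experiment(
        estimator,
        train_input_fn=_toy_input_fn,
        eval_input_fn=_toy_input_fn,
        train_steps=10,
        eval_steps=1)

learn_runner.run(
    experiment_fn=experiment_fn,
    run_config=tf.contrib.learn.RunConfig(model_dir='/tmp/toy_experiment'),
    schedule='train_and_evaluate',
    hparams=tf.contrib.training.HParams(learning_rate=0.1))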
Example #2
 def test_fail_invalid_hparams_type(self):
   run_config = run_config_lib.RunConfig(model_dir=_MODIR_DIR)
   with self.assertRaisesRegexp(ValueError, _INVALID_HPARAMS_ERR_MSG):
     learn_runner.run(build_experiment_for_run_config,
                      run_config=run_config,
                      schedule="local_run",
                      hparams=["hparams"])
Example #3
def main(argv=None):
    """Run a TensorFlow model on the Criteo dataset."""
    env = json.loads(os.environ.get('TF_CONFIG', '{}'))
    # First find out if there's a task value on the environment variable.
    # If there is none or it is empty define a default one.
    task_data = env.get('task') or {'type': 'master', 'index': 0}
    argv = sys.argv if argv is None else argv
    args = create_parser().parse_args(args=argv[1:])

    trial = task_data.get('trial')
    if trial is not None:
        output_dir = os.path.join(args.output_path, trial)
    else:
        output_dir = args.output_path

    # Run evaluation only if instructed to; otherwise call the Experiment's run.
    if args.eval_only_summary_filename:
        experiment = get_experiment_fn(args)(output_dir)
        # Note that evaluation here will appear as 'one_pass' in tensorboard.
        results = experiment.evaluate(delay_secs=0)
        # Converts numpy types to native types for json dumps.
        json_out = json.dumps(
            {key: value.tolist()
             for key, value in results.items()})
        with tf.Session():
            tf.write_file(args.eval_only_summary_filename, json_out).run()
    else:
        learn_runner.run(experiment_fn=get_experiment_fn(args),
                         output_dir=output_dir)
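This example uses the older calling convention: no run_config, just an output_dir, which learn_runner passes straight to the experiment function as its only argument. A minimal sketch of that variant, under the same TF 1.x assumptions (the toy estimator and input function are again invented for illustration):

import tensorflow as tf
from tensorflow.contrib.learn.python.learn import learn_runner

def _toy_input_fn():
    features = {'x': tf.constant([[1.0], [2.0]])}
    labels = tf.constant([[2.0], [4.0]])
    return features, labels

def experiment_fn(output_dir):
    # In the output_dir style the experiment function receives only the
    # output directory and builds everything else itself.
    estimator = tf.contrib.learn.LinearRegressor(
        feature_columns=[tf.contrib.layers.real_valued_column('x')],
        model_dir=output_dir)
    return tf.contrib.learn.Experiment(
        estimator,
        train_input_fn=_toy_input_fn,
        eval_input_fn=_toy_input_fn,
        train_steps=5,
        eval_steps=1)

learn_runner.run(experiment_fn=experiment_fn, output_dir='/tmp/toy_output')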
Example #4
def main():

    args_parser = argparse.ArgumentParser()
    args = parameters.initialise_arguments(args_parser)
    parameters.HYPER_PARAMS = hparam.HParams(**args.__dict__)

    # Set python level verbosity
    tf.logging.set_verbosity(args.verbosity)

    # Set C++ Graph Execution level verbosity
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = str(
        tf.logging.__dict__[args.verbosity] // 10)

    # Directory to store output model and checkpoints
    output_dir = args.job_dir

    # Run the training job
    learn_runner.run(experiment.generate_experiment_fn(
        min_eval_frequency=args.min_eval_frequency,
        eval_delay_secs=args.eval_delay_secs,
        train_steps=args.train_steps,
        eval_steps=args.eval_steps,
        export_strategies=[
            saved_model_export_utils.make_export_strategy(
                serving.SERVING_FUNCTIONS[args.export_format],
                exports_to_keep=1,
                default_output_alternative_key=None,
            )
        ]),
                     run_config=run_config.RunConfig(model_dir=output_dir),
                     hparams=parameters.HYPER_PARAMS)
Example #5
def run(data_dir, model, output_dir, train_steps, eval_steps, schedule):
    """Runs an Estimator locally or distributed.

    Args:
      data_dir: The directory the data can be found in.
      model: The name of the model to use.
      output_dir: The directory to store outputs in.
      train_steps: The number of steps to run training for.
      eval_steps: The number of steps to run evaluation for.
      schedule: (str) The schedule to run. The value here must
        be the name of one of Experiment's methods.
    """
    exp_fn = make_experiment_fn(data_dir=data_dir,
                                model_name=model,
                                train_steps=train_steps,
                                eval_steps=eval_steps)

    # Create hparams and run_config
    run_config = create_run_config(output_dir)
    hparams = create_hparams(FLAGS.hparams_set,
                             data_dir,
                             passed_hparams=FLAGS.hparams)

    if is_chief():
        save_metadata(output_dir, hparams)

    learn_runner.run(experiment_fn=exp_fn,
                     schedule=schedule,
                     run_config=run_config,
                     hparams=hparams)
Example #6
def main(_argv):
    """The entrypoint for the script"""
    if not FLAGS.output_dir:
        FLAGS.output_dir = tempfile.mkdtemp()
    learn_runner.run(experiment_fn=create_experiment,
                     output_dir=FLAGS.output_dir,
                     schedule=FLAGS.schedule)
Example #7
    def train(self):
        experiment_fn = self._generate_experiment_fn()
        hparams = HParams(**self.customer_params)

        learn_runner.run(experiment_fn,
                         run_config=self._build_run_config(),
                         hparams=hparams)
Example #8
def main(argv=None):
  """Runs a TensorFlow model on the Iris dataset."""
  args = parse_arguments(sys.argv if argv is None else argv)

  env = json.loads(os.environ.get('TF_CONFIG', '{}'))
  # First find out if there's a task value on the environment variable.
  # If there is none or it is empty define a default one.
  task_data = env.get('task') or {'type': 'master', 'index': 0}

  trial = task_data.get('trial')
  if trial is not None:
    output_dir = os.path.join(args.output_path, trial)
  else:
    output_dir = args.output_path

  learn_runner.run(
      experiment_fn=make_experiment_fn(
          train_data_paths=args.train_data_paths,
          eval_data_paths=args.eval_data_paths,
          metadata_path=args.metadata_path,
          max_steps=args.max_steps,
          layer1_size=args.layer1_size,
          layer2_size=args.layer2_size,
          learning_rate=args.learning_rate,
          epsilon=args.epsilon,
          batch_size=args.batch_size,
          eval_batch_size=args.eval_batch_size),
      output_dir=output_dir)
Example #9
def train_and_evaluate(args):
    train_steps = int(0.5 + (1.0 * args["num_epochs"] * args["nusers"]) /
                      args["batch_size"])
    steps_in_epoch = int(0.5 + args["nusers"] / args["batch_size"])
    print("Will train for {} steps, evaluating once every {} steps".format(
        train_steps, steps_in_epoch))

    def experiment_fn(output_dir):
        return tf.contrib.learn.Experiment(
            tf.contrib.factorization.WALSMatrixFactorization(
                num_rows=args["nusers"],
                num_cols=args["nitems"],
                embedding_dimension=args["n_embeds"],
                model_dir=args["output_dir"]),
            train_input_fn=read_dataset(tf.estimator.ModeKeys.TRAIN, args),
            eval_input_fn=read_dataset(tf.estimator.ModeKeys.EVAL, args),
            train_steps=train_steps,
            eval_steps=1,
            min_eval_frequency=steps_in_epoch)

    from tensorflow.contrib.learn.python.learn import learn_runner
    learn_runner.run(experiment_fn=experiment_fn,
                     output_dir=args["output_dir"])

    batch_predict(args)
Example #10
    def train(self):
        experiment_fn = self._generate_experiment_fn()
        hparams = HParams(**self.customer_params)

        learn_runner.run(experiment_fn,
                         run_config=self._build_run_config(),
                         hparams=hparams)
Example #11
def main(_argv):
    """The entrypoint for the script"""

    # Parse YAML FLAGS
    FLAGS.hooks = _maybe_load_yaml(FLAGS.hooks)
    FLAGS.metrics = _maybe_load_yaml(FLAGS.metrics)
    FLAGS.model_params = _maybe_load_yaml(FLAGS.model_params)
    FLAGS.input_pipeline_train = _maybe_load_yaml(FLAGS.input_pipeline_train)
    FLAGS.input_pipeline_dev = _maybe_load_yaml(FLAGS.input_pipeline_dev)

    # Load flags from config file
    final_config = {}
    if FLAGS.config_paths:
        for config_path in FLAGS.config_paths.split(","):
            config_path = config_path.strip()
            if not config_path:
                continue
            config_path = os.path.abspath(config_path)
            tf.logging.info("Loading config from %s", config_path)
            with gfile.GFile(config_path.strip()) as config_file:
                config_flags = yaml.load(config_file)
                final_config = _deep_merge_dict(
                    final_config, config_flags
                )  # Merge the flags and values from all the files into one dict

    tf.logging.info("Final Config:\n%s", yaml.dump(
        final_config))  # Log the flags and values read from all the files

    # Merge flags with config values
    for flag_key, flag_value in final_config.items():  # Map the config values onto FLAGS
        if hasattr(FLAGS, flag_key) and isinstance(getattr(FLAGS, flag_key),
                                                   dict):
            merged_value = _deep_merge_dict(
                flag_value, getattr(FLAGS, flag_key)
            )  # Merge the values already defined with the new values from the config files
            setattr(FLAGS, flag_key, merged_value)
        elif hasattr(FLAGS, flag_key):
            setattr(FLAGS, flag_key, flag_value)
        else:
            tf.logging.warning("Ignoring config flag: %s", flag_key)

    if FLAGS.save_checkpoints_secs is None \
      and FLAGS.save_checkpoints_steps is None:
        FLAGS.save_checkpoints_secs = 600
        tf.logging.info("Setting save_checkpoints_secs to %d",
                        FLAGS.save_checkpoints_secs)

    if not FLAGS.output_dir:
        FLAGS.output_dir = tempfile.mkdtemp()  # Create a temporary directory

    if not FLAGS.input_pipeline_train:
        raise ValueError("You must specify input_pipeline_train")

    if not FLAGS.input_pipeline_dev:
        raise ValueError("You must specify input_pipeline_dev")

    learn_runner.run(experiment_fn=create_experiment,
                     output_dir=FLAGS.output_dir,
                     schedule=FLAGS.schedule)
Example #12
def run(data_dir, model, output_dir, train_steps, eval_steps, schedule):
  """Runs an Estimator locally or distributed.

  This function chooses one of two paths to execute:

  1. Running locally if schedule=="local_run".
  2. Distributed training/evaluation otherwise.

  Args:
    data_dir: The directory the data can be found in.
    model: The name of the model to use.
    output_dir: The directory to store outputs in.
    train_steps: The number of steps to run training for.
    eval_steps: The number of steps to run evaluation for.
    schedule: (str) The schedule to run. The value here must
      be the name of one of Experiment's methods.
  """
  exp_fn = make_experiment_fn(
      data_dir=data_dir,
      model_name=model,
      train_steps=train_steps,
      eval_steps=eval_steps)

  if schedule == "local_run":
    # Run the local demo.
    exp = exp_fn(output_dir)
    if exp.train_steps > 0 or exp.eval_steps > 0:
      tf.logging.info("Performing local training and evaluation.")
      exp.train_and_evaluate()
    decode(exp.estimator)
  else:
    # Perform distributed training/evaluation.
    learn_runner.run(
        experiment_fn=exp_fn, schedule=schedule, output_dir=output_dir)
Example #13
 def test_fail_output_dir_and_run_config_are_both_set(self):
   with self.assertRaisesRegexp(
       ValueError, _CANNOT_SET_BOTH_OUTPUT_DIR_AND_CONFIG_MSG):
     learn_runner.run(build_experiment,
                      output_dir=_MODIR_DIR,
                      schedule="simple_task",
                      run_config=run_config_lib.RunConfig())
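This test pins down a constraint implicit in the other examples: output_dir and run_config are alternative ways of telling learn_runner where the model lives, and passing both is rejected before the experiment function is ever called. A minimal sketch of the failure, assuming TF 1.x (the experiment function body is a placeholder):

import tensorflow as tf
from tensorflow.contrib.learn.python.learn import learn_runner

def experiment_fn(output_dir):
    # Never reached: learn_runner validates its arguments first.
    raise AssertionError('unreachable')

try:
    learn_runner.run(experiment_fn,
                     output_dir='/tmp/model',
                     run_config=tf.contrib.learn.RunConfig(model_dir='/tmp/model'))
except ValueError as err:
    print('rejected as expected: %s' % err)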
Example #14
def main(argv=None):
  args = parse_arguments(sys.argv if argv is None else argv)

  tf.logging.set_verbosity(tf.logging.INFO)
  learn_runner.run(
      experiment_fn=get_experiment_fn(args),
      output_dir=args.job_dir)
Example #15
 def test_fail_output_dir_and_run_config_are_both_set(self):
     with self.assertRaisesRegexp(
             ValueError, _CANNOT_SET_BOTH_OUTPUT_DIR_AND_CONFIG_MSG):
         learn_runner.run(build_experiment,
                          output_dir=_MODIR_DIR,
                          schedule="simple_task",
                          run_config=run_config_lib.RunConfig())
Example #16
def main():
    "Entrypoint for training."

    parser = argparse.ArgumentParser()

    parser.add_argument('--train-files',
                        help='Training files pattern globstring',
                        default='data/training/*.jpg')
    parser.add_argument('--eval-files',
                        help='Evaluation files pattern globstring',
                        default='data/validation/*.jpg')
    parser.add_argument(
        '--job-dir',
        help='Location to write checkpoints, summaries, and export models',
        required=True)
    parser.add_argument('--num-epochs',
                        help='Maximum number of epochs on which to train',
                        default=1,
                        type=int)
    parser.add_argument('--batch-size',
                        help='Batch size for training steps',
                        type=int,
                        default=128)

    args = parser.parse_args()

    tf.logging.set_verbosity(tf.logging.INFO)

    experiment_fn = generate_experiment_fn(train_files=args.train_files,
                                           eval_files=args.eval_files,
                                           batch_size=args.batch_size,
                                           num_epochs=args.num_epochs)

    learn_runner.run(experiment_fn, args.job_dir)
Example #17
def train_and_evaluate(args):
    train_steps = int(0.5 + (1.0 * args['num_epochs'] * args['nusers']) /
                      args['batch_size'])
    steps_in_epoch = int(0.5 + args['nusers'] / args['batch_size'])
    print('Will train for {} steps, evaluating once every {} steps'.format(
        train_steps, steps_in_epoch))

    def experiment_fn(output_dir):
        return tf.contrib.learn.Experiment(
            tf.contrib.factorization.WALSMatrixFactorization(
                num_rows=args['nusers'],
                num_cols=args['nitems'],
                embedding_dimension=args['n_embeds'],
                model_dir=args['output_dir']),
            train_input_fn=read_dataset(tf.estimator.ModeKeys.TRAIN, args),
            eval_input_fn=read_dataset(tf.estimator.ModeKeys.EVAL, args),
            train_steps=train_steps,
            eval_steps=1,
            min_eval_frequency=steps_in_epoch,
            export_strategies=tf.contrib.learn.utils.saved_model_export_utils.
            make_export_strategy(
                serving_input_fn=create_serving_input_fn(args)))

    from tensorflow.contrib.learn.python.learn import learn_runner
    learn_runner.run(experiment_fn, args['output_dir'])

    batch_predict(args)
Example #18
 def test_fail_invalid_hparams_type(self):
     run_config = run_config_lib.RunConfig(model_dir=_MODIR_DIR)
     with self.assertRaisesRegexp(ValueError, _INVALID_HPARAMS_ERR_MSG):
         learn_runner.run(build_experiment_for_run_config,
                          run_config=run_config,
                          schedule="local_run",
                          hparams=["hparams"])
Example #19
def train_and_evaluate(args):
    train_steps = int(0.5 +
                      (1.0 * args['num_epochs'] * args['n_interactions']) /
                      args['batch_size'])
    print('Will train for {} steps'.format(train_steps))

    def experiment_fn(output_dir):
        return tf.contrib.learn.Experiment(
            tf.contrib.factorization.WALSMatrixFactorization(
                num_rows=args['n_users'],
                num_cols=args['n_items'],
                embedding_dimension=args['n_embeds'],
                model_dir=args['output_dir']),
            train_input_fn=read_dataset(args['train_path'],
                                        tf.estimator.ModeKeys.TRAIN, args),
            eval_input_fn=read_dataset(args['train_path'],
                                       tf.estimator.ModeKeys.EVAL, args),
            export_strategies=[
                saved_model_export_utils.make_export_strategy(
                    serving_input_fn,
                    default_output_alternative_key=None,
                    exports_to_keep=1)
            ],
            train_steps=train_steps,
            eval_steps=None)

    from tensorflow.contrib.learn.python.learn import learn_runner
    learn_runner.run(experiment_fn, args['output_dir'])
Example #20
def run(data_dir, model, output_dir, train_steps, eval_steps, schedule):
  """Runs an Estimator locally or distributed.

  Args:
    data_dir: The directory the data can be found in.
    model: The name of the model to use.
    output_dir: The directory to store outputs in.
    train_steps: The number of steps to run training for.
    eval_steps: The number of steps to run evaluation for.
    schedule: (str) The schedule to run. The value here must
      be the name of one of Experiment's methods.
  """
  exp_fn = make_experiment_fn(
      data_dir=data_dir,
      model_name=model,
      train_steps=train_steps,
      eval_steps=eval_steps)

  # Create hparams and run_config
  run_config = create_run_config(output_dir)
  hparams = create_hparams(
      FLAGS.hparams_set, data_dir, passed_hparams=FLAGS.hparams)

  if is_chief():
    save_metadata(output_dir, hparams)

  learn_runner.run(
      experiment_fn=exp_fn,
      schedule=schedule,
      run_config=run_config,
      hparams=hparams)
Example #21
def main(argv=None):
  """Run a TensorFlow model on the Criteo dataset."""
  env = json.loads(os.environ.get('TF_CONFIG', '{}'))
  # First find out if there's a task value on the environment variable.
  # If there is none or it is empty define a default one.
  task_data = env.get('task') or {'type': 'master', 'index': 0}
  argv = sys.argv if argv is None else argv
  args = create_parser().parse_args(args=argv[1:])

  trial = task_data.get('trial')
  if trial is not None:
    output_dir = os.path.join(args.output_path, trial)
  else:
    output_dir = args.output_path

  # Run evaluation only if instructed to; otherwise call the Experiment's run.
  if args.eval_only_summary_filename:
    experiment = get_experiment_fn(args)(output_dir)
    # Note that evaluation here will appear as 'one_pass' in tensorboard.
    results = experiment.evaluate(delay_secs=0)
    # Converts numpy types to native types for json dumps.
    json_out = json.dumps(
        {key: value.tolist() for key, value in results.items()})
    with tf.Session():
      tf.write_file(args.eval_only_summary_filename, json_out).run()
  else:
    learn_runner.run(experiment_fn=get_experiment_fn(args),
                     output_dir=output_dir)
Example #22
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--data-dir',
                        default='data/counting_mnist/',
                        help='Counting MNIST data directory')
    parser.add_argument('--batch-size',
                        default=32,
                        type=int,
                        help='Batch size')
    parser.add_argument('--learning-rate',
                        default=1e-4,
                        type=float,
                        help='Learning rate')
    parser.add_argument('--train-steps',
                        default=100000,
                        type=int,
                        help='Maximum number of training steps')
    parser.add_argument('--seed',
                        help='Random seed',
                        type=int,
                        default=random.randint(0, 2**32 - 1))
    parser.add_argument('--job-dir',
                        default='jobs/',
                        help='Job directory')
    args, _ = parser.parse_known_args()

    if args.seed is not None:
        random.seed(args.seed)
        np.random.seed(args.seed)

    tf.logging.set_verbosity(tf.logging.INFO)

    experiment_fn = generate_experiment_fn(args)
    learn_runner.run(experiment_fn, args.job_dir)
Example #23
def main(_):
    # Parse hparams from FLAGS. A format example is provided below.
    # --hparams="model__optimizer__learning_rate=0.1,model__min_kernel_size=3"
    hparams = default_hparams().parse(FLAGS.hparams)
    experiment_config = config_utils.load_experiment_config(
        FLAGS.experiment_config)
    if FLAGS.train_path is not None:
        experiment_config.train_sources[0] = FLAGS.train_path
    if FLAGS.eval_path is not None:
        experiment_config.eval_sources[0] = FLAGS.eval_path

    best_model_dir = get_best_model_dir(FLAGS.warm_start_from)
    experiment_fn = experiment.get_experiment_fn(
        experiment_config,
        warm_start_from=best_model_dir,
        train_steps=FLAGS.num_train_steps,
        eval_steps=FLAGS.num_eval_steps,
        continuous_eval_throttle_secs=FLAGS.continuous_eval_throttle_secs,
        eval_delay_secs=0)
    # To migrate to tf.estimator.RunConfig.
    run_config = learn_runner.EstimatorConfig(
        model_dir=FLAGS.output_dir,
        save_checkpoints_steps=FLAGS.save_checkpoints_steps,
        save_summary_steps=FLAGS.save_checkpoints_steps,
        keep_checkpoint_max=FLAGS.keep_checkpoint_max)
    learn_runner.run(experiment_fn=experiment_fn,
                     run_config=run_config,
                     hparams=hparams)
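The comment above points at a migration to the core tf.estimator.RunConfig. As a hedged sketch of where that migration leads (assuming TF 1.x core APIs, with literal values standing in for the FLAGS used above), the same checkpoint and summary settings map over like this:

import tensorflow as tf

# Core-API equivalent of the EstimatorConfig above; the literals stand in
# for FLAGS.output_dir, FLAGS.save_checkpoints_steps and
# FLAGS.keep_checkpoint_max.
core_run_config = tf.estimator.RunConfig(
    model_dir='/tmp/output',
    save_checkpoints_steps=1000,
    save_summary_steps=1000,
    keep_checkpoint_max=5)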
Example #24
def main(_argv):
    if FLAGS.save_checkpoints_secs is None \
        and FLAGS.save_checkpoints_steps is None:
        FLAGS.save_checkpoints_secs = 600
        tf.logging.info("Setting save_checkpoints_secs to %d",
                        FLAGS.save_checkpoints_secs)

    if not FLAGS.source_vocab_path or not FLAGS.target_vocab_path:
        raise ValueError(
            "You must specify source_vocab_path and target_vocab_path")

    if not FLAGS.output_dir:
        FLAGS.output_dir = tempfile.mkdtemp()

    if not FLAGS.source_files or not FLAGS.target_files:
        raise ValueError("You must specify source_files and target_files")

    FLAGS.source_files = FLAGS.source_files.strip().split(',')
    print(FLAGS.source_files)

    FLAGS.target_files = FLAGS.target_files.strip().split(',')

    if not FLAGS.dev_source_files or not FLAGS.dev_target_files:
        raise ValueError("You must specify dev_source_files and dev_target_files")

    learn_runner.run(experiment_fn=create_experiment,
                     output_dir=FLAGS.output_dir,
                     schedule=FLAGS.schedule)
Example #25
def main():
    tf.logging.set_verbosity(tf.logging.DEBUG)

    parsed_args = get_parser().parse_args()

    session_config = tf.ConfigProto(allow_soft_placement=True)
    session_config.gpu_options.allow_growth = True
    run_config = RunConfig(session_config=session_config)
    run_config = run_config.replace(model_dir=get_model_dir(parsed_args))

    params = HParams(learning_rate=parsed_args.lr,
                     train_steps=parsed_args.train_steps,
                     steps_per_eval=parsed_args.steps_per_eval,
                     batch_size=parsed_args.batch_size,
                     vgg_model_path=parsed_args.vgg_model_path,
                     selector=parsed_args.selector,
                     dropout=parsed_args.dropout,
                     ctx2out=parsed_args.ctx2out,
                     prev2out=parsed_args.prev2out,
                     dataset=parsed_args.dataset,
                     eval_steps=parsed_args.eval_steps,
                     hard_attention=parsed_args.hard_attention,
                     use_sampler=parsed_args.use_sampler,
                     bin_size=14)

    learn_runner.run(experiment_fn=experiment_fn_inner,
                     run_config=run_config,
                     schedule="continuous_train_and_eval",
                     hparams=params)
Example #26
def main():
    "Entrypoint for training."
    parser = argparse.ArgumentParser()

    parser.add_argument('--data-dir',
                        help='Directory containing data',
                        required=True)
    # default='CBT/data/records/'
    parser.add_argument('--dataset-id',
                        help='Unique id identifying dataset',
                        required=True)
    parser.add_argument(
        '--job-dir',
        help='Location to write checkpoints, summaries, and export models',
        required=True)
    parser.add_argument('--num-epochs',
                        help='Maximum number of epochs on which to train',
                        default=200,
                        type=int)
    parser.add_argument('--lr-min',
                        help='Minimum learning rate',
                        default=2e-4,
                        type=float)
    parser.add_argument('--lr-max',
                        help='Maximum learning rate',
                        default=1e-2,
                        type=float)
    parser.add_argument('--lr-step-size',
                        help='Learning rate step size (in epochs)',
                        default=10,
                        type=int)
    parser.add_argument('--grad-noise',
                        help='Gradient noise scale',
                        default=0.005,
                        type=float)
    parser.add_argument('--gpu', help='GPU ID to use', default=0, type=int)
    parser.add_argument(
        '--general',
        help='Uses the general model instead of the simplified one.',
        action='store_true')
    args = parser.parse_args()

    os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
    os.environ["CUDA_VISIBLE_DEVICES"] = str(args.gpu)
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

    tf.logging.set_verbosity(tf.logging.INFO)
    print(args.general)

    experiment_fn = generate_experiment_fn(
        data_dir=args.data_dir,
        dataset_id=args.dataset_id,
        num_epochs=args.num_epochs,
        learning_rate_min=args.lr_min,
        learning_rate_max=args.lr_max,
        learning_rate_step_size=args.lr_step_size,
        gradient_noise_scale=args.grad_noise,
        is_general=args.general)
    learn_runner.run(experiment_fn, args.job_dir)
Example #27
 def test_fail_hparams_are_set(self):
     hparams = _HPARAMS
     with self.assertRaisesRegexp(
             ValueError, _HPARAMS_CANNOT_BE_SET_FOR_OUTPUT_DIR_MSG):
         learn_runner.run(build_experiment,
                          _MODIR_DIR,
                          schedule="simple_task",
                          hparams=hparams)
Example #28
def main(argv=None):
  """Run a TensorFlow model on the Iris dataset."""
  args = parse_arguments(sys.argv if argv is None else argv)

  tf.logging.set_verbosity(tf.logging.INFO)
  learn_runner.run(
      experiment_fn=get_experiment_fn(args),
      output_dir=args.job_dir)
Example #29
def main(argv=None):
  """Run a TensorFlow model on the Iris dataset."""
  args = parse_arguments(sys.argv if argv is None else argv)

  tf.logging.set_verbosity(tf.logging.INFO)
  learn_runner.run(
      experiment_fn=get_experiment_fn(args),
      output_dir=args.job_dir)
Example #30
def main(unused_argv):
    tf.flags.mark_flag_as_required('model_dir')
    tf.flags.mark_flag_as_required('pipeline_config_path')
    config = tf.contrib.learn.RunConfig(model_dir=FLAGS.model_dir)
    learn_runner.run(experiment_fn=build_experiment_fn(FLAGS.num_train_steps,
                                                       FLAGS.num_eval_steps),
                     run_config=config,
                     hparams=model_hparams.create_hparams())
Example #31
def main(argv=None):
    args = parse_arguments(sys.argv if argv is None else argv)
    local_analysis(args)
    set_logging_level(args)
    # Suppress TensorFlow debugging info.
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

    learn_runner.run(experiment_fn=get_experiment_fn(args),
                     output_dir=args.job_dir)
Example #32
def main(unused_argv):
  tf.flags.mark_flag_as_required('model_dir')
  tf.flags.mark_flag_as_required('pipeline_config_path')
  config = tf.contrib.learn.RunConfig(model_dir=FLAGS.model_dir)
  learn_runner.run(
      experiment_fn=build_experiment_fn(FLAGS.num_train_steps,
                                        FLAGS.num_eval_steps),
      run_config=config,
      hparams=model_hparams.create_hparams())
Example #33
  def test_fail_not_experiment(self):
    def _experiment_fn(run_config, hparams):
      del run_config, hparams  # unused.
      return "not experiment"

    run_config = run_config_lib.RunConfig(model_dir=_MODIR_DIR)
    with self.assertRaisesRegexp(TypeError, _NOT_EXP_TYPE_MSG):
      learn_runner.run(_experiment_fn,
                       run_config=run_config,
                       schedule="simple_task")
Example #34
def main(_argv):
    """Main function."""
    schedules = ['train', 'evaluate', 'continuous_eval']
    assert FLAGS.schedule in schedules, \
        "Only these schedules are supported: %s" % ','.join(schedules)

    learn_runner.run(experiment_fn=_create_experiment,
                     output_dir=FLAGS.output_dir,
                     schedule=FLAGS.schedule)
Example #35
def main(argv=None):
  args = parse_arguments(sys.argv if argv is None else argv)
  local_analysis(args)
  set_logging_level(args)
  # Suppress TensorFlow debugging info.
  os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

  learn_runner.run(
      experiment_fn=get_experiment_fn(args),
      output_dir=args.job_dir)
Example #36
    def test_fail_not_experiment(self):
        def _experiment_fn(run_config, hparams):
            del run_config, hparams  # unused.
            return "not experiment"

        run_config = run_config_lib.RunConfig(model_dir=_MODIR_DIR)
        with self.assertRaisesRegexp(TypeError, _NOT_EXP_TYPE_MSG):
            learn_runner.run(_experiment_fn,
                             run_config=run_config,
                             schedule="simple_task")
Example #37
def main():
    "Entrypoint for training."
    parser = argparse.ArgumentParser()

    parser.add_argument(
        '--data-dir',
        help='Directory containing data',
        default='data/babi/records/')
    parser.add_argument(
        '--dataset-id',
        help='Unique id identifying dataset',
        required=True)
    parser.add_argument(
        '--job-dir',
        help='Location to write checkpoints, summaries, and export models',
        required=True)
    parser.add_argument(
        '--num-epochs',
        help='Maximum number of epochs on which to train',
        default=200,
        type=int)
    parser.add_argument(
        '--lr-min',
        help='Minimum learning rate',
        default=2e-4,
        type=float)
    parser.add_argument(
        '--lr-max',
        help='Maximum learning rate',
        default=1e-2,
        type=float)
    parser.add_argument(
        '--lr-step-size',
        help='Learning rate step size (in epochs)',
        default=10,
        type=int)
    parser.add_argument(
        '--grad-noise',
        help='Gradient noise scale',
        default=0.005,
        type=float)

    args = parser.parse_args()

    tf.logging.set_verbosity(tf.logging.INFO)

    experiment_fn = generate_experiment_fn(
        data_dir=args.data_dir,
        dataset_id=args.dataset_id,
        num_epochs=args.num_epochs,
        learning_rate_min=args.lr_min,
        learning_rate_max=args.lr_max,
        learning_rate_step_size=args.lr_step_size,
        gradient_noise_scale=args.grad_noise)
    learn_runner.run(experiment_fn, args.job_dir)
Example #38
def train_and_eval(job_dir=None):
    print("Begin training and evaluation")

    # if local eval and no args passed, default
    if job_dir is None: job_dir = 'models/'

    # Ensure path has a '/' at the end
    if job_dir[-1] != '/': job_dir += '/'

    gcs_base = 'https://storage.googleapis.com/'  # No need to change
    # small_version, medium_version, large_version
    # Note: large_version is 2.7GB and medium_version is 273MB
    gcs_path = 'dataset-uploader/criteo-kaggle/small_version/'  # Path to the folder with the files
    trainfile = 'train.csv'
    testfile = 'eval.csv'
    local_path = 'dataset_files'
    train_file = base.maybe_download(trainfile, local_path,
                                     gcs_base + gcs_path + trainfile)
    test_file = base.maybe_download(testfile, local_path,
                                    gcs_base + gcs_path + testfile)

    training_mode = 'learn_runner'
    train_steps = 1000
    test_steps = 100
    model_type = 'DEEP'

    model_dir = job_dir + 'model_' + model_type + '_' + str(int(time.time()))
    print("Saving model checkpoints to " + model_dir)
    export_dir = model_dir + '/exports'

    # Manually train and export model
    if training_mode == 'manual':
        # In this function, editing below here is unlikely to be needed
        m = build_estimator(model_type, model_dir)

        m.fit(input_fn=generate_input_fn(train_file), steps=train_steps)
        print('fit done')

        results = m.evaluate(input_fn=generate_input_fn(test_file),
                             steps=test_steps)
        print('evaluate done')

        print('Accuracy: %s' % results['accuracy'])

        export_folder = m.export_savedmodel(export_dir_base=export_dir,
                                            input_fn=serving_input_fn)

        print('Model exported to ' + export_dir)

    elif training_mode == 'learn_runner':
        # use learn_runner
        experiment_fn = generate_experiment(model_dir, train_file, test_file,
                                            model_type)

        learn_runner.run(experiment_fn, model_dir)
Example #39
def main(_argv):
  """The entrypoint for the script"""

  # Parse YAML FLAGS
  FLAGS.hooks = _maybe_load_yaml(FLAGS.hooks)
  FLAGS.metrics = _maybe_load_yaml(FLAGS.metrics)
  FLAGS.model_params = _maybe_load_yaml(FLAGS.model_params)
  FLAGS.input_pipeline_train = _maybe_load_yaml(FLAGS.input_pipeline_train)
  FLAGS.input_pipeline_dev = _maybe_load_yaml(FLAGS.input_pipeline_dev)

  # Load flags from config file
  final_config = {}
  if FLAGS.config_paths:
    for config_path in FLAGS.config_paths.split(","):
      config_path = config_path.strip()
      if not config_path:
        continue
      config_path = os.path.abspath(config_path)
      tf.logging.info("Loading config from %s", config_path)
      with gfile.GFile(config_path.strip()) as config_file:
        config_flags = yaml.load(config_file)
        final_config = _deep_merge_dict(final_config, config_flags)

  tf.logging.info("Final Config:\n%s", yaml.dump(final_config))

  # Merge flags with config values
  for flag_key, flag_value in final_config.items():
    if hasattr(FLAGS, flag_key) and isinstance(getattr(FLAGS, flag_key), dict):
      merged_value = _deep_merge_dict(flag_value, getattr(FLAGS, flag_key))
      setattr(FLAGS, flag_key, merged_value)
    elif hasattr(FLAGS, flag_key):
      setattr(FLAGS, flag_key, flag_value)
    else:
      tf.logging.warning("Ignoring config flag: %s", flag_key)

  if FLAGS.save_checkpoints_secs is None \
    and FLAGS.save_checkpoints_steps is None:
    FLAGS.save_checkpoints_secs = 600
    tf.logging.info("Setting save_checkpoints_secs to %d",
                    FLAGS.save_checkpoints_secs)

  if not FLAGS.output_dir:
    FLAGS.output_dir = tempfile.mkdtemp()

  if not FLAGS.input_pipeline_train:
    raise ValueError("You must specify input_pipeline_train")

  if not FLAGS.input_pipeline_dev:
    raise ValueError("You must specify input_pipeline_dev")

  learn_runner.run(
      experiment_fn=create_experiment,
      output_dir=FLAGS.output_dir,
      schedule=FLAGS.schedule)
Example #40
def main(_):
    if not FLAGS.vocab_size:
        FLAGS.vocab_size = len(open(FLAGS.vocab_file).readlines())
    if FLAGS.fast:
        FastTrain()
    elif FLAGS.train_records:
        if FLAGS.export_dir:
            tf.logging.warn(
                "Exporting savedmodels not supported for contrib experiment, --nofast"
            )
        learn_runner.run(experiment_fn=Experiment, output_dir=FLAGS.model_dir)
Example #41
def main(argv=None):
    hparams = HParams(batch_size=128, hidden_units=[256], learning_rate=.001)

    output_dir = 'test'

    config = learn.RunConfig(save_checkpoints_secs=600,
                             model_dir=output_dir,
                             gpu_memory_fraction=1)
    learn_runner.run(experiment_fn=_experiment_fn,
                     run_config=config,
                     hparams=hparams)
Example #42
  def test_basic_run_config_uid_check(self):
    expected_run_config = run_config_lib.RunConfig(model_dir=_MODIR_DIR)

    def _experiment_fn(run_config, hparams):
      del run_config, hparams  # unused.
      # Explicitly use a new run_config.
      new_config = run_config_lib.RunConfig(model_dir=_MODIR_DIR + "/123")

      return TestExperiment(config=new_config)

    with self.assertRaisesRegexp(RuntimeError, _RUN_CONFIG_UID_CHECK_ERR_MSG):
      learn_runner.run(experiment_fn=_experiment_fn,
                       run_config=expected_run_config)
Example #43
def main(args):
  env = json.loads(os.environ.get('TF_CONFIG', '{}'))

  # Print the job data as provided by the service.
  logging.info('Original job data: %s', env.get('job', {}))

  # First find out if there's a task value on the environment variable.
  # If there is none or it is empty define a default one.
  task_data = env.get('task') or {'type': 'master', 'index': 0}
  trial = task_data.get('trial')
  if trial is not None:
    args.output_path = os.path.join(args.output_path, trial)

  learn_runner.run(make_experiment_fn(args), args.output_path)
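The trial handling above (and in several earlier examples) comes from Cloud ML Engine hyperparameter tuning: each tuning trial carries a 'trial' id in the TF_CONFIG task, and appending it to the output path keeps the trials' checkpoints and summaries from colliding. A self-contained sketch of the pattern (the TF_CONFIG value is fabricated for illustration):

import json
import os

# Pretend the service launched us as trial '7' of a tuning job.
os.environ.setdefault('TF_CONFIG', json.dumps(
    {'task': {'type': 'master', 'index': 0, 'trial': '7'}}))

env = json.loads(os.environ['TF_CONFIG'])
task_data = env.get('task') or {'type': 'master', 'index': 0}
trial = task_data.get('trial')

output_path = '/tmp/model'
output_dir = os.path.join(output_path, trial) if trial is not None else output_path
print(output_dir)  # /tmp/model/7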
Example #44
  def test_fail_invalid_experiment_config_type(self):
    expected_run_config = run_config_lib.RunConfig(model_dir=_MODIR_DIR)

    def _experiment_fn(run_config, hparams):
      del run_config, hparams  # unused.
      # Explicitly use a new run_config without `uid` method.
      new_config = core_run_config_lib.RunConfig(
          model_dir=_MODIR_DIR + "/123")

      return TestExperiment(config=new_config)

    with self.assertRaisesRegexp(RuntimeError,
                                 _MISSING_RUN_CONFIG_UID_ERR_MSG):
      learn_runner.run(experiment_fn=_experiment_fn,
                       run_config=expected_run_config)
Example #45
File: task.py (Project: spwcd/QTML)
def main(argv):
    parser = argparse.ArgumentParser()
    # You must accept a --job-dir argument when running on Cloud ML Engine;
    # it specifies where checkpoints should be saved. You can define
    # additional user arguments, which must be specified after an empty
    # arg -- on the command line:
    # gcloud ml-engine jobs submit training jobXXX --job-dir=... --ml-engine-args -- --user-args
    parser.add_argument('--job-dir',
                        default="checkpoints",
                        help='GCS or local path where to store training checkpoints')
    args = parser.parse_args()
    arguments = args.__dict__
    arguments['data'] = "data" # Hard-coded here: training data will be downloaded to folder 'data'.

    # learn_runner needs an experiment function with a single parameter: the output directory.
    # Here we pass additional command line arguments through a closure.
    output_dir = arguments.pop('job_dir')
    experiment_fn = lambda output_dir: experiment_fn_with_params(output_dir, **arguments)
    learn_runner.run(experiment_fn, output_dir)
Example #46
 def test_run_with_explicit_local_run(self):
   run_config = run_config_lib.RunConfig(model_dir=_MODIR_DIR)
   self.assertEqual(
       "local_run-" + _MODIR_DIR,
       learn_runner.run(build_experiment_for_run_config,
                        run_config=run_config,
                        schedule="local_run"))
Example #47
 def test_schedule_from_tf_config_runs_train_on_worker(self):
     os.environ["TF_CONFIG"] = json.dumps(
         {"cluster": build_distributed_cluster_spec(), "task": {"type": tf.contrib.learn.TaskType.WORKER}}
     )
     # RunConfig constructor will set job_name from TF_CONFIG.
     config = run_config.RunConfig()
     self.assertEqual("train", learn_runner.run(lambda output_dir: TestExperiment(config=config), output_dir="/tmp"))
Example #48
 def test_no_schedule_and_non_distributed_runs_train_and_evaluate(self):
   config = run_config.RunConfig(
       cluster_spec=build_non_distributed_cluster_spec())
   self.assertEqual(
       "train_and_evaluate",
       learn_runner.run(lambda output_dir: TestExperiment(config=config),
                        output_dir="/tmp"))
Example #49
 def test_schedule_from_tf_config_runs_serve_on_ps(self):
     tf_config = {"cluster": build_distributed_cluster_spec(), "task": {"type": tf.contrib.learn.TaskType.PS}}
     with patch.dict("os.environ", {"TF_CONFIG": json.dumps(tf_config)}):
         config = run_config.RunConfig()
         self.assertEqual(
             "run_std_server", learn_runner.run(lambda output_dir: TestExperiment(config=config), output_dir="/tmp")
         )
Example #50
 def test_schedule_from_config_runs_train_on_worker(self):
   config = run_config.RunConfig(
       job_name="worker", cluster_spec=build_distributed_cluster_spec())
   self.assertEqual(
       "train",
       learn_runner.run(lambda output_dir: TestExperiment(config=config),
                        output_dir="/tmp"))
Example #51
 def test_run_with_custom_schedule(self):
   run_config = run_config_lib.RunConfig(model_dir=_MODIR_DIR)
   self.assertEqual(
       "simple_task, default=None.",
       learn_runner.run(build_experiment_for_run_config,
                        run_config=run_config,
                        schedule="simple_task"))
Example #52
 def test_schedule_from_tf_config(self):
   os.environ["TF_CONFIG"] = json.dumps({"task": {"type": "worker"}})
    # RunConfig constructor will set job_name from TF_CONFIG.
   config = run_config.RunConfig()
   self.assertEqual(
       "train",
       learn_runner.run(lambda output_dir: TestExperiment(config=config),
                        output_dir="/tmp"))
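Taken together, the schedule tests above document how learn_runner picks a default schedule from the task type when none is passed explicitly. The mapping below is read off those tests, not an exhaustive account of learn_runner's behaviour:

import json
import os

# Default schedule by task type, as exercised by the tests above.
DEFAULT_SCHEDULES = {
    'worker': 'train',
    'master': 'train_and_evaluate',
    'ps': 'run_std_server',
    None: 'train_and_evaluate',  # non-distributed / no cluster spec
}

# With no explicit schedule, a worker task read from TF_CONFIG trains:
os.environ['TF_CONFIG'] = json.dumps({'task': {'type': 'worker'}})
task_type = json.loads(os.environ['TF_CONFIG'])['task']['type']
print(DEFAULT_SCHEDULES[task_type])  # train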
Example #53
def main():
  env = json.loads(os.environ.get('TF_CONFIG', '{}'))
  # First find out if there's a task value on the environment variable.
  # If there is none or it is empty define a default one.
  task_data = env.get('task') or {'type': 'master', 'index': 0}

  args = parse_arguments()

  trial = task_data.get('trial')
  if trial is not None:
    output_dir = os.path.join(args.output_path, trial)
  else:
    output_dir = args.output_path

  learn_runner.run(
      experiment_fn=ExperimentFn(args),
      output_dir=output_dir)
Example #54
def main(argv=None):
  """Run a TensorFlow model on the Reddit dataset."""
  env = json.loads(os.environ.get('TF_CONFIG', '{}'))
  # First find out if there's a task value on the environment variable.
  # If there is none or it is empty define a default one.
  task_data = env.get('task') or {'type': 'master', 'index': 0}
  argv = sys.argv if argv is None else argv
  args = create_parser().parse_args(args=argv[1:])

  trial = task_data.get('trial')
  if trial is not None:
    output_dir = os.path.join(args.output_path, trial)
  else:
    output_dir = args.output_path

  learn_runner.run(experiment_fn=get_experiment_fn(args),
                   output_dir=output_dir)
Example #55
 def test_no_schedule_and_non_distributed_runs_train_and_evaluate(self):
   tf_config = {"cluster": build_non_distributed_cluster_spec()}
   with patch.dict("os.environ", {"TF_CONFIG": json.dumps(tf_config)}):
     config = run_config.RunConfig()
     self.assertEqual(
         "train_and_evaluate",
         learn_runner.run(lambda output_dir: TestExperiment(config=config),
                          output_dir="/tmp"))
Example #56
 def test_no_schedule_and_non_distributed_runs_train_and_evaluate(self):
   tf_config = {"cluster": build_non_distributed_cluster_spec()}
   with patch.dict("os.environ", {"TF_CONFIG": json.dumps(tf_config)}):
     config = run_config_lib.RunConfig()
     self.assertEqual(
         "train_and_evaluate-" + _MODIR_DIR,
         learn_runner.run(
             build_experiment_fn_for_output_dir(config),
             output_dir=_MODIR_DIR))
Example #57
def train_and_eval(job_dir=None, model_type='WIDE_AND_DEEP'):
  print("Begin training and evaluation")

  # if local eval and no args passed, default
  if job_dir is None: job_dir = 'models/' 

  # Ensure path has a '/' at the end
  if job_dir[-1] != '/': job_dir += '/'

  gcs_base = 'https://storage.googleapis.com/'
  gcs_path = 'cloudml-public/census/data/'
  trainfile = 'adult.data.csv'
  testfile  = 'adult.test.csv'
  local_path = 'dataset_files'
  train_file = base.maybe_download(
    trainfile, local_path, gcs_base + gcs_path + trainfile)
  test_file = base.maybe_download(
    testfile, local_path, gcs_base + gcs_path + testfile)

  training_mode = 'learn_runner'
  train_steps = 1000
  test_steps = 100

  model_dir = job_dir + 'model_' + model_type + '_' + str(int(time.time()))
  print("Saving model checkpoints to " + model_dir)
  export_dir = model_dir + '/exports'

  # Manually train and export model
  if training_mode == 'manual':
    # In this function, editing below here is unlikely to be needed
    m = build_estimator(model_type, model_dir)

    m.fit(input_fn=generate_input_fn(train_file), steps=train_steps)
    print('fit done')

    results = m.evaluate(input_fn=generate_input_fn(test_file), steps=test_steps)
    print('evaluate done')

    print('Accuracy: %s' % results['accuracy'])

    export_folder = m.export_savedmodel(
      export_dir_base = export_dir,
      input_fn=serving_input_fn
    )

    print('Model exported to ' + export_dir)


  elif training_mode == 'learn_runner':
    # use learn_runner
    experiment_fn = generate_experiment(
      model_dir, train_file, test_file, model_type)

    metrics, output_folder = learn_runner.run(experiment_fn, model_dir)

    print('Accuracy: {}'.format(metrics['accuracy']))
    print('Model exported to {}'.format(output_folder))
Example #58
 def test_schedule_from_config_runs_train_and_evaluate_on_master(self):
   config = run_config.RunConfig(
       job_name="master",
       cluster_spec=build_distributed_cluster_spec(),
       task=0,
       is_chief=True)
   self.assertEqual(
       "train_and_evaluate",
       learn_runner.run(lambda output_dir: TestExperiment(config=config),
                        output_dir="/tmp"))
Example #59
def train_and_evaluate(args):
    train_steps = int(0.5 + (1.0 * args['num_epochs'] * args['nusers']) /
                      args['batch_size'])
    steps_in_epoch = int(0.5 + args['nusers'] / args['batch_size'])
    print('Will train for {} steps, evaluating once every {} steps'.format(
        train_steps, steps_in_epoch))

    def experiment_fn(output_dir):
        return tf.contrib.learn.Experiment(
            tf.contrib.factorization.WALSMatrixFactorization(
                num_rows=args['nusers'],
                num_cols=args['nitems'],
                embedding_dimension=args['n_embeds'],
                model_dir=args['output_dir']),
            train_input_fn=read_dataset(tf.estimator.ModeKeys.TRAIN, args),
            eval_input_fn=read_dataset(tf.estimator.ModeKeys.EVAL, args),
            train_steps=train_steps,
            eval_steps=1,
            min_eval_frequency=steps_in_epoch,
            export_strategies=tf.contrib.learn.utils.saved_model_export_utils.
            make_export_strategy(
                serving_input_fn=create_serving_input_fn(args)))

    from tensorflow.contrib.learn.python.learn import learn_runner
    learn_runner.run(experiment_fn, args['output_dir'])

    batch_predict(args)
Example #60
 def test_schedule_from_tf_config_runs_train_and_evaluate_on_master(self):
   tf_config = {
       "cluster": build_distributed_cluster_spec(),
       "task": {
           "type": run_config_lib.TaskType.MASTER
       }
   }
   with patch.dict("os.environ", {"TF_CONFIG": json.dumps(tf_config)}):
     config = run_config.RunConfig()
     self.assertEqual(
         "train_and_evaluate",
         learn_runner.run(lambda output_dir: TestExperiment(config=config),
                          output_dir="/tmp"))