# Example 1
0
    # Write this job's hyperparameters to its model_dir so train.py reads
    # the same config this launcher just set.
    params.save(json_path)

    # Launch training with this config
    # NOTE(review): shell=True with an interpolated command string is fine
    # for trusted local paths, but unsafe if model_dir/data_dir ever come
    # from untrusted input — confirm.
    cmd = "{python} train.py --model_dir {model_dir} --data_dir {data_dir}".format(
        python=PYTHON, model_dir=model_dir, data_dir=data_dir)
    print(cmd)
    # Raises CalledProcessError if train.py exits non-zero, aborting the sweep.
    check_call(cmd, shell=True)


if __name__ == "__main__":
    # Load the "reference" parameters from parent_dir json file
    args = parser.parse_args()
    json_path = os.path.join(args.parent_dir, 'params.json')
    assert os.path.isfile(
        json_path), "No json configuration file found at {}".format(json_path)
    params = Params(json_path)

    # Perform hypersearch over one parameter
    learning_rates = [1e-4, 1e-3, 1e-2]
    batch_sizes = [32, 128, 256]
    for learning_rate in learning_rates:
        for batch_size in batch_sizes:
            # Modify the relevant parameter in params
            params.learning_rate = learning_rate
            params.batch_size = batch_size

            # Launch job (name has to be unique)
            job_name = "learning_rate_{}".format(learning_rate)
            launch_training_job(args.parent_dir, args.data_dir, job_name,
                                params)
# Example 2
0
# Fix: the help string was copy-pasted from a --data_dir argument and
# described a dataset directory instead of this option.
# NOTE(review): with default=1 this value is never None, so the
# save_checkpoints_steps fallback below can never trigger — confirm whether
# the default should be None instead.
parser.add_argument('--save_checkpoints_epochs',
                    default=1,
                    help="Save a checkpoint every this many epochs "
                         "(converted to steps from the training-set size)")

if __name__ == '__main__':
    # Start from a clean graph and fix the seed so runs are reproducible.
    tf.reset_default_graph()
    tf.logging.set_verbosity(tf.logging.INFO)
    random_seed = 230
    tf.set_random_seed(random_seed)
    # Load the parameters from json file
    args = parser.parse_args()
    json_path = os.path.join(args.model_dir, 'params.json')
    assert os.path.isfile(
        json_path), "No json configuration file found at {}".format(json_path)
    params = Params(json_path)
    # Command-line batch size overrides the value from params.json.
    params.batch_size = int(args.batch_size)

    # Checkpoint frequency must be given either in steps or in epochs.
    assert args.save_checkpoints_steps is not None or args.save_checkpoints_epochs is not None

    if args.save_checkpoints_epochs is not None:
        # Convert "every N epochs" to a step count: count the training
        # examples in the *_train_*tfrecord files, derive steps per epoch.
        # NOTE(review): integer division drops the final partial batch —
        # presumably intentional; verify.
        files = glob.glob(os.path.join(args.data_dir, "*_train_*tfrecord"))
        num_examples = util.count_records(files)
        steps_each_epoch = num_examples // params.batch_size
        save_checkpoints_steps = steps_each_epoch * int(
            args.save_checkpoints_epochs)
    else:
        save_checkpoints_steps = int(args.save_checkpoints_steps)

    # Define the model
    tf.logging.info("Creating the model...")
    tf_config = tf.ConfigProto()