import argparse
import os
import sys
from subprocess import check_call

PYTHON = sys.executable  # interpreter used to launch the training jobs (assumed to be the current one)

parser = argparse.ArgumentParser()
parser.add_argument('--parent_dir', help="Directory containing the reference params.json")
parser.add_argument('--data_dir', help="Directory containing the dataset")

# `Params` (a json-backed hyperparameter container) is defined elsewhere in the
# repo; a minimal sketch of such a helper follows after this script.


def launch_training_job(parent_dir, data_dir, job_name, params):
    """Launch training of the model with a set of hyperparameters in parent_dir/job_name."""
    # Each job gets its own directory parent_dir/job_name for its config and weights
    model_dir = os.path.join(parent_dir, job_name)
    if not os.path.exists(model_dir):
        os.makedirs(model_dir)

    # Write the hyperparameters for this job in a json file
    json_path = os.path.join(model_dir, 'params.json')
    params.save(json_path)

    # Launch training with this config
    cmd = "{python} train.py --model_dir {model_dir} --data_dir {data_dir}".format(
        python=PYTHON, model_dir=model_dir, data_dir=data_dir)
    print(cmd)
    check_call(cmd, shell=True)


if __name__ == "__main__":
    # Load the "reference" parameters from the parent_dir json file
    args = parser.parse_args()
    json_path = os.path.join(args.parent_dir, 'params.json')
    assert os.path.isfile(
        json_path), "No json configuration file found at {}".format(json_path)
    params = Params(json_path)

    # Perform a grid search over two parameters: learning rate and batch size
    learning_rates = [1e-4, 1e-3, 1e-2]
    batch_sizes = [32, 128, 256]
    for learning_rate in learning_rates:
        for batch_size in batch_sizes:
            # Modify the relevant parameters in params
            params.learning_rate = learning_rate
            params.batch_size = batch_size

            # Launch job (the name has to be unique, so include both parameters)
            job_name = "learning_rate_{}_batch_size_{}".format(learning_rate, batch_size)
            launch_training_job(args.parent_dir, args.data_dir, job_name, params)
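# The search script uses a `Params` helper that this fragment does not define.
# A minimal sketch of such a json-backed hyperparameter container (an assumed
# stand-in, not necessarily the repo's actual implementation):

import json


class Params(object):
    """Loads hyperparameters from a json file and exposes them as attributes.

    Example:
        params = Params(json_path)
        print(params.learning_rate)
        params.learning_rate = 1e-3  # change a value
        params.save(json_path)       # write it back for train.py to read
    """

    def __init__(self, json_path):
        with open(json_path) as f:
            # Every key in the json file becomes an attribute of the instance
            self.__dict__.update(json.load(f))

    def save(self, json_path):
        with open(json_path, 'w') as f:
            json.dump(self.__dict__, f, indent=4)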
# train.py: the training script invoked by the search command above. The
# fragment picks up at the checkpoint-frequency flag; the parser and its
# --model_dir, --data_dir, --batch_size and --save_checkpoints_steps
# arguments are defined earlier in the file.
parser.add_argument('--save_checkpoints_epochs', default=None,
                    help="Save a checkpoint every this many epochs "
                         "(takes precedence over --save_checkpoints_steps)")

if __name__ == '__main__':
    tf.reset_default_graph()
    tf.logging.set_verbosity(tf.logging.INFO)

    # Fix the random seed for reproducible experiments
    random_seed = 230
    tf.set_random_seed(random_seed)

    # Load the parameters from the json file written by the search script
    args = parser.parse_args()
    json_path = os.path.join(args.model_dir, 'params.json')
    assert os.path.isfile(
        json_path), "No json configuration file found at {}".format(json_path)
    params = Params(json_path)
    params.batch_size = int(args.batch_size)

    # The checkpoint frequency can be given either in steps or in epochs
    assert args.save_checkpoints_steps is not None or args.save_checkpoints_epochs is not None
    if args.save_checkpoints_epochs is not None:
        # Convert epochs to steps by counting the training examples on disk
        files = glob.glob(os.path.join(args.data_dir, "*_train_*tfrecord"))
        num_examples = util.count_records(files)
        steps_per_epoch = num_examples // params.batch_size
        save_checkpoints_steps = steps_per_epoch * int(args.save_checkpoints_epochs)
    else:
        save_checkpoints_steps = int(args.save_checkpoints_steps)

    # Define the model
    tf.logging.info("Creating the model...")
    tf_config = tf.ConfigProto()
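# `util.count_records` above is likewise not shown. TFRecord files carry no
# example count in their header, so the records have to be iterated once. A
# minimal sketch of the helper (an assumption, written against the TF 1.x API
# that the script already uses):

import tensorflow as tf


def count_records(filenames):
    """Return the total number of serialized examples across TFRecord files."""
    return sum(1 for filename in filenames
               for _ in tf.python_io.tf_record_iterator(filename))

# The resulting num_examples is what lets the script translate "save every N
# epochs" into a step-based save_checkpoints_steps value (the variable name
# matches the tf.estimator.RunConfig argument of the same name).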