def create_hparams(default_params, hparams_file=None, hparams_str=None):
    """Build an HParams object from defaults, an optional JSON file, and an optional override string."""
    hparams = HParams(**default_params)
    # Overrides from a JSON file are applied first ...
    if hparams_file is not None:
        with open(hparams_file) as f:
            hparams.parse_json(f.read())
    # ... then overrides from a comma-separated "name=value" string.
    if hparams_str is not None:
        hparams.parse(hparams_str)
    return hparams
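A minimal usage sketch for the helper above. The defaults, the hparams.json file, and its contents are illustrative assumptions; note that the override string is parsed after the JSON file, so it takes precedence.

# Hypothetical defaults and override sources, purely for illustration.
defaults = dict(learning_rate=1e-3, batch_size=32)

# hparams.json (assumed contents): {"batch_size": 64}
hparams = create_hparams(defaults,
                         hparams_file="hparams.json",
                         hparams_str="learning_rate=3e-4")

print(hparams.learning_rate)  # 0.0003 (from the override string)
print(hparams.batch_size)     # 64 (from the JSON file)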
def create_hparams(hparam_string=None):
    """Create model hyperparameters. Parse nondefault from given string."""
    hparams = HParams(
        # The name of the architecture to use.
        final_endpoint='Mixed_7c',
        min_depth=16,
        depth_multiplier=1.0)
    if hparam_string:
        tf_logging.info('Parsing command line hparams: %s', hparam_string)
        hparams.parse(hparam_string)
    return hparams
def main(_argv):
    # Pass command-line arguments to RunConfig.
    run_config = RunConfig(
        model_dir=tf.flags.FLAGS.model_dir,
        save_checkpoints_steps=tf.flags.FLAGS.save_checkpoints_steps)

    # Default hyperparameters.
    hparams = HParams(l2=1e-3, lr=1e-3, hidden_layers=3, hidden_units=200)
    # Parse the hparams command-line argument.
    hparams.parse(tf.flags.FLAGS.hparams)

    # Run the experiment.
    run(
        experiment_fn=experiment_fn,
        run_config=run_config,
        schedule=tf.flags.FLAGS.schedule,
        hparams=hparams)
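For context, a rough sketch of the tf.flags definitions this main() relies on. The flag names match the FLAGS attributes used above; defaults and help strings are assumptions.

# Sketch of the flags assumed by main(); default values and help text are illustrative only.
tf.flags.DEFINE_string("model_dir", "/tmp/model",
                       "Directory for checkpoints and summaries.")
tf.flags.DEFINE_integer("save_checkpoints_steps", 1000,
                        "How often (in steps) to save checkpoints.")
tf.flags.DEFINE_string("hparams", "",
                       "Comma-separated overrides, e.g. 'lr=3e-4,hidden_layers=4'.")
tf.flags.DEFINE_string("schedule", "train_and_evaluate",
                       "Schedule name passed through to run().")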
def main():
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument('--train-dir', type=str, default=None)
    parser.add_argument('--profile', action='store_true')
    parser.add_argument('--batch-size', type=int, default=32)
    parser.add_argument('--hparams', type=str, default=None)
    parser.add_argument('--max-steps', type=int, default=100000)
    parser.add_argument('--dataset-shards', type=int, default=None)
    args = parser.parse_args()

    tf.logging.set_verbosity(tf.logging.INFO)

    hparams = HParams(num_edges=800,
                      window_size=10,
                      num_negative=5,
                      batch_size=args.batch_size,
                      embedding_dim=128,
                      seed=42)
    if args.hparams is not None:
        hparams.parse(args.hparams)

    wikipedia_data = load_data_wikipedia_hyperlink()
    packed_labels = get_packed_labels(wikipedia_data['labels_sparse'])
    num_labels = np.max(packed_labels.labels) + 1

    input_fn = make_input_fn(wikipedia_data['adjacency_list'], packed_labels,
                             args.dataset_shards)

    estimator = tf.estimator.Estimator(
        label_clustering.make_label_clustering(num_labels),
        model_dir=args.train_dir,
        params=hparams,
        config=tf.estimator.RunConfig(tf_random_seed=hparams.seed))

    hooks = [
        tf.train.LoggingTensorHook(
            {'kappa_edges': 'kappa_edges_in_batch/value'}, every_n_secs=30)
    ]
    if args.profile:
        hooks.append(tf.train.ProfilerHook(save_secs=10))

    estimator.train(input_fn, max_steps=args.max_steps, hooks=hooks)
def create_hparams(hparams_string=None, verbose=False):
    """Create model hyperparameters. Parse nondefault from given string."""

    hparams = HParams(
        ################################
        # General Parameters           #
        ################################
        logging_batch_index_perc=10,  # Percentage of samples used from the full dataset between logging the loss for training and testing.
        start_with_test=True,  # Determines if the model is tested first before any training loops.
                               # The computed loss is also used to identify the best model so far.
                               # Therefore, if this is False and use_best_as_final_model is True
                               # the best model of the current training will be saved, which possibly
                               # overrides an older better model.
        log_memory_consumption=True,
        epochs_per_test=1,  # Number of training epochs before testing (NOTE that this includes the scheduler_type with epoch scheduling).

        networks_dir="nn",
        checkpoints_dir="checkpoints",
        epochs_per_checkpoint=1,  # Number of epochs between checkpoints, 0 for no checkpoints at all.
        save_final_model=True,  # Determines if the model is saved after training.
        use_best_as_final_model=True,  # Substitutes the saved final model with the best of the current run.

        ################################
        # Experiment Parameters        #
        ################################
        epochs=0,
        test_set_perc=0.05,  # Percentage of samples taken from the given id_list in __init__ for testing.
                             # Ignored when self.id_list_train is already set. Note that self.id_list_test must be set then as well.
        val_set_perc=0.05,  # Percentage of samples taken from the given id_list in __init__ for validation.
                            # Ignored when self.id_list_train is already set. Note that self.id_list_val should be set then as well.
        seed=1234,  # Used to initialize torch, numpy, and random. If None, the id_list is not shuffled before taking test and validation set from it.
        fp16_run=False,  # TODO: Not implemented.
        # distributed_run=False,  # TODO: Find out how distributed run works.
        # dist_url="file://distributed.dpt",
        # cudnn_enabled=True,
        # cudnn_benchmark=False,
        use_gpu=False,
        num_gpus=1,  # TODO: Change to num_devices.
        batch_first=False,  # Note: This might not be implemented properly everywhere.
        variable_sequence_length_train=None,  # Do samples in mini batches during training have variable length.
        variable_sequence_length_test=None,  # Do samples in mini batches during testing have variable length.
        shuffle_train_set=True,  # Shuffle in dataset to get mini batches.
        shuffle_val_set=False,  # Shuffle in dataset to get mini batches.
        batch_size_train=1,
        batch_size_test=48,
        # batch_size_val=1,  # TODO: Add again after finding all My* classes where it is missing.
        batch_size_benchmark=48,
        batch_size_synth=48,
        batch_size_gen_figure=48,
        dataset_num_workers_gpu=4,  # Number of workers used in dataset when running on GPU(s).
        dataset_num_workers_cpu=0,  # Number of workers used in dataset when running on CPU(s).
        dataset_pin_memory=True,
        dataset_load_async=True,
        teacher_forcing_in_test=False,  # If True, the targets are also given to the model when running the test (needed for WaveNet).
        preload_next_batch_to_gpu=False,  # If True loads the next batch to GPU while processing the current one.
                                          # This enhances GPU usage for the cost of memory, because two batches are loaded to the GPU.
                                          # TODO: This does not work yet, because cuda async does lazy loading.

        ################################
        # Data Parameters              #
        ################################
        len_in_out_multiplier=1,
        out_dir=None,

        ################################
        # Audio Parameters             #
        ################################
        # sampling_frequency=16000,  # TODO: Unused?
        frame_size=5,
        # max_wav_value=32768.0,

        ################################
        # Model Parameters             #
        ################################
        model_type=None,
        model_name=None,
        model_dir=None,  # Explicitly set directory where model is stored, otherwise dir_out/networks_dir/.
        dropout=0.0,
        hidden_init=0.0,  # Hidden state init value.
        train_hidden_init=False,  # Is the hidden state init value trainable.  # TODO: Unused?

        ################################
        # Optimization Hyperparameters #
        ################################
        loss_per_sample=False,  # If True the loss is first averaged on each sample and then over the batch.
                                # If False the loss is averaged over each frame in the whole batch (default).
        backward_retain_graph=False,  # Determines if the gradient computation should do aggressive memory freeing.
                                      # Only needed when gradient computational graph is reused.
        optimiser_type="Adam",  # "Adam", "SGD"  TODO: more
        optimiser_args=dict(),  # Set optimiser arguments. Preferred way to set learning rate: optimiser_args["lr"]=...
        use_saved_learning_rate=True,  # Use the learning rate saved with a model after loading it.
        replace_inf_grads_by_zero=False,  # Automatically substitute +/- inf gradients with zero during training.
        # dynamic_loss_scaling=True,
        exponential_moving_average=False,  # TODO: Not implemented properly.
        exponential_moving_average_decay=0.9999,  # Ignored when exponential_moving_average is False.

        scheduler_type="default",  # "None", "Plateau", "Exponential", "Noam",  TODO: "Step", "Cyclic_cosine"
        scheduler_args=dict(),
        iterations_per_scheduler_step=None,  # Number of training iterations after which the scheduler step function
                                             # is called with the current loss and total number of iterations as parameter.
                                             # If None the scheduler is not called.
        epochs_per_scheduler_step=None,  # Number of training epochs after which the scheduler step function is
                                         # called with the current validation loss and total number of epochs.
                                         # When a model is loaded the epoch number continues from the epoch number stored in the model.

        grad_clip_norm_type=None,  # If None no gradient clipping otherwise uses grad_clip_max_norm (small bias).
        grad_clip_max_norm=None,  # Ignored if grad_clip_norm_type is None.
        grad_clip_thresh=None,  # Clip absolute value of gradient (big bias).

        # Set optimiser or scheduler to ignore the type configuration above. Used to try new implementations.
        optimiser=None,  # Will be called with model parameters only. Set other parameters with partial. Example: partial(torch.optim.Adam, **args).
        scheduler=None,  # Will be called with optimiser only. Set other parameters with partial. Example: partial(ReduceLROnPlateau, **args).

        ################################
        # Synthesis Parameters         #
        ################################
        synth_vocoder="WORLD",  # "WORLD", "r9y9wavenet_quantized_16k_world_feats"
        synth_ext="wav",  # Extension of the output audio.
        synth_fs=16000,
        num_coded_sps=60,  # Number of spectral features, currently always MGC.
        synth_dir=None,
        synth_acoustic_model_path=None,
        synth_file_suffix='',
        # do_post_filtering=False,  # TODO: Merlin does some filtering before calling its vocoder. Possible implementation: https://github.com/r9y9/nnmnkwii/blob/master/nnmnkwii/postfilters/__init__.py
        synth_gen_figure=False,
        gen_figure_ext=".pdf",

        epochs_per_plot=0,  # No plots per epoch with <= 0.  TODO: plot in run method each ... epochs.
        plot_per_epoch_id_list=None,  # TODO: Id(s) in the dictionary which are plotted.
    )

    if hparams_string:
        logging.info('Parsing command line hparams: %s', hparams_string)
        hparams.parse(hparams_string)

    if verbose:
        logging.info('Final parsed hparams: %s', hparams.values())

    return hparams
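A brief usage sketch, assuming the HParams implementation imported here parses booleans and integers from an override string the way tf.contrib.training.HParams does; the override values are illustrative only.

# Override a few defaults from a command-line style string (illustrative values).
hparams = create_hparams("use_gpu=True,batch_size_train=16,epochs=50", verbose=True)
assert hparams.use_gpu is True
assert hparams.batch_size_train == 16
assert hparams.epochs == 50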
def main(model_dir, train_data, eval_data, vocab_file, hparams):
    tf.logging.set_verbosity(tf.logging.INFO)

    # Default hyperparameters; non-default values are parsed from the `hparams` string.
    hparams_ = HParams(num_epochs=10,
                       batch_size=16,
                       max_steps=10000,
                       units=150,
                       layers=3,
                       dropout=0.0,
                       question_max_words=30,
                       passage_max_words=150,
                       predict_passage_max_words=800,
                       answer_max_words=50,
                       vocab_size=30000,
                       emb_size=300,
                       r=0.8,
                       cudnn=False,
                       grad_clip=5.0,
                       tgt_sos_id=1,
                       tgt_eos_id=2,
                       word_vocab_file=vocab_file)
    hparams_.parse(hparams)
    hparams = hparams_

    config = tf.ConfigProto()
    # config.intra_op_parallelism_threads = 32
    # config.inter_op_parallelism_threads = 32
    run_config = tf.estimator.RunConfig(log_step_count_steps=1,
                                        tf_random_seed=19830610,
                                        model_dir=model_dir,
                                        save_summary_steps=1,
                                        session_config=config)

    # Sanity check: run the input pipeline once and print a batch.
    with tf.Session() as sess:
        test = input_fn([train_data],
                        hparams=hparams,
                        mode=tf.estimator.ModeKeys.EVAL,
                        batch_size=hparams.batch_size)
        print(sess.run([test]))

    estimator = tf.estimator.Estimator(model_fn=model_fn,
                                       params=hparams,
                                       config=run_config)
    train_spec = tf.estimator.TrainSpec(
        input_fn=lambda: input_fn([train_data],
                                  hparams=hparams,
                                  mode=tf.estimator.ModeKeys.TRAIN,
                                  num_epochs=hparams.num_epochs,
                                  batch_size=hparams.batch_size),
        max_steps=hparams.max_steps,
        hooks=None)
    eval_spec = tf.estimator.EvalSpec(
        input_fn=lambda: input_fn([eval_data],
                                  hparams=hparams,
                                  mode=tf.estimator.ModeKeys.EVAL,
                                  batch_size=hparams.batch_size),
        exporters=[
            tf.estimator.LatestExporter(
                name="predict",  # Name of the folder under export/ to which the model is exported.
                serving_input_receiver_fn=partial(serving_input_fn, params=hparams),
                exports_to_keep=1,
                as_text=True)
        ],
        steps=10,
        throttle_secs=1200)

    tf.estimator.train_and_evaluate(estimator, train_spec, eval_spec)
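The exporter above calls serving_input_fn through partial(serving_input_fn, params=hparams); a plausible sketch of such a function is shown below. The feature names, dtypes, and shapes are assumptions, not taken from the source.

# Hypothetical serving_input_fn compatible with partial(serving_input_fn, params=hparams).
def serving_input_fn(params):
    # Placeholders fed at serving time; names and shapes are illustrative assumptions.
    question_words = tf.placeholder(tf.int32, [None, params.question_max_words],
                                    name="question_words")
    passage_words = tf.placeholder(tf.int32, [None, params.passage_max_words],
                                   name="passage_words")
    features = {"question_words": question_words, "passage_words": passage_words}
    return tf.estimator.export.ServingInputReceiver(features, features)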
def main(argv):
    # hparams = HParams(minibatch_size=8, bond_dim=8, delta_t=1/FLAGS.sample_rate, sigma=0.000001,
    #                   h_reg=200/(np.pi * FLAGS.sample_rate)**2, r_reg=2000/(np.pi * FLAGS.sample_rate),
    #                   initial_rank=None, A=100., learning_rate=0.001)
    hparams = HParams(minibatch_size=8,
                      bond_dim=8,
                      delta_t=1 / FLAGS.sample_rate,
                      sigma=0.0001,
                      h_reg=200 / (np.pi * FLAGS.sample_rate)**2,
                      r_reg=0.1,
                      initial_rank=None,
                      A=100.,
                      learning_rate=0.001)
    hparams.parse(FLAGS.hparams)

    with tf.variable_scope("data"):
        data = get_audio(datadir=FLAGS.datadir, dataset=FLAGS.dataset, hps=hparams)

    with tf.variable_scope("model", reuse=tf.AUTO_REUSE):
        if FLAGS.mps_model == 'rho_mps':
            model = RhoCMPS(hparams=hparams, data_iterator=data)
        else:
            model = PsiCMPS(hparams=hparams, data_iterator=data)

        h_l2sqnorm = tf.reduce_sum(tf.square(model.freqs))
        r_l2sqnorm = tf.real(tf.reduce_sum(tf.conj(model.R) * model.R))

    with tf.variable_scope("total_loss"):
        total_loss = model.loss + hparams.h_reg * h_l2sqnorm \
                     + hparams.r_reg * r_l2sqnorm

    with tf.variable_scope("summaries"):
        tf.summary.scalar("A", tf.cast(model.A, dtype=tf.float32))
        tf.summary.scalar("sigma", tf.cast(model.sigma, dtype=tf.float32))
        tf.summary.scalar("h_l2norm", tf.sqrt(h_l2sqnorm))
        tf.summary.scalar("r_l2norm", tf.sqrt(r_l2sqnorm))

        gr_rate = 2 * np.pi * hparams.sigma**2 * r_l2sqnorm / hparams.bond_dim
        tf.summary.scalar("gr_decay_time", 1 / gr_rate)

        tf.summary.scalar("model_loss", tf.reshape(model.loss, []))
        tf.summary.scalar("total_loss", tf.reshape(total_loss, []))

        tf.summary.audio("data", data, sample_rate=FLAGS.sample_rate, max_outputs=5)
        tf.summary.histogram("frequencies", model.freqs / (2 * np.pi))

        if FLAGS.visualize:
            # Doesn't work for Datasets where batch size can't be inferred.
            data_waveform_op = tfplot.autowrap(waveform_plot, batch=True)(
                data, hparams.minibatch_size * [hparams.delta_t])
            tf.summary.image("data_waveform", data_waveform_op)

            if FLAGS.num_samples != 0:
                samples = model.sample(FLAGS.num_samples, FLAGS.sample_duration)
                sample_waveform_op = tfplot.autowrap(waveform_plot, batch=True)(
                    samples, FLAGS.num_samples * [hparams.delta_t])
                tf.summary.image("sample_waveform", sample_waveform_op)

    step = tf.get_variable("global_step", [], tf.int64,
                           tf.zeros_initializer(), trainable=False)
    train_op = tf.train.AdamOptimizer(
        learning_rate=hparams.learning_rate).minimize(total_loss, global_step=step)

    # TODO: Unrolling in time?

    tf.contrib.training.train(
        train_op,
        save_checkpoint_secs=60,
        logdir=f"{FLAGS.logdir}/{hparams.bond_dim}_{hparams.delta_t}_{hparams.minibatch_size}")
def main(model_dir, train_data, eval_data, word_embeddings, char_embeddings,
         hparams, log_devices):
    tf.logging.set_verbosity(tf.logging.INFO)

    char_embeddings_np = load_embeddings(char_embeddings)
    # Cache the parsed word embeddings as a .npy file to speed up subsequent runs.
    if os.path.isfile(word_embeddings + '.npy'):
        word_embeddings_np = np.load(word_embeddings + '.npy')
    else:
        word_embeddings_np = load_embeddings(word_embeddings)
        np.save(word_embeddings, word_embeddings_np)

    # Default hyperparameters; non-default values are parsed from the `hparams` string.
    hparams_ = HParams(num_epochs=10,
                       batch_size=16,
                       max_steps=100,
                       units=50,
                       layers=3,
                       dropout=0.0,
                       learning_rate=0.5,
                       question_max_words=30,
                       question_max_chars=16,
                       passage_max_words=800,
                       train_passage_max_words=400,
                       passage_max_chars=16,
                       vocab_size=word_embeddings_np.shape[0],
                       emb_size=300,
                       char_vocab_size=char_embeddings_np.shape[0],
                       char_emb_size=300,
                       word_vocab_file=word_embeddings,
                       char_vocab_file=char_embeddings,
                       passage_count=10,
                       train_passage_count=5,
                       passage_max_len=120,
                       r=0.8,
                       grad_clip=5.0,
                       attention='luong')
    hparams = hparams_.parse(hparams)
    print(hparams)

    config = tf.ConfigProto()
    config.allow_soft_placement = True
    config.log_device_placement = log_devices
    # config.intra_op_parallelism_threads = 32
    # config.inter_op_parallelism_threads = 32
    run_config = tf.estimator.RunConfig(log_step_count_steps=1,
                                        tf_random_seed=19830610,
                                        model_dir=model_dir,
                                        save_summary_steps=1,
                                        session_config=config)

    # with tf.Session() as sess:
    #     test = input_fn([train_data],
    #                     hparams=hparams,
    #                     mode=tf.estimator.ModeKeys.EVAL,
    #                     batch_size=hparams.batch_size)
    #     print(sess.run([test]))

    estimator = tf.estimator.Estimator(
        model_fn=partial(model_fn,
                         word_embeddings_np=word_embeddings_np,
                         char_embeddings_np=char_embeddings_np),
        params=hparams,
        config=run_config)
    train_spec = tf.estimator.TrainSpec(
        input_fn=lambda: input_fn([train_data],
                                  hparams=hparams,
                                  mode=tf.estimator.ModeKeys.TRAIN,
                                  num_epochs=hparams.num_epochs,
                                  batch_size=hparams.batch_size),
        max_steps=hparams.max_steps,
        hooks=None)
    eval_spec = tf.estimator.EvalSpec(
        input_fn=lambda: input_fn([eval_data],
                                  hparams=hparams,
                                  mode=tf.estimator.ModeKeys.EVAL,
                                  batch_size=hparams.batch_size),
        exporters=[
            tf.estimator.LatestExporter(
                name="predict",  # Name of the folder under export/ to which the model is exported.
                serving_input_receiver_fn=partial(serving_input_fn, params=hparams),
                exports_to_keep=1,
                as_text=True)
        ],
        steps=100,
        throttle_secs=1200)

    tf.estimator.train_and_evaluate(estimator, train_spec, eval_spec)