import gc
import os
import signal
from datetime import datetime
from shutil import copyfile

import tensorflow as tf
import tensorflow.keras as tfk


def create_hyper_model(hp):
    global count_models

    # Reset Keras global state between tuner trials (hopefully releasing
    # memory held by previously built models).
    tf.keras.backend.clear_session()
    gc.collect()
    count_models += 1

    # Search space. Encoder and decoder RNN dimensions must be equal, so a
    # single rnn_dim is sampled and used for both.
    rnn_dim = hp.Choice('rnn_dim', [512])
    enc_dropout = hp.Float('enc_dropout', min_value=0.4, max_value=0.7, step=0.1)
    dec_dropout = hp.Float('dec_dropout', min_value=0.4, max_value=0.7, step=0.1)
    cont_dim = hp.Choice('cont_dim', [20, 50, 120, 200, 400])
    mu_force = 1.2  # hp.Choice('mu_force', [0.5, 1.2, 2.5, 5.0])
    t_gumbel = hp.Choice('t_gumbel', [0.0005, 0.001, 0.02, 0.1])
    style_dim = hp.Choice('style_dim', [20, 80, 150, 300])
    kl_reg = hp.Choice('kl_reg', [0.2, 0.5, 0.8, 0.9])
    beta_steps = hp.Choice('beta_anneal_steps', [2500, 5000])
    attention = hp.Choice('attention', [0, 128, 256, 512])
    lr = hp.Choice('lr', [5e-4, 5.5e-4, 6e-4, 8e-4, 1e-3])
    decay = hp.Choice('decay', [0.85, 0.93, 0.95, 0.97])
    decay_steps = hp.Choice('decay_steps', [2500])
    rnn_type = hp.Choice('rnn_type', ["lstm", "gru"])

    # Fixed configuration kept for single-run experiments:
    # rnn_dim = hp.Choice('rnn_dim', [512])
    # enc_dropout = hp.Choice('enc_dropout', [0.5])
    # dec_dropout = hp.Choice('dec_dropout', [0.2])
    # cont_dim = hp.Choice('cont_dim', [120])
    # mu_force = 1.3
    # t_gumbel = hp.Choice('t_gumbel', [0.02])
    # style_dim = hp.Choice('style_dim', [80])
    # kl_reg = hp.Choice('kl_reg', [0.8])
    # beta_steps = hp.Choice('beta_anneal_steps', [2500])
    # attention = hp.Choice('attention', [128])
    # lr = hp.Choice('lr', [5e-4])
    # decay = hp.Choice('decay', [0.85])
    # decay_steps = hp.Choice('decay_steps', [2500])
    # rnn_type = hp.Choice('rnn_type', ["lstm"])

    vae = MVAE(x_depth=x_depth,
               enc_rnn_dim=rnn_dim,
               enc_dropout=enc_dropout,
               dec_rnn_dim=rnn_dim,
               dec_dropout=dec_dropout,
               cont_dim=cont_dim,
               cat_dim=cat_dim,
               mu_force=mu_force,
               t_gumbel=t_gumbel,
               style_embed_dim=style_dim,
               kl_reg=kl_reg,
               beta_anneal_steps=beta_steps,
               rnn_type=rnn_type,
               attention=attention)

    schedule = tf.keras.optimizers.schedules.ExponentialDecay(
        lr, decay_steps, decay, staircase=False)
    optimizer = tfk.optimizers.Adam(learning_rate=schedule)
    vae.compile(optimizer=optimizer)
    vae.run_eagerly = True

    # Enable KL annealing.
    vae.set_kl_anneal(True)

    return vae
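# A minimal sketch (not part of the original code) of wiring create_hyper_model
# into a KerasTuner search. The trial budget, directory, project name, and
# epoch count are assumptions; the 'val_p_acc' objective matches the metric
# monitored by the training callbacks below.
def run_hyper_search(train_segments, test_segments, x_depth, batch_size):
    import keras_tuner as kt

    train_iterator = load_noteseqs(train_segments, x_depth, batch_size).get_iterator()
    test_iterator = load_noteseqs(test_segments, x_depth, batch_size).get_iterator()

    tuner = kt.RandomSearch(
        create_hyper_model,
        objective=kt.Objective('val_p_acc', direction='max'),
        max_trials=30,
        directory='tuning',
        project_name='mvae')
    tuner.search(train_iterator, validation_data=test_iterator, epochs=20)
    return tuner.get_best_hyperparameters(num_trials=1)[0]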
def create_model_and_train(train_segments, test_segments, x_depth, batch_size,
                           enc_rnn_dim, dec_rnn_dim, enc_dropout, dec_dropout,
                           cont_dim, cat_dim, mu_force, t_gumbel, style_embed_dim,
                           kl_reg, beta_anneal_steps, rnn_type, attention,
                           save_path, start_epoch, final_epoch, weights=None):
    train_iterator = load_noteseqs(train_segments, x_depth, batch_size).get_iterator()
    test_iterator = load_noteseqs(test_segments, x_depth, batch_size).get_iterator()

    vae = MVAE(x_depth=x_depth,
               enc_rnn_dim=enc_rnn_dim,
               enc_dropout=enc_dropout,
               dec_rnn_dim=dec_rnn_dim,
               dec_dropout=dec_dropout,
               cont_dim=cont_dim,
               cat_dim=cat_dim,
               mu_force=mu_force,
               t_gumbel=t_gumbel,
               style_embed_dim=style_embed_dim,
               kl_reg=kl_reg,
               beta_anneal_steps=beta_anneal_steps,
               rnn_type=rnn_type,
               attention=attention)

    optimizer = tfk.optimizers.Adam(learning_rate=5e-4)
    vae.compile(optimizer=optimizer)
    vae.run_eagerly = True

    os.makedirs(save_path, exist_ok=True)

    callbacks = [
        # Sample from the model at the end of every epoch.
        tfk.callbacks.LambdaCallback(
            on_epoch_end=lambda epoch, _: generate_and_save_samples(
                vae, epoch, save_path, cat_dim)),
        # Reset the metric trackers at the start of every epoch. Note the
        # LambdaCallback hook is named on_epoch_begin; the original passed
        # on_epoch_start, which is silently ignored and never called.
        tfk.callbacks.LambdaCallback(
            on_epoch_begin=lambda epoch, _: vae.reset_trackers()),
        tfk.callbacks.EarlyStopping(monitor='val_p_acc', min_delta=0.01,
                                    patience=5, mode='max'),
        tfk.callbacks.CSVLogger(save_path + 'log.csv', append=True),
        tfk.callbacks.ModelCheckpoint(save_path + 'weights/weights.{epoch:02d}',
                                      monitor='val_p_acc',
                                      save_weights_only=True,
                                      save_best_only=True,
                                      mode='max'),
        tfk.callbacks.TensorBoard(log_dir=save_path, write_graph=True,
                                  update_freq='epoch', histogram_freq=40,
                                  profile_batch='10,20')
    ]

    if weights is not None:
        vae.load_weights(save_path + weights)

    history = vae.fit(train_iterator,
                      epochs=final_epoch,
                      initial_epoch=start_epoch,
                      callbacks=callbacks,
                      validation_data=test_iterator)

    vae.save_weights(save_path + 'weights/weights-final')
    return history
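# Hypothetical single-configuration run of create_model_and_train, using the
# fixed values listed (commented out) in create_hyper_model above; the batch
# size, save path, and epoch counts are placeholders, not from the original.
def run_single_training(train_segments, test_segments, x_depth, cat_dim):
    return create_model_and_train(
        train_segments, test_segments, x_depth, batch_size=64,
        enc_rnn_dim=512, dec_rnn_dim=512, enc_dropout=0.5, dec_dropout=0.2,
        cont_dim=120, cat_dim=cat_dim, mu_force=1.3, t_gumbel=0.02,
        style_embed_dim=80, kl_reg=0.8, beta_anneal_steps=2500,
        rnn_type='lstm', attention=128,
        save_path='runs/mvae/', start_epoch=0, final_epoch=100)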
enc_dropout=args["enc_dropout"], dec_rnn_dim=args["dec_rnn_dim"], dec_dropout=args["dec_dropout"], cont_dim=args["cont_dim"], cat_dim=args["cat_dim"], mu_force=args["mu_force"], t_gumbel=args["t_gumbel"], style_embed_dim=args["style_embed_dim"], kl_reg=args["kl_reg"], beta_anneal_steps=args["kl_anneal"], rnn_type=rnn_type, attention=attention) optimizer = tfk.optimizers.Adam(learning_rate=5e-4) vae.compile(optimizer=optimizer) vae.run_eagerly = True now = datetime.now() save_path = args["save_path"] # copy configuration file in model directory if (os.path.exists(save_path) == False): os.makedirs(save_path) copyfile(config_file, save_path + 'train.conf') with open(save_path + 'model.txt', 'w') as f: vae.model().summary(print_fn=lambda x: f.write(x + '\n')) # register handler for Ctrl+C in order to save final weights signal.signal(signal.SIGINT, signal_handler)