def align_model(model_path):
    output = '../nongit/local/synth_normal/align/'
    with LogFile(output, 'run.log'):
        action, reward, state, ids, seq_lengths = Synth.get_data()
        tf.reset_default_graph()
        model = HYPMDD(enc_cells=50, dec_cells=3, a_size=2, s_size=0, latent_size=2,
                       n_T=action.shape[1], static_loops=True, mmd_coef=50)
        ensure_dir(output)
        action, reward, state, seq_lengths, test_action, test_reward, test_state, test_seq_lengths = \
            Synth.generate_train_test(action, reward, state, seq_lengths, ids, 0.3, output)
        Synth.opt_model_align(model, action, reward, state, seq_lengths,
                              test_action, test_reward, test_state, test_seq_lengths,
                              output + 'model/', model_path,
                              hessian_term=True, hessian_lr=0.0001, _beta=1)

def predict(cls, rnn2rnn, action, reward, state, ids, seq_lengths, output_path, model_path):
    saver = tf.train.Saver()
    ckpt = tf.train.get_checkpoint_state(model_path)
    with tf.Session() as sess:
        saver.restore(sess, ckpt.model_checkpoint_path)
        enc_dict_feed = rnn2rnn.enc.enc_beh_feed(action, reward, state, seq_lengths)
        dec_dict_feed = rnn2rnn.dec.dec_beh_feed(action, reward, state, seq_lengths)
        enc_loss, dec_loss, z_mean = sess.run(
            [rnn2rnn.enc.loss, rnn2rnn.dec.loss, rnn2rnn.enc.z_pred],
            feed_dict={**enc_dict_feed, **dec_dict_feed})
        cls.report_model(dec_dict_feed, enc_dict_feed, rnn2rnn, sess, -1, {})
        if output_path is not None:
            ensure_dir(output_path)
            df = pd.DataFrame(z_mean)
            df = pd.concat([df, pd.DataFrame({'id': ids})], axis=1)
            df.to_csv(output_path + 'z_mean.csv', index=False)
    return z_mean

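# Illustrative sketch (added, not original project code): the z_mean.csv written
# by `predict` holds the latent means in its leading columns plus an 'id' column,
# so it can be loaded back for downstream analysis. The default path is the one
# passed by predict_z below; everything else here is an assumption.
def load_z_mean(output_path='../nongit/local/BD/'):
    import pandas as pd
    return pd.read_csv(output_path + 'z_mean.csv')
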
def write_diff(cls, save_path):
    DLogger.logger().debug("writing git diff to " + save_path + 'diff.txt')
    ensure_dir(save_path)
    diff_text = 'no git found'
    try:
        import subprocess
        result = subprocess.run(["git", "diff"], stdout=subprocess.PIPE)
        diff_text = result.stdout.decode('utf-8')
    finally:
        # write whatever was captured (or the fallback message) even if git failed
        with open(save_path + "diff.txt", "w") as f:
            f.write(diff_text)

def opt_model_train_test(cls, model_path):
    output = '../nongit/local/BD/opt/'
    with LogFile(output, 'run.log'):
        action, reward, state, ids, seq_lengths = BD.get_data()
        tf.reset_default_graph()
        with tf.device('/device:GPU:0'):
            model = HYPMMD(enc_cells=20, dec_cells=3, a_size=2, s_size=0, latent_size=2,
                           n_T=action.shape[1], static_loops=False, mmd_coef=50)
        ensure_dir(output)
        actions_train, actions_test, rewards_train, rewards_test, seq_train, seq_test, id_train, id_test = \
            stratified_train_test_split(action, reward, state, ids, seq_lengths)
        DLogger.logger().debug("test points: " + str(actions_test.shape[0]))
        DLogger.logger().debug("train points: " + str(actions_train.shape[0]))

        def lr_schedule(t):
            if t < 2000:
                _lr = 0.001
            elif t < 5000:
                _lr = 0.0001
            else:
                _lr = 0.00001
            return _lr

        BD.opt_model_mddae(model, actions_train, rewards_train, None, seq_train,
                           actions_test, rewards_test, None, seq_test,
                           output + '/model/', model_path,
                           hessian_term=False, lr_schedule=lr_schedule)

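# Illustrative sketch (added, hypothetical helper): the piecewise lr_schedule above
# is a simple step schedule; a reusable version with the same thresholds and values
# could look like this.
def make_step_lr_schedule(boundaries=(2000, 5000), values=(0.001, 0.0001, 0.00001)):
    def schedule(t):
        for boundary, value in zip(boundaries, values):
            if t < boundary:
                return value
        return values[-1]
    return schedule
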
def predict_z(cls, model_path):
    output = '../nongit/local/BD/opt/'
    with LogFile(output, 'run.log'):
        action, reward, state, ids, seq_lengths = BD.get_data()
        DLogger.logger().debug("data points: " + str(action.shape[0]))
        tf.reset_default_graph()
        ensure_dir(output)
        model = HYPMMD(enc_cells=20, dec_cells=3, a_size=2, s_size=0, latent_size=2,
                       n_T=action.shape[1], static_loops=False)
        BD.predict(model, action, reward, state, ids, seq_lengths,
                   '../nongit/local/BD/', model_path)

def create_onpolicy(cls, n_cells, z, n_T, output_path, model_path, mode="ossi"):
    tf.reset_default_graph()
    tf.set_random_seed(1)
    dec = cls.get_enc_dec(n_cells, z)
    ensure_dir(output_path)
    with LogFile(output_path, 'run.log'):
        saver = tf.train.Saver()
        ckpt = tf.train.get_checkpoint_state(model_path)
        with tf.Session() as sess:
            saver.restore(sess, ckpt.model_checkpoint_path)
            freq = 1
            baseline = 4
            if mode == 'A1':
                a1_period = lambda trial: trial < 15
            elif mode == 'A2':
                a1_period = lambda trial: False
            else:
                raise Exception("Unknown mode")
            states, policies, rewards, choices, rnn_states = dec.simulate_env(
                sess, 10,
                a2_generic(a1_period,
                           lambda trial: trial < 1,
                           lambda trial: trial in [],
                           init_state=None, init_action=-1, init_reward=0),
                greedy=True)
            train = format_to_training_data(rewards, choices, states)
            if output_path is not None:
                Export.policies({'id1': {'1': policies}}, output_path, 'policies-.csv')
                Export.export_train(train, output_path, 'train.csv')
                np.savetxt(output_path + 'z.csv', z[0], delimiter=',')

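# Note (added): create_onpolicy only handles mode 'A1' (A1 schedule active for the
# first 15 trials) and 'A2' (never active); any other value, including the default
# "ossi", raises "Unknown mode". The mapping below just restates those two cases.
_A1_PERIODS = {
    'A1': lambda trial: trial < 15,
    'A2': lambda trial: False,
}
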
def train_test(model_path):
    output = '../nongit/local/synth_normal/opt/'
    with LogFile(output, 'run.log'):
        action, reward, state, ids, seq_lengths = Synth.get_data()
        tf.reset_default_graph()
        model = HYPMDD(enc_cells=50, dec_cells=3, a_size=2, s_size=0, latent_size=2,
                       n_T=action.shape[1], static_loops=False, mmd_coef=50)
        ensure_dir(output)
        action, reward, state, seq_lengths, test_action, test_reward, test_state, test_seq_lengths = \
            Synth.generate_train_test(action, reward, state, seq_lengths, ids, 0.3, output)

        def lr_schedule(t):
            if t < 2000:
                _lr = 0.001
            elif t < 5000:
                _lr = 0.001
            else:
                _lr = 0.0001
            return _lr

        Synth.opt_model_mddae(model, action, reward, state, seq_lengths,
                              test_action, test_reward, test_state, test_seq_lengths,
                              output + 'model/', model_path,
                              hessian_term=False, lr_schedule=lr_schedule)

def align_model(cls, model_path):
    output = '../nongit/local/BD/align/'
    with LogFile(output, 'run.log'):
        action, reward, state, ids, seq_lengths = BD.get_data()
        tf.reset_default_graph()
        with tf.device('/device:GPU:0'):
            model = HYPMMD(enc_cells=20, dec_cells=3, a_size=2, s_size=0, latent_size=2,
                           n_T=action.shape[1], static_loops=True, mmd_coef=2)
        ensure_dir(output)
        actions_train, actions_test, rewards_train, rewards_test, seq_train, seq_test, id_train, id_test = \
            stratified_train_test_split(action, reward, state, ids, seq_lengths)
        DLogger.logger().debug("test points: " + str(actions_test.shape[0]))
        DLogger.logger().debug("train points: " + str(actions_train.shape[0]))
        BD.opt_model_align(model, actions_train, rewards_train, None, seq_train,
                           actions_test, rewards_test, None, seq_test,
                           output + '/model/', model_path,
                           hessian_term=True, _beta=0.5, hessian_lr=0.0001, _h=0.1)

def opt_model_mddae(cls, rnn2rnn, action, reward, state, seq_lengths,
                    test_action=None, test_reward=None, test_state=None, test_seq_lengths=None,
                    save_path=None, init_path=None,
                    hessian_term=False, _beta=1, lr_schedule=lambda t: 0.001):
    cls.write_diff(save_path)

    # Optimizers
    trainables = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES)
    trainables_dec = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope='dec')
    trainables_enc = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope='enc')

    beta = tf.placeholder(Const.FLOAT, shape=())
    h = tf.placeholder(Const.FLOAT, shape=())
    lr = tf.placeholder(Const.FLOAT, shape=())

    opt_all = cls._get_apply_grads(
        rnn2rnn.dec.loss + beta * rnn2rnn.enc.loss + h * rnn2rnn.dec.hess_loss,
        trainables, lr, tf.train.AdamOptimizer)
    opt_enc = cls._get_apply_grads(
        rnn2rnn.dec.loss + beta * rnn2rnn.enc.loss + h * rnn2rnn.dec.hess_loss,
        trainables_enc, lr, tf.train.AdamOptimizer)
    opt_dec = cls._get_apply_grads(
        rnn2rnn.dec.loss + beta * rnn2rnn.enc.loss + h * rnn2rnn.dec.hess_loss,
        trainables_dec, lr, tf.train.AdamOptimizer)

    if init_path is not None:
        saver = tf.train.Saver()
        ckpt = tf.train.get_checkpoint_state(init_path)
        DLogger.logger().debug('loaded model from: ' + init_path)
    else:
        init = tf.global_variables_initializer()

    with tf.Session(config=tf.ConfigProto(allow_soft_placement=True)) as sess:
        if init_path is not None:
            saver.restore(sess, ckpt.model_checkpoint_path)
        else:
            sess.run(init)

        if save_path:
            iter_path = save_path + '/iter-init/'
            saver = tf.train.Saver()
            ensure_dir(iter_path)
            saver.save(sess, iter_path + "model.ckpt", write_meta_graph=False)
            DLogger.logger().debug("Model saved in path: %s" % iter_path)

        try:
            for t in range(100000):
                cur_beta = {beta: _beta}

                # adaptive learning rate
                _lr = lr_schedule(t)

                if not hessian_term:
                    _h = 0.0
                else:
                    _h = min(t / 1000, 0.1)

                cur_lr = {lr: _lr}
                cur_h = {h: _h}

                enc_dict_feed = rnn2rnn.enc.enc_beh_feed(
                    BaseOpt.random_shuffle_action(action, seq_lengths),
                    reward, state, seq_lengths)
                dec_dict_feed = rnn2rnn.dec.dec_beh_feed(
                    BaseOpt.random_shuffle_action(action, seq_lengths),
                    reward, state, seq_lengths)

                _, enc_loss, dec_loss, hess_loss, rand_loss, z_grad, z_cov = sess.run(
                    [opt_all] +
                    [rnn2rnn.enc.loss, rnn2rnn.dec.loss, rnn2rnn.dec.hess_loss,
                     rnn2rnn.dec.sloss, rnn2rnn.dec.z_grad, rnn2rnn.enc.z_cov],
                    feed_dict={**enc_dict_feed, **dec_dict_feed, **cur_beta, **cur_lr, **cur_h})

                DLogger.logger().debug(
                    "global iter = {:4d} "
                    "enc loss: {:7.4f} "
                    "dec loss: {:7.4f} "
                    "hess loss: {:7.4f} "
                    "rand loss: {:7.4f} "
                    "beta: {:7.4f} "
                    "LR: {:7.4f} "
                    "grad z {} "
                    "z-cov: {}".format(
                        t, enc_loss, dec_loss, hess_loss, rand_loss, _beta, _lr, z_grad,
                        str(np.array2string(z_cov.flatten(), precision=3).replace('\n', ''))))

                if t % 200 == 0:
                    if test_action is not None:
                        test_enc_dict_feed = rnn2rnn.enc.enc_beh_feed(
                            BaseOpt.random_shuffle_action(test_action, test_seq_lengths),
                            test_reward, test_state, test_seq_lengths)
                        test_dec_dict_feed = rnn2rnn.dec.dec_beh_feed(
                            BaseOpt.random_shuffle_action(test_action, test_seq_lengths),
                            test_reward, test_state, test_seq_lengths)

                        enc_loss, dec_loss, hess_loss, rand_loss, z_grad, z_cov = sess.run(
                            [rnn2rnn.enc.loss, rnn2rnn.dec.loss, rnn2rnn.dec.hess_loss,
                             rnn2rnn.dec.sloss, rnn2rnn.dec.z_grad, rnn2rnn.enc.z_cov],
                            feed_dict={**test_enc_dict_feed, **test_dec_dict_feed,
                                       **cur_beta, **cur_lr, **cur_h})

                        DLogger.logger().debug(
                            "TEST data: global iter = {:4d} "
                            "enc loss: {:7.4f} "
                            "dec loss: {:7.4f} "
                            "hess loss: {:7.4f} "
                            "rand loss: {:7.4f} "
                            "beta: {:7.4f} "
                            "LR: {:7.4f} "
                            "z grad {} "
                            "z-cov: {}".format(
                                t, enc_loss, dec_loss, hess_loss, rand_loss, _beta, _lr, z_grad,
                                str(np.array2string(z_cov.flatten(), precision=3).replace('\n', ''))))

                    if save_path:
                        iter_path = save_path + '/iter-' + str(t) + '/'
                        saver = tf.train.Saver()
                        ensure_dir(iter_path)
                        saver.save(sess, iter_path + "model.ckpt", write_meta_graph=False)
                        DLogger.logger().debug("Model saved in path: %s" % iter_path)
        finally:
            if save_path:
                saver = tf.train.Saver()
                ensure_dir(save_path)
                save_path = saver.save(sess, save_path + "model.ckpt")
                DLogger.logger().debug("Model saved in path: %s" % save_path)

def generate_off(cls):
    rewards = [0] * 10
    rewards[4] = 1
    # rewards[14] = 1
    actions = [0] * 10

    ind = 0
    for kappa in np.linspace(-1.2, 1.2, num=15):
        beta = 3
        z_dim = 1
        other_dim = 0
        path = "../nongit/local/synth/sims/dims/A1/z0/_" + str(ind) + '/'
        ensure_dir(path)
        pol = np.array(cls.sim_OFF(0.2, beta, kappa, rewards, actions))
        polpd = pd.DataFrame({'0': pol, '1': 1 - pol, 'id': 'id1', 'block': 1})
        polpd.to_csv(path + "policies-.csv")
        train = pd.DataFrame({'reward': rewards, 'action': actions,
                              'state0': '', 'id': 'id1', 'block': 1})
        train.to_csv(path + "train.csv")
        np.savetxt(path + "z.csv", np.array([[beta, kappa]]), delimiter=',')
        pd.DataFrame({'z_dim': [z_dim], 'other_dim': [other_dim]}).to_csv(path + "z_info.csv")
        ind += 1

    ind = 0
    for beta in np.linspace(0, 9, num=15):
        kappa = 0
        z_dim = 0
        other_dim = 1
        path = "../nongit/local/synth/sims/dims/A1/z1/_" + str(ind) + '/'
        ensure_dir(path)
        pol = np.array(cls.sim_OFF(0.2, beta, kappa, rewards, actions))
        polpd = pd.DataFrame({'0': pol, '1': 1 - pol, 'id': 'id1', 'block': 1})
        polpd.to_csv(path + "policies-.csv")
        train = pd.DataFrame({'reward': rewards, 'action': actions,
                              'state0': '', 'id': 'id1', 'block': 1})
        train.to_csv(path + "train.csv")
        np.savetxt(path + "z.csv", np.array([[beta, kappa]]), delimiter=',')
        pd.DataFrame({'z_dim': [z_dim], 'other_dim': [other_dim]}).to_csv(path + "z_info.csv")
        ind += 1

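# Illustrative usage sketch (added, hypothetical helper): collecting the (beta, kappa)
# values written by generate_off from the per-simulation folders; the glob pattern
# matches the z0 sweep paths used above.
def collect_swept_z(base="../nongit/local/synth/sims/dims/A1/z0/"):
    import glob
    import numpy as np
    zs = []
    for z_path in sorted(glob.glob(base + "_*/z.csv")):
        beta, kappa = np.loadtxt(z_path, delimiter=',')
        zs.append((float(beta), float(kappa)))
    return zs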