Example #1
    def align_model(model_path):
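        # Align a previously trained model (loaded from model_path) on the synthetic data,
        # with the Hessian term enabled; progress is logged to run.log in the output directory.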
        output = '../nongit/local/synth_normal/align/'
        with LogFile(output, 'run.log'):
            action, reward, state, ids, seq_lengths = Synth.get_data()

            tf.reset_default_graph()

            model = HYPMDD(enc_cells=50,
                           dec_cells=3,
                           a_size=2,
                           s_size=0,
                           latent_size=2,
                           n_T=action.shape[1],
                           static_loops=True,
                           mmd_coef=50)

            ensure_dir(output)

            action, reward, state, seq_lengths, test_action, test_reward, test_state, test_seq_lengths = \
                Synth.generate_train_test(action, reward, state, seq_lengths, ids, 0.3, output)

            Synth.opt_model_align(model,
                                  action,
                                  reward,
                                  state,
                                  seq_lengths,
                                  test_action,
                                  test_reward,
                                  test_state,
                                  test_seq_lengths,
                                  output + 'model/',
                                  model_path,
                                  hessian_term=True,
                                  hessian_lr=0.0001,
                                  _beta=1)
Example #2
    def predict(cls, rnn2rnn, action, reward, state, ids, seq_lengths,
                output_path, model_path):
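        # Restore the trained encoder/decoder from model_path, run it on the given data, and
        # write the latent means (z_pred) together with the ids to z_mean.csv if output_path is set.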

        saver = tf.train.Saver()
        ckpt = tf.train.get_checkpoint_state(model_path)

        with tf.Session() as sess:
            saver.restore(sess, ckpt.model_checkpoint_path)
            enc_dict_feed = rnn2rnn.enc.enc_beh_feed(action, reward, state,
                                                     seq_lengths)
            dec_dict_feed = rnn2rnn.dec.dec_beh_feed(action, reward, state,
                                                     seq_lengths)

            enc_loss, dec_loss, z_mean = sess.run(
                [rnn2rnn.enc.loss, rnn2rnn.dec.loss, rnn2rnn.enc.z_pred],
                feed_dict={
                    **enc_dict_feed,
                    **dec_dict_feed
                })

            cls.report_model(dec_dict_feed, enc_dict_feed, rnn2rnn, sess, -1,
                             {})

            if output_path is not None:
                ensure_dir(output_path)
                df = pd.DataFrame(z_mean)
                df = pd.concat([df, pd.DataFrame({'id': ids})], axis=1)
                df.to_csv(output_path + 'z_mean.csv', index=False)

        return z_mean
Example #3
 def write_diff(cls, save_path):
     # Save the output of `git diff` next to the model so each run records the exact code state.
     DLogger.logger().debug("writing git diff to " + save_path + 'diff.txt')
     ensure_dir(save_path)
     import subprocess
     diff_text = 'no git found'
     try:
         diff_text = subprocess.run(
             ["git", "diff"], stdout=subprocess.PIPE).stdout.decode('utf-8')
     except Exception:
         # Fall back to the placeholder if git is unavailable or the call fails.
         pass
     with open(save_path + "diff.txt", "w") as f:
         f.write(diff_text)
Example #4
    def opt_model_train_test(cls, model_path):
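        # Train the MMD autoencoder on the BD data with a stratified train/test split
        # and a piecewise-constant learning-rate schedule (1e-3 -> 1e-4 -> 1e-5).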
        output = '../nongit/local/BD/opt/'
        with LogFile(output, 'run.log'):
            action, reward, state, ids, seq_lengths = BD.get_data()

            tf.reset_default_graph()

            with tf.device('/device:GPU:0'):
                model = HYPMMD(enc_cells=20,
                               dec_cells=3,
                               a_size=2,
                               s_size=0,
                               latent_size=2,
                               n_T=action.shape[1],
                               static_loops=False,
                               mmd_coef=50)

            ensure_dir(output)

            actions_train, actions_test, rewards_train, rewards_test, seq_train, seq_test, id_train, id_test = \
                stratified_train_test_split(action, reward, state, ids, seq_lengths)

            DLogger.logger().debug("test points: " +
                                   str(actions_test.shape[0]))
            DLogger.logger().debug("train points: " +
                                   str(actions_train.shape[0]))

            def lr_schedule(t):
                if t < 2000:
                    _lr = 0.001
                elif t < 5000:
                    _lr = 0.0001
                else:
                    _lr = 0.00001
                return _lr

            BD.opt_model_mddae(model,
                               actions_train,
                               rewards_train,
                               None,
                               seq_train,
                               actions_test,
                               rewards_test,
                               None,
                               seq_test,
                               output + '/model/',
                               model_path,
                               hessian_term=False,
                               lr_schedule=lr_schedule)
Example #5
 def predict_z(cls, model_path):
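     # Rebuild the model graph and extract the latent z for the full BD data set
     # via BD.predict, restoring the checkpoint stored at model_path.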
     output = '../nongit/local/BD/opt/'
     with LogFile(output, 'run.log'):
         action, reward, state, ids, seq_lengths = BD.get_data()
         DLogger.logger().debug("data points: " + str(action.shape[0]))
         tf.reset_default_graph()
         ensure_dir(output)
         model = HYPMMD(enc_cells=20,
                        dec_cells=3,
                        a_size=2,
                        s_size=0,
                        latent_size=2,
                        n_T=action.shape[1],
                        static_loops=False)
         BD.predict(model, action, reward, state, ids, seq_lengths,
                    '../nongit/local/BD/', model_path)
Example #6
    def create_onpolicy(cls,
                        n_cells,
                        z,
                        n_T,
                        output_path,
                        model_path,
                        mode="ossi"):
        tf.reset_default_graph()
        tf.set_random_seed(1)

        dec = cls.get_enc_dec(n_cells, z)

        ensure_dir(output_path)

        with LogFile(output_path, 'run.log'):
            saver = tf.train.Saver()
            ckpt = tf.train.get_checkpoint_state(model_path)

            with tf.Session() as sess:
                saver.restore(sess, ckpt.model_checkpoint_path)
                freq = 1
                baseline = 4
                if mode == 'A1':
                    a1_period = lambda trial: trial < 15
                elif mode == 'A2':
                    a1_period = lambda trial: False
                else:
                    raise Exception("Unknown mode")
                states, policies, rewards, choices, rnn_states = dec.simulate_env(
                    sess,
                    10,
                    a2_generic(a1_period,
                               lambda trial: trial < 1,
                               lambda trial: trial in [],
                               init_state=None,
                               init_action=-1,
                               init_reward=0),
                    greedy=True)

                train = format_to_training_data(rewards, choices, states)

                if output_path is not None:
                    Export.policies({'id1': {
                        '1': policies
                    }}, output_path, 'policies-.csv')
                    Export.export_train(train, output_path, 'train.csv')
                    np.savetxt(output_path + 'z.csv', z[0], delimiter=',')
Example #7
    def train_test(model_path):
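        # Train the autoencoder on the synthetic data with a 30% held-out test split
        # and a piecewise-constant learning-rate schedule.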
        output = '../nongit/local/synth_normal/opt/'
        with LogFile(output, 'run.log'):
            action, reward, state, ids, seq_lengths = Synth.get_data()

            tf.reset_default_graph()

            model = HYPMDD(enc_cells=50,
                           dec_cells=3,
                           a_size=2,
                           s_size=0,
                           latent_size=2,
                           n_T=action.shape[1],
                           static_loops=False,
                           mmd_coef=50)

            ensure_dir(output)

            action, reward, state, seq_lengths, test_action, test_reward, test_state, test_seq_lengths = \
                Synth.generate_train_test(action, reward, state, seq_lengths, ids, 0.3, output)

            def lr_schedule(t):
                if t < 2000:
                    _lr = 0.001
                elif t < 5000:
                    _lr = 0.001
                else:
                    _lr = 0.0001
                return _lr

            Synth.opt_model_mddae(model,
                                  action,
                                  reward,
                                  state,
                                  seq_lengths,
                                  test_action,
                                  test_reward,
                                  test_state,
                                  test_seq_lengths,
                                  output + 'model/',
                                  model_path,
                                  hessian_term=False,
                                  lr_schedule=lr_schedule)
Example #8
    def align_model(cls, model_path):
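        # Align a previously trained model on the BD data with the Hessian term enabled
        # (_beta=0.5, hessian_lr=1e-4, _h=0.1), using a stratified train/test split.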
        output = '../nongit/local/BD/align/'
        with LogFile(output, 'run.log'):
            action, reward, state, ids, seq_lengths = BD.get_data()

            tf.reset_default_graph()

            with tf.device('/device:GPU:0'):
                model = HYPMMD(enc_cells=20,
                               dec_cells=3,
                               a_size=2,
                               s_size=0,
                               latent_size=2,
                               n_T=action.shape[1],
                               static_loops=True,
                               mmd_coef=2)

            ensure_dir(output)

            actions_train, actions_test, rewards_train, rewards_test, seq_train, seq_test, id_train, id_test = \
                stratified_train_test_split(action, reward, state, ids, seq_lengths)

            DLogger.logger().debug("test points: " +
                                   str(actions_test.shape[0]))
            DLogger.logger().debug("train points: " +
                                   str(actions_train.shape[0]))

            BD.opt_model_align(model,
                               actions_train,
                               rewards_train,
                               None,
                               seq_train,
                               actions_test,
                               rewards_test,
                               None,
                               seq_test,
                               output + '/model/',
                               model_path,
                               hessian_term=True,
                               _beta=0.5,
                               hessian_lr=0.0001,
                               _h=0.1)
Example #9
    def opt_model_mddae(cls,
                        rnn2rnn,
                        action,
                        reward,
                        state,
                        seq_lengths,
                        test_action=None,
                        test_reward=None,
                        test_state=None,
                        test_seq_lengths=None,
                        save_path=None,
                        init_path=None,
                        hessian_term=False,
                        _beta=1,
                        lr_schedule=lambda t: 0.001):
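        # Joint training loop: minimises dec.loss + beta * enc.loss + h * dec.hess_loss with Adam,
        # logs train losses every step and test losses every 200 steps, and checkpoints the model.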

        cls.write_diff(save_path)
        # Optimizers
        trainables = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES)
        trainables_dec = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                                           scope='dec')
        trainables_enc = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                                           scope='enc')
        beta = tf.placeholder(Const.FLOAT, shape=())
        h = tf.placeholder(Const.FLOAT, shape=())
        lr = tf.placeholder(Const.FLOAT, shape=())

        opt_all = cls._get_apply_grads(
            rnn2rnn.dec.loss + beta * rnn2rnn.enc.loss +
            h * rnn2rnn.dec.hess_loss, trainables, lr, tf.train.AdamOptimizer)

        opt_enc = cls._get_apply_grads(
            rnn2rnn.dec.loss + beta * rnn2rnn.enc.loss +
            h * rnn2rnn.dec.hess_loss, trainables_enc, lr,
            tf.train.AdamOptimizer)

        opt_dec = cls._get_apply_grads(
            rnn2rnn.dec.loss + beta * rnn2rnn.enc.loss +
            h * rnn2rnn.dec.hess_loss, trainables_dec, lr,
            tf.train.AdamOptimizer)

        if init_path is not None:
            saver = tf.train.Saver()
            ckpt = tf.train.get_checkpoint_state(init_path)
            DLogger.logger().debug('loaded model from: ' + init_path)
        else:
            init = tf.global_variables_initializer()

        with tf.Session(config=tf.ConfigProto(
                allow_soft_placement=True)) as sess:
            if init_path is not None:
                saver.restore(sess, ckpt.model_checkpoint_path)
            else:
                sess.run(init)

            if save_path:
                iter_path = save_path + '/iter-init/'
                saver = tf.train.Saver()
                ensure_dir(iter_path)
                saver.save(sess,
                           iter_path + "model.ckpt",
                           write_meta_graph=False)
                DLogger.logger().debug("Model saved in path: %s" % iter_path)

            try:

                for t in range(100000):

                    cur_beta = {beta: _beta}

                    # adaptive learning rate
                    _lr = lr_schedule(t)

                    if not hessian_term:
                        _h = 0.0
                    else:
                        _h = min(t / 1000, 0.1)

                    cur_lr = {lr: _lr}
                    cur_h = {h: _h}

                    enc_dict_feed = rnn2rnn.enc.enc_beh_feed(
                        BaseOpt.random_shuffle_action(action, seq_lengths),
                        reward, state, seq_lengths)
                    dec_dict_feed = rnn2rnn.dec.dec_beh_feed(
                        BaseOpt.random_shuffle_action(action, seq_lengths),
                        reward, state, seq_lengths)
                    _, enc_loss, dec_loss, hess_loss, rand_loss, z_grad, z_cov = sess.run(
                        [opt_all] + [
                            rnn2rnn.enc.loss, rnn2rnn.dec.loss,
                            rnn2rnn.dec.hess_loss, rnn2rnn.dec.sloss,
                            rnn2rnn.dec.z_grad, rnn2rnn.enc.z_cov
                        ],
                        feed_dict={
                            **enc_dict_feed,
                            **dec_dict_feed,
                            **cur_beta,
                            **cur_lr,
                            **cur_h
                        })

                    DLogger.logger().debug(
                        "global iter = {:4d} "
                        "enc loss: {:7.4f} "
                        "dec loss: {:7.4f} "
                        "hess loss: {:7.4f} "
                        "rand loss: {:7.4f} "
                        "beta: {:7.4f} "
                        "LR: {:7.4f} "
                        "grad z {} "
                        "z-cov: {}".format(
                            t, enc_loss, dec_loss, hess_loss, rand_loss, _beta,
                            _lr, z_grad,
                            str(
                                np.array2string(z_cov.flatten(),
                                                precision=3).replace('\n',
                                                                     ''))))

                    if t % 200 == 0:
                        if test_action is not None:
                            test_enc_dict_feed = rnn2rnn.enc.enc_beh_feed(
                                BaseOpt.random_shuffle_action(
                                    test_action, test_seq_lengths),
                                test_reward, test_state, test_seq_lengths)
                            test_dec_dict_feed = rnn2rnn.dec.dec_beh_feed(
                                BaseOpt.random_shuffle_action(
                                    test_action, test_seq_lengths),
                                test_reward, test_state, test_seq_lengths)

                            enc_loss, dec_loss, hess_loss, rand_loss, z_grad, z_cov = sess.run(
                                [
                                    rnn2rnn.enc.loss, rnn2rnn.dec.loss,
                                    rnn2rnn.dec.hess_loss, rnn2rnn.dec.sloss,
                                    rnn2rnn.dec.z_grad, rnn2rnn.enc.z_cov
                                ],
                                feed_dict={
                                    **test_enc_dict_feed,
                                    **test_dec_dict_feed,
                                    **cur_beta,
                                    **cur_lr,
                                    **cur_h
                                })

                            DLogger.logger().debug(
                                "TEST data: global iter = {:4d} "
                                "enc loss: {:7.4f} "
                                "dec loss: {:7.4f} "
                                "hess loss: {:7.4f} "
                                "rand loss: {:7.4f} "
                                "beta: {:7.4f} "
                                "LR: {:7.4f} "
                                "z grad {} "
                                "z-cov: {}".format(
                                    t, enc_loss, dec_loss, hess_loss,
                                    rand_loss, _beta, _lr, z_grad,
                                    str(
                                        np.array2string(z_cov.flatten(),
                                                        precision=3).replace(
                                                            '\n', ''))))

                        if save_path:
                            iter_path = save_path + '/iter-' + str(t) + '/'
                            saver = tf.train.Saver()
                            ensure_dir(iter_path)
                            saver.save(sess,
                                       iter_path + "model.ckpt",
                                       write_meta_graph=False)
                            DLogger.logger().debug("Model saved in path: %s" %
                                                   iter_path)

            finally:
                if save_path:
                    saver = tf.train.Saver()
                    ensure_dir(save_path)
                    save_path = saver.save(sess, save_path + "model.ckpt")
                    DLogger.logger().debug("Model saved in path: %s" %
                                           save_path)
Example #10
    def generate_off(cls):
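        # Sweep kappa with beta fixed (z0 folders) and beta with kappa fixed (z1 folders),
        # simulating each setting with sim_OFF and exporting the policies, training data
        # and the true latent point.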
        rewards = [0] * 10
        rewards[4] = 1
        # rewards[14] = 1
        actions = [0] * 10

        ind = 0
        for kappa in np.linspace(-1.2, 1.2, num=15):
            beta = 3
            z_dim = 1
            other_dim = 0

            path = "../nongit/local/synth/sims/dims/A1/z0/_" + str(ind) + '/'
            ensure_dir(path)

            pol = np.array(cls.sim_OFF(0.2, beta, kappa, rewards, actions))
            polpd = pd.DataFrame({
                '0': pol,
                '1': 1 - pol,
                'id': 'id1',
                'block': 1
            })
            polpd.to_csv(path + "policies-.csv")

            train = pd.DataFrame({
                'reward': rewards,
                'action': actions,
                'state0': '',
                'id': 'id1',
                'block': 1
            })
            train.to_csv(path + "train.csv")

            np.savetxt(path + "z.csv",
                       np.array([[beta, kappa]]),
                       delimiter=',')

            pd.DataFrame({
                'z_dim': [z_dim],
                'other_dim': [other_dim]
            }).to_csv(path + "z_info.csv")
            ind += 1

        ind = 0
        for beta in np.linspace(0, 9, num=15):
            kappa = 0
            z_dim = 0
            other_dim = 1

            path = "../nongit/local/synth/sims/dims/A1/z1/_" + str(ind) + '/'
            ensure_dir(path)

            pol = np.array(cls.sim_OFF(0.2, beta, kappa, rewards, actions))
            polpd = pd.DataFrame({
                '0': pol,
                '1': 1 - pol,
                'id': 'id1',
                'block': 1
            })
            polpd.to_csv(path + "policies-.csv")

            train = pd.DataFrame({
                'reward': rewards,
                'action': actions,
                'state0': '',
                'id': 'id1',
                'block': 1
            })
            train.to_csv(path + "train.csv")

            np.savetxt(path + "z.csv",
                       np.array([[beta, kappa]]),
                       delimiter=',')

            pd.DataFrame({
                'z_dim': [z_dim],
                'other_dim': [other_dim]
            }).to_csv(path + "z_info.csv")
            ind += 1
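The two sweeps in generate_off repeat the same simulation-and-export steps. As a minimal refactoring sketch (assuming the same helpers ensure_dir and cls.sim_OFF and the file layout used above), the shared logic could live in a hypothetical _export_sim helper:

    def _export_sim(cls, path, beta, kappa, rewards, actions, z_dim, other_dim):
        # Hypothetical helper, not part of the original code: exports one simulated
        # agent exactly as each loop body above does.
        ensure_dir(path)
        pol = np.array(cls.sim_OFF(0.2, beta, kappa, rewards, actions))
        pd.DataFrame({
            '0': pol,
            '1': 1 - pol,
            'id': 'id1',
            'block': 1
        }).to_csv(path + "policies-.csv")
        pd.DataFrame({
            'reward': rewards,
            'action': actions,
            'state0': '',
            'id': 'id1',
            'block': 1
        }).to_csv(path + "train.csv")
        np.savetxt(path + "z.csv", np.array([[beta, kappa]]), delimiter=',')
        pd.DataFrame({
            'z_dim': [z_dim],
            'other_dim': [other_dim]
        }).to_csv(path + "z_info.csv")

Each loop body would then only build path and call cls._export_sim(path, beta, kappa, rewards, actions, z_dim, other_dim).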