def __init__(self, dim_input, dim_output, dim_hidden=32, num_layers=4, num_particles=2, max_test_step=5):
    """Configure model sizes, learning-rate/data placeholders, and the underlying BNN.

    Args:
        dim_input: width of the network input.
        dim_output: width of the network output.
        dim_hidden: width of each hidden layer.
        num_layers: number of layers in the network.
        num_particles: number of weight particles kept for the BNN.
        max_test_step: adaptation-step budget used at test time.
    """
    # --- architecture sizes ---
    self.dim_input = dim_input
    self.dim_output = dim_output
    self.dim_hidden = dim_hidden
    self.num_layers = num_layers
    self.num_particles = num_particles

    # --- learning rates (overridable at session.run time via feed_dict) ---
    self.follow_lr = tf.placeholder_with_default(input=FLAGS.follow_lr, name='follow_lr', shape=[])
    self.leader_lr = tf.placeholder_with_default(input=FLAGS.leader_lr, name='leader_lr', shape=[])
    self.meta_lr = tf.placeholder_with_default(input=FLAGS.meta_lr, name='meta_lr', shape=[])

    # --- test-time adaptation budget ---
    self.max_test_step = max_test_step

    # --- underlying Bayesian neural network ---
    self.bnn = BNN(dim_input=self.dim_input, dim_output=self.dim_output, dim_hidden=self.dim_hidden, num_layers=self.num_layers, is_bnn=True)
    # expose the BNN's weight construction and forward pass on this object
    self.construct_network_weights = self.bnn.construct_network_weights
    self.forward_network = self.bnn.forward_network

    # --- input placeholders (shape left unspecified; fed per batch) ---
    self.follow_x = tf.placeholder(dtype=tf.float32, name='follow_x')
    self.follow_y = tf.placeholder(dtype=tf.float32, name='follow_y')
    self.leader_x = tf.placeholder(dtype=tf.float32, name='leader_x')
    self.leader_y = tf.placeholder(dtype=tf.float32, name='leader_y')
    self.valid_x = tf.placeholder(dtype=tf.float32, name='valid_x')
    self.valid_y = tf.placeholder(dtype=tf.float32, name='valid_y')

    # weight particles are created lazily elsewhere
    self.W_network_particles = None
def run_cartpole_expl():
    """Run the exploration experiment on CartPole-v0.

    Builds a policy network and a BNN dynamics model sized from the
    environment's observation/action spaces, then trains via Experiment
    with information-gain computation enabled.
    """
    env = gym.make('CartPole-v0')

    # Space-derived sizes. For CartPole the action space is Discrete, so
    # action_space.shape is empty and np.prod gives 1.
    obs_dim = np.prod(env.observation_space.shape)
    act_dim = np.prod(env.action_space.shape)
    n_actions = env.action_space.n

    # Policy network over discrete actions.
    policy = Policy(obs_dim, 256, n_actions)

    # Dynamics model: predicts next observation from (obs, action).
    model_in = int(obs_dim + act_dim)
    model_out = int(obs_dim)
    # NOTE(review): positional order here is (input, hidden, output) — confirm
    # this matches BNN's constructor signature.
    model = BNN(model_in, 64, model_out)

    exp = Experiment(policy, model, env, exp_name="cartpole_expl", train_model=True, calc_inf_gain=True)
    exp.train()
def init_estimator(self, W=None, B=None):
    """Create a new BNN estimator with this object's layer layout.

    Args:
        W: optional initial weights passed through to BNN.
        B: optional initial biases passed through to BNN.

    Returns:
        A freshly constructed BNN instance.
    """
    estimator = BNN(layers=self.layers, W=W, B=B)
    return estimator