# ===== Exemplo n.º 1 (scraped example separator; stray "0" vote-count artifact removed) =====
# Training hyperparameters.
n_iter = 5000        # total number of training iterations
disp_step = 10       # iterations between progress displays
save_step = 100      # iterations between checkpoint saves
is_render = args.render
env_id = args.env
save_dir = "./save_" + env_id


#Create multiple environments
#----------------------------
# NOTE(review): n_env, n_step, gamma and lamb are assumed to be defined
# earlier in this file (e.g. parsed from args) — confirm.
env = MultiEnv([make_env(i, env_id=env_id) for i in range(n_env)])
a_dim = env.ac_space.shape[0]   # continuous action dimension
s_dim = env.ob_space.shape[0]   # observation (state) dimension
a_low = env.ac_space.low[0]     # action lower bound (first component)
a_high = env.ac_space.high[0]   # action upper bound (first component)
runner = MultiEnvRunner(env, s_dim, a_dim, n_step, gamma, lamb)


#Placeholders
#----------------------------
#action_ph:          (mb_size, a_dim)
#old_neg_logprob_ph: (mb_size)
#old_v_pred_ph:      (mb_size)
#adv_ph:             (mb_size)
#return_ph:          (mb_size)
action_ph = tf.placeholder(tf.float32, [None, a_dim], name="action")
# Fixed typo in the op name: "negtive" -> "negative".
old_neg_logprob_ph = tf.placeholder(tf.float32, [None], name="old_negative_log_prob")
old_v_pred_ph = tf.placeholder(tf.float32, [None], name="old_value_pred")
adv_ph = tf.placeholder(tf.float32, [None], name="advantage")
return_ph = tf.placeholder(tf.float32, [None], name="return")
# Named for consistency with the other placeholders (was anonymous).
lr_ph = tf.placeholder(tf.float32, [], name="learning_rate")
# ===== Exemplo n.º 2 (scraped example separator; stray "0" vote-count artifact removed) =====
# Training hyperparameters.
eps = 1e-5           # optimizer epsilon
n_iter = 300000      # total number of training iterations
disp_step = 100      # iterations between progress displays
save_step = 1000     # iterations between checkpoint saves
is_render = args.render
env_id = args.env
save_dir = "./save_" + env_id

# Spawn one sub-environment per worker index and wrap them together.
sub_envs = [make_env(i, env_id=env_id) for i in range(n_env)]
env = MultiEnv(sub_envs)
s_dim = env.ob_space.shape[0]   # observation (state) dimension
a_dim = env.ac_space.shape[0]   # continuous action dimension
a_low = env.ac_space.low[0]     # action lower bound (first component)
a_high = env.ac_space.high[0]   # action upper bound (first component)
runner = MultiEnvRunner(env, s_dim, a_dim, n_step, gamma)

# Session pinned to n_env threads; GPU memory grows on demand.
config = tf.ConfigProto(
    intra_op_parallelism_threads=n_env,
    inter_op_parallelism_threads=n_env,
)
config.gpu_options.allow_growth = True
sess = tf.Session(config=config)
policy = PolicyModel(sess, s_dim, a_dim, a_low, a_high, name="policy")

# Graph placeholders (mb_size = minibatch size):
#   action_ph: (mb_size, a_dim)
#   adv_ph:    (mb_size)
#   reward_ph: (mb_size)
action_ph = tf.placeholder(tf.float32, [None, a_dim], name="action")
# ===== Exemplo n.º 3 (scraped example separator; stray "0" vote-count artifact removed) =====
# Training hyperparameters.
lr_decay = 0.99      # learning-rate decay factor
eps = 1e-5           # optimizer epsilon
n_iter = 30000       # total number of training iterations
disp_step = 10       # iterations between progress displays
save_step = 100      # iterations between checkpoint saves
is_render = args.render
env_id = args.env
save_dir = "./save_" + env_id

# Spawn one (optionally unwrapped) sub-environment per worker index.
workers = [
    make_env(i, env_id=env_id, unwrap=args.unwrap) for i in range(n_env)
]
env = MultiEnv(workers)
img_height, img_width, c_dim = env.ob_space.shape  # image observation shape
a_dim = env.ac_space.n                             # number of discrete actions
runner = MultiEnvRunner(
    env, img_height, img_width, c_dim, n_step, n_stack, gamma, lamb)

# Session pinned to n_env threads; soft placement and on-demand GPU memory.
config = tf.ConfigProto(
    allow_soft_placement=True,
    intra_op_parallelism_threads=n_env,
    inter_op_parallelism_threads=n_env,
)
config.gpu_options.allow_growth = True
sess = tf.Session(config=config)
# The policy sees n_stack frames stacked along the channel axis.
policy = PolicyModel(
    sess, img_height, img_width, c_dim * n_stack, a_dim, "policy")

# Graph placeholders (mb_size = minibatch size):
#   action_ph:          (mb_size)
#   old_neg_logprob_ph: (mb_size)
# ===== Exemplo n.º 4 (scraped example separator; stray "0" vote-count artifact removed) =====
# Training hyperparameters.
lr = 7e-4            # initial learning rate
lr_decay = 0.99      # learning-rate decay factor
eps = 1e-5           # optimizer epsilon
n_iter = 300000      # total number of training iterations
disp_step = 100      # iterations between progress displays
save_step = 1000     # iterations between checkpoint saves
is_render = args.render
env_id = args.env
save_dir = "./save_" + env_id

# Spawn one sub-environment per worker index and wrap them together.
sub_envs = [make_env(i, env_id=env_id) for i in range(n_env)]
env = MultiEnv(sub_envs)
img_height, img_width, c_dim = env.ob_space.shape  # image observation shape
a_dim = env.ac_space.n                             # number of discrete actions
runner = MultiEnvRunner(
    env, img_height, img_width, c_dim, n_step, n_stack, gamma)

# Session pinned to n_env threads; GPU memory grows on demand.
config = tf.ConfigProto(
    intra_op_parallelism_threads=n_env,
    inter_op_parallelism_threads=n_env,
)
config.gpu_options.allow_growth = True
sess = tf.Session(config=config)
# The policy sees n_stack frames stacked along the channel axis.
policy = PolicyModel(sess, img_height, img_width, c_dim * n_stack, a_dim)

# Graph placeholders (one entry per minibatch sample).
action_ph = tf.placeholder(tf.int32, [None], name="action")
adv_ph = tf.placeholder(tf.float32, [None], name="advantage")
discount_return_ph = tf.placeholder(
    tf.float32, [None], name="discounted_return")
# ===== Exemplo n.º 5 (scraped example separator; stray "0" vote-count artifact removed) =====
# Training hyperparameters.
eps = 1e-5           # optimizer epsilon
n_iter = 300000      # total number of training iterations
disp_step = 100      # iterations between progress displays
save_step = 1000     # iterations between checkpoint saves
is_render = args.render
env_id = args.env
save_dir = "./save_" + env_id

# Build the vectorized environment: one sub-env per worker index.
env = MultiEnv([make_env(i, env_id=env_id) for i in range(n_env)])
s_dim = env.ob_space.shape[0]   # observation (state) dimension
a_dim = env.ac_space.shape[0]   # continuous action dimension
a_low = env.ac_space.low[0]     # action lower bound (first component)
a_high = env.ac_space.high[0]   # action upper bound (first component)
runner = MultiEnvRunner(env, s_dim, a_dim, n_step, gamma)

# Thread-limited session with on-demand GPU memory growth.
session_config = tf.ConfigProto(
    intra_op_parallelism_threads=n_env,
    inter_op_parallelism_threads=n_env,
)
session_config.gpu_options.allow_growth = True
sess = tf.Session(config=session_config)
policy = PolicyModel(sess, s_dim, a_dim, a_low, a_high, name="policy")

# Graph placeholders (mb_size = minibatch size):
#   action_ph: (mb_size, a_dim)
#   adv_ph:    (mb_size)
#   reward_ph: (mb_size)
action_ph = tf.placeholder(tf.float32, [None, a_dim], name="action")