def train(env_id, num_timesteps, seed):
    """Run continuous-control ACKTR on a monitored copy of the requested Gym env."""
    env = gym.make(env_id)
    env = bench.Monitor(env, logger.get_dir())
    set_global_seeds(seed)
    env.seed(seed)
    gym.logger.setLevel(logging.WARN)
    with tf.Session(config=tf.ConfigProto()):
        obs_size = env.observation_space.shape[0]
        act_size = env.action_space.shape[0]
        # Value function and policy are built in separate variable scopes.
        with tf.variable_scope("vf"):
            value_fn = NeuralNetValueFunction(obs_size, act_size)
        with tf.variable_scope("pi"):
            pi = GaussianMlpPolicy(obs_size, act_size)
        learn(env,
              policy=pi,
              vf=value_fn,
              gamma=0.99,
              lam=0.97,
              timesteps_per_batch=2500,
              desired_kl=0.002,
              num_timesteps=num_timesteps,
              animate=False)
    env.close()
def train(env_id, num_timesteps, seed):
    """Train ACKTR on the custom Lynx environment.

    :param env_id: unused — the Lynx env is constructed directly (kept for a
        uniform signature with the other train() variants)
    :param num_timesteps: total number of environment steps to train for
    :param seed: global RNG seed (NOTE(review): env itself is not seeded here —
        the Lynx env presumably lacks a seed() method; confirm)
    """
    env = Lynx()
    set_global_seeds(seed)
    gym.logger.setLevel(logging.WARN)
    with tf.Session(config=tf.ConfigProto()) as sess:
        ob_dim = env.observation_space.shape[0]
        ac_dim = env.action_space.shape[0]
        with tf.variable_scope("vf"):
            vf = NeuralNetValueFunction(ob_dim, ac_dim)
        with tf.variable_scope("pi"):
            # MLP is a module-level flag selecting the policy class.
            if MLP:
                policy = MlpPolicy(sess, ob_space=env.observation_space,
                                   ac_space=env.action_space)
            else:
                policy = GaussianMlpPolicy(ob_dim, ac_dim)
        learn(env, policy=policy, vf=vf, gamma=0.99, lam=0.97,
              timesteps_per_batch=50, desired_kl=0.002,
              num_timesteps=num_timesteps, animate=False)
    # Fix: release environment resources like every other train() variant;
    # also removed commented-out Monitor/seed dead code.
    env.close()
def train(env_id, num_timesteps, seed, alg, lr, momentum):
    """Train ACKTR on a MuJoCo env with a selectable optimizer variant.

    :param env_id: MuJoCo environment id
    :param num_timesteps: total number of environment steps to train for
    :param seed: RNG seed forwarded to make_mujoco_env
    :param alg: one of 'sgd', 'mid', 'geo' — selects which learn() to import
    :param lr: learning rate forwarded to learn()
    :param momentum: momentum forwarded to learn()
    :raises ValueError: if alg is not one of the recognized names
    """
    env = make_mujoco_env(env_id, seed)
    if alg == 'sgd':
        from baselines.acktr.acktr_cont import learn
    elif alg == 'mid':
        from baselines.acktr.acktr_cont_midpoint import learn
    elif alg == 'geo':
        from baselines.acktr.acktr_cont_geo import learn
    else:
        # Fix: name the rejected value instead of raising a bare ValueError.
        raise ValueError("unknown alg %r; expected 'sgd', 'mid' or 'geo'" % alg)
    nprocs = 4
    with tf.Session(config=tf.ConfigProto(allow_soft_placement=True,
                                          intra_op_parallelism_threads=nprocs,
                                          inter_op_parallelism_threads=nprocs)):
        ob_dim = env.observation_space.shape[0]
        ac_dim = env.action_space.shape[0]
        with tf.variable_scope("vf"):
            vf = NeuralNetValueFunction(ob_dim, ac_dim)
        # This variant passes the scope name to the policy instead of
        # wrapping it in a variable_scope block.
        policy = GaussianMlpPolicy(ob_dim, ac_dim, 'pi')
        learn(env, policy=policy, vf=vf, gamma=0.99, lam=0.97,
              timesteps_per_batch=2500, desired_kl=0.002,
              num_timesteps=num_timesteps, animate=False,
              lr=lr, momentum=momentum)
    env.close()
def train(args, num_timesteps, seed):
    """Train ACKTR on an environment built from parsed CLI args.

    :param args: parsed arguments consumed by common.make_env
    :param num_timesteps: total number of environment steps to train for
    :param seed: unused here — kept for a uniform signature with the other
        train() variants (NOTE(review): seeding appears to be the caller's job)
    """
    # Local imports keep TF/baselines out of module import time.
    # Fix: dropped unused imports (make_mujoco_env, mujoco_arg_parser).
    import tensorflow as tf
    from baselines.acktr.acktr_cont import learn
    from baselines.acktr.policies import GaussianMlpPolicy
    from baselines.acktr.value_functions import NeuralNetValueFunction

    env = common.make_env(args)
    env.reward_scale = 0.01  # presumably shrinks rewards for stability — confirm
    with tf.Session(config=tf.ConfigProto()):
        ob_dim = env.observation_space.shape[0]
        ac_dim = env.action_space.shape[0]
        with tf.variable_scope("vf"):
            vf = NeuralNetValueFunction(ob_dim, ac_dim)
        with tf.variable_scope("pi"):
            policy = GaussianMlpPolicy(ob_dim, ac_dim)
        learn(env, policy=policy, vf=vf, gamma=0.99, lam=0.97,
              timesteps_per_batch=2500, desired_kl=0.002,
              num_timesteps=num_timesteps, animate=False)
    env.close()
def run_train_task(vv):
    """Launch one ACKTR training run configured by the variant dict *vv*.

    Expected keys: 'env', 'path_length', 'discount', 'batch_size',
    'num_timesteps'.
    """
    # Instantiate the environment class stored in the variant.
    env = vv['env'](log_scale_limit=0.0, max_path_length=vv['path_length'])
    with tf.Session(config=tf.ConfigProto()):
        n_obs = env.observation_space.shape[0]
        n_act = env.action_space.shape[0]
        with tf.variable_scope("vf"):
            baseline = NeuralNetValueFunction(n_obs, n_act)
        with tf.variable_scope("pi"):
            agent = GaussianMlpPolicy(n_obs, n_act)
        learn(env,
              policy=agent,
              vf=baseline,
              gamma=vv['discount'],
              lam=0.97,
              timesteps_per_batch=vv['batch_size'],
              desired_kl=0.002,
              num_timesteps=vv['num_timesteps'],
              max_path_length=vv['path_length'],
              animate=False)
    env.close()
def train(env_id, num_timesteps, seed):
    """Train continuous-control ACKTR on a classic-control Gym env."""
    env = make_gym_control_env(env_id, seed)
    with tf.Session(config=tf.ConfigProto()):
        n_obs = env.observation_space.shape[0]
        n_act = env.action_space.shape[0]
        with tf.variable_scope("vf"):
            critic = NeuralNetValueFunction(n_obs, n_act)
        with tf.variable_scope("pi"):
            actor = GaussianMlpPolicy(n_obs, n_act)
        learn(env,
              policy=actor,
              vf=critic,
              gamma=0.99,
              lam=0.97,
              timesteps_per_batch=2500,
              desired_kl=0.002,
              num_timesteps=num_timesteps,
              animate=False)
    env.close()
def train(env_id, num_timesteps, seed):
    """Train continuous-control ACKTR on a MuJoCo Gym environment."""
    env = make_mujoco_env(env_id, seed)
    with tf.Session(config=tf.ConfigProto()):
        dim_obs = env.observation_space.shape[0]
        dim_act = env.action_space.shape[0]
        # Separate scopes keep the value-function and policy variables apart.
        with tf.variable_scope("vf"):
            value_net = NeuralNetValueFunction(dim_obs, dim_act)
        with tf.variable_scope("pi"):
            policy_net = GaussianMlpPolicy(dim_obs, dim_act)
        learn(env,
              policy=policy_net,
              vf=value_net,
              gamma=0.99,
              lam=0.97,
              timesteps_per_batch=2500,
              desired_kl=0.002,
              num_timesteps=num_timesteps,
              animate=False)
    env.close()
def train(env_id, num_timesteps, seed, render):
    """Train ACKTR on the particle LearningEnvironment.

    :param render: when False, the env is built with rendering disabled
    """
    env = LearningEnvironment(num_particles=PARTICLES, disable_render=not render)
    env = bench.Monitor(env, os.path.join(logger.get_dir(), "monitor.json"))
    set_global_seeds(seed)
    gym.logger.setLevel(logging.WARN)
    with tf.Session(config=tf.ConfigProto()) as session:
        n_obs = env.observation_space.shape[0]
        n_act = env.action_space.shape[0]
        with tf.variable_scope("vf"):
            value_fn = NeuralNetValueFunction(n_obs, n_act)
        with tf.variable_scope("pi"):
            pi = GaussianMlpPolicy(n_obs, n_act)
        # This variant uses a larger batch and a tighter KL target than the
        # stock MuJoCo settings.
        learn(env,
              policy=pi,
              vf=value_fn,
              gamma=0.99,
              lam=0.97,
              timesteps_per_batch=8000,
              desired_kl=0.0002,
              num_timesteps=num_timesteps,
              animate=False)
    env.close()
def train(env_id, num_timesteps, seed):
    """
    Train an ACKTR model on MuJoCo.

    (Fix: docstring previously said "atari", but this path builds a MuJoCo
    env via make_mujoco_env.)

    :param env_id: (str) Environment ID
    :param num_timesteps: (int) The total number of samples
    :param seed: (int) The initial seed for training
    """
    env = make_mujoco_env(env_id, seed)
    with tf.Session(config=tf.ConfigProto()):
        ob_dim = env.observation_space.shape[0]
        ac_dim = env.action_space.shape[0]
        with tf.variable_scope("vf"):
            value_fn = NeuralNetValueFunction(ob_dim, ac_dim)
        with tf.variable_scope("pi"):
            policy = GaussianMlpPolicy(ob_dim, ac_dim)
        learn(env, policy=policy, value_fn=value_fn, gamma=0.99, lam=0.97,
              timesteps_per_batch=2500, desired_kl=0.002,
              num_timesteps=num_timesteps, animate=False)
    env.close()
def train(env_id, num_timesteps, seed):
    """Train ACKTR on a Gym env, attaching a Monitor only when logging is on."""
    env = gym.make(env_id)
    # Only wrap with a Monitor when a logger directory is configured.
    if logger.get_dir():
        env = bench.Monitor(env, os.path.join(logger.get_dir(), "monitor.json"))
    set_global_seeds(seed)
    env.seed(seed)
    gym.logger.setLevel(logging.WARN)
    with tf.Session(config=tf.ConfigProto()) as session:
        obs_size = env.observation_space.shape[0]
        act_size = env.action_space.shape[0]
        with tf.variable_scope("vf"):
            critic = NeuralNetValueFunction(obs_size, act_size)
        with tf.variable_scope("pi"):
            actor = GaussianMlpPolicy(obs_size, act_size)
        learn(env,
              policy=actor,
              vf=critic,
              gamma=0.99,
              lam=0.97,
              timesteps_per_batch=2500,
              desired_kl=0.002,
              num_timesteps=num_timesteps,
              animate=False)
    env.close()
def train(env_id, num_timesteps, seed):
    """Train ACKTR on a Gym env under MPI, with per-rank monitor files."""
    env = gym.make(env_id)
    rank = MPI.COMM_WORLD.Get_rank()
    # Each MPI rank writes its own monitor file (skipped when no log dir).
    env = bench.Monitor(env, logger.get_dir() and
                        os.path.join(logger.get_dir(), str(rank)))
    set_global_seeds(seed)
    env.seed(seed)
    gym.logger.setLevel(logging.WARN)
    # Grow GPU memory on demand instead of grabbing it all up front.
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    with tf.Session(config=config):
        n_obs = env.observation_space.shape[0]
        n_act = env.action_space.shape[0]
        with tf.variable_scope("vf"):
            value_fn = NeuralNetValueFunction(n_obs, n_act)
        with tf.variable_scope("pi"):
            pi = GaussianMlpPolicy(n_obs, n_act)
        learn(env,
              policy=pi,
              vf=value_fn,
              gamma=0.99,
              lam=0.97,
              timesteps_per_batch=2500,
              desired_kl=0.002,
              num_timesteps=num_timesteps,
              animate=False)
    env.close()
def train(env_id, num_timesteps, seed, save, gamma, lam, desired_kl):
    """Train ACKTR on a MuJoCo env and save the scalar result of learn().

    :param save: path passed to np.savetxt for the final return value
    :param gamma: discount factor forwarded to learn()
    :param lam: GAE lambda forwarded to learn()
    :param desired_kl: KL step-size target forwarded to learn()
    """
    env = make_mujoco_env(env_id, seed)
    with tf.Session(config=tf.ConfigProto()):
        n_obs = env.observation_space.shape[0]
        n_act = env.action_space.shape[0]
        with tf.variable_scope("vf"):
            value_fn = NeuralNetValueFunction(n_obs, n_act)
        with tf.variable_scope("pi"):
            pi = GaussianMlpPolicy(n_obs, n_act)
        result = learn(env,
                       policy=pi,
                       vf=value_fn,
                       gamma=gamma,
                       lam=lam,
                       desired_kl=desired_kl,
                       timesteps_per_batch=2500,
                       num_timesteps=num_timesteps,
                       animate=False)
    env.close()
    # Persist the single scalar returned by learn() as a one-element array.
    np.savetxt(save, np.array([result]))
from baselines.acktr.value_functions import NeuralNetValueFunction
from baselines.common import set_global_seeds

# Build the SCARA robot env and show its starting state.
env = gym.make('GazeboModularScara3DOF-v0')
initial_observation = env.reset()
print("Initial observation: ", initial_observation)
env.render()

seed = 0
set_global_seeds(seed)
env.seed(seed)

with tf.Session(config=tf.ConfigProto()) as session:
    n_obs = env.observation_space.shape[0]
    n_act = env.action_space.shape[0]
    with tf.variable_scope("vf"):
        value_fn = NeuralNetValueFunction(n_obs, n_act)
    with tf.variable_scope("pi"):
        pi = GaussianMlpPolicy(n_obs, n_act)
    # Note the looser KL target (0.02) used for this robot task, plus
    # model save/restore hooks left at their empty defaults.
    learn(env,
          policy=pi,
          vf=value_fn,
          gamma=0.99,
          lam=0.97,
          timesteps_per_batch=2500,
          desired_kl=0.02,
          num_timesteps=1e6,
          animate=False,
          save_model_with_prefix='',
          restore_model_from_file='')