def main(robot='doggo', task='goal1', algo='hrl', seed=1, exp_name="test", cpu=1):
    """Launch a Safety-Gym experiment with the given robot/task/algorithm.

    Args:
        robot: one of 'point', 'car', 'doggo' (case-insensitive).
        task: one of 'goal1', 'goal2', 'button1', 'button2', 'push1', 'push2'.
        algo: algorithm name; must be an attribute of ``safe_rl``.
        seed: RNG seed, also passed to the logger.
        exp_name: suffix appended to the auto-generated experiment name.
        cpu: number of MPI processes to fork.

    Raises:
        AssertionError: if robot/task/algo is not in the supported lists.
    """
    # Verify experiment
    robot_list = ['point', 'car', 'doggo']
    task_list = ['goal1', 'goal2', 'button1', 'button2', 'push1', 'push2']
    algo_list = ['ppo', 'ppo_lagrangian', 'trpo', 'trpo_lagrangian', 'cpo', 'hrl']
    algo = algo.lower()
    task = task.capitalize()
    robot = robot.capitalize()
    assert algo in algo_list, "Invalid algo"
    assert task.lower() in task_list, "Invalid task"
    assert robot.lower() in robot_list, "Invalid robot"

    # Hyperparameters
    exp_name = algo + '_' + robot + task + exp_name
    if robot == 'Doggo':
        num_steps = 1e8
        steps_per_epoch = 60000
    else:
        num_steps = 1e5
        steps_per_epoch = 30000
    epochs = int(num_steps / steps_per_epoch)
    save_freq = 50
    target_kl = 0.01
    cost_lim = 25

    # Fork for parallelizing
    mpi_fork(cpu)

    # Prepare Logger.  NOTE: exp_name was assigned above and is always
    # non-empty here, so the fallback branch of this ``or`` never fires.
    exp_name = exp_name or (algo + '_' + robot.lower() + task.lower())
    logger_kwargs = setup_logger_kwargs(exp_name, seed)

    # Algo and Env — resolve the algorithm function by name from safe_rl.
    # getattr replaces the original eval('safe_rl.' + algo): same lookup,
    # no arbitrary-code-execution surface.
    algo = getattr(safe_rl, algo)
    print("algo", algo)
    env_name = 'Safexp-' + robot + task + '-v0'
    algo(env_fn=lambda: gym.make(env_name),
         ac_kwargs=dict(hidden_sizes=(256, 256), ),
         epochs=epochs,
         steps_per_epoch=steps_per_epoch,
         save_freq=save_freq,
         target_kl=target_kl,
         cost_lim=cost_lim,
         seed=seed,
         logger_kwargs=logger_kwargs)
def main(robot, task, algo, seed, exp_name, cpu):
    """Launch a Safety-Gym experiment with the given robot/task/algorithm.

    Args:
        robot: one of 'point', 'car', 'doggo' (case-insensitive).
        task: one of 'goal1', 'goal2', 'button1', 'button2', 'push1', 'push2'.
        algo: one of 'ppo', 'ppo_lagrangian', 'trpo', 'trpo_lagrangian', 'cpo'.
        seed: RNG seed, also passed to the logger.
        exp_name: experiment name (overwritten by the auto-generated one).
        cpu: number of MPI processes to fork.

    Raises:
        AssertionError: if robot/task/algo is not in the supported lists.
    """
    # Verify experiment
    robot_list = ['point', 'car', 'doggo']
    task_list = ['goal1', 'goal2', 'button1', 'button2', 'push1', 'push2']
    algo_list = ['ppo', 'ppo_lagrangian', 'trpo', 'trpo_lagrangian', 'cpo']
    algo = algo.lower()
    task = task.capitalize()
    robot = robot.capitalize()
    assert algo in algo_list, "Invalid algo"
    assert task.lower() in task_list, "Invalid task"
    assert robot.lower() in robot_list, "Invalid robot"

    # Hyperparameters
    exp_name = algo + '_' + robot + task
    if robot == 'Doggo':
        num_steps = 1e8
        steps_per_epoch = 60000
    else:
        # 6M steps / 100k steps-per-epoch => 60 epochs (tuned down from the
        # original 1e7 / 30000 configuration).
        num_steps = 6000000
        steps_per_epoch = 100000
    epochs = int(num_steps / steps_per_epoch)
    print('\n\nNum steps', num_steps, ', epochs', epochs,
          ', steps per epoch', steps_per_epoch, '\n')
    save_freq = 50
    target_kl = 0.01
    cost_lim = 25

    # Fork for parallelizing
    mpi_fork(cpu)

    # Prepare Logger (exp_name is always non-empty here, so the fallback
    # branch of this ``or`` never fires).
    exp_name = exp_name or (algo + '_' + robot.lower() + task.lower())
    logger_kwargs = setup_logger_kwargs(exp_name, seed)

    # Algo and Env — resolve the algorithm function by name from safe_rl.
    # getattr replaces the original eval('safe_rl.' + algo): same lookup,
    # no arbitrary-code-execution surface.
    algo = getattr(safe_rl, algo)
    env_name = 'Safexp-' + robot + task + '-v0'
    algo(
        env_fn=lambda: gym.make(env_name),
        ac_kwargs=dict(hidden_sizes=(256, 256), ),
        epochs=epochs,
        steps_per_epoch=steps_per_epoch,
        save_freq=save_freq,
        target_kl=target_kl,
        cost_lim=cost_lim,
        seed=seed,
        logger_kwargs=logger_kwargs,
    )
def main(robot, task, algo, seed, exp_name, cpu):
    """Launch a Safety-Gym (or custom 'safety') experiment.

    Args:
        robot: one of 'point', 'car', 'doggo' (case-insensitive).
        task: one of 'goal1', 'goal2', 'button1', 'button2', 'push1',
            'push2', or 'safety' (the custom SafetyPointGoal1 config).
        algo: one of 'ppo', 'ppo_lagrangian', 'trpo', 'trpo_lagrangian', 'cpo';
            the matching function must be importable at module level.
        seed: RNG seed, also passed to the logger.
        exp_name: experiment name (overwritten by the auto-generated one).
        cpu: number of MPI processes to fork.

    Raises:
        AssertionError: if robot/task/algo is not in the supported lists.
    """
    # Verify experiment
    robot_list = ['point', 'car', 'doggo']
    task_list = ['goal1', 'goal2', 'button1', 'button2', 'push1', 'push2',
                 'safety']
    algo_list = ['ppo', 'ppo_lagrangian', 'trpo', 'trpo_lagrangian', 'cpo']
    algo = algo.lower()
    task = task.capitalize()
    robot = robot.capitalize()
    assert algo in algo_list, "Invalid algo"
    assert task.lower() in task_list, "Invalid task"
    assert robot.lower() in robot_list, "Invalid robot"

    # Hyperparameters
    exp_name = algo + '_' + robot + task
    if robot == 'Doggo':
        num_steps = 1e8
        steps_per_epoch = 60000
    else:
        num_steps = 3e6
        steps_per_epoch = 30000
    epochs = int(num_steps / steps_per_epoch)
    save_freq = 50
    target_kl = 0.01
    cost_lim = 25

    # Fork for parallelizing
    mpi_fork(cpu)

    # Prepare Logger (exp_name is always non-empty here, so the fallback
    # branch of this ``or`` never fires).
    exp_name = exp_name or (algo + '_' + robot.lower() + task.lower())
    logger_kwargs = setup_logger_kwargs(exp_name, seed)

    # Algo and Env — the validated algo name is looked up in the module
    # globals (replaces the original eval(algo); the whitelist above
    # guarantees the name is one of the imported algorithm functions).
    algo = globals()[algo]
    if task == 'Safety':
        # Custom env built from a project config module rather than gym.make.
        env_config = safety_point_goal_1.SafetyPointGoal1ConfigModule()
        getter_fn = lambda: env_config.get_env()
    else:
        env_name = 'Safexp-' + robot + task + '-v0'
        getter_fn = lambda: gym.make(env_name)
    algo(env_fn=getter_fn,
         ac_kwargs=dict(hidden_sizes=(256, 256), ),
         epochs=epochs,
         steps_per_epoch=steps_per_epoch,
         save_freq=save_freq,
         target_kl=target_kl,
         cost_lim=cost_lim,
         seed=seed,
         logger_kwargs=logger_kwargs)
def main(env, alg, seed, exp_name, cpu):
    """Launch an experiment on an arbitrary gym environment by id.

    Unlike the robot/task variants of this launcher, the environment is
    passed directly as a gym id rather than assembled from robot + task.

    Args:
        env: gym environment id passed to ``gym.make``.
        alg: one of 'ppo', 'ppo_lagrangian', 'trpo', 'trpo_lagrangian', 'cpo'.
        seed: RNG seed, also passed to the logger.
        exp_name: experiment name; falls back to the env id if falsy.
        cpu: number of MPI processes to fork.

    Raises:
        AssertionError: if alg is not in the supported list.
    """
    # Verify experiment
    algo_list = ['ppo', 'ppo_lagrangian', 'trpo', 'trpo_lagrangian', 'cpo']
    algo = alg.lower()
    assert algo in algo_list, "Invalid algo"

    # Hyperparameters
    num_steps = 1e7
    steps_per_epoch = 30000
    epochs = int(num_steps / steps_per_epoch)
    save_freq = 50
    target_kl = 0.01
    cost_lim = 50

    # Fork for parallelizing
    mpi_fork(cpu)

    # Prepare Logger — results land under /var/tmp/.
    env_name = env
    exp_name = exp_name or env_name
    logger_kwargs = setup_logger_kwargs(exp_name, seed, data_dir='/var/tmp/')

    # Algo and Env — resolve the algorithm function by name from safe_rl.
    # getattr replaces the original eval('safe_rl.' + algo): same lookup,
    # no arbitrary-code-execution surface.
    algo = getattr(safe_rl, algo)
    algo(env_fn=lambda: gym.make(env_name),
         ac_kwargs=dict(hidden_sizes=(64, 64), ),
         epochs=epochs,
         steps_per_epoch=steps_per_epoch,
         save_freq=save_freq,
         target_kl=target_kl,
         cost_lim=cost_lim,
         seed=seed,
         logger_kwargs=logger_kwargs)
def main(robot, task, seed, exp_name, cpu):
    """Launch a Safety-Gym experiment with the (fixed) SAC algorithm.

    Args:
        robot: one of 'point', 'car', 'doggo' (case-insensitive).
        task: one of 'goal1', 'goal2', 'button1', 'button2', 'push1', 'push2'.
        seed: RNG seed, also passed to the logger.
        exp_name: experiment name (overwritten by the auto-generated one).
        cpu: number of MPI processes to fork.

    Raises:
        AssertionError: if robot/task is not in the supported lists.
    """
    # Verify experiment
    robot_list = ['point', 'car', 'doggo']
    task_list = ['goal1', 'goal2', 'button1', 'button2', 'push1', 'push2']
    task = task.capitalize()
    robot = robot.capitalize()
    assert task.lower() in task_list, "Invalid task"
    assert robot.lower() in robot_list, "Invalid robot"
    algo = 'sac'  # this launcher variant is hard-wired to SAC

    # Hyperparameters
    exp_name = algo + '_' + robot + task
    if robot == 'Doggo':
        num_steps = 1e8
        steps_per_epoch = 60000
    else:
        num_steps = 1e7
        steps_per_epoch = 30000
    epochs = int(num_steps / steps_per_epoch)
    save_freq = 50
    entropy_constraint = -1.
    cost_lim = 25

    # Fork for parallelizing
    mpi_fork(cpu)

    # Prepare Logger (exp_name is always non-empty here, so the fallback
    # branch of this ``or`` never fires).
    exp_name = exp_name or (algo + '_' + robot.lower() + task.lower())
    logger_kwargs = setup_logger_kwargs(exp_name, seed)

    # Algo and Env — resolve the algorithm function by name from safe_rl.
    # getattr replaces the original eval('safe_rl.' + algo): same lookup,
    # no arbitrary-code-execution surface.
    algo = getattr(safe_rl, algo)
    env_name = 'Safexp-' + robot + task + '-v0'
    algo(env_fn=lambda: gym.make(env_name),
         ac_kwargs=dict(hidden_sizes=(256, 256), ),
         epochs=epochs,
         steps_per_epoch=steps_per_epoch,
         save_freq=save_freq,
         entropy_constraint=entropy_constraint,
         cost_lim=cost_lim,
         seed=seed,
         logger_kwargs=logger_kwargs)
parser.add_argument('--exp_name', type=str, default='runagent') parser.add_argument('--kl', type=float, default=0.01) parser.add_argument('--render', action='store_true') parser.add_argument('--reward_penalized', action='store_true') parser.add_argument('--objective_penalized', action='store_true') parser.add_argument('--learn_penalty', action='store_true') parser.add_argument('--penalty_param_loss', action='store_true') parser.add_argument('--entreg', type=float, default=0.) args = parser.parse_args() try: import safety_gym except: print('Make sure to install Safety Gym to use constrained RL environments.') mpi_fork(args.cpu) # run parallel code with mpi # Prepare logger from safe_rl.utils.run_utils import setup_logger_kwargs logger_kwargs = setup_logger_kwargs(args.exp_name, args.seed) # Prepare agent agent_kwargs = dict(reward_penalized=args.reward_penalized, objective_penalized=args.objective_penalized, learn_penalty=args.learn_penalty, penalty_param_loss=args.penalty_param_loss) if args.agent=='ppo': agent = PPOAgent(**agent_kwargs) elif args.agent=='trpo': agent = TRPOAgent(**agent_kwargs) elif args.agent=='cpo':
parser.add_argument('--cost_constraint', type=float, default=None) parser.add_argument('--cost_lim', type=float, default=None) parser.add_argument('--penalty_lr', type=float, default=5e-2) parser.add_argument('--use_discor', default=False, action='store_true') parser.add_argument('--cost_maxq', default=False, action='store_true') parser.add_argument('--use_act_trans', default=False, action='store_true') args = parser.parse_args() try: import safety_gym except: print( 'Make sure to install Safety Gym to use constrained RL environments.' ) mpi_fork(args.cpu) from safe_rl.utils.run_utils import setup_logger_kwargs logger_kwargs = setup_logger_kwargs(args.exp_name, args.seed) if not args.use_act_trans: critic = mlp_critic else: critic = mlp_critic_act_transform sac(lambda: gym.make(args.env), actor_fn=mlp_actor, critic_fn=critic, ac_kwargs=dict(hidden_sizes=[args.hid] * args.l), gamma=args.gamma, seed=args.seed,
def main(robot, task, algo, seed, exp_name, cpu):
    """Launch a constrained-RL experiment on the custom pointmass env.

    NOTE(review): this variant validates robot/task against Safety-Gym
    lists but then trains on a custom ``pointmass-v0`` env, and it reads a
    module-level ``args`` object (seed/algo/obstacle_type/cost_lim) that is
    not a parameter — confirm ``args`` is defined before calling.

    Args:
        robot: one of 'point', 'car', 'doggo' (case-insensitive).
        task: one of 'goal1', 'goal2', 'button1', 'button2', 'push1', 'push2'.
        algo: one of 'ppo', 'ppo_lagrangian', 'trpo', 'trpo_lagrangian', 'cpo'.
        seed: RNG seed, also passed to the logger.
        exp_name: unused; the logger name is the algo name.
        cpu: number of MPI processes to fork.

    Raises:
        AssertionError: if robot/task/algo is not in the supported lists.
    """
    # Verify experiment
    robot_list = ['point', 'car', 'doggo']
    task_list = ['goal1', 'goal2', 'button1', 'button2', 'push1', 'push2']
    algo_list = ['ppo', 'ppo_lagrangian', 'trpo', 'trpo_lagrangian', 'cpo']
    algo = algo.lower()
    task = task.capitalize()
    robot = robot.capitalize()
    assert algo in algo_list, "Invalid algo"
    assert task.lower() in task_list, "Invalid task"
    assert robot.lower() in robot_list, "Invalid robot"

    # Hyperparameters
    exp_name = algo + '_' + robot + task
    if robot == 'Doggo':
        num_steps = 1e10
        steps_per_epoch = 60000
    else:
        num_steps = 1e10
        steps_per_epoch = 30000
    epochs = int(num_steps / steps_per_epoch)
    save_freq = 50
    target_kl = 0.01

    # Fork for parallelizing
    mpi_fork(cpu)

    # Prepare Logger — log under the bare algorithm name.
    exp_name = algo
    logger_kwargs = setup_logger_kwargs(exp_name, seed)

    # Build a log tag from the module-level args object.
    args.log_name = \
        "seed::" + str(args.seed) + "_algo::" + args.algo + "_task::" + str(args.obstacle_type) + \
        "_cost_lim::" + str(args.cost_lim)

    # Algo and Env — resolve the algorithm function by name from safe_rl.
    # getattr replaces the original eval('safe_rl.' + algo): same lookup,
    # no arbitrary-code-execution surface.
    algo = getattr(safe_rl, algo)

    import gym_env  # registers the custom pointmass env with gym

    # Setup pointmass
    env = gym.make("pointmass-v0", args=args)
    lam = 0.95
    cost_lam = 0.95
    pi_lr = 0.001
    algo(env_fn=lambda: env,
         ac_kwargs=dict(hidden_sizes=(16, 16), ),
         epochs=epochs,
         steps_per_epoch=steps_per_epoch,
         save_freq=save_freq,
         target_kl=target_kl,
         cost_lim=args.cost_lim,
         seed=seed,
         logger_kwargs=logger_kwargs,
         prefix=algo,
         lam=lam,
         cost_lam=cost_lam,
         max_ep_len=1000,
         pi_lr=pi_lr,
         args=args)
def main(robot, task, algo, seed, exp_name, cpu, wrapper):
    """Launch a Safety-Gym experiment, optionally via a custom env wrapper.

    Args:
        robot: one of 'point', 'car', 'doggo' (case-insensitive); hyper-
            parameters are tuned per robot ('Car' vs. everything else).
        task: one of 'goal1', 'goal2', 'button1', 'button2', 'push1', 'push2'.
        algo: one of 'ppo', 'ppo_lagrangian', 'trpo', 'trpo_lagrangian', 'cpo'.
        seed: RNG seed, passed to the logger and the wrapped env.
        exp_name: experiment name (overwritten by the auto-generated one).
        cpu: number of MPI processes to fork.
        wrapper: if truthy, build the env through SafetyGymEnv with a
            per-robot config instead of plain ``gym.make``.

    Raises:
        AssertionError: if robot/task/algo is not in the supported lists.
    """
    # Verify experiment
    robot_list = ['point', 'car', 'doggo']
    task_list = ['goal1', 'goal2', 'button1', 'button2', 'push1', 'push2']
    algo_list = ['ppo', 'ppo_lagrangian', 'trpo', 'trpo_lagrangian', 'cpo']
    algo = algo.lower()
    task = task.capitalize()
    robot = robot.capitalize()
    assert algo in algo_list, "Invalid algo"
    assert task.lower() in task_list, "Invalid task"
    assert robot.lower() in robot_list, "Invalid robot"

    # Hyperparameters
    exp_name = algo + '_' + robot + task
    if robot == 'Car':
        num_steps = 1e7
        steps_per_epoch = 30000
        max_ep_len = 150
        env_config = DEFAULT_ENV_CONFIG_C
    else:  # Point
        num_steps = 1e7
        steps_per_epoch = 30000
        max_ep_len = 300
        env_config = DEFAULT_ENV_CONFIG_P
    epochs = int(num_steps / steps_per_epoch)
    save_freq = 10
    target_kl = 0.01
    cost_lim = 5

    # Fork for parallelizing
    mpi_fork(cpu)

    # Prepare Logger (exp_name is always non-empty here, so the fallback
    # branch of this ``or`` never fires).
    exp_name = exp_name or (algo + '_' + robot.lower() + task.lower())
    logger_kwargs = setup_logger_kwargs(exp_name, seed)

    # Algo and Env — resolve the algorithm function by name from safe_rl.
    # getattr replaces the original eval('safe_rl.' + algo): same lookup,
    # no arbitrary-code-execution surface.
    algo = getattr(safe_rl, algo)
    env_name = 'Safexp-' + robot + task + '-v0'
    if not wrapper:
        env_fn = lambda: gym.make(env_name)
    else:
        # task is e.g. 'Goal1': split into name ('Goal') and level (1).
        env_fn = lambda: SafetyGymEnv(robot=robot,
                                      task=task[:-1],
                                      level=int(task[-1]),
                                      seed=seed,
                                      config=env_config)
    algo(env_fn=env_fn,
         ac_kwargs=dict(hidden_sizes=(256, 256), ),
         epochs=epochs,
         max_ep_len=max_ep_len,
         steps_per_epoch=steps_per_epoch,
         save_freq=save_freq,
         target_kl=target_kl,
         cost_lim=cost_lim,
         seed=seed,
         logger_kwargs=logger_kwargs)
def main(robot, task, algo, seed, exp_name, cpu, constraint, use_aug,
         dense_coeff):
    """Launch a Safety-Gym experiment, optionally wrapped in a ConstraintEnv.

    Args:
        robot: one of 'point', 'car', 'doggo' (case-insensitive).
        task: one of 'goal1', 'goal2', 'button1', 'button2', 'push1', 'push2'.
        algo: one of 'ppo', 'ppo_lagrangian', 'trpo', 'trpo_lagrangian', 'cpo'.
        seed: RNG seed, also passed to the logger.
        exp_name: experiment name; also names the ../tests/<exp_name> dir
            used for logging and the final sentinel file.
        cpu: number of MPI processes to fork.
        constraint: constraint name resolved via ``get_constraint``; None
            disables the constraint wrapper.
        use_aug: if True, concatenate the constraint state to observations.
        dense_coeff: > 0 enables a dense constraint signal with this weight.

    Raises:
        AssertionError: if robot/task/algo is not in the supported lists.
    """
    # Verify experiment
    robot_list = ['point', 'car', 'doggo']
    task_list = ['goal1', 'goal2', 'button1', 'button2', 'push1', 'push2']
    algo_list = ['ppo', 'ppo_lagrangian', 'trpo', 'trpo_lagrangian', 'cpo']
    algo = algo.lower()
    task = task.capitalize()
    robot = robot.capitalize()
    assert algo in algo_list, "Invalid algo"
    assert task.lower() in task_list, "Invalid task"
    assert robot.lower() in robot_list, "Invalid robot"

    # Hyperparameters
    if robot == 'Doggo':
        num_steps = 1e8
        steps_per_epoch = 60000
    else:
        num_steps = 1e7
        steps_per_epoch = 30000
    epochs = int(num_steps / steps_per_epoch)
    save_freq = 50
    target_kl = 0.01
    cost_lim = 25

    # Fork for parallelizing
    mpi_fork(cpu)

    # Prepare Logger — results go under ../tests/<exp_name>, no datestamp.
    exp_name = exp_name or (algo + '_' + robot.lower() + task.lower())
    logger_kwargs = setup_logger_kwargs(exp_name,
                                        seed,
                                        data_dir=str(
                                            pathlib.Path('../tests', exp_name)),
                                        datestamp=False)

    # Algo and Env — resolve the algorithm function by name from safe_rl.
    # getattr replaces the original eval('safe_rl.' + algo): same lookup,
    # no arbitrary-code-execution surface.
    algo = getattr(safe_rl, algo)
    env_name = 'Safexp-' + robot + task + '-v0'

    def env_fn():
        """Build the (optionally constraint-wrapped) flattened env."""
        env = gym.make(env_name)
        if constraint is not None:  # identity check, was ``!= None``
            if use_aug:
                augmentation_type = 'constraint_state_concat'
            else:
                augmentation_type = 'None'
            use_dense = dense_coeff > 0.
            env = ConstraintEnv(
                env,
                [get_constraint(constraint)(False, use_dense, dense_coeff)],
                augmentation_type=augmentation_type,
                log_dir='../tests/' + exp_name)
        fcenv = FlattenObservation(env)
        return fcenv

    algo(env_fn=env_fn,
         ac_kwargs=dict(hidden_sizes=(256, 256), ),
         epochs=epochs,
         steps_per_epoch=steps_per_epoch,
         save_freq=save_freq,
         target_kl=target_kl,
         cost_lim=cost_lim,
         seed=seed,
         logger_kwargs=logger_kwargs)

    # Sentinel file marking a completed run.
    (pathlib.Path('../tests') / exp_name / 'final.txt').touch()