def launch(args):
    """Build the env with the goal-tolerance decay curriculum and train DDPG.

    Args:
        args: parsed CLI namespace; uses env_name, seed, cuda.
    """
    # Experiment parameters: goal-tolerance decay curriculum plus
    # observation / joint-representation options forwarded to the env.
    exp_kwargs = {
        'goal_tolerance_parameters': {
            'inc_tol_obs': True,
            'initial_tol': 0.020,
            'final_tol': 0.001,
            'N_ts': 200000,
            'function': 'decay',
            'set_tol': 0
        },
        'relative_q': True,
        'joint_representation': 'trig',
        'resample_joints': False,
        'normalize_obs': False
    }
    # create the ddpg_agent, test relative decay curriculum
    env = gym.make(args.env_name, **exp_kwargs)
    # One seed per MPI rank so workers explore differently but reproducibly.
    rank_seed = args.seed + MPI.COMM_WORLD.Get_rank()
    env.seed(rank_seed)
    random.seed(rank_seed)
    np.random.seed(rank_seed)
    torch.manual_seed(rank_seed)
    if args.cuda:
        torch.cuda.manual_seed(rank_seed)
    # get the environment parameters
    env_params = get_env_params(env)
    # create the ddpg agent to interact with the environment
    ddpg_trainer = ddpg_agent(args, env, env_params)
    ddpg_trainer.learn()
def launch(args):
    """Make the env, seed everything per MPI rank, and train the selected algorithm.

    args.alg selects among 'gac', 'sac', 'td3'; anything else falls back to DDPG.
    """
    env = gym.make(args.env_name)
    # One seed per MPI rank so workers explore differently but reproducibly.
    rank_seed = args.seed + MPI.COMM_WORLD.Get_rank()
    env.seed(rank_seed)
    random.seed(rank_seed)
    np.random.seed(rank_seed)
    torch.manual_seed(rank_seed)
    if args.cuda:
        torch.cuda.manual_seed(rank_seed)
    # get the environment parameters
    env_params = get_env_params(env)
    # Dispatch table: algorithm flag -> (startup banner, trainer class).
    trainers = {
        'gac': ("Start GAC...", gac_agent),
        'sac': ("Start SAC...", sac_agent),
        'td3': ('Start TD3...', td3_agent),
    }
    # Unknown flags fall back to the DDPG trainer, as before.
    banner, agent_cls = trainers.get(args.alg, ("Start DDPG...", ddpg_agent))
    print(banner)
    trainer = agent_cls(args, env, env_params)
    trainer.learn()
def launch(args):
    """Create the env, seed all RNGs, and run evaluation for the chosen algorithm.

    Fixes: the local previously named ``eval`` shadowed the builtin (renamed to
    ``evaluator``), and the 13-arm if/elif chain is replaced by a dispatch table.

    Args:
        args: parsed CLI namespace; uses env_name, seed, cuda, algo.

    Raises:
        NotImplementedError: if args.algo is not a recognized algorithm flag.
    """
    # create the ddpg_agent
    env = gym.make(args.env_name)
    # set random seeds for reproduce (single process here — no MPI rank offset)
    env.seed(args.seed)
    random.seed(args.seed)
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    if args.cuda:
        torch.cuda.manual_seed(args.seed)
    # get the environment parameters
    env_params = get_env_params(env)
    # Dispatch table: algo flag -> (agent class, method invoked with PATH).
    # 'forward' evaluates learned dynamics; every other algo visualizes a policy.
    dispatch = {
        'ddpg': (ddpg_agent, 'visualize'),
        'sac': (sac_agent, 'visualize'),
        'forward': (dynamics_learner, 'eval_dynamics'),
        'inverse': (inverse_dynamics_learner, 'visualize'),
        'double': (double_agent, 'visualize'),
        'share': (double_agent_share, 'visualize'),
        'shaping_share': (double_agent_shaping_share, 'visualize'),
        'attention': (double_agent_attention, 'visualize'),
        'sgg': (sgg_agent, 'visualize'),
        'alt': (alternate_agent, 'visualize'),
        'br': (sac_br_agent, 'visualize'),
        'br_ppo': (sac_br_agent_ppo, 'visualize'),
        'skill': (sac_skill_agent, 'visualize'),
    }
    if args.algo not in dispatch:
        raise NotImplementedError
    agent_cls, method_name = dispatch[args.algo]
    evaluator = agent_cls(args, env, env_params)
    # PATH is a module-level checkpoint path defined elsewhere in the file.
    getattr(evaluator, method_name)(PATH)
def launch(args):
    """Make the env, seed everything per MPI rank, and train a DDPG agent.

    Args:
        args: parsed CLI namespace; uses env_name, seed, cuda.
    """
    # create the ddpg_agent
    env = gym.make(args.env_name)
    # One seed per MPI rank so workers explore differently but reproducibly.
    rank_seed = args.seed + MPI.COMM_WORLD.Get_rank()
    env.seed(rank_seed)
    random.seed(rank_seed)
    np.random.seed(rank_seed)
    torch.manual_seed(rank_seed)
    if args.cuda:
        torch.cuda.manual_seed(rank_seed)
    # get the environment parameters
    env_params = get_env_params(env)
    # create the ddpg agent to interact with the environment
    ddpg_trainer = ddpg_agent(args, env, env_params)
    ddpg_trainer.learn()
def launch(args):
    """Build the sparse touch-sensor block env (200-step episode cap) and train DDPG.

    Args:
        args: parsed CLI namespace; uses seed, cuda.
    """
    # Wrap the raw env in a TimeLimit so episodes terminate after 200 steps.
    env = TimeLimit(SharedBlockTouchSensorsEnvSparse(), 200)
    # One seed per MPI rank so workers explore differently but reproducibly.
    rank_seed = args.seed + MPI.COMM_WORLD.Get_rank()
    env.seed(rank_seed)
    random.seed(rank_seed)
    np.random.seed(rank_seed)
    torch.manual_seed(rank_seed)
    if args.cuda:
        torch.cuda.manual_seed(rank_seed)
    # get the environment parameters
    env_params = get_env_params(env)
    # create the ddpg agent to interact with the environment
    ddpg_trainer = ddpg_agent(args, env, env_params)
    ddpg_trainer.learn()
def launch(args):
    """Make the MultiRL variant of the requested env and train a DDPG agent.

    Args:
        args: parsed CLI namespace; uses env_name, seed, cuda.
    """
    # The env id is namespaced under the gym_multiRL package registry.
    env = gym.make('gym_multiRL:MultiRL{}'.format(args.env_name))
    # One seed per MPI rank so workers explore differently but reproducibly.
    rank_seed = args.seed + MPI.COMM_WORLD.Get_rank()
    env.seed(rank_seed)
    random.seed(rank_seed)
    np.random.seed(rank_seed)
    torch.manual_seed(rank_seed)
    if args.cuda:
        torch.cuda.manual_seed(rank_seed)
    # get the environment parameters (this variant also needs args)
    env_params = get_env_params(env, args)
    # create the ddpg agent to interact with the environment
    ddpg_trainer = ddpg_agent(args, env, env_params)
    ddpg_trainer.learn()
def launch(args):
    """Make the env, set up TensorBoard logging, and train a (HER-)DDPG agent.

    Args:
        args: parsed CLI namespace; uses env_name, seed, cuda, is_her.
    """
    env = gym.make(args.env_name)
    # One seed per MPI rank so workers explore differently but reproducibly.
    rank_seed = args.seed + MPI.COMM_WORLD.Get_rank()
    env.seed(rank_seed)
    random.seed(rank_seed)
    np.random.seed(rank_seed)
    torch.manual_seed(rank_seed)
    if args.cuda:
        torch.cuda.manual_seed(rank_seed)
    # get the environment parameters
    env_params = get_env_params(env)
    # Clear out the previous run's logs, then attach a TensorBoard writer.
    log_dir = './experiments/log'
    reset_dir(log_dir)
    tb_wrt = SummaryWriter(log_dir)
    # create the ddpg agent to interact with the environment
    ddpg_trainer = ddpg_agent(args, env, env_params, tb_wrt, is_her=args.is_her)
    ddpg_trainer.learn()