def run(env: gym.Env, args: argparse.Namespace, state_dim: int, action_dim: int):
    """Run training or test.

    Args:
        env (gym.Env): openAI Gym environment with continuous action space
        args (argparse.Namespace): arguments including training settings
        state_dim (int): dimension of states
        action_dim (int): dimension of actions

    """
    # create multiple envs
    env_single = env
    env_gen = env_generator("LunarLanderContinuous-v2", args)
    env_multi = make_envs(env_gen, n_envs=hyper_params["N_WORKERS"])

    # create models
    hidden_sizes_actor = [256, 256]
    hidden_sizes_critic = [256, 256]

    actor = GaussianDist(
        input_size=state_dim,
        output_size=action_dim,
        hidden_sizes=hidden_sizes_actor,
        hidden_activation=torch.tanh,
    ).to(device)

    critic = MLP(
        input_size=state_dim,
        output_size=1,
        hidden_sizes=hidden_sizes_critic,
        hidden_activation=torch.tanh,
    ).to(device)

    # create optimizer
    actor_optim = optim.Adam(
        actor.parameters(),
        lr=hyper_params["LR_ACTOR"],
        weight_decay=hyper_params["WEIGHT_DECAY"],
    )

    critic_optim = optim.Adam(
        critic.parameters(),
        lr=hyper_params["LR_CRITIC"],
        weight_decay=hyper_params["WEIGHT_DECAY"],
    )

    # make tuples to create an agent
    models = (actor, critic)
    optims = (actor_optim, critic_optim)

    # create an agent
    agent = Agent(env_single, env_multi, args, hyper_params, models, optims)

    # run
    if args.test:
        agent.test()
    else:
        agent.train()
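# The run()/get() functions in this section all assume a handful of module-level
# names: standard imports, a shared `device`, and a `hyper_params` dict defined in
# each example file. The sketch below is illustrative only; the network, noise, and
# env-helper classes (MLP, GaussianDist, FlattenMLP, GaussianNoise, env_generator,
# make_envs, ...) come from the repository's own modules, and the hyper-parameter
# values shown here are placeholders, not the repository's tuned defaults.
import argparse

import gym
import numpy as np
import torch
import torch.optim as optim

# shared device used by every .to(device) call in these examples
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# example hyper-parameter dict covering only the keys read by the A2C example above
hyper_params = {
    "N_WORKERS": 8,
    "LR_ACTOR": 3e-4,
    "LR_CRITIC": 1e-3,
    "WEIGHT_DECAY": 0.0,
}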
def run(env: gym.Env, args: argparse.Namespace, state_dim: int, action_dim: int):
    """Run training or test.

    Args:
        env (gym.Env): openAI Gym environment with continuous action space
        args (argparse.Namespace): arguments including training settings
        state_dim (int): dimension of states
        action_dim (int): dimension of actions

    """
    # create models
    actor = GaussianDist(
        input_size=state_dim,
        output_size=action_dim,
        hidden_sizes=[256, 256],
    ).to(device)

    critic = MLP(
        input_size=state_dim,
        output_size=1,
        hidden_sizes=[256, 256],
    ).to(device)

    # create optimizer
    actor_optim = optim.Adam(
        actor.parameters(),
        lr=hyper_params["LR_ACTOR"],
        weight_decay=hyper_params["WEIGHT_DECAY"],
    )

    critic_optim = optim.Adam(
        critic.parameters(),
        lr=hyper_params["LR_CRITIC"],
        weight_decay=hyper_params["WEIGHT_DECAY"],
    )

    # make tuples to create an agent
    models = (actor, critic)
    optims = (actor_optim, critic_optim)

    # create an agent
    agent = Agent(env, args, hyper_params, models, optims)

    # run
    if args.test:
        agent.test()
    else:
        agent.train()
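# Hypothetical entry point showing how the run() functions above are typically
# wired up; the CLI flag and the environment id are illustrative and not the
# repository's actual argument parser.
if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="run a continuous-control example")
    parser.add_argument(
        "--test", action="store_true", help="evaluate a trained agent instead of training"
    )
    args = parser.parse_args()

    env = gym.make("LunarLanderContinuous-v2")
    state_dim = env.observation_space.shape[0]
    action_dim = env.action_space.shape[0]

    run(env, args, state_dim, action_dim)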
def run(env: gym.Env, args: argparse.Namespace, state_dim: int, action_dim: int):
    """Run training or test.

    Args:
        env (gym.Env): openAI Gym environment with continuous action space
        args (argparse.Namespace): arguments including training settings
        state_dim (int): dimension of states
        action_dim (int): dimension of actions

    """
    hidden_sizes_actor = [400, 300]
    hidden_sizes_critic = [400, 300]

    # create actor
    actor = MLP(
        input_size=state_dim,
        output_size=action_dim,
        hidden_sizes=hidden_sizes_actor,
        output_activation=torch.tanh,
    ).to(device)

    actor_target = MLP(
        input_size=state_dim,
        output_size=action_dim,
        hidden_sizes=hidden_sizes_actor,
        output_activation=torch.tanh,
    ).to(device)
    actor_target.load_state_dict(actor.state_dict())

    # create critic
    critic1 = FlattenMLP(
        input_size=state_dim + action_dim,
        output_size=1,
        hidden_sizes=hidden_sizes_critic,
    ).to(device)

    critic2 = FlattenMLP(
        input_size=state_dim + action_dim,
        output_size=1,
        hidden_sizes=hidden_sizes_critic,
    ).to(device)

    critic_target1 = FlattenMLP(
        input_size=state_dim + action_dim,
        output_size=1,
        hidden_sizes=hidden_sizes_critic,
    ).to(device)

    critic_target2 = FlattenMLP(
        input_size=state_dim + action_dim,
        output_size=1,
        hidden_sizes=hidden_sizes_critic,
    ).to(device)

    critic_target1.load_state_dict(critic1.state_dict())
    critic_target2.load_state_dict(critic2.state_dict())

    # concat critic parameters to use one optim
    critic_parameters = list(critic1.parameters()) + list(critic2.parameters())

    # create optimizers
    actor_optim = optim.Adam(
        actor.parameters(),
        lr=hyper_params["LR_ACTOR"],
        weight_decay=hyper_params["WEIGHT_DECAY"],
    )

    critic_optim = optim.Adam(
        critic_parameters,
        lr=hyper_params["LR_CRITIC"],
        weight_decay=hyper_params["WEIGHT_DECAY"],
    )

    # noise instances to add randomness to actions
    exploration_noise = GaussianNoise(
        action_dim,
        hyper_params["EXPLORATION_NOISE"],
        hyper_params["EXPLORATION_NOISE"],
    )

    target_policy_noise = GaussianNoise(
        action_dim,
        hyper_params["TARGET_POLICY_NOISE"],
        hyper_params["TARGET_POLICY_NOISE"],
    )

    # make tuples to create an agent
    models = (actor, actor_target, critic1, critic2, critic_target1, critic_target2)
    optims = (actor_optim, critic_optim)

    # create an agent
    agent = TD3Agent(
        env, args, hyper_params, models, optims, exploration_noise, target_policy_noise
    )

    # run
    if args.test:
        agent.test()
    else:
        agent.train()
def get(env, args):
    """Create and return an agent.

    Args:
        env (gym.Env): openAI Gym environment with continuous action space
        args (argparse.Namespace): arguments including training settings

    """
    state_dim = env.observation_space.shape[0]
    action_dim = env.action_space.shape[0]

    if hyper_params["USE_HER"]:
        state_dim *= 2

    hidden_sizes_actor = hyper_params["NETWORK"]["ACTOR_HIDDEN_SIZES"]
    hidden_sizes_vf = hyper_params["NETWORK"]["VF_HIDDEN_SIZES"]
    hidden_sizes_qf = hyper_params["NETWORK"]["QF_HIDDEN_SIZES"]

    # target entropy
    target_entropy = -np.prod((action_dim,)).item()  # heuristic

    # create actor
    actor = TanhGaussianDistParams(
        input_size=state_dim,
        output_size=action_dim,
        hidden_sizes=hidden_sizes_actor,
    ).to(device)

    # create v_critic
    vf = MLP(
        input_size=state_dim,
        output_size=1,
        hidden_sizes=hidden_sizes_vf,
    ).to(device)

    vf_target = MLP(
        input_size=state_dim,
        output_size=1,
        hidden_sizes=hidden_sizes_vf,
    ).to(device)
    vf_target.load_state_dict(vf.state_dict())

    # create q_critic
    qf_1 = FlattenMLP(
        input_size=state_dim + action_dim,
        output_size=1,
        hidden_sizes=hidden_sizes_qf,
    ).to(device)

    qf_2 = FlattenMLP(
        input_size=state_dim + action_dim,
        output_size=1,
        hidden_sizes=hidden_sizes_qf,
    ).to(device)

    # create optimizers
    actor_optim = optim.Adam(
        actor.parameters(),
        lr=hyper_params["LR_ACTOR"],
        weight_decay=hyper_params["WEIGHT_DECAY"],
    )

    vf_optim = optim.Adam(
        vf.parameters(),
        lr=hyper_params["LR_VF"],
        weight_decay=hyper_params["WEIGHT_DECAY"],
    )

    qf_1_optim = optim.Adam(
        qf_1.parameters(),
        lr=hyper_params["LR_QF1"],
        weight_decay=hyper_params["WEIGHT_DECAY"],
    )

    qf_2_optim = optim.Adam(
        qf_2.parameters(),
        lr=hyper_params["LR_QF2"],
        weight_decay=hyper_params["WEIGHT_DECAY"],
    )

    # make tuples to create an agent
    models = (actor, vf, vf_target, qf_1, qf_2)
    optims = (actor_optim, vf_optim, qf_1_optim, qf_2_optim)

    # HER
    her = LunarLanderContinuousHER() if hyper_params["USE_HER"] else None

    # create an agent
    return Agent(env, args, hyper_params, models, optims, target_entropy, her)
def get(env, args):
    """Create and return an agent.

    Args:
        env (gym.Env): openAI Gym environment with continuous action space
        args (argparse.Namespace): arguments including training settings

    """
    state_dim = env.observation_space.shape[0]
    action_dim = env.action_space.shape[0]

    hidden_sizes_actor = hyper_params["NETWORK"]["ACTOR_HIDDEN_SIZES"]
    hidden_sizes_critic = hyper_params["NETWORK"]["CRITIC_HIDDEN_SIZES"]

    # create actor
    actor = MLP(
        input_size=state_dim,
        output_size=action_dim,
        hidden_sizes=hidden_sizes_actor,
        output_activation=torch.tanh,
    ).to(device)

    actor_target = MLP(
        input_size=state_dim,
        output_size=action_dim,
        hidden_sizes=hidden_sizes_actor,
        output_activation=torch.tanh,
    ).to(device)
    actor_target.load_state_dict(actor.state_dict())

    # create critic1
    critic1 = MLP(
        input_size=state_dim + action_dim,
        output_size=1,
        hidden_sizes=hidden_sizes_critic,
    ).to(device)

    critic1_target = MLP(
        input_size=state_dim + action_dim,
        output_size=1,
        hidden_sizes=hidden_sizes_critic,
    ).to(device)
    critic1_target.load_state_dict(critic1.state_dict())

    # create critic2
    critic2 = MLP(
        input_size=state_dim + action_dim,
        output_size=1,
        hidden_sizes=hidden_sizes_critic,
    ).to(device)

    critic2_target = MLP(
        input_size=state_dim + action_dim,
        output_size=1,
        hidden_sizes=hidden_sizes_critic,
    ).to(device)
    critic2_target.load_state_dict(critic2.state_dict())

    # concat critic parameters to use one optim
    critic_parameters = list(critic1.parameters()) + list(critic2.parameters())

    # create optimizer
    actor_optim = optim.Adam(
        actor.parameters(),
        lr=hyper_params["LR_ACTOR"],
        weight_decay=hyper_params["WEIGHT_DECAY"],
    )

    critic_optim = optim.Adam(
        critic_parameters,
        lr=hyper_params["LR_CRITIC"],
        weight_decay=hyper_params["WEIGHT_DECAY"],
    )

    # noise
    exploration_noise = GaussianNoise(
        action_dim,
        min_sigma=hyper_params["EXPLORATION_NOISE"],
        max_sigma=hyper_params["EXPLORATION_NOISE"],
    )

    target_policy_noise = GaussianNoise(
        action_dim,
        min_sigma=hyper_params["TARGET_POLICY_NOISE"],
        max_sigma=hyper_params["TARGET_POLICY_NOISE"],
    )

    # make tuples to create an agent
    models = (actor, actor_target, critic1, critic1_target, critic2, critic2_target)
    optims = (actor_optim, critic_optim)
    noises = (exploration_noise, target_policy_noise)

    # create an agent
    return Agent(env, args, hyper_params, models, optims, noises)
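# Illustrative shape of the hyper_params entries read by the get() function above;
# the key names come from the code, but every numeric value here is a placeholder
# rather than the repository's tuned default.
hyper_params = {
    "NETWORK": {
        "ACTOR_HIDDEN_SIZES": [400, 300],
        "CRITIC_HIDDEN_SIZES": [400, 300],
    },
    "LR_ACTOR": 1e-3,
    "LR_CRITIC": 1e-3,
    "WEIGHT_DECAY": 0.0,
    "EXPLORATION_NOISE": 0.1,
    "TARGET_POLICY_NOISE": 0.2,
}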
def run(env: gym.Env, args: argparse.Namespace, state_dim: int, action_dim: int):
    """Run training or test.

    Args:
        env (gym.Env): openAI Gym environment with continuous action space
        args (argparse.Namespace): arguments including training settings
        state_dim (int): dimension of states
        action_dim (int): dimension of actions

    """
    hidden_sizes_actor = [256, 256]
    hidden_sizes_vf = [256, 256]
    hidden_sizes_qf = [256, 256]

    # target entropy
    target_entropy = -np.prod((action_dim,)).item()  # heuristic

    # create actor
    actor = TanhGaussianDistParams(
        input_size=state_dim,
        output_size=action_dim,
        hidden_sizes=hidden_sizes_actor,
    ).to(device)

    # create v_critic
    vf = MLP(
        input_size=state_dim,
        output_size=1,
        hidden_sizes=hidden_sizes_vf,
    ).to(device)

    vf_target = MLP(
        input_size=state_dim,
        output_size=1,
        hidden_sizes=hidden_sizes_vf,
    ).to(device)
    vf_target.load_state_dict(vf.state_dict())

    # create q_critic
    qf_1 = FlattenMLP(
        input_size=state_dim + action_dim,
        output_size=1,
        hidden_sizes=hidden_sizes_qf,
    ).to(device)

    qf_2 = FlattenMLP(
        input_size=state_dim + action_dim,
        output_size=1,
        hidden_sizes=hidden_sizes_qf,
    ).to(device)

    # create optimizers
    actor_optim = optim.Adam(
        actor.parameters(),
        lr=hyper_params["LR_ACTOR"],
        weight_decay=hyper_params["LAMBDA2"],
    )

    vf_optim = optim.Adam(
        vf.parameters(),
        lr=hyper_params["LR_VF"],
        weight_decay=hyper_params["LAMBDA2"],
    )

    qf_1_optim = optim.Adam(
        qf_1.parameters(),
        lr=hyper_params["LR_QF1"],
        weight_decay=hyper_params["LAMBDA2"],
    )

    qf_2_optim = optim.Adam(
        qf_2.parameters(),
        lr=hyper_params["LR_QF2"],
        weight_decay=hyper_params["LAMBDA2"],
    )

    # make tuples to create an agent
    models = (actor, vf, vf_target, qf_1, qf_2)
    optims = (actor_optim, vf_optim, qf_1_optim, qf_2_optim)

    # create an agent
    agent = SACfDAgent(env, args, hyper_params, models, optims, target_entropy)

    # run
    if args.test:
        agent.test()
    else:
        agent.train()
def run(env: gym.Env, args: argparse.Namespace, state_dim: int, action_dim: int):
    """Run training or test.

    Args:
        env (gym.Env): openAI Gym environment with continuous action space
        args (argparse.Namespace): arguments including training settings
        state_dim (int): dimension of states
        action_dim (int): dimension of actions

    """
    hidden_sizes_actor = [256, 256]
    hidden_sizes_critic = [256, 256]

    # create actor
    actor = MLP(
        input_size=state_dim,
        output_size=action_dim,
        hidden_sizes=hidden_sizes_actor,
        output_activation=torch.tanh,
    ).to(device)

    actor_target = MLP(
        input_size=state_dim,
        output_size=action_dim,
        hidden_sizes=hidden_sizes_actor,
        output_activation=torch.tanh,
    ).to(device)
    actor_target.load_state_dict(actor.state_dict())

    # create critic
    critic = MLP(
        input_size=state_dim + action_dim,
        output_size=1,
        hidden_sizes=hidden_sizes_critic,
    ).to(device)

    critic_target = MLP(
        input_size=state_dim + action_dim,
        output_size=1,
        hidden_sizes=hidden_sizes_critic,
    ).to(device)
    critic_target.load_state_dict(critic.state_dict())

    # create optimizer
    actor_optim = optim.Adam(
        actor.parameters(),
        lr=hyper_params["LR_ACTOR"],
        weight_decay=hyper_params["WEIGHT_DECAY"],
    )

    critic_optim = optim.Adam(
        critic.parameters(),
        lr=hyper_params["LR_CRITIC"],
        weight_decay=hyper_params["WEIGHT_DECAY"],
    )

    # noise
    noise = OUNoise(
        action_dim,
        theta=hyper_params["OU_NOISE_THETA"],
        sigma=hyper_params["OU_NOISE_SIGMA"],
    )

    # make tuples to create an agent
    models = (actor, actor_target, critic, critic_target)
    optims = (actor_optim, critic_optim)

    # HER
    her = ReacherHER() if hyper_params["USE_HER"] else None

    # create an agent
    agent = BCDDPGAgent(env, args, hyper_params, models, optims, noise, her)

    # run
    if args.test:
        agent.test()
    else:
        agent.train()
def run(env: gym.Env, args: argparse.Namespace, state_dim: int, action_dim: int):
    """Run training or test.

    Args:
        env (gym.Env): openAI Gym environment with continuous action space
        args (argparse.Namespace): arguments including training settings
        state_dim (int): dimension of states
        action_dim (int): dimension of actions

    """
    hidden_sizes_actor = [400, 300]
    hidden_sizes_critic = [400, 300]

    # create actor
    actor = MLP(
        input_size=state_dim,
        output_size=action_dim,
        hidden_sizes=hidden_sizes_actor,
        output_activation=torch.tanh,
    ).to(device)

    actor_target = MLP(
        input_size=state_dim,
        output_size=action_dim,
        hidden_sizes=hidden_sizes_actor,
        output_activation=torch.tanh,
    ).to(device)
    actor_target.load_state_dict(actor.state_dict())

    # create critic
    critic_1 = MLP(
        input_size=state_dim + action_dim,
        output_size=1,
        hidden_sizes=hidden_sizes_critic,
    ).to(device)

    critic_2 = MLP(
        input_size=state_dim + action_dim,
        output_size=1,
        hidden_sizes=hidden_sizes_critic,
    ).to(device)

    critic_target1 = MLP(
        input_size=state_dim + action_dim,
        output_size=1,
        hidden_sizes=hidden_sizes_critic,
    ).to(device)

    critic_target2 = MLP(
        input_size=state_dim + action_dim,
        output_size=1,
        hidden_sizes=hidden_sizes_critic,
    ).to(device)

    critic_target1.load_state_dict(critic_1.state_dict())
    critic_target2.load_state_dict(critic_2.state_dict())

    # create optimizers
    actor_optim = optim.Adam(
        actor.parameters(),
        lr=hyper_params["LR_ACTOR"],
        weight_decay=hyper_params["WEIGHT_DECAY"],
    )

    # concat critic parameters to use one optim
    critic_parameter = list(critic_1.parameters()) + list(critic_2.parameters())
    critic_optim = optim.Adam(
        critic_parameter,
        lr=hyper_params["LR_CRITIC_1"],
        weight_decay=hyper_params["WEIGHT_DECAY"],
    )

    # noise instance to add randomness to actions
    noise = GaussianNoise(
        hyper_params["GAUSSIAN_NOISE_MIN_SIGMA"],
        hyper_params["GAUSSIAN_NOISE_MAX_SIGMA"],
        hyper_params["GAUSSIAN_NOISE_DECAY_PERIOD"],
    )

    # make tuples to create an agent
    models = (actor, actor_target, critic_1, critic_2, critic_target1, critic_target2)
    optims = (actor_optim, critic_optim)

    # create an agent
    agent = Agent(env, args, hyper_params, models, optims, noise)

    # run
    if args.test:
        agent.test()
    else:
        agent.train()
def run(env: gym.Env, env_name: str, args: argparse.Namespace, state_dim=75, action_dim=17):
    """Run training or test.

    Args:
        env (gym.Env): openAI Gym environment with discrete action space
        env_name (str): name of the environment
        args (argparse.Namespace): arguments including training settings
        state_dim (int): dimension of states
        action_dim (int): dimension of actions

    """
    # create models
    actor = CategoricalDist(
        input_size=state_dim,
        output_size=action_dim,
        hidden_sizes=[256, 256, 128],
    ).to(device)

    critic = MLP(
        input_size=state_dim,
        output_size=1,
        hidden_sizes=[256, 256, 128],
    ).to(device)

    # create optimizer
    actor_optim = optim.Adam(
        actor.parameters(),
        lr=hyper_params["LR_ACTOR"],
        weight_decay=hyper_params["WEIGHT_DECAY"],
    )

    critic_optim = optim.Adam(
        critic.parameters(),
        lr=hyper_params["LR_CRITIC"],
        weight_decay=hyper_params["WEIGHT_DECAY"],
    )

    # make tuples to create an agent
    models = (actor, critic)
    optims = (actor_optim, critic_optim)

    # create an agent
    agent = A2CAgent(env, args, hyper_params, models, optims)

    # run
    if args.test:
        all_names = [
            'N3000', 'N3001', 'N3002', 'N3003', 'N3005', 'N3006', 'N3007', 'N3008',
            'N300A', 'N300B', 'N300C', 'N300D', 'N300E',
            'N3100', 'N3101', 'N3102', 'N3103', 'N3105', 'N3106', 'N3107', 'N3108',
            'N310A', 'N310B', 'N310C', 'N310D', 'N310E',
            'N3200', 'N3201', 'N3202', 'N3203', 'N3204', 'N3205', 'N3206', 'N3207',
            'N3208', 'N3209', 'N320A', 'N320B', 'N320C', 'N320D', 'N320E',
            'N3300', 'N3301', 'N3302', 'N3303', 'N3304', 'N3305', 'N3306', 'N3307',
            'N3308', 'N3309', 'N330A', 'N330B', 'N330C', 'N330D', 'N330E',
            'N3400', 'N3401', 'N3402', 'N3403', 'N3405', 'N3406', 'N3407', 'N3408',
            'N340A', 'N340B', 'N340C', 'N340D', 'N340E',
            'N3500', 'N3501', 'N3502', 'N3503', 'N3505', 'N3506', 'N3507', 'N3508',
            'N350A', 'N350B', 'N350C', 'N350D', 'N350E',
            'N3600', 'N3601', 'N3602', 'N3603', 'N3604', 'N3605', 'N3607', 'N3608',
            'N360A', 'N360B', 'N360C', 'N360D', 'N360E',
            'N3700', 'N3701', 'N3702', 'N3703', 'N3705', 'N3706', 'N3707', 'N3708',
            'N370A', 'N370B', 'N370C', 'N370D', 'N370E',
        ]
        # evaluate on every test instance
        for name in all_names:
            agent.test(name='Glover/' + name + '.FCTP')
    else:
        agent.train()