break env_t.close() env_name = 'CartPole-v1' gamma = 0.99 num_envs = 8 PENALTY = -1.0 n_step = 4 max_frame = 50000 lr = 0.001 device = torch.device("cuda" if torch.cuda.is_available() else "cpu") if __name__ == '__main__': envs = SubprocVecEnv([make_env(env_name) for i in range(num_envs)]) net = nn.Sequential(nn.Linear(4, 128), nn.ReLU(), nn.Linear(128, 2)) actor = Actor(4, 128, 2).to(device) critic = Critic(4, 128).to(device) solver = optim.Adam( list(actor.parameters()) + list(critic.parameters()), lr) duration = [] frame_count = 0 lifespan = [[0] for _ in range(num_envs)] s_gotten = None while frame_count * n_step < max_frame: obs_l, acts_l, rews_l, dones_l, probs_l = [], [], [], [], [] accept_sample = [True for _ in range(num_envs)] for _ in range(n_step):
# Entry script: builds the vectorised environments, the DDPG model and the
# replay memory, then sets the counters and schedule constants that the rest
# of the script relies on.
from common.multiprocessing_env import SubprocVecEnv, make_env
from ddpg import *
from utils import *
from memory import Memory
from ddpg_pendulum_net import Actor, Critic
import warnings

# Installed after the imports above, so UserWarnings raised while importing
# them are still shown; only later ones are ignored.
warnings.filterwarnings("ignore", category=UserWarning)

if __name__ == "__main__":
    # --- Environments: num_envs instances wrapped in one SubprocVecEnv. ---
    env_name = 'Pendulum-v0'
    num_envs = 8
    envs = SubprocVecEnv([make_env(env_name) for _ in range(num_envs)])

    # --- Model: argument order follows the DDPG(...) call below. ---
    actor_lr, critic_lr = 2e-4, 3e-4
    gamma, soft_tau = 0.99, 0.001
    model = DDPG(Actor(), Critic(), gamma, soft_tau, actor_lr, critic_lr)

    # --- Replay memory. ---
    # NOTE(review): memory_size is presumably the capacity in transitions;
    # confirm against Memory.__init__.
    memory_size = 500000
    memory = Memory(memory_size)

    # --- Schedule constants and counters. ---
    # NOTE(review): none of these are read in the visible part of the script;
    # their exact meaning should be confirmed where they are used.
    eval_freq, batch_size = 1000, 256
    n_steps, num_ddpg_iter = 1, 5
    max_iteration = 50000
    frame_count = 0