def __init__(self, env_name):
    self.device = torch.device('cuda')
    self.env_name = env_name
    self.env = wrappers.make_env(env_name)

    # Hyperparameters
    self.gamma = 0.99
    self.batch_size = 32
    self.replay_buffer_size = 10000
    self.replay_start_size = 10000
    self.learning_rate = 1e-4
    self.update_target_interval = 1000
    self.epsilon_start = 1.0
    self.epsilon_end = 0.02
    self.epsilon_period = 100000
    self.reward_bound = 19.5  # mean reward used as the stopping condition

    # Experience replay buffer, online network, and target network
    self.replay_buffer = replay_buffer.ReplayBuffer(self.replay_buffer_size)
    self.network = dqn_model.DQNModel(self.env.observation_space.shape,
                                      self.env.action_space.n).to(self.device)
    self.target_network = dqn_model.DQNModel(self.env.observation_space.shape,
                                             self.env.action_space.n).to(self.device)
    self.optimizer = optim.Adam(self.network.parameters(), lr=self.learning_rate)
    print(self.network)

    # Logging and training state
    self.writer = SummaryWriter(comment='dqn' + self.env_name)
    self.total_rewards = []
    self.frame_index = 0
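The epsilon_start, epsilon_end, and epsilon_period constants above imply a decaying exploration schedule driven by frame_index. The sketch below shows one minimal way such a schedule could be computed, assuming a linear decay; the epsilon_by_frame helper name is hypothetical and not part of the original code.

# Hypothetical helper (not in the original code) illustrating a linear
# epsilon schedule built from the constants defined above.
def epsilon_by_frame(frame_index, epsilon_start=1.0, epsilon_end=0.02, epsilon_period=100000):
    # Decay linearly from epsilon_start to epsilon_end over epsilon_period
    # frames, then hold at epsilon_end.
    fraction = min(frame_index / epsilon_period, 1.0)
    return epsilon_start + fraction * (epsilon_end - epsilon_start)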
def __init__(self, env_name):
    self.env = wrappers.make_env(env_name)
    self.env_name = env_name
    self.device = torch.device('cuda')

    # Hyperparameters
    self.learning_rate = 2.5e-4
    self.stabilizer = 0.01           # passed to RMSprop as eps
    self.gradient_momentum = 0.95    # passed to RMSprop as alpha
    self.gamma = 0.99
    self.batch_size = 32
    self.replay_start_size = 50000
    self.replay_buffer_size = 1000000
    self.update_target_interval = 10000
    self.training_frequency = 4
    self.epsilon_start = 1.0
    self.epsilon_end = 0.05
    self.epsilon_period = 1000000

    # Online network, target network, and replay buffer
    self.network = dqn_model.DQNModel(self.env.observation_space.shape,
                                      self.env.action_space.n).to(self.device)
    self.target_network = dqn_model.DQNModel(self.env.observation_space.shape,
                                             self.env.action_space.n).to(self.device)
    self.replay_buffer = replay_buffer.ReplayBuffer(self.replay_buffer_size)

    # RMSprop optimizer and Huber loss
    self.optimizer = optim.RMSprop(self.network.parameters(),
                                   lr=self.learning_rate,
                                   alpha=self.gradient_momentum,
                                   eps=self.stabilizer)
    self.loss_criterion = nn.SmoothL1Loss()
    print(self.network)

    # Logging and training state
    self.writer = SummaryWriter(comment='dqn' + self.env_name)
    self.total_rewards = []
    self.best_mean_reward = None
    self.steps = 0
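For context, the sketch below shows one way the target network and the SmoothL1Loss configured above could be combined into a loss computation for a sampled batch. The dqn_loss name and the (states, actions, rewards, dones, next_states) batch layout are assumptions for illustration, not the repository's actual API.

import torch

# Hypothetical sketch (not the original code) of a single DQN loss step
# using an online network, a target network, and a Huber loss criterion.
def dqn_loss(network, target_network, loss_criterion, batch, gamma=0.99, device='cuda'):
    states, actions, rewards, dones, next_states = batch
    states = torch.as_tensor(states, dtype=torch.float32, device=device)
    next_states = torch.as_tensor(next_states, dtype=torch.float32, device=device)
    actions = torch.as_tensor(actions, dtype=torch.int64, device=device)
    rewards = torch.as_tensor(rewards, dtype=torch.float32, device=device)
    dones = torch.as_tensor(dones, dtype=torch.bool, device=device)

    # Q(s, a) for the actions actually taken
    q_values = network(states).gather(1, actions.unsqueeze(-1)).squeeze(-1)

    # Bootstrapped target r + gamma * max_a' Q_target(s', a'), cut off at episode ends
    with torch.no_grad():
        next_q_values = target_network(next_states).max(1)[0]
        next_q_values[dones] = 0.0
        targets = rewards + gamma * next_q_values

    return loss_criterion(q_values, targets)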
def __init__(self, env_name):
    self.env = wrappers.make_env(env_name)
    self.env_name = env_name
    self.device = torch.device('cuda')

    # Fix random seeds for reproducibility
    torch.manual_seed(2)
    np.random.seed(2)
    self.env.seed(2)
    self.env.action_space.seed(2)
    self.env.observation_space.seed(2)

    # Hyperparameters
    self.learning_rate = 1e-4
    self.gamma = 0.99
    self.batch_size = 32
    self.replay_start_size = 10000
    self.replay_buffer_size = 10000
    self.update_target_interval = 1000
    self.epsilon_start = 1.0
    self.epsilon_end = 0.02
    self.epsilon_period = 100000

    # Prioritized replay parameters
    self.alpha = 0.6           # priority exponent
    self.beta_start = 0.4      # initial importance-sampling exponent
    self.beta_period = 100000  # steps over which beta is annealed
    self.beta = self.beta_start

    # Online network, target network, and prioritized replay buffer
    self.network = dqn_model.DQNModel(self.env.observation_space.shape,
                                      self.env.action_space.n).to(self.device)
    self.target_network = dqn_model.DQNModel(self.env.observation_space.shape,
                                             self.env.action_space.n).to(self.device)
    self.replay_buffer = replay_buffer.PriorityBuffer(self.replay_buffer_size, self.alpha)
    self.optimizer = optim.Adam(self.network.parameters(), lr=self.learning_rate)
    print(self.network)

    # Logging and training state
    self.writer = SummaryWriter(comment='dqnpriority' + self.env_name)
    self.total_rewards = []
    self.best_mean_reward = None
    self.steps = 0
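The beta_start and beta_period fields suggest that the importance-sampling exponent is annealed toward 1.0 as training progresses, so that the bias correction for prioritized sampling becomes full by the end of training. A minimal sketch of such a linear annealing, assuming the steps counter drives it, is shown below; the beta_by_step name is hypothetical.

# Hypothetical helper (not in the original code) showing a linear annealing
# of beta from beta_start to 1.0 over beta_period steps.
def beta_by_step(steps, beta_start=0.4, beta_period=100000):
    return min(1.0, beta_start + steps * (1.0 - beta_start) / beta_period)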