Example #1
    def __init__(self, env_name):
        self.device = torch.device('cuda')
        self.env_name = env_name
        self.env = wrappers.make_env(env_name)

        self.gamma = 0.99                    # discount factor
        self.batch_size = 32                 # minibatch size sampled from replay
        self.replay_buffer_size = 10000      # replay buffer capacity (transitions)
        self.replay_start_size = 10000       # transitions collected before learning starts
        self.learning_rate = 1e-4
        self.update_target_interval = 1000   # frames between target-network syncs

        # Epsilon-greedy exploration, annealed from epsilon_start to
        # epsilon_end over epsilon_period frames.
        self.epsilon_start = 1.0
        self.epsilon_end = 0.02
        self.epsilon_period = 100000

        self.reward_bound = 19.5             # mean reward at which the task counts as solved

        self.replay_buffer = replay_buffer.ReplayBuffer(
            self.replay_buffer_size)
        self.network = dqn_model.DQNModel(self.env.observation_space.shape,
                                          self.env.action_space.n).to(self.device)
        self.target_network = dqn_model.DQNModel(self.env.observation_space.shape,
                                                 self.env.action_space.n).to(self.device)
        self.optimizer = optim.Adam(self.network.parameters(),
                                    lr=self.learning_rate)

        print(self.network)

        self.writer = SummaryWriter(comment='dqn' + self.env_name)

        self.total_rewards = []
        self.frame_index = 0
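
The epsilon_start, epsilon_end and epsilon_period fields only declare the exploration schedule here; a minimal sketch of the linear annealing they would typically drive (the helper name linear_epsilon is illustrative, not taken from this source) might look like:

def linear_epsilon(frame_index, start=1.0, end=0.02, period=100000):
    # Decay epsilon linearly with the frame index, then hold at the end value.
    return max(end, start - frame_index * (start - end) / period)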
Example #2
    def __init__(self, env_name):
        self.env = wrappers.make_env(env_name)
        self.env_name = env_name
        self.device = torch.device('cuda')

        self.learning_rate = 2.5e-4
        self.stabilizer = 0.01               # RMSprop eps term (numerical stability)
        self.gradient_momentum = 0.95        # RMSprop alpha (squared-gradient smoothing)
        self.gamma = 0.99                    # discount factor
        self.batch_size = 32
        self.replay_start_size = 50000       # transitions collected before learning starts
        self.replay_buffer_size = 1000000    # replay buffer capacity (transitions)
        self.update_target_interval = 10000  # steps between target-network syncs
        self.training_frequency = 4          # environment steps per optimization step

        # Epsilon-greedy exploration, annealed from epsilon_start to
        # epsilon_end over epsilon_period steps.
        self.epsilon_start = 1.0
        self.epsilon_end = 0.05
        self.epsilon_period = 1000000

        self.network = dqn_model.DQNModel(self.env.observation_space.shape,
                                          self.env.action_space.n).to(self.device)
        self.target_network = dqn_model.DQNModel(self.env.observation_space.shape,
                                                 self.env.action_space.n).to(self.device)
        self.replay_buffer = replay_buffer.ReplayBuffer(self.replay_buffer_size)
        self.optimizer = optim.RMSprop(self.network.parameters(),
                                       lr=self.learning_rate,
                                       alpha=self.gradient_momentum,
                                       eps=self.stabilizer)
        self.loss_criterion = nn.SmoothL1Loss()

        print(self.network)

        self.writer = SummaryWriter(comment='dqn' + self.env_name)
        self.total_rewards = []
        self.best_mean_reward = None

        self.steps = 0
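
Example #2 adds training_frequency on top of the usual DQN knobs; a minimal sketch of how these two intervals are commonly consumed in the step loop is shown below (compute_loss is a hypothetical helper, and the ReplayBuffer is assumed to support len() and sample()):

def maybe_train(agent):
    agent.steps += 1
    buffer_ready = len(agent.replay_buffer) >= agent.replay_start_size
    # Optimize only every training_frequency environment steps, once the buffer is warm.
    if buffer_ready and agent.steps % agent.training_frequency == 0:
        batch = agent.replay_buffer.sample(agent.batch_size)
        loss = agent.compute_loss(batch)  # e.g. SmoothL1Loss on the TD targets
        agent.optimizer.zero_grad()
        loss.backward()
        agent.optimizer.step()
    # Hard-copy the online weights into the target network at a fixed interval.
    if agent.steps % agent.update_target_interval == 0:
        agent.target_network.load_state_dict(agent.network.state_dict())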
Example #3
    def __init__(self, env_name):
        self.env = wrappers.make_env(env_name)
        self.env_name = env_name
        self.device = torch.device('cuda')

        # Seed PyTorch, NumPy and the environment for reproducibility.
        torch.manual_seed(2)
        np.random.seed(2)
        self.env.seed(2)
        self.env.action_space.seed(2)
        self.env.observation_space.seed(2)

        self.learning_rate = 1e-4
        self.gamma = 0.99                    # discount factor
        self.batch_size = 32
        self.replay_start_size = 10000       # transitions collected before learning starts
        self.replay_buffer_size = 10000      # replay buffer capacity (transitions)
        self.update_target_interval = 1000   # steps between target-network syncs

        # Epsilon-greedy exploration, annealed from epsilon_start to
        # epsilon_end over epsilon_period steps.
        self.epsilon_start = 1.0
        self.epsilon_end = 0.02
        self.epsilon_period = 100000

        # Prioritized replay: alpha sets how strongly priorities skew sampling;
        # beta sets the importance-sampling correction and is annealed towards
        # 1.0 over beta_period steps.
        self.alpha = 0.6
        self.beta_start = 0.4
        self.beta_period = 100000
        self.beta = self.beta_start

        self.network = dqn_model.DQNModel(self.env.observation_space.shape,
                                          self.env.action_space.n).to(self.device)
        self.target_network = dqn_model.DQNModel(self.env.observation_space.shape,
                                                 self.env.action_space.n).to(self.device)
        self.replay_buffer = replay_buffer.PriorityBuffer(
            self.replay_buffer_size, self.alpha)
        self.optimizer = optim.Adam(self.network.parameters(),
                                    lr=self.learning_rate)

        print(self.network)

        self.writer = SummaryWriter(comment='dqnpriority' + self.env_name)
        self.total_rewards = []
        self.best_mean_reward = None

        self.steps = 0
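
In Example #3, beta is initialized to beta_start and beta_period only fixes the annealing horizon; a minimal sketch of the linear schedule this suggests (the helper name annealed_beta is illustrative, not taken from this source) could be:

def annealed_beta(steps, beta_start=0.4, beta_period=100000):
    # Grow beta linearly from beta_start towards 1.0 over beta_period steps;
    # beta = 1.0 fully corrects for the non-uniform, prioritized sampling.
    return min(1.0, beta_start + steps * (1.0 - beta_start) / beta_period)

In the training loop this would be applied as something like self.beta = annealed_beta(self.steps, self.beta_start, self.beta_period) before computing the importance-sampling weights for a sampled batch.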