Example #1
def train():
    env = gym.make("CartPole-v1")
    agent = CartPoleDRQNAgent(DRQN, model=DRQN(), env=env,
                              exploration=LinearSchedule(10000, initial_p=1.0, final_p=0.02),
                              batch_size=4, memory_size=100000, min_mem=100)
    agent.saving_dir = '/home/ur5/thesis/rdd_rl/gym_test/cartpole/data/drqn'
    agent.train(10000, 10000, 100, False)
Example #2
def train():
    env = gym.make("CartPole-v1")
    agent = CartPoleDRQNAgent(DRQN, model=DRQN(), env=env,
                              exploration=LinearSchedule(100000, initial_p=1.0, final_p=0.1),
                              batch_size=32, target_update_frequency=20, memory_size=1000)
    agent.saving_dir = '/home/ur5/thesis/rdd_rl/gym_test/data/drqn_cartpole_partial'
    agent.train(100000, 200, 100, False)
Example #3
def plot(checkpoint):
    env = None
    agent = CartPoleDRQNAgent(DRQN, model=DRQN(), env=env,
                              exploration=LinearSchedule(100000, initial_p=1.0, final_p=0.1),
                              batch_size=32, target_update_frequency=20)
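    # point the agent at the directory that holds the saved checkpoint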
    agent.saving_dir = '/home/ur5/thesis/rdd_rl/gym_test/data/drqn_cartpole_partial'
    agent.loadCheckpoint(checkpoint)
    plotLearningCurve(agent.episode_rewards, window=20)
    plt.show()
Example #4
def train():
    env = gym.make("CartPole-v1")
    agent = CartPoleDRQNAgent(DQN,
                              model=DQN(),
                              env=env,
                              exploration=LinearSchedule(10000,
                                                         initial_p=1.0,
                                                         final_p=0.02),
                              batch_size=32)
    agent.saving_dir = '/home/ur5/thesis/rdd_rl/gym_test/data/dqn_cartpole'
    agent.train(10000, 500, 10000, False)
Example #5
def train():
    env = gym.make("CartPole-v1")
    agent = CartPoleDRQNSliceAgent(DRQN,
                                   model=DRQN(),
                                   env=env,
                                   exploration=LinearSchedule(100000,
                                                              initial_p=1.0,
                                                              final_p=0.02),
                                   batch_size=32,
                                   memory_size=100000,
                                   min_mem=10000,
                                   sequence_len=32)
    agent.saving_dir = '/home/ur5/thesis/rdd_rl/gym_test/cartpole/data/partial_drqn_slice'
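    # resume training from a previously saved checkpoint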
    agent.loadCheckpoint('20190212204507')
    agent.train(100000, 500, 200, False)
Example #6
            final_mask_batch.append(
                torch.tensor(list(episode_transition.final_mask),
                             dtype=torch.uint8))
            pad_mask_batch.append(
                torch.tensor(list(episode_transition.pad_mask),
                             dtype=torch.uint8))

        # collate the per-episode sequences into batched tensors on the training device
        state = (torch.stack(state_1_batch).to(self.device),
                 torch.stack(state_2_batch).to(self.device))
        action = torch.stack(action_batch).to(self.device)
        next_state = (torch.stack(next_state_1_batch).to(self.device),
                      torch.stack(next_state_2_batch).to(self.device))
        reward = torch.stack(reward_batch).to(self.device)
        final_mask = torch.stack(final_mask_batch).to(self.device)
        pad_mask = torch.stack(pad_mask_batch)
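        # flip pad_mask so non_pad_mask marks the entries that are not padding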
        non_pad_mask = 1 - pad_mask

        return state, action, next_state, reward, final_mask, non_pad_mask


if __name__ == '__main__':
    agent = ConvDRQNAgent(DRQN,
                          model=DRQN(),
                          env=ScoopEnv(),
                          exploration=LinearSchedule(10000,
                                                     initial_p=1.0,
                                                     final_p=0.1),
                          min_mem=1000)
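    # resume from the checkpoint saved in an earlier run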
    agent.loadCheckpoint('20190212192630')
    agent.train(100000, max_episode_steps=200)
Example #7
    def forward(self, x, hidden=None):
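        # normalise raw byte-valued pixels into roughly [0, 1)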
        x = x.float() / 256
        shape = x.shape
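        # merge the batch and sequence dimensions so the conv stack sees a flat batch of frames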
        x = x.view(shape[0] * shape[1], shape[2], shape[3], shape[4])
        conv_out = self.conv(x)
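        # restore the (batch, sequence, features) layout expected by the LSTM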
        x = conv_out.view(shape[0], shape[1], -1)
        if hidden is None:
            x, hidden = self.lstm(x)
        else:
            x, hidden = self.lstm(x, hidden)
        x = self.fc(x)
        return x, hidden


if __name__ == '__main__':
    env = gym.make('PongNoFrameskip-v4')
    env = wrap_drqn(env)

    agent = DRQNAgent(DRQN,
                      model=DRQN(env.observation_space.shape,
                                 env.action_space.n),
                      env=env,
                      exploration=LinearSchedule(100000, 0.02),
                      batch_size=1,
                      target_update_frequency=1000,
                      memory_size=100000,
                      min_mem=10000)
    agent.saving_dir = '/home/ur5/thesis/rdd_rl/gym_test/pong/data/drqn'
    agent.train(10000, 10000, save_freq=50)
Example #8
        self.episode_rewards = checkpoint['episode_rewards']
        self.episode_lengths = checkpoint['episode_lengths']

        # rebuild the policy network and load its saved weights
        self.policy_net = HistoryDQN()
        self.policy_net.load_state_dict(checkpoint['policy_state_dict'])
        self.policy_net = self.policy_net.to(DEVICE)
        self.policy_net.train()

        # the target network starts from the same saved weights and stays in eval mode
        self.target_net = HistoryDQN()
        self.target_net.load_state_dict(checkpoint['policy_state_dict'])
        self.target_net = self.target_net.to(DEVICE)
        self.target_net.eval()

        # restore the optimizer state so training resumes where it left off
        self.optimizer = optim.Adam(self.policy_net.parameters())
        self.optimizer.load_state_dict(checkpoint['optimizer_state_dict'])


if __name__ == '__main__':
    agent = SynDQNAgent(
        20005,
        4,
        LinearSchedule(10000, 0.1),
        batch_size=256,
        saving_dir='/home/ur5/thesis/simple_task/scoop_grasp_2d/data/sync_dqn')
    agent.load_checkpoint('20181205143445')
    # agent.train(10000)
    plotLearningCurve(agent.episode_rewards)
    plt.show()
    plotLearningCurve(agent.episode_lengths, label='length', color='r')
    plt.show()
Example #9
            action_batch.append(torch.cat(episode_transition.action))
            reward_batch.append(torch.cat(episode_transition.reward))
            final_mask_batch.append(torch.tensor(list(episode_transition.final_mask), dtype=torch.uint8))
            pad_mask_batch.append(torch.tensor(list(episode_transition.pad_mask), dtype=torch.uint8))

        state = (torch.stack(state_0_batch).to(self.device),
                 torch.stack(state_1_batch).to(self.device))
        action = torch.stack(action_batch).to(self.device)
        next_state = (torch.stack(next_state_0_batch).to(self.device),
                      torch.stack(next_state_1_batch).to(self.device))
        reward = torch.stack(reward_batch).to(self.device)
        final_mask = torch.stack(final_mask_batch).to(self.device)
        pad_mask = torch.stack(pad_mask_batch)
        non_pad_mask = 1 - pad_mask

        return state, action, next_state, reward, final_mask, non_pad_mask


if __name__ == '__main__':
    envs = []
    for i in range(1):
        env = ScoopEnv(19997 + i)
        envs.append(env)

    agent = Agent(DRQN(envs[0].observation_space[0].shape, envs[0].observation_space[1].shape, 4),
                  envs, LinearSchedule(10000, 0.1), batch_size=128, min_mem=1000)
    agent.saving_dir = '/home/ur5/thesis/rdd_rl/scoop_vision/data/syn_drqn_dense'
    agent.loadCheckpoint('20190220221354')
    agent.train(100000, 200, save_freq=500)