Example #1
class DQNLearner(learner):
    def __init__(self, skip_frame, num_actions, load=None, random_state=np.random.RandomState()):
        super().__init__()

        rand_vals = (1, 0.1, 1000000)  # start at 1 and anneal the eGreedy policy to 0.1 over 1,000,000 actions
        self.action_handler = ActionHandler(ActionPolicy.eGreedy, rand_vals)

        self.minimum_replay_size = 100
        self.exp_handler = DataSet(84, 84, random_state, max_steps=1000000, phi_length=skip_frame)
        self.cnn = CNN((None, skip_frame, 84, 84), num_actions)

        self.skip_frame = skip_frame
        self.discount = .95
        self.costList = list()
        self.state_tm1 = None

        if load is not None:
            self.cnn.load(load)

    def frames_processed(self, frames, action_performed, reward):
        game_action = self.action_handler.game_action_to_action_ind(action_performed)
        if self.state_tm1 is not None:
            self.exp_handler.add_sample(self.state_tm1, game_action, reward, False)

        # generate minibatch data
        if self.exp_handler.size > self.minimum_replay_size:
            states, actions, rewards, state_tp1s, terminal = self.exp_handler.random_batch(32)
            cost = self.cnn.train(states, actions, rewards, state_tp1s, terminal)
            self.costList.append(cost)
            self.action_handler.anneal()

        self.state_tm1 = frames[-1]

    def get_action(self, processed_screens):
        return self.cnn.get_output(processed_screens)[0]

    def game_over(self):
        self.exp_handler.add_terminal()  # mark the end of the episode in the replay dataset

    def get_game_action(self):
        return self.action_handler.action_vect_to_game_action(
            self.get_action(self.exp_handler.phi(self.state_tm1).reshape(1, self.skip_frame, 84, 84)))

    def set_legal_actions(self, legal_actions):
        self.action_handler.set_legal_actions(legal_actions)

    def save(self, file):
        self.cnn.save(file)

    def get_cost_list(self):
        return self.costList
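
A minimal sketch of how a learner like this plugs into an emulation loop. The `emulator` object and its `legal_actions`/`act`/`get_screen`/`game_over` methods are assumptions for illustration; only the learner calls mirror the class above.

import numpy as np

def run_episode(learner, emulator, noop_action=0, skip_frame=4):
    # hypothetical driver loop; the emulator API here is assumed, not the project's
    learner.set_legal_actions(emulator.legal_actions())
    action, total_reward = noop_action, 0
    while not emulator.game_over():
        frames, reward = [], 0
        for _ in range(skip_frame):               # repeat the action for skip_frame steps
            reward += emulator.act(action)
            frames.append(emulator.get_screen())  # 84x84 preprocessed screen assumed
        learner.frames_processed(np.asarray(frames), action, reward)
        action = learner.get_game_action()        # safe now: state_tm1 has been set
        total_reward += reward
    learner.game_over()                           # record the terminal transition
    return total_reward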
class PrioritizedExperienceLearner(learner):
    def __init__(self, skip_frame, num_actions, load=None):
        super().__init__()

        rand_vals = (1, 0.1, 10000 / skip_frame)  # start at 1 and anneal the eGreedy policy to 0.1 over 10,000/skip_frame actions
        self.action_handler = ActionHandler(ActionPolicy.eGreedy, rand_vals)

        self.exp_handler = PrioritizedExperienceHandler(1000000 / skip_frame)
        self.train_handler = TrainHandler(32, num_actions)
        self.cnn = CNN((None, skip_frame, 86, 80), num_actions, 0.1)

        self.discount = 0.99

        if load is not None:
            self.cnn.load(load)

    def frames_processed(self, frames, action_performed, reward):
        self.exp_handler.add_experience(frames, self.action_handler.game_action_to_action_ind(action_performed), reward)
        self.train_handler.train_prioritized(self.exp_handler, 0.99, self.cnn)
        self.action_handler.anneal()

    def plot_tree(self):
        self.exp_handler.tree.plot()

    def get_action(self, game_input):
        return self.cnn.get_output(game_input)[0]

    def game_over(self):
        self.exp_handler.trim()  # trim the experience replay down to capacity
        self.exp_handler.add_terminal()  # mark the end of the episode

    def get_game_action(self, game_input):
        return self.action_handler.action_vect_to_game_action(self.get_action(game_input))

    def set_legal_actions(self, legal_actions):
        self.action_handler.set_legal_actions(legal_actions)

    def save(self, file):
        self.cnn.save(file)

    def get_cost_list(self):
        return self.train_handler.costList
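
`PrioritizedExperienceHandler` itself is not shown; the `plot_tree` and `trim` calls suggest a tree-ordered buffer keyed by priority. As a rough, hypothetical sketch of that idea (a max-heap on absolute TD error, not the project's actual implementation):

import heapq
import itertools

class MiniPrioritizedReplay:
    # minimal max-heap prioritized replay sketch, assumed for illustration
    def __init__(self, max_size):
        self.max_size = int(max_size)
        self.heap = []                    # entries: (-priority, counter, transition)
        self.counter = itertools.count()  # tie-breaker so transitions never compare

    def add(self, transition, td_error):
        heapq.heappush(self.heap, (-abs(td_error), next(self.counter), transition))

    def pop_batch(self, batch_size):
        # pop the highest-priority transitions; the caller re-inserts them
        # with freshly computed TD errors after training on the batch
        n = min(batch_size, len(self.heap))
        return [heapq.heappop(self.heap)[2] for _ in range(n)]

    def trim(self):
        # drop the lowest-priority entries when over capacity, mirroring exp_handler.trim()
        if len(self.heap) > self.max_size:
            self.heap = heapq.nsmallest(self.max_size, self.heap)  # smallest tuple = highest priority
            heapq.heapify(self.heap)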
def test_anneal(action_handler: ActionHandler):
    action_handler.anneal()
    action_handler.anneal()
    assert action_handler.randVal == 0.1
    assert action_handler.randVal == action_handler.lowest_rand_val
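
The `ActionHandler` internals are not shown either; here is a minimal sketch consistent with this test, assuming `rand_vals` unpacks as (start, floor, steps) and that `anneal()` decrements `randVal` linearly with a clamp at the floor. The field names `randVal` and `lowest_rand_val` come from the test; everything else is an assumption.

class ActionHandlerSketch:
    # hypothetical stand-in for ActionHandler's annealing logic
    def __init__(self, rand_vals):
        start, self.lowest_rand_val, steps = rand_vals
        self.randVal = start
        self._step = (start - self.lowest_rand_val) / steps  # per-call decrement

    def anneal(self):
        # linear decay, clamped so randVal never drops below the floor
        self.randVal = max(self.randVal - self._step, self.lowest_rand_val)

With a fixture like `rand_vals = (1, 0.1, 2)`, the two `anneal()` calls land exactly on the floor, which is what the assertions verify.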
Example #4
class NoveltyLearner():
    def __init__(self, skip_frame, num_actions):
        rand_vals = (
            1, 0.1, 1000000
        )  # start at 1 and anneal the eGreedy policy to 0.1 over 1,000,000 actions
        self.action_handler = ActionHandler(ActionPolicy.eGreedy, rand_vals)

        self.minimum_replay_size = 100
        self.exp_handler = DataSet(84,
                                   84,
                                   np.random.RandomState(),
                                   max_steps=1000000,
                                   phi_length=skip_frame)
        self.cnn = CNN((None, skip_frame, 84, 84), num_actions)

        self.skip_frame = skip_frame
        self.costList = list()
        self.state_tm1 = None

        # novelty setup
        self.frame_table = dict()
        self.new_novel_states = 0

    def frames_processed(self, frames, action_performed, reward):
        # novelty reward: binarize and hash each frame; only the last frame's
        # novelty decides the bonus below, so initialize it for the empty case
        novelty_reward = 0
        for frame in frames:
            frame[frame > 0] = 1
            frame_hash = hash(frame.data.tobytes())

            # if already in table
            if frame_hash in self.frame_table:
                novelty_reward = 0
                self.frame_table[frame_hash] += 1
            # new state
            else:
                novelty_reward = 1
                self.frame_table[frame_hash] = 1
                self.new_novel_states += 1

        # if the game gave no reward, fall back to the novelty reward
        if reward == 0:
            reward = novelty_reward

        game_action = self.action_handler.game_action_to_action_ind(
            action_performed)
        if self.state_tm1 is not None:
            self.exp_handler.add_sample(self.state_tm1, game_action, reward,
                                        False)

        # generate minibatch data
        if self.exp_handler.size > self.minimum_replay_size:
            states, actions, rewards, state_tp1s, terminal = self.exp_handler.random_batch(
                32)
            cost = self.cnn.train(states, actions, rewards, state_tp1s,
                                  terminal)
            self.costList.append(cost)
            self.action_handler.anneal()

        self.state_tm1 = frames[-1]

    def set_legal_actions(self, legal_actions):
        self.num_actions = len(legal_actions)
        self.action_handler.set_legal_actions(legal_actions)

    def get_action(self, processed_screens):
        return self.cnn.get_output(processed_screens)[0]

    def get_game_action(self):
        return self.action_handler.action_vect_to_game_action(
            self.get_action(
                self.exp_handler.phi(self.state_tm1).reshape(
                    1, self.skip_frame, 84, 84)))

    def game_over(self):
        self.exp_handler.add_terminal()  # mark the end of the episode in the replay dataset
        # print('novel states', self.new_novel_states, 'total states', len(self.frame_table))
        self.new_novel_states = 0

    def get_cost_list(self):
        return self.costList

    def save(self, file):
        self.cnn.save(file)
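
The novelty bonus above is a simple count-based scheme: each frame is binarized, hashed by its raw bytes, and a reward of 1 is paid only the first time a hash appears. The same mechanism in isolation (a copy is hashed here so the caller's frame is not mutated in place; the usage below is hypothetical):

import numpy as np

def novelty_reward(frame, frame_table):
    binary = (frame > 0).astype(np.uint8)   # binarize without mutating the input
    frame_hash = hash(binary.tobytes())
    if frame_hash in frame_table:           # seen before: no bonus
        frame_table[frame_hash] += 1
        return 0
    frame_table[frame_hash] = 1             # new state: pay the novelty bonus
    return 1

table = {}
screen = np.zeros((84, 84), dtype=np.uint8)
assert novelty_reward(screen, table) == 1   # first visit is novel
assert novelty_reward(screen, table) == 0   # revisits earn nothing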
Example #6
class DQNLearner(learner):
    def __init__(self,
                 skip_frame,
                 num_actions,
                 load=None,
                 random_state=np.random.RandomState()):
        super().__init__()

        rand_vals = (
            1, 0.1, 1000000
        )  # start at 1 and anneal the eGreedy policy to 0.1 over 1,000,000 actions
        self.action_handler = ActionHandler(ActionPolicy.eGreedy, rand_vals)

        self.minimum_replay_size = 100
        self.exp_handler = DataSet(84,
                                   84,
                                   random_state,
                                   max_steps=1000000,
                                   phi_length=skip_frame)
        self.cnn = CNN((None, skip_frame, 84, 84), num_actions)

        self.skip_frame = skip_frame
        self.discount = .95
        self.costList = list()
        self.state_tm1 = None

        if load is not None:
            self.cnn.load(load)

    def frames_processed(self, frames, action_performed, reward):
        game_action = self.action_handler.game_action_to_action_ind(
            action_performed)
        if self.state_tm1 is not None:
            self.exp_handler.add_sample(self.state_tm1, game_action, reward,
                                        False)

        # generate minibatch data
        if self.exp_handler.size > self.minimum_replay_size:
            states, actions, rewards, state_tp1s, terminal = self.exp_handler.random_batch(
                32)
            cost = self.cnn.train(states, actions, rewards, state_tp1s,
                                  terminal)
            self.costList.append(cost)
            self.action_handler.anneal()

        self.state_tm1 = frames[-1]

    def get_action(self, processed_screens):
        return self.cnn.get_output(processed_screens)[0]

    def game_over(self):
        self.exp_handler.add_terminal()  # mark the end of the episode in the replay dataset

    def get_game_action(self):
        return self.action_handler.action_vect_to_game_action(
            self.get_action(
                self.exp_handler.phi(self.state_tm1).reshape(
                    1, self.skip_frame, 84, 84)))

    def set_legal_actions(self, legal_actions):
        self.action_handler.set_legal_actions(legal_actions)

    def save(self, file):
        self.cnn.save(file)

    def get_cost_list(self):
        return self.costList
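
For reference, the standard DQN target that a `cnn.train(states, actions, rewards, state_tp1s, terminal)` call like the one above presumably regresses toward; this NumPy version is a sketch of the math only, not the project's actual training graph:

import numpy as np

def dqn_targets(q_tp1, rewards, terminal, discount=0.95):
    # Bellman targets: r + gamma * max_a' Q(s', a'), with no bootstrap at terminals
    # q_tp1: (batch, num_actions) Q-values of the next states
    return rewards + discount * q_tp1.max(axis=1) * (1.0 - terminal)

# hypothetical batch of two transitions, the second of which ends the episode
q_tp1 = np.array([[0.5, 1.0],
                  [2.0, 0.0]])
targets = dqn_targets(q_tp1,
                      rewards=np.array([0.0, 1.0]),
                      terminal=np.array([0.0, 1.0]))
print(targets)  # [0.95 1.  ]: the terminal transition gets only its raw reward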