Python ExperienceDatabase示例

编程语言: Python

命名空间/包名称: rl.experience_database

hotexamples.com的示例: 2

Python ExperienceDatabase - 共找到2个示例。以下是从开源项目中提取的、评价最高的 rl.experience_database.ExperienceDatabase 真实Python示例。您可以对示例进行评分，以帮助我们提高示例质量。

常用方法

显示/隐藏

add(1)

sample(1)

示例#1

显示文件

文件： reinforcement_learner.py 项目： frw/2048-DRL

class DeepQLearner(BaseLearner):
    """Learner that scores moves with a ``QNetwork`` and trains it from replayed experience.

    Every call to ``decide_action`` scores the candidate moves, stores the
    previous transition in an ``ExperienceDatabase``, samples one stored
    transition and updates the network towards
    ``reward + discount_rate * best next-state score``.

    NOTE(review): ``last_state``, ``last_action``, ``last_reward``,
    ``explorer`` and ``epoch`` are read here but never assigned in this
    class; presumably ``BaseLearner`` maintains them -- confirm.
    """

    def __init__(self):
        super(DeepQLearner, self).__init__()
        # Multiplier applied to the best next-state score in the training target.
        self.discount_rate = 0.95
        self.network = QNetwork()
        # presumably 100000 is the maximum number of stored experiences -- confirm
        self.database = ExperienceDatabase(100000)
        # Weight snapshots appended by end_epoch.
        self.weights = []

    def decide_action(self, new_state, possible_moves):
        """Choose a move for ``new_state`` and perform one training step.

        Args:
            new_state: The state the learner has to act in.
            possible_moves: Indexable collection of candidate moves.

        Returns:
            The element of ``possible_moves`` at the index chosen by
            ``self.explorer.decide_action``.
        """
        # compute Q-scores with forward-propagation
        scores = np.zeros(len(possible_moves))
        for i, move in enumerate(possible_moves):
            scores[i] = self.network.use_model(new_state, move)

        # train neural-network
        if self.last_state is not None:
            self.database.add(self.last_state, self.last_action, self.last_reward, new_state, possible_moves)

            (past_last_state, past_last_action, past_last_reward,
             past_new_state, past_possible_moves) = self.database.sample(1)[0]

            # Score every follow-up move of the sampled transition. Seeding
            # max() with None (as before) raises TypeError on Python 3, and
            # an empty move list left None to be multiplied below.
            future_scores = [self.network.use_model(past_new_state, move)
                             for move in past_possible_moves]
            # With no follow-up moves there is nothing to look ahead to, so
            # the target reduces to the stored reward.
            best_score = max(future_scores) if future_scores else 0.0

            # update weights with back-propagation
            target = float(past_last_reward + self.discount_rate * best_score)
            self.network.update_model(past_last_state, past_last_action, target)

        return possible_moves[self.explorer.decide_action(self.epoch, scores)]

    def end_epoch(self, score):
        """Finish the epoch via the base class and snapshot weights every 1000th epoch.

        Args:
            score: Passed through unchanged to ``BaseLearner.end_epoch``.
        """
        super(DeepQLearner, self).end_epoch(score)

        # save the network weights at this epoch
        if self.epoch % 1000 == 0:
            self.weights.append(self.network.get_all_weights())

示例#2

显示文件

文件： reinforcement_learner.py 项目： frw/2048-DRL

 def __init__(self):
     """Initialise the base learner, then set up the network, database and weight log."""
     super(DeepQLearner, self).__init__()

     # presumably the maximum number of stored experiences -- confirm
     # against ExperienceDatabase
     database_arg = 100000

     self.discount_rate = 0.95
     self.network = QNetwork()
     self.database = ExperienceDatabase(database_arg)
     self.weights = list()