class DeepQLearner(BaseLearner):
    """Learner that scores moves with a ``QNetwork`` and trains it from replayed experience.

    Every call to ``decide_action`` scores the candidate moves with the
    network, optionally performs one training update on a transition sampled
    from the experience database, and then delegates the final choice to
    ``self.explorer``.

    NOTE(review): ``last_state``, ``last_action``, ``last_reward``, ``epoch``
    and ``explorer`` are not set in this class; presumably they are provided
    by ``BaseLearner`` -- confirm against that class.
    """

    def __init__(self):
        """Create the network, the experience database and the weight history."""
        super(DeepQLearner, self).__init__()
        # Factor applied to the best successor score when building the
        # training target in decide_action.
        self.discount_rate = 0.95
        self.network = QNetwork()
        # NOTE(review): 100000 is presumably the database capacity -- confirm
        # against ExperienceDatabase.
        self.database = ExperienceDatabase(100000)
        # Snapshots of the network weights, appended to in end_epoch.
        self.weights = []

    def decide_action(self, new_state, possible_moves):
        """Pick one of ``possible_moves`` for ``new_state``.

        Args:
            new_state: The state the learner has just arrived in.
            possible_moves: Sequence of moves available from ``new_state``.

        Returns:
            The element of ``possible_moves`` at the index returned by
            ``self.explorer.decide_action(self.epoch, scores)``.
        """
        # compute Q-scores with forward-propagation
        scores = np.zeros(len(possible_moves))
        for i, move in enumerate(possible_moves):
            scores[i] = self.network.use_model(new_state, move)

        # train neural-network
        if self.last_state is not None:
            self.database.add(self.last_state, self.last_action,
                              self.last_reward, new_state, possible_moves)
            (past_last_state, past_last_action, past_last_reward,
             past_new_state, past_possible_moves) = self.database.sample(1)[0]

            # Best score reachable from the sampled successor state. The list
            # is built first so that max() never has to compare against None
            # (a TypeError on Python 3). A transition with no successor moves
            # contributes no future value, so the target is the reward alone.
            future_scores = [
                self.network.use_model(past_new_state, move)
                for move in past_possible_moves
            ]
            best_score = max(future_scores) if future_scores else 0.0

            # update weights with back-propagation
            target = float(past_last_reward + self.discount_rate * best_score)
            self.network.update_model(past_last_state, past_last_action, target)

        return possible_moves[self.explorer.decide_action(self.epoch, scores)]

    def end_epoch(self, score):
        """Finish the epoch via the base class and periodically snapshot the weights.

        Args:
            score: Passed through unchanged to ``BaseLearner.end_epoch``.
        """
        super(DeepQLearner, self).end_epoch(score)

        # save the network weights at this epoch
        if self.epoch % 1000 == 0:
            self.weights.append(self.network.get_all_weights())
def __init__(self):
    """Initialise the learner: base-class setup, Q-network, experience database, weight log."""
    # NOTE(review): presumably the capacity of the experience database --
    # confirm against ExperienceDatabase.
    replay_size = 100000

    super(DeepQLearner, self).__init__()
    self.discount_rate = 0.95
    self.network = QNetwork()
    self.database = ExperienceDatabase(replay_size)
    self.weights = []