예제 #1
0
    def _updateQ(self, s_id, a_id, reward, discount_factor, similarity,
                 best_future_utility):
        """Recompute the Q-value for (s_id, a_id), cache it and persist it.

        The new value blends the currently stored estimate with a fresh
        target, weighted by ``self.learning_rate``:

            (1 - lr) * old + lr * (reward
                                   + similarity * discount_factor
                                   * best_future_utility)

        Args:
            s_id: Key of the state in ``self.q``.
            a_id: Key of the action within that state's entry.
            reward: Immediate reward term of the target.
            discount_factor: Multiplier applied to the future utility.
            similarity: Extra multiplier applied to the discounted future
                utility.
            best_future_utility: Utility estimate for the best follow-up
                action.

        Side effects:
            Creates missing ``self.q[s_id]`` / ``self.q[s_id][a_id]`` entries
            (a new pair starts at 0), stores the new value in ``self.q`` and
            passes it to ``Database.updateQ``.
        """
        # First visit to a state or action: create the entry, starting at 0.
        action_values = self.q.setdefault(s_id, {})
        old_estimate = action_values.setdefault(a_id, 0)

        alpha = self.learning_rate

        # Typical Q-learning target, except that "similarity" is factored into
        # the discounted future utility to help deal with how big the state
        # space is.
        target = reward + similarity * discount_factor * best_future_utility
        updated = (1 - alpha) * old_estimate + alpha * target

        action_values[a_id] = updated
        Database.updateQ(s_id, a_id, updated)