def _updateQ(self, s_id, a_id, reward, discount_factor, similarity, best_future_utility):
    """Blend a new observation into the stored Q-value for (s_id, a_id).

    A missing state or action entry in ``self.q`` is created first, with the
    action value starting at 0. The stored value is then moved toward the
    observed target by ``self.learning_rate``:

        new = (1 - learning_rate) * old
              + learning_rate * (reward
                                 + similarity * discount_factor
                                 * best_future_utility)

    This is the usual Q-learning update except that the discounted future
    term is additionally scaled by ``similarity``, which is factored in to
    help deal with how big the state space is.

    The result is written back to ``self.q`` and also passed to
    ``Database.updateQ(s_id, a_id, q_value)``.
    NOTE(review): presumably that call persists the value - confirm
    against the Database implementation.
    """
    alpha = self.learning_rate

    # Create the per-state table and the per-action slot on first sight.
    action_values = self.q.setdefault(s_id, {})
    previous_estimate = action_values.setdefault(a_id, 0)

    # Future utility is weighted by both similarity and the discount factor.
    weighted_future = similarity * discount_factor * best_future_utility
    observed_target = reward + weighted_future

    q_value = (1 - alpha) * previous_estimate + alpha * observed_target

    action_values[a_id] = q_value
    Database.updateQ(s_id, a_id, q_value)