def simulate_action(self, si, ai, debug=False):
    """Simulate one step of the POMDP from state ``si`` under action ``ai``.

    Samples a successor state from the transition model, then an
    observation from the observation model, and evaluates the reward
    and action cost.

    si: current state
    ai: action taken at the current state
    debug: when True, print the sampled distributions
    return: (next state, observation, reward, cost)
    """
    # Sample the successor state from the transition distribution.
    transition_probs = [
        self.transition_function(ai, si, sj) for sj in self.states
    ]
    next_state = self.states[draw_arg(transition_probs)]

    # Sample an observation conditioned on the successor state.
    obs_probs = [
        self.observation_function(ai, next_state, oj)
        for oj in self.observations
    ]
    observation = self.observations[draw_arg(obs_probs)]

    if debug:
        print('taking action {} at state {}'.format(ai, si))
        print('transition probs: {}'.format(transition_probs))
        print('obs probs: {}'.format(obs_probs))

    # NOTE: reward currently depends only on (action, state); the more
    # general form reward_function(ai, si, sj, observation) is a TODO.
    reward = self.reward_function(ai, si)
    cost = self.cost_function(ai)
    return next_state, observation, reward, cost
def gen_particles(self, n, prob=None):
    """Sample ``n`` particle states from ``self.states``.

    n: number of particles to generate
    prob: optional sampling distribution over the states; defaults to
        the uniform distribution when omitted
    return: list of ``n`` sampled states
    """
    if prob is None:
        # No distribution supplied: fall back to uniform weights.
        num_states = len(self.states)
        prob = [1 / num_states] * num_states
    return [self.states[draw_arg(prob)] for _ in range(n)]