Exemplo n.º 1
0
    def simulate_action(self, si, ai, debug=False):
        """
        Sample the outcome of taking action ai from state si.

        si: state the action is taken from
        ai: action applied at that state
        debug: when true, print the action, the state and both probability lists
        return: tuple of (next state, observation, reward, cost)
        """
        # One transition probability per candidate successor, in self.states order.
        transition_probs = []
        for candidate_state in self.states:
            transition_probs.append(
                self.transition_function(ai, si, candidate_state)
            )
        # draw_arg's result is used as an index into self.states
        # (presumably sampled according to the given probabilities).
        next_state = self.states[draw_arg(transition_probs)]

        # One observation probability per candidate observation, conditioned on
        # the action and the successor state that was just drawn.
        observation_probs = []
        for candidate_obs in self.observations:
            observation_probs.append(
                self.observation_function(ai, next_state, candidate_obs)
            )
        next_observation = self.observations[draw_arg(observation_probs)]

        if debug:
            print('taking action {} at state {}'.format(ai, si))
            print('transition probs: {}'.format(transition_probs))
            print('obs probs: {}'.format(observation_probs))

        # NOTE: temporary simplification - the reward is computed from the action
        # and the current state only. A more general form would also take the
        # successor state and the observation.
        step_reward = self.reward_function(ai, si)
        step_cost = self.cost_function(ai)

        return next_state, next_observation, step_reward, step_cost
Exemplo n.º 2
0
    def gen_particles(self, n, prob=None):
        """
        Build a list of n states, each picked from self.states via draw_arg.

        n: number of particles to generate
        prob: one weight per entry of self.states; when None, every state
              gets the same weight 1 / len(self.states)
        return: list of n states
        """
        if prob is None:
            # No distribution supplied: fall back to a uniform one.
            state_count = len(self.states)
            prob = [1 / state_count] * state_count

        particles = []
        for _ in range(n):
            # draw_arg's result is used as an index into self.states.
            particles.append(self.states[draw_arg(prob)])
        return particles