Example #1
    def state_func(self, action_vw, observed):
        # Find the closest obstacle (range, bearing) and the range to the obstacle
        # directly ahead; fall back to the sensor limit when nothing is detected.
        obstacles_pt, front_obstacle_r = self.MAP.get_closest_obstacle_v2(self.agent.state)
        if obstacles_pt is None:
            obstacles_pt = (self.sensor_r, np.pi)
        if front_obstacle_r is None:
            front_obstacle_r = self.sensor_r

        self.state = []
        # Per-target features: range/bearing of the belief mean in the agent frame,
        # their rates of change, log-determinant of the belief covariance, an
        # observed flag, and whether the line of sight to the belief is blocked.
        for i in range(self.num_targets):
            r_b, alpha_b = util.relative_distance_polar(
                self.belief_targets[i].state[:2],
                xy_base=self.agent.state[:2],
                theta_base=self.agent.state[2])
            r_dot_b, alpha_dot_b = util.relative_velocity_polar(
                self.belief_targets[i].state[:2],
                self.belief_targets[i].state[2:],
                self.agent.state[:2], self.agent.state[2],
                action_vw[0], action_vw[1])
            is_belief_blocked = self.MAP.is_blocked(self.agent.state[:2],
                                                    self.belief_targets[i].state[:2])
            self.state.extend([r_b, alpha_b, r_dot_b, alpha_dot_b,
                               np.log(LA.det(self.belief_targets[i].cov)),
                               float(observed[i]), float(is_belief_blocked)])
        self.state.extend([obstacles_pt[0], obstacles_pt[1], front_obstacle_r])
        self.state = np.array(self.state)

        # Update the visit-frequency map. b_speed is the mean speed of the target
        # beliefs; decay_factor = 0.7 ** (sampling_period * b_speed / sensor_r), so
        # (if applied multiplicatively each step) the map decays to 70% over the
        # time a target at that speed would need to cross the sensor range.
        b_speed = np.mean([np.sqrt(np.sum(self.belief_targets[i].state[2:]**2))
                           for i in range(self.num_targets)])
        decay_factor = np.exp(self.sampling_period * b_speed / self.sensor_r * np.log(0.7))
        self.MAP.update_visit_freq_map(self.agent.state, decay_factor, observed=bool(np.mean(observed)))
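
A minimal sketch of the geometry helpers used above, assuming a 2-D agent pose (x, y, theta) and range/bearing expressed in the agent frame; the actual util.relative_distance_polar and util.relative_velocity_polar in the codebase may differ in argument order and sign conventions:

import numpy as np

def relative_distance_polar(xy_target, xy_base, theta_base):
    # Range and bearing of the target in the agent frame.
    diff = np.asarray(xy_target) - np.asarray(xy_base)
    r = np.linalg.norm(diff)
    alpha = np.arctan2(diff[1], diff[0]) - theta_base
    alpha = (alpha + np.pi) % (2 * np.pi) - np.pi  # wrap to [-pi, pi)
    return r, alpha

def relative_velocity_polar(xy_target, vel_target, xy_base, theta_base, v_base, w_base):
    # Time derivatives of the range and bearing above, given the target velocity
    # and the agent's linear/angular velocity (v_base, w_base).
    diff = np.asarray(xy_target) - np.asarray(xy_base)
    r = np.linalg.norm(diff)
    vel_base = v_base * np.array([np.cos(theta_base), np.sin(theta_base)])
    rel_vel = np.asarray(vel_target) - vel_base
    r_dot = np.dot(diff, rel_vel) / r
    alpha_dot = (diff[0] * rel_vel[1] - diff[1] * rel_vel[0]) / r**2 - w_base
    return r_dot, alpha_dot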
Example #2
    def state_func(self, action_vw, observed):
        # Find the closest obstacle as (range, bearing); fall back to the sensor
        # limit when nothing is detected.
        obstacles_pt = self.MAP.get_closest_obstacle(self.agent.state)
        if obstacles_pt is None:
            obstacles_pt = (self.sensor_r, np.pi)

        self.state = []
        # Per-target features as in Example #1, but without the blocked-line-of-sight flag.
        for i in range(self.num_targets):
            r_b, alpha_b = util.relative_distance_polar(
                self.belief_targets[i].state[:2],
                xy_base=self.agent.state[:2],
                theta_base=self.agent.state[2])
            r_dot_b, alpha_dot_b = util.relative_velocity_polar(
                self.belief_targets[i].state[:2],
                self.belief_targets[i].state[2:], self.agent.state[:2],
                self.agent.state[2], action_vw[0], action_vw[1])
            self.state.extend([
                r_b, alpha_b, r_dot_b, alpha_dot_b,
                np.log(LA.det(self.belief_targets[i].cov)),
                float(observed[i])
            ])
        self.state.extend([obstacles_pt[0], obstacles_pt[1]])
        self.state = np.array(self.state)

        # Update the visit-frequency map (kept for evaluation) if it has been
        # initialized. A decay factor of 1.0 applies no decay, and the observed
        # flag is True when at least one target is currently observed.
        if self.MAP.visit_map is not None:
            self.MAP.update_visit_freq_map(self.agent.state, 1.0,
                                           observed=bool(np.mean(observed)))
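
The two variants yield different observation lengths: this one packs 6 values per target plus 2 obstacle values, while Example #1 adds a blocked flag per target and a frontal obstacle range (7 per target plus 3). A sketch of a matching observation space, assuming a gym-style environment and unbounded Box limits (the real environment presumably defines tighter, environment-specific bounds):

import numpy as np
from gym import spaces

def build_observation_space(num_targets, with_blocked_and_front=False):
    # 6 (or 7) features per target plus 2 (or 3) obstacle features.
    per_target = 7 if with_blocked_and_front else 6
    extra = 3 if with_blocked_and_front else 2
    dim = per_target * num_targets + extra
    return spaces.Box(low=-np.inf, high=np.inf, shape=(dim,), dtype=np.float64)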
Example #3
    def step(self, action):
        self.agent.update(action, self.targets.state)

        # Update the true target states.
        self.targets.update()
        # Observe the targets and the closest obstacle.
        measurements = self.agent.observation(self.targets.target)
        obstacles_pt = self.MAP.get_closest_obstacle(self.agent.state)
        # Update the agent's belief over the targets with a Kalman filter.
        GaussianBelief = infoplanner.IGL.MultiTargetFilter(measurements,
                                                           self.agent.agent,
                                                           debug=False)
        self.agent.update_belief(GaussianBelief)
        self.belief_targets.update(self.agent.get_belief_state(),
                                   self.agent.get_belief_cov())

        observed = [m.validity for m in measurements]
        reward, done, mean_nlogdetcov = self.get_reward(
            obstacles_pt, observed, self.is_training)
        if obstacles_pt is None:
            obstacles_pt = (self.sensor_r, np.pi)

        self.state = []
        target_b_state = self.agent.get_belief_state()
        target_b_cov = self.agent.get_belief_cov()
        control_input = self.action_map[action]
        for n in range(self.num_targets):
            # Belief mean (position, velocity) and covariance block of the n-th target.
            b_xy = target_b_state[self.target_dim * n:self.target_dim * n + 2]
            b_vel = target_b_state[self.target_dim * n + 2:self.target_dim * (n + 1)]
            cov_n = target_b_cov[self.target_dim * n:self.target_dim * (n + 1),
                                 self.target_dim * n:self.target_dim * (n + 1)]
            r_b, alpha_b = util.relative_distance_polar(
                b_xy, xy_base=self.agent.state[:2], theta_base=self.agent.state[2])
            r_dot_b, alpha_dot_b = util.relative_velocity_polar(
                b_xy, b_vel, self.agent.state[:2], self.agent.state[2],
                control_input[0], control_input[1])
            self.state.extend([
                r_b, alpha_b, r_dot_b, alpha_dot_b,
                np.log(LA.det(cov_n)),
                float(observed[n])
            ])

        self.state.extend([obstacles_pt[0], obstacles_pt[1]])
        self.state = np.array(self.state)
        return self.state, reward, done, {'mean_nlogdetcov': mean_nlogdetcov}
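
For context, a minimal interaction loop for an environment exposing the step() above; the class name TargetTrackingEnv, its constructor arguments, and the gym-style reset()/action_space attributes are assumptions, not taken from the snippets:

import numpy as np

env = TargetTrackingEnv(num_targets=2)  # hypothetical constructor
obs = env.reset()
done = False
while not done:
    action = np.random.randint(env.action_space.n)  # random discrete action
    obs, reward, done, info = env.step(action)
# step() reports the mean negative log-determinant of the belief covariances
# through the info dict.
print(info['mean_nlogdetcov'])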