Example #1
    def step(self, action):
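        # Move the agent according to the given action.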
        self.agent.update(action, self.target.state)

        # Update the true target state
        self.target.update()
        # Observe
        measurements = self.agent.observation(self.target.target)
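        # Closest obstacle point, as (range, bearing) relative to the agent; used in the reward and the observation.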
        obstacles_pt = map_utils.get_cloest_obstacle(self.MAP,
                                                     self.agent.state)
        # Update the belief of the agent on the target using KF
        GaussianBelief = IGL.MultiTargetFilter(measurements,
                                               self.agent.agent,
                                               debug=False)
        self.agent.update_belief(GaussianBelief)
        self.belief_target.update(self.agent.get_belief_state(),
                                  self.agent.get_belief_cov())

        reward, done, test_reward = self.get_reward(obstacles_pt,
                                                    measurements[0].validity,
                                                    self.is_training)
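        # If no obstacle was detected, fall back to the maximum sensor range and a dummy bearing.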
        if obstacles_pt is None:
            obstacles_pt = (self.sensor_r, np.pi)
        r_b, alpha_b, _ = util.relative_measure(
            self.agent.get_belief_state()[:2], self.agent.state)
        rel_target_vel = util.coord_change2b(self.agent.get_belief_state()[:2],
                                             alpha_b + self.agent.state[-1])
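        # Observation: relative target position (polar), relative velocity, log-determinant of the
        # belief covariance, measurement validity, and the closest obstacle point.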
        self.state = np.array([
            r_b, alpha_b, rel_target_vel[0], rel_target_vel[1],
            np.log(LA.det(self.agent.get_belief_cov())),
            float(measurements[0].validity), obstacles_pt[0], obstacles_pt[1]
        ])
        return self.state, reward, done, {'test_reward': test_reward}
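The step method above follows the standard Gym interface: it returns the next observation, the reward, a done flag, and an info dict carrying 'test_reward'. A minimal sketch of a driver loop consuming it; the environment class name, reset(), and action_space here are illustrative assumptions, not taken from the example:

import numpy as np

env = TargetTrackingEnv()                 # hypothetical constructor for the environment above
state = env.reset()                       # assumed Gym-style reset returning the initial observation
done = False
episode_reward = 0.0
while not done:
    action = env.action_space.sample()    # random action, assuming a Gym action space is defined
    state, reward, done, info = env.step(action)
    episode_reward += reward
print(episode_reward, info['test_reward'])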
Example #2
    def step(self, action):
        action_val = self.action_map[action]
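        # Map the discrete action index to its continuous control value.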
        boundary_penalty = self.agent.update(
            action_val, [t.state[:2] for t in self.targets])
        obstacles_pt = map_utils.get_cloest_obstacle(self.MAP,
                                                     self.agent.state)
        observed = []
        for i in range(self.num_targets):
            self.targets[i].update(self.agent.state[:2])
            # Observe
            obs = self.observation(self.targets[i])
            observed.append(obs[0])
            # Update the belief of the agent on the target using KF
            self.belief_targets[i].update(obs[0], obs[1], self.agent.state)

        reward, done, test_reward = self.get_reward(obstacles_pt, observed,
                                                    self.is_training)
        self.state = []
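        # If no obstacle was detected, fall back to the maximum sensor range and a dummy bearing.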
        if obstacles_pt is None:
            obstacles_pt = (self.sensor_r, np.pi)
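        # Per-target features: relative position (polar), relative velocity, belief uncertainty
        # (log-determinant of the covariance), and whether the target was observed this step.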
        for i in range(self.num_targets):
            r_b, alpha_b, _ = util.relative_measure(
                self.belief_targets[i].state, self.agent.state)
            rel_target_vel = util.coord_change2b(
                self.belief_targets[i].state[2:],
                alpha_b + self.agent.state[-1])
            self.state.extend([
                r_b, alpha_b, rel_target_vel[0], rel_target_vel[1],
                np.log(LA.det(self.belief_targets[i].cov)),
                float(observed[i])
            ])
        self.state.extend([obstacles_pt[0], obstacles_pt[1]])
        self.state = np.array(self.state)
        return self.state, reward, done, {'test_reward': test_reward}
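Each target contributes six entries to the flat observation (relative range and bearing, two body-frame velocity components, the log-determinant of its belief covariance, and the observed flag), and the closest obstacle point adds the final two. A small illustrative helper for recovering that layout, assuming the ordering built above:

import numpy as np

def split_observation(state, num_targets):
    # Per-target rows: [r_b, alpha_b, vel_x, vel_y, logdet(cov), observed].
    per_target = np.asarray(state[:6 * num_targets]).reshape(num_targets, 6)
    # Trailing pair: closest obstacle (range, bearing).
    obstacle_pt = np.asarray(state[6 * num_targets:6 * num_targets + 2])
    return per_target, obstacle_pt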
Example #3
    def step(self, action):
        self.agent.update(action, self.targets.state)

        # Update the true target state
        self.targets.update()
        # Observe
        measurements = self.agent.observation(self.targets.target)
        obstacles_pt = map_utils.get_cloest_obstacle(self.MAP,
                                                     self.agent.state)
        # Update the belief of the agent on the target using KF
        GaussianBelief = IGL.MultiTargetFilter(measurements,
                                               self.agent.agent,
                                               debug=False)
        self.agent.update_belief(GaussianBelief)
        self.belief_targets.update(self.agent.get_belief_state(),
                                   self.agent.get_belief_cov())

        observed = [m.validity for m in measurements]
        reward, done, test_reward = self.get_reward(obstacles_pt, observed,
                                                    self.is_training)
        if obstacles_pt is None:
            obstacles_pt = (self.sensor_r, np.pi)

        self.state = []
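        # Assemble the observation: six features per target followed by the closest obstacle point.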
        target_b_state = self.agent.get_belief_state()
        target_b_cov = self.agent.get_belief_cov()
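        # The joint belief stacks all targets; slice out target n's sub-state and its covariance block.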
        for n in range(self.num_targets):
            r_b, alpha_b, _ = util.relative_measure(
                target_b_state[self.target_dim * n:self.target_dim * n + 2],
                self.agent.state)
            rel_target_vel = util.coord_change2b(
                target_b_state[self.target_dim * n:self.target_dim * n + 2],
                alpha_b + self.agent.state[-1])
            self.state.extend([
                r_b, alpha_b, rel_target_vel[0], rel_target_vel[1],
                np.log(
                    LA.det(target_b_cov[self.target_dim * n:self.target_dim *
                                        (n + 1), self.target_dim *
                                        n:self.target_dim * (n + 1)])),
                float(observed[n])
            ])

        self.state.extend([obstacles_pt[0], obstacles_pt[1]])
        self.state = np.array(self.state)
        return self.state, reward, done, {'test_reward': test_reward}
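The per-target uncertainty term is the log-determinant of that target's covariance block, which shrinks as the Kalman filter concentrates its belief. A toy computation on a made-up 4x4 block (a target_dim of 4, position plus velocity, is an assumption here):

import numpy as np
from numpy import linalg as LA

cov_block = np.diag([2.0, 2.0, 0.5, 0.5])   # example belief covariance for one target
uncertainty = np.log(LA.det(cov_block))
print(uncertainty)                           # log(2 * 2 * 0.5 * 0.5) = 0.0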