Example No. 1
    def test_overlap_no_movement(self):
        cell_num = 4
        cell_size = 1
        om_channel_size = 1
        human = Human()
        human.set(px=0, py=0, vx=0, vy=0, gx=0, gy=0, theta=0)
        other_agents = np.zeros((1, 4))
        occupancy_map = build_occupancy_map(human, other_agents, cell_num,
                                            cell_size, om_channel_size)

        radius = 1
        state = ObservableState(0, 0, 0, 0, radius)
        action = ActionXY(0, 0)
        next_state = propagate(state,
                               action,
                               time_step=.1,
                               kinematics='holonomic')
        next_occupancy_map = propagate_occupancy_map(occupancy_map, state,
                                                     action, .1, 'holonomic',
                                                     cell_num, cell_size,
                                                     om_channel_size)
        expected_next_occupancy_map = build_occupancy_map(
            next_state, other_agents, cell_num, cell_size, om_channel_size)

        self.assertTrue(
            np.array_equal(next_occupancy_map, expected_next_occupancy_map))
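
These snippets omit their imports. The header below is a plausible way to run the tests, assuming a CrowdNav-style project layout; the state_utils module path is hypothetical, since the examples never show where propagate, build_occupancy_map, and propagate_occupancy_map are actually defined:

import unittest

import numpy as np

from crowd_sim.envs.utils.human import Human
from crowd_sim.envs.utils.state import ObservableState, FullState
from crowd_sim.envs.utils.action import ActionXY, ActionRot
# Hypothetical module path; point this at wherever the project defines
# these helpers.
from crowd_nav.utils.state_utils import (propagate, build_occupancy_map,
                                         propagate_occupancy_map)
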
Example No. 2
    def test_propagate_occupancy_map_agent_center_movement_left_low_channel_size_three(
            self):
        cell_num = 4
        cell_size = 1
        om_channel_size = 3
        time_step = .1
        human = Human()
        human.set(px=0, py=0, vx=0, vy=0, gx=0, gy=0, theta=0)
        other_agents = np.zeros((1, 4))
        other_agents[0, 0] = -1.5
        other_agents[0, 1] = 1.5
        occupancy_map = build_occupancy_map(human, other_agents, cell_num,
                                            cell_size, om_channel_size)

        action = ActionXY(-1, -1)
        next_state = propagate(human.get_observable_state(),
                               action,
                               time_step,
                               kinematics='holonomic')
        next_occupancy_map = propagate_occupancy_map(
            occupancy_map, human.get_observable_state(), action, time_step,
            'holonomic', cell_num, cell_size, om_channel_size)
        expected_next_occupancy_map = build_occupancy_map(
            next_state, other_agents, cell_num, cell_size, om_channel_size)

        self.assertTrue(
            np.array_equal(next_occupancy_map, expected_next_occupancy_map))
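
With om_channel_size == 3, each grid cell presumably carries three values rather than one; in CrowdNav-style occupancy maps the extra channels hold the average vx and vy of the agents falling in each cell, though that convention is an assumption here. A quick shape check under that reading:

# Assumed convention: cell_num**2 cells, om_channel_size values per cell.
cell_num, om_channel_size = 4, 3
flat_length = cell_num ** 2 * om_channel_size
print(flat_length)  # 48 values describing the single other agent
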
Example No. 3
    def test_no_movement(self):
        radius = 1
        state = ObservableState(0, 0, 0, 0, radius)
        action = ActionXY(0, 0)
        next_state = propagate(state,
                               action,
                               time_step=.1,
                               kinematics='holonomic')
        self.assertEqual(next_state, state)
Example No. 4
    def predict(self, state):
        """
        Input state is the joint state of robot concatenated by the observable state of other agents

        To predict the best action, agent samples actions and propagates one step to see how good the next state is
        thus the reward function is needed

        """
        if self.phase is None or self.device is None:
            raise AttributeError('Phase, device attributes have to be set!')
        if self.phase == 'train' and self.epsilon is None:
            raise AttributeError(
                'Epsilon attribute has to be set in training phase')

        if self.reach_destination(state):
            return ActionXY(
                0, 0) if self.kinematics == 'holonomic' else ActionRot(0, 0)
        if self.action_space is None:
            self.build_action_space(state.self_state.v_pref)

        probability = np.random.random()
        if self.phase == 'train' and probability < self.epsilon:
            max_action = self.action_space[np.random.choice(
                len(self.action_space))]
        else:
            self.action_values = list()
            max_min_value = float('-inf')
            max_action = None
            for action in self.action_space:
                next_self_state = propagate(state.self_state,
                                            action,
                                            time_step=self.time_step,
                                            kinematics=self.kinematics)
                ob, reward, done, info = self.env.onestep_lookahead(action)
                batch_next_states = torch.cat([
                    torch.Tensor([next_self_state + next_human_state]).to(
                        self.device) for next_human_state in ob
                ], dim=0)
                # VALUE UPDATE
                outputs = self.model(self.rotate(batch_next_states))
                min_output, min_index = torch.min(outputs, 0)
                min_value = reward + pow(
                    self.gamma, self.time_step *
                    state.self_state.v_pref) * min_output.data.item()
                self.action_values.append(min_value)
                if min_value > max_min_value:
                    max_min_value = min_value
                    max_action = action

        if self.phase == 'train':
            self.last_state = self.transform(state)

        return max_action
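
Note that this variant scores each candidate action by the minimum value over all humans (a worst-case backup) and keeps the action that maximizes that minimum. The discount raises gamma to time_step * v_pref, which keeps the effective discount consistent across step lengths and preferred speeds. A toy calculation mirroring the formula, with made-up numbers:

# Made-up numbers; only the formula matches the code above.
gamma, time_step, v_pref = 0.9, 0.25, 1.0
reward, next_state_value = 0.0, 0.8
value = reward + pow(gamma, time_step * v_pref) * next_state_value
print(round(value, 3))  # 0.779
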
Example No. 5
    def predict(self, state):
        """
        A base class for all methods that takes pairwise joint state as input to value network.
        The input to the value network is always of shape (batch_size, # humans, rotated joint state length)

        """
        if self.phase is None or self.device is None:
            raise AttributeError('Phase, device attributes have to be set!')
        if self.phase == 'train' and self.epsilon is None:
            raise AttributeError('Epsilon attribute has to be set in training phase')

        if self.reach_destination(state):
            return ActionXY(0, 0) if self.kinematics == 'holonomic' else ActionRot(0, 0)
        if self.action_space is None:
            self.build_action_space(state.self_state.v_pref)

        occupancy_maps = None
        probability = np.random.random()
        if self.phase == 'train' and probability < self.epsilon:
            max_action = self.action_space[np.random.choice(len(self.action_space))]
        else:
            self.action_values = list()
            max_value = float('-inf')
            max_action = None
            for action in self.action_space:
                next_self_state = propagate(state=state.self_state, action=action, time_step=self.time_step, kinematics=self.kinematics)
                if self.query_env:
                    next_human_states, reward, done, info = self.env.onestep_lookahead(action)
                else:
                    # Humans are assumed to keep their current velocity
                    # (holonomic constant-velocity model).
                    next_human_states = [propagate(human_state, ActionXY(human_state.vx, human_state.vy),
                                                   time_step=self.time_step, kinematics='holonomic')
                                         for human_state in state.human_states]
                    reward = self.compute_reward(next_self_state, next_human_states)
                batch_next_states = torch.cat([torch.Tensor([next_self_state + next_human_state]).to(self.device)
                                              for next_human_state in next_human_states], dim=0)
                rotated_batch_input = self.rotate(batch_next_states).unsqueeze(0)
                if self.with_om:
                    if occupancy_maps is None:
                        occupancy_maps = self.build_occupancy_maps(next_human_states, next_self_state).unsqueeze(0)
                    rotated_batch_input = torch.cat([rotated_batch_input, occupancy_maps.to(self.device)], dim=2)
                # VALUE UPDATE
                next_state_value = self.model(rotated_batch_input).data.item()
                value = reward + pow(self.gamma, self.time_step * state.self_state.v_pref) * next_state_value
                self.action_values.append(value)
                if value > max_value:
                    max_value = value
                    max_action = action
            if max_action is None:
                raise ValueError('Value network is not well trained.')

        if self.phase == 'train':
            self.last_state = self.transform(state)

        return max_action
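
In the with_om branch, the occupancy maps are concatenated onto the rotated pairwise states along the feature dimension (dim=2), one row per human. A shape sketch under assumed sizes; the 13-dim rotated state and the 4x4 map with 3 channels (48 values per human) are assumptions carried over from the earlier tests:

import torch

# One batch, 5 humans, 13 rotated-state features, 48 occupancy values each.
rotated_batch_input = torch.zeros(1, 5, 13)
occupancy_maps = torch.zeros(1, 5, 48)
joint_input = torch.cat([rotated_batch_input, occupancy_maps], dim=2)
print(joint_input.shape)  # torch.Size([1, 5, 61])
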
Example No. 6
    def test_holonomic_diagonal_up_movement(self):
        radius = 1
        time_step = .1
        state = ObservableState(0, 0, 0, 0, radius)
        action = ActionXY(np.sqrt(2), np.sqrt(2))
        next_state = propagate(state,
                               action,
                               time_step=time_step,
                               kinematics='holonomic')
        expected_state = ObservableState(time_step * np.sqrt(2),
                                         time_step * np.sqrt(2), action.vx,
                                         action.vy, radius)
        self.assertEqual(next_state, expected_state)
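
Together with the no-movement test above, this pins down the holonomic update: each position integrates the commanded velocity over one time step, and the state's velocity is replaced by the action's. A minimal sketch consistent with these expectations, not the project's actual implementation:

def propagate_holonomic(state, action, time_step):
    # px' = px + vx * dt, py' = py + vy * dt; the commanded velocity is adopted.
    return ObservableState(state.px + action.vx * time_step,
                           state.py + action.vy * time_step,
                           action.vx, action.vy, state.radius)
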
Example No. 7
    def test_non_holonomic_left_movement(self):
        radius = 1
        time_step = .1
        state = FullState(0, 0, 0, 0, radius, 0, 0, 0, 0)
        r = np.pi
        action = ActionRot(1., r)
        next_state = propagate(state,
                               action,
                               time_step=time_step,
                               kinematics='non_holonomic')
        expected_state = FullState(time_step * np.cos(r), time_step * np.sin(r),
                                   np.cos(r), np.sin(r), radius, 0, 0, 0, r)

        self.assertEqual(expected_state, next_state)
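
The expected state implies that the non-holonomic update applies the rotation first and then drives along the new heading: theta' = theta + r, followed by vx = v * cos(theta') and vy = v * sin(theta'). A sketch under that reading (again not the actual implementation; ActionRot is assumed to carry the pair (v, r)):

import numpy as np

def propagate_non_holonomic(state, action, time_step):
    # Rotate first, then translate along the new heading.
    theta = state.theta + action.r
    vx = action.v * np.cos(theta)
    vy = action.v * np.sin(theta)
    return FullState(state.px + vx * time_step, state.py + vy * time_step,
                     vx, vy, state.radius, state.gx, state.gy,
                     state.v_pref, theta)
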