def test_overlap_no_movement(self):
    """Propagating an occupancy map under a zero action matches a fresh rebuild."""
    cell_num, cell_size, om_channel_size = 4, 1, 1
    human = Human()
    human.set(px=0, py=0, vx=0, vy=0, gx=0, gy=0, theta=0)
    other_agents = np.zeros((1, 4))
    occupancy_map = build_occupancy_map(human, other_agents, cell_num,
                                        cell_size, om_channel_size)
    state = ObservableState(0, 0, 0, 0, 1)
    action = ActionXY(0, 0)
    next_state = propagate(state, action, time_step=.1, kinematics='holonomic')
    next_occupancy_map = propagate_occupancy_map(occupancy_map, state, action,
                                                 .1, 'holonomic', cell_num,
                                                 cell_size, om_channel_size)
    # Ground truth: rebuild the map directly from the propagated state.
    expected_next_occupancy_map = build_occupancy_map(next_state, other_agents,
                                                      cell_num, cell_size,
                                                      om_channel_size)
    self.assertTrue(np.array_equal(next_occupancy_map,
                                   expected_next_occupancy_map))
def test_propagate_occupancy_map_agent_center_movement_left_low_channel_size_three(
        self):
    """Map propagation with a down-left action agrees with a direct rebuild (3 channels)."""
    cell_num, cell_size, om_channel_size = 4, 1, 3
    time_step = .1
    human = Human()
    human.set(px=0, py=0, vx=0, vy=0, gx=0, gy=0, theta=0)
    # One neighbour placed up-left of the agent.
    other_agents = np.zeros((1, 4))
    other_agents[0, 0] = -1.5
    other_agents[0, 1] = 1.5
    occupancy_map = build_occupancy_map(human, other_agents, cell_num,
                                        cell_size, om_channel_size)
    action = ActionXY(-1, -1)
    obs = human.get_observable_state()
    next_state = propagate(obs, action, time_step, kinematics='holonomic')
    next_occupancy_map = propagate_occupancy_map(occupancy_map, obs, action,
                                                 time_step, 'holonomic',
                                                 cell_num, cell_size,
                                                 om_channel_size)
    # Ground truth: rebuild the map directly from the propagated state.
    expected_next_occupancy_map = build_occupancy_map(next_state, other_agents,
                                                      cell_num, cell_size,
                                                      om_channel_size)
    self.assertTrue(np.array_equal(next_occupancy_map,
                                   expected_next_occupancy_map))
def test_no_movement(self):
    """A zero-velocity holonomic action leaves the observable state unchanged."""
    state = ObservableState(0, 0, 0, 0, 1)
    next_state = propagate(state, ActionXY(0, 0), time_step=.1,
                           kinematics='holonomic')
    self.assertEqual(next_state, state)
def predict(self, state):
    """
    Input state is the joint state of robot concatenated by the observable
    state of other agents.

    To predict the best action, the agent samples actions and propagates one
    step to see how good the next state is; thus the reward function is needed.

    :param state: joint state (robot's self state + observable human states)
    :return: the chosen action (ActionXY for holonomic, ActionRot otherwise)
    :raises AttributeError: if phase/device (or epsilon in training) are unset
    :raises ValueError: if no action ever beats -inf (untrained value network)
    """
    if self.phase is None or self.device is None:
        raise AttributeError('Phase, device attributes have to be set!')
    if self.phase == 'train' and self.epsilon is None:
        raise AttributeError(
            'Epsilon attribute has to be set in training phase')

    if self.reach_destination(state):
        return ActionXY(
            0, 0) if self.kinematics == 'holonomic' else ActionRot(0, 0)
    if self.action_space is None:
        self.build_action_space(state.self_state.v_pref)

    probability = np.random.random()
    if self.phase == 'train' and probability < self.epsilon:
        # Epsilon-greedy exploration: pick a uniformly random action.
        max_action = self.action_space[np.random.choice(
            len(self.action_space))]
    else:
        self.action_values = list()
        max_min_value = float('-inf')
        max_action = None
        # The discount factor does not depend on the candidate action,
        # so hoist it out of the loop.
        discount = pow(self.gamma, self.time_step * state.self_state.v_pref)
        for action in self.action_space:
            next_self_state = propagate(state.self_state,
                                        action,
                                        time_step=self.time_step,
                                        kinematics=self.kinematics)
            ob, reward, done, info = self.env.onestep_lookahead(action)
            batch_next_states = torch.cat([
                torch.Tensor([next_self_state + next_human_state]).to(
                    self.device) for next_human_state in ob
            ], dim=0)
            # VALUE UPDATE: score an action by its worst-case (min over
            # humans) next-state value, then keep the max over actions.
            outputs = self.model(self.rotate(batch_next_states))
            min_output, min_index = torch.min(outputs, 0)
            min_value = reward + discount * min_output.data.item()
            self.action_values.append(min_value)
            if min_value > max_min_value:
                max_min_value = min_value
                max_action = action
        if max_action is None:
            # Consistent with the multi-human predict: fail loudly instead
            # of returning None when the network produced no usable value.
            raise ValueError('Value network is not well trained. ')

    if self.phase == 'train':
        self.last_state = self.transform(state)

    return max_action
def predict(self, state):
    """
    A base class for all methods that takes pairwise joint state as input to value network.
    The input to the value network is always of shape (batch_size, # humans, rotated joint state length)
    """
    if self.phase is None or self.device is None:
        raise AttributeError('Phase, device attributes have to be set!')
    if self.phase == 'train' and self.epsilon is None:
        raise AttributeError('Epsilon attribute has to be set in training phase')
    # Stop once the goal is reached; action type follows the kinematic model.
    if self.reach_destination(state):
        return ActionXY(0, 0) if self.kinematics == 'holonomic' else ActionRot(0, 0)
    if self.action_space is None:
        self.build_action_space(state.self_state.v_pref)
    occupancy_maps = None
    probability = np.random.random()
    if self.phase == 'train' and probability < self.epsilon:
        # Epsilon-greedy exploration: pick a uniformly random action.
        max_action = self.action_space[np.random.choice(len(self.action_space))]
    else:
        # Evaluate every candidate action by a one-step lookahead value.
        self.action_values = list()
        max_value = float('-inf')
        max_action = None
        for action in self.action_space:
            next_self_state = propagate(state=state.self_state, action=action,
                                        time_step=self.time_step, kinematics=self.kinematics)
            if self.query_env:
                # The environment simulates the humans' next step and reward.
                next_human_states, reward, done, info = self.env.onestep_lookahead(action)
            else:
                # Constant-velocity assumption for humans; compute reward locally.
                next_human_states = [self.propagate(human_state, ActionXY(human_state.vx, human_state.vy))
                                     for human_state in state.human_states]
                reward = self.compute_reward(next_self_state, next_human_states)
            # One row per human: (next self state + next human state) pairs.
            batch_next_states = torch.cat([torch.Tensor([next_self_state + next_human_state]).to(self.device)
                                           for next_human_state in next_human_states],
                                          dim=0)
            rotated_batch_input = self.rotate(batch_next_states).unsqueeze(0)
            if self.with_om:
                if occupancy_maps is None:
                    # NOTE(review): the map is built once and reused for every
                    # action — presumably because it does not depend on the
                    # candidate action; confirm when query_env alters humans.
                    occupancy_maps = self.build_occupancy_maps(next_human_states, next_self_state).unsqueeze(0)
                rotated_batch_input = torch.cat([rotated_batch_input, occupancy_maps.to(self.device)], dim=2)
            # VALUE UPDATE
            next_state_value = self.model(rotated_batch_input).data.item()
            value = reward + pow(self.gamma, self.time_step * state.self_state.v_pref) * next_state_value
            self.action_values.append(value)
            if value > max_value:
                max_value = value
                max_action = action
        if max_action is None:
            raise ValueError('Value network is not well trained. ')
    if self.phase == 'train':
        # Cache the transformed state for the training loop.
        self.last_state = self.transform(state)
    return max_action
def test_holonomic_diagonal_up_movement(self):
    """Holonomic propagation integrates both velocity components over one step."""
    dt = .1
    speed = np.sqrt(2)
    start = ObservableState(0, 0, 0, 0, 1)
    action = ActionXY(speed, speed)
    result = propagate(start, action, time_step=dt, kinematics='holonomic')
    expected = ObservableState(dt * speed, dt * speed, action.vx, action.vy, 1)
    self.assertEqual(result, expected)
def test_non_holonomic_left_movement(self):
    """A pi rotation reverses the heading; the agent moves along the new heading."""
    dt = .1
    rot = np.pi
    start = FullState(0, 0, 0, 0, 1, 0, 0, 0, 0)
    result = propagate(start, ActionRot(1., rot), time_step=dt,
                       kinematics='non_holonomic')
    expected = FullState(dt * np.cos(rot), dt * np.sin(rot),
                         np.cos(rot), np.sin(rot), 1, 0, 0, 0, rot)
    self.assertEqual(expected, result)