Example No. 1
    def build_action_space(self, v_pref):
        """
        The action space consists of the stop action plus speed_samples x rotation_samples (speed, rotation) pairs sampled over the permitted range.
        """
        holonomic = self.kinematics == 'holonomic'
        # speeds = [(np.exp((i + 1) / self.speed_samples) - 1) / (np.e - 1) * v_pref for i in range(self.speed_samples)]
        speeds = [(i + 1) / self.speed_samples * v_pref for i in range(self.speed_samples)]
        if holonomic:
            rotations = np.linspace(0, 2 * np.pi, self.rotation_samples, endpoint=False)
        else:
            if self.rotation_constraint == np.pi:
                rotations = np.linspace(-self.rotation_constraint, self.rotation_constraint, self.rotation_samples, endpoint=False)
            else:
                rotations = np.linspace(-self.rotation_constraint, self.rotation_constraint, self.rotation_samples)

        action_space = [ActionXY(0, 0) if holonomic else ActionRot(0, 0)]
        self.action_group_index.append(0)
        for j, speed in enumerate(speeds):
            for i, rotation in enumerate(rotations):
                action_index = j * self.rotation_samples + i + 1
                self.action_group_index.append(action_index)
                if holonomic:
                    action_space.append(ActionXY(speed * np.cos(rotation), speed * np.sin(rotation)))
                else:
                    action_space.append(ActionRot(speed, rotation))
        self.speeds = speeds
        self.rotations = rotations
        self.action_space = action_space
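The examples on this page assume CrowdNav-style action types. A minimal sketch of the two namedtuples they rely on (assuming the usual crowd_sim.envs.utils.action definitions):

from collections import namedtuple

# Holonomic action: velocity components in the world frame.
ActionXY = namedtuple('ActionXY', ['vx', 'vy'])
# Unicycle action: speed along the current heading plus a rotation (change of heading).
ActionRot = namedtuple('ActionRot', ['v', 'r'])

# Usage: the zero ("stop") action that appears throughout these examples.
stop = ActionXY(0, 0)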
Example No. 2
    def predict(self, state):
        if self.phase is None or self.device is None:
            raise AttributeError('Phase, device attributes have to be set!')
        if self.phase == 'train' and self.epsilon is None:
            raise AttributeError(
                'Epsilon attribute has to be set in training phase')

        if self.reach_destination(state):
            return ActionXY(
                0, 0) if self.kinematics == 'holonomic' else ActionRot(0, 0)

        self.last_state = self.transform(state)

        action, feats = self.model(self.last_state[0].unsqueeze(0),
                                   self.last_state[1].unsqueeze(0))

        action = action.squeeze()
        action = ActionXY(
            action[0].item(),
            action[1].item()) if self.kinematics == 'holonomic' else ActionRot(
                action[0].item(), action[1].item())

        self.last_state = self.last_state, feats
        return action
Example No. 3
    def predict(self, state):
        self_state = state.self_state

        close_obst = []
        for a in state.human_states:

            distance = math.sqrt((a.px - self_state.px)**2 +
                                 (a.py - self_state.py)**2)
            if distance <= 1:
                close_obst.append([a.px, a.py, a.radius])

        # Go to goal
        if len(close_obst) == 0:  # No obstacle in sensor skirt
            theta = np.arctan2(self_state.gy - self_state.py,
                               self_state.gx - self_state.px)
            vx = np.cos(theta) * self_state.v_pref
            vy = np.sin(theta) * self_state.v_pref
            action = ActionXY(vx, vy)

        # Obstacles nearby: keep heading toward the goal at half the preferred speed
        else:

            theta = np.arctan2(self_state.gy - self_state.py,
                               self_state.gx - self_state.px)
            vx = 0.5 * np.cos(theta) * self_state.v_pref
            vy = 0.5 * np.sin(theta) * self_state.v_pref
            action = ActionXY(vx, vy)

        return action
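The same go-to-goal rule as a standalone helper, with hypothetical numbers (the function name and inputs are for illustration only):

import numpy as np
from collections import namedtuple

ActionXY = namedtuple('ActionXY', ['vx', 'vy'])

def go_to_goal(px, py, gx, gy, v_pref, slow_down=False):
    # Heading from the current position towards the goal.
    theta = np.arctan2(gy - py, gx - px)
    scale = 0.5 if slow_down else 1.0          # half speed when obstacles are close
    return ActionXY(scale * np.cos(theta) * v_pref,
                    scale * np.sin(theta) * v_pref)

# Robot at the origin, goal at (1, 1), preferred speed 1 m/s:
print(go_to_goal(0.0, 0.0, 1.0, 1.0, 1.0))     # ActionXY(vx=0.707..., vy=0.707...)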
Example No. 4
 def predict(self, state):
     """
     Base predict method for all policies that take the pairwise joint state as input to the value network.
     The input to the value network is always of shape (batch_size, # humans, rotated joint state length)
     """
     if self.phase is None or self.device is None:
         raise AttributeError('Phase, device attributes have to be set!')
     if self.phase == 'train' and self.epsilon is None:
         raise AttributeError('Epsilon attribute has to be set in training phase')
     if self.phase == 'train':
         self.last_state = self.transform(state)
     if self.reach_destination(state):
         return ActionXY(0, 0) if self.kinematics == 'holonomic' else ActionRot(0, 0)
     state_tensor = state.to_tensor(add_batch_size=True, device=self.device)
     if self.phase == 'train':
         action = (
                 self.actor(state_tensor).squeeze().detach().numpy()
                 + np.random.normal(0, self.max_action * self.expl_noise, size=self.action_dim)
         ).clip(-self.max_action, self.max_action)
         speed = action[0]
         theta = action[1]
         Action = ActionXY(speed * np.cos(theta), speed * np.sin(theta)) \
             if self.kinematics == 'holonomic' else ActionRot(speed, theta)
         return Action, torch.tensor(action).float()
     else:
         with torch.no_grad():
             action = self.actor(state_tensor).squeeze().numpy()
             speed = action[0]
             theta = action[1]
             Action = ActionXY(speed * np.cos(theta), speed * np.sin(theta)) \
                 if self.kinematics == 'holonomic' else ActionRot(speed, theta)
         return Action, torch.tensor(action).float()
Example No. 5
    def build_action_space(self, v_pref):
        """
        The action space consists of the stop action plus speed_samples x rotation_samples (speed, rotation) pairs sampled over the permitted range.
        """
        holonomic = self.kinematics == 'holonomic'
        speeds = [(np.exp(
            (i + 1) / self.speed_samples) - 1) / (np.e - 1) * v_pref
                  for i in range(self.speed_samples)]
        if holonomic:
            rotations = np.linspace(0,
                                    2 * np.pi,
                                    self.rotation_samples,
                                    endpoint=False)
        else:
            rotations = np.linspace(-np.pi / 2, np.pi / 2,
                                    self.rotation_samples)

        action_space = [ActionXY(0, 0) if holonomic else ActionRot(0, 0)]
        for rotation, speed in itertools.product(rotations, speeds):
            if holonomic:
                action_space.append(
                    ActionXY(speed * np.cos(rotation),
                             speed * np.sin(rotation)))
            else:
                action_space.append(ActionRot(speed, rotation))

        self.speeds = speeds
        self.rotations = rotations
        self.action_space = action_space
Example No. 6
    def predict(self, state):
        """
        Base predict method for all policies that take the pairwise joint state as input to the value network.
        The input to the value network is always of shape (batch_size, # humans, rotated joint state length)
        """
        if self.phase is None or self.device is None:
            raise AttributeError('Phase, device attributes have to be set!')
        if self.phase == 'train' and self.epsilon is None:
            raise AttributeError('Epsilon attribute has to be set in training phase')

        if self.reach_destination(state):
            return ActionXY(0, 0) if self.kinematics == 'holonomic' else ActionRot(0, 0)
        if self.action_space is None:
            self.build_action_space(state.self_state.v_pref)

        occupancy_maps = None
        probability = np.random.random()
        if self.phase == 'train' and probability < self.epsilon:
            max_action = self.action_space[np.random.choice(len(self.action_space))]
        else:
            self.action_values = list()
            max_value = float('-inf')
            max_action = None
            for action in self.action_space:
                next_self_state = self.propagate(state.self_state, action)
                if self.query_env:
                    next_human_states, reward, done, info = self.env.onestep_lookahead(action)
                else:
                    next_human_states = [self.propagate(human_state, ActionXY(human_state.vx, human_state.vy))
                                       for human_state in state.human_states]
                    reward = self.compute_reward(next_self_state, next_human_states)
                batch_next_states = torch.cat([torch.Tensor([next_self_state + next_human_state]).to(self.device)
                                              for next_human_state in next_human_states], dim=0)
                rotated_batch_input = self.rotate(batch_next_states).unsqueeze(0)
                if self.with_om:
                    if occupancy_maps is None:
                        occupancy_maps = self.build_occupancy_maps(next_human_states).unsqueeze(0)
                    rotated_batch_input = torch.cat([rotated_batch_input, occupancy_maps], dim=2)
                # VALUE UPDATE
                next_state_value = self.model(rotated_batch_input).data.item()
                value = reward + pow(self.gamma, self.time_step * state.self_state.v_pref) * next_state_value
                self.action_values.append(value)
                if value > max_value:
                    max_value = value
                    max_action = action
            if max_action is None:
                raise ValueError('Value network is not well trained. ')

        if self.phase == 'train':
            self.last_state = self.transform(state)

        return max_action
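The backup used above discounts the next-state value by gamma raised to (time_step * v_pref), i.e. by how far the robot could travel in one step. A toy numeric check with made-up numbers:

gamma, time_step, v_pref = 0.9, 0.25, 1.0
reward = 0.1
next_state_value = 0.8
value = reward + pow(gamma, time_step * v_pref) * next_state_value
print(round(value, 3))   # 0.879: one step of reward plus the discounted next-state value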
Example No. 7
    def predict(self, state):
        """
        Base predict method for all policies that take the pairwise joint state as input to the value network.
        The input to the value network is always of shape (batch_size, # humans, rotated joint state length)
        """
        if self.phase is None or self.device is None:
            raise AttributeError('Phase, device attributes have to be set!')
        if self.phase == 'train' and self.epsilon is None:
            raise AttributeError('Epsilon attribute has to be set in training phase')

        if self.reach_destination(state):
            return ActionXY(0, 0)
        if self.action_space is None:
            self.action_space = build_action_space(state.self_state.v_pref)  # build the tensor action space once and cache it

        occupancy_maps = None
        probability = np.random.random()
        if self.phase == 'train' and probability < self.epsilon:
            max_action_id = np.random.choice(self.action_space.shape[0])
            max_action = self.action_space[max_action_id]
        else:
            self.action_values = list()
            max_value = float('-inf')
            max_action = None
            max_action_id = None
            for action_id, action in enumerate(self.action_space):
                action = ActionXY(action[0], action[1])
                next_self_state = self.propagate(state.self_state, action)
                if self.query_env:
                    next_human_states, reward, done, info = self.env.onestep_lookahead(action)
                else:
                    next_human_states = [self.propagate(human_state, ActionXY(human_state.vx, human_state.vy))
                                       for human_state in state.human_states]
                    reward = self.compute_reward(next_self_state, next_human_states)

                next_human_states = self.obstacle_to_human(next_human_states)
                batch_next_states = torch.cat([torch.Tensor([next_self_state + next_human_state]).to(self.device)
                                              for next_human_state in next_human_states if isinstance(next_human_state, ObservableState)], dim=0)
                rotated_batch_input = self.rotate(batch_next_states).unsqueeze(0)
                if self.with_om:
                    if occupancy_maps is None:
                        self_state = np.array([next_self_state.px.numpy(), next_self_state.py.numpy(), next_self_state.vx.numpy(), next_self_state.vy.numpy()])
                        occupancy_maps = self.build_state_space(self_state, next_human_states).unsqueeze(0).to(self.device)
                        #occupancy_maps = self.build_occupancy_maps(next_human_states).unsqueeze(0).to(self.device)
                    rotated_batch_input = torch.cat([rotated_batch_input, occupancy_maps], dim=2)
                # VALUE UPDATE
                max_action, max_value, max_action_id = self.value_update(state, action, max_action, max_value, reward, rotated_batch_input, action_id, max_action_id)
            if max_action is None:
                raise ValueError('Value network is not well trained. ')

        if self.phase == 'train':
            self.last_state = self.transform(state)
        self.last_action_id = max_action_id
        return max_action
Example No. 8
    def predict(self, state):
        """
        Input state is the robot's full state concatenated with the observable states of the other agents.

        To predict the best action, the agent samples actions and propagates one step to see how good each
        next state is, so the reward function is needed.

        """
        if self.phase is None or self.device is None:
            raise AttributeError('Phase, device attributes have to be set!')
        if self.phase == 'train' and self.epsilon is None:
            raise AttributeError('Epsilon attribute has to be set in training phase')

        if self.reach_destination(state):
            return ActionXY(0, 0) if self.kinematics == 'holonomic' else ActionRot(0, 0)
        if self.action_space is None:
            self.build_action_space(state.self_state.v_pref)
        if not state.human_states:
            assert self.phase != 'train'
            return self.select_greedy_action(state.self_state)

        probability = np.random.random()
        if self.phase == 'train' and probability < self.epsilon:
            max_action = self.action_space[np.random.choice(len(self.action_space))]
        else:
            self.action_values = list()
            max_min_value = float('-inf')
            max_action = None
            for action in self.action_space:
                next_self_state = self.propagate(state.self_state, action)
                if self.query_env:
                    next_human_states, reward, done, info = self.env.onestep_lookahead(action)
                else:
                    next_human_states = [self.propagate(human_state, ActionXY(human_state.vx, human_state.vy))
                                         for human_state in state.human_states]
                    reward = self.compute_reward(next_self_state, next_human_states)
                batch_next_states = torch.cat([torch.Tensor([next_self_state + next_human_state]).to(self.device)
                                              for next_human_state in next_human_states], dim=0)

                # VALUE UPDATE
                outputs = self.model(self.rotate(batch_next_states))
                min_output, min_index = torch.min(outputs, 0)
                min_value = reward + pow(self.gamma, self.time_step * state.self_state.v_pref) * min_output.data.item()
                self.action_values.append(min_value)
                if min_value > max_min_value:
                    max_min_value = min_value
                    max_action = action

        if self.phase == 'train':
            self.last_state = self.transform(state)

        return max_action
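The loop above scores each action by its worst-case (minimum) value over the humans and then keeps the action with the best worst case. A toy sketch with made-up values:

import torch

gamma, time_step, v_pref = 0.9, 0.25, 1.0
reward = 0.0
# Hypothetical per-human values for three candidate actions (rows) and two humans (columns).
outputs = torch.tensor([[0.8, 0.2],
                        [0.5, 0.4],
                        [0.9, 0.1]])
min_per_action, _ = torch.min(outputs, dim=1)
values = reward + gamma ** (time_step * v_pref) * min_per_action
best = int(torch.argmax(values))
print(values, best)   # action 1 wins: its worst-case value (0.4) beats 0.2 and 0.1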
Example No. 9
def build_action_space(v_pref=1.0, speed_samples=5, rotation_samples=16):
    """
    The action space consists of the stop action plus speed_samples x rotation_samples (speed, rotation) pairs sampled over the permitted range.
    """
    speeds = [(np.exp((i + 1) / speed_samples) - 1) / (np.e - 1) * v_pref for i in
              range(speed_samples)]

    rotations = np.linspace(0, 2 * np.pi, rotation_samples, endpoint=False)
    action_space = [ActionXY(0, 0)]
    for rotation, speed in itertools.product(rotations, speeds):
        action_space.append(ActionXY(speed * np.cos(rotation), speed * np.sin(rotation)))

    return torch.FloatTensor(action_space)
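A quick sanity check of the returned tensor, assuming the same imports the function above uses (numpy, itertools, torch and the ActionXY namedtuple): with the default 5 speeds and 16 rotations it holds the stop action plus 80 velocities, none faster than v_pref.

space = build_action_space(v_pref=1.0, speed_samples=5, rotation_samples=16)
assert space.shape == (5 * 16 + 1, 2)           # 81 actions, each a (vx, vy) pair
assert (space[0] == 0).all()                    # first entry is the stop action
# Every sampled velocity stays within the preferred speed (small float32 tolerance).
assert (space.norm(dim=1) <= 1.0 + 1e-6).all()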
Example No. 10
    def build_action_space(self, v_pref):
        """
        The action space consists of the stop action plus speed_samples x rotation_samples (speed, rotation) pairs sampled over the permitted range.
        """
        holonomic = self.kinematics == "holonomic"
        speeds = [(np.exp(
            (i + 1) / self.speed_samples) - 1) / (np.e - 1) * v_pref
                  for i in range(self.speed_samples)]
        if holonomic:
            rotations = np.linspace(0,
                                    2 * np.pi,
                                    self.rotation_samples,
                                    endpoint=False)
        else:
            rotations = np.linspace(
                -self.rotation_constraint,
                self.rotation_constraint,
                self.rotation_samples,
            )

        action_space = [ActionXY(0, 0) if holonomic else ActionRot(0, 0)]
        for j, speed in enumerate(speeds):
            if j == 0:
                # index for action (0, 0)
                self.action_group_index.append(0)
            # only two groups in speeds
            if j < 3:
                speed_index = 0
            else:
                speed_index = 1

            for i, rotation in enumerate(rotations):
                rotation_index = i // 2

                action_index = (speed_index * self.sparse_rotation_samples +
                                rotation_index)
                self.action_group_index.append(action_index)

                if holonomic:
                    action_space.append(
                        ActionXY(speed * np.cos(rotation),
                                 speed * np.sin(rotation)))
                else:
                    action_space.append(ActionRot(speed, rotation))

        self.speeds = speeds
        self.rotations = rotations
        self.action_space = action_space
Example No. 11
    def optimize_epoch(self, num_epochs):
        if self.v_optimizer is None:
            raise ValueError('Learning rate is not set!')
        if self.data_loader is None:
            self.data_loader = DataLoader(self.memory,
                                          self.batch_size,
                                          shuffle=True)
        for epoch in range(num_epochs):
            epoch_v_loss = 0
            epoch_s_loss = 0
            logging.debug('{}-th epoch starts'.format(epoch))

            update_counter = 0
            for data in self.data_loader:
                robot_states, human_states, actions, values, dones, rewards, next_robot_state, next_human_states = data

                # optimize value estimator
                self.v_optimizer.zero_grad()
                actions = actions.to(self.device)
                outputs = self.value_estimator(
                    (robot_states,
                     human_states)).gather(1, actions.unsqueeze(1))
                values = values.to(self.device)
                loss = self.criterion(outputs, values)
                loss.backward()
                self.v_optimizer.step()
                epoch_v_loss += loss.data.item()

                # optimize state predictor
                if self.state_predictor.trainable:
                    update_state_predictor = True
                    if update_counter % self.state_predictor_update_interval != 0:
                        update_state_predictor = False

                    if update_state_predictor:
                        self.s_optimizer.zero_grad()
                        _, next_human_states_est = self.state_predictor(
                            (robot_states, human_states), None)
                        loss = self.criterion(next_human_states_est,
                                              next_human_states)
                        loss.backward()
                        self.s_optimizer.step()
                        epoch_s_loss += loss.data.item()
                    update_counter += 1
                else:
                    _, next_human_states_est = self.state_predictor(
                        (robot_states, human_states), ActionXY(0, 0))
                    loss = self.criterion(next_human_states_est,
                                          next_human_states)
                    epoch_s_loss += loss.data.item()

            logging.debug('{}-th epoch ends'.format(epoch))
            self.writer.add_scalar('IL/epoch_v_loss',
                                   epoch_v_loss / len(self.memory), epoch)
            self.writer.add_scalar('IL/epoch_s_loss',
                                   epoch_s_loss / len(self.memory), epoch)
            logging.info('Average loss in epoch %d: %.2E, %.2E', epoch,
                         epoch_v_loss / len(self.memory),
                         epoch_s_loss / len(self.memory))
        return
Example No. 12
    def predict(self, state):
        sf_state = []
        for agent_state in state:
            sf_state.append((
                agent_state.px,
                agent_state.py,
                agent_state.vx,
                agent_state.vy,
                agent_state.gx,
                agent_state.gy,
            ))

        sim = socialforce.Simulator(
            np.array(sf_state),
            delta_t=self.time_step,
            initial_speed=self.initial_speed,
            v0=self.v0,
            sigma=self.sigma,
        )
        sim.step()
        actions = [
            ActionXY(sim.state[i, 2], sim.state[i, 3])
            for i in range(len(state))
        ]
        del sim

        return actions
Example No. 13
    def clip_action(self, raw_action, v_pref):
        """
        Quantize and clip a raw action: for holonomic kinematics, rescale (vx, vy) so that its norm does not
        exceed v_pref; otherwise clip the changes in speed and heading to [-0.1, 0.1] (for sim2real).
        """
        # quantize the action
        holonomic = self.config.action_space.kinematics == 'holonomic'
        # clip the action
        if holonomic:
            act_norm = np.linalg.norm(raw_action)
            if act_norm > v_pref:
                raw_action[0] = raw_action[0] / act_norm * v_pref
                raw_action[1] = raw_action[1] / act_norm * v_pref
            return ActionXY(raw_action[0], raw_action[1])
        else:
            # for sim2real
            raw_action[0] = np.clip(raw_action[0], -0.1,
                                    0.1)  # action[0] is change of v
            # raw[0, 1] = np.clip(raw[0, 1], -0.25, 0.25) # action[1] is change of w
            # raw[0, 0] = np.clip(raw[0, 0], -state.self_state.v_pref, state.self_state.v_pref) # action[0] is v
            raw_action[1] = np.clip(raw_action[1], -0.1,
                                    0.1)  # action[1] is change of theta

            return ActionRot(raw_action[0], raw_action[1])
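A self-contained sketch of just the holonomic branch, handy as a sanity check (ActionXY as a namedtuple; the numbers are illustrative):

import numpy as np
from collections import namedtuple

ActionXY = namedtuple('ActionXY', ['vx', 'vy'])

def clip_holonomic(raw_action, v_pref):
    # Rescale the raw (vx, vy) so its magnitude never exceeds the preferred speed.
    act_norm = np.linalg.norm(raw_action)
    if act_norm > v_pref:
        raw_action = [raw_action[0] / act_norm * v_pref,
                      raw_action[1] / act_norm * v_pref]
    return ActionXY(raw_action[0], raw_action[1])

print(clip_holonomic([3.0, 4.0], v_pref=1.0))   # ActionXY(vx=0.6, vy=0.8)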
Example No. 14
    def predict(self, state):
        """

        :param state:
        :return:
        """
        sf_state = []
        self_state = state.self_state
        sf_state.append((self_state.px, self_state.py, self_state.vx,
                         self_state.vy, self_state.gx, self_state.gy))
        for human_state in state.human_states:
            # approximate desired direction with current velocity
            if human_state.vx == 0 and human_state.vy == 0:
                gx = np.random.random()
                gy = np.random.random()
            else:
                gx = human_state.px + human_state.vx
                gy = human_state.py + human_state.vy
            sf_state.append((human_state.px, human_state.py, human_state.vx,
                             human_state.vy, gx, gy))
        sim = socialforce.Simulator(np.array(sf_state),
                                    delta_t=self.time_step,
                                    initial_speed=self.initial_speed,
                                    v0=self.v0,
                                    sigma=self.sigma)
        sim.step()
        action = ActionXY(sim.state[0, 2], sim.state[0, 3])

        self.last_state = state

        return action
Example No. 15
    def predict(self, state):
        """ Centralized planning for all agents """
        params = self.neighbor_dist, self.max_neighbors, self.time_horizon, self.time_horizon_obst
        if self.sim is not None and self.sim.getNumAgents() != len(state):
            del self.sim
            self.sim = None

        if self.sim is None:
            self.sim = rvo2.PyRVOSimulator(self.time_step, *params, self.radius, self.max_speed)
            for agent_state in state:
                self.sim.addAgent(agent_state.position, *params, agent_state.radius + 0.01 + self.safety_space,
                                  self.max_speed, agent_state.velocity)
        else:
            for i, agent_state in enumerate(state):
                self.sim.setAgentPosition(i, agent_state.position)
                self.sim.setAgentVelocity(i, agent_state.velocity)

        # Set the preferred velocity to be a vector of unit magnitude (speed) in the direction of the goal.
        for i, agent_state in enumerate(state):
            velocity = np.array((agent_state.gx - agent_state.px, agent_state.gy - agent_state.py))
            speed = np.linalg.norm(velocity)
            pref_vel = velocity / speed if speed > 1 else velocity
            self.sim.setAgentPrefVelocity(i, (pref_vel[0], pref_vel[1]))

        self.sim.doStep()
        actions = [ActionXY(*self.sim.getAgentVelocity(i)) for i in range(len(state))]

        return actions
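The preferred velocity handed to RVO2 above is simply the goal direction, capped at unit magnitude. A small standalone sketch of that computation with made-up coordinates:

import numpy as np

def pref_velocity(px, py, gx, gy):
    # Vector from the agent to its goal, normalised only when it is longer than 1.
    velocity = np.array((gx - px, gy - py))
    speed = np.linalg.norm(velocity)
    return velocity / speed if speed > 1 else velocity

print(pref_velocity(0.0, 0.0, 3.0, 4.0))   # [0.6 0.8] -> unit magnitude towards the goal
print(pref_velocity(0.0, 0.0, 0.3, 0.4))   # [0.3 0.4] -> already short, left as-is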
Example No. 16
    def test_propagate_occupancy_map_agent_center_movement_left_low_channel_size_three(
            self):
        cell_num = 4
        cell_size = 1
        om_channel_size = 3
        time_step = .1
        human = Human()
        human.set(px=0, py=0, vx=0, vy=0, gx=0, gy=0, theta=0)
        other_agents = np.zeros((1, 4))
        other_agents[0, 0] = -1.5
        other_agents[0, 1] = 1.5
        occupancy_map = build_occupancy_map(human, other_agents, cell_num,
                                            cell_size, om_channel_size)

        action = ActionXY(-1, -1)
        next_state = propagate(human.get_observable_state(),
                               action,
                               time_step,
                               kinematics='holonomic')
        next_occupancy_map = propagate_occupancy_map(
            occupancy_map, human.get_observable_state(), action, time_step,
            'holonomic', cell_num, cell_size, om_channel_size)
        expected_next_occupancy_map = build_occupancy_map(
            next_state, other_agents, cell_num, cell_size, om_channel_size)

        self.assertTrue(
            np.array_equal(next_occupancy_map, expected_next_occupancy_map))
Example No. 17
    def act(self, state_tensor):  # state_tensor is a batch of tensors rather than a joint state
        # value, mu, cov = self.value_action_predictor(state_tensor)
        # dist = MultivariateNormal(mu, cov)
        # actions = dist.sample()
        # action_log_probs = dist.log_prob(actions)
        # action_to_take = [ActionXY(action[0], action[1]) for action in actions.cpu().numpy()]

        value, alpha_beta_1, alpha_beta_2 = self.value_action_predictor(
            state_tensor)
        vx_dist = Beta(alpha_beta_1[:, 0], alpha_beta_1[:, 1])
        vy_dist = Beta(alpha_beta_2[:, 0], alpha_beta_2[:, 1])
        actions = torch.cat(
            [vx_dist.sample().unsqueeze(1),
             vy_dist.sample().unsqueeze(1)],
            dim=1)
        action_log_probs = vx_dist.log_prob(
            actions[:, 0]).unsqueeze(1) + vy_dist.log_prob(
                actions[:, 1]).unsqueeze(1)
        action_to_take = [
            ActionXY(action[0] * 2 - 1, action[1] * 2 - 1)
            for action in actions.cpu().numpy()
        ]

        return value, actions, action_log_probs, action_to_take
Example No. 18
    def compute_action_vals_activs(self, action, state):
        next_self_state = self.propagate(state.self_state, action)

        if self.query_env:
            next_human_states, reward, done, info = self.env.onestep_lookahead(
                action)
        else:
            next_human_states = [
                self.propagate(human_state,
                               ActionXY(human_state.vx, human_state.vy))
                for human_state in state.human_states
            ]
            reward = self.compute_reward(next_self_state, next_human_states)
        batch_next_states = torch.cat([
            torch.Tensor([next_self_state + next_human_state]).to(self.device)
            for next_human_state in next_human_states
        ],
                                      dim=0)
        rotated_batch_input = self.rotate(batch_next_states)

        occupancy_maps = None
        if self.with_om:
            if occupancy_maps is None:
                occupancy_maps = self.build_occupancy_maps(
                    next_human_states).unsqueeze(0)
            rotated_batch_input = torch.cat(
                [rotated_batch_input,
                 occupancy_maps.to(self.device)], dim=2)
        # VALUE UPDATE

        rotated_batch_input = rotated_batch_input.unsqueeze(0)

        return action, rotated_batch_input, reward
Example No. 19
    def test_overlap_no_movement(self):
        cell_num = 4
        cell_size = 1
        om_channel_size = 1
        human = Human()
        human.set(px=0, py=0, vx=0, vy=0, gx=0, gy=0, theta=0)
        other_agents = np.zeros((1, 4))
        occupancy_map = build_occupancy_map(human, other_agents, cell_num,
                                            cell_size, om_channel_size)

        radius = 1
        state = ObservableState(0, 0, 0, 0, radius)
        action = ActionXY(0, 0)
        next_state = propagate(state,
                               action,
                               time_step=.1,
                               kinematics='holonomic')
        next_occupancy_map = propagate_occupancy_map(occupancy_map, state,
                                                     action, .1, 'holonomic',
                                                     cell_num, cell_size,
                                                     om_channel_size)
        expected_next_occupancy_map = build_occupancy_map(
            next_state, other_agents, cell_num, cell_size, om_channel_size)

        self.assertTrue(
            np.array_equal(next_occupancy_map, expected_next_occupancy_map))
Example No. 20
    def __init__(self, robot_data):

        self.robot_radius = robot_data['radius']
        self.robot_pref_speed = robot_data[
            'pref_speed']  # TODO: Make this a dynamic variable
        self.robot_mpc = robot_data['mpc']
        if self.robot_mpc:
            self.dt = 0.1
            self.times_steps = int(1.0 / self.dt)
            self.mpc = traj_opt(dt=self.dt, time_steps=self.times_steps)
            self.mpc_state = ModelState()
            self.mpc_psi = None

        self.stop_moving_flag = False
        self.state = ModelState()
        self.STATE_SET_FLAG = False
        self.goal = PoseStamped()
        self.GOAL_RECEIVED_FLAG = False
        self.GOAL_THRESH = 0.5

        # External Agent(s) state
        self.other_agents_state = [
            ObservableState(float("inf"), float("inf"), 0, 0, 0.3)
        ]
        self.OBS_RECEIVED_FLAG = False

        # what we use to send commands
        self.desired_action = ActionXY(0, 0)
Example No. 21
    def predict(self, state):
        """
        Produce action for agent with circular specification of social force model.
        """
        # Pull force to goal
        delta_x = state.self_state.gx - state.self_state.px
        delta_y = state.self_state.gy - state.self_state.py
        dist_to_goal = np.sqrt(delta_x**2 + delta_y**2)
        desired_vx = (delta_x / dist_to_goal) * state.self_state.v_pref
        desired_vy = (delta_y / dist_to_goal) * state.self_state.v_pref
        KI = self.config.sf.KI  # Inverse of relocation time K_i
        curr_delta_vx = KI * (desired_vx - state.self_state.vx)
        curr_delta_vy = KI * (desired_vy - state.self_state.vy)

        # Push force(s) from other agents
        A = self.config.sf.A  # Other observations' interaction strength: 1.5
        B = self.config.sf.B  # Other observations' interaction range: 1.0
        interaction_vx = 0
        interaction_vy = 0
        for other_human_state in state.human_states:
            delta_x = state.self_state.px - other_human_state.px
            delta_y = state.self_state.py - other_human_state.py
            dist_to_human = np.sqrt(delta_x**2 + delta_y**2)
            interaction_vx += A * np.exp(
                (state.self_state.radius + other_human_state.radius -
                 dist_to_human) / B) * (delta_x / dist_to_human)
            interaction_vy += A * np.exp(
                (state.self_state.radius + other_human_state.radius -
                 dist_to_human) / B) * (delta_y / dist_to_human)

        # Sum of push & pull forces
        total_delta_vx = (curr_delta_vx +
                          interaction_vx) * self.config.env.time_step
        total_delta_vy = (curr_delta_vy +
                          interaction_vy) * self.config.env.time_step

        # clip the speed so that sqrt(vx^2 + vy^2) <= v_pref
        new_vx = state.self_state.vx + total_delta_vx
        new_vy = state.self_state.vy + total_delta_vy
        act_norm = np.linalg.norm([new_vx, new_vy])

        if act_norm > state.self_state.v_pref:
            return ActionXY(new_vx / act_norm * state.self_state.v_pref,
                            new_vy / act_norm * state.self_state.v_pref)
        else:
            return ActionXY(new_vx, new_vy)
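For intuition, the repulsive term above decays exponentially with the gap between two agents. A tiny numeric check using the A=1.5, B=1.0 values quoted in the comments and made-up radii and distances:

import numpy as np

A, B = 1.5, 1.0                   # interaction strength and range (from the comments above)
r_robot, r_human = 0.3, 0.3       # illustrative radii

for dist in (0.7, 1.0, 2.0):      # centre-to-centre distance in metres
    push = A * np.exp((r_robot + r_human - dist) / B)
    print(f'dist={dist:.1f} m -> push magnitude {push:.3f}')
# The push is strongest when the discs nearly touch (gap ~0.1 m) and fades quickly past ~1 m.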
Example No. 22
    def predict(self, state):
        """
        Base predict method for all policies that take the pairwise joint state as input to the value network.
        The input to the value network is always of shape (batch_size, # humans, rotated joint state length)
        """
        if self.phase is None or self.device is None:
            raise AttributeError('Phase, device attributes have to be set!')
        if self.phase == 'train' and self.epsilon is None:
            raise AttributeError(
                'Epsilon attribute has to be set in training phase')

        if self.reach_destination(state):
            return ActionXY(
                0, 0) if self.kinematics == 'holonomic' else ActionRot(0, 0)
        if self.action_space is None:
            self.build_action_space(state.robot_state.v_pref)

        probability = np.random.random()
        if self.phase == 'train' and probability < self.epsilon:
            max_action = self.action_space[np.random.choice(
                len(self.action_space))]
        else:
            max_action = None
            max_value = float('-inf')
            max_traj = None

            if self.do_action_clip:
                state_tensor = state.to_tensor(add_batch_size=True,
                                               device=self.device)
                action_space_clipped = self.action_clip(
                    state_tensor, self.action_space, self.planning_width)
            else:
                action_space_clipped = self.action_space

            for action in action_space_clipped:
                state_tensor = state.to_tensor(add_batch_size=True,
                                               device=self.device)
                next_state = self.state_predictor(state_tensor, action)
                max_next_return, max_next_traj = self.V_planning(
                    next_state, self.planning_depth, self.planning_width)
                reward_est = self.estimate_reward(state, action)
                value = reward_est + self.get_normalized_gamma(
                ) * max_next_return
                if value > max_value:
                    max_value = value
                    max_action = action
                    max_traj = [(state_tensor, action, reward_est)
                                ] + max_next_traj
            if max_action is None:
                raise ValueError('Value network is not well trained.')

        if self.phase == 'train':
            self.last_state = self.transform(state)
        else:
            self.traj = max_traj

        return max_action
Example No. 23
    def predict(self, state):
        """
        Create a rvo2 simulation at each time step and run one step
        Python-RVO2 API: https://github.com/sybrenstuvel/Python-RVO2/blob/master/src/rvo2.pyx
        How simulation is done in RVO2: https://github.com/sybrenstuvel/Python-RVO2/blob/master/src/Agent.cpp

        Agent doesn't stop moving after it reaches the goal, because once it stops moving, the reciprocal rule is broken

        :param state:
        :return:
        """
        self_state = state.self_state
        params = self.neighbor_dist, self.max_neighbors, self.time_horizon, self.time_horizon_obst
        if self.sim is not None and self.sim.getNumAgents() != len(
                state.human_states) + 1:
            del self.sim
            self.sim = None
        if self.sim is None:
            self.sim = rvo2.PyRVOSimulator(self.time_step, *params,
                                           self.radius, self.max_speed)
            self.sim.addAgent(self_state.position, *params,
                              self_state.radius + 0.01 + self.safety_space,
                              self_state.v_pref, self_state.velocity)
            for human_state in state.human_states:
                self.sim.addAgent(
                    human_state.position, *params,
                    human_state.radius + 0.01 + self.safety_space,
                    self.max_speed, human_state.velocity)

        else:
            self.sim.setAgentPosition(0, self_state.position)
            self.sim.setAgentVelocity(0, self_state.velocity)
            for i, human_state in enumerate(state.human_states):
                self.sim.setAgentPosition(i + 1, human_state.position)
                self.sim.setAgentVelocity(i + 1, human_state.velocity)

        # Set the preferred velocity to be a vector of unit magnitude (speed) in the direction of the goal.
        velocity = np.array(
            (self_state.gx - self_state.px, self_state.gy - self_state.py))
        speed = np.linalg.norm(velocity)
        pref_vel = velocity / speed if speed > 1 else velocity

        # Perturb a little to avoid deadlocks due to perfect symmetry.
        # perturb_angle = np.random.random() * 2 * np.pi
        # perturb_dist = np.random.random() * 0.01
        # perturb_vel = np.array((np.cos(perturb_angle), np.sin(perturb_angle))) * perturb_dist
        # pref_vel += perturb_vel

        self.sim.setAgentPrefVelocity(0, tuple(pref_vel))
        for i, human_state in enumerate(state.human_states):
            # unknown goal position of other humans
            self.sim.setAgentPrefVelocity(i + 1, (0, 0))

        self.sim.doStep()
        action = ActionXY(*self.sim.getAgentVelocity(0))
        self.last_state = state

        return action
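A minimal standalone use of the same Python-RVO2 calls, assuming the rvo2 package used above; all parameter values here are illustrative:

import rvo2

time_step = 0.25
params = (10.0, 10, 5.0, 5.0)     # neighbor_dist, max_neighbors, time_horizon, time_horizon_obst
sim = rvo2.PyRVOSimulator(time_step, *params, 0.3, 1.0)

robot = sim.addAgent((0.0, 0.0), *params, 0.3, 1.0, (0.0, 0.0))
human = sim.addAgent((2.0, 0.0), *params, 0.3, 1.0, (0.0, 0.0))

sim.setAgentPrefVelocity(robot, (1.0, 0.0))   # robot wants to move towards +x
sim.setAgentPrefVelocity(human, (0.0, 0.0))   # unknown human goal, as in the example above
sim.doStep()
print(sim.getAgentVelocity(robot))            # collision-free velocity chosen by ORCA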
Example No. 24
 def test_no_movement(self):
     radius = 1
     state = ObservableState(0, 0, 0, 0, radius)
     action = ActionXY(0, 0)
     next_state = propagate(state,
                            action,
                            time_step=.1,
                            kinematics='holonomic')
     self.assertEqual(next_state, state)
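These tests rely on a propagate helper that is not shown here. A minimal sketch of the holonomic behaviour they assume (State and propagate_holonomic are illustrative stand-ins, not the project's own types):

from collections import namedtuple

ActionXY = namedtuple('ActionXY', ['vx', 'vy'])
# Simplified observable state for illustration (px, py, vx, vy, radius).
State = namedtuple('State', ['px', 'py', 'vx', 'vy', 'radius'])

def propagate_holonomic(state, action, time_step):
    # Constant-velocity update: advance the position for one time step, set velocity to the action.
    return State(state.px + action.vx * time_step,
                 state.py + action.vy * time_step,
                 action.vx, action.vy, state.radius)

s = State(0, 0, 0, 0, radius=1)
print(propagate_holonomic(s, ActionXY(0, 0), time_step=0.1))  # unchanged, as in test_no_movement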
Example No. 25
 def predict(self, state):
     vx = 0
     vy = 0
     if self.move[0]: vx = -1
     if self.move[1]: vx = 1
     if self.move[2]: vy = 1
     if self.move[3]: vy = -1
     action = ActionXY(vx, vy)
     return action
Example No. 26
    def predict(self, state):
        self_state = state.self_state
        theta = np.arctan2(self_state.gy - self_state.py,
                           self_state.gx - self_state.px)
        vx = np.cos(theta) * self_state.v_pref
        vy = np.sin(theta) * self_state.v_pref
        action = ActionXY(vx, vy)

        return action
Example No. 27
    def act(self, ob):
        if self.policy is None:
            raise AttributeError('Policy attribute has to be set!')
        state = JointState(self.get_full_state(), ob)

        action = self.policy.predict(state)
        # IMPORTANT: change this if you do not want to create simulation data; it overrides the policy's output with a stop action
        action = ActionXY(0, 0)
        return action
Example No. 28
 def test_holonomic_diagonal_up_movement(self):
     radius = 1
     time_step = .1
     state = ObservableState(0, 0, 0, 0, radius)
     action = ActionXY(np.sqrt(2), np.sqrt(2))
     next_state = propagate(state,
                            action,
                            time_step=time_step,
                            kinematics='holonomic')
     expected_state = ObservableState(time_step * np.sqrt(2),
                                      time_step * np.sqrt(2), action.vx,
                                      action.vy, radius)
     self.assertEqual(next_state, expected_state)
Example No. 29
    def action_clip(self, state, action_space, width, depth=1):
        values = []
        actions = []
        if self.kinematics == "holonomic":
            actions.append(ActionXY(0, 0))
        else:
            actions.append(ActionRot(0, 0))
        # actions.append(ActionXY(0, 0))
        next_robot_states = None
        next_human_states = None
        pre_next_state = self.state_predictor(state, actions)
        for action in action_space:
            # actions = []
            # actions.append(action)
            next_robot_state = self.compute_next_robot_state(state[0], action)
            next_human_state = pre_next_state[1]
            if next_robot_states is None and next_human_states is None:
                next_robot_states = next_robot_state
                next_human_states = next_human_state
            else:
                next_robot_states = torch.cat((next_robot_states, next_robot_state), dim=0)
                next_human_states = torch.cat((next_human_states, next_human_state), dim=0)
            next_state_tensor = (next_robot_state, next_human_state)
            next_state = tensor_to_joint_state(next_state_tensor)
            reward_est, _ = self.reward_estimator.estimate_reward_on_predictor(state, next_state)
            values.append(reward_est)
        next_return = self.value_estimator((next_robot_states, next_human_states)).squeeze()
        next_return = np.array(next_return.data.detach())
        values = np.array(values) + self.get_normalized_gamma() * next_return
        values = values.tolist()

        if self.sparse_search:
            # self.sparse_speed_samples = 2
            # search in a sparse grained action space
            added_groups = set()
            max_indices = np.argsort(np.array(values))[::-1]
            clipped_action_space = []
            for index in max_indices:
                if self.action_group_index[index] not in added_groups:
                    clipped_action_space.append(action_space[index])
                    added_groups.add(self.action_group_index[index])
                    if len(clipped_action_space) == width:
                        break
        else:
            max_indexes = np.argpartition(np.array(values), -width)[-width:]
            clipped_action_space = [action_space[i] for i in max_indexes]

        # print(clipped_action_space)
        return clipped_action_space
Example No. 30
    def predict(self, state):
        """
        Input state is the robot's full state concatenated with the observable states of the other agents.

        To predict the best action, the agent samples actions and propagates one step to see how good each
        next state is, so the reward function is needed.

        """
        if self.phase is None or self.device is None:
            raise AttributeError('Phase, device attributes have to be set!')
        if self.phase == 'train' and self.epsilon is None:
            raise AttributeError(
                'Epsilon attribute has to be set in training phase')
        if self.reach_destination(state):
            return ActionXY(
                0, 0) if self.kinematics == 'holonomic' else ActionRot(0, 0)

        host_agent = agent.Agent(state.self_state.px, state.self_state.py,
                                 state.self_state.gx, state.self_state.gy,
                                 state.self_state.radius,
                                 state.self_state.v_pref,
                                 state.self_state.theta, 0)
        host_agent.vel_global_frame = np.array(
            [state.self_state.vx, state.self_state.vy])

        other_agents = []
        for i, human_state in enumerate(state.human_states):
            if self.human_state_in_FOV(state.self_state, human_state):
                x = human_state.px
                y = human_state.py
                v_x = human_state.vx
                v_y = human_state.vy
                heading_angle = np.arctan2(v_y, v_x)
                pref_speed = np.linalg.norm(np.array([v_x, v_y]))
                goal_x = x + 5.0
                goal_y = y + 5.0
                other_agents.append(
                    agent.Agent(x, y, goal_x, goal_y, human_state.radius,
                                pref_speed, heading_angle, i + 1))
                other_agents[-1].vel_global_frame = np.array([v_x, v_y])
        obs = host_agent.observe(other_agents)[1:]
        obs = np.expand_dims(obs, axis=0)
        predictions = self.net.predict_p(obs, None)[0]
        raw_action = self.possible_actions.actions[np.argmax(predictions)]
        action = ActionRot(host_agent.pref_speed * raw_action[0],
                           util.wrap(raw_action[1]))
        return action