Example #1
    def evaluate_agent(self, agent, steps=1000):
        """
        Runs the world loop for a given agent (see the usage example in the comments after if __name__ == "__main__").
        Whether the run is rendered is controlled by self.visual.
        :param agent: SimpleCarAgent
        :param steps: number of loop iterations
        :return: the agent's mean reward per step and the number of circles driven
        """
        agent.evaluate_mode = True
        self.set_agents([agent])
        rewards = []
        if self.visual:
            scale = self._prepare_visualization()
        for _ in range(steps):
            vision = self.vision_for(agent)
            action = agent.choose_action(vision)
            next_agent_state, collision = self.physics.move(
                self.agent_states[agent], action)
            self.circles[agent] += angle(self.agent_states[agent].position,
                                         next_agent_state.position) / (2 * pi)
            self.agent_states[agent] = next_agent_state
            rewards.append(self.reward(next_agent_state, collision, vision))
            agent.receive_feedback(rewards[-1])
            if self.visual:
                self.visualize(scale)
                if self._update_display() == pygame.QUIT:
                    break
                # sleep(0.05)

        return np.mean(rewards), self.circles[agent]
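A minimal usage sketch of evaluate_agent; the objects below are assumed to exist (only the call itself comes from the example above):

    # Hedged sketch: `world` is assumed to be an instance of the enclosing world class
    # with self.visual already set, and `agent` a trained SimpleCarAgent.
    mean_reward, circles = world.evaluate_agent(agent, steps=1000)
    print("Mean reward per step: %.3f, circles driven: %.2f" % (mean_reward, circles))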
Example #2
    def visualize(self, scale):
        """
        Renders the scene. This method and all the "private" ones (names starting with _) are optional to study.
        """
        for i, agent in enumerate(self.agents):
            state = self.agent_states[agent]
            surface = self._agent_surfaces[i]
            rays_lengths = self.vision_for(agent)[-agent.rays:]
            self._agent_images[i] = [
                self._draw_ladar(rays_lengths, state, scale),
                self._get_agent_image(surface, state, scale)
            ]

        if len(self.agents) == 1:
            a = self.agents[0]
            if a.step > 0:
                draw_text("Reward: %.3f" % a.reward_history[-1],
                          self._info_surface,
                          scale,
                          self.size,
                          text_color=white,
                          bg_color=black)
                draw_text("Step: %d Avg reward: %.3f" % (a.step, a.avg_reward),
                          self._info_surface,
                          scale,
                          self.size,
                          text_color=white,
                          bg_color=black,
                          tlpoint=(self._info_surface.get_width() - 790, 10))
                steer, acc = a.chosen_actions_history[-1]
                state = self.agent_states[a]
                draw_text("Action: steer.: %.2f, accel: %.2f" % (steer, acc),
                          self._info_surface,
                          scale,
                          self.size,
                          text_color=white,
                          bg_color=black,
                          tlpoint=(self._info_surface.get_width() - 500, 10))
                draw_text("Inputs: |v|=%.2f, sin(angle): %.2f, circle: %.2f" %
                          (abs(state.velocity),
                           np.sin(angle(-state.position,
                                        state.heading)), self.circles[a]),
                          self._info_surface,
                          scale,
                          self.size,
                          text_color=white,
                          bg_color=black,
                          tlpoint=(self._info_surface.get_width() - 500, 50))
            return pygame.surfarray.array3d(self._agent_surfaces[0])
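The array returned by visualize can be captured outside the interactive loop, e.g. to dump frames for debugging. A hedged sketch, assuming world and scale are set up as in Example #1:

    import pygame

    frame = world.visualize(scale)  # (width, height, 3) pixel array from pygame.surfarray
    pygame.image.save(pygame.surfarray.make_surface(frame), "frame.png")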
Example #3
    def vision_for(self, agent):
        """
        Builds the world view for the given agent
        :param agent: the car from which we are looking
        :return: a list containing the car's speed magnitude, the sine of the directed angle between the car's heading
        and the direction towards the centre, and agent.rays distances to the nearest track walls (run the visualisation and it becomes clear)
        """
        state = self.agent_states[agent]
        vision = [
            abs(state.velocity),
            np.sin(angle(-state.position, state.heading))
        ]
        extras = len(vision)

        delta = pi / (agent.rays - 1)
        start = rotate(state.heading, -pi / 2)

        sectors = len(self.map)
        for i in range(agent.rays):
            # define ray direction
            ray = rotate(start, i * delta)

            # define ray's intersections with walls
            vision.append(np.inf)
            for j in range(sectors):
                inner_wall = self.map[j - 1][0], self.map[j][0]
                outer_wall = self.map[j - 1][1], self.map[j][1]

                intersect = intersect_ray_with_segment((state.position, ray),
                                                       inner_wall)
                intersect = abs(
                    intersect -
                    state.position) if intersect is not None else np.inf
                if intersect < vision[-1]:
                    vision[-1] = intersect

                intersect = intersect_ray_with_segment((state.position, ray),
                                                       outer_wall)
                intersect = abs(
                    intersect -
                    state.position) if intersect is not None else np.inf
                if intersect < vision[-1]:
                    vision[-1] = intersect

            assert vision[-1] < np.inf, \
                "Something went wrong: {}, {}".format(str(state), str(agent.chosen_actions_history[-1]))
        assert len(vision) == agent.rays + extras, \
            "Something went wrong: {}, {}".format(str(state), str(agent.chosen_actions_history[-1]))
        return vision
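The returned vector thus has a fixed layout: the speed magnitude, the sine of the heading angle, then agent.rays wall distances. The helper below is illustrative only (not part of the original code) and simply unpacks that layout:

    def unpack_vision(vision, rays):
        speed = vision[0]                     # |velocity| of the car
        heading_sin = vision[1]               # sin of the angle towards the track centre
        wall_distances = vision[2:2 + rays]   # one distance per ray, nearest wall hit
        return speed, heading_sin, wall_distances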
Example #4
 def transition(self):
     """
     Main loop logic:
      compute each agent's view of the world,
      let the agent choose an action,
      apply the state transition,
      and deliver the world's feedback on the chosen action
     """
     for a in self.agents:
         vision = self.vision_for(a)
         action = a.choose_action(vision)
         next_agent_state, collision = self.physics.move(
             self.agent_states[a], action)
         self.circles[a] += angle(self.agent_states[a].position,
                                  next_agent_state.position) / (2 * pi)
         self.agent_states[a] = next_agent_state
         a.receive_feedback(self.reward(next_agent_state, collision,
                                        vision))
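A hedged sketch of a driver loop built on transition(); the visualisation calls are taken from Example #1, while the run_world name and the steps argument are assumptions:

    def run_world(world, steps=1000):
        # pygame is assumed to be imported at module level, as in Example #1
        if world.visual:
            scale = world._prepare_visualization()
        for _ in range(steps):
            world.transition()
            if world.visual:
                world.visualize(scale)
                if world._update_display() == pygame.QUIT:
                    break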
Example #5
    def eval_reward(self, state, collision):
        """
        The "default" reward used in evaluate mode.
        Convenient so that you do not have to undo your changes to the reward function when evaluating the result.
        """
        a = -np.sin(angle(-state.position, state.heading))
        heading_reward = 1 if a > 0.1 else a if a > 0 else 0
        heading_penalty = a if a <= 0 else 0
        idle_penalty = 0 if abs(
            state.velocity) > self.MIN_SPEED else -self.IDLENESS_PENALTY
        speeding_penalty = 0 if abs(
            state.velocity) < self.MAX_SPEED else -self.SPEEDING_PENALTY * abs(
                state.velocity)
        collision_penalty = -max(abs(state.velocity),
                                 0.1) * int(collision) * self.COLLISION_PENALTY

        return heading_reward * self.HEADING_REWARD + heading_penalty * self.WRONG_HEADING_PENALTY + collision_penalty \
            + idle_penalty + speeding_penalty
Example #6
 def step(self, steering, acceleration):
     """
     Gym-style step: builds an Action from the raw controls, advances the agent state,
     and returns (observation, reward, done, info). Note that the return sits inside
     the loop, so only the first agent in self.agents is processed per call.
     """
     action = Action(steering, acceleration)
     for a in self.agents:
         next_agent_state, collision = self.physics.move(
             self.agent_states[a], action)
         progress = angle(self.agent_states[a].position,
                          next_agent_state.position) / (2 * pi)
         self.circles[a] += progress
         self.agent_states[a] = next_agent_state
         vision = self.vision_for(a)
         reward = self.reward(collision, progress)
         a.sensor_data_history.append(vision)
         a.chosen_actions_history.append(action)
         a.reward_history.append(reward)
         a.step += 1
         q = .001 if a.step > 1000 else 1. / float(a.step)
         a.avg_reward = (1. - q) * a.avg_reward + q * reward
         a.sum_reward += reward
         done = False
         if a.step == self.steps:
             done = True
             a.step = 0
         return np.array(vision), reward, done, {'collision': collision}
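A hedged usage sketch of the gym-style step above, driving a single agent with a constant action; world is assumed to be already constructed with one agent, and the control values are purely illustrative:

    # assumes `import numpy as np` at module level, as in the examples above
    obs = np.array(world.vision_for(world.agents[0]))
    total_reward, done = 0.0, False
    while not done:
        obs, reward, done, info = world.step(steering=0.0, acceleration=0.1)
        total_reward += reward
    print("Episode return: %.3f, collision on last step: %s" % (total_reward, info['collision']))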