示例#1
0
    def run(self):
        """Fetch the next simulator event and handle it.

        The event is dispatched on its type:

        * ``EpisodeStartEvent`` -- start a gym episode and store the
          converted observation on ``event.initial_state``.
        * ``SimulateEvent`` -- advance the gym simulation one step, store
          the converted state/reward/terminal on the event, accumulate the
          raw reward and render.
        * ``EpisodeFinishEvent`` -- print the episode's reward, then bump
          the episode counter and clear the accumulated reward.
        * ``FinishedEvent`` -- log and stop.

        Returns:
            ``False`` when the event is a ``FinishedEvent`` or ``None``,
            ``True`` for anything else (including unrecognised events).
        """
        evt = self._sim.get_next_event()

        if isinstance(evt, EpisodeStartEvent):
            log.event("Episode Start")
            first_obs = self._sim.gym_episode_start(params(evt))
            evt.initial_state = state(first_obs)
            return True

        if isinstance(evt, SimulateEvent):
            log.event("Simulate")
            next_obs, raw_reward, is_done, _ = self._sim.gym_simulate(
                action(evt))
            evt.state = state(next_obs)
            evt.reward = reward(raw_reward)
            evt.terminal = terminal(is_done)
            # The running total uses the raw reward, not the converted one.
            self._episode_reward += raw_reward
            self._sim.render()
            return True

        if isinstance(evt, EpisodeFinishEvent):
            log.event("Episode Finish")
            summary = "Episode {} reward: {}".format(
                self._episode_count, self._episode_reward)
            print(summary)
            self._episode_count += 1
            self._episode_reward = 0
            return True

        if isinstance(evt, FinishedEvent):
            log.event("Finished")
            return False

        # No event at all also stops the loop; unknown events keep it going.
        return evt is not None
示例#2
0
    def episode_start(self, parameters=None):
        """Begin a new episode.

        Logs the current episode number, resets the model and returns the
        model's state converted through ``star.state``.

        ``parameters`` is accepted but not used by this implementation.
        """
        banner = 'Episode {} Starting'.format(self.episode_count)
        log.info(banner)

        self.model.reset()
        initial_state = star.state(self.model.state)
        return initial_state
示例#3
0
    def simulate(self, brain_action):
        """Advance the model by one step.

        ``brain_action`` is converted with ``star.action`` and applied to
        the model. The resulting model state is then used to derive the
        terminal flag, the reward and the outgoing state.

        Returns:
            A ``(state, reward, terminal)`` tuple.
        """
        self.model.step(star.action(brain_action))

        is_terminal = star.terminal(self.model.state)
        step_reward = star.reward(self.model.state, is_terminal)
        outgoing_state = star.state(self.model.state)

        # Emit a one-line episode summary only on the terminating step.
        if is_terminal:
            log.info(f'Episode {self.episode_count}: '
                     f'iterations={self.iteration_count:-3.0f} reward={self.episode_reward:-3.1f}')

        result = (outgoing_state, step_reward, is_terminal)
        return result
示例#4
0
    def simulate(self, brain_action):
        """Advance the model by one step and report the outcome.

        The episode is forced to terminate once ``self.iteration_count``
        reaches 200; before that, termination is decided by
        ``star.terminal`` on the model state.

        Returns:
            A ``(state, reward, terminal)`` tuple.
        """
        self.model.step(star.action(brain_action))

        # Hard cap on episode length; star.terminal is not consulted past it.
        hit_step_cap = self.iteration_count >= 200
        is_terminal = True if hit_step_cap else star.terminal(self.model.state)

        step_reward = star.reward(self.model.state, is_terminal)
        outgoing_state = star.state(self.model.state)

        return (outgoing_state, step_reward, is_terminal)
 def episode_start(self, parameters=None):
     """Reset the model and return its state converted by ``star.state``.

     ``parameters`` is accepted but not used by this implementation.
     """
     self.model.reset()
     initial_state = star.state(self.model.state)
     return initial_state
示例#6
0
 def episode_start(self, parameters=None):
     """Reset the model with a randomised ``theta`` and return its state.

     ``theta`` is drawn uniformly from +/- 2 degrees (expressed in radians
     via ``RAD_PER_DEG``). ``parameters`` is accepted but not used.
     """
     # Symmetric bound: 2 degrees converted to radians.
     max_offset = 2 * RAD_PER_DEG
     start_theta = np.random.uniform(-max_offset, max_offset)

     self.model.reset(theta=start_theta)
     initial_state = star.state(self.model.state)
     return initial_state