def run(self):
    """Pull one event from the simulator and service it.

    Returns:
        False when the event is a FinishedEvent or when no event was
        available (None); True for every other event, signalling the
        caller to keep calling run().
    """
    event = self._sim.get_next_event()

    if isinstance(event, EpisodeStartEvent):
        log.event("Episode Start")
        first_observation = self._sim.gym_episode_start(params(event))
        event.initial_state = state(first_observation)
        return True

    if isinstance(event, SimulateEvent):
        log.event("Simulate")
        step_result = self._sim.gym_simulate(action(event))
        # The fourth element of the step result is not used here.
        observation, step_reward, is_done, _ = step_result
        event.state = state(observation)
        event.reward = reward(step_reward)
        event.terminal = terminal(is_done)
        # Accumulate the raw (unconverted) reward for the episode summary.
        self._episode_reward += step_reward
        self._sim.render()
        return True

    if isinstance(event, EpisodeFinishEvent):
        log.event("Episode Finish")
        summary = "Episode {} reward: {}".format(
            self._episode_count, self._episode_reward)
        print(summary)
        # Reset the per-episode bookkeeping for the next episode.
        self._episode_count += 1
        self._episode_reward = 0
        return True

    if isinstance(event, FinishedEvent):
        log.event("Finished")
        return False

    # No event at all also stops the loop; unrecognised events continue it.
    return event is not None
def episode_start(self, parameters=None):
    """Begin a new episode.

    Logs the episode number, resets the underlying model and returns the
    model's state after the reset, converted by ``star.state``.

    Args:
        parameters: Accepted for interface compatibility; not read here.
    """
    start_message = 'Episode {} Starting'.format(self.episode_count)
    log.info(start_message)

    self.model.reset()

    initial_state = star.state(self.model.state)
    return initial_state
def simulate(self, brain_action):
    """Advance the model by one step using the given brain action.

    Args:
        brain_action: Action as delivered by the caller; converted with
            ``star.action`` before being applied to the model.

    Returns:
        A ``(state, reward, terminal)`` tuple computed from the model
        state after the step.
    """
    self.model.step(star.action(brain_action))

    is_terminal = star.terminal(self.model.state)
    step_reward = star.reward(self.model.state, is_terminal)
    next_state = star.state(self.model.state)

    # Emit a one-line episode summary only when the episode has ended.
    if is_terminal:
        log.info(f'Episode {self.episode_count}: '
                 f'iterations={self.iteration_count:-3.0f} reward={self.episode_reward:-3.1f}')

    return (next_state, step_reward, is_terminal)
def simulate(self, brain_action):
    """Run a single simulation step and report the outcome.

    The episode is forced to terminate once ``self.iteration_count``
    reaches 200; otherwise termination is decided by ``star.terminal``.

    Args:
        brain_action: Action as delivered by the caller; converted with
            ``star.action`` before being applied to the model.

    Returns:
        A ``(state, reward, terminal)`` tuple computed from the model
        state after the step.
    """
    self.model.step(star.action(brain_action))

    # Hard cap on episode length takes precedence over the model's own
    # terminal condition (which is not evaluated once the cap is hit).
    is_terminal = (
        True if self.iteration_count >= 200
        else star.terminal(self.model.state)
    )

    step_reward = star.reward(self.model.state, is_terminal)
    next_state = star.state(self.model.state)
    return (next_state, step_reward, is_terminal)
def episode_start(self, parameters=None):
    """Reset the model and return its post-reset state.

    Args:
        parameters: Accepted for interface compatibility; not read here.

    Returns:
        The model state after the reset, converted by ``star.state``.
    """
    self.model.reset()
    initial_state = star.state(self.model.state)
    return initial_state
def episode_start(self, parameters=None):
    """Reset the model with a small random initial angle.

    Args:
        parameters: Accepted for interface compatibility; not read here.

    Returns:
        The model state after the reset, converted by ``star.state``.
    """
    # Uniform draw between -2 and +2 times RAD_PER_DEG (presumably
    # +/- 2 degrees expressed in radians -- confirm against the constant).
    initial_theta = np.random.uniform(
        low=-2 * RAD_PER_DEG,
        high=2 * RAD_PER_DEG,
    )
    self.model.reset(theta=initial_theta)
    return star.state(self.model.state)