def add_experience(self, state, action, reward, next_state, done):
    '''Interface helper method for update() to add experience to memory'''
    self.most_recent = (state, action, reward, next_state, done)
    for idx, k in enumerate(self.data_keys):
        self.cur_epi_data[k].append(self.most_recent[idx])
    # If episode ended, add to memory and clear cur_epi_data
    if util.epi_done(done):
        for k in self.data_keys:
            getattr(self, k).append(self.cur_epi_data[k])
        self.cur_epi_data = {k: [] for k in self.data_keys}
        # If agent has collected the desired number of episodes, it is ready to train
        # length is num of epis due to nested structure
        if len(self.states) == self.body.agent.algorithm.training_frequency:
            self.body.agent.algorithm.to_train = 1
    # Track memory size and num experiences
    self.size += 1
    self.seen_size += 1
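For reference, here is a minimal self-contained sketch of the same episodic storage pattern: experiences accumulate in a per-episode buffer and are flushed into nested lists (one inner list per episode) when the episode ends, and training is flagged once enough episodes have been collected. The class name EpisodicMemory, the epi_done helper, and the training_frequency handling below are illustrative assumptions, not the original memory class.

import numpy as np

def epi_done(done):
    '''Assumed helper: True when the (possibly vectorized) done flag marks episode end.'''
    return np.all(done)

class EpisodicMemory:
    '''Illustrative episodic memory with nested per-episode lists for each data key.'''
    def __init__(self, training_frequency=2):
        self.data_keys = ['states', 'actions', 'rewards', 'next_states', 'dones']
        self.training_frequency = training_frequency
        self.to_train = 0
        self.reset()

    def reset(self):
        for k in self.data_keys:
            setattr(self, k, [])
        self.cur_epi_data = {k: [] for k in self.data_keys}
        self.size = 0

    def add_experience(self, state, action, reward, next_state, done):
        # Append the experience elements to the current-episode buffers
        for k, v in zip(self.data_keys, (state, action, reward, next_state, done)):
            self.cur_epi_data[k].append(v)
        # On episode end, flush the episode into the nested lists and reset the buffers
        if epi_done(done):
            for k in self.data_keys:
                getattr(self, k).append(self.cur_epi_data[k])
            self.cur_epi_data = {k: [] for k in self.data_keys}
            # Length of self.states is the number of stored episodes
            if len(self.states) == self.training_frequency:
                self.to_train = 1
        self.size += 1

memory = EpisodicMemory(training_frequency=2)
memory.add_experience(0, 1, 1.0, 1, False)
memory.add_experience(1, 0, 0.0, 2, True)  # first episode flushed
memory.add_experience(2, 1, 1.0, 3, True)  # second episode; to_train flips to 1
print(len(memory.states), memory.to_train)  # -> 2 1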
def run_rl(self):
    '''Run the main RL loop until clock.max_frame'''
    logger.info(f'Running RL loop for trial {self.spec["meta"]["trial"]} session {self.index}')
    clock = self.env.clock
    state = self.env.reset()
    done = False
    while True:
        if util.epi_done(done):  # before starting another episode
            self.try_ckpt(self.agent, self.env)
            if clock.get() < clock.max_frame:  # reset and continue
                clock.tick('epi')
                state = self.env.reset()
                done = False
        self.try_ckpt(self.agent, self.env)
        if clock.get() >= clock.max_frame:  # finish
            break
        clock.tick('t')
        action = self.agent.act(state)
        next_state, reward, done, info = self.env.step(action)
        self.agent.update(state, action, reward, next_state, done)
        state = next_state
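The same loop structure can be reduced to a standalone sketch: reset the environment when an episode ends, step and update the agent otherwise, and stop once the frame budget is exhausted. The RandomAgent, the max_frame argument, and the classic Gym step API returning (obs, reward, done, info) are assumptions for illustration; checkpointing and the clock object are omitted.

import gym

class RandomAgent:
    '''Placeholder agent: acts randomly and ignores updates.'''
    def __init__(self, env):
        self.env = env
    def act(self, state):
        return self.env.action_space.sample()
    def update(self, state, action, reward, next_state, done):
        pass

def run_rl(env, agent, max_frame=1000):
    '''Reduced sketch of the session loop: reset on episode end, step otherwise, stop at max_frame.'''
    frame = 0
    state = env.reset()
    done = False
    while True:
        if done and frame < max_frame:  # episode ended: start a new one
            state = env.reset()
            done = False
        if frame >= max_frame:  # frame budget exhausted
            break
        frame += 1
        action = agent.act(state)
        next_state, reward, done, info = env.step(action)
        agent.update(state, action, reward, next_state, done)
        state = next_state

env = gym.make('CartPole-v1')
run_rl(env, RandomAgent(env), max_frame=200)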