def setState(self, observation):
    """Record the current state and choose an epsilon-greedy action for it.

    Side effects: caches the string-encoded state in ``self.lstate``,
    lazily initialises its Q-value row, and stores the chosen action
    index in ``self.laction``.
    """
    self.lstate = GridworldEnv.state2str(observation)
    # Lazily create the Q-value row for a previously unseen state.
    if self.lstate not in self.Q:
        self.Q[self.lstate] = np.zeros(self.nb_action)
    # Epsilon-greedy: explore with probability epsilon, otherwise exploit.
    if random.uniform(0, 1) < self.epsilon:
        self.laction = np.random.randint(self.nb_action)
    else:
        # FIX: argmax was called on [self.Q[...]] — a one-element list
        # wrapping the row — which only worked by accidental flattening.
        # Call it on the Q-value row directly.
        self.laction = np.argmax(self.Q[self.lstate])
def act(self, observation, reward, done):
    """Select an epsilon-greedy action and perform the Q-learning update.

    Parameters: ``observation`` is the raw environment state, ``reward``
    the reward received for the previous action, ``done`` whether the
    episode has terminated. Returns the chosen action index.

    Side effects: may create a Q-value row for an unseen state, calls
    ``self._update_Qvalue`` with the transition, and updates
    ``self.lstate`` / ``self.laction``.
    """
    obs = GridworldEnv.state2str(observation)
    # Lazily create the Q-value row for a previously unseen state.
    if obs not in self.Q:
        self.Q[obs] = np.zeros(self.nb_action)
    # Epsilon-greedy: explore with probability epsilon, otherwise exploit.
    if random.uniform(0, 1) < self.epsilon:
        self.laction = np.random.randint(self.nb_action)
    else:
        # FIX: argmax was called on [self.Q[obs]] — a one-element list
        # wrapping the row — which only worked by accidental flattening.
        self.laction = np.argmax(self.Q[obs])
    # Learn from the (lstate, laction) -> obs transition, then advance.
    self._update_Qvalue(reward, obs, done)
    self.lstate = obs
    return self.laction
def _total_reward(self): """Sum of rewards expected for every state""" return sum(self.value[state] for state in self.mdp.keys()) obs = GridworldEnv.state2str(observation) if obs in self.Q.keys(): self.Q[obs] = np.zeros(self.nb_action) if random.uniform(0, 1) < self.epsilon: self.laction = np.random.randint(self.nb_action) else: self.laction = np.argmax([self.Q[obs]]) self._update_Qvalue(reward, obs, done) self.lstate = obs return self.laction
def setState(self, observation):
    """Cache the encoded observation and make sure it owns a Q-value row."""
    encoded = GridworldEnv.state2str(observation)
    self.lstate = encoded
    # First visit to this state: start its action values at zero.
    if encoded not in self.Q:
        self.Q[encoded] = np.zeros(self.nb_action)
def act(self, observation, reward, done):
    """Return the fixed policy's action for the observed state.

    ``reward`` and ``done`` are accepted for interface compatibility
    with the other agents but are not used by this policy-table agent.
    """
    state_key = GridworldEnv.state2str(observation)
    return self.policy[state_key]