def _update(self):
    """Run the base ``Maze`` update, then refresh the state code and penalty.

    ``self.state`` is replaced by a length-4 vector in which exactly one
    entry is 1:

    * index 0 -- robot is at ``self.initPos[0]`` and ``self.goUp`` is true
    * index 1 -- robot is at ``self.initPos[0]`` and ``self.goUp`` is false
    * index 2 -- robot is not at the start position and its second
      coordinate equals ``self.length + 1``
    * index 3 -- robot is anywhere else

    NOTE(review): ``self.length + 1`` is presumably the coordinate of the
    junction end of the corridor -- confirm against the maze layout.

    ``self.reward`` is only written here when the robot sits at second
    coordinate ``self.length + 1`` with its first coordinate exactly 2 away
    from the goal's; in that case the robot is also moved onto the goal.
    In every other case this method leaves ``self.reward`` untouched.
    """
    # Presumably integrates self.action into the robot position -- defined
    # in the base class, not visible from here.
    Maze._update(self)

    # One-hot position code; the start test takes precedence over the
    # end-of-corridor test.
    self.state = zeros(4)
    if self.perseus == self.initPos[0]:
        hot_index = 0 if self.goUp else 1
    else:
        hot_index = 2 if self.perseus[1] == self.length + 1 else 3
    self.state[hot_index] = 1

    # Wrong arm taken ("bad choice"): first coordinate is 2 away from the
    # goal's. Teleport the robot onto the goal and hand out bangReward.
    if (self.perseus[1] == self.length + 1
            and abs(self.perseus[0] - self.goal[0]) == 2):
        self.perseus = self.goal
        self.reward = self.bangReward
def _update(self):
    """Run the base ``Maze`` update, then recompute reward and state.

    * ``self.reward`` becomes ``1.`` when ``self.goal == self.perseus``
      and ``0`` otherwise. No reset is triggered here on reaching the goal.
    * ``self.state`` becomes a one-element array holding
      ``perseus[0] * mazeTable.shape[0] + perseus[1]``.

    NOTE(review): the multiplier is ``mazeTable.shape[0]``. If ``perseus``
    is (row, col) into ``mazeTable``, this index is only guaranteed unique
    for square tables -- confirm the coordinate convention before relying
    on it for non-square mazes.
    """
    Maze._update(self)

    # Reward: 1. on the goal cell, 0 everywhere else.
    self.reward = 1. if self.goal == self.perseus else 0

    # State: position folded into a single integer, wrapped in an array.
    first, second = self.perseus[0], self.perseus[1]
    stride = self.mazeTable.shape[0]
    self.state = array([first * stride + second])