def observationFunction(self, state):
    """Record the previous transition with a zero reward, then observe.

    When a previous state has been stored on the agent, the transition
    (last state, last action, ``state``) is passed to
    ``observeTransition`` with a reward of ``0.0``.

    Returns whatever ``CaptureAgent.observationFunction`` returns for
    ``state``.
    """
    previous = self.lastState
    if previous is not None:
        # This variant always reports a flat reward of 0.0.
        self.observeTransition(previous, self.lastAction, state, 0.0)
    return CaptureAgent.observationFunction(self, state)
def observationFunction(self, state):
    """Report the score change since the last action as a reward.

    Calls the base-class ``observationFunction`` first, then, if a previous
    state is stored on the agent, passes the transition and the score
    difference to ``observeTransition``. A non-zero score difference is
    also printed together with the current weights.

    Returns ``state`` itself, unchanged.
    """
    # NOTE(review): the value returned by the base-class call is discarded
    # and the raw `state` is returned instead -- confirm this is intended.
    CaptureAgent.observationFunction(self, state)

    previous = self.lastState
    if previous is None:
        return state

    # Reward is the score change produced by the most recent move.
    score_delta = state.getScore() - previous.getScore()
    if score_delta != 0:
        print(score_delta)
        print("____the weights value are ", self.weights)

    # Hand the observed reward to the learning update.
    self.observeTransition(previous, self.lastAction, state, score_delta)
    return state
def observationFunction(self, currentGameState):
    """Feed the score change since the last action into the learner.

    If a previous state is stored on the agent, the difference between the
    score of ``currentGameState`` and the score of that previous state is
    passed, together with the transition, to ``observeTransition``.

    Returns whatever ``CaptureAgent.observationFunction`` returns for
    ``currentGameState``.
    """
    # Compare against None explicitly: a plain truthiness test would also
    # skip the update for any state object that happens to evaluate false.
    if self.lastState is not None:
        # Score change between two consecutive steps is used as the reward.
        rewardChange = currentGameState.getScore() - self.lastState.getScore()
        self.observeTransition(
            self.lastState, self.lastAction, currentGameState, rewardChange
        )
    return CaptureAgent.observationFunction(self, currentGameState)
def observationFunction(self, state):
    """Observe the state reached after the agent's last action.

    If a previous state is stored on the agent, the score difference
    between ``state`` and that previous state is passed as the reward to
    ``observeTransition`` along with the transition itself.

    Returns whatever ``CaptureAgent.observationFunction`` returns for
    ``state``.
    """
    previous = self.lastState
    if previous is not None:
        score_delta = state.getScore() - previous.getScore()
        self.observeTransition(previous, self.lastAction, state, score_delta)
    return CaptureAgent.observationFunction(self, state)
def observationFunction(self, state):
    """Pass the latest transition and its score change to the learner.

    With no previous state stored on the agent, nothing is recorded.
    Otherwise ``observeTransition`` receives the previous state, the last
    action, ``state`` and the score difference between the two states.

    Returns whatever ``CaptureAgent.observationFunction`` returns for
    ``state``.
    """
    if self.lastState is None:
        # Nothing to learn from yet.
        return CaptureAgent.observationFunction(self, state)

    gained = state.getScore() - self.lastState.getScore()
    self.observeTransition(self.lastState, self.lastAction, state, gained)
    return CaptureAgent.observationFunction(self, state)
def observationFunction(self, state):
    """Run a learning update when the agent is ready, then observe.

    ``makeUpdate`` is called only when ``has_no_observation`` is false,
    ``getPreviousObservation()`` is not ``None`` and ``isInTraining()`` is
    true; the checks short-circuit in that order.

    Returns whatever ``CaptureAgent.observationFunction`` returns for
    ``state``.
    """
    ready_to_learn = (
        not self.has_no_observation
        and self.getPreviousObservation() is not None
        and self.isInTraining()
    )
    if ready_to_learn:
        self.makeUpdate()
    return CaptureAgent.observationFunction(self, state)
def observationFunction(self, state):
    """Record the score change of the last move, then observe ``state``.

    When a previous state is stored on the agent, the score difference
    between ``state`` and that state is passed as the reward to
    ``observeTransition``.

    Returns whatever ``CaptureAgent.observationFunction`` returns for
    ``state``.
    """
    prior_state = self.lastState
    if prior_state is not None:
        self.observeTransition(
            prior_state,
            self.lastAction,
            state,
            state.getScore() - prior_state.getScore(),
        )
    return CaptureAgent.observationFunction(self, state)