def punishLastAction(self, howmuch):
    """Forward the punishment to the parent agent and mirror it on the info screen."""
    super().punishLastAction(howmuch)
    if self.containers.showscreen:
        # Show the punishment as a negative magnitude, stamped with the current UTC time.
        infoscreen.print(
            str(-abs(howmuch)),
            time.strftime("%H:%M:%S", time.gmtime()),
            containers=self.containers,
            wname="Last big punish",
        )
def learnANN(self):
    """Run one learning step via the parent, report progress, and pass the result through."""
    result = super().learnANN()
    # NOTE: this project overrides print() with a verbosity `level` keyword.
    print("ReinfLearnSteps:", self.model.step(), level=3)
    if self.containers.showscreen:
        infoscreen.print(
            self.model.step(),
            "Iterations: >" + str(self.model.run_inferences()),
            containers=self.containers,
            wname="ReinfLearnSteps",
        )
    return result
def policyAction(self, agentState):
    """Pick the greedy discrete action from the model.

    Returns the display string and the (throttle, brake, steer) tuple.
    """
    # inference() yields the argmax action and the per-action Q-values.
    action, qvals = self.model.inference(self.makeInferenceUsable(agentState))
    throttle, brake, steer = self.dediscretize(action[0])
    toUse = "[" + str(throttle) + ", " + str(brake) + ", " + str(steer) + "]"
    self.showqvals(qvals[0])
    if self.containers.showscreen:
        infoscreen.print(toUse, containers=self.containers, wname="Last command")
        # Refresh the learn-step counter only every 100th inference.
        if self.model.run_inferences() % 100 == 0:
            infoscreen.print(
                self.model.step(),
                "Iterations: >" + str(self.model.run_inferences()),
                containers=self.containers,
                wname="ReinfLearnSteps",
            )
    return toUse, (throttle, brake, steer)  # always returns (toUse, toSave)
def showqvals(self, qvals):
    """Render every discrete action alongside its Q-value on the info screen."""
    # One slot per discrete action; the fourth steering block exists only when
    # simultaneous acceleration+braking is configured as a legal action.
    amount = self.conf.steering_steps * (4 if self.conf.INCLUDE_ACCPLUSBREAK else 3)
    labels = []
    for idx in range(amount):
        onehot = [0] * amount
        onehot[idx] = 1
        labels.append(str(self.dediscretize(onehot)))
    paired = list(zip(labels, qvals))
    # [1:-1] strips the surrounding parentheses from the stringified action tuple.
    toprint = "\n".join(str(lbl)[1:-1] + ": " + str(q) for lbl, q in paired)
    print(paired, level=3)  # project print() with verbosity level
    if self.containers.showscreen:
        infoscreen.print(toprint, containers=self.containers, wname="Current Q Vals")
def addToMemory(self, gameState, pastState):
    """Delegate to the parent, display the new memory entry, and pass the result through.

    Fix: the override previously captured the parent's return tuple but never
    returned it, so any caller going through this subclass got None. It now
    returns the same (action, reward, qval, count, changestring) tuple the
    parent does — a backward-compatible change (None -> tuple).
    """
    a, r, qval, count, changestring = super().addToMemory(gameState, pastState)
    if self.containers.showscreen:
        infoscreen.print(a, round(r, 2), round(qval, 2), changestring,
                         containers=self.containers, wname="Last memory")
        # Refresh the memory-size display only every 20th entry.
        if len(self.memory) % 20 == 0:
            infoscreen.print(">" + str(len(self.memory)),
                             containers=self.containers, wname="Memorysize")
    return a, r, qval, count, changestring
def policyAction(self, agentState):
    """Pick a noise-perturbed continuous action from the model.

    Returns the display string and the rounded [throttle, brake, steer] list.
    """
    raw, _ = self.model.inference(self.makeInferenceUsable(agentState))
    noisy = self.make_noisy(raw[0])
    action = [round(component, 3) for component in noisy]
    toUse = "[" + str(action[0]) + ", " + str(action[1]) + ", " + str(action[2]) + "]"
    if self.containers.showscreen:
        infoscreen.print(toUse, containers=self.containers, wname="Last command")
        # Every 100th inference: refresh the step counter and epsilon displays.
        if self.model.run_inferences() % 100 == 0:
            infoscreen.print(
                self.model.step(),
                "Iterations: >" + str(self.model.run_inferences()),
                containers=self.containers,
                wname="ReinfLearnSteps",
            )
            infoscreen.print(self.epsilon, containers=self.containers, wname="Epsilon")
    return toUse, action
def eval_episodeVals(self, mem_epi_slice, gameState, endReason):
    """Evaluate the finished episode via the parent and display its summary.

    Fix: the override previously captured the parent's summary string but
    never returned it, silently breaking the parent's contract for callers
    going through this subclass. It now returns the string — a
    backward-compatible change (None -> str).
    """
    string = super().eval_episodeVals(mem_epi_slice, gameState, endReason)
    if self.containers.showscreen:
        infoscreen.print(string, containers=self.containers, wname="Last Epsd")
    return string
def randomAction(self, agentState):
    """Take an exploratory action from the parent and mirror it on the info screen."""
    toUse, toSave = super().randomAction(agentState)
    if self.containers.showscreen:
        infoscreen.print(toUse, "(random)",
                         containers=self.containers, wname="Last command")
        infoscreen.print(self.epsilon, containers=self.containers, wname="Epsilon")
    return toUse, toSave