class ControloAprendRef(Controlo):
    """Controller that learns from reinforcement while it acts.

    Each call to ``_processar`` feeds the previous (state, action) pair,
    the reinforcement obtained and the newly observed state to the learning
    mechanism, then asks the action-selection mechanism for the next action.
    """

    def __init__(self, r_max=100, a=None, s=None):
        """Set up the learning memory, action selector and learner.

        Args:
            r_max: Magnitude of the reinforcement added on a load pick-up
                and subtracted on a collision.
            a: Action taken on the previous step, or ``None`` when there is
                no previous step yet.
            s: State observed on the previous step, or ``None``.
        """
        self._r_max = r_max
        self._a = a
        self._s = s

        # Fixed hyper-parameters handed to the selector and the learner
        # (presumably learning rate, discount factor and exploration
        # rate -- confirm against AprendQ / SelAccaoEGreedy).
        self._alpha, self._gama, self._epsilon = 1, 0.9, 0.01

        accoes = dirmov()
        memoria = MemoriaEsparsa()
        seletor = SelAccaoEGreedy(memoria, accoes, self._epsilon)

        self._accoes = accoes
        self._mem_aprend = memoria
        self._mec_sel_accao = seletor
        self._mec_aprend = AprendQ(memoria, seletor, self._alpha, self._gama)

    def _processar(self, percepcao):
        """Learn from the last transition and choose the next action.

        Args:
            percepcao: Current perception; its ``posicao`` is used as the
                new state.

        Returns:
            ``MOVER(action)`` for the selected action, or ``None`` when the
            selector yields no action.
        """
        estado_actual = percepcao.posicao

        # Learning needs a previous action, so the first step is skipped.
        if self._a is not None:
            reforco = self._reforco(percepcao, self._s, estado_actual)
            self._mec_aprend._aprender(
                self._s, self._a, reforco, estado_actual
            )
            # NOTE(review): the visualisation refresh is assumed to belong
            # to the learning branch -- confirm against the original layout.
            psa.vismod.accaovalordir(self._mem_aprend._memoria, self._accoes)

        proxima_accao = self._mec_sel_accao._selecionar_accao(estado_actual)

        # Remember this step so the next call can learn from it.
        self._s, self._a = estado_actual, proxima_accao

        if proxima_accao is None:
            return None
        return MOVER(proxima_accao)

    def _reforco(self, percepcao, s, sn):
        """Compute the reinforcement for the transition from ``s`` to ``sn``.

        The base value is the negated ``dist(s, sn)``. ``r_max`` is
        subtracted on a collision; otherwise it is added when the perception
        reports a load. The collision check takes precedence.

        Args:
            percepcao: Perception providing ``colisao`` and ``carga``.
            s: Previous state.
            sn: New state.

        Returns:
            The reinforcement value.
        """
        custo = -dist(s, sn)
        if percepcao.colisao:
            return custo - self._r_max
        if percepcao.carga:
            return custo + self._r_max
        return custo