Пример #1
0
 def react(self, h, a):
     """Produce the next percept in response to action ``a``.

     The current state is recorded through ``self.sm.hm.record`` first.
     A non-optimal action (per ``self.optimal_actions``) yields ``':('``
     and leaves the state untouched. Otherwise a percept is drawn with
     ``grl.epsilon_sample``; when the draw is ``':)'`` the state advances
     by one, wrapping around the length of ``self.sm.state_space``.

     ``h`` must expose a ``stats`` mapping keyed by class name; the
     ``':)'`` entry for this class (default ``0.0``) is subtracted from 1
     and passed as the third argument to ``grl.epsilon_sample``.
     """
     own_stats = h.stats.get(type(self).__name__, dict())
     happy_ratio = own_stats.get(':)', 0.0)
     self.sm.hm.record([self.sm.state])

     # Wrong action: sad percept, no state transition.
     if a != self.optimal_actions[self.sm.state]:
         return ':('

     # NOTE(review): presumably epsilon_sample(space, preferred, eps) returns
     # `preferred` unless an exploratory draw happens -- confirm in grl.
     e_next = grl.epsilon_sample(self.pm.percept_space, ':)', 1 - happy_ratio)
     if e_next == ':)':
         n_states = len(self.sm.state_space)
         self.sm.state = (self.sm.state + 1) % n_states
     return e_next
Пример #2
0
 def react(self, h, a):
     """Produce the next percept in response to action ``a``.

     The current state is recorded through ``self.sm.hm.record`` first.
     A non-optimal action (per ``self.optimal_actions``) resets the state
     to ``0`` and yields ``'@'``. Otherwise a percept is drawn with
     ``grl.epsilon_sample``; the state advances by one (wrapping around
     the length of ``self.sm.state_space``) when the draw is ``'#'`` in
     state 0 or ``'@'`` in state 1.

     ``h`` must expose a ``stats`` mapping keyed by class name; the
     ``'#'`` entry for this class (default ``self.pmin``) is subtracted
     from 1 and passed as the third argument to ``grl.epsilon_sample``.
     """
     own_stats = h.stats.get(type(self).__name__, dict())
     bang_ratio = own_stats.get('#', self.pmin)
     self.sm.hm.record([self.sm.state])

     # Wrong action: fall back to state 0 and emit '@'.
     if a != self.optimal_actions[self.sm.state]:
         self.sm.state = 0
         return '@'

     e_next = grl.epsilon_sample(self.pm.percept_space, '#', 1 - bang_ratio)
     state = self.sm.state
     # Only the percept that "matches" the current state moves it forward.
     advances = (state == 0 and e_next == '#') or (state == 1 and e_next == '@')
     if advances:
         self.sm.state = (state + 1) % len(self.sm.state_space)
     return e_next
Пример #3
0
 def act(self, h):
     """Refresh the policy and value tables, then pick an action for ``h``.

     Calls ``grl.PITabular`` with ``steps=1`` and ``vi_steps=1`` and stores
     its two results back into ``self.pi`` and ``self.v``. The state for
     history ``h`` is then obtained from ``self.hm.state`` and the argmax
     of that state's policy row is handed to ``grl.epsilon_sample``
     together with ``self.xpl``.
     """
     improved = grl.PITabular(
         self.p, self.r, self.v, self.pi, g=self.g, steps=1, vi_steps=1
     )
     self.pi, self.v = improved

     # Oracle Alert! The state lookup is given self.oracle as its q_func.
     s = self.hm.state(h, g=self.g, q_func=self.oracle)

     # NOTE(review): self.xpl is presumably the exploration rate -- confirm.
     return grl.epsilon_sample(
         self.am.action_space, self.pi[s].argmax(), self.xpl
     )
Пример #4
0
 def reset(self):
     """Replace the current state with a fresh draw from the state space.

     The draw is delegated to ``grl.epsilon_sample`` called with only the
     state space (presumably a uniform random pick -- confirm in grl).
     """
     drawn = grl.epsilon_sample(self.sm.state_space)
     self.sm.state = drawn
Пример #5
0
 def setup(self):
     """Define the two-state, two-action spaces and draw an initial state.

     The initial state comes from ``grl.epsilon_sample`` called with only
     the state space (presumably a uniform random pick -- confirm in grl).
     """
     self.sm.state_space, self.am.action_space = (
         ['s-left', 's-right'],
         ['left', 'right'],
     )
     initial = grl.epsilon_sample(self.sm.state_space)
     self.sm.state = initial
Пример #6
0
 def act(self, h):
     """Return an action drawn by ``grl.epsilon_sample`` from ``self.am.actions``.

     The history ``h`` is accepted but not used by this method.
     """
     candidates = self.am.actions
     return grl.epsilon_sample(candidates)
Пример #7
0
 def act(self, h):
     """Choose an action for history ``h``, store it on ``self.am`` and return it.

     The state is looked up via ``self.hm.state``; element ``[1]`` of the
     maximum entry in ``self.Q[s]`` is passed to ``grl.epsilon_sample``
     along with a fixed third argument of ``0.1``.
     """
     s = self.hm.state(h)
     # NOTE(review): entries of self.Q[s] look like (value, action) pairs,
     # so [1] would be the greedy action -- confirm against where Q is built.
     top_entry = max(self.Q[s])
     chosen = grl.epsilon_sample(self.am.action_space, top_entry[1], 0.1)
     self.am.action = chosen
     return self.am.action
Пример #8
0
 def start(self, e=None, order=1):
     """Store ``order``, draw a first action and return it.

     The action is drawn by ``grl.epsilon_sample`` from the action space
     and saved as ``self.am.action``. The parameter ``e`` is accepted but
     not used by this method.
     """
     self.order = order
     first_action = grl.epsilon_sample(self.am.action_space)
     self.am.action = first_action
     return self.am.action