Пример #1
0
 def __init__(self, history_mgr=None, *args, **kwargs):
     """Initialize the object's history, state, action, percept and reward managers.

     Args:
         history_mgr: an existing ``grl.HistoryManager`` to share with other
             objects; anything else (including ``None``) makes this object
             create and keep its own history manager.
         *args: stored verbatim on ``self.args``.
         **kwargs: stored verbatim on ``self.kwargs``; recognized keys are
             ``max_history`` (capacity of the private history manager) and
             ``order`` (interaction-sequence position, defaults to ``math.nan``).
     """
     self.args = args
     self.kwargs = kwargs
     # check if the object is responsible to keep the history of its interaction
     if isinstance(history_mgr, grl.HistoryManager):
         # an external history manager is provided, so no need to keep history in the object
         self.keep_history = False
         self.hm = history_mgr
     else:
         # the object will keep its own history
         self.keep_history = True
         # the initiated history manager uses the internal state_func to get states from histories
         self.hm = grl.HistoryManager(maxlen=self.kwargs.get(
             'max_history', None),
                                      state_map=self.state_func)
     self.sm = grl.StateManager(self.transition_func)
     self.am = grl.ActionManager()
     self.pm = grl.PerceptManager(self.emission_func)
     self.rm = grl.RewardManager(self.reward_func)
     # the order is important if the objects are interacting in a sequence
     self.order = self.kwargs.get('order', math.nan)
     # call the setup function for any object-dependent user-specified configurations
     # from a user perspective, setup() is the entry point of an object
     # the user should not use the internal __init__
     self.setup()
Пример #2
0
    def __init__(self, history_mgr=None, *args, **kwargs):
        """Wire up the object's managers and run its user-level ``setup()``.

        Args:
            history_mgr: a ``grl.HistoryManager`` to share; when one is
                supplied this object does not keep its own history.
                Anything else (including ``None``) makes the object create
                a private history manager bounded by the ``max_history``
                keyword argument.
            *args: stored verbatim on ``self.args``.
            **kwargs: stored verbatim on ``self.kwargs``; ``order`` sets
                the interaction-sequence position (default ``math.nan``).
        """
        self.args = args
        self.kwargs = kwargs

        # Reuse an externally supplied history manager, otherwise build one
        # that maps histories to states through this object's state_func.
        external = isinstance(history_mgr, grl.HistoryManager)
        self.keep_history = not external
        if external:
            self.hm = history_mgr
        else:
            max_len = self.kwargs.get('max_history', None)
            self.hm = grl.HistoryManager(maxlen=max_len,
                                         state_map=self.state_func)

        self.sm = grl.StateManager(self.transition_func)
        self.am = grl.ActionManager()
        self.pm = grl.PerceptManager(self.emission_func)
        self.rm = grl.RewardManager(self.reward_func)
        self.order = self.kwargs.get('order', math.nan)
        self.setup()
Пример #3
0
def phi_extreme_q(h, *args, **kwargs):
    """Map a history to a state by quantizing its action-value estimates.

    The Q-values for history ``h`` are computed by the required ``q_func``
    keyword argument and discretized into buckets of width ``eps`` using
    floor division; the bucket indices, ordered by sorted key, form the
    state tuple.

    Args:
        h: the history to abstract.
        *args, **kwargs: forwarded to ``q_func``; recognized keys are
            ``eps`` (bucket width, default 0.1) and ``q_func`` (required
            callable returning a key-indexed, ``//``-capable Q container).

    Returns:
        tuple: quantized Q-values in sorted-key order.

    Raises:
        TypeError: if no callable ``q_func`` is supplied.
    """
    eps = kwargs.get('eps', 0.1)
    q_func = kwargs.get('q_func', None)
    if q_func is None:
        # fail loudly with a clear message instead of the opaque
        # "'NoneType' object is not callable" the call below would raise
        raise TypeError("phi_extreme_q requires a callable 'q_func' keyword argument")
    q = q_func(h, *args, **kwargs)
    # discretize every value into an eps-wide bucket index
    q = q // eps
    return tuple(q[k] for k in sorted(q))


def phi_last_percept(h, *args, **kwargs):
    """Return the most recent percept of the history *h*.

    Extra positional and keyword arguments are accepted for signature
    compatibility with other state maps and are ignored.
    """
    last_percept = h[-1]
    return last_percept


# Experiment wiring: one shared history manager, one domain, one agent.
# NOTE(review): phi_extreme_va is not defined in this chunk -- presumably
# provided elsewhere in the file; confirm before running.
history_mgr = grl.HistoryManager(maxlen=10, state_map=phi_extreme_va)
# Alternative domains -- enable exactly one:
#domain = BlindMaze(history_mgr, maze_len=2)
#domain = SimpleMDP(history_mgr)
domain = SlipperyHill(history_mgr)
#domain = DynamicKeys(history_mgr)
# Alternative agents -- enable exactly one:
#agent = RandomAgent(history_mgr)
agent = FrequencyAgent(history_mgr,
                       exploration_factor=0.1,
                       discount_factor=0.999)
#agent = GreedyQAgent(history_mgr, value_function_persist=False, exploration_factor=0.3)

# Optional binary wrapper around the chosen domain (disabled):
# o_domain = domain
# domain = grl.BinaryMock(history_mgr)
# domain.hook(o_domain)

# run the agent-domain interaction loop
agent.interact(domain)
Пример #4
0
 def setup(self):
     """Configure the binary action space and the object's auxiliary state."""
     # this object exposes exactly two actions
     self.am.action_space = [0, 1]
     # reward reported for dummy steps (overridable via the r_dummy kwarg)
     self.r_dummy = self.kwargs.get('r_dummy', 0.0)
     # auxiliary history manager with the same capacity as the main one
     self.hm_ae = grl.HistoryManager(maxlen=self.hm.maxlen)
     # no hooked domain yet; cache for restricted action lookups
     self.domain = None
     self.restrict_A_cache = {}
Пример #5
0
 def __init__(self, reward_func=lambda h, *x, **y: 0):
     """Store the reward function and create a fresh history manager.

     Args:
         reward_func: callable mapping a history (plus any extra
             arguments) to a reward; defaults to a constant-zero reward.
     """
     self.hm = grl.HistoryManager()
     self.reward_func = reward_func