import math

import grl


def __init__(self, history_mgr=None, *args, **kwargs):
    self.args = args
    self.kwargs = kwargs
    # check whether the object is responsible for keeping the history of its interaction
    if isinstance(history_mgr, grl.HistoryManager):
        # an external history manager is provided, so there is no need to keep history in the object
        self.keep_history = False
        self.hm = history_mgr
    else:
        # the object keeps its own history
        self.keep_history = True
        # the instantiated history manager uses the internal state_func to get states from histories
        self.hm = grl.HistoryManager(maxlen=self.kwargs.get('max_history', None),
                                     state_map=self.state_func)
    self.sm = grl.StateManager(self.transition_func)
    self.am = grl.ActionManager()
    self.pm = grl.PerceptManager(self.emission_func)
    self.rm = grl.RewardManager(self.reward_func)
    # the order matters when the objects interact in a sequence
    self.order = self.kwargs.get('order', math.nan)
    # call the setup function for any object-dependent, user-specified configuration;
    # from a user perspective, setup() is the entry point of an object,
    # so the user should not touch the internal __init__
    self.setup()
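
# A minimal sketch of the extension pattern described above: subclasses put
# their configuration in setup() instead of overriding __init__. The class
# name CoinFlip and the assumption that grl exposes a Domain base class are
# illustrative; substitute the actual base class from the codebase.
class CoinFlip(grl.Domain):
    def setup(self):
        # object-dependent configuration goes here
        self.am.action_space = [0, 1]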
def phi_extreme_q(h, *args, **kwargs):
    eps = kwargs.get('eps', 0.1)
    q_func = kwargs.get('q_func', None)
    # q_func is expected to return a dict-like container of per-action values
    # that supports element-wise floor division
    q = q_func(h, *args, **kwargs)
    # discretize the action values into eps-sized bins
    q = q // eps
    # the state is the tuple of discretized values in sorted key order
    s = tuple(q[k] for k in sorted(q))
    return s


def phi_last_percept(h, *args, **kwargs):
    # the state is simply the last percept of the history
    return h[-1]


# phi_extreme_va and the domain/agent classes below (BlindMaze, SimpleMDP,
# SlipperyHill, DynamicKeys, RandomAgent, FrequencyAgent, GreedyQAgent) are
# assumed to be defined or imported elsewhere in the original codebase
history_mgr = grl.HistoryManager(maxlen=10, state_map=phi_extreme_va)

#domain = BlindMaze(history_mgr, maze_len=2)
#domain = SimpleMDP(history_mgr)
domain = SlipperyHill(history_mgr)
#domain = DynamicKeys(history_mgr)

#agent = RandomAgent(history_mgr)
agent = FrequencyAgent(history_mgr, exploration_factor=0.1, discount_factor=0.999)
#agent = GreedyQAgent(history_mgr, value_function_persist=False, exploration_factor=0.3)

# o_domain = domain
# domain = grl.BinaryMock(history_mgr)
# domain.hook(o_domain)

agent.interact(domain)
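
# Worked illustration of the abstraction phi_extreme_q computes, assuming
# q_func yields per-action values. A plain dict is used and the floor
# division is applied entry-wise by hand; the numbers are illustrative
# assumptions, not learned estimates.
q_values = {0: 0.27, 1: 0.81}
eps = 0.1
binned = {a: v // eps for a, v in q_values.items()}  # {0: 2.0, 1: 8.0}
state = tuple(binned[a] for a in sorted(binned))     # (2.0, 8.0)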
def setup(self):
    # expose a binary action space to the agent
    self.am.action_space = [0, 1]
    # dummy reward, configurable via the 'r_dummy' keyword argument (default 0.0)
    self.r_dummy = self.kwargs.get('r_dummy', 0.0)
    # an auxiliary history manager with the same capacity as the main one
    self.hm_ae = grl.HistoryManager(maxlen=self.hm.maxlen)
    # the underlying domain is attached later (see hook() usage in the example script)
    self.domain = None
    # cache for restricted action-set lookups
    self.restrict_A_cache = dict()
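
# Sketch of the intended wiring, mirroring the commented-out lines in the
# example script above: the mock is hooked onto an existing domain, which
# presumably fills the self.domain slot initialized to None in setup().
# The names history_mgr and original_domain stand for objects created elsewhere.
mock = grl.BinaryMock(history_mgr)
mock.hook(original_domain)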
def __init__(self, reward_func=lambda h, *x, **y: 0):
    # the default reward function assigns zero reward to every history
    self.reward_func = reward_func
    self.hm = grl.HistoryManager()
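
# Hypothetical usage: constructing a reward manager with a custom reward
# function that follows the same (h, *args, **kwargs) signature as the
# default. The goal-percept check is an illustrative assumption.
rm = grl.RewardManager(lambda h, *x, **y: 1 if len(h) and h[-1] == 'goal' else 0)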