def runAgent(self, moduleDict, numExperiences):
     """Train a QLearningAgent on sampled experiences and report what it learned.

     The agent is built from ``moduleDict['qlearningAgents']`` with
     ``self.opts`` and updated with ``numExperiences`` transitions. Each
     transition picks a start state and an action through the generator
     obtained from ``FixedRandom().random`` and asks ``self.env`` for the
     resulting state and reward. Only states with at least one possible
     action are considered.

     Returns a tuple ``(valuesPretty, qValuesPretty, actions, policyPretty,
     lastExperience)``:
       * ``valuesPretty`` / ``policyPretty`` -- the per-state values and
         actions computed by the agent, passed through ``self.prettyValues``
         and ``self.prettyPolicy``.
       * ``qValuesPretty`` -- maps each action to ``self.prettyValues`` of its
         per-state q-values (``None`` where the action is not possible).
       * ``actions`` -- the union of possible actions over all states.
       * ``lastExperience`` -- the final ``(startState, action, endState,
         reward)`` tuple, or ``None`` when ``numExperiences`` is 0.
     """
     # Function-scope import: ``reduce`` is no longer a builtin on Python 3,
     # while ``functools.reduce`` exists on both Python 2.6+ and Python 3.
     from functools import reduce

     agent = moduleDict['qlearningAgents'].QLearningAgent(**self.opts)
     # Build a real list: on Python 3 ``filter`` returns an iterator, which has
     # no ``.sort()`` and cannot be handed to ``choice``.
     states = [state for state in self.grid.getStates()
               if len(self.grid.getPossibleActions(state)) > 0]
     states.sort()
     randObj = FixedRandom().random
     # choose a random start state and a random possible action from that state
     # get the next state and reward from the transition function
     lastExperience = None
     for _ in range(numExperiences):
         startState = randObj.choice(states)
         action = randObj.choice(self.grid.getPossibleActions(startState))
         (endState, reward) = self.env.getRandomNextState(startState, action, randObj=randObj)
         lastExperience = (startState, action, endState, reward)
         agent.update(*lastExperience)
     actions = list(reduce(lambda a, b: set(a).union(b), [self.grid.getPossibleActions(state) for state in states]))
     values = {}
     qValues = {}
     policy = {}
     for state in states:
         values[state] = agent.computeValueFromQValues(state)
         policy[state] = agent.computeActionFromQValues(state)
         possibleActions = self.grid.getPossibleActions(state)
         for action in actions:
             # ``dict.has_key`` was removed in Python 3; ``in`` works everywhere.
             if action not in qValues:
                 qValues[action] = {}
             if action in possibleActions:
                 qValues[action][state] = agent.getQValue(state, action)
             else:
                 # Keep an entry for every (action, state) pair.
                 qValues[action][state] = None
     valuesPretty = self.prettyValues(values)
     policyPretty = self.prettyPolicy(policy)
     qValuesPretty = {}
     for action in actions:
         qValuesPretty[action] = self.prettyValues(qValues[action])
     return (valuesPretty, qValuesPretty, actions, policyPretty, lastExperience)
 def runAgent(self, moduleDict, numExperiences):
     """Train a QLearningAgent on sampled experiences and report what it learned.

     The agent is built from ``moduleDict['qlearningAgents']`` with
     ``self.opts`` and updated with ``numExperiences`` transitions. Each
     transition picks a start state and an action through the generator
     obtained from ``FixedRandom().random`` and asks ``self.env`` for the
     resulting state and reward. Only states with at least one possible
     action are considered.

     Returns a tuple ``(valuesPretty, qValuesPretty, actions, policyPretty,
     lastExperience)``:
       * ``valuesPretty`` / ``policyPretty`` -- the per-state values and
         actions computed by the agent, passed through ``self.prettyValues``
         and ``self.prettyPolicy``.
       * ``qValuesPretty`` -- maps each action to ``self.prettyValues`` of its
         per-state q-values (``None`` where the action is not possible).
       * ``actions`` -- the union of possible actions over all states.
       * ``lastExperience`` -- the final ``(startState, action, endState,
         reward)`` tuple, or ``None`` when ``numExperiences`` is 0.
     """
     # Function-scope import: ``reduce`` is no longer a builtin on Python 3,
     # while ``functools.reduce`` exists on both Python 2.6+ and Python 3.
     from functools import reduce

     agent = moduleDict['qlearningAgents'].QLearningAgent(**self.opts)
     # Build a real list: on Python 3 ``filter`` returns an iterator, which has
     # no ``.sort()`` and cannot be handed to ``choice``.
     states = [state for state in self.grid.getStates()
               if len(self.grid.getPossibleActions(state)) > 0]
     states.sort()
     randObj = FixedRandom().random
     # choose a random start state and a random possible action from that state
     # get the next state and reward from the transition function
     lastExperience = None
     for _ in range(numExperiences):
         startState = randObj.choice(states)
         action = randObj.choice(self.grid.getPossibleActions(startState))
         (endState, reward) = self.env.getRandomNextState(startState, action, randObj=randObj)
         lastExperience = (startState, action, endState, reward)
         agent.update(*lastExperience)
     actions = list(reduce(lambda a, b: set(a).union(b), [self.grid.getPossibleActions(state) for state in states]))
     values = {}
     qValues = {}
     policy = {}
     for state in states:
         values[state] = agent.computeValueFromQValues(state)
         policy[state] = agent.computeActionFromQValues(state)
         possibleActions = self.grid.getPossibleActions(state)
         for action in actions:
             # ``dict.has_key`` was removed in Python 3; ``in`` works everywhere.
             if action not in qValues:
                 qValues[action] = {}
             if action in possibleActions:
                 qValues[action][state] = agent.getQValue(state, action)
             else:
                 # Keep an entry for every (action, state) pair.
                 qValues[action][state] = None
     valuesPretty = self.prettyValues(values)
     policyPretty = self.prettyPolicy(policy)
     qValuesPretty = {}
     for action in actions:
         qValuesPretty[action] = self.prettyValues(qValues[action])
     return (valuesPretty, qValuesPretty, actions, policyPretty, lastExperience)
Exemplo n.º 3
0
 def runAgent(self, moduleDict, numExperiences):
     """Run an ApproximateQAgent through sampled experiences and collect its output.

     The agent comes from ``moduleDict['qlearningAgents']`` and is constructed
     with ``self.extractor`` and ``self.opts``. It receives ``numExperiences``
     updates, each built from a start state and action drawn with the
     generator from ``FixedRandom().random`` and the transition returned by
     ``self.env.getRandomNextState``.

     Returns ``(qValuesPretty, weights, actions, lastExperience)`` where
     ``qValuesPretty`` maps every action to ``self.prettyValues`` of its
     per-state q-values (``None`` for states where the action is not
     possible), ``weights`` is ``agent.getWeights()`` read after training, and
     ``lastExperience`` is the last update tuple or ``None`` if no update ran.
     """
     learner = moduleDict['qlearningAgents'].ApproximateQAgent(extractor=self.extractor, **self.opts)

     # Only states that offer at least one action take part, in sorted order.
     states = sorted(
         candidate for candidate in self.grid.getStates()
         if len(self.grid.getPossibleActions(candidate)) > 0
     )

     rng = FixedRandom().random
     lastExperience = None
     for _ in range(numExperiences):
         # Sample (state, action), then let the environment pick the outcome.
         origin = rng.choice(states)
         move = rng.choice(self.grid.getPossibleActions(origin))
         destination, reward = self.env.getRandomNextState(origin, move, randObj=rng)
         lastExperience = (origin, move, destination, reward)
         learner.update(origin, move, destination, reward)

     def mergeActions(seen, more):
         return set(seen).union(more)

     perStateActions = [self.grid.getPossibleActions(candidate) for candidate in states]
     actions = list(reduce(mergeActions, perStateActions))

     qValues = {}
     weights = learner.getWeights()
     for candidate in states:
         allowed = self.grid.getPossibleActions(candidate)
         for move in actions:
             column = qValues.setdefault(move, {})
             if move in allowed:
                 column[candidate] = learner.getQValue(candidate, move)
             else:
                 column[candidate] = None

     qValuesPretty = {}
     for move in actions:
         qValuesPretty[move] = self.prettyValues(qValues[move])
     return (qValuesPretty, weights, actions, lastExperience)
Exemplo n.º 4
0
 def runAgent(self, moduleDict, numExperiences):
     """Train an ApproximateQAgent on sampled experiences and report its q-values.

     The agent is built from ``moduleDict["qlearningAgents"]`` with
     ``self.extractor`` and ``self.opts`` and updated with ``numExperiences``
     transitions. Each transition picks a start state and an action through
     the generator obtained from ``FixedRandom().random`` and asks ``self.env``
     for the resulting state and reward. Only states with at least one
     possible action are considered.

     Returns a tuple ``(qValuesPretty, weights, actions, lastExperience)``:
       * ``qValuesPretty`` -- maps each action to ``self.prettyValues`` of its
         per-state q-values (``None`` where the action is not possible).
       * ``weights`` -- the result of ``agent.getWeights()`` after training.
       * ``actions`` -- the union of possible actions over all states.
       * ``lastExperience`` -- the final ``(startState, action, endState,
         reward)`` tuple, or ``None`` when ``numExperiences`` is 0.
     """
     # Function-scope import: ``reduce`` is no longer a builtin on Python 3,
     # while ``functools.reduce`` exists on both Python 2.6+ and Python 3.
     from functools import reduce

     agent = moduleDict["qlearningAgents"].ApproximateQAgent(extractor=self.extractor, **self.opts)
     # Build a real list: on Python 3 ``filter`` returns an iterator, which has
     # no ``.sort()`` and cannot be handed to ``choice``.
     states = [state for state in self.grid.getStates()
               if len(self.grid.getPossibleActions(state)) > 0]
     states.sort()
     randObj = FixedRandom().random
     # choose a random start state and a random possible action from that state
     # get the next state and reward from the transition function
     lastExperience = None
     for _ in range(numExperiences):
         startState = randObj.choice(states)
         action = randObj.choice(self.grid.getPossibleActions(startState))
         (endState, reward) = self.env.getRandomNextState(startState, action, randObj=randObj)
         lastExperience = (startState, action, endState, reward)
         agent.update(*lastExperience)
     actions = list(reduce(lambda a, b: set(a).union(b), [self.grid.getPossibleActions(state) for state in states]))
     qValues = {}
     weights = agent.getWeights()
     for state in states:
         possibleActions = self.grid.getPossibleActions(state)
         for action in actions:
             # ``dict.has_key`` was removed in Python 3; ``in`` works everywhere.
             if action not in qValues:
                 qValues[action] = {}
             if action in possibleActions:
                 qValues[action][state] = agent.getQValue(state, action)
             else:
                 # Keep an entry for every (action, state) pair.
                 qValues[action][state] = None
     qValuesPretty = {}
     for action in actions:
         qValuesPretty[action] = self.prettyValues(qValues[action])
     return (qValuesPretty, weights, actions, lastExperience)
 def runAgent(self, moduleDict):
     """Build a QLearningAgent, train it on sampled experiences and return it.

     The agent is created from ``moduleDict['qlearningAgents']`` with
     ``self.opts`` and receives ``self.numExperiences`` updates. Each update
     uses a start state and an action chosen through the generator obtained
     from ``FixedRandom().random`` and the ``(endState, reward)`` pair returned
     by ``self.env.getRandomNextState``. Only states with at least one
     possible action are sampled.

     Returns the trained agent.
     """
     agent = moduleDict['qlearningAgents'].QLearningAgent(**self.opts)
     # Build a real list: on Python 3 ``filter`` returns an iterator, which has
     # no ``.sort()`` and cannot be handed to ``choice``.
     states = [state for state in self.grid.getStates()
               if len(self.grid.getPossibleActions(state)) > 0]
     states.sort()
     randObj = FixedRandom().random
     # choose a random start state and a random possible action from that state
     # get the next state and reward from the transition function
     for _ in range(self.numExperiences):
         startState = randObj.choice(states)
         action = randObj.choice(self.grid.getPossibleActions(startState))
         (endState, reward) = self.env.getRandomNextState(startState, action, randObj=randObj)
         agent.update(startState, action, endState, reward)
     return agent
 def runAgent(self, moduleDict):
     """Build a QLearningAgent, train it on sampled experiences and return it.

     The agent is created from ``moduleDict['qlearningAgents']`` with
     ``self.opts`` and receives ``self.numExperiences`` updates. Each update
     uses a start state and an action chosen through the generator obtained
     from ``FixedRandom().random`` and the ``(endState, reward)`` pair returned
     by ``self.env.getRandomNextState``. Only states with at least one
     possible action are sampled.

     Returns the trained agent.
     """
     agent = moduleDict['qlearningAgents'].QLearningAgent(**self.opts)
     # Build a real list: on Python 3 ``filter`` returns an iterator, which has
     # no ``.sort()`` and cannot be handed to ``choice``.
     states = [state for state in self.grid.getStates()
               if len(self.grid.getPossibleActions(state)) > 0]
     states.sort()
     randObj = FixedRandom().random
     # choose a random start state and a random possible action from that state
     # get the next state and reward from the transition function
     for _ in range(self.numExperiences):
         startState = randObj.choice(states)
         action = randObj.choice(self.grid.getPossibleActions(startState))
         (endState, reward) = self.env.getRandomNextState(startState, action, randObj=randObj)
         agent.update(startState, action, endState, reward)
     return agent
 def run_agent(self, module_dict, num_experiences):
     """Run a QLearningAgent through sampled experiences and collect its output.

     The agent comes from ``module_dict['q_learning_agents']`` and is
     constructed with ``self.opts``. It receives ``num_experiences`` updates,
     each built from a start state and action drawn with the generator from
     ``FixedRandom().random`` and the transition returned by
     ``self.env.get_random_next_state``.

     Returns ``(values_pretty, q_values_pretty, actions, policy_pretty,
     last_experience)``. The values and policy are the agent's per-state
     answers passed through ``self.pretty_values`` / ``self.pretty_policy``;
     ``q_values_pretty`` maps every action to ``self.pretty_values`` of its
     per-state q-values (``None`` where the action is not possible);
     ``last_experience`` is the last update tuple or ``None`` if no update ran.
     """
     learner = module_dict['q_learning_agents'].QLearningAgent(**self.opts)

     # Only states that offer at least one action take part, in sorted order.
     states = sorted(
         candidate for candidate in self.grid.get_states()
         if len(self.grid.get_possible_actions(candidate)) > 0
     )

     rng = FixedRandom().random
     last_experience = None
     for _ in range(num_experiences):
         # Sample (state, action), then let the environment pick the outcome.
         origin = rng.choice(states)
         move = rng.choice(self.grid.get_possible_actions(origin))
         destination, reward = self.env.get_random_next_state(
             origin, move, rand_obj=rng)
         last_experience = (origin, move, destination, reward)
         learner.update(origin, move, destination, reward)

     def merge_actions(seen, more):
         return set(seen).union(more)

     per_state_actions = [
         self.grid.get_possible_actions(candidate) for candidate in states
     ]
     actions = list(reduce(merge_actions, per_state_actions))

     values = {}
     q_values = {}
     policy = {}
     for candidate in states:
         values[candidate] = learner.compute_value_from_q_values(candidate)
         policy[candidate] = learner.compute_action_from_q_values(candidate)
         allowed = self.grid.get_possible_actions(candidate)
         for move in actions:
             column = q_values.setdefault(move, {})
             if move in allowed:
                 column[candidate] = learner.get_q_value(candidate, move)
             else:
                 column[candidate] = None

     values_pretty = self.pretty_values(values)
     policy_pretty = self.pretty_policy(policy)
     q_values_pretty = {}
     for move in actions:
         q_values_pretty[move] = self.pretty_values(q_values[move])
     return (values_pretty, q_values_pretty, actions, policy_pretty,
             last_experience)
 def run_agent(self, module_dict):
     """Create a QLearningAgent, feed it sampled experiences and hand it back.

     The agent is built from ``module_dict['q_learning_agents']`` with
     ``self.opts``. For ``self.num_experiences`` rounds a start state and one
     of its possible actions are drawn with the generator from
     ``FixedRandom().random``, ``self.env.get_random_next_state`` supplies the
     end state and reward, and the agent is updated with that transition.

     Returns the agent after all updates.
     """
     learner = module_dict['q_learning_agents'].QLearningAgent(**self.opts)

     # Only states that offer at least one action take part, in sorted order.
     states = sorted(
         candidate for candidate in self.grid.get_states()
         if len(self.grid.get_possible_actions(candidate)) > 0
     )

     rng = FixedRandom().random
     for _ in range(self.num_experiences):
         # Sample (state, action), then let the environment pick the outcome.
         origin = rng.choice(states)
         move = rng.choice(self.grid.get_possible_actions(origin))
         destination, reward = self.env.get_random_next_state(
             origin, move, rand_obj=rng)
         learner.update(origin, move, destination, reward)
     return learner
Exemplo n.º 9
0
 def run_agent(self, module_dict, num_experiences):
     """Train an ApproximateQAgent on sampled experiences and report its q-values.

     The agent is built from ``module_dict['q_learning_agents']`` with
     ``self.extractor`` and ``self.opts`` and updated with ``num_experiences``
     transitions. Each transition picks a start state and an action through
     the generator obtained from ``FixedRandom().random`` and asks ``self.env``
     for the resulting state and reward. Only states with at least one
     possible action are considered.

     Returns a tuple ``(q_values_pretty, weights, actions, last_experience)``:
       * ``q_values_pretty`` -- maps each action to ``self.pretty_values`` of
         its per-state q-values (``None`` where the action is not possible).
       * ``weights`` -- the result of ``agent.get_weights()`` after training.
       * ``actions`` -- the union of possible actions over all states.
       * ``last_experience`` -- the final ``(start_state, action, end_state,
         reward)`` tuple, or ``None`` when ``num_experiences`` is 0.
     """
     # Function-scope import: ``reduce`` is no longer a builtin on Python 3,
     # while ``functools.reduce`` exists on both Python 2.6+ and Python 3.
     from functools import reduce

     agent = module_dict['q_learning_agents'].ApproximateQAgent(
         extractor=self.extractor, **self.opts)
     # Build a real list: on Python 3 ``filter`` returns an iterator, which has
     # no ``.sort()`` and cannot be handed to ``choice``.
     states = [
         state for state in self.grid.get_states()
         if len(self.grid.get_possible_actions(state)) > 0
     ]
     states.sort()
     rand_obj = FixedRandom().random
     # choose a random start state and a random possible action from that state
     # get the next state and reward from the transition function
     last_experience = None
     for _ in range(num_experiences):
         start_state = rand_obj.choice(states)
         action = rand_obj.choice(
             self.grid.get_possible_actions(start_state))
         (end_state,
          reward) = self.env.get_random_next_state(start_state,
                                                   action,
                                                   rand_obj=rand_obj)
         last_experience = (start_state, action, end_state, reward)
         agent.update(*last_experience)
     actions = list(
         reduce(lambda a, b: set(a).union(b),
                [self.grid.get_possible_actions(state)
                 for state in states]))
     q_values = {}
     weights = agent.get_weights()
     for state in states:
         possible_actions = self.grid.get_possible_actions(state)
         for action in actions:
             # ``dict.has_key`` was removed in Python 3; ``in`` works everywhere.
             if action not in q_values:
                 q_values[action] = {}
             if action in possible_actions:
                 q_values[action][state] = agent.get_q_value(state, action)
             else:
                 # Keep an entry for every (action, state) pair.
                 q_values[action][state] = None
     q_values_pretty = {}
     for action in actions:
         q_values_pretty[action] = self.pretty_values(q_values[action])
     return (q_values_pretty, weights, actions, last_experience)