def learn_policy(self):
     """Train a Q-learner over this object's state space and return its policy.

     Every state whose FullTransform.StateOffset.Arrows component equals
     World.ArrowState.Arrows_Complete is collected as a goal state and is
     assigned an r-value of 100 before the learner is executed.
     """
     # Build the learner from this object's state space, actions and callbacks.
     learner = QLearner(
         self.state_space,
         self.actions,
         self.handle_action,
         self.reset_training_world)
     reward_table = learner.r_table

     print("Enumerating goal states...")
     print(self.state_space_dim)

     # Turn each flat index of the reward table into its multi-dimensional
     # state index (lazily, one state at a time).
     all_states = (
         numpy.unravel_index(flat_index, reward_table.shape)
         for flat_index in xrange(reward_table.size))

     # Keep only the states whose arrows component is marked complete.
     goal_states = [
         tuple(coords)
         for coords in all_states
         if coords[FullTransform.StateOffset.Arrows]
         == World.ArrowState.Arrows_Complete]
     print("Goal states: %d" % len(goal_states))

     # Reward every goal state.
     goal_reward = 100
     for goal in goal_states:
         learner.set_r_value(goal, goal_reward)

     # Train; the meaning of the two numeric arguments is defined by
     # QLearner.execute (presumably iteration limits -- TODO confirm).
     print("Total states: %d" % (reward_table.size))
     learner.execute(goal_states, 500000, 50)

     return learner.get_policy()
    def learn_policy(self):
        """Train a Q-learner towards a single position goal and return its policy.

        The goal state is the pair (HorizontalState.At + 1,
        VerticleState.At + 1) taken from PositionTransform; it is assigned
        an r-value of 100 before the learner is executed.
        """
        # Build the learner from this object's state space, actions and callbacks.
        learner = QLearner(
            self.state_space,
            self.actions,
            self.handle_action,
            self.reset_training_world,
        )

        # The only rewarded state: both axes offset by one from their "At" value.
        horizontal_goal = PositionTransform.HorizontalState.At + 1
        vertical_goal = PositionTransform.VerticleState.At + 1
        goal_states = [(horizontal_goal, vertical_goal)]

        goal_reward = 100
        for goal in goal_states:
            learner.set_r_value(goal, goal_reward)

        # Train; the meaning of the two numeric arguments is defined by
        # QLearner.execute (presumably iteration limits -- TODO confirm).
        learner.execute(goal_states, 300, 50)

        return learner.get_policy()
 def learn_policy(self):
     """Train a Q-learner towards the "Useless" site state and return its policy.

     The single goal state is a one-element tuple holding the position of
     World.SiteState.Useless within self.state_space[0]; it is assigned an
     r-value of 100 before the learner is executed.
     """
     # Build the learner from this object's state space, actions and callbacks.
     learner = QLearner(
         self.state_space,
         self.actions,
         self.handle_action,
         self.reset_training_world)

     # Locate the rewarded state inside the first entry of the state space.
     useless_index = self.state_space[0].index(World.SiteState.Useless)
     goal_states = [(useless_index,)]

     goal_reward = 100
     for goal in goal_states:
         learner.set_r_value(goal, goal_reward)

     # Train; the meaning of the two numeric arguments is defined by
     # QLearner.execute (presumably iteration limits -- TODO confirm).
     learner.execute(goal_states, 300, 30)

     return learner.get_policy()