Пример #1
0
class DoublePoleEnvironment(Environment):
    """ Two poles to be balanced from the same cart.

        The environment is composed of two CartPoleEnvironment instances
        (p1 and p2) that always receive the same action. The pole parameters
        of p2 are overridden in the constructor (l = 0.05, mp = 0.01);
        presumably these are the pole length and pole mass -- confirm against
        CartPoleEnvironment.
    """

    # number of action values accepted (Environment convention)
    indim = 1
    # number of sensor values produced: getSensors() returns a 6-tuple
    outdim = 6
    # Historical misspelling of `outdim`; kept as an alias so that existing
    # code reading `ooutdim` keeps working.
    ooutdim = outdim

    def __init__(self):
        """ creates the two sub-environments, overrides the pole parameters
            of the second one and resets the whole environment.
        """
        self.p1 = CartPoleEnvironment()
        self.p2 = CartPoleEnvironment()
        self.p2.l = 0.05
        self.p2.mp = 0.01
        self.reset()

    def getSensors(self):
        """ queries both sub-environments for their sensors and combines them.
            The result is cached in self.sensors (it is read by
            getCartPosition and getPoleAngles) and returned.
            The sensor return vector has 6 elements:
            theta1, theta1', theta2, theta2', s, s'
            (s being the distance from the origin).
            The cart entries (s, s') are taken from the second
            sub-environment (p2).
        """
        s1 = self.p1.getSensors()
        s2 = self.p2.getSensors()
        self.sensors = (s1[0], s1[1], s2[0], s2[1], s2[2], s2[3])
        return self.sensors

    def reset(self):
        """ re-initializes the environment by resetting both sub-environments,
            then copies the cart entries of p1 into p2 so that both start
            with the same cart state, and refreshes the cached sensors.
        """
        self.p1.reset()
        self.p2.reset()
        # put cart in the same place: keep p2's own first two entries and
        # take entries 2 and 3 (the cart entries) from p1
        self.p2.sensors = (self.p2.sensors[0], self.p2.sensors[1],
                           self.p1.sensors[2], self.p1.sensors[3])
        self.getSensors()

    def performAction(self, action):
        """ forwards the same action to both sub-environments.
            NOTE: the cached self.sensors is not refreshed here; call
            getSensors() afterwards to update it.
        """
        self.p1.performAction(action)
        self.p2.performAction(action)

    def getCartPosition(self):
        """ auxiliary access to just the cart position, to be used by BalanceTask.
            Reads entry 4 of the sensors cached by the last getSensors() call.
        """
        return self.sensors[4]

    def getPoleAngles(self):
        """ auxiliary access to just the pole angle(s), to be used by BalanceTask.
            Returns a list with entries 0 and 2 of the sensors cached by the
            last getSensors() call.
        """
        return [self.sensors[0], self.sensors[2]]
Пример #2
0
class DoublePoleEnvironment(Environment):
    """ Two poles to be balanced from the same cart.

        The environment is composed of two CartPoleEnvironment instances
        (p1 and p2) that always receive the same action. The pole parameters
        of p2 are overridden in the constructor (l = 0.05, mp = 0.01);
        presumably these are the pole length and pole mass -- confirm against
        CartPoleEnvironment.
    """

    # number of action values accepted (Environment convention)
    indim = 1
    # number of sensor values produced: getSensors() returns a 6-tuple
    outdim = 6
    # Historical misspelling of `outdim`; kept as an alias so that existing
    # code reading `ooutdim` keeps working.
    ooutdim = outdim

    def __init__(self):
        """ creates the two sub-environments, overrides the pole parameters
            of the second one and resets the whole environment.
        """
        self.p1 = CartPoleEnvironment()
        self.p2 = CartPoleEnvironment()
        self.p2.l = 0.05
        self.p2.mp = 0.01
        self.reset()

    def getSensors(self):
        """ queries both sub-environments for their sensors and combines them.
            The result is cached in self.sensors (it is read by
            getCartPosition and getPoleAngles) and returned.
            The sensor return vector has 6 elements:
            theta1, theta1', theta2, theta2', s, s'
            (s being the distance from the origin).
            The cart entries (s, s') are taken from the second
            sub-environment (p2).
        """
        s1 = self.p1.getSensors()
        s2 = self.p2.getSensors()
        self.sensors = (s1[0], s1[1], s2[0], s2[1], s2[2], s2[3])
        return self.sensors

    def reset(self):
        """ re-initializes the environment by resetting both sub-environments,
            then copies the cart entries of p1 into p2 so that both start
            with the same cart state, and refreshes the cached sensors.
        """
        self.p1.reset()
        self.p2.reset()
        # put cart in the same place: keep p2's own first two entries and
        # take entries 2 and 3 (the cart entries) from p1
        self.p2.sensors = (self.p2.sensors[0], self.p2.sensors[1],
                           self.p1.sensors[2], self.p1.sensors[3])
        self.getSensors()

    def performAction(self, action):
        """ forwards the same action to both sub-environments.
            NOTE: the cached self.sensors is not refreshed here; call
            getSensors() afterwards to update it.
        """
        self.p1.performAction(action)
        self.p2.performAction(action)

    def getCartPosition(self):
        """ auxiliary access to just the cart position, to be used by BalanceTask.
            Reads entry 4 of the sensors cached by the last getSensors() call.
        """
        return self.sensors[4]

    def getPoleAngles(self):
        """ auxiliary access to just the pole angle(s), to be used by BalanceTask.
            Returns a list with entries 0 and 2 of the sensors cached by the
            last getSensors() call.
        """
        return [self.sensors[0], self.sensors[2]]