Exemplo n.º 1
0
 def reset(self):
     self.reward[0] = 0.0
     self.rawReward = 0.0
     self.env.reset()
     self.action = [self.env.dists[0]] * self.outDim
     self.epiStep = 0
     EpisodicTask.reset(self)
Exemplo n.º 2
0
 def __init__(self, environment):
     EpisodicTask.__init__(self, environment)
     self.reward_history = []
     self.count = 0
     # normalize to (-1, 1)
     self.sensor_limits = [(-pi, pi), (-20, 20)]
     self.actor_limits = [(-1, 1)]
 def performAction(self, action):
     """ POMDP tasks, as they have discrete actions, can me used by providing either an index,
     or an array with a 1-in-n coding (which can be stochastic). """
     if type(action) == ndarray:
         action = drawIndex(action, tolerant = True)
     self.steps += 1
     EpisodicTask.performAction(self, action)
Exemplo n.º 4
0
 def performAction(self, action):
     """ a filtered mapping towards performAction of the underlying environment. """                
     # scaling
     self.incStep()
     action = (action + 1.0) / 2.0 * self.dif + self.env.fraktMin * self.env.dists[0]
     #Clipping the maximal change in actions (max force clipping)
     action = clip(action, self.action - self.maxSpeed, self.action + self.maxSpeed)
     EpisodicTask.performAction(self, action)
     self.action = action.copy()
Exemplo n.º 5
0
 def performAction(self, action):
     #Filtered mapping towards performAction of the underlying environment   
     #The standard Johnnie task uses a PID controller to controll directly angles instead of forces
     #This makes most tasks much simpler to learn             
     isJoints=self.env.getSensorByName('JointSensor') #The joint angles
     isSpeeds=self.env.getSensorByName('JointVelocitySensor') #The joint angular velocitys
     act=(action+1.0)/2.0*(self.env.cHighList-self.env.cLowList)+self.env.cLowList #norm output to action intervall  
     action=tanh((act-isJoints-isSpeeds)*16.0)*self.maxPower*self.env.tourqueList #simple PID
     EpisodicTask.performAction(self, action)
 def performAction(self, action):
     #Filtered mapping towards performAction of the underlying environment   
     #The standard CCRL task uses a PID controller to controll directly angles instead of forces
     #This makes most tasks much simpler to learn             
     self.oldAction = action
     #Grasping as reflex depending on the distance to target - comment in for more easy grasping
     #if abs(self.dist[2])<2.0: action[15]=(1.0+2.0*action[15])*.3333 #self.grepRew=action[15]*.01
     #else: action[15]=(-1.0+2.0*action[15])*.3333 #self.grepRew=action[15]*-.03
     isJoints=array(self.env.getSensorByName('JointSensor')) #The joint angles
     isSpeeds=array(self.env.getSensorByName('JointVelocitySensor')) #The joint angular velocitys
     act=(action+1.0)/2.0*(self.env.cHighList-self.env.cLowList)+self.env.cLowList #norm output to action intervall  
     action=tanh((act-isJoints-0.9*isSpeeds*self.env.tourqueList)*16.0)*self.maxPower*self.env.tourqueList #simple PID
     EpisodicTask.performAction(self, action)
Exemplo n.º 7
0
 def __init__(self, env=None, maxsteps=1000):
     """
     :key env: (optional) an instance of a CartPoleEnvironment (or a subclass thereof)
     :key maxsteps: maximal number of steps (default: 1000) 
     """
     if env == None:
         env = CartPoleEnvironment()
     EpisodicTask.__init__(self, env) 
     self.N = maxsteps
     self.t = 0
     
     # no scaling of sensors
     self.sensor_limits = [None] * 2
     
     # scale actor
     self.actor_limits = [(-50, 50)]
Exemplo n.º 8
0
 def __init__(self, env):
     EpisodicTask.__init__(self, env)
     self.step = 0
     self.epiStep = 0
     self.reward = [0.0]
     self.rawReward = 0.0
     self.obsSensors = ["EdgesReal"]
     self.rewardSensor = [""]
     self.oldReward = 0.0
     self.plotString = ["World Interactions", "Reward", "Reward on NoReward Task"]    
     self.inDim = len(self.getObservation())
     self.outDim = self.env.actLen        
     self.dif = (self.env.fraktMax - self.env.fraktMin) * self.env.dists[0]
     self.maxSpeed = self.dif / 30.0 
     self.picCount = 0
     self.epiLen = 1
Exemplo n.º 9
0
 def __init__(self, env=None, maxsteps=1000):
     """
     :key env: (optional) an instance of a ShipSteeringEnvironment (or a subclass thereof)
     :key maxsteps: maximal number of steps (default: 1000) 
     """
     if env == None:
         env = ShipSteeringEnvironment(render=False)
     EpisodicTask.__init__(self, env) 
     self.N = maxsteps
     self.t = 0
     
     # scale sensors
     #                          [h,              hdot,           v] 
     self.sensor_limits = [(-180.0, +180.0), (-180.0, +180.0), (-10.0, +40.0)]
     
     # actions:              thrust,       rudder       
     self.actor_limits = [(-1.0, +2.0), (-90.0, +90.0)]
     # scale reward over episode, such that max. return = 100
     self.rewardscale = 100. / maxsteps / self.sensor_limits[2][1]
Exemplo n.º 10
0
    def __init__(self, env):
        EpisodicTask.__init__(self, env)
        self.maxPower = 100.0 #Overall maximal tourque - is multiplied with relative max tourque for individual joint to get individual max tourque
        self.reward_history = []
        self.count = 0 #timestep counter
        self.epiLen = 500 #suggestet episodic length for normal Johnnie tasks
        self.incLearn = 0 #counts the task resets for incrementall learning
        self.env.FricMu = 20.0 #We need higher friction for Johnnie
        self.env.dt = 0.01 #We also need more timly resolution

        # normalize standard sensors to (-1, 1)
        self.sensor_limits = []
        #Angle sensors
        for i in range(self.env.actLen):
            self.sensor_limits.append((self.env.cLowList[i], self.env.cHighList[i]))            
        # Joint velocity sensors
        for i in range(self.env.actLen):
            self.sensor_limits.append((-20, 20))
        #Norm all actor dimensions to (-1, 1)
        self.actor_limits = [(-1, 1)] * env.actLen
Exemplo n.º 11
0
    def __init__(self, env):
        EpisodicTask.__init__(self, env)
        #Overall maximal tourque - is multiplied with relative max tourque for individual joint.
        self.maxPower = 100.0
        self.reward_history = []
        self.count = 0  #timestep counter
        self.epiLen = 1500  #suggestet episodic length for normal Johnnie tasks
        self.incLearn = 0  #counts the task resets for incrementall learning
        self.env.FricMu = 20.0  #We need higher friction for CCRL
        self.env.dt = 0.002  #We also need more timly resolution

        # normalize standard sensors to (-1, 1)
        self.sensor_limits = []
        #Angle sensors
        for i in range(self.env.actLen):
            self.sensor_limits.append(
                (self.env.cLowList[i], self.env.cHighList[i]))
        # Joint velocity sensors
        for i in range(self.env.actLen):
            self.sensor_limits.append((-20, 20))
        #Norm all actor dimensions to (-1, 1)
        self.actor_limits = [(-1, 1)] * env.actLen
        self.oldAction = zeros(env.actLen, float)
        self.dist = zeros(9, float)
        self.dif = array([0.0, 0.0, 0.0])
        self.target = array([-6.5, 1.75, -10.5])
        self.grepRew = 0.0
        self.tableFlag = 0.0
        self.env.addSensor(SpecificBodyPositionSensor(['objectP00'],
                                                      "glasPos"))
        self.env.addSensor(SpecificBodyPositionSensor(['palmLeft'], "palmPos"))
        self.env.addSensor(
            SpecificBodyPositionSensor(['fingerLeft1'], "finger1Pos"))
        self.env.addSensor(
            SpecificBodyPositionSensor(['fingerLeft2'], "finger2Pos"))
        #we changed sensors so we need to update environments sensorLength variable
        self.env.obsLen = len(self.env.getSensors())
        #normalization for the task spezific sensors
        for i in range(self.env.obsLen - 2 * self.env.actLen):
            self.sensor_limits.append((-4, 4))
Exemplo n.º 12
0
 def __init__(self, env=None, maxsteps=1000, desiredValue = 0):
     """
     :key env: (optional) an instance of a CartPoleEnvironment (or a subclass thereof)
     :key maxsteps: maximal number of steps (default: 1000) 
     """
     self.desiredValue = desiredValue
     if env == None:
         env = CartPoleEnvironment()
     EpisodicTask.__init__(self, env) 
     self.N = maxsteps
     self.t = 0
     
     # scale position and angle, don't scale velocities (unknown maximum)
     self.sensor_limits = [(-3, 3)]#, None, (-pi, pi), None]
     for i in range(1, self.outdim):
         if isinstance(self.env, NonMarkovPoleEnvironment) and i % 2 == 0:
             self.sensor_limits.append(None)
         else:
             self.sensor_limits.append((-pi, pi))
     
     self.sensor_limits = [None] * 4
     # actor between -10 and 10 Newton
     self.actor_limits = [(-50, 50)]
 def reset(self):
     EpisodicTask.reset(self)
     self.t = 0
 def performAction(self, action):
     self.t += 1
     EpisodicTask.performAction(self, action)
Exemplo n.º 15
0
 def getObservation(self):
     self.state = EpisodicTask.getObservation(self)
     return self.state
 def reset(self):
     self.steps = 0
     EpisodicTask.reset(self)
Exemplo n.º 17
0
 def performAction(self, action):
     EpisodicTask.performAction(self, action)
     self.action = action
Exemplo n.º 18
0
 def __init__(self):
     self.environment = SimpleraceEnvironment()
     EpisodicTask.__init__(self, self.environment)
     self.t = 0
Exemplo n.º 19
0
 def __init__(self, environment):
     EpisodicTask.__init__(self, environment)
     self.N = 15
     self.t = 0
     self.state = [0.0] * environment.dim
     self.action = [0.0] * environment.dim