Exemplo n.º 1
0
 def __init__(self, environment):
     EpisodicTask.__init__(self, environment)
     self.reward_history = []
     self.count = 0 
     # normalize to (-1, 1)
     self.sensor_limits = [(-pi, pi), (-20, 20)]
     self.actor_limits = [(-1, 1)]
Exemplo n.º 2
0
 def __init__(self, environment):
     EpisodicTask.__init__(self, environment)
     self.reward_history = []
     self.count = 0
     # normalize to (-1, 1)
     self.sensor_limits = [(-pi, pi), (-20, 20)]
     self.actor_limits = [(-1, 1)]
Exemplo n.º 3
0
 def __init__(self, env):
     EpisodicTask.__init__(self, env)
     self.step=0
     self.epiStep=0
     self.reward=[0.0]
     self.rawReward=0.0
     self.obsSensors=["EdgesReal"]
     self.rewardSensor=[""]
     self.oldReward=0.0
     self.plotString=["World Interactions", "Reward", "Reward on NoReward Task"]    
     self.inDim=len(self.getObservation())
     self.outDim=self.env.actLen        
     self.dif=(self.env.fraktMax-self.env.fraktMin)*self.env.dists[0]
     self.maxSpeed=self.dif/30.0 
     self.picCount=0
     self.epiLen=1
Exemplo n.º 4
0
 def __init__(self, env):
     EpisodicTask.__init__(self, env)
     self.step = 0
     self.epiStep = 0
     self.reward = [0.0]
     self.rawReward = 0.0
     self.obsSensors = ["EdgesReal"]
     self.rewardSensor = [""]
     self.oldReward = 0.0
     self.plotString = [
         "World Interactions", "Reward", "Reward on NoReward Task"
     ]
     self.inDim = len(self.getObservation())
     self.outDim = self.env.actLen
     self.dif = (self.env.fraktMax - self.env.fraktMin) * self.env.dists[0]
     self.maxSpeed = self.dif / 30.0
     self.picCount = 0
     self.epiLen = 1
Exemplo n.º 5
0
    def __init__(self, env=None, maxsteps=1000):
        """
        @param env: (optional) an instance of a CartPoleEnvironment (or a subclass thereof)
        @param maxsteps: maximal number of steps (default: 1000) 
        """
        if env == None:
            env = ShipSteeringEnvironment()
        EpisodicTask.__init__(self, env)
        self.N = maxsteps
        self.t = 0

        # scale sensors
        #                          [h,              hdot,           v]
        self.sensor_limits = [(-180.0, +180.0), (-180.0, +180.0), (-10.0, +40.0)]

        # actions:              thrust,       rudder
        self.actor_limits = [(-1.0, +2.0), (-90.0, +90.0)]
        # scale reward over episode, such that max. return = 100
        self.rewardscale = 100.0 / maxsteps / self.sensor_limits[2][1]
Exemplo n.º 6
0
    def __init__(self, env=None, maxsteps=1000):
        """
        @param env: (optional) an instance of a CartPoleEnvironment (or a subclass thereof)
        @param maxsteps: maximal number of steps (default: 1000) 
        """
        if env == None:
            env = ShipSteeringEnvironment()
        EpisodicTask.__init__(self, env)
        self.N = maxsteps
        self.t = 0

        # scale sensors
        #                          [h,              hdot,           v]
        self.sensor_limits = [(-180.0, +180.0), (-180.0, +180.0),
                              (-10.0, +40.0)]

        # actions:              thrust,       rudder
        self.actor_limits = [(-1.0, +2.0), (-90.0, +90.0)]
        # scale reward over episode, such that max. return = 100
        self.rewardscale = 100. / maxsteps / self.sensor_limits[2][1]
Exemplo n.º 7
0
    def __init__(self, env):
        EpisodicTask.__init__(self, env)
        self.maxPower=100.0 #Overall maximal tourque - is multiplied with relative max tourque for individual joint to get individual max tourque
        self.reward_history = []
        self.count = 0 #timestep counter
        self.epiLen=500 #suggestet episodic length for normal Johnnie tasks
        self.incLearn=0 #counts the task resets for incrementall learning
        self.env.FricMu=20.0 #We need higher friction for Johnnie
        self.env.dt=0.01 #We also need more timly resolution

        # normalize standard sensors to (-1, 1)
        self.sensor_limits=[]
        #Angle sensors
        for i in range(self.env.actLen):
            self.sensor_limits.append((self.env.cLowList[i], self.env.cHighList[i]))            
        # Joint velocity sensors
        for i in range(self.env.actLen):
            self.sensor_limits.append((-20, 20))
        #Norm all actor dimensions to (-1, 1)
        self.actor_limits = [(-1, 1)]*env.actLen
Exemplo n.º 8
0
    def __init__(self, env):
        EpisodicTask.__init__(self, env)
        self.maxPower = 100.0  #Overall maximal tourque - is multiplied with relative max tourque for individual joint to get individual max tourque
        self.reward_history = []
        self.count = 0  #timestep counter
        self.epiLen = 500  #suggestet episodic length for normal Johnnie tasks
        self.incLearn = 0  #counts the task resets for incrementall learning
        self.env.FricMu = 20.0  #We need higher friction for Johnnie
        self.env.dt = 0.01  #We also need more timly resolution

        # normalize standard sensors to (-1, 1)
        self.sensor_limits = []
        #Angle sensors
        for i in range(self.env.actLen):
            self.sensor_limits.append(
                (self.env.cLowList[i], self.env.cHighList[i]))
        # Joint velocity sensors
        for i in range(self.env.actLen):
            self.sensor_limits.append((-20, 20))
        #Norm all actor dimensions to (-1, 1)
        self.actor_limits = [(-1, 1)] * env.actLen
Exemplo n.º 9
0
    def __init__(self, env):
        EpisodicTask.__init__(self, env)
        #Overall maximal tourque - is multiplied with relative max tourque for individual joint.
        self.maxPower = 100.0
        self.reward_history = []
        self.count = 0  #timestep counter
        self.epiLen = 1500  #suggestet episodic length for normal Johnnie tasks
        self.incLearn = 0  #counts the task resets for incrementall learning
        self.env.FricMu = 20.0  #We need higher friction for CCRL
        self.env.dt = 0.002  #We also need more timly resolution

        # normalize standard sensors to (-1, 1)
        self.sensor_limits = []
        #Angle sensors
        for i in range(self.env.actLen):
            self.sensor_limits.append(
                (self.env.cLowList[i], self.env.cHighList[i]))
        # Joint velocity sensors
        for i in range(self.env.actLen):
            self.sensor_limits.append((-20, 20))
        #Norm all actor dimensions to (-1, 1)
        self.actor_limits = [(-1, 1)] * env.actLen
        self.oldAction = zeros(env.actLen, float)
        self.dist = zeros(9, float)
        self.dif = array([0.0, 0.0, 0.0])
        self.target = array([-6.5, 1.75, -10.5])
        self.grepRew = 0.0
        self.tableFlag = 0.0
        self.env.addSensor(SpecificBodyPositionSensor(['objectP00'],
                                                      "glasPos"))
        self.env.addSensor(SpecificBodyPositionSensor(['palmLeft'], "palmPos"))
        self.env.addSensor(
            SpecificBodyPositionSensor(['fingerLeft1'], "finger1Pos"))
        self.env.addSensor(
            SpecificBodyPositionSensor(['fingerLeft2'], "finger2Pos"))
        #we changed sensors so we need to update environments sensorLength variable
        self.env.obsLen = len(self.env.getSensors())
        #normalization for the task spezific sensors
        for i in range(self.env.obsLen - 2 * self.env.actLen):
            self.sensor_limits.append((-4, 4))
Exemplo n.º 10
0
    def __init__(self, env=None, maxsteps=1000):
        """
        @param env: (optional) an instance of a CartPoleEnvironment (or a subclass thereof)
        @param maxsteps: maximal number of steps (default: 1000) 
        """
        if env == None:
            env = CartPoleEnvironment()
        EpisodicTask.__init__(self, env)
        self.N = maxsteps
        self.t = 0

        # scale position and angle, don't scale velocities (unknown maximum)
        self.sensor_limits = [(-3, 3)]  # , None, (-pi, pi), None]
        for i in range(1, self.outdim):
            if isinstance(self.env, NonMarkovPoleEnvironment) and i % 2 == 0:
                self.sensor_limits.append(None)
            else:
                self.sensor_limits.append((-pi, pi))

        self.sensor_limits = [None] * 4
        # actor between -10 and 10 Newton
        self.actor_limits = [(-10, 10)]
Exemplo n.º 11
0
    def __init__(self, env=None, maxsteps=1000):
        """
        @param env: (optional) an instance of a CartPoleEnvironment (or a subclass thereof)
        @param maxsteps: maximal number of steps (default: 1000) 
        """
        if env == None:
            env = CartPoleEnvironment()
        EpisodicTask.__init__(self, env)
        self.N = maxsteps
        self.t = 0

        # scale position and angle, don't scale velocities (unknown maximum)
        self.sensor_limits = [(-3, 3)]  #, None, (-pi, pi), None]
        for i in range(1, self.outdim):
            if isinstance(self.env, NonMarkovPoleEnvironment) and i % 2 == 0:
                self.sensor_limits.append(None)
            else:
                self.sensor_limits.append((-pi, pi))

        self.sensor_limits = [None] * 4
        # actor between -10 and 10 Newton
        self.actor_limits = [(-10, 10)]
Exemplo n.º 12
0
Arquivo: ccrl.py Projeto: HKou/pybrain
    def __init__(self, env):
        EpisodicTask.__init__(self, env)
        #Overall maximal tourque - is multiplied with relative max tourque for individual joint.
        self.maxPower=100.0
        self.reward_history = []
        self.count = 0 #timestep counter
        self.epiLen=1500 #suggestet episodic length for normal Johnnie tasks
        self.incLearn=0 #counts the task resets for incrementall learning
        self.env.FricMu=20.0 #We need higher friction for CCRL
        self.env.dt=0.002 #We also need more timly resolution

        # normalize standard sensors to (-1, 1)
        self.sensor_limits=[]
        #Angle sensors
        for i in range(self.env.actLen):
            self.sensor_limits.append((self.env.cLowList[i], self.env.cHighList[i]))            
        # Joint velocity sensors
        for i in range(self.env.actLen):
            self.sensor_limits.append((-20, 20))
        #Norm all actor dimensions to (-1, 1)
        self.actor_limits = [(-1, 1)]*env.actLen
        self.oldAction=zeros(env.actLen, float)
        self.dist=zeros(9, float)
        self.dif=array([0.0,0.0,0.0])
        self.target=array([-6.5,1.75,-10.5])
        self.grepRew=0.0
        self.tableFlag=0.0
        self.env.addSensor(SpecificBodyPositionSensor(['objectP00'], "glasPos"))
        self.env.addSensor(SpecificBodyPositionSensor(['palmLeft'], "palmPos"))
        self.env.addSensor(SpecificBodyPositionSensor(['fingerLeft1'], "finger1Pos"))
        self.env.addSensor(SpecificBodyPositionSensor(['fingerLeft2'], "finger2Pos"))
        #we changed sensors so we need to update environments sensorLength variable
        self.env.obsLen=len(self.env.getSensors())
        #normalization for the task spezific sensors
        for i in range(self.env.obsLen-2*self.env.actLen):
            self.sensor_limits.append((-4, 4))
Exemplo n.º 13
0
 def __init__(self, environment):
     EpisodicTask.__init__(self, environment)
     self.N = 15
     self.t = 0
     self.state = [0.0] * environment.dim
     self.action = [0.0] * environment.dim
Exemplo n.º 14
0
 def __init__(self, environment):
     EpisodicTask.__init__(self, environment)
     self.N = 15
     self.t = 0
     self.state = [0.0] * environment.dim
     self.action = [0.0] * environment.dim
Exemplo n.º 15
0
 def __init__(self):
     self.environment = SimpleraceEnvironment()
     EpisodicTask.__init__(self, self.environment)
     self.t = 0