def reset(self):
    self.reward[0] = 0.0
    self.rawReward = 0.0
    self.env.reset()
    self.action = [self.env.dists[0]] * self.outDim
    self.epiStep = 0
    EpisodicTask.reset(self)

def __init__(self, environment):
    EpisodicTask.__init__(self, environment)
    self.reward_history = []
    self.count = 0
    # normalize to (-1, 1)
    self.sensor_limits = [(-pi, pi), (-20, 20)]
    self.actor_limits = [(-1, 1)]

def performAction(self, action): """ POMDP tasks, as they have discrete actions, can me used by providing either an index, or an array with a 1-in-n coding (which can be stochastic). """ if type(action) == ndarray: action = drawIndex(action, tolerant=True) self.steps += 1 EpisodicTask.performAction(self, action)
def performAction(self, action):
    # Carry out the action based on angular velocities
    action = self.action_from_joint_angles(action)
    EpisodicTask.performAction(self, action)
    return

def performAction(self, action): """ POMDP tasks, as they have discrete actions, can me used by providing either an index, or an array with a 1-in-n coding (which can be stochastic). """ if type(action) == ndarray: action = drawIndex(action, tolerant = True) self.steps += 1 EpisodicTask.performAction(self, action)
def __init__(self, env=None, maxsteps=1000, desiredValue=0, location="Portland, OR"): """ :key env: (optional) an instance of a CartPoleEnvironment (or a subclass thereof) :key maxsteps: maximal number of steps (default: 1000) """ self.location = location self.airport_code = weather.airport(location) self.desiredValue = desiredValue if env is None: env = CartPoleEnvironment() EpisodicTask.__init__(self, env) self.N = maxsteps self.t = 0 # scale position and angle, don't scale velocities (unknown maximum) self.sensor_limits = [(-3, 3)] for i in range(1, self.outdim): if isinstance(self.env, NonMarkovPoleEnvironment) and i % 2 == 0: self.sensor_limits.append(None) else: self.sensor_limits.append((-np.pi, np.pi)) # self.sensor_limits = [None] * 4 # actor between -10 and 10 Newton self.actor_limits = [(-50, 50)]
def performAction(self, action):
    # Filtered mapping towards performAction of the underlying environment.
    # The standard Johnnie task uses a PID controller to control angles
    # directly instead of forces; this makes most tasks much simpler to learn.
    isJoints = self.env.getSensorByName('JointSensor')  # the joint angles
    isSpeeds = self.env.getSensorByName('JointVelocitySensor')  # the joint angular velocities
    # norm the network output to the action interval
    act = (action + 1.0) / 2.0 * (self.env.cHighList - self.env.cLowList) + self.env.cLowList
    # simple PID
    action = tanh((act - isJoints - isSpeeds) * 16.0) * self.maxPower * self.env.tourqueList
    EpisodicTask.performAction(self, action)

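# Illustrative worked example of the mapping above (values invented for
# illustration, not from the source): for a joint with cLowList = -1.0 and
# cHighList = 1.5, a network output of action = 0.2 is first mapped to the
# target angle
#   act = (0.2 + 1.0) / 2.0 * (1.5 - (-1.0)) + (-1.0) = 0.5
# and the tanh term then produces a torque proportional to the angle error
# (act - isJoints), damped by the joint velocity and saturated at
# maxPower * tourqueList.
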
def __init__(self, environment, maxSteps, goalTolerance):
    EpisodicTask.__init__(self, environment)
    self.env = environment
    self.count = 0
    self.atGoal = False
    self.MAX_STEPS = maxSteps
    self.GOAL_TOLERANCE = goalTolerance
    self.oldDist = 0
    self.reward = 0

def performAction(self, action): """ a filtered mapping towards performAction of the underlying environment. """ # scaling self.incStep() action = (action + 1.0) / 2.0 * self.dif + self.env.fraktMin * self.env.dists[0] #Clipping the maximal change in actions (max force clipping) action = clip(action, self.action - self.maxSpeed, self.action + self.maxSpeed) EpisodicTask.performAction(self, action) self.action = action.copy()
def __init__(self, environment=None, batchSize=1):
    self.batchSize = batchSize
    if environment is None:
        self.env = Lander()
    else:
        self.env = environment
    EpisodicTask.__init__(self, self.env)
    self.sensor_limits = [(0.0, 200.0), (0.0, 35.0), (0.0, 4.0), (-6.0, 6.0),
                          (-0.4, 0.4), (-0.15, 0.15), (0.0, 200.0)]

def performAction(self, action):
    # Filtered mapping towards performAction of the underlying environment.
    # The standard CCRL task uses a PID controller to control angles directly
    # instead of forces; this makes most tasks much simpler to learn.
    self.oldAction = action
    # Grasping as a reflex depending on the distance to the target
    # - comment in the grepRew lines for easier grasping
    if abs(abs(self.dist[:3]).sum()) < 2.0:
        action[15] = 1.0
        #self.grepRew = action[15] * .01
    else:
        action[15] = -1.0
        #self.grepRew = action[15] * -.03
    isJoints = array(self.env.getSensorByName('JointSensor'))  # the joint angles
    isSpeeds = array(self.env.getSensorByName('JointVelocitySensor'))  # the joint angular velocities
    # norm the network output to the action interval
    act = (action + 1.0) / 2.0 * (self.env.cHighList - self.env.cLowList) + self.env.cLowList
    # simple PID
    action = tanh((act - isJoints - 0.9 * isSpeeds * self.env.tourqueList) * 16.0) * self.maxPower * self.env.tourqueList
    EpisodicTask.performAction(self, action)

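# Illustration of the grasp reflex threshold above (offset values invented):
# with a hand-to-target offset of dist[:3] = [0.5, -0.8, 0.4], the summed
# absolute distance is 0.5 + 0.8 + 0.4 = 1.7 < 2.0, so the gripper channel
# action[15] is forced to 1.0 (close); otherwise it is forced to -1.0 (open).
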
def performAction(self, action):
    # Filtered mapping towards performAction of the underlying environment.
    # The standard Tetra2 task uses a PID controller to control angles directly
    # instead of forces; this makes most tasks much simpler to learn.
    isJoints = self.env.getSensorByName('JointSensor')  # the joint angles
    #print "Pos:", [int(i * 10) for i in isJoints]
    isSpeeds = self.env.getSensorByName('JointVelocitySensor')  # the joint angular velocities
    #print "Speeds:", [int(i * 10) for i in isSpeeds]
    #print "Action", action, "cHighList", self.env.cHighList, self.env.cLowList
    #act = (action + 1.0) / 2.0 * (self.env.cHighList - self.env.cLowList) + self.env.cLowList  # norm output to action interval
    #action = tanh(act - isJoints - isSpeeds) * self.maxPower * self.env.tourqueList  # simple PID
    #print "Action", act[:5]
    EpisodicTask.performAction(self, action * self.maxPower * self.env.tourqueList)

def __init__(self, env=None, maxsteps=1000): """ :key env: (optional) an instance of a ChemotaxisEnv (or a subclass thereof) :key maxsteps: maximal number of steps (default: 1000) """ if env == None: env = ChemotaxisEnv() self.env = env EpisodicTask.__init__(self, env) self.N = maxsteps self.t = 0 #self.actor_limits = [(0,1), (0,1)] # scale (-1,1) to motor neurons self.sensor_limits = [(0,1), (0,1)] # scale sensor neurons to (-1,1)
def __init__(self, env=None, maxsteps=1000):
    if env is None:
        env = CarEnvironment()
    EpisodicTask.__init__(self, env)
    self.N = maxsteps
    self.t = 0
    # (vel, deg, dist_to_goal, dir_of_goal, direction_diff)
    self.sensor_limits = [(-30.0, 100.0), (0.0, 360.0),
                          (0.0, variables.grid_size * 2),
                          (-180.0, 180.0), (-180.0, 180.0)]
    self.actor_limits = [(-1.0, +4.5), (-90.0, +90.0)]
    self.rewardscale = 100.0 / env.distance_to_goal
    self.total_reward = 0.0

def __init__(self, env=None, maxsteps=1000, desiredValue=0, tolorance=0.3):
    """ :key env: (optional) an instance of a CartPoleEnvironment (or a subclass thereof)
        :key maxsteps: maximal number of steps (default: 1000)
    """
    self.desiredValue = desiredValue
    EpisodicTask.__init__(self, env)
    self.N = maxsteps
    self.t = 0
    self.tolorance = tolorance
    # self.sensor_limits = [None] * 4
    # actor between -15 and 15 Newton
    self.actor_limits = [(-15, 15)]

def __init__(self, env):
    EpisodicTask.__init__(self, env)
    self.step = 0
    self.epiStep = 0
    self.reward = [0.0]
    self.rawReward = 0.0
    self.obsSensors = ["EdgesReal"]
    self.rewardSensor = [""]
    self.oldReward = 0.0
    self.plotString = ["World Interactions", "Reward", "Reward on NoReward Task"]
    self.inDim = len(self.getObservation())
    self.outDim = self.env.actLen
    self.dif = (self.env.fraktMax - self.env.fraktMin) * self.env.dists[0]
    self.maxSpeed = self.dif / 30.0
    self.picCount = 0
    self.epiLen = 1

def __init__(self, environment, sort_beliefs=True, do_decay_beliefs=True,
             uniform_initial_beliefs=True, max_steps=30):
    EpisodicTask.__init__(self, environment)
    self.verbose = False
    self.listActions = False
    self.env = environment
    self.uniform_initial_beliefs = uniform_initial_beliefs
    self.max_steps = max_steps
    self.rewardscale = 1.0  # /self.max_steps
    self.state_ids = {"init": 0, "grasped": 1, "lifted": 2, "placed": 3}
    self.initialize()

def __init__(self, env=None, maxsteps=1000): """ :key env: (optional) an instance of a CartPoleEnvironment (or a subclass thereof) :key maxsteps: maximal number of steps (default: 1000) """ if env == None: env = CartPoleEnvironment() EpisodicTask.__init__(self, env) self.N = maxsteps self.t = 0 # no scaling of sensors self.sensor_limits = [None] * 2 # scale actor self.actor_limits = [(-50, 50)]
def __init__(self, env):
    EpisodicTask.__init__(self, env)
    self.pause = False
    # Holds all rewards given in each episode
    self.reward_history = []
    # The current timestep counter
    self.count = 0
    # The number of timesteps in the episode
    self.epiLen = 1500
    # Counts the task resets for incremental learning
    self.incLearn = 0
    # Sensor limit values can be used to normalize the sensors from
    # (low, high) to (-1.0, 1.0) given the low and high values. We want to
    # maintain all units by keeping the results unnormalized for now.
    # Keeping the values of these lists at None skips the normalization.
    self.sensor_limits = None
    self.actor_limits = None
    # Create all current joint observation attributes
    self.joint_angles = []  # [rad]
    self.joint_velocities = []  # [rad/s]
    # Create the attribute for the current tooltip position (x, y, z)
    self.tooltip_position = []  # [m]
    # Create all joint angle [rad] limit attributes
    self.joint_max_angles = []
    self.joint_min_angles = []
    # Create all joint velocity [rad/s] and torque [N*m] limit attributes
    self.joint_max_velocities = []
    self.joint_max_torques = []
    # Call the setter functions for the joint limit attributes
    self.set_joint_angle_limits()
    self.set_joint_velocity_limits()
    self.set_joint_torque_limits()
    return

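# A minimal sketch of the (low, high) -> (-1.0, 1.0) scaling that sensor
# limits would enable, shown only to document what setting them to None
# skips (hypothetical helper, not part of the task's API):
#
#   def normalize_sensor(value, low, high):
#       """Map a raw reading from (low, high) onto (-1.0, 1.0)."""
#       return 2.0 * (value - low) / (high - low) - 1.0
#
#   normalize_sensor(0.0, -np.pi, np.pi)  # -> 0.0
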
def __init__(self, env=None, maxsteps=1000): """ :key env: (optional) an instance of a ShipSteeringEnvironment (or a subclass thereof) :key maxsteps: maximal number of steps (default: 1000) """ if env == None: env = ShipSteeringEnvironment(render=False) EpisodicTask.__init__(self, env) self.N = maxsteps self.t = 0 # scale sensors # [h, hdot, v] self.sensor_limits = [(-180.0, +180.0), (-180.0, +180.0), (-10.0, +40.0)] # actions: thrust, rudder self.actor_limits = [(-1.0, +2.0), (-90.0, +90.0)] # scale reward over episode, such that max. return = 100 self.rewardscale = 100. / maxsteps / self.sensor_limits[2][1]
def __init__(self, env=None, maxtime=25, timestep=0.1, logging=False):
    """ :key env: (optional) an instance of a DockingEnvironment (or a subclass thereof)
        :key maxtime: (optional) maximum time per task (default: 25)
    """
    if env is None:
        env = DockingEnvironment()
    EpisodicTask.__init__(self, env)
    self.maxTime = maxtime
    self.dt = timestep
    self.env.dt = self.dt
    self.t = 0
    self.logging = logging
    self.logfileName = 'logging_' + str() + '.txt'
    # actions: u_l, u_r
    self.actor_limits = [(-0.1, +0.1), (-0.1, +0.1), (0.0, 1.0)]
    self.lastFitness = 0.0
    self.bestFitness = 0.0

def __init__(self, env):
    EpisodicTask.__init__(self, env)
    # Overall maximal torque - multiplied with the relative max torque of each
    # joint to get that joint's individual max torque.
    self.maxPower = 100.0
    self.reward_history = []
    self.count = 0  # timestep counter
    self.epiLen = 500  # suggested episode length for normal Johnnie tasks
    self.incLearn = 0  # counts the task resets for incremental learning
    self.env.FricMu = 20.0  # we need higher friction for Johnnie
    self.env.dt = 0.01  # we also need finer time resolution
    # normalize standard sensors to (-1, 1)
    self.sensor_limits = []
    # angle sensors
    for i in range(self.env.actLen):
        self.sensor_limits.append((self.env.cLowList[i], self.env.cHighList[i]))
    # joint velocity sensors
    for i in range(self.env.actLen):
        self.sensor_limits.append((-20, 20))
    # norm all actor dimensions to (-1, 1)
    self.actor_limits = [(-1, 1)] * env.actLen

def __init__(self, env):
    EpisodicTask.__init__(self, env)
    # Overall maximal torque - multiplied with the relative max torque of each joint.
    self.maxPower = 100.0
    self.reward_history = []
    self.count = 0  # timestep counter
    self.epiLen = 1500  # suggested episode length for normal CCRL tasks
    self.incLearn = 0  # counts the task resets for incremental learning
    self.env.FricMu = 20.0  # we need higher friction for CCRL
    self.env.dt = 0.002  # we also need finer time resolution
    # normalize standard sensors to (-1, 1)
    self.sensor_limits = []
    # angle sensors
    for i in range(self.env.actLen):
        self.sensor_limits.append((self.env.cLowList[i], self.env.cHighList[i]))
    # joint velocity sensors
    for i in range(self.env.actLen):
        self.sensor_limits.append((-20, 20))
    # norm all actor dimensions to (-1, 1)
    self.actor_limits = [(-1, 1)] * env.actLen
    self.oldAction = zeros(env.actLen, float)
    self.dist = zeros(9, float)
    self.dif = array([0.0, 0.0, 0.0])
    self.target = array([-6.5, 1.75, -10.5])
    self.grepRew = 0.0
    self.tableFlag = 0.0
    self.env.addSensor(SpecificBodyPositionSensor(['objectP00'], "glasPos"))
    self.env.addSensor(SpecificBodyPositionSensor(['palmLeft'], "palmPos"))
    self.env.addSensor(SpecificBodyPositionSensor(['fingerLeft1'], "finger1Pos"))
    self.env.addSensor(SpecificBodyPositionSensor(['fingerLeft2'], "finger2Pos"))
    # we changed the sensors, so we need to update the environment's sensor length
    self.env.obsLen = len(self.env.getSensors())
    # normalization for the task-specific sensors
    for i in range(self.env.obsLen - 2 * self.env.actLen):
        self.sensor_limits.append((-4, 4))
    self.actor_limits = None

def getObservation(self):
    sensors = EpisodicTask.getObservation(self)
    # Get the current joint angles [rad]
    # The joint angle values will be between -pi and pi in radians
    self.joint_angles = np.asarray(self.env.getSensorByName('JointSensor'))
    # Get the current joint velocities [rad/s]
    self.joint_velocities = np.asarray(self.env.getSensorByName('JointVelocitySensor'))
    # Get the current tooltip position (x, y, z) [m]
    self.tooltip_position = np.asarray(self.env.getSensorByName('tooltipPos'))
    return sensors

def __init__(self, environment, world):
    """ All tasks are coupled to an environment. """
    self.world2d = world
    EpisodicTask.__init__(self, environment)
    self.env = environment
    self.reward_history = []
    # limits for scaling of sensors and actors (None=disabled)
    #self.actor_limits = [(-1.0, 1.0), (-1.0, 1.0), (-1.0, 1.0), (-1.0, 1.0)]
    #self.actor_limits = [(-1.0, 1.0), (-1.0, 1.0), (-1.0, 1.0)]
    #self.actor_limits = [(-1.0, 1.0), (-1.0, 1.0), (-1.0, 1.0), (-1.0, 1.0), (-1.0, 1.0)]
    self.actor_limits = None
    #self.actor_limits = [(0.0, 1.0), (0.0, 1.0)]  #,(-0.3,0.3),(-0.3,0.3),(-1.0,1.0)]
    #self.actor_limits = [(-2.0856, 2.0856), (-0.6720 + 0.6720, 0.5149 + 0.6720)]
    #self.actor_limits = [(-1.64933, -0.00872)]
    #self.actor_limits = [(-2.0856, 2.0856), (-0.6720, 0.5149)]
    #self.sensor_limits = [(-1.64933, -0.00872), (-2.0856, 2.0856), (-2.0856, 2.0856), (0.00872, 1.562), (0, 640), (0, 480), (0, 300), (-2.0856, 2.0856), (-0.6720, 0.5149)]  # joints, X, Y, radius, head
    #self.sensor_limits = [(0.0, 640.0), (0.0, 480.0), (-2.0856, 2.0856), (-0.6720, 0.5149), (0.0, 300.0)]  # joints, X, Y, radius, head
    #self.sensor_limits = [(-2.0856, 2.0856), (-0.6720, 0.5149)]  #,(0.0,500.0),(-1.0,1.0),(-1.0,1.0)]
    self.sensor_limits = [(0.0, 160.0), (0.0, 120.0), (0.0, 80.0)]  #,(-2.0856,2.0856),(-0.6720,0.5149)]
    #self.sensor_limits = [(0.0, 640.0), (0.0, 480.0), (0.0, 300.0)]  # X, Y, radius
    #self.sensor_limits = [(-1.64933, -0.00872), (-2.0856, 2.0856), (-2.0856, 2.0856)]
    #self.sensor_limits = [(0, 500), (0, 400)]
    self.clipping = True
    self.count = 0  # timestep counter
    self.oldAction = zeros(environment.indim, float)
    # needed for the distance difference at the first run
    self.getObservation()

def performAction(self, action):
    EpisodicTask.performAction(self, action)
    self.t += self.dt
    self.appendLog()

def reset(self):
    self.steps = 0
    EpisodicTask.reset(self)

def performAction(self, action):
    self.t += 1
    EpisodicTask.performAction(self, action)

def reset(self):
    EpisodicTask.reset(self)
    self.day = weather.daily(date="random")
    self.t = 0

def performAction(self, action):
    EpisodicTask.performAction(self, action * self.maxPower)

def reset(self):
    EpisodicTask.reset(self)
    self.t = 0

def __init__(self, env, animat):
    EpisodicTask.__init__(self, env)
    self.animat = animat

def __init__(self, environment):
    EpisodicTask.__init__(self, environment)
    self.N = 15
    self.t = 0
    self.state = [0.0] * environment.dim
    self.action = [0.0] * environment.dim

def performAction(self, action):
    EpisodicTask.performAction(self, action)
    self.action = action

def getObservation(self):
    self.state = EpisodicTask.getObservation(self)
    return self.state

def reset(self):
    EpisodicTask.reset(self)
    self.t = 0
    self.lastFitness = 0.0
    self.bestFitness = 0.0
    self.appendLog()  # write first line!

def __init__(self):
    self.environment = SimpleraceEnvironment()
    EpisodicTask.__init__(self, self.environment)
    self.t = 0

def reset(self):
    EpisodicTask.reset(self)
    self.total_reward = 0.0