def reset(self):
    self.reward[0] = 0.0
    self.rawReward = 0.0
    self.env.reset()
    self.action = [self.env.dists[0]] * self.outDim
    self.epiStep = 0
    EpisodicTask.reset(self)

def __init__(self, environment):
    EpisodicTask.__init__(self, environment)
    self.reward_history = []
    self.count = 0
    # normalize to (-1, 1)
    self.sensor_limits = [(-pi, pi), (-20, 20)]
    self.actor_limits = [(-1, 1)]

def performAction(self, action): """ POMDP tasks, as they have discrete actions, can me used by providing either an index, or an array with a 1-in-n coding (which can be stochastic). """ if type(action) == ndarray: action = drawIndex(action, tolerant=True) self.steps += 1 EpisodicTask.performAction(self, action)
def performAction(self, action):
    # Carry out the action based on angular velocities
    action = self.action_from_joint_angles(action)
    EpisodicTask.performAction(self, action)
    return

def performAction(self, action): """ POMDP tasks, as they have discrete actions, can me used by providing either an index, or an array with a 1-in-n coding (which can be stochastic). """ if type(action) == ndarray: action = drawIndex(action, tolerant = True) self.steps += 1 EpisodicTask.performAction(self, action)
def __init__(self, env=None, maxsteps=1000, desiredValue=0, location="Portland, OR"): """ :key env: (optional) an instance of a CartPoleEnvironment (or a subclass thereof) :key maxsteps: maximal number of steps (default: 1000) """ self.location = location self.airport_code = weather.airport(location) self.desiredValue = desiredValue if env is None: env = CartPoleEnvironment() EpisodicTask.__init__(self, env) self.N = maxsteps self.t = 0 # scale position and angle, don't scale velocities (unknown maximum) self.sensor_limits = [(-3, 3)] for i in range(1, self.outdim): if isinstance(self.env, NonMarkovPoleEnvironment) and i % 2 == 0: self.sensor_limits.append(None) else: self.sensor_limits.append((-np.pi, np.pi)) # self.sensor_limits = [None] * 4 # actor between -10 and 10 Newton self.actor_limits = [(-50, 50)]
def performAction(self, action):
    # Filtered mapping towards performAction of the underlying environment.
    # The standard Johnnie task uses a PID controller to control angles
    # directly instead of forces; this makes most tasks much simpler to learn.
    isJoints = self.env.getSensorByName('JointSensor')  # the joint angles
    isSpeeds = self.env.getSensorByName('JointVelocitySensor')  # the joint angular velocities
    # norm the network output to the action interval
    act = (action + 1.0) / 2.0 * (self.env.cHighList - self.env.cLowList) + self.env.cLowList
    # simple PID
    action = tanh((act - isJoints - isSpeeds) * 16.0) * self.maxPower * self.env.tourqueList
    EpisodicTask.performAction(self, action)

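# Illustrative worked example of the mapping above (values invented for
# illustration, not from the source): for a joint with cLowList = -1.0 and
# cHighList = 1.5, a network output of action = 0.2 is first mapped to the
# target angle
#   act = (0.2 + 1.0) / 2.0 * (1.5 - (-1.0)) + (-1.0) = 0.5
# and the tanh term then produces a torque proportional to the angle error
# (act - isJoints), damped by the joint velocity and saturated at
# maxPower * tourqueList.
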
def __init__(self, environment, maxSteps, goalTolerance):
    EpisodicTask.__init__(self, environment)
    self.env = environment
    self.count = 0
    self.atGoal = False
    self.MAX_STEPS = maxSteps
    self.GOAL_TOLERANCE = goalTolerance
    self.oldDist = 0
    self.reward = 0

def performAction(self, action): """ a filtered mapping towards performAction of the underlying environment. """ # scaling self.incStep() action = (action + 1.0) / 2.0 * self.dif + self.env.fraktMin * self.env.dists[0] #Clipping the maximal change in actions (max force clipping) action = clip(action, self.action - self.maxSpeed, self.action + self.maxSpeed) EpisodicTask.performAction(self, action) self.action = action.copy()
def __init__(self, environment=None, batchSize=1):
    self.batchSize = batchSize
    if environment is None:
        self.env = Lander()
    else:
        self.env = environment
    EpisodicTask.__init__(self, self.env)
    self.sensor_limits = [(0.0, 200.0), (0.0, 35.0), (0.0, 4.0), (-6.0, 6.0),
                          (-0.4, 0.4), (-0.15, 0.15), (0.0, 200.0)]

def performAction(self, action):
    # Filtered mapping towards performAction of the underlying environment.
    # The standard CCRL task uses a PID controller to control angles directly
    # instead of forces; this makes most tasks much simpler to learn.
    self.oldAction = action
    # Grasping as a reflex depending on the distance to the target
    # - comment in the grepRew lines for easier grasping
    if abs(abs(self.dist[:3]).sum()) < 2.0:
        action[15] = 1.0
        #self.grepRew = action[15] * .01
    else:
        action[15] = -1.0
        #self.grepRew = action[15] * -.03
    isJoints = array(self.env.getSensorByName('JointSensor'))  # the joint angles
    isSpeeds = array(self.env.getSensorByName('JointVelocitySensor'))  # the joint angular velocities
    # norm the network output to the action interval
    act = (action + 1.0) / 2.0 * (self.env.cHighList - self.env.cLowList) + self.env.cLowList
    # simple PID
    action = tanh((act - isJoints - 0.9 * isSpeeds * self.env.tourqueList) * 16.0) * self.maxPower * self.env.tourqueList
    EpisodicTask.performAction(self, action)

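# Illustration of the grasp reflex threshold above (offset values invented):
# with a hand-to-target offset of dist[:3] = [0.5, -0.8, 0.4], the summed
# absolute distance is 0.5 + 0.8 + 0.4 = 1.7 < 2.0, so the gripper channel
# action[15] is forced to 1.0 (close); otherwise it is forced to -1.0 (open).
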
def performAction(self, action):
    # Filtered mapping towards performAction of the underlying environment.
    # The standard Tetra2 task uses a PID controller to control angles directly
    # instead of forces; this makes most tasks much simpler to learn.
    isJoints = self.env.getSensorByName('JointSensor')  # the joint angles
    #print "Pos:", [int(i * 10) for i in isJoints]
    isSpeeds = self.env.getSensorByName('JointVelocitySensor')  # the joint angular velocities
    #print "Speeds:", [int(i * 10) for i in isSpeeds]
    #print "Action", action, "cHighList", self.env.cHighList, self.env.cLowList
    #act = (action + 1.0) / 2.0 * (self.env.cHighList - self.env.cLowList) + self.env.cLowList  # norm output to action interval
    #action = tanh(act - isJoints - isSpeeds) * self.maxPower * self.env.tourqueList  # simple PID
    #print "Action", act[:5]
    EpisodicTask.performAction(self, action * self.maxPower * self.env.tourqueList)

def __init__(self, env=None, maxsteps=1000): """ :key env: (optional) an instance of a ChemotaxisEnv (or a subclass thereof) :key maxsteps: maximal number of steps (default: 1000) """ if env == None: env = ChemotaxisEnv() self.env = env EpisodicTask.__init__(self, env) self.N = maxsteps self.t = 0 #self.actor_limits = [(0,1), (0,1)] # scale (-1,1) to motor neurons self.sensor_limits = [(0,1), (0,1)] # scale sensor neurons to (-1,1)
def __init__(self, env=None, maxsteps=1000):
    if env is None:
        env = CarEnvironment()
    EpisodicTask.__init__(self, env)
    self.N = maxsteps
    self.t = 0
    # (vel, deg, dist_to_goal, dir_of_goal, direction_diff)
    self.sensor_limits = [(-30.0, 100.0), (0.0, 360.0),
                          (0.0, variables.grid_size * 2),
                          (-180.0, 180.0), (-180.0, 180.0)]
    self.actor_limits = [(-1.0, +4.5), (-90.0, +90.0)]
    self.rewardscale = 100.0 / env.distance_to_goal
    self.total_reward = 0.0

def __init__(self, env=None, maxsteps=1000, desiredValue=0, tolorance=0.3):
    """ :key env: (optional) an instance of a CartPoleEnvironment (or a subclass thereof)
        :key maxsteps: maximal number of steps (default: 1000)
    """
    self.desiredValue = desiredValue
    EpisodicTask.__init__(self, env)
    self.N = maxsteps
    self.t = 0
    self.tolorance = tolorance
    # self.sensor_limits = [None] * 4
    # actor between -15 and 15 Newton
    self.actor_limits = [(-15, 15)]

def __init__(self, env):
    EpisodicTask.__init__(self, env)
    self.step = 0
    self.epiStep = 0
    self.reward = [0.0]
    self.rawReward = 0.0
    self.obsSensors = ["EdgesReal"]
    self.rewardSensor = [""]
    self.oldReward = 0.0
    self.plotString = ["World Interactions", "Reward", "Reward on NoReward Task"]
    self.inDim = len(self.getObservation())
    self.outDim = self.env.actLen
    self.dif = (self.env.fraktMax - self.env.fraktMin) * self.env.dists[0]
    self.maxSpeed = self.dif / 30.0
    self.picCount = 0
    self.epiLen = 1

def __init__(self, environment, sort_beliefs=True, do_decay_beliefs=True,
             uniform_initial_beliefs=True, max_steps=30):
    EpisodicTask.__init__(self, environment)
    self.verbose = False
    self.listActions = False
    self.env = environment
    self.uniform_initial_beliefs = uniform_initial_beliefs
    self.max_steps = max_steps
    self.rewardscale = 1.0  # /self.max_steps
    self.state_ids = {"init": 0, "grasped": 1, "lifted": 2, "placed": 3}
    self.initialize()

def __init__(self, env=None, maxsteps=1000): """ :key env: (optional) an instance of a CartPoleEnvironment (or a subclass thereof) :key maxsteps: maximal number of steps (default: 1000) """ if env == None: env = CartPoleEnvironment() EpisodicTask.__init__(self, env) self.N = maxsteps self.t = 0 # no scaling of sensors self.sensor_limits = [None] * 2 # scale actor self.actor_limits = [(-50, 50)]
def __init__(self, env):
    EpisodicTask.__init__(self, env)
    self.pause = False
    # Holds all rewards given in each episode
    self.reward_history = []
    # The current timestep counter
    self.count = 0
    # The number of timesteps in the episode
    self.epiLen = 1500
    # Counts the task resets for incremental learning
    self.incLearn = 0
    # Sensor limit values can be used to normalize the sensors from
    # (low, high) to (-1.0, 1.0) given the low and high values. We want to
    # maintain all units by keeping the results unnormalized for now.
    # Keeping the values of these lists at None skips the normalization.
    self.sensor_limits = None
    self.actor_limits = None
    # Create all current joint observation attributes
    self.joint_angles = []  # [rad]
    self.joint_velocities = []  # [rad/s]
    # Create the attribute for the current tooltip position (x, y, z)
    self.tooltip_position = []  # [m]
    # Create all joint angle [rad] limit attributes
    self.joint_max_angles = []
    self.joint_min_angles = []
    # Create all joint velocity [rad/s] and torque [N*m] limit attributes
    self.joint_max_velocities = []
    self.joint_max_torques = []
    # Call the setter functions for the joint limit attributes
    self.set_joint_angle_limits()
    self.set_joint_velocity_limits()
    self.set_joint_torque_limits()
    return

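# A minimal sketch of the (low, high) -> (-1.0, 1.0) scaling that sensor
# limits would enable, shown only to document what setting them to None
# skips (hypothetical helper, not part of the task's API):
#
#   def normalize_sensor(value, low, high):
#       """Map a raw reading from (low, high) onto (-1.0, 1.0)."""
#       return 2.0 * (value - low) / (high - low) - 1.0
#
#   normalize_sensor(0.0, -np.pi, np.pi)  # -> 0.0
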
def __init__(self, env=None, maxsteps=1000): """ :key env: (optional) an instance of a ShipSteeringEnvironment (or a subclass thereof) :key maxsteps: maximal number of steps (default: 1000) """ if env == None: env = ShipSteeringEnvironment(render=False) EpisodicTask.__init__(self, env) self.N = maxsteps self.t = 0 # scale sensors # [h, hdot, v] self.sensor_limits = [(-180.0, +180.0), (-180.0, +180.0), (-10.0, +40.0)] # actions: thrust, rudder self.actor_limits = [(-1.0, +2.0), (-90.0, +90.0)] # scale reward over episode, such that max. return = 100 self.rewardscale = 100. / maxsteps / self.sensor_limits[2][1]
def __init__(self, env=None, maxtime=25, timestep=0.1, logging=False):
    """ :key env: (optional) an instance of a DockingEnvironment (or a subclass thereof)
        :key maxtime: (optional) maximum time per task (default: 25)
    """
    if env is None:
        env = DockingEnvironment()
    EpisodicTask.__init__(self, env)
    self.maxTime = maxtime
    self.dt = timestep
    self.env.dt = self.dt
    self.t = 0
    self.logging = logging
    self.logfileName = 'logging_' + str() + '.txt'
    # actions: u_l, u_r
    self.actor_limits = [(-0.1, +0.1), (-0.1, +0.1), (0.0, 1.0)]
    self.lastFitness = 0.0
    self.bestFitness = 0.0

def __init__(self, env):
    EpisodicTask.__init__(self, env)
    # Overall maximal torque - multiplied with the relative max torque of each
    # joint to get that joint's individual max torque.
    self.maxPower = 100.0
    self.reward_history = []
    self.count = 0  # timestep counter
    self.epiLen = 500  # suggested episode length for normal Johnnie tasks
    self.incLearn = 0  # counts the task resets for incremental learning
    self.env.FricMu = 20.0  # we need higher friction for Johnnie
    self.env.dt = 0.01  # we also need finer time resolution
    # normalize standard sensors to (-1, 1)
    self.sensor_limits = []
    # angle sensors
    for i in range(self.env.actLen):
        self.sensor_limits.append((self.env.cLowList[i], self.env.cHighList[i]))
    # joint velocity sensors
    for i in range(self.env.actLen):
        self.sensor_limits.append((-20, 20))
    # norm all actor dimensions to (-1, 1)
    self.actor_limits = [(-1, 1)] * env.actLen

def __init__(self, env):
    EpisodicTask.__init__(self, env)
    # Overall maximal torque - multiplied with the relative max torque of each joint.
    self.maxPower = 100.0
    self.reward_history = []
    self.count = 0  # timestep counter
    self.epiLen = 1500  # suggested episode length for normal CCRL tasks
    self.incLearn = 0  # counts the task resets for incremental learning
    self.env.FricMu = 20.0  # we need higher friction for CCRL
    self.env.dt = 0.002  # we also need finer time resolution
    # normalize standard sensors to (-1, 1)
    self.sensor_limits = []
    # angle sensors
    for i in range(self.env.actLen):
        self.sensor_limits.append((self.env.cLowList[i], self.env.cHighList[i]))
    # joint velocity sensors
    for i in range(self.env.actLen):
        self.sensor_limits.append((-20, 20))
    # norm all actor dimensions to (-1, 1)
    self.actor_limits = [(-1, 1)] * env.actLen
    self.oldAction = zeros(env.actLen, float)
    self.dist = zeros(9, float)
    self.dif = array([0.0, 0.0, 0.0])
    self.target = array([-6.5, 1.75, -10.5])
    self.grepRew = 0.0
    self.tableFlag = 0.0
    self.env.addSensor(SpecificBodyPositionSensor(['objectP00'], "glasPos"))
    self.env.addSensor(SpecificBodyPositionSensor(['palmLeft'], "palmPos"))
    self.env.addSensor(SpecificBodyPositionSensor(['fingerLeft1'], "finger1Pos"))
    self.env.addSensor(SpecificBodyPositionSensor(['fingerLeft2'], "finger2Pos"))
    # we changed the sensors, so we need to update the environment's sensor length
    self.env.obsLen = len(self.env.getSensors())
    # normalization for the task-specific sensors
    for i in range(self.env.obsLen - 2 * self.env.actLen):
        self.sensor_limits.append((-4, 4))
    self.actor_limits = None

def getObservation(self):
    sensors = EpisodicTask.getObservation(self)
    # Get the current joint angles [rad]
    # The joint angle values will be between -pi and pi in radians
    self.joint_angles = np.asarray(self.env.getSensorByName('JointSensor'))
    # Get the current joint velocities [rad/s]
    self.joint_velocities = np.asarray(self.env.getSensorByName('JointVelocitySensor'))
    # Get the current tooltip position (x, y, z) [m]
    self.tooltip_position = np.asarray(self.env.getSensorByName('tooltipPos'))
    return sensors

def __init__(self, environment, world):
    """ All tasks are coupled to an environment. """
    self.world2d = world
    EpisodicTask.__init__(self, environment)
    self.env = environment
    self.reward_history = []
    # limits for scaling of sensors and actors (None=disabled)
    #self.actor_limits = [(-1.0, 1.0), (-1.0, 1.0), (-1.0, 1.0), (-1.0, 1.0)]
    #self.actor_limits = [(-1.0, 1.0), (-1.0, 1.0), (-1.0, 1.0)]
    #self.actor_limits = [(-1.0, 1.0), (-1.0, 1.0), (-1.0, 1.0), (-1.0, 1.0), (-1.0, 1.0)]
    self.actor_limits = None
    #self.actor_limits = [(0.0, 1.0), (0.0, 1.0)]  #,(-0.3,0.3),(-0.3,0.3),(-1.0,1.0)]
    #self.actor_limits = [(-2.0856, 2.0856), (-0.6720 + 0.6720, 0.5149 + 0.6720)]
    #self.actor_limits = [(-1.64933, -0.00872)]
    #self.actor_limits = [(-2.0856, 2.0856), (-0.6720, 0.5149)]
    #self.sensor_limits = [(-1.64933, -0.00872), (-2.0856, 2.0856), (-2.0856, 2.0856), (0.00872, 1.562), (0, 640), (0, 480), (0, 300), (-2.0856, 2.0856), (-0.6720, 0.5149)]  # joints, X, Y, radius, head
    #self.sensor_limits = [(0.0, 640.0), (0.0, 480.0), (-2.0856, 2.0856), (-0.6720, 0.5149), (0.0, 300.0)]  # joints, X, Y, radius, head
    #self.sensor_limits = [(-2.0856, 2.0856), (-0.6720, 0.5149)]  #,(0.0,500.0),(-1.0,1.0),(-1.0,1.0)]
    self.sensor_limits = [(0.0, 160.0), (0.0, 120.0), (0.0, 80.0)]  #,(-2.0856,2.0856),(-0.6720,0.5149)]
    #self.sensor_limits = [(0.0, 640.0), (0.0, 480.0), (0.0, 300.0)]  # X, Y, radius
    #self.sensor_limits = [(-1.64933, -0.00872), (-2.0856, 2.0856), (-2.0856, 2.0856)]
    #self.sensor_limits = [(0, 500), (0, 400)]
    self.clipping = True
    self.count = 0  # timestep counter
    self.oldAction = zeros(environment.indim, float)
    # needed for the distance difference at the first run
    self.getObservation()

def performAction(self, action):
    EpisodicTask.performAction(self, action)
    self.t += self.dt
    self.appendLog()

def reset(self):
    self.steps = 0
    EpisodicTask.reset(self)

def performAction(self, action):
    self.t += 1
    EpisodicTask.performAction(self, action)

def reset(self):
    EpisodicTask.reset(self)
    self.day = weather.daily(date="random")
    self.t = 0

def performAction(self, action):
    EpisodicTask.performAction(self, action * self.maxPower)

def reset(self):
    EpisodicTask.reset(self)
    self.t = 0

def __init__(self, env, animat):
    EpisodicTask.__init__(self, env)
    self.animat = animat

def __init__(self, environment):
    EpisodicTask.__init__(self, environment)
    self.N = 15
    self.t = 0
    self.state = [0.0] * environment.dim
    self.action = [0.0] * environment.dim

def performAction(self, action):
    EpisodicTask.performAction(self, action)
    self.action = action

def getObservation(self):
    self.state = EpisodicTask.getObservation(self)
    return self.state

def reset(self):
    EpisodicTask.reset(self)
    self.t = 0
    self.lastFitness = 0.0
    self.bestFitness = 0.0
    self.appendLog()  # write first line!

def __init__(self):
    self.environment = SimpleraceEnvironment()
    EpisodicTask.__init__(self, self.environment)
    self.t = 0

def reset(self):
    EpisodicTask.reset(self)
    self.total_reward = 0.0