示例#1
0
    def step(self, a):
        """Advance the acrobot one time step under torque action *a*.

        Returns ``(reward, next_state, terminal, possible_actions)``.
        """
        torque = self.AVAIL_TORQUE[a]

        # Optionally perturb the applied torque with uniform noise.
        if self.torque_noise_max > 0:
            torque += self.random_state.uniform(
                -self.torque_noise_max, self.torque_noise_max)

        # _dsdt expects the action appended to the state vector.
        augmented = np.append(self.state, torque)

        # Integrate over one dt; rk4 returns one row per time point, of
        # which only the last matters. Strip the appended action.
        # (scipy.integrate.odeint was tried and is far too slow.)
        ns = rk4(self._dsdt, augmented, [0, self.dt])[-1][:4]

        # Keep both joint angles in [-pi, pi] and clip angular velocities.
        for k in (0, 1):
            ns[k] = wrap(ns[k], -np.pi, np.pi)
        ns[2] = bound(ns[2], -self.MAX_VEL_1, self.MAX_VEL_1)
        ns[3] = bound(ns[3], -self.MAX_VEL_2, self.MAX_VEL_2)

        self.state = ns.copy()
        terminal = self.isTerminal()
        return (self._reward_function(terminal), ns, terminal,
                self.possibleActions())
示例#2
0
文件: RCCar.py 项目: zyc9012/rlpy
    def step(self, a):
        """Advance the RC car one time step under composite action *a*."""
        x, y, speed, heading = self.state

        # Decode the flat action id into (acceleration, steering) indices,
        # each in {0, 1, 2}, then shift both to {-1, 0, +1}.
        acc, turn = id2vec(a, [3, 3])
        acc, turn = acc - 1, turn - 1

        # Simple kinematic bicycle model, Euler-integrated over delta_t.
        dt = self.delta_t
        nx = x + speed * np.cos(heading) * dt
        ny = y + speed * np.sin(heading) * dt
        nspeed = speed + acc * self.ACCELERATION * dt
        nheading = heading + (speed / self.CAR_LENGTH *
                              np.tan(turn * self.TURN_ANGLE) * dt)

        # Clip position and speed to the arena; wrap the heading angle.
        nx = bound(nx, self.XMIN, self.XMAX)
        ny = bound(ny, self.YMIN, self.YMAX)
        nspeed = bound(nspeed, self.SPEEDMIN, self.SPEEDMAX)
        nheading = wrap(nheading, self.HEADINGMIN, self.HEADINGMAX)

        # A car pressed against any wall loses all of its speed.
        hit_wall = (nx == self.XMIN or nx == self.XMAX or
                    ny == self.YMIN or ny == self.YMAX)
        if hit_wall:
            nspeed = 0

        ns = np.array([nx, ny, nspeed, nheading])
        self.state = ns.copy()
        terminal = self.isTerminal()
        reward = self.GOAL_REWARD if terminal else self.STEP_REWARD
        return reward, ns, terminal, self.possibleActions()
示例#3
0
文件: RCCar.py 项目: zhexiaozhe/rlpy
    def step(self, a):
        """One transition of the RC car given discrete action *a*.

        Returns ``(reward, next_state, terminal, possible_actions)``.
        """
        x, y, speed, heading = self.state

        # The scalar action encodes a (throttle, steering) pair; map each
        # component from {0, 1, 2} down to {-1, 0, +1}.
        throttle, steer = (v - 1 for v in id2vec(a, [3, 3]))

        # Euler-integrate the kinematic car model for one delta_t.
        new_x = x + speed * np.cos(heading) * self.delta_t
        new_y = y + speed * np.sin(heading) * self.delta_t
        new_speed = speed + throttle * self.ACCELERATION * self.delta_t
        new_heading = heading + (speed / self.CAR_LENGTH *
                                 np.tan(steer * self.TURN_ANGLE) * self.delta_t)

        # Enforce the state-space limits.
        new_x = bound(new_x, self.XMIN, self.XMAX)
        new_y = bound(new_y, self.YMIN, self.YMAX)
        new_speed = bound(new_speed, self.SPEEDMIN, self.SPEEDMAX)
        new_heading = wrap(new_heading, self.HEADINGMIN, self.HEADINGMAX)

        # Colliding with a wall stops the car.
        if (new_x == self.XMIN or new_x == self.XMAX or
                new_y == self.YMIN or new_y == self.YMAX):
            new_speed = 0

        ns = np.array([new_x, new_y, new_speed, new_heading])
        self.state = ns.copy()
        terminal = self.isTerminal()
        return (self.GOAL_REWARD if terminal else self.STEP_REWARD,
                ns, terminal, self.possibleActions())
示例#4
0
文件: Acrobot.py 项目: MLDL/rlpy
    def step(self, a):
        """Apply torque action *a*, integrate the dynamics, and return
        ``(reward, next_state, terminal, possible_actions)``.
        """
        torque = self.AVAIL_TORQUE[a]

        # Perturb the torque with uniform noise when noise is enabled.
        if self.torque_noise_max > 0:
            torque += self.random_state.uniform(-self.torque_noise_max,
                                                self.torque_noise_max)

        # The derivative function _dsdt reads the action from the last
        # entry of the state vector, so append it before integrating.
        s_augmented = np.append(self.state, torque)

        # rk4 yields one row per requested time point; keep only the final
        # one and drop the appended action. (scipy.integrate.odeint would
        # be more accurate but is far slower.)
        ns = rk4(self._dsdt, s_augmented, [0, self.dt])[-1][:4]

        ns[0] = wrap(ns[0], -np.pi, np.pi)
        ns[1] = wrap(ns[1], -np.pi, np.pi)
        ns[2] = bound(ns[2], -self.MAX_VEL_1, self.MAX_VEL_1)
        ns[3] = bound(ns[3], -self.MAX_VEL_2, self.MAX_VEL_2)

        self.state = ns.copy()
        terminal = self.isTerminal()
        # Constant -1 reward per step until the goal is reached.
        reward = 0. if terminal else -1.
        return reward, ns, terminal, self.possibleActions()
示例#5
0
 def _check_value_bounds(self, nx, ny, nspeed, nheading):
     """Clamp a candidate car state to the domain limits.

     Position and speed components are clipped to their ranges; the
     heading angle is wrapped. Returns the bounded state as a numpy
     array ``[x, y, speed, heading]``.
     """
     return np.array([
         bound(nx, self.XMIN, self.XMAX),
         bound(ny, self.YMIN, self.YMAX),
         bound(nspeed, self.SPEEDMIN, self.SPEEDMAX),
         wrap(nheading, self.HEADINGMIN, self.HEADINGMAX),
     ])
示例#6
0
    def step(self, a):
        """Advance one time step using four explicit-Euler sub-steps of dt/4.

        Returns ``(reward, next_state, terminal, possible_actions)``.
        """
        torque = self.AVAIL_TORQUE[a]

        # Optional uniform torque noise.
        if self.torque_noise_max > 0:
            torque += self.random_state.uniform(-self.torque_noise_max,
                                                self.torque_noise_max)

        # _dsdt reads the action from the last slot of the state vector.
        s_augmented = np.append(self.state, torque)

        # Four Euler sub-steps, re-normalizing after each so the joint
        # angles stay in [-pi, pi] and velocities within their limits.
        for _ in range(4):
            s_augmented += np.array(self._dsdt(s_augmented, 0)) * self.dt / 4.
            s_augmented[0] = wrap(s_augmented[0], -np.pi, np.pi)
            s_augmented[1] = wrap(s_augmented[1], -np.pi, np.pi)
            s_augmented[2] = bound(s_augmented[2],
                                   -self.MAX_VEL_1, self.MAX_VEL_1)
            s_augmented[3] = bound(s_augmented[3],
                                   -self.MAX_VEL_2, self.MAX_VEL_2)

        ns = s_augmented[:4]  # drop the appended action
        self.state = ns.copy()
        terminal = self.isTerminal()
        return (0. if terminal else -1.), ns, terminal, self.possibleActions()
    def step(self, a):
        """Advance one step with goal tracking.

        Integrates the acrobot dynamics under torque action *a*, records
        the previous state in ``self.prev_states``, pops entries off
        ``self.goalArray`` as the new state satisfies them, and computes
        the reward (``self.rewardFunction`` overrides the default when
        configured).

        Returns ``(reward, next_state, terminal, possible_actions)``.
        """
        s = self.state[:4]
        torque = self.AVAIL_TORQUE[a]

        # Add noise to the force action
        if self.torque_noise_max > 0:
            torque += self.random_state.uniform(-self.torque_noise_max,
                                                self.torque_noise_max)

        # Augment the state with the force action so it can be passed to
        # _dsdt, which reads the action from the last entry.
        s_augmented = np.append(s, torque)

        # Integrate over one dt; only the final timestep of the returned
        # trajectory matters, and the appended action is dropped.
        # (scipy.integrate.odeint is too slow.)
        ns = rk4(self._dsdt, s_augmented, [0, self.dt])
        ns = ns[-1][:4]

        ns[0] = wrap(ns[0], -np.pi, np.pi)
        ns[1] = wrap(ns[1], -np.pi, np.pi)
        ns[2] = bound(ns[2], -self.MAX_VEL_1, self.MAX_VEL_1)
        ns[3] = bound(ns[3], -self.MAX_VEL_2, self.MAX_VEL_2)

        self.prev_states.append(s)
        ns = self.augment_state(ns)
        self.state = ns.copy()

        terminal = self.isTerminal()

        if not terminal:
            # GOAL FUNCTION: consume every pending goal the new state
            # satisfies. Guard against an emptied goal list so goalArray[0]
            # cannot raise IndexError.
            # NOTE(review): the original used a Python 2 print statement and
            # left an ipdb.set_trace() breakpoint here; both removed.
            while (not self.isTerminal() and len(self.goalArray) > 0
                    and self.goalfn(ns[:4], self.goalArray[0])):
                self.goalArray = self.goalArray[1:]
                print("Goal reached", len(self.prev_states), self.goalArray,
                      self.encodingFunction(self.prev_states))

        # Default reward setup; a custom reward function overrides it for
        # non-terminal steps.
        reward = self.STEP_REWARD if not terminal else self.GOAL_REWARD
        if not terminal and self.rewardFunction is not None:
            reward = self.rewardFunction(self.prev_states,
                                         self.goalArray,
                                         self.STEP_REWARD,
                                         self.GOAL_REWARD)

        return reward, ns, terminal, self.possibleActions()
示例#8
0
    def step(self, a):
        """
        Take acceleration action *a*, adding noise as specified in ``__init__()``.

        Returns ``(reward, next_state, terminal, possible_actions)``.
        """
        position, velocity = self.state
        # Uniform acceleration noise in [-factor*noise, +factor*noise].
        noise = self.accelerationFactor * self.noise * \
            2 * (self.random_state.rand() - .5)
        # Velocity update: noisy action force plus gravity along the
        # sinusoidal hill profile.
        velocity += (noise +
                     self.actions[a] * self.accelerationFactor +
                     np.cos(self.hillPeakFrequency * position) * self.gravityFactor)
        velocity = bound(velocity, self.XDOTMIN, self.XDOTMAX)
        position += velocity
        position = bound(position, self.XMIN, self.XMAX)
        if position <= self.XMIN and velocity < 0:
            velocity = 0  # Bump into wall
        ns = np.array([position, velocity])
        # FIX: commit the new state BEFORE the terminal check. The original
        # called isTerminal() first, so it evaluated the stale pre-transition
        # self.state; every sibling step() implementation updates state first.
        self.state = ns.copy()
        terminal = self.isTerminal()
        r = self.GOAL_REWARD if terminal else self.STEP_REWARD
        return r, ns, terminal, self.possibleActions()
示例#9
0
文件: Acrobot.py 项目: MLDL/rlpy
    def step(self, a):
        """One transition via four explicit-Euler sub-steps of length dt/4.

        Returns ``(reward, next_state, terminal, possible_actions)``;
        the reward is -1 per step and 0 at the terminal state.
        """
        torque = self.AVAIL_TORQUE[a]

        # Optionally corrupt the commanded torque with uniform noise.
        if self.torque_noise_max > 0:
            torque += self.random_state.uniform(
                -self.torque_noise_max, self.torque_noise_max)

        # _dsdt reads the action from the final slot of the state vector.
        state_action = np.append(self.state, torque)

        for _ in range(4):
            derivative = np.array(self._dsdt(state_action, 0))
            state_action += derivative * self.dt / 4.
            # Re-normalize after every sub-step: wrap both joint angles
            # into [-pi, pi] and clip both angular velocities.
            state_action[0] = wrap(state_action[0], -np.pi, np.pi)
            state_action[1] = wrap(state_action[1], -np.pi, np.pi)
            state_action[2] = bound(
                state_action[2], -self.MAX_VEL_1, self.MAX_VEL_1)
            state_action[3] = bound(
                state_action[3], -self.MAX_VEL_2, self.MAX_VEL_2)

        ns = state_action[:4]  # omit the appended action
        self.state = ns.copy()
        terminal = self.isTerminal()
        reward = -1. if not terminal else 0.
        return reward, ns, terminal, self.possibleActions()
示例#10
0
    def _stepFourState(self, s, a):
        """
        :param s: the four-dimensional cartpole state

        Performs a single step of the CartPoleDomain without modifying
        ``self.state`` - this is left to the ``step()`` function in child
        classes. Returns the bounded next state
        ``[theta, thetadot, x, xDot]``.
        """
        force = self.AVAIL_FORCE[a]

        # Perturb the applied force with uniform noise when enabled.
        if self.force_noise_max > 0:
            force += self.random_state.uniform(-self.force_noise_max,
                                               self.force_noise_max)

        # _dsdt reads the action from the last entry of the state vector.
        s_augmented = np.append(s, force)

        # Integrator selection. Several schemes were benchmarked against
        # scipy.integrate (most accurate):
        #   rk4 (mlab)                 3min 15sec   < 0.01% error
        #   pendulum_ode45 (L&P '03)   7min 15sec   ~ 2.00% error
        #   integrate.odeint (scipy)   9min 30sec   (reference)
        # rk4 or euler integration is used by default; the custom method of
        # Lagoudakis & Parr (2003) was only kept for consistency checks.
        if self.int_type == "euler":
            int_fun = self.euler_int
        elif self.int_type == "odeint":
            int_fun = scipy.integrate.odeint
        else:
            int_fun = rk4
        trajectory = int_fun(self._dsdt, s_augmented, [0, self.dt])

        # Only the final integration timestep matters; drop the appended
        # action, keeping [theta, thetadot, x, xDot].
        ns = trajectory[-1][0:4]

        # Wrap the pole angle into [-pi, pi], then clip every component to
        # its configured limits.
        wrapped_theta = wrap(ns[StateIndex.THETA], -np.pi, np.pi)
        ns[StateIndex.THETA] = bound(
            wrapped_theta, self.ANGLE_LIMITS[0], self.ANGLE_LIMITS[1])
        ns[StateIndex.THETA_DOT] = bound(
            ns[StateIndex.THETA_DOT],
            self.ANGULAR_RATE_LIMITS[0], self.ANGULAR_RATE_LIMITS[1])
        ns[StateIndex.X] = bound(
            ns[StateIndex.X],
            self.POSITION_LIMITS[0], self.POSITION_LIMITS[1])
        ns[StateIndex.X_DOT] = bound(
            ns[StateIndex.X_DOT],
            self.VELOCITY_LIMITS[0], self.VELOCITY_LIMITS[1])

        return ns