Example No. 1
    def _ode(self, _, state, action):
        """Compute the state time-derivative.

        Parameters
        ----------
        state : ndarray or Tensor
        action : ndarray or Tensor

        Returns
        -------
        state_derivative : ndarray or Tensor
            The derivative of the state.

        """
        # Physical dynamics
        bk = get_backend(state)
        gravity = self.gravity
        length = self.length
        friction = self.friction
        inertia = self.inertia

        if bk == np:
            action = action.clip(-1.0, 1.0)
        else:
            action = action.clamp(-1.0, 1.0)

        self.last_action = action[0]

        angle, angular_velocity = state

        x_ddot = (gravity / length * bk.sin(angle) + action[..., 0] / inertia -
                  friction / inertia * angular_velocity)

        # Stack in the input's own backend instead of forcing a numpy array.
        return bk.stack((angular_velocity, x_ddot), -1)
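Every example on this page dispatches on get_backend, which maps an array-like input to its array module. The library's own definition is not shown here; a minimal sketch consistent with the tests in Examples No. 15 and No. 17 (it returns the numpy or torch module and raises TypeError otherwise) could look like this:

import numpy as np
import torch

def get_backend(array):
    """Return the array module (np or torch) that `array` belongs to."""
    if isinstance(array, torch.Tensor):
        return torch
    elif isinstance(array, np.ndarray):
        return np
    else:
        raise TypeError(f"Unknown backend for type {type(array)}.")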
Example No. 2
 def set_state(self, observation):
     """Set state from a given observation."""
     bk = get_backend(observation)
     self.state = bk.zeros_like(observation[..., :2])
     # `self.atan2` presumably dispatches to np.arctan2 or torch.atan2 per backend.
     self.state[..., 0] = self.atan2(observation[..., 1], observation[..., 0])
     self.state[..., 1] = observation[..., 2]
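For reference, the atan2 call reconstructs the angle from the (cos, sin) pair stored in the first two observation components. With NumPy (the values here are hypothetical):

import numpy as np

observation = np.array([np.cos(0.3), np.sin(0.3), 1.2])  # [cos(angle), sin(angle), velocity]
angle = np.arctan2(observation[1], observation[0])  # recovers 0.3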
Example No. 3
    def _ode(self, t, state, action):
        """Compute the state time-derivative.

        Parameters
        ----------
        state: ndarray or Tensor
            States.
        action: ndarray or Tensor
            Actions.

        Returns
        -------
        state_derivative: Tensor
            The state derivative according to the dynamics.

        """
        bk = get_backend(state)
        # Physical dynamics
        velocity = state[..., 1]
        x_dot = velocity
        if bk == np:
            acceleration = bk.clip(action[..., 0], -self.max_action,
                                   self.max_action)
        else:
            acceleration = bk.clamp(action[..., 0], -self.max_action,
                                    self.max_action)
        v_dot = acceleration

        return bk.stack((x_dot, v_dot), -1)
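The clip-versus-clamp branch is the recurring pattern for saturating actions in either backend. A standalone sketch, reusing the minimal get_backend from the note under Example No. 1 (the function name and default max_action are hypothetical):

import numpy as np
import torch

def saturate(action, max_action=1.0):
    """Clip `action` to [-max_action, max_action] in its own backend."""
    bk = get_backend(action)
    if bk == np:
        return bk.clip(action, -max_action, max_action)
    return bk.clamp(action, -max_action, max_action)

saturate(np.array([2.0]))  # array([1.])
saturate(torch.tensor([-3.0]))  # tensor([-1.])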
Example No. 4
 def thrust(self, velocity, thrust):
     """Get the thrust coefficient."""
     bk = get_backend(velocity)
     excess = bk.abs(self.drag_force(velocity) - thrust) - 30.0
     return 0.5 - 0.5 * bk.tanh(0.1 * excess)
Example No. 5
    def _ode(self, _, state, action):
        """Compute the state time-derivative.

        Parameters
        ----------
        state: ndarray or Tensor
            States.
        action: ndarray or Tensor
            Actions.

        Returns
        -------
        state_derivative: Tensor
            The state derivative according to the dynamics.

        """
        # Physical dynamics
        velocity = state
        u = action

        bk = get_backend(velocity)
        m = 3.0 + 1.5 * bk.sin(bk.abs(velocity))  # mass
        k = self.thrust(velocity, u)  # thrust coefficient.
        v_dot = (k * u - self.drag_force(velocity)) / m

        return v_dot
Example No. 6
    def forward(self, state, action, next_state):
        """See `AbstractReward.forward()'."""
        bk = get_backend(next_state)
        end_effector = self._get_ee_pos(next_state[..., 0], next_state[..., 1])

        reward_state = bk.exp(-bk.square(end_effector).sum(-1) /
                              (self.length**2))
        return self.get_reward(reward_state, self.action_reward(action))
Example No. 7
 def action_reward(self, action):
     """Get action reward."""
     action = action[..., :self.dim_action[0]]  # get only true dimensions.
     bk = get_backend(action)
     if self.sparse:
         return bk.exp(-bk.square(action / self.action_scale).sum(-1)) - 1
     else:
         return -bk.square(action).sum(-1)
Example No. 8
    def get_ee_position(state):
        """Get the end effector position."""
        bk = get_backend(state)
        theta1, theta2 = state[..., 0], state[..., 1]
        theta3, theta4 = state[..., 2:3], state[..., 3:4]
        theta5, theta6 = state[..., 4:5], state[..., 5:6]

        rot_axis = bk.stack(
            [
                bk.cos(theta2) * bk.cos(theta1),
                bk.cos(theta2) * bk.sin(theta1),
                -bk.sin(theta2),
            ],
            -1,
        )
        rot_perp_axis = bk.stack(
            [-bk.sin(theta1),
             bk.cos(theta1),
             bk.zeros_like(theta1)], -1)

        cur_end = bk.stack(
            [
                0.1 * bk.cos(theta1) + 0.4 * bk.cos(theta1) * bk.cos(theta2),
                0.1 * bk.sin(theta1) + 0.4 * bk.sin(theta1) * bk.cos(theta2) -
                0.188,
                -0.4 * bk.sin(theta2),
            ],
            -1,
        )
        for length, hinge, roll in [(0.321, theta4, theta3),
                                    (0.16828, theta6, theta5)]:
            perp_all_axis = np.cross(rot_axis, rot_perp_axis)
            if bk is torch:
                perp_all_axis = torch.tensor(perp_all_axis,
                                             dtype=torch.get_default_dtype())

            x = rot_axis * bk.cos(hinge)
            y = bk.sin(hinge) * bk.sin(roll) * rot_perp_axis
            z = -bk.sin(hinge) * bk.cos(roll) * perp_all_axis

            new_rot_axis = x + y + z
            new_rot_perp_axis = np.cross(new_rot_axis, rot_axis)
            if bk is torch:
                new_rot_perp_axis = torch.tensor(
                    new_rot_perp_axis, dtype=torch.get_default_dtype())

            norm = bk.sqrt(bk.square(new_rot_perp_axis).sum(-1))
            new_rot_perp_axis[norm < 1e-30] = rot_perp_axis[norm < 1e-30]

            new_rot_perp_axis /= bk.sqrt(
                bk.square(new_rot_perp_axis).sum(-1))[..., None]

            rot_axis, rot_perp_axis = new_rot_axis, new_rot_perp_axis
            cur_end = cur_end + length * new_rot_axis

        return cur_end
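A quick sanity check, assuming the function is reachable at module scope (on this page it appears without self, presumably a staticmethod): with all six joint angles at zero the arm lies along the x-axis, so the x-coordinate is the sum of the link lengths 0.1 + 0.4 + 0.321 + 0.16828 and the y-coordinate is the fixed -0.188 offset.

import numpy as np

state = np.zeros(6)
ee = get_ee_position(state)
np.testing.assert_allclose(ee, [0.98928, -0.188, 0.0], atol=1e-6)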
Example No. 9
def rk4(derivs, y0, t, *args, **kwargs):
    """Integrate 1D or ND system of ODEs using 4-th order Runge-Kutta.

    This is a toy implementation which may be useful if you find
    yourself stranded on a system w/o scipy.  Otherwise use
    :func:`scipy.integrate`.
    *y0*
        initial state vector
    *t*
        sample times
    *derivs*
        returns the derivative of the system and has the
        signature ``dy = derivs(yi, ti)``
    *args*
        additional arguments passed to the derivative function
    *kwargs*
        additional keyword arguments passed to the derivative function
    Example 1::
        ## 2D system
        def derivs6(x, t):
            d1 = x[0] + 2 * x[1]
            d2 = -3 * x[0] + 4 * x[1]
            return np.array((d1, d2))
        dt = 0.0005
        t = np.arange(0.0, 2.0, dt)
        y0 = np.array((1.0, 2.0))
        yout = rk4(derivs6, y0, t)
    Example 2::
        ## 1D system
        alpha = 2
        def derivs(x, t):
            return -alpha * x + np.exp(-t)
        y0 = np.array(1.0)
        yout = rk4(derivs, y0, t)
    If you have access to scipy, you should probably be using the
    scipy.integrate tools rather than this function.
    """
    bk = get_backend(y0)
    yout = bk.zeros((len(t), *y0.shape))

    yout[0] = y0

    for i in range(len(t) - 1):
        thist = t[i]
        dt = t[i + 1] - thist
        dt2 = dt / 2.0
        y0 = yout[i]

        k1 = derivs(y0, thist, *args, **kwargs)
        k2 = derivs(y0 + dt2 * k1, thist + dt2, *args, **kwargs)
        k3 = derivs(y0 + dt2 * k2, thist + dt2, *args, **kwargs)
        k4 = derivs(y0 + dt * k3, thist + dt, *args, **kwargs)
        yout[i + 1] = y0 + dt / 6.0 * (k1 + 2 * k2 + 2 * k3 + k4)
    return yout
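A minimal check of the integrator on dy/dt = -y, whose exact solution is exp(-t):

import numpy as np

t = np.linspace(0.0, 1.0, 101)
yout = rk4(lambda y, ti: -y, np.array([1.0]), t)
np.testing.assert_allclose(yout[-1], np.exp(-1.0), atol=1e-6)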
Example No. 10
    def forward(self, state, action, next_state):
        """See `AbstractReward.forward()'."""
        bk = get_backend(state)
        with torch.no_grad():
            # goal = state[..., -3:]
            dist_to_target = self.get_ee_position(next_state) - self.goal

        if self.sparse:
            reward_state = bk.exp(-bk.square(dist_to_target).sum(-1) /
                                  (self.length_scale**2))
        else:
            reward_state = -bk.square(dist_to_target).sum(-1)

        return self.get_reward(reward_state, self.action_reward(action))
Example No. 11
def discount_cumsum(rewards,
                    gamma=1.0,
                    reward_transformer=RewardTransformer()):
    r"""Get discounted cumulative sum of an array.

    Given a vector :math:`[r_0, r_1, r_2]`, the discounted cumulative sum is the vector:

    .. math:: [r_0 + \gamma r_1 + \gamma^2 r_2, \, r_1 + \gamma r_2, \, r_2].


    Parameters
    ----------
    rewards: Array.
        Array of rewards.
    gamma: float, optional.
        Discount factor.
    reward_transformer: RewardTransformer, optional.

    Returns
    -------
    discounted_returns: Array.
        Sum of discounted returns.

    References
    ----------
    From rllab.
    """
    rewards = reward_transformer(rewards)
    bk = get_backend(rewards)
    if bk is torch and not rewards.requires_grad:
        rewards = rewards.numpy()
    if isinstance(rewards, np.ndarray):
        returns = scipy.signal.lfilter([1], [1, -gamma],
                                       rewards[..., ::-1])[..., ::-1]
        returns = returns.copy()  # Copy so it can later be converted to torch.

        if bk is torch:
            return torch.tensor(returns, dtype=torch.get_default_dtype())
        else:
            return returns

    val = torch.zeros_like(rewards)
    r = 0
    for i, reward in enumerate(reversed(rewards)):
        r = reward + gamma * r
        val[-1 - i] = r
    return val
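Worked example with gamma = 0.9, assuming the default RewardTransformer is the identity: for rewards [1, 1, 1] the result is [1 + 0.9 + 0.81, 1 + 0.9, 1] = [2.71, 1.9, 1.0].

import numpy as np

returns = discount_cumsum(np.array([1.0, 1.0, 1.0]), gamma=0.9)
np.testing.assert_allclose(returns, [2.71, 1.9, 1.0])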
Example No. 12
    def _ode(self, _, state, action):
        """Compute the state time-derivative.

        Parameters
        ----------
        state: ndarray or Tensor
            States.
        action: ndarray or Tensor
            Actions.

        Returns
        -------
        state_derivative: Tensor
            The state derivative according to the dynamics.

        """
        bk = get_backend(state)
        # Physical dynamics
        pendulum_mass = self.pendulum_mass
        cart_mass = self.cart_mass
        length = self.length
        b = self.rot_friction
        g = self.gravity

        total_mass = pendulum_mass + cart_mass

        x, theta, v, omega = state

        x_dot = v
        theta_dot = omega

        det = length * (cart_mass + pendulum_mass * bk.sin(theta)**2)
        v_dot = ((action - pendulum_mass * length *
                  (omega**2) * bk.sin(theta) - b * omega * bk.cos(theta) +
                  0.5 * pendulum_mass * g * length * bk.sin(2 * theta)) *
                 length / det)
        omega_dot = (action * bk.cos(theta) - 0.5 * pendulum_mass * length *
                     (omega**2) * bk.sin(2 * theta) - b * total_mass * omega /
                     (pendulum_mass * length) +
                     total_mass * g * bk.sin(theta)) / det

        # Stack in the input's own backend instead of forcing a numpy array.
        return bk.stack((x_dot, theta_dot, v_dot, omega_dot), -1)
Example No. 13
    def forward(self, state, action, next_state):
        """See `AbstractReward.forward()'."""
        bk = get_backend(state)
        if bk == np:
            goal = np.array(self.goal)
        else:
            goal = self.goal

        if isinstance(state, torch.Tensor):
            state = state.detach()
        tip_pos = self.get_ee_position(state)
        obj_pos = state[..., -3:]

        dist_to_obj = obj_pos - tip_pos
        dist_to_goal = obj_pos - goal

        reward_dist_to_obj = -bk.abs(dist_to_obj).sum(-1)
        reward_dist_to_goal = -bk.abs(dist_to_goal)[..., :-1].sum(-1)
        reward_state = 1.25 * reward_dist_to_goal + 0.5 * reward_dist_to_obj

        self.reward_dist_to_obj = 0.5 * reward_dist_to_obj
        self.reward_dist_to_goal = 1.25 * reward_dist_to_goal

        return self.get_reward(reward_state, self.action_reward(action))
Example No. 14
 def drag_force(self, velocity):
     """Get drag force."""
     bk = get_backend(velocity)
     c = 1.2 + 0.2 * bk.sin(bk.abs(velocity))
     return c * velocity * bk.abs(velocity)
Example No. 15
def test_backend():
    assert torch == get_backend(torch.randn(4))
Example No. 16
 def bk(self):
     """Get current backend of environment."""
     return get_backend(self.state)
Example No. 17
    def test_correctness(self):
        assert get_backend(torch.randn(4)) is torch
        assert get_backend(np.random.randn(3)) is np

        with pytest.raises(TypeError):
            get_backend([1, 2, 3])
Example No. 18
 def _get_ee_pos(self, x0, theta):
     """Get the end-effector position for cart position x0 and pole angle theta."""
     bk = get_backend(x0)
     sin, cos = bk.sin(theta), bk.cos(theta)
     return bk.stack([x0 - self.length * sin, -self.length * (1 + cos)], -1)