def _ode(self, _, state, action):
    """Compute the state time-derivative.

    Parameters
    ----------
    state : ndarray
    action : ndarray

    Returns
    -------
    state_derivative : ndarray
        The derivative of the state.
    """
    # Physical dynamics
    bk = get_backend(state)
    gravity = self.gravity
    length = self.length
    friction = self.friction
    inertia = self.inertia

    if bk == np:
        action = action.clip(-1.0, 1.0)
    else:
        action = action.clamp(-1.0, 1.0)
    self.last_action = action[0]

    angle, angular_velocity = state
    x_ddot = (gravity / length * bk.sin(angle)
              + action[..., 0] / inertia
              - friction / inertia * angular_velocity)

    return np.array((angular_velocity, x_ddot))
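# A minimal rollout sketch for the pendulum dynamics above, numpy only.
# `pendulum_ode`, the constants, and the Euler step are illustrative
# stand-ins: the real class name, attribute defaults, and integrator differ.
import numpy as np

GRAVITY, LENGTH, FRICTION, INERTIA = 9.81, 1.0, 0.005, 1.0  # assumed defaults

def pendulum_ode(t, state, action):
    """Free-function restatement of the pendulum _ode above."""
    action = np.clip(action, -1.0, 1.0)
    angle, angular_velocity = state
    x_ddot = (GRAVITY / LENGTH * np.sin(angle)
              + action[..., 0] / INERTIA
              - FRICTION / INERTIA * angular_velocity)
    return np.array((angular_velocity, x_ddot))

# One forward-Euler step from a near-rest state with zero torque.
state, action, dt = np.array([0.1, 0.0]), np.zeros(1), 0.01
state = state + dt * pendulum_ode(0.0, state, action)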
def set_state(self, observation):
    """Set state from a given observation."""
    bk = get_backend(observation)
    self.state = bk.zeros_like(observation[..., :2])
    self.state[..., 0] = self.atan2(observation[..., 1], observation[..., 0])
    self.state[..., 1] = observation[..., 2]
def _ode(self, t, state, action):
    """Compute the state time-derivative.

    Parameters
    ----------
    state: ndarray or Tensor
        States.
    action: ndarray or Tensor
        Actions.

    Returns
    -------
    state_derivative: Tensor
        The state derivative according to the dynamics.
    """
    bk = get_backend(state)

    # Physical dynamics
    velocity = state[..., 1]
    x_dot = velocity
    if bk == np:
        acceleration = bk.clip(action[..., 0], -self.max_action, self.max_action)
    else:
        acceleration = bk.clamp(action[..., 0], -self.max_action, self.max_action)
    v_dot = acceleration

    return bk.stack((x_dot, v_dot), -1)
def thrust(self, velocity, thrust):
    """Get the thrust coefficient."""
    bk = get_backend(velocity)
    return (-0.5 * bk.tanh(0.1 * (bk.abs(self.drag_force(velocity) - thrust) - 30.0))
            + 0.5)
def _ode(self, _, state, action):
    """Compute the state time-derivative.

    Parameters
    ----------
    state: ndarray or Tensor
        States.
    action: ndarray or Tensor
        Actions.

    Returns
    -------
    state_derivative: Tensor
        The state derivative according to the dynamics.
    """
    # Physical dynamics
    velocity = state
    u = action
    bk = get_backend(velocity)

    m = 3.0 + 1.5 * bk.sin(bk.abs(velocity))  # mass
    k = self.thrust(velocity, u)  # thrust coefficient.
    v_dot = (k * u - self.drag_force(velocity)) / m

    return v_dot
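# Hedged usage sketch: the formulas below restate the expressions in _ode,
# thrust, and drag_force (defined in this module) as free functions so the
# rollout is self-contained; the constant-thrust Euler loop is illustrative,
# not the environment's actual integrator.
import numpy as np

def _drag_force(velocity):
    c = 1.2 + 0.2 * np.sin(np.abs(velocity))  # velocity-dependent coefficient
    return c * velocity * np.abs(velocity)    # quadratic drag

def _thrust_coefficient(velocity, u):
    # Smooth gate in [0, 1] that attenuates thrust once |drag - u| exceeds ~30.
    return -0.5 * np.tanh(0.1 * (np.abs(_drag_force(velocity) - u) - 30.0)) + 0.5

velocity, u, dt = np.array(0.0), np.array(20.0), 0.05
for _ in range(200):  # constant-thrust rollout toward the drag equilibrium
    m = 3.0 + 1.5 * np.sin(np.abs(velocity))  # state-dependent mass
    velocity = velocity + dt * (_thrust_coefficient(velocity, u) * u
                                - _drag_force(velocity)) / m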
def forward(self, state, action, next_state):
    """See `AbstractReward.forward()'."""
    bk = get_backend(next_state)
    end_effector = self._get_ee_pos(next_state[..., 0], next_state[..., 1])
    reward_state = bk.exp(-bk.square(end_effector).sum(-1) / (self.length ** 2))
    return self.get_reward(reward_state, self.action_reward(action))
def action_reward(self, action):
    """Get action reward."""
    action = action[..., :self.dim_action[0]]  # get only true dimensions.
    bk = get_backend(action)
    if self.sparse:
        return bk.exp(-bk.square(action / self.action_scale).sum(-1)) - 1
    else:
        return -bk.square(action).sum(-1)
def get_ee_position(state):
    """Get the end effector position."""
    bk = get_backend(state)
    theta1, theta2 = state[..., 0], state[..., 1]
    theta3, theta4 = state[..., 2:3], state[..., 3:4]
    theta5, theta6 = state[..., 4:5], state[..., 5:6]

    rot_axis = bk.stack(
        [
            bk.cos(theta2) * bk.cos(theta1),
            bk.cos(theta2) * bk.sin(theta1),
            -bk.sin(theta2),
        ],
        -1,
    )
    rot_perp_axis = bk.stack(
        [-bk.sin(theta1), bk.cos(theta1), bk.zeros_like(theta1)], -1)
    cur_end = bk.stack(
        [
            0.1 * bk.cos(theta1) + 0.4 * bk.cos(theta1) * bk.cos(theta2),
            0.1 * bk.sin(theta1) + 0.4 * bk.sin(theta1) * bk.cos(theta2) - 0.188,
            -0.4 * bk.sin(theta2),
        ],
        -1,
    )

    for length, hinge, roll in [(0.321, theta4, theta3), (0.16828, theta6, theta5)]:
        perp_all_axis = np.cross(rot_axis, rot_perp_axis)
        if bk is torch:
            perp_all_axis = torch.tensor(perp_all_axis,
                                         dtype=torch.get_default_dtype())
        x = rot_axis * bk.cos(hinge)
        y = bk.sin(hinge) * bk.sin(roll) * rot_perp_axis
        z = -bk.sin(hinge) * bk.cos(roll) * perp_all_axis
        new_rot_axis = x + y + z
        new_rot_perp_axis = np.cross(new_rot_axis, rot_axis)
        if bk is torch:
            new_rot_perp_axis = torch.tensor(new_rot_perp_axis,
                                             dtype=torch.get_default_dtype())

        norm = bk.sqrt(bk.square(new_rot_perp_axis).sum(-1))
        new_rot_perp_axis[norm < 1e-30] = rot_perp_axis[norm < 1e-30]
        new_rot_perp_axis /= bk.sqrt(
            bk.square(new_rot_perp_axis).sum(-1))[..., None]
        rot_axis, rot_perp_axis = new_rot_axis, new_rot_perp_axis
        cur_end = cur_end + length * new_rot_axis

    return cur_end
def rk4(derivs, y0, t, *args, **kwargs):
    """Integrate 1D or ND system of ODEs using 4-th order Runge-Kutta.

    This is a toy implementation which may be useful if you find yourself
    stranded on a system w/o scipy. Otherwise use :func:`scipy.integrate`.

    *y0*
        initial state vector

    *t*
        sample times

    *derivs*
        returns the derivative of the system and has the signature
        ``dy = derivs(yi, ti)``

    *args*
        additional arguments passed to the derivative function

    *kwargs*
        additional keyword arguments passed to the derivative function

    Example 1::

        ## 2D system
        def derivs6(x, t):
            d1 = x[0] + 2 * x[1]
            d2 = -3 * x[0] + 4 * x[1]
            return (d1, d2)

        dt = 0.0005
        t = np.arange(0.0, 2.0, dt)
        y0 = np.array([1.0, 2.0])
        yout = rk4(derivs6, y0, t)

    Example 2::

        ## 1D system
        alpha = 2
        def derivs(x, t):
            return -alpha * x + np.exp(-t)

        y0 = np.ones(1)
        yout = rk4(derivs, y0, t)

    If you have access to scipy, you should probably be using the
    scipy.integrate tools rather than this function.
    """
    bk = get_backend(y0)
    yout = bk.zeros((len(t), *y0.shape))
    yout[0] = y0

    for i in np.arange(len(t) - 1):
        thist = t[i]
        dt = t[i + 1] - thist
        dt2 = dt / 2.0
        y0 = yout[i]

        k1 = derivs(y0, thist, *args, **kwargs)
        k2 = derivs(y0 + dt2 * k1, thist + dt2, *args, **kwargs)
        k3 = derivs(y0 + dt2 * k2, thist + dt2, *args, **kwargs)
        k4 = derivs(y0 + dt * k3, thist + dt, *args, **kwargs)
        yout[i + 1] = y0 + dt / 6.0 * (k1 + 2 * k2 + 2 * k3 + k4)

    return yout
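# Quick numerical sanity check of rk4 against the exact solution of
# dy/dt = -y; the tolerance is a loose bound for this step size, chosen
# here purely for illustration.
import numpy as np

t = np.linspace(0.0, 1.0, 101)
yout = rk4(lambda y, _t: -y, np.ones(1), t)
assert np.allclose(yout[:, 0], np.exp(-t), atol=1e-6)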
def forward(self, state, action, next_state):
    """See `AbstractReward.forward()'."""
    bk = get_backend(state)
    with torch.no_grad():
        # goal = state[..., -3:]
        dist_to_target = self.get_ee_position(next_state) - self.goal
        if self.sparse:
            reward_state = bk.exp(-bk.square(dist_to_target).sum(-1)
                                  / (self.length_scale ** 2))
        else:
            reward_state = -bk.square(dist_to_target).sum(-1)
        return self.get_reward(reward_state, self.action_reward(action))
def discount_cumsum(rewards, gamma=1.0, reward_transformer=RewardTransformer()):
    r"""Get discounted cumulative sum of an array.

    Given a vector [r0, r1, r2], the discounted cumulative sum is another vector:

    .. math:: [r_0 + \gamma r_1 + \gamma^2 r_2, r_1 + \gamma r_2, r_2].

    Parameters
    ----------
    rewards: Array.
        Array of rewards.
    gamma: float, optional.
        Discount factor.
    reward_transformer: RewardTransformer, optional.

    Returns
    -------
    discounted_returns: Array.
        Sum of discounted returns.

    References
    ----------
    From rllab.
    """
    rewards = reward_transformer(rewards)
    bk = get_backend(rewards)
    if bk is torch and not rewards.requires_grad:
        rewards = rewards.numpy()

    if type(rewards) is np.ndarray:
        returns = scipy.signal.lfilter([1], [1, -gamma], rewards[..., ::-1])[..., ::-1]
        returns = returns.copy()  # The copy is for future transforms to pytorch.
        if bk is torch:
            return torch.tensor(returns, dtype=torch.get_default_dtype())
        else:
            return returns

    val = torch.zeros_like(rewards)
    r = 0
    for i, reward in enumerate(reversed(rewards)):
        r = reward + gamma * r
        val[-1 - i] = r
    return val
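# Worked example of the docstring formula, assuming the default
# RewardTransformer is the identity transform.
import numpy as np

returns = discount_cumsum(np.array([1.0, 2.0, 3.0]), gamma=0.5)
# [1 + 0.5 * 2 + 0.25 * 3, 2 + 0.5 * 3, 3] = [2.75, 3.5, 3.0]
assert np.allclose(returns, [2.75, 3.5, 3.0])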
def _ode(self, _, state, action):
    """Compute the state time-derivative.

    Parameters
    ----------
    state: ndarray or Tensor
        States.
    action: ndarray or Tensor
        Actions.

    Returns
    -------
    state_derivative: Tensor
        The state derivative according to the dynamics.
    """
    bk = get_backend(state)

    # Physical dynamics
    pendulum_mass = self.pendulum_mass
    cart_mass = self.cart_mass
    length = self.length
    b = self.rot_friction
    g = self.gravity
    total_mass = pendulum_mass + cart_mass

    x, theta, v, omega = state
    x_dot = v
    theta_dot = omega

    det = length * (cart_mass + pendulum_mass * bk.sin(theta) ** 2)
    v_dot = ((action
              - pendulum_mass * length * (omega ** 2) * bk.sin(theta)
              - b * omega * bk.cos(theta)
              + 0.5 * pendulum_mass * g * length * bk.sin(2 * theta))
             * length / det)
    omega_dot = ((action * bk.cos(theta)
                  - 0.5 * pendulum_mass * length * (omega ** 2) * bk.sin(2 * theta)
                  - b * total_mass * omega / (pendulum_mass * length)
                  + total_mass * g * bk.sin(theta))
                 / det)

    return np.array((x_dot, theta_dot, v_dot, omega_dot))
def forward(self, state, action, next_state):
    """See `AbstractReward.forward()'."""
    bk = get_backend(state)
    if bk == np:
        goal = np.array(self.goal)
    else:
        goal = self.goal
    if isinstance(state, torch.Tensor):
        state = state.detach()

    tip_pos = self.get_ee_position(state)
    obj_pos = state[..., -3:]

    dist_to_obj = obj_pos - tip_pos
    dist_to_goal = obj_pos - goal

    reward_dist_to_obj = -bk.abs(dist_to_obj).sum(-1)
    reward_dist_to_goal = -bk.abs(dist_to_goal)[..., :-1].sum(-1)

    reward_state = 1.25 * reward_dist_to_goal + 0.5 * reward_dist_to_obj
    self.reward_dist_to_obj = 0.5 * reward_dist_to_obj
    self.reward_dist_to_goal = 1.25 * reward_dist_to_goal
    return self.get_reward(reward_state, self.action_reward(action))
def drag_force(self, velocity):
    """Get drag force."""
    bk = get_backend(velocity)
    c = 1.2 + 0.2 * bk.sin(bk.abs(velocity))
    return c * velocity * bk.abs(velocity)
def test_backend():
    assert torch == get_backend(torch.randn(4))
def bk(self):
    """Get current backend of environment."""
    return get_backend(self.state)
def test_correctness(self):
    assert get_backend(torch.randn(4)) is torch
    assert get_backend(np.random.randn(3)) is np

    with pytest.raises(TypeError):
        get_backend([1, 2, 3])
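# The tests above pin down the contract of get_backend: torch for tensors,
# numpy for ndarrays, TypeError otherwise. A minimal sketch consistent with
# that contract; the actual implementation in this module may differ, so the
# name is deliberately distinct.
import numpy as np
import torch

def get_backend_sketch(array):
    """Return the array module (np or torch) that owns `array`."""
    if isinstance(array, torch.Tensor):
        return torch
    if isinstance(array, np.ndarray):
        return np
    raise TypeError(f"Expected ndarray or Tensor, got {type(array)}.")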
def _get_ee_pos(self, x0, theta):
    """Get the end-effector (pendulum tip) position from cart position and angle."""
    bk = get_backend(x0)
    sin, cos = bk.sin(theta), bk.cos(theta)
    return bk.stack([x0 - self.length * sin, -self.length * (1 + cos)], -1)