def __init__(self): CopterEnv.__init__(self) # Initialize state self._init() # Action space = motors, rescaled from [0,1] to [-1,+1] self.action_space = spaces.Box(np.array([-1] * 4), np.array([1] * 4)) # Observation space = full state space self.observation_space = spaces.Box(np.array([-np.inf] * 12), np.array([+np.inf] * 12))
def __init__(self, dt=.001, target=10, tolerance=1.0): CopterEnv.__init__(self, dt) self.target = target self.tolerance = tolerance # Action space = motors, rescaled from [0,1] to [-1,+1] self.action_space = spaces.Box(np.array([-1]), np.array([1])) # Observation space = altitude, vertical_velocity self.observation_space = spaces.Box(np.array([0,-np.inf]), np.array([np.inf,np.inf])) self._init()
def __init__(self): CopterEnv.__init__(self, statedims=15) # Initialize state self._init() # Action space = motors, rescaled from [0,1] to [-1,+1] self.action_space = spaces.Box(np.array([-1]*4), np.array([1]*4)) # Observation space = full state space plus target position self.observation_space = spaces.Box(np.array([-np.inf]*15), np.array([+np.inf]*15)) self.target_theta = 0
def step(self, action): # Rescale action from [-1,+1] to [0,1] motor = (1 + action[0]) / 2 # Use it for all four motors motors = motor * np.ones(4) # Call parent-class step() to do basic update CopterEnv._update(self, motors) # Dynamics uses NED coordinates, so negate to get altitude and vertical velocity altitude = -self.state[4] velocity = -self.state[5] # Get a reward for every timestep on target self.reward += (int)(abs(altitude - self.target) < self.tolerance) # False = max_episodes in registry determines whether we're done return (altitude, velocity), self.reward, False, {}
def step(self, action): # Rescale action from [-1,+1] to [0,1] motors = (1 + action) / 2 # Call parent-class method to do basic state update, return whether vehicle crashed crashed = CopterEnv._update(self, motors) # Integrate position self.position += self.state[0:5:2] # Reward is logarithm of Euclidean distance from origin reward = np.sqrt(np.sum(self.position[0:2]**2)) return self.state, reward, crashed, {}
def step(self, action): # Rescale action from [-1,+1] to [0,1] motors = (1 + action) / 2 # Call parent-class method to do basic state update, return whether vehicle crashed crashed = CopterEnv._update(self, motors) # Update target position. For now we just make it go in circles self.state[12] = 10 * np.cos(self.target_theta) self.state[13] = 10 * np.sin(self.target_theta) self.target_theta += .0025 # Fake up reward for now reward = 0 return self.state, reward, crashed, {}
def step(self, action): # Rescale action from [-1,+1] to [0,1] motor = (1 + action[0]) / 2 # Use it for all four motors motors = motor * np.ones(4) # Call parent-class step() to do basic update crashed = CopterEnv._update(self, motors) # Dynamics uses NED coordinates, so negate to get altitude and vertical velocity altitude = -self.state[4] velocity = -self.state[5] #costs = (altitude-self.target)**2 + .1*velocity**2 + .001*(motor**2) costs = (altitude - self.target)**2 + velocity**2 # False = max_episodes in registry determines whether we're done return (altitude, velocity), -costs, crashed, {}
def reset(self): CopterEnv.reset(self) self._init() return -self.state[4:6]