def step(self, action): # Rescale action from [-1,+1] to [0,1] motors = (1 + action) / 2 # Call parent-class method to do basic state update, return whether vehicle crashed crashed = CopterEnv._update(self, motors) # Update target position. For now we just make it go in circles self.state[12] = 10 * np.cos(self.target_theta) self.state[13] = 10 * np.sin(self.target_theta) self.target_theta += .0025 # Fake up reward for now reward = 0 return self.state, reward, crashed, {}
def step(self, action): # Rescale action from [-1,+1] to [0,1] motor = (1 + action[0]) / 2 # Use it for all four motors motors = motor * np.ones(4) # Call parent-class step() to do basic update crashed = CopterEnv._update(self, motors) # Dynamics uses NED coordinates, so negate to get altitude and vertical velocity altitude = -self.state[4] velocity = -self.state[5] #costs = (altitude-self.target)**2 + .1*velocity**2 + .001*(motor**2) costs = (altitude - self.target)**2 + velocity**2 # False = max_episodes in registry determines whether we're done return (altitude, velocity), -costs, crashed, {}