Пример #1
0
    def step(self, action):

        # Rescale action from [-1,+1] to [0,1]
        motors = (1 + action) / 2

        # Call parent-class method to do basic state update, return whether vehicle crashed
        crashed = CopterEnv._update(self, motors)

        # Update target position.  For now we just make it go in circles
        self.state[12] = 10 * np.cos(self.target_theta)
        self.state[13] = 10 * np.sin(self.target_theta)
        self.target_theta += .0025

        # Fake up reward for now
        reward = 0

        return self.state, reward, crashed, {}
Пример #2
0
    def step(self, action):

        # Rescale action from [-1,+1] to [0,1]
        motor = (1 + action[0]) / 2

        # Use it for all four motors
        motors = motor * np.ones(4)

        # Call parent-class step() to do basic update
        crashed = CopterEnv._update(self, motors)

        # Dynamics uses NED coordinates, so negate to get altitude and vertical velocity
        altitude = -self.state[4]
        velocity = -self.state[5]

        #costs = (altitude-self.target)**2 + .1*velocity**2 + .001*(motor**2)
        costs = (altitude - self.target)**2 + velocity**2

        # False = max_episodes in registry determines whether we're done
        return (altitude, velocity), -costs, crashed, {}