Example #1
def objective(objective_args):
    (z_scale, zdot_reward, action_reward, exploration, tolerance, max_mem_a,
     k_a, alpha_a, k_c, alpha_c, max_mem_pm, k_pm, pred_tol, lambda_trace,
     gamma) = objective_args

    (df_x, df_z, df_xdot, df_zdot, df_theta, df_thetadot, df_u1,
     df_u3) = QuadRotor2DPlant.get_default_feature_set()

    feature_z = Feature(r"$z$ [m]", scale=z_scale, bounds=np.array([-25, 0]))
    quad_rotor_plant = QuadRotor2DPlant(1. / FREQUENCY,
                                        blade_flapping=BLADE_FLAPPING,
                                        init_mean=DEFAULT_INIT_STATE_MEAN,
                                        feature_set=FeatureSet([
                                            df_x,
                                            feature_z,
                                            df_xdot,
                                            df_zdot,
                                            df_theta,
                                            df_thetadot,
                                            df_u1,
                                            df_u3,
                                        ]))

    train_args = (
        Actor(
            FeatureSet([feature_z, df_zdot]),
            FeatureSet([df_u1]),
            quad_rotor_plant.get_feature_set(),
            k_a,
            max_mem_a * 50,
            alpha_a,
            tolerance,
        ),
        Critic(
            FeatureSet([feature_z, df_zdot]),
            quad_rotor_plant.get_feature_set(),
            k_c,
            max_mem_a * 50,
            lambda_trace,
            alpha_c,
            gamma,
            QuadraticErrorRewardFunction([action_reward, 0],
                                         [0, 10., 0, zdot_reward, 0, 0],
                                         desired_state=DESIRED_STATE),
            tolerance,
        ),
        PlantModel(
            quad_rotor_plant.get_feature_set(),
            FeatureSet([feature_z, df_zdot, df_u1]),
            k_pm,
            max_mem_pm * 50,
            pred_tol,
        ),
        quad_rotor_plant,
        DEFAULT_LENGTH,
        DEFAULT_ADD_METHOD,
        DEFAULT_PURGE_METHOD,
        ExplorationStrategy({1: exploration}),
    )

    cs = ControllerSet(
        parallelize(
            parsed_args.j,
            train,
            [train_args + (SEED + i,) for i in range(parsed_args.p)],
        ))

    result = SimulationResult(
        cs.lookback_result(
            LOOK_BACK_WINDOW,
            look_back_metric="median",
        ),
        metric=parsed_args.metric,
    )
    training_message = "Finished training with cumulative z-error {:.2f}".format(
        result.get_cum_state_error().flatten()[1])
    print(training_message)
    return result.get_cum_state_error().flatten()[1]
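
The objective above is shaped like a black-box tuning target: it takes a flat tuple of hyperparameters and returns a scalar cumulative z-error to be minimized. A minimal sketch of how it could be driven by scikit-optimize's gp_minimize follows; the skopt dependency and every bound in search_space are illustrative assumptions, not values from the original experiment.

from skopt import gp_minimize
from skopt.space import Integer, Real

# Illustrative bounds only -- the real experiment's search space is not shown here.
search_space = [
    Real(0.01, 1.0, name="z_scale"),
    Real(0.0, 10.0, name="zdot_reward"),
    Real(0.0, 10.0, name="action_reward"),
    Real(0.1, 5.0, name="exploration"),
    Real(1e-8, 1e-2, prior="log-uniform", name="tolerance"),
    Integer(10, 100, name="max_mem_a"),
    Integer(1, 20, name="k_a"),
    Real(0.01, 1.0, name="alpha_a"),
    Integer(1, 20, name="k_c"),
    Real(0.01, 1.0, name="alpha_c"),
    Integer(10, 100, name="max_mem_pm"),
    Integer(1, 20, name="k_pm"),
    Real(1e-8, 1e-4, prior="log-uniform", name="pred_tol"),
    Real(0.0, 1.0, name="lambda_trace"),
    Real(0.9, 0.999, name="gamma"),
]

# gp_minimize passes a list of parameter values, which the tuple
# unpacking at the top of objective() accepts directly.
result = gp_minimize(objective, search_space, n_calls=50, random_state=SEED)
print("Best cumulative z-error: {:.2f}".format(result.fun))

After the run, result.x holds the best-scoring hyperparameter vector in the same order as search_space.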
Example #2
def higher_discount_rate():
    quad_rotor_plant = QuadRotor2DPlant(
        1. / FREQUENCY,
        blade_flapping=BLADE_FLAPPING,
        init_mean=DEFAULT_INIT_STATE_MEAN,
    )
    actor_critic_args = (
        Actor(
            FeatureSet([feature_z, feature_zdot]),
            FeatureSet([feature_u1]),
            quad_rotor_plant.get_feature_set(),
            K_ACTOR,
            STAGE_ONE_AC_MEMORY,
            ALPHA_ACTOR,
            TOLERANCE_ACTOR,
        ),
        Critic(
            FeatureSet([feature_z, feature_zdot]),
            quad_rotor_plant.get_feature_set(),
            K_CRITIC,
            STAGE_ONE_AC_MEMORY,
            LAMBDA_TRACE,
            ALPHA_CRITIC,
            0.99,
            QuadraticErrorRewardFunction(
                ACTION_REWARDS,
                STATE_REWARDS,
                desired_state=DESIRED_STATE
            ),
            TOLERANCE_CRITIC,
        ),
        PlantModel(
            quad_rotor_plant.get_feature_set(),
            FeatureSet([feature_z, feature_zdot, feature_u1]),
            K_PLANT_MODEL,
            STAGE_ONE_PM_MEMORY,
            PREDICTION_TOLERANCE,
        ),
        quad_rotor_plant,
        DEFAULT_LENGTH,
        DEFAULT_ADD_METHOD,
        DEFAULT_PURGE_METHOD,
        ExplorationStrategy(EXPLORATION_DICT),
    )

    print("Starting training of quad-rotor with higher discount rate.")
    trained_cs = ControllerSet(
        parallelize(
            parsed_args.j,
            train,
            [actor_critic_args + (SEED + i,) for i in range(parsed_args.p)],
        )
    )
    print("Finished higher discount rate with {:.2f} (id={})".format(
        SimulationResult(
            trained_cs.lookback_result(LOOK_BACK_WINDOW),
            metric=parsed_args.metric
        ).get_cum_state_error()[1:2].sum(),
        trained_cs.get_id(),
    ))
    trained_cs.notes = "Sensitivity analysis: higher discount rate"
    # trained_cs.dump()
    RewardSet(trained_cs).dump()
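
Note that the discount factor is passed as the literal 0.99 here rather than the shared DISCOUNT constant used elsewhere; keeping every other argument identical to the baseline is what makes this a one-factor sensitivity run. Only the RewardSet is persisted, while the controller dump itself is left commented out.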
Example #3
    filemode="w"
)
SEED = 4124135
np.random.seed(SEED)
np.set_printoptions(
    precision=4,
    linewidth=200,
    suppress=True,
)
np.seterr(divide="raise", invalid="raise")


(feature_x, feature_z,
 feature_xdot, feature_zdot,
 feature_theta, feature_thetadot,
 feature_u1, feature_u3) = QuadRotor2DPlant.get_default_feature_set()

LOOK_BACK_WINDOW = 5
FREQUENCY = 50.  # Hz
BLADE_FLAPPING = True
DEFAULT_ADD_METHOD = "mean"
DEFAULT_PURGE_METHOD = "age-weighted"
DEFAULT_LENGTH = 3  # seconds
STAGE_ONE_EPISODES = 75
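# Assumed semantics: starting episode -> exploration level, i.e. level 2 from
# episode 1 and level 3 from episode 51 onward.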
EXPLORATION_DICT = {1: 2, 51: 3, 101: 3}
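# Column state vectors ordered as the first six default features:
# [x, z, xdot, zdot, theta, thetadot].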
DEFAULT_INIT_STATE_MEAN = np.array([[0, -9., 0, 0, 0, 0]]).T
DESIRED_STATE = np.array([[0, -10., 0, 0, 0, 0]]).T
AGE_THRESHOLD = 45.

# Actor
ALPHA_ACTOR = 0.15032140063618069
Example #4
def objective(objective_args):
    (
        a2_scale,
        theta_scale,
        thetadot_scale,
        max_mem_ac,
        max_mem_pm,
        theta_spread,
        thetadot_spread,
        exploration,
        theta_reward,
        thetadot_reward,
        u_3_reward,
        k_a,
        k_c,
        k_pm,
    ) = objective_args

    feature_theta = Feature(r"$\theta$ [rad]", scale=theta_scale)
    feature_thetadot = Feature(r"$\dot{\theta}$ [rad/s]",
                               scale=thetadot_scale,
                               derivative=True)
    feature_a2 = Feature(r"$a_2$ [-]",
                         feature_type="action",
                         scale=0.760859,
                         bounds=0.3 * np.array([-1, 1]))
    quad_rotor_plant = QuadRotor2DPlant(
        1. / FREQUENCY,
        blade_flapping=BLADE_FLAPPING,
        init_mean=DEFAULT_INIT_STATE_MEAN,
        feature_set=FeatureSet([
            df_x, df_z, df_xdot, df_zdot, feature_theta, feature_thetadot,
            df_a1, feature_a2
        ]),
    )

    stage_one_args = [
        Actor(
            FeatureSet([df_z, df_zdot]),
            FeatureSet([df_a1]),
            quad_rotor_plant.get_feature_set(),
            K_ACTOR,
            STAGE_ONE_AC_MEMORY,
            ALPHA_ACTOR,
            TOLERANCE_ACTOR,
        ),
        Critic(
            FeatureSet([df_z, df_zdot]),
            quad_rotor_plant.get_feature_set(),
            K_CRITIC,
            STAGE_ONE_AC_MEMORY,
            LAMBDA_TRACE,
            ALPHA_CRITIC,
            DISCOUNT,
            QuadraticErrorRewardFunction(ACTION_REWARDS,
                                         STATE_REWARDS,
                                         desired_state=DESIRED_STATE),
            TOLERANCE_CRITIC,
        ),
        PlantModel(
            quad_rotor_plant.get_feature_set(),
            FeatureSet([df_z, df_zdot, df_a1]),
            K_PLANT_MODEL,
            STAGE_ONE_PM_MEMORY,
            PREDICTION_TOLERANCE,
        ),
        quad_rotor_plant,
        DEFAULT_LENGTH,
        DEFAULT_ADD_METHOD,
        DEFAULT_PURGE_METHOD,
        ExplorationStrategy(STAGE_ONE_EXPLORATION_DICT),
    ]

    print("Training basic quad-rotor")

    # STAGE ONE
    cs_stage_one = ControllerSet(
        parallelize(
            parsed_args.j,
            train_stage_one,
            [stage_one_args + [SEED + i] for i in range(parsed_args.p)],
        ))

    _, z_error_stage_one, _, _, _, _ = SimulationResult(
        cs_stage_one.lookback_result(LOOK_BACK_WINDOW),
        metric=parsed_args.metric).get_cum_state_error().flatten()
    print("Finished stage one with {:s} cumulative z-error of {:.2f}".format(
        parsed_args.metric, z_error_stage_one))

    stage_two_args = [
        max_mem_ac, max_mem_pm, theta_spread, thetadot_spread, exploration,
        theta_reward, thetadot_reward, u_3_reward, k_a, k_c, k_pm,
        feature_theta, feature_thetadot, feature_a2
    ]
    cs_stage_two = ControllerSet(
        parallelize(
            parsed_args.j,
            train_stage_two,
            [stage_two_args + [deepcopy(ac)] for ac in cs_stage_one],
        ))
    x_error, z_error, _, _, theta_error, _ = SimulationResult(
        cs_stage_two.lookback_result(LOOK_BACK_WINDOW),
        metric=parsed_args.metric).get_cum_state_error().flatten()
    return z_error
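
This objective runs a two-stage curriculum: stage one trains altitude control on the z and zdot features alone, and stage two warm-starts from deep copies of the stage-one controllers before introducing the pitch features. Only z_error is returned, so the outer optimizer keeps judging the tuned parameters by altitude performance even after the pitch features are added.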
Example #5
LAMBDA_TRACE = 0.75055692999458412
STATE_REWARDS = np.array([0, 10., 0, 0.4491648864, 0, 0])
ACTION_REWARDS = np.array([3.551383408, 0])

# Plant model
K_PLANT_MODEL = 9
STAGE_ONE_PM_MEMORY = 350
PREDICTION_TOLERANCE = 4.9763444056056387e-07

# STAGE TWO
STAGE_TWO_EPISODES = 75
STAGE_TWO_INCR_HOLD = 3
STAGE_TWO_METHOD = "clone-gauss"

(df_x, df_z, df_xdot, df_zdot, df_theta, df_thetadot, df_a1,
 df_a2) = QuadRotor2DPlant.get_default_feature_set()


def train_stage_one(args):
    actor_critic = ActorCriticController(*args)
    actor_critic.AGE_THRESHOLD = AGE_THRESHOLD
    actor_critic.train(STAGE_ONE_EPISODES)
    return actor_critic


def train_stage_two(args):
    (max_mem_ac, max_mem_pm, theta_spread, thetadot_spread, exploration,
     theta_reward, thetadot_reward, u_3_reward, k_a, k_c, k_pm, feature_theta,
     feature_thetadot, feature_a2, actor_critic) = args

    actor_critic._actor._knn = int(k_a)
Example #6
    actor_critic.set_memory_sizes(STAGE_TWO_AC_MEMORY, STAGE_TWO_AC_MEMORY,
                                  STAGE_TWO_PM_MEMORY)
    actor_critic.change_feature(
        FeatureChange(feature_thetadot, "clone-gauss",
                      spread=PITCH_DOT_SPREAD))
    actor_critic.train(int(np.floor(STAGE_TWO_EPISODES / 2)),
                       train_hold=STAGE_TWO_INCR_HOLD)
    return actor_critic
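
The fragment above shows the stage-two warm start: the actor, critic, and plant-model memories are enlarged to their stage-two sizes, the pitch-rate feature is introduced through a "clone-gauss" FeatureChange (presumably cloning existing memory samples with Gaussian spread PITCH_DOT_SPREAD; the exact semantics live inside FeatureChange), and training then resumes for half of STAGE_TWO_EPISODES with train_hold=STAGE_TWO_INCR_HOLD.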


if __name__ == "__main__":
    try:
        quad_rotor_plant = QuadRotor2DPlant(
            1. / FREQUENCY,
            blade_flapping=BLADE_FLAPPING,
            init_mean=DEFAULT_INIT_STATE_MEAN,
        )
        (feature_x, feature_z, feature_xdot, feature_zdot, feature_theta,
         feature_thetadot, feature_u1,
         feature_u3) = quad_rotor_plant.get_feature_set()

        actor_critic_args = (
            Actor(
                FeatureSet([feature_z, feature_zdot]),
                FeatureSet([feature_u1]),
                quad_rotor_plant.get_feature_set(),
                K_ACTOR,
                STAGE_ONE_AC_MEMORY,
                ALPHA_ACTOR,
                TOLERANCE_ACTOR,