def objective(objective_args):
    """Hyper-parameter optimisation objective; returns the cumulative z-error."""
    # Unpack the hyper-parameters under optimisation.
    (z_scale, zdot_reward, action_reward, exploration, tolerance, max_mem_a,
     k_a, alpha_a, k_c, alpha_c, max_mem_pm, k_pm, pred_tol, lambda_trace,
     gamma) = objective_args
    (df_x, df_z, df_xdot, df_zdot, df_theta, df_thetadot, df_u1,
     df_u3) = QuadRotor2DPlant.get_default_feature_set()
    # Replace the default z-feature with one whose scale is being tuned.
    feature_z = Feature(r"$z$ [m]", scale=z_scale, bounds=np.array([-25, 0]))
    quad_rotor_plant = QuadRotor2DPlant(
        1. / FREQUENCY,
        blade_flapping=BLADE_FLAPPING,
        init_mean=DEFAULT_INIT_STATE_MEAN,
        feature_set=FeatureSet([
            df_x, feature_z, df_xdot, df_zdot, df_theta, df_thetadot, df_u1,
            df_u3
        ]))
    train_args = (
        Actor(
            FeatureSet([feature_z, df_zdot]),
            FeatureSet([df_u1]),
            quad_rotor_plant.get_feature_set(),
            k_a,
            max_mem_a * 50,
            alpha_a,
            tolerance,
        ),
        Critic(
            FeatureSet([feature_z, df_zdot]),
            quad_rotor_plant.get_feature_set(),
            k_c,
            max_mem_a * 50,
            lambda_trace,
            alpha_c,
            gamma,
            QuadraticErrorRewardFunction([action_reward, 0],
                                         [0, 10., 0, zdot_reward, 0, 0],
                                         desired_state=DESIRED_STATE),
            tolerance,
        ),
        PlantModel(
            quad_rotor_plant.get_feature_set(),
            FeatureSet([feature_z, df_zdot, df_u1]),
            k_pm,
            max_mem_pm * 50,
            pred_tol,
        ),
        quad_rotor_plant,
        DEFAULT_LENGTH,
        DEFAULT_ADD_METHOD,
        DEFAULT_PURGE_METHOD,
        ExplorationStrategy({1: exploration}),
    )
    # Train parsed_args.p controllers in parallel, each with its own seed.
    cs = ControllerSet(
        parallelize(
            parsed_args.j,
            train,
            [train_args + (SEED + i, ) for i in range(parsed_args.p)],
        ))
    result = SimulationResult(
        cs.lookback_result(LOOK_BACK_WINDOW, look_back_metric="median"),
        metric=parsed_args.metric,
    )
    # Index 1 of the flattened error vector is the cumulative z-error.
    cum_z_error = result.get_cum_state_error().flatten()[1]
    print("Finished training with cumulative z-error {:.2f}".format(
        cum_z_error))
    return cum_z_error

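# Illustrative usage (not part of the original script): a minimal sketch of
# how the 15-dimensional objective above could be handed to an off-the-shelf
# optimiser. SciPy's differential_evolution passes a 1-D array of candidate
# values, which the tuple unpacking in objective() accepts directly. The
# bounds below are hypothetical placeholders, not the search ranges used in
# the original experiments:
#
#   from scipy.optimize import differential_evolution
#
#   HYPOTHETICAL_BOUNDS = [(1e-2, 1e2)] * 15  # one (low, high) pair per parameter
#   best = differential_evolution(objective, HYPOTHETICAL_BOUNDS, maxiter=20)
#   print("Best z-error {:.2f} at {}".format(best.fun, best.x))
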
def higher_discount_rate():
    """Sensitivity analysis: retrain with the discount factor raised to 0.99."""
    quad_rotor_plant = QuadRotor2DPlant(
        1. / FREQUENCY,
        blade_flapping=BLADE_FLAPPING,
        init_mean=DEFAULT_INIT_STATE_MEAN,
    )
    actor_critic_args = (
        Actor(
            FeatureSet([feature_z, feature_zdot]),
            FeatureSet([feature_u1]),
            quad_rotor_plant.get_feature_set(),
            K_ACTOR,
            STAGE_ONE_AC_MEMORY,
            ALPHA_ACTOR,
            TOLERANCE_ACTOR,
        ),
        Critic(
            FeatureSet([feature_z, feature_zdot]),
            quad_rotor_plant.get_feature_set(),
            K_CRITIC,
            STAGE_ONE_AC_MEMORY,
            LAMBDA_TRACE,
            ALPHA_CRITIC,
            0.99,  # discount factor under study
            QuadraticErrorRewardFunction(
                ACTION_REWARDS, STATE_REWARDS, desired_state=DESIRED_STATE),
            TOLERANCE_CRITIC,
        ),
        PlantModel(
            quad_rotor_plant.get_feature_set(),
            FeatureSet([feature_z, feature_zdot, feature_u1]),
            K_PLANT_MODEL,
            STAGE_ONE_PM_MEMORY,
            PREDICTION_TOLERANCE,
        ),
        quad_rotor_plant,
        DEFAULT_LENGTH,
        DEFAULT_ADD_METHOD,
        DEFAULT_PURGE_METHOD,
        ExplorationStrategy(EXPLORATION_DICT),
    )
    print("Starting training of quad-rotor with higher discount rate.")
    trained_cs = ControllerSet(
        parallelize(
            parsed_args.j,
            train,
            [actor_critic_args + (SEED + i, ) for i in range(parsed_args.p)],
        ))
    print("Finished higher discount rate with {:.2f} (id={})".format(
        SimulationResult(
            trained_cs.lookback_result(LOOK_BACK_WINDOW),
            metric=parsed_args.metric).get_cum_state_error()[1:2].sum(),
        trained_cs.get_id(),
    ))
    trained_cs.notes = "Sensitivity analysis: higher discount rate"
    # trained_cs.dump()
    RewardSet(trained_cs).dump()

        ),
        quad_rotor_plant,
        DEFAULT_LENGTH,
        DEFAULT_ADD_METHOD,
        DEFAULT_PURGE_METHOD,
        ExplorationStrategy(EXPLORATION_DICT),
    )
    print("Starting training of quad-rotor.")
    # STAGE ONE
    first_stage_cs = ControllerSet(
        parallelize(
            parsed_args.j,
            train_stage_one,
            [actor_critic_args + (SEED + i, ) for i in range(parsed_args.p)],
        ))
    print("Finished stage one with {:.2f}".format(
        SimulationResult(
            first_stage_cs.lookback_result(LOOK_BACK_WINDOW),
            metric=parsed_args.metric).get_cum_state_error()[1:2].sum()))
    # ZERO EXPANSION
    zero_expansion = ControllerSet(
        parallelize(
            parsed_args.j,
            train_zero_expansion,
            [deepcopy(ac) for ac in first_stage_cs],
        ))

def objective(objective_args):
    """Hyper-parameter optimisation objective for the two-stage curriculum."""
    # Unpack the hyper-parameters under optimisation.
    (a2_scale, theta_scale, thetadot_scale, max_mem_ac, max_mem_pm,
     theta_spread, thetadot_spread, exploration, theta_reward,
     thetadot_reward, u_3_reward, k_a, k_c, k_pm) = objective_args
    feature_theta = Feature(r"$\theta$ [rad]", scale=theta_scale)
    feature_thetadot = Feature(r"$\dot{\theta}$ [rad/s]",
                               scale=thetadot_scale,
                               derivative=True)
    feature_a2 = Feature(r"$a_2$ [-]",
                         feature_type="action",
                         scale=a2_scale,
                         bounds=0.3 * np.array([-1, 1]))
    quad_rotor_plant = QuadRotor2DPlant(
        1. / FREQUENCY,
        blade_flapping=BLADE_FLAPPING,
        init_mean=DEFAULT_INIT_STATE_MEAN,
        feature_set=FeatureSet([
            df_x, df_z, df_xdot, df_zdot, feature_theta, feature_thetadot,
            df_a1, feature_a2
        ]),
    )
    stage_one_args = [
        Actor(
            FeatureSet([df_z, df_zdot]),
            FeatureSet([df_a1]),
            quad_rotor_plant.get_feature_set(),
            K_ACTOR,
            STAGE_ONE_AC_MEMORY,
            ALPHA_ACTOR,
            TOLERANCE_ACTOR,
        ),
        Critic(
            FeatureSet([df_z, df_zdot]),
            quad_rotor_plant.get_feature_set(),
            K_CRITIC,
            STAGE_ONE_AC_MEMORY,
            LAMBDA_TRACE,
            ALPHA_CRITIC,
            DISCOUNT,
            QuadraticErrorRewardFunction(ACTION_REWARDS,
                                         STATE_REWARDS,
                                         desired_state=DESIRED_STATE),
            TOLERANCE_CRITIC,
        ),
        PlantModel(
            quad_rotor_plant.get_feature_set(),
            FeatureSet([df_z, df_zdot, df_a1]),
            K_PLANT_MODEL,
            STAGE_ONE_PM_MEMORY,
            PREDICTION_TOLERANCE,
        ),
        quad_rotor_plant,
        DEFAULT_LENGTH,
        DEFAULT_ADD_METHOD,
        DEFAULT_PURGE_METHOD,
        ExplorationStrategy(STAGE_ONE_EXPLORATION_DICT),
    ]
    print("Training basic quad-rotor")
    # STAGE ONE
    cs_stage_one = ControllerSet(
        parallelize(
            parsed_args.j,
            train_stage_one,
            [stage_one_args + [SEED + i] for i in range(parsed_args.p)],
        ))
    _, z_error_stage_one, _, _, _, _ = SimulationResult(
        cs_stage_one.lookback_result(LOOK_BACK_WINDOW),
        metric=parsed_args.metric).get_cum_state_error().flatten()
    print("Finished stage one with {:s} cumulative z-error of {:.2f}".format(
        parsed_args.metric, z_error_stage_one))
    # STAGE TWO
    stage_two_args = [
        max_mem_ac, max_mem_pm, theta_spread, thetadot_spread, exploration,
        theta_reward, thetadot_reward, u_3_reward, k_a, k_c, k_pm,
        feature_theta, feature_thetadot, feature_a2
    ]
    cs_stage_two = ControllerSet(
        parallelize(
            parsed_args.j,
            train_stage_two,
            [stage_two_args + [deepcopy(ac)] for ac in cs_stage_one],
        ))
    _, z_error, _, _, _, _ = SimulationResult(
        cs_stage_two.lookback_result(LOOK_BACK_WINDOW),
        metric=parsed_args.metric).get_cum_state_error().flatten()
    return z_error

            STAGE_ONE_PM_MEMORY,
            PREDICTION_TOLERANCE,
        ),
        quad_rotor_plant,
        DEFAULT_LENGTH,
        DEFAULT_ADD_METHOD,
        DEFAULT_PURGE_METHOD,
        ExplorationStrategy(EXPLORATION_DICT),
    )
    # GAUSSIAN CLONE EXPANSION
    trained_cs = ControllerSet(
        parallelize(
            parsed_args.j,
            train,
            [actor_critic_args + (SEED + i, ) for i in range(parsed_args.p)],
        ))
    print("Finished Gaussian clone expansion (id={}) with {:.2f}".format(
        trained_cs.get_id(),
        SimulationResult(
            trained_cs.lookback_result(LOOK_BACK_WINDOW),
            metric=parsed_args.metric).get_cum_state_error()[1:2].sum()))
    trained_cs.dump()
    RewardSet(trained_cs).dump()
except KeyboardInterrupt:
    print("Shutdown requested... exiting")
finally: