from rl_coach.agents.actor_critic_agent import ActorCriticAgentParameters
from rl_coach.architectures.middleware_parameters import LSTMMiddlewareParameters
from rl_coach.base_parameters import VisualizationParameters, PresetValidationParameters, MiddlewareScheme
from rl_coach.core_types import EnvironmentEpisodes, EnvironmentSteps, RunPhase, \
    SelectedPhaseOnlyDumpMethod, MaxDumpMethod
from rl_coach.environments.environment import SingleLevelSelection
from rl_coach.environments.gym_environment import Atari, atari_deterministic_v4
from rl_coach.exploration_policies.categorical import CategoricalParameters
from rl_coach.graph_managers.graph_manager import ScheduleParameters

####################
# Graph Scheduling #
####################
schedule_params = ScheduleParameters()
schedule_params.evaluation_steps = EnvironmentEpisodes(3)
schedule_params.heatup_steps = EnvironmentSteps(10000)

#########
# Agent #
#########
agent_params = ActorCriticAgentParameters()
agent_params.algorithm.apply_gradients_every_x_episodes = 1
agent_params.algorithm.num_steps_between_gradient_updates = 20
agent_params.algorithm.beta_entropy = 0.05
agent_params.network_wrappers['main'].learning_rate = 0.0001
agent_params.network_wrappers['main'].middleware_parameters = \
    LSTMMiddlewareParameters(scheme=MiddlewareScheme.Medium, number_of_lstm_cells=256)
agent_params.exploration = CategoricalParameters()

###############
# Environment #
###############
env_params = Atari()
env_params.level = SingleLevelSelection(atari_deterministic_v4)

vis_params = VisualizationParameters()
vis_params.video_dump_methods = [SelectedPhaseOnlyDumpMethod(RunPhase.TEST), MaxDumpMethod()]
vis_params.dump_mp4 = True

########
# Test #
########
preset_validation_params = PresetValidationParameters()
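#################
# Graph Manager #
#################
# Hedged sketch: the fragment above stops short of a graph manager. Coach presets
# conventionally close by wiring the collected parameters into a BasicRLGraphManager,
# roughly as below; this closing is an assumption, not part of the original fragment.
from rl_coach.graph_managers.basic_rl_graph_manager import BasicRLGraphManager

graph_manager = BasicRLGraphManager(agent_params=agent_params, env_params=env_params,
                                    schedule_params=schedule_params, vis_params=vis_params,
                                    preset_validation_params=preset_validation_params)
# With SingleLevelSelection above, the game is chosen at launch time,
# e.g. `coach -p <this_preset> -lvl breakout` ('breakout' is just an example level).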
from rl_coach.agents.actor_critic_agent import ActorCriticAgentParameters
from rl_coach.base_parameters import VisualizationParameters, PresetValidationParameters
from rl_coach.core_types import RunPhase, SelectedPhaseOnlyDumpMethod, MaxDumpMethod
from rl_coach.environments.environment import SingleLevelSelection
from rl_coach.environments.gym_environment import Mujoco, MujocoInputFilter, mujoco_v2
from rl_coach.exploration_policies.continuous_entropy import ContinuousEntropyParameters
from rl_coach.filters.observation.observation_normalization_filter import ObservationNormalizationFilter
from rl_coach.filters.reward.reward_rescale_filter import RewardRescaleFilter

#########
# Agent #
#########
agent_params = ActorCriticAgentParameters()
agent_params.algorithm.apply_gradients_every_x_episodes = 1
agent_params.algorithm.num_steps_between_gradient_updates = 10000000
agent_params.algorithm.beta_entropy = 0.0001
agent_params.network_wrappers['main'].learning_rate = 0.00001
agent_params.input_filter = MujocoInputFilter()
agent_params.input_filter.add_reward_filter('rescale', RewardRescaleFilter(1 / 20.))
agent_params.input_filter.add_observation_filter('observation', 'normalize',
                                                 ObservationNormalizationFilter())
agent_params.exploration = ContinuousEntropyParameters()

###############
# Environment #
###############
env_params = Mujoco()
env_params.level = SingleLevelSelection(mujoco_v2)

vis_params = VisualizationParameters()
vis_params.video_dump_methods = [SelectedPhaseOnlyDumpMethod(RunPhase.TEST), MaxDumpMethod()]
vis_params.dump_mp4 = False

########
# Test #
########
preset_validation_params = PresetValidationParameters()
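#################
# Graph Manager #
#################
# Hedged sketch: the fragment does not show this preset's ScheduleParameters, so
# SimpleSchedule() below is a stand-in assumption; the BasicRLGraphManager wiring
# itself follows the standard Coach preset pattern.
from rl_coach.graph_managers.basic_rl_graph_manager import BasicRLGraphManager
from rl_coach.graph_managers.graph_manager import SimpleSchedule

graph_manager = BasicRLGraphManager(agent_params=agent_params, env_params=env_params,
                                    schedule_params=SimpleSchedule(), vis_params=vis_params,
                                    preset_validation_params=preset_validation_params)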
from rl_coach.agents.actor_critic_agent import ActorCriticAgentParameters
from rl_coach.agents.policy_gradients_agent import PolicyGradientRescaler
from rl_coach.architectures.embedder_parameters import InputEmbedderParameters
from rl_coach.environments.starcraft2_environment import StarCraft2EnvironmentParameters
from rl_coach.exploration_policies.additive_noise import AdditiveNoiseParameters
from rl_coach.schedules import ConstantSchedule, LinearSchedule

#########
# Agent #
#########
agent_params = ActorCriticAgentParameters()
agent_params.algorithm.policy_gradient_rescaler = PolicyGradientRescaler.GAE
agent_params.algorithm.apply_gradients_every_x_episodes = 1
agent_params.algorithm.num_steps_between_gradient_updates = 20
agent_params.algorithm.gae_lambda = 0.96
agent_params.algorithm.beta_entropy = 0
agent_params.network_wrappers['main'].clip_gradients = 10.0
agent_params.network_wrappers['main'].learning_rate = 0.00001
# agent_params.network_wrappers['main'].batch_size = 20
agent_params.network_wrappers['main'].batch_size = 64
agent_params.network_wrappers['main'].optimizer_epsilon = 1e-5
agent_params.network_wrappers['main'].adam_optimizer_beta2 = 0.999
agent_params.network_wrappers['main'].input_embedders_parameters = {
    'screen': InputEmbedderParameters(input_rescaling={'image': 3.0})
}

agent_params.exploration = AdditiveNoiseParameters()
agent_params.exploration.noise_percentage_schedule = ConstantSchedule(0.05)
# agent_params.exploration.noise_percentage_schedule = LinearSchedule(0.4, 0.05, 100000)
agent_params.exploration.evaluation_noise_percentage = 0.05

###############
# Environment #
###############
env_params = StarCraft2EnvironmentParameters(level='CollectMineralShards')
env_params.feature_screen_maps_to_use = [5]
env_params.feature_minimap_maps_to_use = [5]
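#################
# Graph Manager #
#################
# Hedged sketch: this fragment defines neither a schedule nor visualization
# parameters, so both defaults below are assumptions standing in for whatever
# the full preset uses.
from rl_coach.base_parameters import VisualizationParameters
from rl_coach.graph_managers.basic_rl_graph_manager import BasicRLGraphManager
from rl_coach.graph_managers.graph_manager import SimpleSchedule

graph_manager = BasicRLGraphManager(agent_params=agent_params, env_params=env_params,
                                    schedule_params=SimpleSchedule(),
                                    vis_params=VisualizationParameters())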
from rl_coach.agents.actor_critic_agent import ActorCriticAgentParameters
from rl_coach.architectures.layers import Dense
from rl_coach.base_parameters import PresetValidationParameters
from rl_coach.core_types import EnvironmentSteps
from rl_coach.environments.gym_environment import GymVectorEnvironment
from rl_coach.exploration_policies.e_greedy import EGreedyParameters
from rl_coach.schedules import LinearSchedule

#########
# Agent #
#########
# Assumption: the original fragment omits the agent constructor; the A3C-style
# algorithm parameters below imply ActorCriticAgentParameters.
agent_params = ActorCriticAgentParameters()
agent_params.algorithm.apply_gradients_every_x_episodes = 1
agent_params.algorithm.num_steps_between_gradient_updates = 20
agent_params.algorithm.beta_entropy = 0.05
agent_params.algorithm.estimate_state_value_using_gae = True
agent_params.algorithm.num_steps_between_copying_online_weights_to_target = EnvironmentSteps(2048)
agent_params.network_wrappers['main'].learning_rate = 0.0003
agent_params.network_wrappers['main'].input_embedders_parameters['observation'].activation_function = 'tanh'
agent_params.network_wrappers['main'].input_embedders_parameters['observation'].scheme = [Dense(64)]
agent_params.network_wrappers['main'].middleware_parameters.scheme = [Dense(64)]
agent_params.network_wrappers['main'].middleware_parameters.activation_function = 'tanh'
agent_params.network_wrappers['main'].batch_size = 64
agent_params.network_wrappers['main'].optimizer_epsilon = 1e-5
agent_params.network_wrappers['main'].clip_gradients = 40.0
agent_params.exploration = EGreedyParameters()
agent_params.exploration.epsilon_schedule = LinearSchedule(1.0, 0.01, 10000)

###############
# Environment #
###############
env_params = GymVectorEnvironment(level='autoscalesim:SimpleScalableWebserviceSim')

########
# Test #
########
preset_validation_params = PresetValidationParameters()
preset_validation_params.test = True
preset_validation_params.min_reward_threshold = 150
preset_validation_params.max_episodes_to_achieve_reward = 400
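#################
# Graph Manager #
#################
# Hedged sketch of the conventional preset closing: the fragment shows no
# ScheduleParameters, so SimpleSchedule() below is an assumption.
from rl_coach.graph_managers.basic_rl_graph_manager import BasicRLGraphManager
from rl_coach.graph_managers.graph_manager import SimpleSchedule

graph_manager = BasicRLGraphManager(agent_params=agent_params, env_params=env_params,
                                    schedule_params=SimpleSchedule(),
                                    preset_validation_params=preset_validation_params)
# A preset assembled this way is typically run through the coach CLI,
# e.g. `coach -p <preset_name>`.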