def main():
    args = parse_cmd_line()
    env_params = GymVectorEnvironment(level=args.env)

    if args.mode == 'rnd':
        print('training on pure random agent experience and training offline')
        train_on_pure_random(env_params, args.n_epochs, args.dataset_size)
    elif args.mode == 'olx':
        print('using trained agent to generate experience and training offline')
        train_using_experience_agent(env_params, args.n_epochs, args.dataset_size)
    else:
        # train_on_csv_file('./acrobot_dataset.csv')
        print('loading experience from csv and training offline')
        # assert os.path.expanduser(args.mode), 'csv file does not exist'
        # extract obs_dim, act_dim from the file
        csv_file_name = os.path.join(os.path.expanduser('~'),
                                     'share/Data/MLA/L2P', args.mode)
        df = pd.read_csv(csv_file_name)
        act_dim = 1 + df['action'].max()  # assuming discrete actions starting from 0
        obs_dim = len([s for s in df.columns if 'state_feature' in s])
        train_on_csv_file(csv_file_name, args.n_epochs, args.dataset_size,
                          obs_dim=obs_dim, act_dim=act_dim)
    return
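
# main() calls parse_cmd_line() without showing it. Below is a minimal sketch of
# such a parser, assuming argparse; the flag names mirror the attributes used in
# main() (env, mode, n_epochs, dataset_size), but the defaults and help strings
# are illustrative assumptions, not the original helper.
import argparse

def parse_cmd_line():
    parser = argparse.ArgumentParser(description='offline/batch RL training entry point')
    parser.add_argument('--env', type=str, default='Acrobot-v1',
                        help='Gym level passed to GymVectorEnvironment')
    parser.add_argument('--mode', type=str, default='rnd',
                        help="'rnd' = random experience, 'olx' = trained agent experience, "
                             "anything else is treated as a CSV dataset filename")
    parser.add_argument('--n_epochs', type=int, default=100,
                        help='number of offline training epochs')
    parser.add_argument('--dataset_size', type=int, default=50000,
                        help='number of transitions to generate/use')
    return parser.parse_args()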
def evaluate(params):
    # file params
    experiment_path = os.path.join(params.output_data_dir)
    logger.experiment_path = os.path.join(experiment_path, 'evaluation')

    params.checkpoint_restore_dir = os.path.join(params.input_data_dir, 'checkpoint')
    checkpoint_file = os.path.join(params.checkpoint_restore_dir, 'checkpoint')
    inplace_change(checkpoint_file, "/opt/ml/output/data/checkpoint", ".")
    # Note that due to a tensorflow issue (https://github.com/tensorflow/tensorflow/issues/9146) we need to replace
    # the absolute path for the evaluation-from-a-checkpointed-model to work

    vis_params = VisualizationParameters()
    vis_params.dump_gifs = True

    task_params = TaskParameters(evaluate_only=True, experiment_path=logger.experiment_path)
    task_params.__dict__ = add_items_to_dict(task_params.__dict__, params.__dict__)

    graph_manager = BasicRLGraphManager(
        agent_params=ClippedPPOAgentParameters(),
        env_params=GymVectorEnvironment(level='TSP_env:TSPEasyEnv'),
        schedule_params=ScheduleParameters(),
        vis_params=vis_params)
    graph_manager = graph_manager.create_graph(task_parameters=task_params)
    graph_manager.evaluate(EnvironmentSteps(5))
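
# evaluate() relies on an inplace_change() helper that is not shown here. Below
# is a minimal sketch of what it presumably does (rewrite a text file with one
# substring replaced); the real helper may be implemented differently.
def inplace_change(filename, old_string, new_string):
    with open(filename) as f:
        contents = f.read()
    if old_string not in contents:
        return
    with open(filename, 'w') as f:
        # rewrite the checkpoint metadata so it points at a relative path
        # instead of the absolute training-time path
        f.write(contents.replace(old_string, new_string))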
agent_params.algorithm.num_steps_between_copying_online_weights_to_target = EnvironmentSteps(2048)

# Distributed Coach synchronization type.
agent_params.algorithm.distributed_coach_synchronization_type = DistributedCoachSynchronizationType.SYNC

agent_params.exploration = EGreedyParameters()
agent_params.exploration.epsilon_schedule = LinearSchedule(1.0, 0.01, 10000)

agent_params.pre_network_filter.add_observation_filter(
    'observation', 'normalize_observation',
    ObservationNormalizationFilter(name='normalize_observation'))

###############
# Environment #
###############
env_params = GymVectorEnvironment(level='knapsack_env:KnapSackMediumEnv')

#################
# Visualization #
#################
vis_params = VisualizationParameters()
vis_params.dump_gifs = True

########
# Test #
########
preset_validation_params = PresetValidationParameters()
preset_validation_params.test = True
preset_validation_params.min_reward_threshold = 150
preset_validation_params.max_episodes_to_achieve_reward = 400
"observation" ].activation_function = "tanh" agent_params.network_wrappers["main"].input_embedders_parameters["observation"].scheme = [Dense(64)] agent_params.network_wrappers["main"].middleware_parameters.scheme = [Dense(64)] agent_params.network_wrappers["main"].middleware_parameters.activation_function = "tanh" agent_params.network_wrappers["main"].batch_size = 64 agent_params.network_wrappers["main"].optimizer_epsilon = 1e-5 agent_params.network_wrappers["main"].clip_gradients = 40.0 agent_params.exploration = EGreedyParameters() agent_params.exploration.epsilon_schedule = LinearSchedule(1.0, 0.01, 10000) ############### # Environment # ############### env_params = GymVectorEnvironment(level="autoscalesim:SimpleScalableWebserviceSim") ######## # Test # ######## preset_validation_params = PresetValidationParameters() preset_validation_params.test = True preset_validation_params.min_reward_threshold = 150 preset_validation_params.max_episodes_to_achieve_reward = 400 graph_manager = BasicRLGraphManager( agent_params=agent_params, env_params=env_params, schedule_params=schedule_params, vis_params=VisualizationParameters(), preset_validation_params=preset_validation_params,
def get_graph_manager(**hp_dict):
    ####################
    # All Default Parameters #
    ####################
    params = {}
    params["batch_size"] = int(hp_dict.get("batch_size", 64))
    params["num_epochs"] = int(hp_dict.get("num_epochs", 10))
    params["stack_size"] = int(hp_dict.get("stack_size", 1))
    params["lr"] = float(hp_dict.get("lr", 0.0003))
    params["exploration_type"] = (hp_dict.get("exploration_type", "huber")).lower()
    params["e_greedy_value"] = float(hp_dict.get("e_greedy_value", .05))
    params["epsilon_steps"] = int(hp_dict.get("epsilon_steps", 10000))
    params["beta_entropy"] = float(hp_dict.get("beta_entropy", .01))
    params["discount_factor"] = float(hp_dict.get("discount_factor", .999))
    params["loss_type"] = hp_dict.get("loss_type", "Mean squared error").lower()
    params["num_episodes_between_training"] = int(hp_dict.get("num_episodes_between_training", 20))
    params["term_cond_max_episodes"] = int(hp_dict.get("term_cond_max_episodes", 100000))
    params["term_cond_avg_score"] = float(hp_dict.get("term_cond_avg_score", 100000))

    params_json = json.dumps(params, indent=2, sort_keys=True)
    print("Using the following hyper-parameters", params_json, sep='\n')

    ####################
    # Graph Scheduling #
    ####################
    schedule_params = ScheduleParameters()
    schedule_params.improve_steps = TrainingSteps(params["term_cond_max_episodes"])
    schedule_params.steps_between_evaluation_periods = EnvironmentEpisodes(40)
    schedule_params.evaluation_steps = EnvironmentEpisodes(5)
    schedule_params.heatup_steps = EnvironmentSteps(0)

    #########
    # Agent #
    #########
    agent_params = ClippedPPOAgentParameters()

    agent_params.network_wrappers['main'].learning_rate = params["lr"]
    agent_params.network_wrappers['main'].input_embedders_parameters['observation'].activation_function = 'relu'
    agent_params.network_wrappers['main'].middleware_parameters.activation_function = 'relu'
    agent_params.network_wrappers['main'].batch_size = params["batch_size"]
    agent_params.network_wrappers['main'].optimizer_epsilon = 1e-5
    agent_params.network_wrappers['main'].adam_optimizer_beta2 = 0.999

    if params["loss_type"] == "huber":
        agent_params.network_wrappers['main'].replace_mse_with_huber_loss = True

    agent_params.algorithm.clip_likelihood_ratio_using_epsilon = 0.2
    agent_params.algorithm.clipping_decay_schedule = LinearSchedule(1.0, 0, 1000000)
    agent_params.algorithm.beta_entropy = params["beta_entropy"]
    agent_params.algorithm.gae_lambda = 0.95
    agent_params.algorithm.discount = params["discount_factor"]
    agent_params.algorithm.optimization_epochs = params["num_epochs"]
    agent_params.algorithm.estimate_state_value_using_gae = True
    agent_params.algorithm.num_steps_between_copying_online_weights_to_target = EnvironmentEpisodes(
        params["num_episodes_between_training"])
    agent_params.algorithm.num_consecutive_playing_steps = EnvironmentEpisodes(
        params["num_episodes_between_training"])
    agent_params.algorithm.distributed_coach_synchronization_type = DistributedCoachSynchronizationType.SYNC

    if params["exploration_type"] == "categorical":
        agent_params.exploration = CategoricalParameters()
    else:
        agent_params.exploration = EGreedyParameters()
        agent_params.exploration.epsilon_schedule = LinearSchedule(
            1.0, params["e_greedy_value"], params["epsilon_steps"])

    ###############
    # Environment #
    ###############
    SilverstoneInputFilter = InputFilter(is_a_reference_filter=True)
    SilverstoneInputFilter.add_observation_filter('observation', 'to_grayscale',
                                                  ObservationRGBToYFilter())
    SilverstoneInputFilter.add_observation_filter('observation', 'to_uint8',
                                                  ObservationToUInt8Filter(0, 255))
    SilverstoneInputFilter.add_observation_filter('observation', 'stacking',
                                                  ObservationStackingFilter(params["stack_size"]))

    env_params = GymVectorEnvironment()
    env_params.default_input_filter = SilverstoneInputFilter
    env_params.level = 'SilverstoneRacetrack-Discrete-v0'

    vis_params = VisualizationParameters()
    vis_params.dump_mp4 = False

    ########
    # Test #
    ########
    preset_validation_params = PresetValidationParameters()
    preset_validation_params.test = True
    preset_validation_params.min_reward_threshold = 400
    preset_validation_params.max_episodes_to_achieve_reward = 1000

    graph_manager = BasicRLGraphManager(
        agent_params=agent_params,
        env_params=env_params,
        schedule_params=schedule_params,
        vis_params=vis_params,
        preset_validation_params=preset_validation_params)
    return graph_manager, params_json
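
# A minimal usage sketch for the preset factory above, assuming it is driven
# programmatically the same way evaluate() drives its graph manager; the
# hyper-parameter values and experiment path below are illustrative only.
if __name__ == '__main__':
    graph_manager, params_json = get_graph_manager(batch_size=64, lr=0.0003)
    task_params = TaskParameters(experiment_path='./experiment')
    graph_manager.create_graph(task_parameters=task_params)
    graph_manager.improve()  # heatup, train and evaluate according to schedule_params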
# exploration.epsilon_schedule = LinearSchedule(0.5, 0.01, 100000)
agent_params.exploration = exploration
agent_params.memory.max_size = (MemoryGranularity.Transitions, 10**5)

###############
# Environment #
###############
DeepRacerInputFilter = InputFilter(is_a_reference_filter=True)
DeepRacerInputFilter.add_observation_filter('observation', 'to_grayscale',
                                            ObservationRGBToYFilter())
DeepRacerInputFilter.add_observation_filter('observation', 'to_uint8',
                                            ObservationToUInt8Filter(0, 255))
DeepRacerInputFilter.add_observation_filter('observation', 'stacking',
                                            ObservationStackingFilter(1))

env_params = GymVectorEnvironment()
env_params.default_input_filter = DeepRacerInputFilter
env_params.level = 'RoboMaker-DeepRacer-v0'

vis_params = VisualizationParameters()
vis_params.dump_mp4 = False
vis_params.tensorboard = True
vis_params.print_networks_summary = True

########
# Test #
########
preset_validation_params = PresetValidationParameters()
preset_validation_params.test = True
preset_validation_params.min_reward_threshold = 400
preset_validation_params.max_episodes_to_achieve_reward = 1000
from rl_coach.agents.clipped_ppo_agent import ClippedPPOAgentParameters
from rl_coach.environments.gym_environment import GymVectorEnvironment
from rl_coach.graph_managers.basic_rl_graph_manager import BasicRLGraphManager
from rl_coach.graph_managers.graph_manager import SimpleSchedule
from rl_coach.core_types import *
# from rl_coach.environments.environment import SelectedPhaseOnlyDumpMethod, MaxDumpMethod
from rl_coach import logger
from rl_coach.base_parameters import TaskParameters
from rl_coach.base_parameters import VisualizationParameters

################
# Environment #
################
env_params = GymVectorEnvironment(level='VRP_abstract_env:VRPEasyEnv')

#########
# Agent #
#########
agent_params = ClippedPPOAgentParameters()

#################
# Visualization #
#################
env_params.frame_skip = 5  # to make sure the gifs work without skipping steps

vis_params = VisualizationParameters()
vis_params.dump_gifs = True
# vis_params.video_dump_methods = [SelectedPhaseOnlyDumpMethod(RunPhase.TEST), MaxDumpMethod()]
agent_params.algorithm.num_steps_between_copying_online_weights_to_target = EnvironmentEpisodes(30)
agent_params.algorithm.num_consecutive_playing_steps = EnvironmentEpisodes(30)

agent_params.exploration = CategoricalParameters()

###############
# Environment #
###############
DeepRacerInputFilter = InputFilter(is_a_reference_filter=True)
DeepRacerInputFilter.add_observation_filter('observation', 'to_grayscale',
                                            ObservationRGBToYFilter())
DeepRacerInputFilter.add_observation_filter('observation', 'to_uint8',
                                            ObservationToUInt8Filter(0, 255))
DeepRacerInputFilter.add_observation_filter('observation', 'stacking',
                                            ObservationStackingFilter(1))

env_params = GymVectorEnvironment()
env_params.default_input_filter = DeepRacerInputFilter
env_params.level = 'SageMaker-DeepRacer-Discrete-v0'

vis_params = VisualizationParameters()
vis_params.dump_mp4 = False

########
# Test #
########
preset_validation_params = PresetValidationParameters()
preset_validation_params.test = True
preset_validation_params.min_reward_threshold = 4000
preset_validation_params.max_episodes_to_achieve_reward = 20

graph_manager = BasicRLGraphManager(
agent_params.network_wrappers['main'].input_embedders_parameters['observation'].scheme = [Conv2d(32, [1, 3], 1)]
agent_params.network_wrappers['main'].middleware_parameters.scheme = MiddlewareScheme.Empty
agent_params.network_wrappers['main'].batch_size = 64
agent_params.algorithm.clipping_decay_schedule = LinearSchedule(1.0, 0, 150000)
agent_params.algorithm.discount = 0.99
agent_params.algorithm.num_steps_between_copying_online_weights_to_target = EnvironmentSteps(2048)

# Distributed Coach synchronization type.
agent_params.algorithm.distributed_coach_synchronization_type = DistributedCoachSynchronizationType.SYNC

agent_params.exploration = EGreedyParameters()
agent_params.exploration.epsilon_schedule = LinearSchedule(1.0, 0.01, 10000)

###############
# Environment #
###############
env_params = GymVectorEnvironment(level='portfolio_env:PortfolioEnv')
env_params.__dict__['observation_space_type'] = ObservationSpaceType.Tensor

########
# Test #
########
preset_validation_params = PresetValidationParameters()
preset_validation_params.test = True

graph_manager = BasicRLGraphManager(agent_params=agent_params,
                                    env_params=env_params,
                                    schedule_params=schedule_params,
                                    vis_params=VisualizationParameters(),
                                    preset_validation_params=preset_validation_params)
def get_graph_manager(**hp_dict):
    ####################
    # All Default Parameters #
    ####################
    params = {}
    params["batch_size"] = int(hp_dict.get("batch_size", 64))
    params["num_epochs"] = int(hp_dict.get("num_epochs", 10))
    params["stack_size"] = int(hp_dict.get("stack_size", 1))
    params["lr"] = float(hp_dict.get("lr", 0.0003))
    params["exploration_type"] = (hp_dict.get("exploration_type", "huber")).lower()
    params["e_greedy_value"] = float(hp_dict.get("e_greedy_value", .05))
    params["epsilon_steps"] = int(hp_dict.get("epsilon_steps", 10000))
    params["beta_entropy"] = float(hp_dict.get("beta_entropy", .01))
    params["discount_factor"] = float(hp_dict.get("discount_factor", .999))
    params["loss_type"] = hp_dict.get("loss_type", "Mean squared error").lower()
    params["num_episodes_between_training"] = int(hp_dict.get("num_episodes_between_training", 20))
    params["term_cond_max_episodes"] = int(hp_dict.get("term_cond_max_episodes", 100000))
    params["term_cond_avg_score"] = float(hp_dict.get("term_cond_avg_score", 100000))

    params_json = json.dumps(params, indent=2, sort_keys=True)
    print("Using the following hyper-parameters", params_json, sep='\n')

    ####################
    # Graph Scheduling #
    ####################
    schedule_params = ScheduleParameters()
    schedule_params.improve_steps = TrainingSteps(params["term_cond_max_episodes"])
    schedule_params.steps_between_evaluation_periods = EnvironmentEpisodes(40)
    schedule_params.evaluation_steps = EnvironmentEpisodes(5)
    schedule_params.heatup_steps = EnvironmentSteps(0)

    #########
    # Agent #
    #########
    agent_params = ClippedPPOAgentParameters()

    agent_params.network_wrappers['main'].learning_rate = params["lr"]
    agent_params.network_wrappers['main'].input_embedders_parameters['observation'].activation_function = 'relu'
    agent_params.network_wrappers['main'].middleware_parameters.activation_function = 'relu'
    agent_params.network_wrappers['main'].batch_size = params["batch_size"]
    agent_params.network_wrappers['main'].optimizer_epsilon = 1e-5
    agent_params.network_wrappers['main'].adam_optimizer_beta2 = 0.999

    if params["loss_type"] == "huber":
        agent_params.network_wrappers['main'].replace_mse_with_huber_loss = True

    agent_params.algorithm.clip_likelihood_ratio_using_epsilon = 0.2
    agent_params.algorithm.clipping_decay_schedule = LinearSchedule(1.0, 0, 1000000)
    agent_params.algorithm.beta_entropy = params["beta_entropy"]
    agent_params.algorithm.gae_lambda = 0.95
    agent_params.algorithm.discount = params["discount_factor"]
    agent_params.algorithm.optimization_epochs = params["num_epochs"]
    agent_params.algorithm.estimate_state_value_using_gae = True
    agent_params.algorithm.num_steps_between_copying_online_weights_to_target = EnvironmentEpisodes(
        params["num_episodes_between_training"])
    agent_params.algorithm.num_consecutive_playing_steps = EnvironmentEpisodes(
        params["num_episodes_between_training"])
    agent_params.algorithm.distributed_coach_synchronization_type = DistributedCoachSynchronizationType.SYNC

    if params["exploration_type"] == "categorical":
        agent_params.exploration = CategoricalParameters()
    else:
        agent_params.exploration = EGreedyParameters()
        agent_params.exploration.epsilon_schedule = LinearSchedule(
            1.0, params["e_greedy_value"], params["epsilon_steps"])

    ###############
    # Environment #
    ###############
    SilverstoneInputFilter = InputFilter(is_a_reference_filter=True)
    SilverstoneInputFilter.add_observation_filter('observation', 'to_grayscale',
                                                  ObservationRGBToYFilter())
    SilverstoneInputFilter.add_observation_filter('observation', 'to_uint8',
                                                  ObservationToUInt8Filter(0, 255))
    SilverstoneInputFilter.add_observation_filter('observation', 'stacking',
                                                  ObservationStackingFilter(params["stack_size"]))

    env_params = GymVectorEnvironment()
    env_params.default_input_filter = SilverstoneInputFilter
    env_params.level = 'SilverstoneRacetrack-Discrete-v0'

    vis_params = VisualizationParameters()
    vis_params.dump_mp4 = False

    ########
    # Test #
    ########
    preset_validation_params = PresetValidationParameters()
    preset_validation_params.test = True
    preset_validation_params.min_reward_threshold = 400
    preset_validation_params.max_episodes_to_achieve_reward = 1000

    graph_manager = BasicRLGraphManager(
        agent_params=agent_params,
        env_params=env_params,
        schedule_params=schedule_params,
        vis_params=vis_params,
        preset_validation_params=preset_validation_params)
    return graph_manager, params_json
agent_params = BootstrappedDQNAgentParameters()
agent_params.network_wrappers['main'].learning_rate = 0.00025
agent_params.memory.max_size = (MemoryGranularity.Transitions, 1000000)
agent_params.algorithm.discount = 0.99
agent_params.algorithm.num_consecutive_playing_steps = EnvironmentSteps(4)
agent_params.network_wrappers['main'].heads_parameters[0].num_output_head_copies = num_output_head_copies
agent_params.network_wrappers['main'].heads_parameters[0].rescale_gradient_from_head_by_factor = 1.0 / num_output_head_copies
agent_params.exploration.bootstrapped_data_sharing_probability = 1.0
agent_params.exploration.architecture_num_q_heads = num_output_head_copies
agent_params.exploration.epsilon_schedule = ConstantSchedule(0)
agent_params.input_filter = NoInputFilter()
agent_params.output_filter = NoOutputFilter()

###############
# Environment #
###############
env_params = GymVectorEnvironment(
    level='rl_coach.environments.toy_problems.exploration_chain:ExplorationChain')
env_params.additional_simulator_parameters = {
    'chain_length': N,
    'max_steps': N + 7
}

graph_manager = BasicRLGraphManager(agent_params=agent_params,
                                    env_params=env_params,
                                    schedule_params=schedule_params,
                                    vis_params=VisualizationParameters())
from rl_coach.agents.clipped_ppo_agent import ClippedPPOAgentParameters
from rl_coach.architectures.layers import Dense
from rl_coach.schedules import LinearSchedule
from rl_coach.filters.filter import InputFilter
from rl_coach.filters.observation.observation_normalization_filter import ObservationNormalizationFilter
from rl_coach.base_parameters import VisualizationParameters, PresetValidationParameters, DistributedCoachSynchronizationType
from rl_coach.core_types import TrainingSteps, EnvironmentEpisodes, EnvironmentSteps
from rl_coach.environments.gym_environment import GymVectorEnvironment
from rl_coach.graph_managers.basic_rl_graph_manager import BasicRLGraphManager
from rl_coach.graph_managers.graph_manager import ScheduleParameters

###############
# Environment #
###############
import gym_guess_number  # pylint: disable=unused-import

env_params = GymVectorEnvironment(level='GuessNumber-v0')

####################
# Graph Scheduling #
####################
training_steps = 200000

schedule_params = ScheduleParameters()
schedule_params.improve_steps = TrainingSteps(training_steps)
schedule_params.steps_between_evaluation_periods = EnvironmentEpisodes(100)
schedule_params.evaluation_steps = EnvironmentEpisodes(5)
schedule_params.heatup_steps = EnvironmentSteps(0)

######################
# Agent - ClippedPPO #
######################
from rl_coach.agents.clipped_ppo_agent import ClippedPPOAgentParameters
from rl_coach.environments.gym_environment import GymVectorEnvironment
from rl_coach.graph_managers.basic_rl_graph_manager import BasicRLGraphManager
from rl_coach.graph_managers.graph_manager import SimpleSchedule
from rl_coach.core_types import *
# from rl_coach.environments.environment import SelectedPhaseOnlyDumpMethod, MaxDumpMethod
from rl_coach import logger
from rl_coach.base_parameters import TaskParameters
from rl_coach.base_parameters import VisualizationParameters

################
# Environment #
################
env_params = GymVectorEnvironment(level='TSP_env:TSPMediumEnv')

#########
# Agent #
#########
agent_params = ClippedPPOAgentParameters()

#################
# Visualization #
#################
env_params.frame_skip = 5  # to make sure the gifs work without skipping steps

vis_params = VisualizationParameters()
vis_params.dump_gifs = True
# vis_params.video_dump_methods = [SelectedPhaseOnlyDumpMethod(RunPhase.TEST), MaxDumpMethod()]
from rl_coach.agents.clipped_ppo_agent import ClippedPPOAgentParameters
from rl_coach.environments.gym_environment import GymVectorEnvironment
from rl_coach.graph_managers.basic_rl_graph_manager import BasicRLGraphManager
from rl_coach.graph_managers.graph_manager import SimpleSchedule
from rl_coach.core_types import *
# from rl_coach.environments.environment import SelectedPhaseOnlyDumpMethod, MaxDumpMethod
from rl_coach import logger
from rl_coach.base_parameters import TaskParameters
from rl_coach.base_parameters import VisualizationParameters

################
# Environment #
################
env_params = GymVectorEnvironment(level='TSP_env:TSPMediumEnv')

#########
# Agent #
#########
agent_params = ClippedPPOAgentParameters()

#################
# Visualization #
#################
env_params.frame_skip = 5  # to make sure the gifs work without skipping steps

vis_params = VisualizationParameters()
vis_params.dump_gifs = True
# vis_params.video_dump_methods = [SelectedPhaseOnlyDumpMethod(RunPhase.TEST), MaxDumpMethod()]
agent_params.algorithm.num_steps_between_gradient_updates = 20
agent_params.algorithm.beta_entropy = 0.005
agent_params.network_wrappers['main'].learning_rate = 0.00002
agent_params.network_wrappers['main'].input_embedders_parameters['observation'] = \
    InputEmbedderParameters(scheme=[Dense(200)])
agent_params.network_wrappers['main'].middleware_parameters = \
    LSTMMiddlewareParameters(scheme=MiddlewareScheme.Empty, number_of_lstm_cells=128)

agent_params.input_filter = InputFilter()
agent_params.input_filter.add_reward_filter('rescale', RewardRescaleFilter(1/20.))
agent_params.input_filter.add_observation_filter('observation', 'normalize', ObservationNormalizationFilter())

###############
# Environment #
###############
env_params = GymVectorEnvironment(level=SingleLevelSelection(mujoco_v2))

########
# Test #
########
preset_validation_params = PresetValidationParameters()
preset_validation_params.test = False
preset_validation_params.min_reward_threshold = 400
preset_validation_params.max_episodes_to_achieve_reward = 1000
preset_validation_params.num_workers = 8
preset_validation_params.reward_test_level = 'inverted_pendulum'
preset_validation_params.trace_test_levels = ['inverted_pendulum', 'hopper']

graph_manager = BasicRLGraphManager(agent_params=agent_params,
                                    env_params=env_params,
                                    schedule_params=schedule_params,
                                    vis_params=VisualizationParameters(),
                                    preset_validation_params=preset_validation_params)
agent_params.exploration.continuous_exploration_policy_parameters.evaluation_noise = 0

agent_params.input_filter = InputFilter()
agent_params.input_filter.add_observation_filter('observation', 'clipping',
                                                 ObservationClippingFilter(-200, 200))

agent_params.pre_network_filter = InputFilter()
agent_params.pre_network_filter.add_observation_filter(
    'observation', 'normalize_observation',
    ObservationNormalizationFilter(name='normalize_observation'))
agent_params.pre_network_filter.add_observation_filter(
    'achieved_goal', 'normalize_achieved_goal',
    ObservationNormalizationFilter(name='normalize_achieved_goal'))
agent_params.pre_network_filter.add_observation_filter(
    'desired_goal', 'normalize_desired_goal',
    ObservationNormalizationFilter(name='normalize_desired_goal'))

###############
# Environment #
###############
env_params = GymVectorEnvironment(level=SingleLevelSelection(fetch_v1))
env_params.custom_reward_threshold = -49

########
# Test #
########
preset_validation_params = PresetValidationParameters()
preset_validation_params.trace_test_levels = ['slide', 'pick_and_place', 'push', 'reach']

graph_manager = BasicRLGraphManager(agent_params=agent_params,
                                    env_params=env_params,
                                    schedule_params=schedule_params,
                                    vis_params=VisualizationParameters(),
                                    preset_validation_params=preset_validation_params)
agent_params.algorithm.num_consecutive_playing_steps = EnvironmentEpisodes(20)

agent_params.exploration = CategoricalParameters()
agent_params.memory.max_size = (MemoryGranularity.Transitions, 10**5)

###############
# Environment #
###############
turtlebot3_input_filter = InputFilter(is_a_reference_filter=True)
turtlebot3_input_filter.add_observation_filter('observation', 'to_grayscale',
                                               ObservationRGBToYFilter())
turtlebot3_input_filter.add_observation_filter('observation', 'to_uint8',
                                               ObservationToUInt8Filter(0, 255))
turtlebot3_input_filter.add_observation_filter('observation', 'stacking',
                                               ObservationStackingFilter(1))

env_params = GymVectorEnvironment()
env_params.default_input_filter = turtlebot3_input_filter
env_params.level = 'RoboMaker-ObjectTracker-v0'

vis_params = VisualizationParameters()
vis_params.dump_mp4 = False

########
# Test #
########
preset_validation_params = PresetValidationParameters()
preset_validation_params.test = True
preset_validation_params.min_reward_threshold = 400
preset_validation_params.max_episodes_to_achieve_reward = 1000

graph_manager = BasicRLGraphManager(
# NN configuration
agent_params.network_wrappers['main'].batch_size = 32
agent_params.network_wrappers['main'].learning_rate = 0.0001
agent_params.network_wrappers['main'].input_embedders_parameters['observation'].scheme = [Dense(512)]
agent_params.network_wrappers['main'].replace_mse_with_huber_loss = False
agent_params.network_wrappers['main'].heads_parameters = [DuelingQHeadParameters()]
agent_params.network_wrappers['main'].middleware_parameters.scheme = [Dense(512)]

# ER size
agent_params.memory.max_size = (MemoryGranularity.Transitions, 10000)

# E-Greedy schedule
agent_params.exploration.epsilon_schedule = LinearSchedule(1.0, 0.01, 40000)

#############
# Environment
#############
env_params = GymVectorEnvironment(level='trading_env:TradingEnv')

##################
# Manage resources
##################
preset_validation_params = PresetValidationParameters()
preset_validation_params.test = True

graph_manager = BasicRLGraphManager(agent_params=agent_params,
                                    env_params=env_params,
                                    schedule_params=schedule_params,
                                    vis_params=VisualizationParameters(),
                                    preset_validation_params=preset_validation_params)
# from rl_coach.environments.environment import SelectedPhaseOnlyDumpMethod, MaxDumpMethod
from rl_coach import logger
from rl_coach.agents.clipped_ppo_agent import ClippedPPOAgentParameters
from rl_coach.base_parameters import TaskParameters, VisualizationParameters
from rl_coach.core_types import *
from rl_coach.environments.gym_environment import GymVectorEnvironment
from rl_coach.graph_managers.basic_rl_graph_manager import BasicRLGraphManager
from rl_coach.graph_managers.graph_manager import SimpleSchedule

################
# Environment #
################
env_params = GymVectorEnvironment(level="TSP_env:TSPEasyEnv")

#########
# Agent #
#########
agent_params = ClippedPPOAgentParameters()

#################
# Visualization #
#################
env_params.frame_skip = 5  # to make sure the gifs work without skipping steps

vis_params = VisualizationParameters()
vis_params.dump_gifs = True
# vis_params.video_dump_methods = [SelectedPhaseOnlyDumpMethod(RunPhase.TEST), MaxDumpMethod()]
agent_params.algorithm.beta_entropy = 0
agent_params.algorithm.gae_lambda = 0.95
agent_params.algorithm.discount = 1

# How many epochs to train the network using supervised methods
agent_params.algorithm.optimization_epochs = 10
agent_params.algorithm.estimate_state_value_using_gae = True

# Distributed Coach synchronization type.
agent_params.algorithm.distributed_coach_synchronization_type = DistributedCoachSynchronizationType.SYNC

agent_params.pre_network_filter = InputFilter()
agent_params.pre_network_filter.add_observation_filter(
    'observation', 'normalize_observation',
    ObservationNormalizationFilter(name='normalize_observation'))

###############
# Environment #
###############
env_params = GymVectorEnvironment()
env_params.level = './environment.py:DistillerWrapperEnvironment'

vis_params = VisualizationParameters()
vis_params.dump_parameters_documentation = False
vis_params.render = True
vis_params.native_rendering = True
vis_params.dump_signals_to_csv_every_x_episodes = 1

graph_manager = BasicRLGraphManager(agent_params=agent_params,
                                    env_params=env_params,
                                    schedule_params=schedule_params,
                                    vis_params=vis_params)
agent_params.algorithm.num_consecutive_playing_steps = EnvironmentSteps(1)

# NN configuration
agent_params.network_wrappers['main'].learning_rate = 0.00025
agent_params.network_wrappers['main'].replace_mse_with_huber_loss = False

# ER size
agent_params.memory.max_size = (MemoryGranularity.Transitions, 40000)

# E-Greedy schedule
agent_params.exploration.epsilon_schedule = LinearSchedule(1.0, 0.01, 10000)

################
# Environment #
################
env_params = GymVectorEnvironment()
env_params.level = "env.gym_super_mario_bros.smb_env:SuperMarioBrosEnv"

#################
# Visualization #
#################
vis_params = VisualizationParameters()
vis_params.dump_gifs = True

########
# Test #
########
preset_validation_params = PresetValidationParameters()
preset_validation_params.test = True
preset_validation_params.min_reward_threshold = 150
agent_params = ActorCriticAgentParameters()
agent_params.algorithm.apply_gradients_every_x_episodes = 1
agent_params.algorithm.num_steps_between_gradient_updates = 10000000
agent_params.algorithm.beta_entropy = 0.0001
agent_params.network_wrappers['main'].learning_rate = 0.00001

agent_params.input_filter = InputFilter()
agent_params.input_filter.add_reward_filter('rescale', RewardRescaleFilter(1 / 20.))
agent_params.input_filter.add_observation_filter(
    'observation', 'normalize', ObservationNormalizationFilter())

###############
# Environment #
###############
env_params = GymVectorEnvironment(level=mujoco_v2['inverted_pendulum'])

########
# Test #
########
preset_validation_params = PresetValidationParameters()
preset_validation_params.test = True
preset_validation_params.min_reward_threshold = 400
preset_validation_params.max_episodes_to_achieve_reward = 1000
preset_validation_params.num_workers = 8
preset_validation_params.reward_test_level = 'inverted_pendulum'
preset_validation_params.trace_test_levels = ['inverted_pendulum', 'hopper']

graph_manager = BasicRLGraphManager(
    agent_params=agent_params,
    env_params=env_params,
agent_params.algorithm.discount = 0.999
agent_params.algorithm.optimization_epochs = 10
agent_params.algorithm.estimate_state_value_using_gae = True
agent_params.algorithm.num_steps_between_copying_online_weights_to_target = EnvironmentEpisodes(20)
agent_params.algorithm.num_consecutive_playing_steps = EnvironmentEpisodes(20)

agent_params.exploration = CategoricalParameters()

###############
# Environment #
###############
turtlebot3_input_filter = InputFilter(is_a_reference_filter=True)
turtlebot3_input_filter.add_observation_filter('observation', 'to_grayscale',
                                               ObservationRGBToYFilter())
turtlebot3_input_filter.add_observation_filter('observation', 'to_uint8',
                                               ObservationToUInt8Filter(0, 255))
turtlebot3_input_filter.add_observation_filter('observation', 'stacking',
                                               ObservationStackingFilter(1))

env_params = GymVectorEnvironment()
env_params.default_input_filter = turtlebot3_input_filter
env_params.level = 'SageMaker-TurtleBot3-Discrete-v0'

vis_params = VisualizationParameters()
vis_params.dump_mp4 = False

########
# Test #
########
preset_validation_params = PresetValidationParameters()
preset_validation_params.test = True
preset_validation_params.min_reward_threshold = 400
preset_validation_params.max_episodes_to_achieve_reward = 1000

graph_manager = BasicRLGraphManager(agent_params=agent_params,
                                    env_params=env_params,
agent_params.algorithm.discount = 0.99
agent_params.algorithm.optimization_epochs = 10
agent_params.algorithm.estimate_state_value_using_gae = True
agent_params.algorithm.num_steps_between_copying_online_weights_to_target = EnvironmentSteps(2048)

# Distributed Coach synchronization type.
agent_params.algorithm.distributed_coach_synchronization_type = DistributedCoachSynchronizationType.SYNC

agent_params.pre_network_filter = InputFilter()
agent_params.pre_network_filter.add_observation_filter(
    'observation', 'normalize_observation',
    ObservationNormalizationFilter(name='normalize_observation'))

###############
# Environment #
###############
env_params = GymVectorEnvironment(level='CartPole-v0')
env_params.custom_reward_threshold = 200

# Set the target success
env_params.target_success_rate = 1.0

########
# Test #
########
preset_validation_params = PresetValidationParameters()
preset_validation_params.test = True
preset_validation_params.min_reward_threshold = 150
preset_validation_params.max_episodes_to_achieve_reward = 400

graph_manager = BasicRLGraphManager(agent_params=agent_params,
                                    env_params=env_params,
                                    schedule_params=schedule_params,
                                    vis_params=VisualizationParameters(),
agent_params.exploration = EGreedyParameters()
agent_params.exploration.epsilon_schedule = LinearSchedule(1.0, 0.01, 10000)

agent_params.pre_network_filter.add_observation_filter(
    "observation",
    "normalize_observation",
    ObservationNormalizationFilter(name="normalize_observation"),
)

###############
# Environment #
###############
config = {
    "eplus_path": "/usr/local/EnergyPlus-8-8-0/",
    "weather_file": "weather/USA_CA_San.Francisco.Intl.AP.724940_TMY3.epw",
}
env_params = GymVectorEnvironment(
    level="eplus.envs.data_center_env:DataCenterEnv")
env_params.additional_simulator_parameters = {"config": config}

#################
# Visualization #
#################
vis_params = VisualizationParameters()
vis_params.dump_gifs = False

########
# Test #
########
preset_validation_params = PresetValidationParameters()
preset_validation_params.test = True
preset_validation_params.min_reward_threshold = 150
####################
# Graph Scheduling #
####################
schedule_params = ScheduleParameters()
schedule_params.improve_steps = TrainingSteps(10000000000)
schedule_params.steps_between_evaluation_periods = EnvironmentEpisodes(50)
schedule_params.evaluation_steps = EnvironmentEpisodes(3)
schedule_params.heatup_steps = EnvironmentSteps(0)

#########
# Agent #
#########
agent_params = PolicyGradientsAgentParameters()
agent_params.algorithm.apply_gradients_every_x_episodes = 5
agent_params.algorithm.num_steps_between_gradient_updates = 20000
agent_params.network_wrappers['main'].learning_rate = 0.0005

agent_params.input_filter = InputFilter()
agent_params.input_filter.add_reward_filter('rescale', RewardRescaleFilter(1/20.))
agent_params.input_filter.add_observation_filter('observation', 'normalize', ObservationNormalizationFilter())

###############
# Environment #
###############
env_params = GymVectorEnvironment(level="InvertedPendulum-v2")

graph_manager = BasicRLGraphManager(agent_params=agent_params,
                                    env_params=env_params,
                                    schedule_params=schedule_params,
                                    vis_params=VisualizationParameters())
    'observation'].scheme = [Dense(64)]
agent_params.network_wrappers['critic'].middleware_parameters.scheme = [Dense(64)]

agent_params.input_filter = InputFilter()
agent_params.input_filter.add_observation_filter(
    'observation', 'normalize', ObservationNormalizationFilter())

# Distributed Coach synchronization type.
agent_params.algorithm.distributed_coach_synchronization_type = DistributedCoachSynchronizationType.SYNC

###############
# Environment #
###############
env_params = GymVectorEnvironment()
env_params.level = '../automated_deep_compression/ADC.py:DistillerWrapperEnvironment'

vis_params = VisualizationParameters()
vis_params.dump_parameters_documentation = False
vis_params.render = True
vis_params.native_rendering = True
vis_params.dump_signals_to_csv_every_x_episodes = 1

graph_manager = BasicRLGraphManager(agent_params=agent_params,
                                    env_params=env_params,
                                    schedule_params=schedule_params,
                                    vis_params=vis_params)

########
# Test #
########
from rl_coach.agents.soft_actor_critic_agent import SoftActorCriticAgentParameters
from rl_coach.core_types import EnvironmentSteps, EnvironmentEpisodes
from rl_coach.environments.gym_environment import GymEnvironmentParameters, GymVectorEnvironment
from rl_coach.graph_managers.basic_rl_graph_manager import BasicRLGraphManager
from rl_coach.graph_managers.graph_manager import ScheduleParameters

import coinche.gym

#########
# Agent #
#########
agent_params = SoftActorCriticAgentParameters()

###############
# Environment #
###############
env_params = GymVectorEnvironment(level='coinche-v0')

####################
# Graph Scheduling #
####################
num_round_improve_steps = 80
num_round_heatup = 8
num_round_training = 300
num_round_evaluation = 10

schedule_params = ScheduleParameters()
schedule_params.improve_steps = EnvironmentEpisodes(num_round_improve_steps)
schedule_params.heatup_steps = EnvironmentEpisodes(num_round_heatup)
schedule_params.steps_between_evaluation_periods = EnvironmentEpisodes(num_round_training)
schedule_params.evaluation_steps = EnvironmentEpisodes(num_round_evaluation)
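
# The excerpt stops after the schedule. A preset like this would normally end by
# assembling the pieces into a graph manager; a minimal sketch, assuming the same
# BasicRLGraphManager pattern used by the other presets in this collection:
from rl_coach.base_parameters import VisualizationParameters

graph_manager = BasicRLGraphManager(agent_params=agent_params,
                                    env_params=env_params,
                                    schedule_params=schedule_params,
                                    vis_params=VisualizationParameters())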
    'desired_goal': InputEmbedderParameters(scheme=EmbedderScheme.Empty)
}
agent_params.algorithm.discount = 0.98
agent_params.algorithm.num_consecutive_playing_steps = EnvironmentEpisodes(16)
agent_params.algorithm.num_consecutive_training_steps = 40
agent_params.algorithm.num_steps_between_copying_online_weights_to_target = TrainingSteps(40)
agent_params.algorithm.rate_for_copying_weights_to_target = 0.05
agent_params.memory.max_size = (MemoryGranularity.Transitions, 10**6)
agent_params.exploration.epsilon_schedule = ConstantSchedule(0.2)
agent_params.exploration.evaluation_epsilon = 0

###############
# Environment #
###############
env_params = GymVectorEnvironment(
    level='rl_coach.environments.toy_problems.bit_flip:BitFlip')
env_params.additional_simulator_parameters = {
    'bit_length': bit_length,
    'mean_zero': True
}
env_params.custom_reward_threshold = -bit_length + 1

########
# Test #
########
preset_validation_params = PresetValidationParameters()
preset_validation_params.test = True
preset_validation_params.min_reward_threshold = -7.9
preset_validation_params.max_episodes_to_achieve_reward = 10000

graph_manager = BasicRLGraphManager(
agent_params.algorithm.gae_lambda = 0.95
agent_params.algorithm.discount = 0.999
agent_params.algorithm.optimization_epochs = 10
agent_params.algorithm.estimate_state_value_using_gae = True
agent_params.algorithm.num_steps_between_copying_online_weights_to_target = EnvironmentEpisodes(20)
agent_params.algorithm.num_consecutive_playing_steps = EnvironmentEpisodes(20)

agent_params.exploration = CategoricalParameters()
agent_params.memory.max_size = (MemoryGranularity.Transitions, 10**5)

###############
# Environment #
###############
MeiroRunnerFilter = InputFilter(is_a_reference_filter=True)

env_params = GymVectorEnvironment()
env_params.level = 'RoboMaker-MeiroRunner-v0'

vis_params = VisualizationParameters()
vis_params.dump_mp4 = False

########
# Test #
########
preset_validation_params = PresetValidationParameters()
preset_validation_params.test = True
preset_validation_params.min_reward_threshold = 400
preset_validation_params.max_episodes_to_achieve_reward = 1000

graph_manager = BasicRLGraphManager(
    agent_params=agent_params,
# Distributed Coach synchronization type.
agent_params.algorithm.distributed_coach_synchronization_type = DistributedCoachSynchronizationType.SYNC

agent_params.exploration = EGreedyParameters()
agent_params.exploration.epsilon_schedule = LinearSchedule(1.0, 0.01, 10000)

agent_params.pre_network_filter.add_observation_filter(
    'observation', 'normalize_observation',
    ObservationNormalizationFilter(name='normalize_observation'))

###############
# Environment #
###############
config = {
    'eplus_path': '/usr/local/EnergyPlus-8-8-0/',
    'weather_file': 'weather/USA_CA_San.Francisco.Intl.AP.724940_TMY3.epw'
}
env_params = GymVectorEnvironment(level='eplus.envs.data_center_env:DataCenterEnv')
env_params.additional_simulator_parameters = {'config': config}

#################
# Visualization #
#################
vis_params = VisualizationParameters()
vis_params.dump_gifs = False

########
# Test #
########
preset_validation_params = PresetValidationParameters()
preset_validation_params.test = True
preset_validation_params.min_reward_threshold = 150
#########
# Agent #
#########
agent_params = NStepQAgentParameters()
agent_params.algorithm.discount = 0.99
agent_params.network_wrappers['main'].learning_rate = 0.0001
agent_params.algorithm.num_steps_between_copying_online_weights_to_target = EnvironmentSteps(100)

agent_params.input_filter = InputFilter()
agent_params.input_filter.add_reward_filter('rescale', RewardRescaleFilter(1 / 200.))

###############
# Environment #
###############
env_params = GymVectorEnvironment(level='CartPole-v0')

########
# Test #
########
preset_validation_params = PresetValidationParameters()
preset_validation_params.test = True
preset_validation_params.min_reward_threshold = 150
preset_validation_params.max_episodes_to_achieve_reward = 200
preset_validation_params.num_workers = 8

graph_manager = BasicRLGraphManager(
    agent_params=agent_params,
    env_params=env_params,
    schedule_params=schedule_params,
    vis_params=VisualizationParameters(),
# ER size
experience_generating_agent_params.memory = EpisodicExperienceReplayParameters()
experience_generating_agent_params.memory.max_size = \
    (MemoryGranularity.Transitions,
     experience_generating_schedule_params.heatup_steps.num_steps +
     experience_generating_schedule_params.improve_steps.num_steps + 1)

# E-Greedy schedule
experience_generating_agent_params.exploration.epsilon_schedule = LinearSchedule(1.0, 0.01, DATASET_SIZE)
experience_generating_agent_params.exploration.evaluation_epsilon = 0

################
# Environment #
################
env_params = GymVectorEnvironment(level='MountainCar-v0')

########
# Test #
########
preset_validation_params = PresetValidationParameters()
preset_validation_params.test = True
preset_validation_params.min_reward_threshold = 150
preset_validation_params.max_episodes_to_achieve_reward = 50
preset_validation_params.read_csv_tries = 500

graph_manager = BatchRLGraphManager(agent_params=agent_params,
                                    experience_generating_agent_params=experience_generating_agent_params,
                                    experience_generating_schedule_params=experience_generating_schedule_params,
                                    env_params=env_params,
                                    schedule_params=schedule_params,
agent_params.algorithm.distributed_coach_synchronization_type = DistributedCoachSynchronizationType.SYNC

###############
# Environment #
###############
SilverstoneInputFilter = InputFilter(is_a_reference_filter=True)
SilverstoneInputFilter.add_observation_filter('observation', 'to_grayscale',
                                              ObservationRGBToYFilter())
SilverstoneInputFilter.add_observation_filter('observation', 'to_uint8',
                                              ObservationToUInt8Filter(0, 255))
SilverstoneInputFilter.add_observation_filter('observation', 'stacking',
                                              ObservationStackingFilter(1))

env_params = GymVectorEnvironment()
env_params.default_input_filter = SilverstoneInputFilter
env_params.level = 'DeepRacerRacetrackCustomActionSpaceEnv-v0'

vis_params = VisualizationParameters()
vis_params.dump_mp4 = False

########
# Test #
########
preset_validation_params = PresetValidationParameters()
preset_validation_params.test = True
preset_validation_params.min_reward_threshold = 400
preset_validation_params.max_episodes_to_achieve_reward = 1000

graph_manager = BasicRLGraphManager(
from rl_coach.agents.clipped_ppo_agent import ClippedPPOAgentParameters
from rl_coach.environments.gym_environment import GymVectorEnvironment
from rl_coach.graph_managers.basic_rl_graph_manager import BasicRLGraphManager
from rl_coach.graph_managers.graph_manager import SimpleSchedule
from rl_coach.core_types import *
# from rl_coach.environments.environment import SelectedPhaseOnlyDumpMethod, MaxDumpMethod
from rl_coach import logger
from rl_coach.base_parameters import TaskParameters
from rl_coach.base_parameters import VisualizationParameters
from rl_coach.architectures.embedder_parameters import InputEmbedderParameters

################
# Environment #
################
env_params = GymVectorEnvironment(level='simple_corridor_env:SimpleCorridor')

#########
# Agent #
#########
agent_params = ClippedPPOAgentParameters()

#################
# Visualization #
#################
# env_params.frame_skip = 5  # to make sure the gifs work without skipping steps

vis_params = VisualizationParameters()
vis_params.dump_gifs = False