示例#1
0
def main():
    args = parse_cmd_line()
    env_params = GymVectorEnvironment(level=args.env)
    if args.mode == 'rnd':
        print('training on pure random agent experience and training offline')
        train_on_pure_random(env_params, args.n_epochs, args.dataset_size)
    elif args.mode == 'olx':
        print(
            'using trained egent to generate experience and training offline')
        train_using_experience_agent(env_params, args.n_epochs,
                                     args.dataset_size)
    else:
        # train_on_csv_file('./acrobot_dataset.csv')
        print('loading experience from csv and training offline')
        # assert os.path.expanduser(args.mode), 'csv file does not exist'
        # extract obs_dim, act_dim from the file
        csv_file_name = os.path.join(os.path.expanduser('~'),
                                     'share/Data/MLA/L2P', args.mode)
        df = pd.read_csv(csv_file_name)
        act_dim = 1 + df['action'].max(
        )  # assuming discrete actions starting from 0
        obs_dim = len([s for s in df.columns if 'state_feature' in s])
        train_on_csv_file(csv_file_name,
                          args.n_epochs,
                          args.dataset_size,
                          obs_dim=obs_dim,
                          act_dim=act_dim)
    return
def evaluate(params):
    # file params
    experiment_path = os.path.join(params.output_data_dir)
    logger.experiment_path = os.path.join(experiment_path, 'evaluation')
    params.checkpoint_restore_dir = os.path.join(params.input_data_dir,
                                                 'checkpoint')
    checkpoint_file = os.path.join(params.checkpoint_restore_dir, 'checkpoint')

    inplace_change(checkpoint_file, "/opt/ml/output/data/checkpoint", ".")
    # Note that due to a tensorflow issue (https://github.com/tensorflow/tensorflow/issues/9146) we need to replace
    # the absolute path for the evaluation-from-a-checkpointed-model to work

    vis_params = VisualizationParameters()
    vis_params.dump_gifs = True

    task_params = TaskParameters(evaluate_only=True,
                                 experiment_path=logger.experiment_path)
    task_params.__dict__ = add_items_to_dict(task_params.__dict__,
                                             params.__dict__)

    graph_manager = BasicRLGraphManager(
        agent_params=ClippedPPOAgentParameters(),
        env_params=GymVectorEnvironment(level='TSP_env:TSPEasyEnv'),
        schedule_params=ScheduleParameters(),
        vis_params=vis_params)
    graph_manager = graph_manager.create_graph(task_parameters=task_params)
    graph_manager.evaluate(EnvironmentSteps(5))
示例#3
0
agent_params.algorithm.num_steps_between_copying_online_weights_to_target = EnvironmentSteps(
    2048)

# Distributed Coach synchronization type.
agent_params.algorithm.distributed_coach_synchronization_type = DistributedCoachSynchronizationType.SYNC

agent_params.exploration = EGreedyParameters()
agent_params.exploration.epsilon_schedule = LinearSchedule(1.0, 0.01, 10000)
agent_params.pre_network_filter.add_observation_filter(
    'observation', 'normalize_observation',
    ObservationNormalizationFilter(name='normalize_observation'))

###############
# Environment #
###############
env_params = GymVectorEnvironment(level='knapsack_env:KnapSackMediumEnv')

#################
# Visualization #
#################

vis_params = VisualizationParameters()
vis_params.dump_gifs = True

########
# Test #
########
preset_validation_params = PresetValidationParameters()
preset_validation_params.test = True
preset_validation_params.min_reward_threshold = 150
preset_validation_params.max_episodes_to_achieve_reward = 400
示例#4
0
    "observation"
].activation_function = "tanh"
agent_params.network_wrappers["main"].input_embedders_parameters["observation"].scheme = [Dense(64)]
agent_params.network_wrappers["main"].middleware_parameters.scheme = [Dense(64)]
agent_params.network_wrappers["main"].middleware_parameters.activation_function = "tanh"
agent_params.network_wrappers["main"].batch_size = 64
agent_params.network_wrappers["main"].optimizer_epsilon = 1e-5
agent_params.network_wrappers["main"].clip_gradients = 40.0

agent_params.exploration = EGreedyParameters()
agent_params.exploration.epsilon_schedule = LinearSchedule(1.0, 0.01, 10000)

###############
# Environment #
###############
env_params = GymVectorEnvironment(level="autoscalesim:SimpleScalableWebserviceSim")

########
# Test #
########
preset_validation_params = PresetValidationParameters()
preset_validation_params.test = True
preset_validation_params.min_reward_threshold = 150
preset_validation_params.max_episodes_to_achieve_reward = 400

graph_manager = BasicRLGraphManager(
    agent_params=agent_params,
    env_params=env_params,
    schedule_params=schedule_params,
    vis_params=VisualizationParameters(),
    preset_validation_params=preset_validation_params,
def get_graph_manager(**hp_dict):
    ####################
    # All Default Parameters #
    ####################
    params = {}
    params["batch_size"] = int(hp_dict.get("batch_size", 64))
    params["num_epochs"] = int(hp_dict.get("num_epochs", 10))
    params["stack_size"] = int(hp_dict.get("stack_size", 1))
    params["lr"] = float(hp_dict.get("lr", 0.0003))
    params["exploration_type"] = (hp_dict.get("exploration_type",
                                              "huber")).lower()
    params["e_greedy_value"] = float(hp_dict.get("e_greedy_value", .05))
    params["epsilon_steps"] = int(hp_dict.get("epsilon_steps", 10000))
    params["beta_entropy"] = float(hp_dict.get("beta_entropy", .01))
    params["discount_factor"] = float(hp_dict.get("discount_factor", .999))
    params["loss_type"] = hp_dict.get("loss_type",
                                      "Mean squared error").lower()
    params["num_episodes_between_training"] = int(
        hp_dict.get("num_episodes_between_training", 20))
    params["term_cond_max_episodes"] = int(
        hp_dict.get("term_cond_max_episodes", 100000))
    params["term_cond_avg_score"] = float(
        hp_dict.get("term_cond_avg_score", 100000))

    params_json = json.dumps(params, indent=2, sort_keys=True)
    print("Using the following hyper-parameters", params_json, sep='\n')

    ####################
    # Graph Scheduling #
    ####################
    schedule_params = ScheduleParameters()
    schedule_params.improve_steps = TrainingSteps(
        params["term_cond_max_episodes"])
    schedule_params.steps_between_evaluation_periods = EnvironmentEpisodes(40)
    schedule_params.evaluation_steps = EnvironmentEpisodes(5)
    schedule_params.heatup_steps = EnvironmentSteps(0)

    #########
    # Agent #
    #########
    agent_params = ClippedPPOAgentParameters()

    agent_params.network_wrappers['main'].learning_rate = params["lr"]
    agent_params.network_wrappers['main'].input_embedders_parameters[
        'observation'].activation_function = 'relu'
    agent_params.network_wrappers[
        'main'].middleware_parameters.activation_function = 'relu'
    agent_params.network_wrappers['main'].batch_size = params["batch_size"]
    agent_params.network_wrappers['main'].optimizer_epsilon = 1e-5
    agent_params.network_wrappers['main'].adam_optimizer_beta2 = 0.999

    if params["loss_type"] == "huber":
        agent_params.network_wrappers[
            'main'].replace_mse_with_huber_loss = True

    agent_params.algorithm.clip_likelihood_ratio_using_epsilon = 0.2
    agent_params.algorithm.clipping_decay_schedule = LinearSchedule(
        1.0, 0, 1000000)
    agent_params.algorithm.beta_entropy = params["beta_entropy"]
    agent_params.algorithm.gae_lambda = 0.95
    agent_params.algorithm.discount = params["discount_factor"]
    agent_params.algorithm.optimization_epochs = params["num_epochs"]
    agent_params.algorithm.estimate_state_value_using_gae = True
    agent_params.algorithm.num_steps_between_copying_online_weights_to_target = EnvironmentEpisodes(
        params["num_episodes_between_training"])
    agent_params.algorithm.num_consecutive_playing_steps = EnvironmentEpisodes(
        params["num_episodes_between_training"])

    agent_params.algorithm.distributed_coach_synchronization_type = DistributedCoachSynchronizationType.SYNC

    if params["exploration_type"] == "categorical":
        agent_params.exploration = CategoricalParameters()
    else:
        agent_params.exploration = EGreedyParameters()
        agent_params.exploration.epsilon_schedule = LinearSchedule(
            1.0, params["e_greedy_value"], params["epsilon_steps"])

    ###############
    # Environment #
    ###############
    SilverstoneInputFilter = InputFilter(is_a_reference_filter=True)
    SilverstoneInputFilter.add_observation_filter('observation',
                                                  'to_grayscale',
                                                  ObservationRGBToYFilter())
    SilverstoneInputFilter.add_observation_filter(
        'observation', 'to_uint8', ObservationToUInt8Filter(0, 255))
    SilverstoneInputFilter.add_observation_filter(
        'observation', 'stacking',
        ObservationStackingFilter(params["stack_size"]))

    env_params = GymVectorEnvironment()
    env_params.default_input_filter = SilverstoneInputFilter
    env_params.level = 'SilverstoneRacetrack-Discrete-v0'

    vis_params = VisualizationParameters()
    vis_params.dump_mp4 = False

    ########
    # Test #
    ########
    preset_validation_params = PresetValidationParameters()
    preset_validation_params.test = True
    preset_validation_params.min_reward_threshold = 400
    preset_validation_params.max_episodes_to_achieve_reward = 1000

    graph_manager = BasicRLGraphManager(
        agent_params=agent_params,
        env_params=env_params,
        schedule_params=schedule_params,
        vis_params=vis_params,
        preset_validation_params=preset_validation_params)
    return graph_manager, params_json
示例#6
0
#exploration.epsilon_schedule = LinearSchedule(0.5, 0.01, 100000)
agent_params.exploration = exploration
agent_params.memory.max_size = (MemoryGranularity.Transitions, 10**5)

###############
# Environment #
###############
DeepRacerInputFilter = InputFilter(is_a_reference_filter=True)
DeepRacerInputFilter.add_observation_filter('observation', 'to_grayscale',
                                            ObservationRGBToYFilter())
DeepRacerInputFilter.add_observation_filter('observation', 'to_uint8',
                                            ObservationToUInt8Filter(0, 255))
DeepRacerInputFilter.add_observation_filter('observation', 'stacking',
                                            ObservationStackingFilter(1))

env_params = GymVectorEnvironment()
env_params.default_input_filter = DeepRacerInputFilter
env_params.level = 'RoboMaker-DeepRacer-v0'

vis_params = VisualizationParameters()
vis_params.dump_mp4 = False
vis_params.tensorboard = True
vis_params.print_networks_summary = True

########
# Test #
########
preset_validation_params = PresetValidationParameters()
preset_validation_params.test = True
preset_validation_params.min_reward_threshold = 400
preset_validation_params.max_episodes_to_achieve_reward = 1000
from rl_coach.agents.clipped_ppo_agent import ClippedPPOAgentParameters
from rl_coach.environments.gym_environment import GymVectorEnvironment
from rl_coach.graph_managers.basic_rl_graph_manager import BasicRLGraphManager
from rl_coach.graph_managers.graph_manager import SimpleSchedule
from rl_coach.core_types import *
#from rl_coach.environments.environment import SelectedPhaseOnlyDumpMethod, MaxDumpMethod
from rl_coach import logger
from rl_coach.base_parameters import TaskParameters
from rl_coach.base_parameters import VisualizationParameters

################
#  Environment #
################

env_params = GymVectorEnvironment(level='VRP_abstract_env:VRPEasyEnv')

#########
# Agent #
#########

agent_params = ClippedPPOAgentParameters()

#################
# Visualization #
#################

env_params.frame_skip = 5  #to make sure the gifs work without skipping steps

vis_params = VisualizationParameters()
vis_params.dump_gifs = True
#vis_params.video_dump_methods = [SelectedPhaseOnlyDumpMethod(RunPhase.TEST), MaxDumpMethod()]
示例#8
0
    30)
agent_params.algorithm.num_consecutive_playing_steps = EnvironmentEpisodes(30)
agent_params.exploration = CategoricalParameters()

###############
# Environment #
###############
DeepRacerInputFilter = InputFilter(is_a_reference_filter=True)
DeepRacerInputFilter.add_observation_filter('observation', 'to_grayscale',
                                            ObservationRGBToYFilter())
DeepRacerInputFilter.add_observation_filter('observation', 'to_uint8',
                                            ObservationToUInt8Filter(0, 255))
DeepRacerInputFilter.add_observation_filter('observation', 'stacking',
                                            ObservationStackingFilter(1))

env_params = GymVectorEnvironment()
env_params.default_input_filter = DeepRacerInputFilter
env_params.level = 'SageMaker-DeepRacer-Discrete-v0'

vis_params = VisualizationParameters()
vis_params.dump_mp4 = False

########
# Test #
########
preset_validation_params = PresetValidationParameters()
preset_validation_params.test = True
preset_validation_params.min_reward_threshold = 4000
preset_validation_params.max_episodes_to_achieve_reward = 20

graph_manager = BasicRLGraphManager(
agent_params.network_wrappers['main'].input_embedders_parameters['observation'].scheme = [Conv2d(32, [1, 3], 1)]
agent_params.network_wrappers['main'].middleware_parameters.scheme = MiddlewareScheme.Empty
agent_params.network_wrappers['main'].batch_size = 64
agent_params.algorithm.clipping_decay_schedule = LinearSchedule(1.0, 0, 150000)
agent_params.algorithm.discount = 0.99
agent_params.algorithm.num_steps_between_copying_online_weights_to_target = EnvironmentSteps(2048)

# Distributed Coach synchronization type.
agent_params.algorithm.distributed_coach_synchronization_type = DistributedCoachSynchronizationType.SYNC

agent_params.exploration = EGreedyParameters()
agent_params.exploration.epsilon_schedule = LinearSchedule(1.0, 0.01, 10000)

###############
# Environment #
###############

env_params = GymVectorEnvironment(level='portfolio_env:PortfolioEnv')
env_params.__dict__['observation_space_type'] = ObservationSpaceType.Tensor

########
# Test #
########

preset_validation_params = PresetValidationParameters()
preset_validation_params.test = True

graph_manager = BasicRLGraphManager(agent_params=agent_params, env_params=env_params,
                                    schedule_params=schedule_params, vis_params=VisualizationParameters(),
                                    preset_validation_params=preset_validation_params)
def get_graph_manager(**hp_dict):
    ####################
    # All Default Parameters #
    ####################
    params = {}
    params["batch_size"] = int(hp_dict.get("batch_size", 64))
    params["num_epochs"] = int(hp_dict.get("num_epochs", 10))
    params["stack_size"] = int(hp_dict.get("stack_size", 1))
    params["lr"] = float(hp_dict.get("lr", 0.0003))
    params["exploration_type"] = (hp_dict.get("exploration_type", "huber")).lower()
    params["e_greedy_value"] = float(hp_dict.get("e_greedy_value", .05))
    params["epsilon_steps"] = int(hp_dict.get("epsilon_steps", 10000))
    params["beta_entropy"] = float(hp_dict.get("beta_entropy", .01))
    params["discount_factor"] = float(hp_dict.get("discount_factor", .999))
    params["loss_type"] = hp_dict.get("loss_type", "Mean squared error").lower()
    params["num_episodes_between_training"] = int(hp_dict.get("num_episodes_between_training", 20))
    params["term_cond_max_episodes"] = int(hp_dict.get("term_cond_max_episodes", 100000))
    params["term_cond_avg_score"] = float(hp_dict.get("term_cond_avg_score", 100000))

    params_json = json.dumps(params, indent=2, sort_keys=True)
    print("Using the following hyper-parameters", params_json, sep='\n')

    ####################
    # Graph Scheduling #
    ####################
    schedule_params = ScheduleParameters()
    schedule_params.improve_steps = TrainingSteps(params["term_cond_max_episodes"])
    schedule_params.steps_between_evaluation_periods = EnvironmentEpisodes(40)
    schedule_params.evaluation_steps = EnvironmentEpisodes(5)
    schedule_params.heatup_steps = EnvironmentSteps(0)

    #########
    # Agent #
    #########
    agent_params = ClippedPPOAgentParameters()

    agent_params.network_wrappers['main'].learning_rate = params["lr"]
    agent_params.network_wrappers['main'].input_embedders_parameters['observation'].activation_function = 'relu'
    agent_params.network_wrappers['main'].middleware_parameters.activation_function = 'relu'
    agent_params.network_wrappers['main'].batch_size = params["batch_size"]
    agent_params.network_wrappers['main'].optimizer_epsilon = 1e-5
    agent_params.network_wrappers['main'].adam_optimizer_beta2 = 0.999

    if params["loss_type"] == "huber":
        agent_params.network_wrappers['main'].replace_mse_with_huber_loss = True

    agent_params.algorithm.clip_likelihood_ratio_using_epsilon = 0.2
    agent_params.algorithm.clipping_decay_schedule = LinearSchedule(1.0, 0, 1000000)
    agent_params.algorithm.beta_entropy = params["beta_entropy"]
    agent_params.algorithm.gae_lambda = 0.95
    agent_params.algorithm.discount = params["discount_factor"]
    agent_params.algorithm.optimization_epochs = params["num_epochs"]
    agent_params.algorithm.estimate_state_value_using_gae = True
    agent_params.algorithm.num_steps_between_copying_online_weights_to_target = EnvironmentEpisodes(
        params["num_episodes_between_training"])
    agent_params.algorithm.num_consecutive_playing_steps = EnvironmentEpisodes(params["num_episodes_between_training"])

    agent_params.algorithm.distributed_coach_synchronization_type = DistributedCoachSynchronizationType.SYNC

    if params["exploration_type"] == "categorical":
        agent_params.exploration = CategoricalParameters()
    else:
        agent_params.exploration = EGreedyParameters()
        agent_params.exploration.epsilon_schedule = LinearSchedule(1.0,
                                                                   params["e_greedy_value"],
                                                                   params["epsilon_steps"])

    ###############
    # Environment #
    ###############
    SilverstoneInputFilter = InputFilter(is_a_reference_filter=True)
    SilverstoneInputFilter.add_observation_filter('observation', 'to_grayscale', ObservationRGBToYFilter())
    SilverstoneInputFilter.add_observation_filter('observation', 'to_uint8', ObservationToUInt8Filter(0, 255))
    SilverstoneInputFilter.add_observation_filter('observation', 'stacking',
                                                  ObservationStackingFilter(params["stack_size"]))

    env_params = GymVectorEnvironment()
    env_params.default_input_filter = SilverstoneInputFilter
    env_params.level = 'SilverstoneRacetrack-Discrete-v0'

    vis_params = VisualizationParameters()
    vis_params.dump_mp4 = False

    ########
    # Test #
    ########
    preset_validation_params = PresetValidationParameters()
    preset_validation_params.test = True
    preset_validation_params.min_reward_threshold = 400
    preset_validation_params.max_episodes_to_achieve_reward = 1000

    graph_manager = BasicRLGraphManager(agent_params=agent_params, env_params=env_params,
                                        schedule_params=schedule_params, vis_params=vis_params,
                                        preset_validation_params=preset_validation_params)
    return graph_manager, params_json
示例#11
0
agent_params = BootstrappedDQNAgentParameters()
agent_params.network_wrappers['main'].learning_rate = 0.00025
agent_params.memory.max_size = (MemoryGranularity.Transitions, 1000000)
agent_params.algorithm.discount = 0.99
agent_params.algorithm.num_consecutive_playing_steps = EnvironmentSteps(4)
agent_params.network_wrappers['main'].heads_parameters[
    0].num_output_head_copies = num_output_head_copies
agent_params.network_wrappers['main'].heads_parameters[
    0].rescale_gradient_from_head_by_factor = 1.0 / num_output_head_copies
agent_params.exploration.bootstrapped_data_sharing_probability = 1.0
agent_params.exploration.architecture_num_q_heads = num_output_head_copies
agent_params.exploration.epsilon_schedule = ConstantSchedule(0)
agent_params.input_filter = NoInputFilter()
agent_params.output_filter = NoOutputFilter()

###############
# Environment #
###############
env_params = GymVectorEnvironment(
    level=
    'rl_coach.environments.toy_problems.exploration_chain:ExplorationChain')
env_params.additional_simulator_parameters = {
    'chain_length': N,
    'max_steps': N + 7
}

graph_manager = BasicRLGraphManager(agent_params=agent_params,
                                    env_params=env_params,
                                    schedule_params=schedule_params,
                                    vis_params=VisualizationParameters())
示例#12
0
from rl_coach.agents.clipped_ppo_agent import ClippedPPOAgentParameters
from rl_coach.architectures.layers import Dense
from rl_coach.schedules import LinearSchedule
from rl_coach.filters.filter import InputFilter
from rl_coach.filters.observation.observation_normalization_filter import ObservationNormalizationFilter
from rl_coach.base_parameters import VisualizationParameters, PresetValidationParameters, DistributedCoachSynchronizationType
from rl_coach.core_types import TrainingSteps, EnvironmentEpisodes, EnvironmentSteps
from rl_coach.environments.gym_environment import GymVectorEnvironment
from rl_coach.graph_managers.basic_rl_graph_manager import BasicRLGraphManager
from rl_coach.graph_managers.graph_manager import ScheduleParameters

###############
# Environment #
###############
import gym_guess_number  # pylint: disable=unused-import
env_params = GymVectorEnvironment(level='GuessNumber-v0')

####################
# Graph Scheduling #
####################
training_steps = 200000
schedule_params = ScheduleParameters()
schedule_params.improve_steps = TrainingSteps(training_steps)
schedule_params.steps_between_evaluation_periods = EnvironmentEpisodes(100)
schedule_params.evaluation_steps = EnvironmentEpisodes(5)
schedule_params.heatup_steps = EnvironmentSteps(0)

######################
# Agent - ClippedPPO #
######################
from rl_coach.agents.clipped_ppo_agent import ClippedPPOAgentParameters
from rl_coach.environments.gym_environment import GymVectorEnvironment
from rl_coach.graph_managers.basic_rl_graph_manager import BasicRLGraphManager
from rl_coach.graph_managers.graph_manager import SimpleSchedule
from rl_coach.core_types import *
#from rl_coach.environments.environment import SelectedPhaseOnlyDumpMethod, MaxDumpMethod
from rl_coach import logger
from rl_coach.base_parameters import TaskParameters
from rl_coach.base_parameters import VisualizationParameters

################
#  Environment #
################

env_params = GymVectorEnvironment(level='TSP_env:TSPMediumEnv')

#########
# Agent #
#########

agent_params = ClippedPPOAgentParameters()

#################
# Visualization #
#################

env_params.frame_skip = 5  #to make sure the gifs work without skipping steps

vis_params = VisualizationParameters()
vis_params.dump_gifs = True
#vis_params.video_dump_methods = [SelectedPhaseOnlyDumpMethod(RunPhase.TEST), MaxDumpMethod()]
from rl_coach.agents.clipped_ppo_agent import ClippedPPOAgentParameters
from rl_coach.environments.gym_environment import GymVectorEnvironment
from rl_coach.graph_managers.basic_rl_graph_manager import BasicRLGraphManager
from rl_coach.graph_managers.graph_manager import SimpleSchedule
from rl_coach.core_types import *
#from rl_coach.environments.environment import SelectedPhaseOnlyDumpMethod, MaxDumpMethod
from rl_coach import logger
from rl_coach.base_parameters import TaskParameters
from rl_coach.base_parameters import VisualizationParameters

################
#  Environment #
################

env_params = GymVectorEnvironment(level='TSP_env:TSPMediumEnv')

#########
# Agent #
#########

agent_params = ClippedPPOAgentParameters()

#################
# Visualization #
#################

env_params.frame_skip = 5 #to make sure the gifs work without skipping steps

vis_params = VisualizationParameters()
vis_params.dump_gifs=True
#vis_params.video_dump_methods = [SelectedPhaseOnlyDumpMethod(RunPhase.TEST), MaxDumpMethod()]
示例#15
0
agent_params.algorithm.num_steps_between_gradient_updates = 20
agent_params.algorithm.beta_entropy = 0.005
agent_params.network_wrappers['main'].learning_rate = 0.00002
agent_params.network_wrappers['main'].input_embedders_parameters['observation'] = \
    InputEmbedderParameters(scheme=[Dense(200)])
agent_params.network_wrappers['main'].middleware_parameters = LSTMMiddlewareParameters(scheme=MiddlewareScheme.Empty,
                                                                                       number_of_lstm_cells=128)

agent_params.input_filter = InputFilter()
agent_params.input_filter.add_reward_filter('rescale', RewardRescaleFilter(1/20.))
agent_params.input_filter.add_observation_filter('observation', 'normalize', ObservationNormalizationFilter())

###############
# Environment #
###############
env_params = GymVectorEnvironment(level=SingleLevelSelection(mujoco_v2))

########
# Test #
########
preset_validation_params = PresetValidationParameters()
preset_validation_params.test = False
preset_validation_params.min_reward_threshold = 400
preset_validation_params.max_episodes_to_achieve_reward = 1000
preset_validation_params.num_workers = 8
preset_validation_params.reward_test_level = 'inverted_pendulum'
preset_validation_params.trace_test_levels = ['inverted_pendulum', 'hopper']

graph_manager = BasicRLGraphManager(agent_params=agent_params, env_params=env_params,
                                    schedule_params=schedule_params, vis_params=VisualizationParameters(),
                                    preset_validation_params=preset_validation_params)
示例#16
0
agent_params.exploration.continuous_exploration_policy_parameters.evaluation_noise = 0

agent_params.input_filter = InputFilter()
agent_params.input_filter.add_observation_filter('observation', 'clipping', ObservationClippingFilter(-200, 200))

agent_params.pre_network_filter = InputFilter()
agent_params.pre_network_filter.add_observation_filter('observation', 'normalize_observation',
                                                       ObservationNormalizationFilter(name='normalize_observation'))
agent_params.pre_network_filter.add_observation_filter('achieved_goal', 'normalize_achieved_goal',
                                                       ObservationNormalizationFilter(name='normalize_achieved_goal'))
agent_params.pre_network_filter.add_observation_filter('desired_goal', 'normalize_desired_goal',
                                                       ObservationNormalizationFilter(name='normalize_desired_goal'))

###############
# Environment #
###############
env_params = GymVectorEnvironment(level=SingleLevelSelection(fetch_v1))
env_params.custom_reward_threshold = -49

########
# Test #
########
preset_validation_params = PresetValidationParameters()
preset_validation_params.trace_test_levels = ['slide', 'pick_and_place', 'push', 'reach']


graph_manager = BasicRLGraphManager(agent_params=agent_params, env_params=env_params,
                                    schedule_params=schedule_params, vis_params=VisualizationParameters(),
                                    preset_validation_params=preset_validation_params)

agent_params.algorithm.num_consecutive_playing_steps = EnvironmentEpisodes(20)
agent_params.exploration = CategoricalParameters()
agent_params.memory.max_size = (MemoryGranularity.Transitions, 10**5)

###############
# Environment #
###############
turtlebot3_input_filter = InputFilter(is_a_reference_filter=True)
turtlebot3_input_filter.add_observation_filter('observation', 'to_grayscale',
                                               ObservationRGBToYFilter())
turtlebot3_input_filter.add_observation_filter(
    'observation', 'to_uint8', ObservationToUInt8Filter(0, 255))
turtlebot3_input_filter.add_observation_filter('observation', 'stacking',
                                               ObservationStackingFilter(1))

env_params = GymVectorEnvironment()
env_params.default_input_filter = turtlebot3_input_filter
env_params.level = 'RoboMaker-ObjectTracker-v0'

vis_params = VisualizationParameters()
vis_params.dump_mp4 = False

########
# Test #
########
preset_validation_params = PresetValidationParameters()
preset_validation_params.test = True
preset_validation_params.min_reward_threshold = 400
preset_validation_params.max_episodes_to_achieve_reward = 1000

graph_manager = BasicRLGraphManager(
示例#18
0
# NN configuration
agent_params.network_wrappers['main'].batch_size = 32
agent_params.network_wrappers['main'].learning_rate = 0.0001
agent_params.network_wrappers['main'].input_embedders_parameters['observation'].scheme = [Dense(512)]
agent_params.network_wrappers['main'].replace_mse_with_huber_loss = False
agent_params.network_wrappers['main'].heads_parameters = [DuelingQHeadParameters()]
agent_params.network_wrappers['main'].middleware_parameters.scheme = [Dense(512)]

# ER size
agent_params.memory.max_size = (MemoryGranularity.Transitions, 10000)

# E-Greedy schedule
agent_params.exploration.epsilon_schedule = LinearSchedule(1.0, 0.01, 40000)

#############
# Environment
#############

env_params = GymVectorEnvironment(level='trading_env:TradingEnv')

##################
# Manage resources
##################

preset_validation_params = PresetValidationParameters()
preset_validation_params.test = True

graph_manager = BasicRLGraphManager(agent_params=agent_params, env_params=env_params, schedule_params=schedule_params,
                                    vis_params=VisualizationParameters(),
                                    preset_validation_params=preset_validation_params)
# from rl_coach.environments.environment import SelectedPhaseOnlyDumpMethod, MaxDumpMethod
from rl_coach import logger
from rl_coach.agents.clipped_ppo_agent import ClippedPPOAgentParameters
from rl_coach.base_parameters import TaskParameters, VisualizationParameters
from rl_coach.core_types import *
from rl_coach.environments.gym_environment import GymVectorEnvironment
from rl_coach.graph_managers.basic_rl_graph_manager import BasicRLGraphManager
from rl_coach.graph_managers.graph_manager import SimpleSchedule

################
#  Environment #
################

env_params = GymVectorEnvironment(level="TSP_env:TSPEasyEnv")

#########
# Agent #
#########

agent_params = ClippedPPOAgentParameters()

#################
# Visualization #
#################

env_params.frame_skip = 5  # to make sure the gifs work without skipping steps

vis_params = VisualizationParameters()
vis_params.dump_gifs = True
# vis_params.video_dump_methods = [SelectedPhaseOnlyDumpMethod(RunPhase.TEST), MaxDumpMethod()]
示例#20
0
agent_params.algorithm.beta_entropy = 0
agent_params.algorithm.gae_lambda = 0.95
agent_params.algorithm.discount = 1
# How many epochs to train the network using supervised methods
agent_params.algorithm.optimization_epochs = 10
agent_params.algorithm.estimate_state_value_using_gae = True

# Distributed Coach synchronization type.
agent_params.algorithm.distributed_coach_synchronization_type = DistributedCoachSynchronizationType.SYNC

agent_params.pre_network_filter = InputFilter()
agent_params.pre_network_filter.add_observation_filter(
    'observation', 'normalize_observation',
    ObservationNormalizationFilter(name='normalize_observation'))

###############
# Environment #
###############
env_params = GymVectorEnvironment()
env_params.level = './environment.py:DistillerWrapperEnvironment'

vis_params = VisualizationParameters()
vis_params.dump_parameters_documentation = False
vis_params.render = True
vis_params.native_rendering = True
vis_params.dump_signals_to_csv_every_x_episodes = 1
graph_manager = BasicRLGraphManager(agent_params=agent_params,
                                    env_params=env_params,
                                    schedule_params=schedule_params,
                                    vis_params=vis_params)
示例#21
0
agent_params.algorithm.num_consecutive_playing_steps = EnvironmentSteps(1)

# NN configuration
agent_params.network_wrappers['main'].learning_rate = 0.00025
agent_params.network_wrappers['main'].replace_mse_with_huber_loss = False

# ER size
agent_params.memory.max_size = (MemoryGranularity.Transitions, 40000)

# E-Greedy schedule
agent_params.exploration.epsilon_schedule = LinearSchedule(1.0, 0.01, 10000)

################
#  Environment #
################
env_params = GymVectorEnvironment()
env_params.level = "env.gym_super_mario_bros.smb_env:SuperMarioBrosEnv"

#################
# Visualization #
#################

vis_params = VisualizationParameters()
vis_params.dump_gifs = True

########
# Test #
########
preset_validation_params = PresetValidationParameters()
preset_validation_params.test = True
preset_validation_params.min_reward_threshold = 150
示例#22
0
agent_params = ActorCriticAgentParameters()
agent_params.algorithm.apply_gradients_every_x_episodes = 1
agent_params.algorithm.num_steps_between_gradient_updates = 10000000
agent_params.algorithm.beta_entropy = 0.0001
agent_params.network_wrappers['main'].learning_rate = 0.00001

agent_params.input_filter = InputFilter()
agent_params.input_filter.add_reward_filter('rescale',
                                            RewardRescaleFilter(1 / 20.))
agent_params.input_filter.add_observation_filter(
    'observation', 'normalize', ObservationNormalizationFilter())

###############
# Environment #
###############
env_params = GymVectorEnvironment(level=mujoco_v2['inverted_pendulum'])

########
# Test #
########
preset_validation_params = PresetValidationParameters()
preset_validation_params.test = True
preset_validation_params.min_reward_threshold = 400
preset_validation_params.max_episodes_to_achieve_reward = 1000
preset_validation_params.num_workers = 8
preset_validation_params.reward_test_level = 'inverted_pendulum'
preset_validation_params.trace_test_levels = ['inverted_pendulum', 'hopper']

graph_manager = BasicRLGraphManager(
    agent_params=agent_params,
    env_params=env_params,
agent_params.algorithm.discount = 0.999
agent_params.algorithm.optimization_epochs = 10
agent_params.algorithm.estimate_state_value_using_gae = True
agent_params.algorithm.num_steps_between_copying_online_weights_to_target = EnvironmentEpisodes(20)
agent_params.algorithm.num_consecutive_playing_steps = EnvironmentEpisodes(20)
agent_params.exploration = CategoricalParameters()

###############
# Environment #
###############
turtlebot3_input_filter = InputFilter(is_a_reference_filter=True)
turtlebot3_input_filter.add_observation_filter('observation', 'to_grayscale', ObservationRGBToYFilter())
turtlebot3_input_filter.add_observation_filter('observation', 'to_uint8', ObservationToUInt8Filter(0, 255))
turtlebot3_input_filter.add_observation_filter('observation', 'stacking', ObservationStackingFilter(1))

env_params = GymVectorEnvironment()
env_params.default_input_filter = turtlebot3_input_filter
env_params.level = 'SageMaker-TurtleBot3-Discrete-v0'

vis_params = VisualizationParameters()
vis_params.dump_mp4 = False

########
# Test #
########
preset_validation_params = PresetValidationParameters()
preset_validation_params.test = True
preset_validation_params.min_reward_threshold = 400
preset_validation_params.max_episodes_to_achieve_reward = 1000

graph_manager = BasicRLGraphManager(agent_params=agent_params, env_params=env_params,
agent_params.algorithm.discount = 0.99
agent_params.algorithm.optimization_epochs = 10
agent_params.algorithm.estimate_state_value_using_gae = True
agent_params.algorithm.num_steps_between_copying_online_weights_to_target = EnvironmentSteps(2048)

# Distributed Coach synchronization type.
agent_params.algorithm.distributed_coach_synchronization_type = DistributedCoachSynchronizationType.SYNC

agent_params.pre_network_filter = InputFilter()
agent_params.pre_network_filter.add_observation_filter('observation', 'normalize_observation',
                                                        ObservationNormalizationFilter(name='normalize_observation'))

###############
# Environment #
###############
env_params = GymVectorEnvironment(level='CartPole-v0')
env_params.custom_reward_threshold = 200
# Set the target success
env_params.target_success_rate = 1.0


########
# Test #
########
preset_validation_params = PresetValidationParameters()
preset_validation_params.test = True
preset_validation_params.min_reward_threshold = 150
preset_validation_params.max_episodes_to_achieve_reward = 400

graph_manager = BasicRLGraphManager(agent_params=agent_params, env_params=env_params,
                                    schedule_params=schedule_params, vis_params=VisualizationParameters(),
agent_params.exploration = EGreedyParameters()
agent_params.exploration.epsilon_schedule = LinearSchedule(1.0, 0.01, 10000)
agent_params.pre_network_filter.add_observation_filter(
    "observation",
    "normalize_observation",
    ObservationNormalizationFilter(name="normalize_observation"),
)

###############
# Environment #
###############
config = {
    "eplus_path": "/usr/local/EnergyPlus-8-8-0/",
    "weather_file": "weather/USA_CA_San.Francisco.Intl.AP.724940_TMY3.epw",
}
env_params = GymVectorEnvironment(
    level="eplus.envs.data_center_env:DataCenterEnv")
env_params.additional_simulator_parameters = {"config": config}

#################
# Visualization #
#################

vis_params = VisualizationParameters()
vis_params.dump_gifs = False

########
# Test #
########
preset_validation_params = PresetValidationParameters()
preset_validation_params.test = True
preset_validation_params.min_reward_threshold = 150
# Graph Scheduling #
####################
schedule_params = ScheduleParameters()
schedule_params.improve_steps = TrainingSteps(10000000000)
schedule_params.steps_between_evaluation_periods = EnvironmentEpisodes(50)
schedule_params.evaluation_steps = EnvironmentEpisodes(3)
schedule_params.heatup_steps = EnvironmentSteps(0)

#########
# Agent #
#########
agent_params = PolicyGradientsAgentParameters()
agent_params.algorithm.apply_gradients_every_x_episodes = 5
agent_params.algorithm.num_steps_between_gradient_updates = 20000
agent_params.network_wrappers['main'].learning_rate = 0.0005

agent_params.input_filter = InputFilter()
agent_params.input_filter.add_reward_filter('rescale', RewardRescaleFilter(1/20.))
agent_params.input_filter.add_observation_filter('observation', 'normalize', ObservationNormalizationFilter())


###############
# Environment #
###############
env_params = GymVectorEnvironment(level="InvertedPendulum-v2")

graph_manager = BasicRLGraphManager(agent_params=agent_params, env_params=env_params,
                                    schedule_params=schedule_params, vis_params=VisualizationParameters())


示例#27
0
    'observation'].scheme = [Dense(64)]
agent_params.network_wrappers['critic'].middleware_parameters.scheme = [
    Dense(64)
]

agent_params.input_filter = InputFilter()
agent_params.input_filter.add_observation_filter(
    'observation', 'normalize', ObservationNormalizationFilter())

# Distributed Coach synchronization type.
agent_params.algorithm.distributed_coach_synchronization_type = DistributedCoachSynchronizationType.SYNC

###############
# Environment #
###############
env_params = GymVectorEnvironment()
env_params.level = '../automated_deep_compression/ADC.py:DistillerWrapperEnvironment'

vis_params = VisualizationParameters()
vis_params.dump_parameters_documentation = False
vis_params.render = True
vis_params.native_rendering = True
vis_params.dump_signals_to_csv_every_x_episodes = 1
graph_manager = BasicRLGraphManager(agent_params=agent_params,
                                    env_params=env_params,
                                    schedule_params=schedule_params,
                                    vis_params=vis_params)

########
# Test #
########
示例#28
0
from rl_coach.agents.soft_actor_critic_agent import SoftActorCriticAgentParameters
from rl_coach.core_types import EnvironmentSteps, EnvironmentEpisodes
from rl_coach.environments.gym_environment import GymEnvironmentParameters, GymVectorEnvironment
from rl_coach.graph_managers.basic_rl_graph_manager import BasicRLGraphManager
from rl_coach.graph_managers.graph_manager import ScheduleParameters
import coinche.gym

#########
# Agent #
#########
agent_params = SoftActorCriticAgentParameters()

###############
# Environment #
###############
env_params = GymVectorEnvironment(level='coinche-v0')

####################
# Graph Scheduling #
####################
num_round_improve_steps = 80
num_round_heatup = 8
num_round_training = 300
num_round_evaluation = 10

schedule_params = ScheduleParameters()
schedule_params.improve_steps = EnvironmentEpisodes(num_round_improve_steps)
schedule_params.heatup_steps = EnvironmentEpisodes(num_round_heatup)
schedule_params.steps_between_evaluation_periods = EnvironmentEpisodes(num_round_training)
schedule_params.evaluation_steps = EnvironmentEpisodes(num_round_evaluation)
示例#29
0
    'desired_goal': InputEmbedderParameters(scheme=EmbedderScheme.Empty)
}
agent_params.algorithm.discount = 0.98
agent_params.algorithm.num_consecutive_playing_steps = EnvironmentEpisodes(16)
agent_params.algorithm.num_consecutive_training_steps = 40
agent_params.algorithm.num_steps_between_copying_online_weights_to_target = TrainingSteps(
    40)
agent_params.algorithm.rate_for_copying_weights_to_target = 0.05
agent_params.memory.max_size = (MemoryGranularity.Transitions, 10**6)
agent_params.exploration.epsilon_schedule = ConstantSchedule(0.2)
agent_params.exploration.evaluation_epsilon = 0

###############
# Environment #
###############
env_params = GymVectorEnvironment(
    level='rl_coach.environments.toy_problems.bit_flip:BitFlip')
env_params.additional_simulator_parameters = {
    'bit_length': bit_length,
    'mean_zero': True
}
env_params.custom_reward_threshold = -bit_length + 1

########
# Test #
########
preset_validation_params = PresetValidationParameters()
preset_validation_params.test = True
preset_validation_params.min_reward_threshold = -7.9
preset_validation_params.max_episodes_to_achieve_reward = 10000

graph_manager = BasicRLGraphManager(
agent_params.algorithm.gae_lambda = 0.95
agent_params.algorithm.discount = 0.999
agent_params.algorithm.optimization_epochs = 10
agent_params.algorithm.estimate_state_value_using_gae = True
agent_params.algorithm.num_steps_between_copying_online_weights_to_target = EnvironmentEpisodes(
    20)
agent_params.algorithm.num_consecutive_playing_steps = EnvironmentEpisodes(20)
agent_params.exploration = CategoricalParameters()
agent_params.memory.max_size = (MemoryGranularity.Transitions, 10**5)

###############
# Environment #
###############
MeiroRunnerFilter = InputFilter(is_a_reference_filter=True)

env_params = GymVectorEnvironment()
env_params.level = 'RoboMaker-MeiroRunner-v0'

vis_params = VisualizationParameters()
vis_params.dump_mp4 = False

########
# Test #
########
preset_validation_params = PresetValidationParameters()
preset_validation_params.test = True
preset_validation_params.min_reward_threshold = 400
preset_validation_params.max_episodes_to_achieve_reward = 1000

graph_manager = BasicRLGraphManager(
    agent_params=agent_params,
# Distributed Coach synchronization type.
agent_params.algorithm.distributed_coach_synchronization_type = DistributedCoachSynchronizationType.SYNC

agent_params.exploration = EGreedyParameters()
agent_params.exploration.epsilon_schedule = LinearSchedule(1.0, 0.01, 10000)
agent_params.pre_network_filter.add_observation_filter('observation', 'normalize_observation',
    ObservationNormalizationFilter(name='normalize_observation'))

###############
# Environment #
###############
config = {
    'eplus_path': '/usr/local/EnergyPlus-8-8-0/',
    'weather_file': 'weather/USA_CA_San.Francisco.Intl.AP.724940_TMY3.epw'
}
env_params = GymVectorEnvironment(level='eplus.envs.data_center_env:DataCenterEnv')
env_params.additional_simulator_parameters = {'config': config }

#################
# Visualization #
#################

vis_params = VisualizationParameters()
vis_params.dump_gifs = False

########
# Test #
########
preset_validation_params = PresetValidationParameters()
preset_validation_params.test = True
preset_validation_params.min_reward_threshold = 150
# Agent #
#########
agent_params = NStepQAgentParameters()

agent_params.algorithm.discount = 0.99
agent_params.network_wrappers['main'].learning_rate = 0.0001
agent_params.algorithm.num_steps_between_copying_online_weights_to_target = EnvironmentSteps(
    100)
agent_params.input_filter = InputFilter()
agent_params.input_filter.add_reward_filter('rescale',
                                            RewardRescaleFilter(1 / 200.))

###############
# Environment #
###############
env_params = GymVectorEnvironment(level='CartPole-v0')

########
# Test #
########
preset_validation_params = PresetValidationParameters()
preset_validation_params.test = True
preset_validation_params.min_reward_threshold = 150
preset_validation_params.max_episodes_to_achieve_reward = 200
preset_validation_params.num_workers = 8

graph_manager = BasicRLGraphManager(
    agent_params=agent_params,
    env_params=env_params,
    schedule_params=schedule_params,
    vis_params=VisualizationParameters(),
# ER size
experience_generating_agent_params.memory = EpisodicExperienceReplayParameters()
experience_generating_agent_params.memory.max_size = \
    (MemoryGranularity.Transitions,
     experience_generating_schedule_params.heatup_steps.num_steps +
     experience_generating_schedule_params.improve_steps.num_steps + 1)

# E-Greedy schedule
experience_generating_agent_params.exploration.epsilon_schedule = LinearSchedule(1.0, 0.01, DATASET_SIZE)
experience_generating_agent_params.exploration.evaluation_epsilon = 0


################
#  Environment #
################
env_params = GymVectorEnvironment(level='MountainCar-v0')

########
# Test #
########
preset_validation_params = PresetValidationParameters()
preset_validation_params.test = True
preset_validation_params.min_reward_threshold = 150
preset_validation_params.max_episodes_to_achieve_reward = 50
preset_validation_params.read_csv_tries = 500

graph_manager = BatchRLGraphManager(agent_params=agent_params,
                                    experience_generating_agent_params=experience_generating_agent_params,
                                    experience_generating_schedule_params=experience_generating_schedule_params,
                                    env_params=env_params,
                                    schedule_params=schedule_params,
agent_params.algorithm.distributed_coach_synchronization_type = DistributedCoachSynchronizationType.SYNC

###############
# Environment #
###############
SilverstoneInputFilter = InputFilter(is_a_reference_filter=True)

SilverstoneInputFilter.add_observation_filter('observation', 'to_grayscale',
                                              ObservationRGBToYFilter())
SilverstoneInputFilter.add_observation_filter('observation', 'to_uint8',
                                              ObservationToUInt8Filter(0, 255))
SilverstoneInputFilter.add_observation_filter('observation', 'stacking',
                                              ObservationStackingFilter(1))

env_params = GymVectorEnvironment()
env_params.default_input_filter = SilverstoneInputFilter
env_params.level = 'DeepRacerRacetrackCustomActionSpaceEnv-v0'

vis_params = VisualizationParameters()
vis_params.dump_mp4 = False

########
# Test #
########
preset_validation_params = PresetValidationParameters()
preset_validation_params.test = True
preset_validation_params.min_reward_threshold = 400
preset_validation_params.max_episodes_to_achieve_reward = 1000

graph_manager = BasicRLGraphManager(
from rl_coach.agents.clipped_ppo_agent import ClippedPPOAgentParameters
from rl_coach.environments.gym_environment import GymVectorEnvironment
from rl_coach.graph_managers.basic_rl_graph_manager import BasicRLGraphManager
from rl_coach.graph_managers.graph_manager import SimpleSchedule
from rl_coach.core_types import *
#from rl_coach.environments.environment import SelectedPhaseOnlyDumpMethod, MaxDumpMethod
from rl_coach import logger
from rl_coach.base_parameters import TaskParameters
from rl_coach.base_parameters import VisualizationParameters
from rl_coach.architectures.embedder_parameters import InputEmbedderParameters

################
#  Environment #
################

env_params = GymVectorEnvironment(level='simple_corridor_env:SimpleCorridor')

#########
# Agent #
#########

agent_params = ClippedPPOAgentParameters()

#################
# Visualization #
#################

#env_params.frame_skip = 5 #to make sure the gifs work without skipping steps

vis_params = VisualizationParameters()
vis_params.dump_gifs = False