Example #1
from rl_coach.agents.ddpg_agent import DDPGAgentParameters
from rl_coach.architectures.embedder_parameters import InputEmbedderParameters
from rl_coach.architectures.layers import Dense
from rl_coach.architectures.middleware_parameters import FCMiddlewareParameters
from rl_coach.base_parameters import EmbedderScheme
from rl_coach.core_types import EnvironmentEpisodes, EnvironmentSteps
from rl_coach.graph_managers.graph_manager import ScheduleParameters

cycles = 100  # 20 for the reach task; 100 for the other tasks

####################
# Graph Scheduling #
####################
schedule_params = ScheduleParameters()
schedule_params.improve_steps = EnvironmentEpisodes(cycles * 200)  # 200 epochs of `cycles` episodes each
schedule_params.steps_between_evaluation_periods = EnvironmentEpisodes(cycles)  # evaluate once every `cycles` episodes
schedule_params.evaluation_steps = EnvironmentEpisodes(10)
schedule_params.heatup_steps = EnvironmentSteps(0)

################
# Agent Params #
################
agent_params = DDPGAgentParameters()

# actor
actor_network = agent_params.network_wrappers['actor']
actor_network.learning_rate = 0.001
actor_network.batch_size = 256
actor_network.optimizer_epsilon = 1e-08
actor_network.adam_optimizer_beta1 = 0.9
actor_network.adam_optimizer_beta2 = 0.999
actor_network.input_embedders_parameters = {
    'observation': InputEmbedderParameters(scheme=EmbedderScheme.Empty),
    'desired_goal': InputEmbedderParameters(scheme=EmbedderScheme.Empty)
}
actor_network.middleware_parameters = FCMiddlewareParameters(scheme=[Dense(256), Dense(256), Dense(256)])
actor_network.heads_parameters[0].batchnorm = False
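
This fragment configures only the actor network; a complete preset sets up the critic wrapper the same way. A minimal sketch, assuming the critic mirrors the actor (the layer sizes and the extra 'action' embedder are assumptions, not taken from the source):

# Hypothetical critic setup mirroring the actor above; sizes are assumptions.
critic_network = agent_params.network_wrappers['critic']
critic_network.learning_rate = 0.001
critic_network.batch_size = 256
critic_network.input_embedders_parameters = {
    'observation': InputEmbedderParameters(scheme=EmbedderScheme.Empty),
    'desired_goal': InputEmbedderParameters(scheme=EmbedderScheme.Empty),
    'action': InputEmbedderParameters(scheme=EmbedderScheme.Empty)
}
critic_network.middleware_parameters = FCMiddlewareParameters(scheme=[Dense(256), Dense(256), Dense(256)])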
Example #2
from rl_coach.agents.ddpg_agent import DDPGAgentParameters
from rl_coach.architectures.tensorflow_components.architecture import Dense
from rl_coach.base_parameters import EmbedderScheme
from rl_coach.core_types import TrainingSteps, EnvironmentEpisodes, EnvironmentSteps
from rl_coach.filters.filter import MujocoInputFilter
from rl_coach.filters.reward.reward_rescale_filter import RewardRescaleFilter
from rl_coach.graph_managers.graph_manager import ScheduleParameters

####################
# Graph Scheduling #
####################

schedule_params = ScheduleParameters()
schedule_params.improve_steps = TrainingSteps(10000000000)
schedule_params.steps_between_evaluation_periods = EnvironmentEpisodes(20)
schedule_params.evaluation_steps = EnvironmentEpisodes(1)
schedule_params.heatup_steps = EnvironmentSteps(1000)

#########
# Agent #
#########
agent_params = DDPGAgentParameters()
agent_params.network_wrappers['actor'].input_embedders_parameters['measurements'] = \
    agent_params.network_wrappers['actor'].input_embedders_parameters.pop('observation')
agent_params.network_wrappers['critic'].input_embedders_parameters['measurements'] = \
    agent_params.network_wrappers['critic'].input_embedders_parameters.pop('observation')
agent_params.network_wrappers['actor'].input_embedders_parameters['measurements'].scheme = [Dense([300])]
agent_params.network_wrappers['actor'].middleware_parameters.scheme = [Dense([200])]
agent_params.network_wrappers['critic'].input_embedders_parameters['measurements'].scheme = [Dense([400])]
agent_params.network_wrappers['critic'].middleware_parameters.scheme = [Dense([300])]
agent_params.network_wrappers['critic'].input_embedders_parameters['action'].scheme = EmbedderScheme.Empty
agent_params.input_filter = MujocoInputFilter()
agent_params.input_filter.add_reward_filter("rescale", RewardRescaleFilter(1/10.))

###############
# Environment #
###############
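The environment block is cut off here in the source. A minimal sketch of how such a preset is usually completed, with a placeholder level since the original environment name was not preserved:

# Hypothetical completion; the original environment block was truncated.
from rl_coach.base_parameters import VisualizationParameters
from rl_coach.environments.gym_environment import GymVectorEnvironment
from rl_coach.graph_managers.basic_rl_graph_manager import BasicRLGraphManager

env_params = GymVectorEnvironment(level='...')  # placeholder; original level unknown

graph_manager = BasicRLGraphManager(agent_params=agent_params,
                                    env_params=env_params,
                                    schedule_params=schedule_params,
                                    vis_params=VisualizationParameters())
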
Example #3

import copy

from rl_coach.agents.ddpg_agent import DDPGAgentParameters
from rl_coach.core_types import TrainingSteps, EnvironmentEpisodes, EnvironmentSteps
from rl_coach.graph_managers.basic_rl_graph_manager import BasicRLGraphManager
from rl_coach.graph_managers.graph_manager import ScheduleParameters

####################
# Graph Scheduling #
####################
schedule_params = ScheduleParameters()
schedule_params.improve_steps = TrainingSteps(10000000000)
schedule_params.steps_between_evaluation_periods = EnvironmentEpisodes(20)
schedule_params.evaluation_steps = EnvironmentEpisodes(1)
schedule_params.heatup_steps = EnvironmentSteps(1000)

#########
# Agent #
#########
agent_params = DDPGAgentParameters()
agent_params.algorithm.num_consecutive_playing_steps = EnvironmentSteps(4)

# front camera
agent_params.network_wrappers['actor'].input_embedders_parameters['forward_camera'] = \
    agent_params.network_wrappers['actor'].input_embedders_parameters.pop('observation')
agent_params.network_wrappers['critic'].input_embedders_parameters['forward_camera'] = \
    agent_params.network_wrappers['critic'].input_embedders_parameters.pop('observation')

# left camera
agent_params.network_wrappers['actor'].input_embedders_parameters['left_camera'] = \
    copy.deepcopy(agent_params.network_wrappers['actor'].input_embedders_parameters['forward_camera'])
agent_params.network_wrappers['critic'].input_embedders_parameters['left_camera'] = \
    copy.deepcopy(agent_params.network_wrappers['critic'].input_embedders_parameters['forward_camera'])

# right camera (completed to mirror the left-camera block; the source is truncated here)
agent_params.network_wrappers['actor'].input_embedders_parameters['right_camera'] = \
    copy.deepcopy(agent_params.network_wrappers['actor'].input_embedders_parameters['forward_camera'])
agent_params.network_wrappers['critic'].input_embedders_parameters['right_camera'] = \
    copy.deepcopy(agent_params.network_wrappers['critic'].input_embedders_parameters['forward_camera'])
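
The three camera embedders suggest a CARLA driving preset, but the source is cut off before the environment block. A hypothetical sketch using rl_coach's CARLA integration (the camera enum values are assumptions):

# Hypothetical environment block; camera enum values are assumptions.
from rl_coach.environments.carla_environment import CarlaEnvironmentParameters, CameraTypes

env_params = CarlaEnvironmentParameters()
env_params.cameras = [CameraTypes.FRONT, CameraTypes.LEFT, CameraTypes.RIGHT]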
Example #4

from rl_coach.agents.ddpg_agent import DDPGAgentParameters
from rl_coach.architectures.layers import Dense
from rl_coach.base_parameters import EmbedderScheme, PresetValidationParameters
from rl_coach.core_types import EnvironmentSteps, EnvironmentEpisodes
from rl_coach.environments.environment import SingleLevelSelection
from rl_coach.environments.gym_environment import GymVectorEnvironment, mujoco_v2
from rl_coach.graph_managers.graph_manager import ScheduleParameters

####################
# Graph Scheduling #
####################

schedule_params = ScheduleParameters()
schedule_params.improve_steps = EnvironmentSteps(2000000)
schedule_params.steps_between_evaluation_periods = EnvironmentEpisodes(20)
schedule_params.evaluation_steps = EnvironmentEpisodes(1)
schedule_params.heatup_steps = EnvironmentSteps(10000)

#########
# Agent #
#########
agent_params = DDPGAgentParameters()
agent_params.network_wrappers['actor'].input_embedders_parameters['observation'].scheme = [Dense(400)]
agent_params.network_wrappers['actor'].middleware_parameters.scheme = [Dense(300)]
agent_params.network_wrappers['critic'].input_embedders_parameters['observation'].scheme = [Dense(400)]
agent_params.network_wrappers['critic'].middleware_parameters.scheme = [Dense(300)]
agent_params.network_wrappers['critic'].input_embedders_parameters['action'].scheme = EmbedderScheme.Empty

###############
# Environment #
###############
env_params = GymVectorEnvironment(level=SingleLevelSelection(mujoco_v2))

########
# Test #
########
preset_validation_params = PresetValidationParameters()
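
The fragment ends right after constructing the validation object. A sketch of the flags such a preset typically sets on it (the values below are assumptions, not from the source):

# Illustrative only; the flags exist on PresetValidationParameters, but these
# values are assumptions rather than the original preset's.
preset_validation_params.test = True
preset_validation_params.trace_test_levels = ['inverted_pendulum', 'hopper']

With a graph manager built as in the sketch under Example #2, such a preset is then launched through Coach's CLI, e.g. coach -p <preset_name> -lvl hopper.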
Example #5
from rl_coach.agents.ddpg_agent import DDPGAgentParameters
from rl_coach.architectures.tensorflow_components.architecture import Dense
from rl_coach.base_parameters import EmbedderScheme
from rl_coach.core_types import EnvironmentEpisodes, EnvironmentSteps
from rl_coach.graph_managers.graph_manager import ScheduleParameters

####################
# Block Scheduling #
####################
schedule_params = ScheduleParameters()
schedule_params.improve_steps = EnvironmentEpisodes(400)
schedule_params.steps_between_evaluation_periods = EnvironmentEpisodes(1000)
schedule_params.evaluation_steps = EnvironmentEpisodes(0)
schedule_params.heatup_steps = EnvironmentSteps(2)

#####################
# DDPG Agent Params #
#####################
agent_params = DDPGAgentParameters()
agent_params.network_wrappers['actor'].input_embedders_parameters['observation'].scheme = [Dense([300])]
agent_params.network_wrappers['actor'].middleware_parameters.scheme = [Dense([300])]
agent_params.network_wrappers['critic'].input_embedders_parameters['observation'].scheme = [Dense([300])]
agent_params.network_wrappers['critic'].middleware_parameters.scheme = [Dense([300])]
agent_params.network_wrappers['critic'].input_embedders_parameters['action'].scheme = EmbedderScheme.Empty
agent_params.network_wrappers['actor'].heads_parameters[0].activation_function = 'sigmoid'
# agent_params.network_wrappers['critic'].clip_gradients = 100
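
A note on the non-default head: the sigmoid activation squashes each actor output into (0, 1), which suits action spaces normalized to that range. A plain NumPy illustration of the activation (not rl_coach code):

# Standard logistic function, shown for illustration only.
import numpy as np

def sigmoid(x):
    return 1.0 / (1.0 + np.exp(-x))

print(sigmoid(np.array([-5.0, 0.0, 5.0])))  # all outputs lie strictly inside (0, 1)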
Example #6
from rl_coach.agents.ddpg_agent import DDPGAgentParameters
from rl_coach.architectures.layers import Dense
from rl_coach.base_parameters import EmbedderScheme
from rl_coach.core_types import EnvironmentEpisodes, EnvironmentSteps
from rl_coach.graph_managers.graph_manager import ScheduleParameters
from rl_coach.memories.memory import MemoryGranularity

steps_per_episode = 13

####################
# Block Scheduling #
####################
schedule_params = ScheduleParameters()
schedule_params.improve_steps = EnvironmentEpisodes(400)
schedule_params.steps_between_evaluation_periods = EnvironmentEpisodes(3)
schedule_params.evaluation_steps = EnvironmentEpisodes(1)
schedule_params.heatup_steps = EnvironmentEpisodes(100)  # alternative: EnvironmentEpisodes(120 * steps_per_episode)

#####################
# DDPG Agent Params #
#####################
agent_params = DDPGAgentParameters()
agent_params.network_wrappers['actor'].input_embedders_parameters['observation'].scheme = [Dense(300)]
agent_params.network_wrappers['actor'].middleware_parameters.scheme = [Dense(300)]
agent_params.network_wrappers['critic'].input_embedders_parameters['observation'].scheme = [Dense(300)]
agent_params.network_wrappers['critic'].middleware_parameters.scheme = [Dense(300)]
agent_params.network_wrappers['critic'].input_embedders_parameters['action'].scheme = EmbedderScheme.Empty
agent_params.network_wrappers['actor'].heads_parameters[0].activation_function = 'sigmoid'
# agent_params.network_wrappers['critic'].clip_gradients = 100
# agent_params.network_wrappers['actor'].clip_gradients = 100

agent_params.algorithm.rate_for_copying_weights_to_target = 0.01  # tau, the soft target update rate (DDPG paper, p. 11)
agent_params.algorithm.num_steps_between_copying_online_weights_to_target = EnvironmentSteps(1)
agent_params.algorithm.heatup_using_network_decisions = True
agent_params.algorithm.discount = 1
# Replay buffer size
agent_params.memory.max_size = (MemoryGranularity.Transitions, 2000)
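
For reference, the two algorithm settings above control DDPG's soft target updates: with num_steps_between_copying_online_weights_to_target set to one step, the target networks move a fraction tau toward the online weights after every environment step. A sketch of that rule (the standard DDPG update, not rl_coach internals):

# Polyak averaging with tau = rate_for_copying_weights_to_target = 0.01.
def soft_update(target_weights, online_weights, tau=0.01):
    return [(1.0 - tau) * t + tau * o for t, o in zip(target_weights, online_weights)]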
Example #7
import numpy as np

from rl_coach.agents.ddpg_agent import DDPGAgentParameters
from rl_coach.architectures.layers import Dense
from rl_coach.core_types import EnvironmentEpisodes
from rl_coach.filters.filter import InputFilter
# !!!! Enable when using branch "distiller-AMC-induced-changes"
from rl_coach.filters.reward import RewardEwmaNormalizationFilter
from rl_coach.graph_managers.graph_manager import ScheduleParameters

####################
# Graph Scheduling #
####################
schedule_params = ScheduleParameters()
schedule_params.steps_between_evaluation_periods = EnvironmentEpisodes(0)
schedule_params.evaluation_steps = EnvironmentEpisodes(0)

#####################
# DDPG Agent Params #
#####################
agent_params = DDPGAgentParameters()
agent_params.network_wrappers['actor'].input_embedders_parameters['observation'].scheme = [Dense(300)]
agent_params.network_wrappers['actor'].middleware_parameters.scheme = [Dense(300)]
agent_params.network_wrappers['actor'].heads_parameters[0].activation_function = 'sigmoid'
agent_params.network_wrappers['critic'].input_embedders_parameters['observation'].scheme = [Dense(300)]
agent_params.network_wrappers['critic'].middleware_parameters.scheme = [Dense(300)]
agent_params.network_wrappers['critic'].input_embedders_parameters['action'].scheme = [Dense(300)]
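
The fragment imports InputFilter and RewardEwmaNormalizationFilter but is cut off before using them. A hypothetical wiring, mirroring the reward-filter pattern from Example #2 and assuming the filter's default constructor (it only exists on the "distiller-AMC-induced-changes" branch noted above):

# Hypothetical use of the imported filters; constructor arguments are assumed.
agent_params.input_filter = InputFilter()
agent_params.input_filter.add_reward_filter('ewma_norm', RewardEwmaNormalizationFilter())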