def __init__(self):
    """Configure the network topology and training hyper-parameters.

    NOTE(review): the MeasurementsPredictionHead suggests this is a
    DFP-style network definition -- confirm against the enclosing class.
    """
    super().__init__()
    # All three inputs share an identical leaky-relu embedder configuration.
    self.input_embedders_parameters = {
        input_name: InputEmbedderParameters(activation_function='leaky_relu')
        for input_name in ('observation', 'measurements', 'goal')
    }
    # Atari-style conv tower followed by a dense layer for the pixel input.
    self.input_embedders_parameters['observation'].scheme = [
        Conv2d(32, 8, 4),
        Conv2d(64, 4, 2),
        Conv2d(64, 3, 1),
        Dense(512),
    ]
    # Measurements and goal each get their own (fresh) 3-layer 128-unit MLP.
    for vector_input in ('measurements', 'goal'):
        self.input_embedders_parameters[vector_input].scheme = [
            Dense(128),
            Dense(128),
            Dense(128),
        ]
    # No middleware layers; heads do the remaining work.
    self.middleware_parameters = FCMiddlewareParameters(
        activation_function='leaky_relu', scheme=MiddlewareScheme.Empty)
    self.heads_parameters = [
        MeasurementsPredictionHeadParameters(activation_function='leaky_relu')]
    self.async_training = False
    self.batch_size = 64
    self.adam_optimizer_beta1 = 0.95
def _build_module(self, input_layer):
    """Build the conv tower and set ``self.output`` to its final embedding.

    NOTE(review): the ``is_predictor`` branch (two extra dense+relu layers)
    matches the predictor/target asymmetry of Random Network Distillation --
    confirm against the enclosing class.
    """
    weight_init = Orthogonal(gain=np.sqrt(2))
    x = input_layer
    # Three conv blocks, each followed by batchnorm/activation/dropout;
    # BatchnormActivationDropout returns a list, [-1] takes its last output.
    for num_filters, kernel_size, strides in ((32, 8, 4), (64, 4, 2), (64, 3, 1)):
        x = Conv2d(num_filters=num_filters, kernel_size=kernel_size,
                   strides=strides)(x, kernel_initializer=weight_init)
        x = BatchnormActivationDropout(
            activation_function=self.activation_function)(x)[-1]
    x = tf.contrib.layers.flatten(x)
    # Only the predictor variant gets two extra relu-activated dense layers.
    if self.is_predictor:
        for _ in range(2):
            x = self.dense_layer(512)(x, kernel_initializer=weight_init)
            x = BatchnormActivationDropout(activation_function=tf.nn.relu)(x)[-1]
    # Final linear projection; no activation on the output embedding.
    self.output = self.dense_layer(512)(x, name='output',
                                        kernel_initializer=weight_init)
# Graph scheduling: long training run with periodic evaluation.
schedule_params = ScheduleParameters()
schedule_params.improve_steps = TrainingSteps(10000000)
schedule_params.steps_between_evaluation_periods = EnvironmentEpisodes(40)
schedule_params.evaluation_steps = EnvironmentEpisodes(5)
schedule_params.heatup_steps = EnvironmentSteps(0)

#########
# Agent #
#########
agent_params = ClippedPPOAgentParameters()
agent_params.network_wrappers['main'].learning_rate = 0.0003
# Observation embedder: standard Atari conv tower, but the last conv layer is
# replaced by an attention variant with a 256-unit attention component.
agent_params.network_wrappers['main'].input_embedders_parameters[
    'observation'].scheme = [
        Conv2d(32, 8, 4),
        Conv2d(64, 4, 2),
        Conv2dWithAttention(64, 3, 1, 256)
    ]
agent_params.network_wrappers['main'].input_embedders_parameters[
    'observation'].activation_function = 'relu'
agent_params.network_wrappers[
    'main'].middleware_parameters.activation_function = 'relu'
# NOTE(review): earlier experiment kept for reference -- attention in the
# middleware instead of the embedder.
#agent_params.network_wrappers['main'].middleware_parameters.scheme = [
#    Conv2dWithAttention(64, 3, 1, 1000)
#]
agent_params.network_wrappers['main'].batch_size = 64
agent_params.network_wrappers['main'].optimizer_epsilon = 1e-5
agent_params.network_wrappers['main'].adam_optimizer_beta2 = 0.999
# Graph Scheduling # #################### schedule_params = ScheduleParameters() schedule_params.improve_steps = TrainingSteps(10000000000) schedule_params.steps_between_evaluation_periods = EnvironmentEpisodes(100) schedule_params.evaluation_steps = EnvironmentEpisodes(3) schedule_params.heatup_steps = EnvironmentSteps(0) ######### # Agent # ######### agent_params = NStepQAgentParameters() agent_params.network_wrappers['main'].learning_rate = 0.0001 agent_params.network_wrappers['main'].input_embedders_parameters[ 'observation'].scheme = [Conv2d(16, 8, 4), Conv2d(32, 4, 2)] agent_params.network_wrappers['main'].middleware_parameters.scheme = [ Dense(256) ] ############### # Environment # ############### env_params = Atari(level=SingleLevelSelection(atari_deterministic_v4)) ######## # Test # ######## preset_validation_params = PresetValidationParameters() preset_validation_params.trace_test_levels = [
# Effectively unbounded training; evaluate every 500 training steps.
schedule_params.improve_steps = TrainingSteps(10000000000)
schedule_params.steps_between_evaluation_periods = TrainingSteps(500)
schedule_params.evaluation_steps = EnvironmentEpisodes(5)
schedule_params.heatup_steps = EnvironmentSteps(0)

################
# Agent Params #
################
agent_params = CILAgentParameters()

# forward camera and measurements input
# Camera embedder: interleaved conv / batchnorm+tanh blocks of increasing
# width (32 -> 64 -> 128), downsampling via stride-2 convolutions.
agent_params.network_wrappers['main'].input_embedders_parameters = {
    'CameraRGB': InputEmbedderParameters(
        scheme=[
            Conv2d(32, 5, 2),
            BatchnormActivationDropout(batchnorm=True,
                                       activation_function=tf.tanh),
            Conv2d(32, 3, 1),
            BatchnormActivationDropout(batchnorm=True,
                                       activation_function=tf.tanh),
            Conv2d(64, 3, 2),
            BatchnormActivationDropout(batchnorm=True,
                                       activation_function=tf.tanh),
            Conv2d(64, 3, 1),
            BatchnormActivationDropout(batchnorm=True,
                                       activation_function=tf.tanh),
            Conv2d(128, 3, 2),
            BatchnormActivationDropout(batchnorm=True,
                                       activation_function=tf.tanh),
            Conv2d(128, 3, 1),
def schemes(self):
    """Return the preset conv embedder schemes, keyed by EmbedderScheme depth."""
    # atari dqn
    medium_tower = [
        Conv2d(32, 8, 4),
        Conv2d(64, 4, 2),
        Conv2d(64, 3, 1),
    ]
    # carla
    deep_tower = [
        Conv2d(32, 5, 2),
        Conv2d(32, 3, 1),
        Conv2d(64, 3, 2),
        Conv2d(64, 3, 1),
        Conv2d(128, 3, 2),
        Conv2d(128, 3, 1),
        Conv2d(256, 3, 2),
        Conv2d(256, 3, 1),
    ]
    return {
        EmbedderScheme.Empty: [],
        EmbedderScheme.Shallow: [Conv2d(32, 3, 1)],
        EmbedderScheme.Medium: medium_tower,
        EmbedderScheme.Deep: deep_tower,
    }
agent_params = ClippedPPOAgentParameters()

# Two camera inputs: 'left_camera' uses the default embedder scheme, while
# 'stereo' gets an explicit conv tower ending in a Dense(256) projection.
# (Removed two stale commented-out variants of this same assignment -- one
# with dropout_rate=0.3 on both inputs, one with default schemes on both.)
agent_params.network_wrappers['main'].input_embedders_parameters = {
    'left_camera': InputEmbedderParameters(activation_function='relu'),
    'stereo': InputEmbedderParameters(
        scheme=[
            Conv2d(32, 3, 1),
            Conv2d(64, 3, 2),
            Conv2d(64, 3, 1),
            Conv2d(128, 3, 2),
            Conv2d(128, 3, 1),
            Dense(256),
        ],
        activation_function='relu'),
}
agent_params.network_wrappers['main'].learning_rate = 0.0003
agent_params.network_wrappers[
    'main'].middleware_parameters.activation_function = 'relu'
agent_params.network_wrappers['main'].batch_size = 64
agent_params.network_wrappers['main'].optimizer_epsilon = 1e-5
agent_params.network_wrappers['main'].adam_optimizer_beta2 = 0.999