def __init__(self, obs_space: gym.spaces.Space, action_space: gym.spaces.Space,
             num_outputs: int, model_config: ModelConfigDict, name: str):
    TorchModelV2.__init__(self, obs_space, action_space, num_outputs,
                          model_config, name)
    nn.Module.__init__(self)

    # Default to an empty dict so missing "custom_model_config" does not
    # crash the `.get()` call below.
    custom_configs = model_config.get("custom_model_config", {})
    self._sensor_seq_len = custom_configs.get("sensor_seq_len", 10)
    activation = model_config.get("fcnet_activation", "tanh")

    encoder_layer = nn.TransformerEncoderLayer(
        d_model=3, nhead=3, batch_first=True, dim_feedforward=128)
    self._transformer_encoder = nn.TransformerEncoder(
        encoder_layer, num_layers=2)

    self._all_fc1 = SlimFC(in_size=3,
                           out_size=64,
                           initializer=normc_initializer(1.0),
                           activation_fn=activation)
    self._all_fc2 = SlimFC(in_size=64,
                           out_size=16,
                           initializer=normc_initializer(1.0),
                           activation_fn=activation)
    self._action_layer = SlimFC(in_size=16,
                                out_size=num_outputs,
                                initializer=normc_initializer(0.01),
                                activation_fn=None)
    self._value_layer = SlimFC(in_size=16,
                               out_size=1,
                               initializer=normc_initializer(0.01),
                               activation_fn=None)
    # Holds the current "base" output (before logits layer).
    self._features = None
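# A minimal, self-contained sketch of the shape flow through the transformer
# encoder configured above (pure PyTorch). The batch size, sequence length,
# and the mean-pool reduction are illustrative assumptions, not part of the
# original model; they only show how a [B, seq_len, 3] observation could be
# reduced to the 3-dim input the 3 -> 64 SlimFC layer expects.
import torch
import torch.nn as nn

encoder_layer = nn.TransformerEncoderLayer(
    d_model=3, nhead=3, batch_first=True, dim_feedforward=128)
encoder = nn.TransformerEncoder(encoder_layer, num_layers=2)

obs = torch.randn(4, 10, 3)    # [batch, sensor_seq_len, features]
encoded = encoder(obs)         # [4, 10, 3]: the encoder preserves the shape
pooled = encoded.mean(dim=1)   # [4, 3]: one plausible reduction over time
print(encoded.shape, pooled.shape)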
def _validate_config(config: ModelConfigDict, framework: str) -> None:
    """Validates a given model config dict.

    Args:
        config (ModelConfigDict): The "model" sub-config dict
            within the Trainer's config dict.
        framework (str): One of "jax", "tf2", "tf", "tfe", or "torch".

    Raises:
        ValueError: If something is wrong with the given config.
    """
    if config.get("use_attention") and config.get("use_lstm"):
        raise ValueError("Only one of `use_lstm` or `use_attention` may "
                         "be set to True!")
    if framework == "jax":
        if config.get("use_attention"):
            raise ValueError("`use_attention` not available for "
                             "framework=jax so far!")
        elif config.get("use_lstm"):
            raise ValueError("`use_lstm` not available for "
                             "framework=jax so far!")
    if config.get("framestack") != DEPRECATED_VALUE:
        # deprecation_warning(
        #     old="framestack", new="num_framestacks (int)", error=False)
        # If old behavior is desired, disable traj. view-style
        # framestacking.
        config["num_framestacks"] = 0
def _get_v2_model_class(input_space: gym.Space,
                        model_config: ModelConfigDict,
                        framework: str = "tf") -> Type[ModelV2]:
    VisionNet = None
    ComplexNet = None
    Keras_FCNet = None
    Keras_VisionNet = None

    if framework in ["tf2", "tf", "tfe"]:
        from ray.rllib.models.tf.fcnet import \
            FullyConnectedNetwork as FCNet, \
            Keras_FullyConnectedNetwork as Keras_FCNet
        from ray.rllib.models.tf.visionnet import \
            VisionNetwork as VisionNet, \
            Keras_VisionNetwork as Keras_VisionNet
        from ray.rllib.models.tf.complex_input_net import \
            ComplexInputNetwork as ComplexNet
    elif framework == "torch":
        from ray.rllib.models.torch.fcnet import (FullyConnectedNetwork as
                                                  FCNet)
        from ray.rllib.models.torch.visionnet import (VisionNetwork as
                                                      VisionNet)
        from ray.rllib.models.torch.complex_input_net import \
            ComplexInputNetwork as ComplexNet
    elif framework == "jax":
        from ray.rllib.models.jax.fcnet import (FullyConnectedNetwork as
                                                FCNet)
    else:
        raise ValueError(
            "framework={} not supported in `ModelCatalog._get_v2_model_"
            "class`!".format(framework))

    orig_space = input_space if not hasattr(
        input_space, "original_space") else input_space.original_space

    # `input_space` is 3D Box -> VisionNet.
    if isinstance(input_space, Box) and len(input_space.shape) == 3:
        if framework == "jax":
            raise NotImplementedError("No non-FC default net for JAX yet!")
        elif model_config.get("_use_default_native_models") and \
                Keras_VisionNet:
            return Keras_VisionNet
        return VisionNet
    # `input_space` is 1D Box -> FCNet.
    elif isinstance(input_space, Box) and len(input_space.shape) == 1 and \
            (not isinstance(orig_space, (Dict, Tuple)) or not any(
                isinstance(s, Box) and len(s.shape) >= 2
                for s in tree.flatten(orig_space.spaces))):
        # Keras native requested AND no auto-rnn-wrapping.
        if model_config.get("_use_default_native_models") and Keras_FCNet:
            return Keras_FCNet
        # Classic ModelV2 FCNet.
        else:
            return FCNet
    # Complex (Dict, Tuple, 2D Box (flatten), Discrete, MultiDiscrete).
    else:
        if framework == "jax":
            raise NotImplementedError("No non-FC default net for JAX yet!")
        return ComplexNet
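# Hedged usage sketch of the dispatch above. It assumes the function is
# importable as shown (in RLlib it lives on ModelCatalog as a static method)
# and that the spaces below are representative; the empty model_config means
# no Keras-native models are requested.
import numpy as np
from gym.spaces import Box, Tuple

image_obs = Box(0.0, 1.0, shape=(84, 84, 3), dtype=np.float32)
vector_obs = Box(-1.0, 1.0, shape=(16,), dtype=np.float32)

# 3D Box -> VisionNetwork, 1D Box -> FullyConnectedNetwork,
# Tuple containing an image -> ComplexInputNetwork.
vision_cls = _get_v2_model_class(image_obs, {}, framework="torch")
fc_cls = _get_v2_model_class(vector_obs, {}, framework="torch")
complex_cls = _get_v2_model_class(
    Tuple([image_obs, vector_obs]), {}, framework="torch")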
def _validate_config(config: ModelConfigDict, framework: str) -> None:
    """Validates a given model config dict.

    Args:
        config (ModelConfigDict): The "model" sub-config dict
            within the Trainer's config dict.
        framework (str): One of "jax", "tf2", "tf", "tfe", or "torch".

    Raises:
        ValueError: If something is wrong with the given config.
    """
    # Soft-deprecate custom preprocessors.
    if config.get("custom_preprocessor") is not None:
        deprecation_warning(
            old="model.custom_preprocessor",
            new="gym.ObservationWrapper around your env or handle complex "
            "inputs inside your Model",
            error=False,
        )

    if config.get("use_attention") and config.get("use_lstm"):
        raise ValueError("Only one of `use_lstm` or `use_attention` may "
                         "be set to True!")
    if framework == "jax":
        if config.get("use_attention"):
            raise ValueError("`use_attention` not available for "
                             "framework=jax so far!")
        elif config.get("use_lstm"):
            raise ValueError("`use_lstm` not available for "
                             "framework=jax so far!")
def __init__(self, obs_space: gym.spaces.Space, action_space: gym.spaces.Space,
             num_outputs: int, model_config: ModelConfigDict, name: str):
    TorchModelV2.__init__(self, obs_space, action_space, num_outputs,
                          model_config, name)
    nn.Module.__init__(self)

    # Nonlinearity for fully connected net (tanh, relu). Default: "tanh".
    activation = model_config.get("fcnet_activation")
    # Number of hidden layers for fully connected net. Default: [256, 256].
    hiddens = [256, 256]  # model_config.get("fcnet_hiddens", [])
    # Whether to skip the final linear layer used to resize the hidden layer
    # outputs to size `num_outputs`. If True, then the last hidden layer
    # should already match num_outputs.
    # no_final_linear = False
    self.vf_share_layers = model_config.get("vf_share_layers")
    self.free_log_std = False

    self._embedd = nn.Embedding(
        int(obs_space.high[0][-1]) + 1, CARD_EMBEDD_SIZE)

    # Player hot-encoded = 3; number of cards played per trick = 4.
    # CARD_EMBEDD_SIZE * number of cards played per trick = 4.
    self._hidden_layers = self._build_hidden_layers(
        first_layer_size=FIRST_LAYER_SIZE,
        hiddens=hiddens,
        activation=activation)

    self._value_branch_separate = None
    self._value_embedding = None
    if not self.vf_share_layers:
        # Build a parallel set of hidden layers for the value net.
        self._value_embedding = nn.Embedding(
            int(obs_space.high[0][-1]) + 1, CARD_EMBEDD_SIZE)
        self._value_branch_separate = self._build_hidden_layers(
            first_layer_size=FIRST_LAYER_SIZE,
            hiddens=hiddens,
            activation=activation)

    self._logits = SlimFC(in_size=hiddens[-1],
                          out_size=num_outputs,
                          initializer=normc_initializer(0.01),
                          activation_fn=None)
    self._value_branch = SlimFC(in_size=hiddens[-1],
                                out_size=1,
                                initializer=normc_initializer(1.0),
                                activation_fn=None)
    # Holds the current "base" output (before logits layer).
    self._features = None
    # Holds the last input, in case value branch is separate.
    self._cards_in = None
    self._players_in = None
def sensorModel(num_actions):
    modelConfig = ModelConfigDict()
    modelConfig["fcnet_hiddens"] = [[5 * 3 + 4, 32], [32, 64], [64, 128],
                                    [128, 64], [64, 32], [32, num_actions]]
    modelConfig["fcnet_activation"] = "relu"
    return modelConfig
def vgg16(num_actions, inChannel):
    modelConfig = ModelConfigDict()
    modelConfig["inChannel"] = inChannel
    # Full VGG16 conv stack, disabled in favor of the truncated stack below.
    # Each entry: [out_channels, kernel_size, stride, padding,
    #              [pooling (max), kernel, stride]]
    # modelConfig["conv_filters"] = [[64, 3, 1, 1, [0]],
    #                                [64, 3, 1, 1, [1, 2, 2]],
    #                                [128, 3, 1, 1, [0]],
    #                                [128, 3, 1, 1, [1, 2, 2]],
    #                                [256, 3, 1, 1, [0]],
    #                                [256, 3, 1, 1, [0]],
    #                                [256, 3, 1, 1, [1, 2, 2]],
    #                                [512, 3, 1, 1, [0]],
    #                                [512, 3, 1, 1, [0]],
    #                                [512, 3, 1, 1, [1, 2, 2]],
    #                                [512, 3, 1, 1, [0]],
    #                                [512, 3, 1, 1, [0]],
    #                                [512, 3, 1, 1, [1, 2, 2]]]
    modelConfig["conv_filters"] = [
        # [out_channels, kernel_size, stride, padding,
        #  [pooling (max), kernel, stride]]
        [64, 3, 1, 1, [0]],
        [64, 3, 1, 1, [1, 2, 2]],
        [128, 3, 1, 1, [0]],
        [128, 3, 1, 1, [1, 2, 2]],
        [256, 3, 1, 1, [0]],
        [256, 3, 1, 1, [1, 2, 2]]
    ]
    modelConfig["conv_activation"] = "relu"
    modelConfig["fcnet_hiddens"] = [[256 * 32 * 32 + 5 * 3 + 4, 64],
                                    [64, num_actions]]
    modelConfig["fcnet_activation"] = "relu"
    return modelConfig
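# Sanity-check sketch for the flatten size above: the stride-1, padding-1,
# kernel-3 convolutions preserve the spatial size, and each of the three
# 2x2/2 max-pools halves it. Assuming a 256x256 input (an assumption; the
# original does not state the input resolution), 256 / 2**3 = 32, which
# matches the 256 * 32 * 32 term in `fcnet_hiddens`.
def conv_out(size, kernel, stride, padding):
    return (size + 2 * padding - kernel) // stride + 1

size = 256  # assumed input width/height
for out_ch, kernel, stride, padding, pool in [
        [64, 3, 1, 1, [0]], [64, 3, 1, 1, [1, 2, 2]],
        [128, 3, 1, 1, [0]], [128, 3, 1, 1, [1, 2, 2]],
        [256, 3, 1, 1, [0]], [256, 3, 1, 1, [1, 2, 2]]]:
    size = conv_out(size, kernel, stride, padding)
    if pool[0]:  # max-pool flag: [1, kernel, stride]
        size = conv_out(size, pool[1], pool[2], 0)
print(size)  # -> 32, so the flattened conv output is 256 * 32 * 32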
def __init__(self, obs_space: gym.spaces.Space, action_space: gym.spaces.Space,
             num_outputs: int, model_config: ModelConfigDict, name: str):
    if not model_config.get("conv_filters"):
        raise ValueError("Config for conv_filters is required")
    TorchModelV2.__init__(self, obs_space, action_space, num_outputs,
                          model_config, name)
    nn.Module.__init__(self)

    activation = self.model_config.get("conv_activation")
    filters = self.model_config["conv_filters"]
    assert len(filters) > 0, \
        "Must provide at least 1 entry in `conv_filters`!"

    # Whether the last layer is the output of a Flattened (rather than
    # a n x (1,1) Conv2D).
    self.last_layer_is_flattened = False
    self._logits = None
    # Holds the current "base" output (before logits layer).
    self._features = None

    self.num_outputs = num_outputs if num_outputs else action_space.shape[0]
    self.filters = filters
    self.activation = activation
    self.obs_space = obs_space
    self._create_model()
def _validate_config(
    config: ModelConfigDict, action_space: gym.spaces.Space, framework: str
) -> None:
    """Validates a given model config dict.

    Args:
        config: The "model" sub-config dict within the Trainer's config
            dict.
        action_space: The action space of the model, whose config are
            validated.
        framework: One of "jax", "tf2", "tf", "tfe", or "torch".

    Raises:
        ValueError: If something is wrong with the given config.
    """
    # Soft-deprecate custom preprocessors.
    if config.get("custom_preprocessor") is not None:
        deprecation_warning(
            old="model.custom_preprocessor",
            new="gym.ObservationWrapper around your env or handle complex "
            "inputs inside your Model",
            error=False,
        )

    if config.get("use_attention") and config.get("use_lstm"):
        raise ValueError(
            "Only one of `use_lstm` or `use_attention` may be set to True!"
        )

    # For complex action spaces, only allow prev-action inputs to
    # LSTMs and attention nets iff `_disable_action_flattening=True`.
    # TODO: `_disable_action_flattening=True` will be the default in
    #  the future.
    if (
        (
            config.get("lstm_use_prev_action")
            or config.get("attention_use_n_prev_actions", 0) > 0
        )
        and not config.get("_disable_action_flattening")
        and isinstance(action_space, (Tuple, Dict))
    ):
        raise ValueError(
            "For your complex action space (Tuple|Dict) and your model's "
            "`prev-actions` setup, you must set "
            "`_disable_action_flattening=True` in your main config dict!"
        )

    if framework == "jax":
        if config.get("use_attention"):
            raise ValueError(
                "`use_attention` not available for framework=jax so far!"
            )
        elif config.get("use_lstm"):
            raise ValueError(
                "`use_lstm` not available for framework=jax so far!"
            )
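# Hedged sketch of the complex-action-space guard above: a Tuple action
# space combined with `lstm_use_prev_action` fails validation unless
# `_disable_action_flattening` is set. Assumes the function is importable
# as shown and that its module imports gym's Tuple/Dict space classes.
import numpy as np
from gym.spaces import Box, Discrete, Tuple

action_space = Tuple(
    [Discrete(3), Box(-1.0, 1.0, shape=(2,), dtype=np.float32)])

bad_config = {"lstm_use_prev_action": True}
try:
    _validate_config(bad_config, action_space, framework="torch")
except ValueError as e:
    print(e)  # complains about `_disable_action_flattening`

ok_config = {"lstm_use_prev_action": True,
             "_disable_action_flattening": True}
_validate_config(ok_config, action_space, framework="torch")  # passes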
def _get_v2_model_class(input_space: gym.Space,
                        model_config: ModelConfigDict,
                        framework: str = "tf") -> Type[ModelV2]:
    VisionNet = None
    ComplexNet = None

    if framework in ["tf2", "tf", "tfe"]:
        from ray.rllib.models.tf.fcnet import \
            FullyConnectedNetwork as FCNet
        from ray.rllib.models.tf.visionnet import \
            VisionNetwork as VisionNet
        from ray.rllib.models.tf.complex_input_net import \
            ComplexInputNetwork as ComplexNet
    elif framework == "torch":
        from ray.rllib.models.torch.fcnet import (FullyConnectedNetwork as
                                                  FCNet)
        from ray.rllib.models.torch.visionnet import (VisionNetwork as
                                                      VisionNet)
        from ray.rllib.models.torch.complex_input_net import \
            ComplexInputNetwork as ComplexNet
    elif framework == "jax":
        from ray.rllib.models.jax.fcnet import (FullyConnectedNetwork as
                                                FCNet)
    else:
        raise ValueError(
            "framework={} not supported in `ModelCatalog._get_v2_model_"
            "class`!".format(framework))

    # Discrete/1D obs-spaces or 2D obs space but traj. view framestacking
    # disabled.
    num_framestacks = model_config.get("num_framestacks", "auto")

    # Tuple space, where at least one sub-space is image.
    # -> Complex input model.
    space_to_check = input_space if not hasattr(
        input_space, "original_space") else input_space.original_space
    if isinstance(input_space,
                  Tuple) or (isinstance(space_to_check, Tuple) and any(
                      isinstance(s, Box) and len(s.shape) >= 2
                      for s in space_to_check.spaces)):
        return ComplexNet

    # Single, flattenable/one-hot-able space -> Simple FCNet.
    if isinstance(input_space, (Discrete, MultiDiscrete)) or \
            len(input_space.shape) == 1 or (
                len(input_space.shape) == 2 and (
                    num_framestacks == "auto" or num_framestacks <= 1)):
        return FCNet
    elif framework == "jax":
        raise NotImplementedError("No non-FC default net for JAX yet!")

    # Last resort: Conv2D stack for single image spaces.
    return VisionNet
def jointModel(num_actions, inChannel):
    modelConfig = ModelConfigDict()
    modelConfig["inChannel"] = inChannel
    modelConfig["conv_filters"] = [[32, 4, 4, 0, [0]],
                                   [64, 4, 2, 0, [0]],
                                   [128, 2, 2, 0, [0]]]
    modelConfig["conv_activation"] = "relu"
    modelConfig["fcnet_hiddens"] = [[128 * 15 * 15 + 5 * 3 + 4, 64],
                                    [64, num_actions]]
    modelConfig["fcnet_activation"] = "relu"
    return modelConfig
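# Shape sketch for the flatten size above. Working the conv arithmetic
# backwards, 128 * 15 * 15 is consistent with a 248x248 input; that input
# resolution is inferred here as an assumption, not stated in the original.
def conv_out(size, kernel, stride, padding=0):
    return (size + 2 * padding - kernel) // stride + 1

size = 248  # assumed input width/height
for kernel, stride in [(4, 4), (4, 2), (2, 2)]:
    size = conv_out(size, kernel, stride)
print(size)  # -> 15, so the flattened conv output is 128 * 15 * 15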
def __init__(self, obs_space: gym.spaces.Space, action_space: gym.spaces.Space,
             num_outputs: int, model_config: ModelConfigDict, name: str):
    nn.Module.__init__(self)
    super().__init__(obs_space, action_space, None, model_config, name)

    self.cell_size = model_config["lstm_cell_size"]
    self.time_major = model_config.get("_time_major", False)
    self.use_prev_action = model_config["lstm_use_prev_action"]
    self.use_prev_reward = model_config["lstm_use_prev_reward"]

    if isinstance(action_space, Discrete):
        self.action_dim = action_space.n
    elif isinstance(action_space, MultiDiscrete):
        # Sum (not product) of the sub-space sizes: prev-actions are fed
        # in as concatenated one-hot vectors.
        self.action_dim = int(np.sum(action_space.nvec))
    elif action_space.shape is not None:
        self.action_dim = int(np.product(action_space.shape))
    else:
        self.action_dim = int(len(action_space))

    # Add prev-action/reward nodes to input to LSTM.
    if self.use_prev_action:
        self.num_outputs += self.action_dim
    if self.use_prev_reward:
        self.num_outputs += 1

    self.lstm = nn.LSTM(
        self.num_outputs, self.cell_size, batch_first=not self.time_major)

    self.num_outputs = num_outputs

    # Postprocess LSTM output with another hidden layer and compute values.
    self._logits_branch = SlimFC(in_size=self.cell_size,
                                 out_size=self.num_outputs,
                                 activation_fn=None,
                                 initializer=torch.nn.init.xavier_uniform_)
    self._value_branch = SlimFC(in_size=self.cell_size,
                                out_size=1,
                                activation_fn=None,
                                initializer=torch.nn.init.xavier_uniform_)

    # Add prev-a/r to this model's view, if required.
    if model_config["lstm_use_prev_action"]:
        self.inference_view_requirements[SampleBatch.PREV_ACTIONS] = \
            ViewRequirement(SampleBatch.ACTIONS, space=self.action_space,
                            data_rel_pos=-1)
    if model_config["lstm_use_prev_reward"]:
        self.inference_view_requirements[SampleBatch.PREV_REWARDS] = \
            ViewRequirement(SampleBatch.REWARDS, data_rel_pos=-1)
def _get_v2_model_class(input_space: gym.Space,
                        model_config: ModelConfigDict,
                        framework: str = "tf") -> Type[ModelV2]:
    VisionNet = None

    if framework in ["tf2", "tf", "tfe"]:
        from ray.rllib.models.tf.fcnet import \
            FullyConnectedNetwork as FCNet
        from ray.rllib.models.tf.visionnet import \
            VisionNetwork as VisionNet
    elif framework == "torch":
        from ray.rllib.models.torch.fcnet import (FullyConnectedNetwork as
                                                  FCNet)
        from ray.rllib.models.torch.visionnet import (VisionNetwork as
                                                      VisionNet)
    elif framework == "jax":
        from ray.rllib.models.jax.fcnet import (FullyConnectedNetwork as
                                                FCNet)
    else:
        raise ValueError(
            "framework={} not supported in `ModelCatalog._get_v2_model_"
            "class`!".format(framework))

    # Discrete/1D obs-spaces or 2D obs space but traj. view framestacking
    # disabled.
    num_framestacks = model_config.get("num_framestacks", "auto")
    if isinstance(input_space, (Discrete, MultiDiscrete)) or \
            len(input_space.shape) == 1 or (
                len(input_space.shape) == 2 and (
                    num_framestacks == "auto" or num_framestacks <= 1)):
        return FCNet
    # Default Conv2D net.
    else:
        if framework == "jax":
            raise NotImplementedError("No Conv2D default net for JAX yet!")
        return VisionNet
def get_model_v2(obs_space: gym.Space,
                 action_space: gym.Space,
                 num_outputs: int,
                 model_config: ModelConfigDict,
                 framework: str = "tf",
                 name: str = "default_model",
                 model_interface: type = None,
                 default_model: type = None,
                 **model_kwargs) -> ModelV2:
    """Returns a suitable model compatible with given spaces and output.

    Args:
        obs_space (Space): Observation space of the target gym env. This
            may have an `original_space` attribute that specifies how to
            unflatten the tensor into a ragged tensor.
        action_space (Space): Action space of the target gym env.
        num_outputs (int): The size of the output vector of the model.
        model_config (ModelConfigDict): The "model" sub-config dict
            within the Trainer's config dict.
        framework (str): One of "tf2", "tf", "tfe", "torch", or "jax".
        name (str): Name (scope) for the model.
        model_interface (cls): Interface required for the model.
        default_model (cls): Override the default class for the model.
            This only has an effect when not using a custom model.
        model_kwargs (dict): Args to pass to the ModelV2 constructor.

    Returns:
        model (ModelV2): Model to use for the policy.
    """
    # Validate the given config dict.
    ModelCatalog._validate_config(config=model_config, framework=framework)

    if model_config.get("custom_model"):
        # Allow model kwargs to be overridden / augmented by
        # custom_model_config.
        customized_model_kwargs = dict(
            model_kwargs, **model_config.get("custom_model_config", {}))

        if isinstance(model_config["custom_model"], type):
            model_cls = model_config["custom_model"]
        else:
            model_cls = _global_registry.get(RLLIB_MODEL,
                                             model_config["custom_model"])

        if not issubclass(model_cls, ModelV2):
            raise ValueError(
                "`model_cls` must be a ModelV2 sub-class, but is"
                " {}!".format(model_cls))

        logger.info("Wrapping {} as {}".format(model_cls, model_interface))
        model_cls = ModelCatalog._wrap_if_needed(model_cls, model_interface)

        if framework in ["tf2", "tf", "tfe"]:
            # Try wrapping custom model with LSTM/attention, if required.
            if model_config.get("use_lstm") or \
                    model_config.get("use_attention"):
                from ray.rllib.models.tf.attention_net import \
                    AttentionWrapper
                from ray.rllib.models.tf.recurrent_net import LSTMWrapper

                wrapped_cls = model_cls
                forward = wrapped_cls.forward
                model_cls = ModelCatalog._wrap_if_needed(
                    wrapped_cls, LSTMWrapper
                    if model_config.get("use_lstm") else AttentionWrapper)
                model_cls._wrapped_forward = forward

            # Obsolete: Track and warn if vars were created but not
            # registered. Only still do this, if users do register their
            # variables. If not (which they shouldn't), don't check here.
            created = set()

            def track_var_creation(next_creator, **kw):
                v = next_creator(**kw)
                created.add(v)
                return v

            with tf.variable_creator_scope(track_var_creation):
                # Try calling with kwargs first (custom ModelV2 should
                # accept these as kwargs, not get them from
                # config["custom_model_config"] anymore).
                try:
                    instance = model_cls(obs_space, action_space,
                                         num_outputs, model_config, name,
                                         **customized_model_kwargs)
                except TypeError as e:
                    # Keyword error: Try old way w/o kwargs.
                    if "__init__() got an unexpected " in e.args[0]:
                        instance = model_cls(obs_space, action_space,
                                             num_outputs, model_config,
                                             name, **model_kwargs)
                        logger.warning(
                            "Custom ModelV2 should accept all custom "
                            "options as **kwargs, instead of expecting"
                            " them in config['custom_model_config']!")
                    # Other error -> re-raise.
                    else:
                        raise e

            # User still registered TFModelV2's variables: Check, whether
            # ok.
            registered = set(instance.var_list)
            if len(registered) > 0:
                not_registered = set()
                for var in created:
                    if var not in registered:
                        not_registered.add(var)
                if not_registered:
                    raise ValueError(
                        "It looks like you are still using "
                        "`{}.register_variables()` to register your "
                        "model's weights. This is no longer required, but "
                        "if you are still calling this method at least "
                        "once, you must make sure to register all created "
                        "variables properly. The missing variables are {},"
                        " and you only registered {}. "
                        "Did you forget to call `register_variables()` on "
                        "some of the variables in question?".format(
                            instance, not_registered, registered))
        elif framework == "torch":
            # Try wrapping custom model with LSTM/attention, if required.
            if model_config.get("use_lstm") or \
                    model_config.get("use_attention"):
                from ray.rllib.models.torch.attention_net import \
                    AttentionWrapper
                from ray.rllib.models.torch.recurrent_net import \
                    LSTMWrapper

                wrapped_cls = model_cls
                forward = wrapped_cls.forward
                model_cls = ModelCatalog._wrap_if_needed(
                    wrapped_cls, LSTMWrapper
                    if model_config.get("use_lstm") else AttentionWrapper)
                model_cls._wrapped_forward = forward

            # PyTorch automatically tracks nn.Modules inside the parent
            # nn.Module's constructor.
            # Try calling with kwargs first (custom ModelV2 should
            # accept these as kwargs, not get them from
            # config["custom_model_config"] anymore).
            try:
                instance = model_cls(obs_space, action_space, num_outputs,
                                     model_config, name,
                                     **customized_model_kwargs)
            except TypeError as e:
                # Keyword error: Try old way w/o kwargs.
                if "__init__() got an unexpected " in e.args[0]:
                    instance = model_cls(obs_space, action_space,
                                         num_outputs, model_config, name,
                                         **model_kwargs)
                    logger.warning(
                        "Custom ModelV2 should accept all custom "
                        "options as **kwargs, instead of expecting"
                        " them in config['custom_model_config']!")
                # Other error -> re-raise.
                else:
                    raise e
        else:
            raise NotImplementedError(
                "`framework` must be 'tf2|tf|tfe|torch', but is "
                "{}!".format(framework))

        return instance

    # Find a default TFModelV2 and wrap with model_interface.
    if framework in ["tf", "tfe", "tf2"]:
        v2_class = None
        # Try to get a default v2 model.
        if not model_config.get("custom_model"):
            v2_class = default_model or ModelCatalog._get_v2_model_class(
                obs_space, model_config, framework=framework)

        if not v2_class:
            raise ValueError("ModelV2 class could not be determined!")

        if model_config.get("use_lstm") or \
                model_config.get("use_attention"):
            from ray.rllib.models.tf.attention_net import \
                AttentionWrapper
            from ray.rllib.models.tf.recurrent_net import LSTMWrapper

            wrapped_cls = v2_class
            forward = wrapped_cls.forward
            if model_config.get("use_lstm"):
                v2_class = ModelCatalog._wrap_if_needed(
                    wrapped_cls, LSTMWrapper)
            else:
                v2_class = ModelCatalog._wrap_if_needed(
                    wrapped_cls, AttentionWrapper)
            v2_class._wrapped_forward = forward

        # Wrap in the requested interface.
        wrapper = ModelCatalog._wrap_if_needed(v2_class, model_interface)
        return wrapper(obs_space, action_space, num_outputs, model_config,
                       name, **model_kwargs)

    # Find a default TorchModelV2 and wrap with model_interface.
    elif framework == "torch":
        # Try to get a default v2 model.
if not model_config.get("custom_model"): v2_class = default_model or ModelCatalog._get_v2_model_class( obs_space, model_config, framework=framework) if not v2_class: raise ValueError("ModelV2 class could not be determined!") if model_config.get("use_lstm") or \ model_config.get("use_attention"): from ray.rllib.models.torch.attention_net import \ AttentionWrapper from ray.rllib.models.torch.recurrent_net import LSTMWrapper wrapped_cls = v2_class forward = wrapped_cls.forward if model_config.get("use_lstm"): v2_class = ModelCatalog._wrap_if_needed( wrapped_cls, LSTMWrapper) else: v2_class = ModelCatalog._wrap_if_needed( wrapped_cls, AttentionWrapper) v2_class._wrapped_forward = forward # Wrap in the requested interface. wrapper = ModelCatalog._wrap_if_needed(v2_class, model_interface) return wrapper(obs_space, action_space, num_outputs, model_config, name, **model_kwargs) # Find a default JAXModelV2 and wrap with model_interface. elif framework == "jax": v2_class = \ default_model or ModelCatalog._get_v2_model_class( obs_space, model_config, framework=framework) # Wrap in the requested interface. wrapper = ModelCatalog._wrap_if_needed(v2_class, model_interface) return wrapper(obs_space, action_space, num_outputs, model_config, name, **model_kwargs) else: raise NotImplementedError( "`framework` must be 'tf2|tf|tfe|torch', but is " "{}!".format(framework))
def get_action_dist(action_space: gym.Space,
                    config: ModelConfigDict,
                    dist_type: Optional[Union[
                        str, Type[ActionDistribution]]] = None,
                    framework: str = "tf",
                    **kwargs) -> (type, int):
    """Returns a distribution class and size for the given action space.

    Args:
        action_space (Space): Action space of the target gym env.
        config (Optional[dict]): Optional model config.
        dist_type (Optional[Union[str, Type[ActionDistribution]]]):
            Identifier of the action distribution (str) interpreted as a
            hint or the actual ActionDistribution class to use.
        framework (str): One of "tf2", "tf", "tfe", "torch", or "jax".
        kwargs (dict): Optional kwargs to pass on to the Distribution's
            constructor.

    Returns:
        Tuple:
            - dist_class (ActionDistribution): Python class of the
              distribution.
            - dist_dim (int): The size of the input vector to the
              distribution.
    """
    dist_cls = None
    config = config or MODEL_DEFAULTS
    # Custom distribution given.
    if config.get("custom_action_dist"):
        custom_action_config = config.copy()
        action_dist_name = custom_action_config.pop("custom_action_dist")
        logger.debug(
            "Using custom action distribution {}".format(action_dist_name))
        dist_cls = _global_registry.get(RLLIB_ACTION_DIST, action_dist_name)
        return ModelCatalog._get_multi_action_distribution(
            dist_cls, action_space, custom_action_config, framework)

    # Dist_type is given directly as a class.
    elif type(dist_type) is type and \
            issubclass(dist_type, ActionDistribution) and \
            dist_type not in (
                MultiActionDistribution, TorchMultiActionDistribution):
        dist_cls = dist_type
    # Box space -> DiagGaussian OR Deterministic.
    elif isinstance(action_space, Box):
        if action_space.dtype.name.startswith("int"):
            low_ = np.min(action_space.low)
            high_ = np.max(action_space.high)
            assert np.all(action_space.low == low_)
            assert np.all(action_space.high == high_)
            dist_cls = TorchMultiCategorical if framework == "torch" \
                else MultiCategorical
            num_cats = int(np.product(action_space.shape))
            return partial(
                dist_cls,
                input_lens=[high_ - low_ + 1 for _ in range(num_cats)],
                action_space=action_space), num_cats * (high_ - low_ + 1)
        else:
            if len(action_space.shape) > 1:
                raise UnsupportedSpaceException(
                    "Action space has multiple dimensions "
                    "{}. ".format(action_space.shape) +
                    "Consider reshaping this into a single dimension, "
                    "using a custom action distribution, "
                    "using a Tuple action space, or the multi-agent API.")
            # TODO(sven): Check for bounds and return SquashedNormal, etc..
            if dist_type is None:
                dist_cls = TorchDiagGaussian if framework == "torch" \
                    else DiagGaussian
            elif dist_type == "deterministic":
                dist_cls = TorchDeterministic if framework == "torch" \
                    else Deterministic
    # Discrete Space -> Categorical.
    elif isinstance(action_space, Discrete):
        dist_cls = TorchCategorical if framework == "torch" else \
            JAXCategorical if framework == "jax" else Categorical
    # Tuple/Dict Spaces -> MultiAction.
    elif dist_type in (MultiActionDistribution,
                       TorchMultiActionDistribution) or \
            isinstance(action_space, (Tuple, Dict)):
        return ModelCatalog._get_multi_action_distribution(
            (MultiActionDistribution
             if framework == "tf" else TorchMultiActionDistribution),
            action_space, config, framework)
    # Simplex -> Dirichlet.
    elif isinstance(action_space, Simplex):
        if framework == "torch":
            # TODO(sven): implement
            raise NotImplementedError(
                "Simplex action spaces not supported for torch.")
        dist_cls = Dirichlet
    # MultiDiscrete -> MultiCategorical.
    elif isinstance(action_space, MultiDiscrete):
        dist_cls = TorchMultiCategorical if framework == "torch" else \
            MultiCategorical
        return partial(dist_cls, input_lens=action_space.nvec), \
            int(sum(action_space.nvec))
    # Unknown type -> Error.
    else:
        raise NotImplementedError("Unsupported args: {} {}".format(
            action_space, dist_type))

    return dist_cls, dist_cls.required_model_output_shape(
        action_space, config)
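# Hedged sketch of `get_action_dist` outputs for two common spaces, following
# the code paths above. Assumes an RLlib version matching this snippet, where
# the method lives on ModelCatalog.
import numpy as np
from gym.spaces import Box, Discrete
from ray.rllib.models import ModelCatalog

# Discrete(4) -> (Torch)Categorical with 4 required model outputs.
dist_cls, dist_dim = ModelCatalog.get_action_dist(
    Discrete(4), config={}, framework="torch")
print(dist_cls, dist_dim)  # -> TorchCategorical, 4

# Float Box(2,) -> (Torch)DiagGaussian with 2 * 2 = 4 outputs
# (means plus log-stds).
box = Box(-1.0, 1.0, shape=(2,), dtype=np.float32)
dist_cls2, dist_dim2 = ModelCatalog.get_action_dist(
    box, config={}, framework="torch")
print(dist_cls2, dist_dim2)  # -> TorchDiagGaussian, 4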
def __init__(self, obs_space: gym.spaces.Space, action_space: gym.spaces.Space,
             num_outputs: int, model_config: ModelConfigDict, name: str):
    if not model_config.get("conv_filters"):
        model_config["conv_filters"] = get_filter_config(obs_space.shape)
    super(CustomVisionNetwork, self).__init__(obs_space, action_space,
                                              num_outputs, model_config,
                                              name)

    activation = get_activation_fn(
        self.model_config.get("conv_activation"), framework="tf")
    filters = self.model_config["conv_filters"]
    assert len(filters) > 0, \
        "Must provide at least 1 entry in `conv_filters`!"

    input_shape = obs_space.shape
    self.data_format = "channels_last"

    inputs = tf.keras.layers.Input(shape=input_shape, name="observations")
    # is_training = tf.keras.layers.Input(
    #     shape=(), dtype=tf.bool, batch_size=1, name="is_training")
    last_layer = inputs
    # Whether the last layer is the output of a Flattened (rather than
    # a n x (1,1) Conv2D).
    self.last_layer_is_flattened = False

    # Build the action layers.
    for i, (out_size, kernel, stride) in enumerate(filters[:-1], 1):
        last_layer = tf.keras.layers.Conv2D(
            out_size,
            kernel,
            strides=(stride, stride),
            padding="same",
            activation=activation,
            data_format="channels_last",
            name="conv{}".format(i))(last_layer)

    out_size, kernel, stride = filters[-1]

    p_layer = tf.keras.layers.Conv2D(
        filters=out_size,
        kernel_size=kernel,
        strides=(stride, stride),
        padding="valid",
        data_format="channels_last",
        name="conv{}".format(len(filters)))(last_layer)
    p_layer = tf.keras.layers.ReLU()(p_layer)

    v_layer = tf.keras.layers.Conv2D(
        filters=out_size,
        kernel_size=kernel,
        strides=(stride, stride),
        padding="valid",
        data_format="channels_last",
        name="conv{}".format(len(filters) + 1))(last_layer)
    v_layer = tf.keras.layers.ReLU()(v_layer)
    # last_layer = tf1.layers.AveragePooling2D((2, 2), (2, 2))(last_layer)

    p_layer = tf.keras.layers.Flatten(data_format="channels_last")(p_layer)
    v_layer = tf.keras.layers.Flatten(data_format="channels_last")(v_layer)
    self.last_layer_is_flattened = True

    self.num_outputs_p = p_layer.shape[1]
    self.num_outputs_v = v_layer.shape[1]
    self._value_out = v_layer

    self.base_model = tf.keras.Model(inputs, [p_layer, self._value_out])
    self.base_model.summary()
def __init__(self, obs_space: gym.spaces.Space, action_space: gym.spaces.Space,
             num_outputs: int, model_config: ModelConfigDict, name: str):
    TorchModelV2.__init__(self, obs_space, action_space, num_outputs,
                          model_config, name)
    nn.Module.__init__(self)

    activation = model_config.get("fcnet_activation")
    hiddens = model_config.get("fcnet_hiddens", [])
    no_final_linear = model_config.get("no_final_linear")
    self.vf_share_layers = model_config.get("vf_share_layers")
    self.free_log_std = model_config.get("free_log_std")

    # Generate free-floating bias variables for the second half of
    # the outputs.
    if self.free_log_std:
        assert num_outputs % 2 == 0, (
            "num_outputs must be divisible by two", num_outputs)
        num_outputs = num_outputs // 2

    layers = []
    prev_layer_size = int(np.product(obs_space.shape))
    self._logits = None

    # Create layers 0 to second-last.
    for size in hiddens[:-1]:
        layers.append(
            SlimFC(in_size=prev_layer_size,
                   out_size=size,
                   initializer=normc_initializer(1.0),
                   activation_fn=activation))
        prev_layer_size = size

    # The last layer is adjusted to be of size num_outputs, but it's a
    # layer with activation.
    if no_final_linear and num_outputs:
        layers.append(
            SlimFC(in_size=prev_layer_size,
                   out_size=num_outputs,
                   initializer=normc_initializer(1.0),
                   activation_fn=activation))
        prev_layer_size = num_outputs
    # Finish the layers with the provided sizes (`hiddens`), plus -
    # iff num_outputs > 0 - a last linear layer of size num_outputs.
    else:
        if len(hiddens) > 0:
            layers.append(
                SlimFC(in_size=prev_layer_size,
                       out_size=hiddens[-1],
                       initializer=normc_initializer(1.0),
                       activation_fn=activation))
            prev_layer_size = hiddens[-1]
        if num_outputs:
            self._logits = SlimFC(in_size=prev_layer_size,
                                  out_size=num_outputs,
                                  initializer=normc_initializer(0.01),
                                  activation_fn=None)
        else:
            self.num_outputs = ([int(np.product(obs_space.shape))] +
                                hiddens[-1:])[-1]

    # Layer to add the log std vars to the state-dependent means.
    if self.free_log_std and self._logits:
        self._append_free_log_std = AppendBiasLayer(num_outputs)

    self._hidden_layers = nn.Sequential(*layers)

    self._value_branch_separate = None
    if not self.vf_share_layers:
        # Build a parallel set of hidden layers for the value net.
        prev_vf_layer_size = int(np.product(obs_space.shape))
        vf_layers = []
        for size in hiddens:
            vf_layers.append(
                SlimFC(in_size=prev_vf_layer_size,
                       out_size=size,
                       activation_fn=activation,
                       initializer=normc_initializer(1.0)))
            prev_vf_layer_size = size
        self._value_branch_separate = nn.Sequential(*vf_layers)

    self._value_branch = SlimFC(in_size=prev_layer_size,
                                out_size=1,
                                initializer=normc_initializer(1.0),
                                activation_fn=None)
    # Holds the current "base" output (before logits layer).
    self._features = None
    # Holds the last input, in case value branch is separate.
    self._last_flat_in = None
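# Hedged instantiation sketch for the fully connected net above. Assumes the
# torch FullyConnectedNetwork from an RLlib version matching this snippet;
# the spaces, hidden sizes, and batch size are illustrative.
import numpy as np
import torch
from gym.spaces import Box, Discrete
from ray.rllib.models.torch.fcnet import FullyConnectedNetwork

obs_space = Box(-1.0, 1.0, shape=(8,), dtype=np.float32)
model = FullyConnectedNetwork(
    obs_space, Discrete(4), num_outputs=4,
    model_config={"fcnet_activation": "tanh",
                  "fcnet_hiddens": [64, 64],
                  "no_final_linear": False,
                  "vf_share_layers": False,
                  "free_log_std": False},
    name="fc_example")

# ModelV2.__call__ handles flattening/restoring of the obs for us.
logits, _ = model({"obs": torch.randn(32, 8)}, [], None)
print(logits.shape)           # torch.Size([32, 4])
print(model.value_function().shape)  # torch.Size([32])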
def __init__(
        self,
        *,
        env_creator: Callable[[EnvContext], EnvType],
        validate_env: Optional[Callable[[EnvType, EnvContext],
                                        None]] = None,
        policy_spec: Union[type, Dict[str, Tuple[
            Optional[type], gym.Space, gym.Space,
            PartialTrainerConfigDict]]] = None,
        policy_mapping_fn: Optional[Callable[[AgentID], PolicyID]] = None,
        policies_to_train: Optional[List[PolicyID]] = None,
        tf_session_creator: Optional[Callable[[], "tf1.Session"]] = None,
        rollout_fragment_length: int = 100,
        batch_mode: str = "truncate_episodes",
        episode_horizon: int = None,
        preprocessor_pref: str = "deepmind",
        sample_async: bool = False,
        compress_observations: bool = False,
        num_envs: int = 1,
        observation_fn: "ObservationFunction" = None,
        observation_filter: str = "NoFilter",
        clip_rewards: bool = None,
        clip_actions: bool = True,
        env_config: EnvConfigDict = None,
        model_config: ModelConfigDict = None,
        policy_config: TrainerConfigDict = None,
        worker_index: int = 0,
        num_workers: int = 0,
        monitor_path: str = None,
        log_dir: str = None,
        log_level: str = None,
        callbacks: Type["DefaultCallbacks"] = None,
        input_creator: Callable[
            [IOContext],
            InputReader] = lambda ioctx: ioctx.default_sampler_input(),
        input_evaluation: List[str] = frozenset([]),
        output_creator: Callable[
            [IOContext], OutputWriter] = lambda ioctx: NoopOutput(),
        remote_worker_envs: bool = False,
        remote_env_batch_wait_ms: int = 0,
        soft_horizon: bool = False,
        no_done_at_end: bool = False,
        seed: int = None,
        extra_python_environs: dict = None,
        fake_sampler: bool = False,
        spaces: Optional[Dict[PolicyID, Tuple[gym.spaces.Space,
                                              gym.spaces.Space]]] = None,
        policy: Union[type, Dict[str, Tuple[
            Optional[type], gym.Space, gym.Space,
            PartialTrainerConfigDict]]] = None,
):
    """Initialize a rollout worker.

    Args:
        env_creator (Callable[[EnvContext], EnvType]): Function that
            returns a gym.Env given an EnvContext wrapped configuration.
        validate_env (Optional[Callable[[EnvType, EnvContext], None]]):
            Optional callable to validate the generated environment (only
            on worker=0).
        policy_spec (Union[type, Dict[str, Tuple[Type[Policy], gym.Space,
            gym.Space, PartialTrainerConfigDict]]]): Either a Policy class
            or a dict of policy id strings to
            (Policy class, obs_space, action_space, config)-tuples. If a
            dict is specified, then we are in multi-agent mode and a
            policy_mapping_fn can also be set (if not, will map all agents
            to DEFAULT_POLICY_ID).
        policy_mapping_fn (Optional[Callable[[AgentID], PolicyID]]): A
            callable that maps agent ids to policy ids in multi-agent
            mode. This function will be called each time a new agent
            appears in an episode, to bind that agent to a policy for the
            duration of the episode. If not provided, will map all agents
            to DEFAULT_POLICY_ID.
        policies_to_train (Optional[List[PolicyID]]): Optional list of
            policies to train, or None for all policies.
        tf_session_creator (Optional[Callable[[], tf1.Session]]): A
            function that returns a TF session. This is optional and only
            useful with TFPolicy.
        rollout_fragment_length (int): The target number of env
            transitions to include in each sample batch returned from this
            worker.
        batch_mode (str): One of the following batch modes:
            "truncate_episodes": Each call to sample() will return a batch
                of at most `rollout_fragment_length * num_envs` in size.
                The batch will be exactly
                `rollout_fragment_length * num_envs` in size if
                postprocessing does not change batch sizes. Episodes may
                be truncated in order to meet this size requirement.
"complete_episodes": Each call to sample() will return a batch of at least `rollout_fragment_length * num_envs` in size. Episodes will not be truncated, but multiple episodes may be packed within one batch to meet the batch size. Note that when `num_envs > 1`, episode steps will be buffered until the episode completes, and hence batches may contain significant amounts of off-policy data. episode_horizon (int): Whether to stop episodes at this horizon. preprocessor_pref (str): Whether to prefer RLlib preprocessors ("rllib") or deepmind ("deepmind") when applicable. sample_async (bool): Whether to compute samples asynchronously in the background, which improves throughput but can cause samples to be slightly off-policy. compress_observations (bool): If true, compress the observations. They can be decompressed with rllib/utils/compression. num_envs (int): If more than one, will create multiple envs and vectorize the computation of actions. This has no effect if if the env already implements VectorEnv. observation_fn (ObservationFunction): Optional multi-agent observation function. observation_filter (str): Name of observation filter to use. clip_rewards (bool): Whether to clip rewards to [-1, 1] prior to experience postprocessing. Setting to None means clip for Atari only. clip_actions (bool): Whether to clip action values to the range specified by the policy action space. env_config (EnvConfigDict): Config to pass to the env creator. model_config (ModelConfigDict): Config to use when creating the policy model. policy_config (TrainerConfigDict): Config to pass to the policy. In the multi-agent case, this config will be merged with the per-policy configs specified by `policy_spec`. worker_index (int): For remote workers, this should be set to a non-zero and unique value. This index is passed to created envs through EnvContext so that envs can be configured per worker. num_workers (int): For remote workers, how many workers altogether have been created? monitor_path (str): Write out episode stats and videos to this directory if specified. log_dir (str): Directory where logs can be placed. log_level (str): Set the root log level on creation. callbacks (DefaultCallbacks): Custom training callbacks. input_creator (Callable[[IOContext], InputReader]): Function that returns an InputReader object for loading previous generated experiences. input_evaluation (List[str]): How to evaluate the policy performance. This only makes sense to set when the input is reading offline data. The possible values include: - "is": the step-wise importance sampling estimator. - "wis": the weighted step-wise is estimator. - "simulation": run the environment in the background, but use this data for evaluation only and never for learning. output_creator (Callable[[IOContext], OutputWriter]): Function that returns an OutputWriter object for saving generated experiences. remote_worker_envs (bool): If using num_envs > 1, whether to create those new envs in remote processes instead of in the current process. This adds overheads, but can make sense if your envs remote_env_batch_wait_ms (float): Timeout that remote workers are waiting when polling environments. 0 (continue when at least one env is ready) is a reasonable default, but optimal value could be obtained by measuring your environment step / reset and model inference perf. soft_horizon (bool): Calculate rewards but don't reset the environment when the horizon is hit. no_done_at_end (bool): Ignore the done=True at the end of the episode and instead record done=False. 
        seed (int): Set the seed of both np and tf to this value to
            ensure each remote worker has unique exploration behavior.
        extra_python_environs (dict): Extra python environment variables
            that need to be set.
        fake_sampler (bool): Use a fake (inf speed) sampler for testing.
        spaces (Optional[Dict[PolicyID, Tuple[gym.spaces.Space,
            gym.spaces.Space]]]): An optional space dict mapping policy
            IDs to (obs_space, action_space)-tuples. This is used in case
            no Env is created on this RolloutWorker.
        policy: Obsoleted arg. Use `policy_spec` instead.
    """
    # Deprecated arg.
    if policy is not None:
        deprecation_warning("policy", "policy_spec", error=False)
        policy_spec = policy
    assert policy_spec is not None, "Must provide `policy_spec` when " \
                                    "creating RolloutWorker!"

    self._original_kwargs: dict = locals().copy()
    del self._original_kwargs["self"]

    global _global_worker
    _global_worker = self

    # Set extra environs first.
    if extra_python_environs:
        for key, value in extra_python_environs.items():
            os.environ[key] = str(value)

    def gen_rollouts():
        while True:
            yield self.sample()

    ParallelIteratorWorker.__init__(self, gen_rollouts, False)

    policy_config: TrainerConfigDict = policy_config or {}
    if (tf1 and policy_config.get("framework") in ["tf2", "tfe"]
            # This eager check is necessary for certain all-framework tests
            # that use tf's eager_mode() context generator.
            and not tf1.executing_eagerly()):
        tf1.enable_eager_execution()

    if log_level:
        logging.getLogger("ray.rllib").setLevel(log_level)

    if worker_index > 1:
        disable_log_once_globally()  # only need 1 worker to log
    elif log_level == "DEBUG":
        enable_periodic_logging()

    env_context = EnvContext(env_config or {}, worker_index)
    self.env_context = env_context
    self.policy_config: TrainerConfigDict = policy_config
    if callbacks:
        self.callbacks: "DefaultCallbacks" = callbacks()
    else:
        from ray.rllib.agents.callbacks import DefaultCallbacks
        self.callbacks: "DefaultCallbacks" = DefaultCallbacks()
    self.worker_index: int = worker_index
    self.num_workers: int = num_workers
    model_config: ModelConfigDict = model_config or {}
    policy_mapping_fn = (policy_mapping_fn
                         or (lambda agent_id: DEFAULT_POLICY_ID))
    if not callable(policy_mapping_fn):
        raise ValueError("Policy mapping function not callable?")
    self.env_creator: Callable[[EnvContext], EnvType] = env_creator
    self.rollout_fragment_length: int = rollout_fragment_length * num_envs
    self.batch_mode: str = batch_mode
    self.compress_observations: bool = compress_observations
    self.preprocessing_enabled: bool = True
    self.last_batch: SampleBatchType = None
    self.global_vars: dict = None
    self.fake_sampler: bool = fake_sampler

    # No Env will be used in this particular worker (not needed).
    if worker_index == 0 and num_workers > 0 and \
            policy_config["create_env_on_driver"] is False:
        self.env = None
    # Create an env for this worker.
    else:
        self.env = _validate_env(env_creator(env_context))
        if validate_env is not None:
            validate_env(self.env, self.env_context)

        if isinstance(self.env, (BaseEnv, MultiAgentEnv)):

            def wrap(env):
                return env  # we can't auto-wrap these env types

        elif is_atari(self.env) and \
                not model_config.get("custom_preprocessor") and \
                preprocessor_pref == "deepmind":
            # Deepmind wrappers already handle all preprocessing.
            self.preprocessing_enabled = False

            # If clip_rewards not explicitly set to False, switch it
            # on here (clip between -1.0 and 1.0).
            if clip_rewards is None:
                clip_rewards = True

            def wrap(env):
                env = wrap_deepmind(
                    env,
                    dim=model_config.get("dim"),
                    framestack=model_config.get("framestack"))
                if monitor_path:
                    from gym import wrappers
                    env = wrappers.Monitor(env, monitor_path, resume=True)
                return env

        else:

            def wrap(env):
                if monitor_path:
                    from gym import wrappers
                    env = wrappers.Monitor(env, monitor_path, resume=True)
                return env

        self.env: EnvType = wrap(self.env)

    def make_env(vector_index):
        return wrap(
            env_creator(
                env_context.copy_with_overrides(
                    worker_index=worker_index,
                    vector_index=vector_index,
                    remote=remote_worker_envs)))

    self.make_env_fn = make_env

    self.tf_sess = None
    policy_dict = _validate_and_canonicalize(
        policy_spec, self.env, spaces=spaces)
    self.policies_to_train: List[PolicyID] = policies_to_train or list(
        policy_dict.keys())
    self.policy_map: Dict[PolicyID, Policy] = None
    self.preprocessors: Dict[PolicyID, Preprocessor] = None

    # Set numpy and python seed.
    if seed is not None:
        np.random.seed(seed)
        random.seed(seed)
        if not hasattr(self.env, "seed"):
            logger.info("Env doesn't support env.seed(): {}".format(
                self.env))
        else:
            self.env.seed(seed)
        try:
            assert torch is not None
            torch.manual_seed(seed)
        except AssertionError:
            logger.info("Could not seed torch")

    if _has_tensorflow_graph(policy_dict) and not (
            tf1 and tf1.executing_eagerly()):
        if not tf1:
            raise ImportError("Could not import tensorflow")
        with tf1.Graph().as_default():
            if tf_session_creator:
                self.tf_sess = tf_session_creator()
            else:
                self.tf_sess = tf1.Session(
                    config=tf1.ConfigProto(
                        gpu_options=tf1.GPUOptions(allow_growth=True)))
            with self.tf_sess.as_default():
                # Set graph-level seed.
                if seed is not None:
                    tf1.set_random_seed(seed)
                self.policy_map, self.preprocessors = \
                    self._build_policy_map(policy_dict, policy_config)
    else:
        self.policy_map, self.preprocessors = self._build_policy_map(
            policy_dict, policy_config)

    if (ray.is_initialized()
            and ray.worker._mode() != ray.worker.LOCAL_MODE):
        # Check available number of GPUs.
        if not ray.get_gpu_ids():
            logger.debug("Creating policy evaluation worker {}".format(
                worker_index) +
                         " on CPU (please ignore any CUDA init errors)")
        elif (policy_config["framework"] in ["tf2", "tf", "tfe"] and
              not tf.config.experimental.list_physical_devices("GPU")) or \
                (policy_config["framework"] == "torch"
                 and not torch.cuda.is_available()):
            raise RuntimeError(
                "GPUs were assigned to this worker by Ray, but "
                "your DL framework ({}) reports GPU acceleration is "
                "disabled. This could be due to a bad CUDA- or {} "
                "installation.".format(policy_config["framework"],
                                       policy_config["framework"]))

    self.multiagent: bool = set(
        self.policy_map.keys()) != {DEFAULT_POLICY_ID}
    if self.multiagent and self.env is not None:
        if not ((isinstance(self.env, MultiAgentEnv)
                 or isinstance(self.env, ExternalMultiAgentEnv))
                or isinstance(self.env, BaseEnv)):
            raise ValueError(
                "Have multiple policies {}, but the env ".format(
                    self.policy_map) +
                "{} is not a subclass of BaseEnv, MultiAgentEnv or "
                "ExternalMultiAgentEnv?".format(self.env))

    self.filters: Dict[PolicyID, Filter] = {
        policy_id: get_filter(observation_filter,
                              policy.observation_space.shape)
        for (policy_id, policy) in self.policy_map.items()
    }
    if self.worker_index == 0:
        logger.info("Built filter map: {}".format(self.filters))

    self.num_envs: int = num_envs

    if self.env is None:
        self.async_env = None
    elif "custom_vector_env" in policy_config:
        custom_vec_wrapper = policy_config["custom_vector_env"]
        self.async_env = custom_vec_wrapper(self.env)
    else:
        # Always use vector env for consistency even if num_envs = 1.
        self.async_env: BaseEnv = BaseEnv.to_base_env(
            self.env,
            make_env=make_env,
            num_envs=num_envs,
            remote_envs=remote_worker_envs,
            remote_env_batch_wait_ms=remote_env_batch_wait_ms)

    # `truncate_episodes`: Allow a batch to contain more than one episode
    # (fragments) and always make the batch `rollout_fragment_length`
    # long.
    if self.batch_mode == "truncate_episodes":
        pack = True
    # `complete_episodes`: Never cut episodes and sampler will return
    # exactly one (complete) episode per poll.
    elif self.batch_mode == "complete_episodes":
        rollout_fragment_length = float("inf")
        pack = False
    else:
        raise ValueError("Unsupported batch mode: {}".format(
            self.batch_mode))

    self.io_context: IOContext = IOContext(log_dir, policy_config,
                                           worker_index, self)
    self.reward_estimators: List[OffPolicyEstimator] = []
    for method in input_evaluation:
        if method == "simulation":
            logger.warning(
                "Requested 'simulation' input evaluation method: "
                "will discard all sampler outputs and keep only metrics.")
            sample_async = True
        elif method == "is":
            ise = ImportanceSamplingEstimator.create(self.io_context)
            self.reward_estimators.append(ise)
        elif method == "wis":
            wise = WeightedImportanceSamplingEstimator.create(
                self.io_context)
            self.reward_estimators.append(wise)
        else:
            raise ValueError(
                "Unknown evaluation method: {}".format(method))

    if self.env is None:
        self.sampler = None
    elif sample_async:
        self.sampler = AsyncSampler(
            worker=self,
            env=self.async_env,
            policies=self.policy_map,
            policy_mapping_fn=policy_mapping_fn,
            preprocessors=self.preprocessors,
            obs_filters=self.filters,
            clip_rewards=clip_rewards,
            rollout_fragment_length=rollout_fragment_length,
            callbacks=self.callbacks,
            horizon=episode_horizon,
            multiple_episodes_in_batch=pack,
            tf_sess=self.tf_sess,
            clip_actions=clip_actions,
            blackhole_outputs="simulation" in input_evaluation,
            soft_horizon=soft_horizon,
            no_done_at_end=no_done_at_end,
            observation_fn=observation_fn,
            _use_trajectory_view_api=policy_config.get(
                "_use_trajectory_view_api", False))
        # Start the Sampler thread.
        self.sampler.start()
    else:
        self.sampler = SyncSampler(
            worker=self,
            env=self.async_env,
            policies=self.policy_map,
            policy_mapping_fn=policy_mapping_fn,
            preprocessors=self.preprocessors,
            obs_filters=self.filters,
            clip_rewards=clip_rewards,
            rollout_fragment_length=rollout_fragment_length,
            callbacks=self.callbacks,
            horizon=episode_horizon,
            multiple_episodes_in_batch=pack,
            tf_sess=self.tf_sess,
            clip_actions=clip_actions,
            soft_horizon=soft_horizon,
            no_done_at_end=no_done_at_end,
            observation_fn=observation_fn,
            _use_trajectory_view_api=policy_config.get(
                "_use_trajectory_view_api", False))

    self.input_reader: InputReader = input_creator(self.io_context)
    self.output_writer: OutputWriter = output_creator(self.io_context)

    logger.debug(
        "Created rollout worker with env {} ({}), policies {}".format(
            self.async_env, self.env, self.policy_map))
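# Hedged usage sketch, patterned on RLlib's own RolloutWorker docstring
# example. Assumes an RLlib version matching this snippet; the env and
# policy choices are illustrative and the policy's default config is merged
# in by the worker.
import gym
from ray.rllib.agents.pg.pg_tf_policy import PGTFPolicy
from ray.rllib.evaluation.rollout_worker import RolloutWorker

worker = RolloutWorker(
    env_creator=lambda ctx: gym.make("CartPole-v0"),
    policy_spec=PGTFPolicy)
batch = worker.sample()  # SampleBatch of ~rollout_fragment_length steps
print(batch.count)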
def __init__(self, obs_space: gym.spaces.Space, action_space: gym.spaces.Space,
             num_outputs: int, model_config: ModelConfigDict, name: str):
    nn.Module.__init__(self)
    super().__init__(obs_space, action_space, None, model_config, name)

    # At this point, self.num_outputs is the number of nodes coming
    # from the wrapped (underlying) model. In other words, self.num_outputs
    # is the input size for the LSTM layer.
    # If None, set it to the observation space.
    if self.num_outputs is None:
        self.num_outputs = int(np.product(self.obs_space.shape))

    self.cell_size = model_config["lstm_cell_size"]
    self.time_major = model_config.get("_time_major", False)
    self.use_prev_action = model_config["lstm_use_prev_action"]
    self.use_prev_reward = model_config["lstm_use_prev_reward"]

    if isinstance(action_space, Discrete):
        self.action_dim = action_space.n
    elif isinstance(action_space, MultiDiscrete):
        self.action_dim = np.sum(action_space.nvec)
    elif action_space.shape is not None:
        self.action_dim = int(np.product(action_space.shape))
    else:
        self.action_dim = int(len(action_space))

    # Add prev-action/reward nodes to input to LSTM.
    if self.use_prev_action:
        self.num_outputs += self.action_dim
    if self.use_prev_reward:
        self.num_outputs += 1

    # Define actual LSTM layer (with num_outputs being the nodes coming
    # from the wrapped (underlying) layer).
    self.lstm = nn.LSTM(
        self.num_outputs, self.cell_size, batch_first=not self.time_major)

    # Set self.num_outputs to the number of output nodes desired by the
    # caller of this constructor.
    self.num_outputs = num_outputs

    # Postprocess LSTM output with another hidden layer and compute values.
    self._logits_branch = SlimFC(in_size=self.cell_size,
                                 out_size=self.num_outputs,
                                 activation_fn=None,
                                 initializer=torch.nn.init.xavier_uniform_)
    self._value_branch = SlimFC(in_size=self.cell_size,
                                out_size=1,
                                activation_fn=None,
                                initializer=torch.nn.init.xavier_uniform_)

    # __sphinx_doc_begin__
    # Add prev-a/r to this model's view, if required.
    if model_config["lstm_use_prev_action"]:
        self.view_requirements[SampleBatch.PREV_ACTIONS] = \
            ViewRequirement(SampleBatch.ACTIONS, space=self.action_space,
                            shift=-1)
    if model_config["lstm_use_prev_reward"]:
        self.view_requirements[SampleBatch.PREV_REWARDS] = \
            ViewRequirement(SampleBatch.REWARDS, shift=-1)
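# Shape sketch for the LSTM input size above: the wrapped model's feature
# size plus a one-hot prev-action and a scalar prev-reward. The concrete
# numbers below are illustrative assumptions, not taken from the original.
import numpy as np
from gym.spaces import Discrete, MultiDiscrete

wrapped_out = 256                 # assumed feature size of the wrapped model
action_dim = Discrete(5).n        # -> 5 (one-hot width)
md_dim = int(np.sum(MultiDiscrete([3, 4]).nvec))  # -> 7 (concatenated one-hots)

lstm_in = wrapped_out + action_dim + 1  # prev-action one-hot + prev-reward
print(lstm_in)  # 262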
def __init__(self, obs_space: gym.spaces.Space, action_space: gym.spaces.Space,
             num_outputs: int, model_config: ModelConfigDict,
             name: str = "COMATorchModel", communication: bool = True):
    nn.Module.__init__(self)
    super(COMATorchModel, self).__init__(obs_space, action_space,
                                         num_outputs, model_config, name)
    self.communication = communication
    assert self.is_time_major()
    self.recurrent = True

    if hasattr(self.obs_space, "original_space") and isinstance(
            self.obs_space.original_space, gym.spaces.Dict):
        original_space = self.obs_space.original_space
        self.has_avail_actions = "avail_actions" in original_space.spaces
        self.has_real_state = "state" in original_space.spaces
        self.has_q_value = "q_value" in original_space.spaces
        self.has_value = "value" in original_space.spaces
        self.true_obs_space = original_space["obs"]
        if self.has_real_state:
            self.state_space = original_space["state"]
    else:
        self.has_avail_actions = False
        self.has_real_state = False
        self.has_q_value = False
        self.has_value = False
        self.state_space = None
        self.offsets = None
        self.true_obs_space = self.obs_space

    if not isinstance(self.true_obs_space, Box):
        raise UnsupportedSpaceException(
            "Space {} is not supported as observation.".format(
                self.true_obs_space))

    if not isinstance(action_space, MultiDiscrete):
        raise UnsupportedSpaceException(
            "Space {} is not supported as action.".format(
                self.action_space))

    assert len(self.true_obs_space.shape) == 2, \
        "Observation space is supposed to have 2 dimensions."

    self.nbr_agents = self.true_obs_space.shape[0]
    self.nbr_actions = int(self.action_space.nvec[0])
    self.gru_cell_size = model_config.get("gru_cell_size")

    self.inference_view_requirements.update({
        SampleBatch.OBS: ViewRequirement(shift=0),
        SampleBatch.PREV_ACTIONS: ViewRequirement(
            SampleBatch.ACTIONS, space=action_space, shift=-1),
        SampleBatch.ACTIONS: ViewRequirement(space=action_space),
        "state_in_{}".format(0): ViewRequirement(
            "state_out_{}".format(0),
            space=Box(-1.0, -1.0,
                      shape=(self.nbr_agents, self.gru_cell_size)),
            shift=-1)
    })

    self.stage1, self.gru, self.stage2 = self.create_actor()
    self.critic = self.create_critic()
    self.target_critic = self.create_critic()
    self.target_critic.load_state_dict(self.critic.state_dict())
def __init__(self, obs_space: gym.spaces.Space, action_space: gym.spaces.Space,
             num_outputs: int, model_config: ModelConfigDict, name: str):
    if not model_config.get("conv_filters"):
        model_config["conv_filters"] = get_filter_config(obs_space.shape)
    super(VisionNetwork, self).__init__(obs_space, action_space,
                                        num_outputs, model_config, name)

    activation = get_activation_fn(
        self.model_config.get("conv_activation"), framework="tf")
    filters = self.model_config["conv_filters"]
    assert len(filters) > 0, \
        "Must provide at least 1 entry in `conv_filters`!"

    # Post FC-net config.
    post_fcnet_hiddens = model_config.get("post_fcnet_hiddens", [])
    post_fcnet_activation = get_activation_fn(
        model_config.get("post_fcnet_activation"), framework="tf")

    no_final_linear = self.model_config.get("no_final_linear")
    vf_share_layers = self.model_config.get("vf_share_layers")

    input_shape = obs_space.shape
    self.data_format = "channels_last"

    inputs = tf.keras.layers.Input(shape=input_shape, name="observations")
    last_layer = inputs
    # Whether the last layer is the output of a Flattened (rather than
    # a n x (1,1) Conv2D).
    self.last_layer_is_flattened = False

    # Build the action layers.
    for i, (out_size, kernel, stride) in enumerate(filters[:-1], 1):
        last_layer = tf.keras.layers.Conv2D(
            out_size,
            kernel,
            strides=stride
            if isinstance(stride, (list, tuple)) else (stride, stride),
            activation=activation,
            padding="same",
            data_format="channels_last",
            name="conv{}".format(i))(last_layer)

    out_size, kernel, stride = filters[-1]

    # No final linear: Last layer has activation function and exits with
    # num_outputs nodes (this could be a 1x1 conv or a FC layer, depending
    # on `post_fcnet_...` settings).
    if no_final_linear and num_outputs:
        last_layer = tf.keras.layers.Conv2D(
            out_size if post_fcnet_hiddens else num_outputs,
            kernel,
            strides=stride
            if isinstance(stride, (list, tuple)) else (stride, stride),
            activation=activation,
            padding="valid",
            data_format="channels_last",
            name="conv_out")(last_layer)

        # Add (optional) post-fc-stack after last Conv2D layer.
        layer_sizes = post_fcnet_hiddens[:-1] + (
            [num_outputs] if post_fcnet_hiddens else [])
        feature_out = last_layer

        for i, out_size in enumerate(layer_sizes):
            feature_out = last_layer
            last_layer = tf.keras.layers.Dense(
                out_size,
                name="post_fcnet_{}".format(i),
                activation=post_fcnet_activation,
                kernel_initializer=normc_initializer(1.0))(last_layer)

    # Finish network normally (w/o overriding last layer size with
    # `num_outputs`), then add another linear one of size `num_outputs`.
    else:
        last_layer = tf.keras.layers.Conv2D(
            out_size,
            kernel,
            strides=stride
            if isinstance(stride, (list, tuple)) else (stride, stride),
            activation=activation,
            padding="valid",
            data_format="channels_last",
            name="conv{}".format(len(filters)))(last_layer)

        # num_outputs defined. Use that to create an exact
        # `num_output`-sized (1,1)-Conv2D.
        if num_outputs:
            if post_fcnet_hiddens:
                last_cnn = last_layer = tf.keras.layers.Conv2D(
                    post_fcnet_hiddens[0], [1, 1],
                    activation=post_fcnet_activation,
                    padding="same",
                    data_format="channels_last",
                    name="conv_out")(last_layer)

                # Add (optional) post-fc-stack after last Conv2D layer.
for i, out_size in enumerate(post_fcnet_hiddens[1:] + [num_outputs]): feature_out = last_layer last_layer = tf.keras.layers.Dense( out_size, name="post_fcnet_{}".format(i + 1), activation=post_fcnet_activation if i < len(post_fcnet_hiddens) - 1 else None, kernel_initializer=normc_initializer(1.0))( last_layer) else: feature_out = last_layer last_cnn = last_layer = tf.keras.layers.Conv2D( num_outputs, [1, 1], activation=None, padding="same", data_format="channels_last", name="conv_out")(last_layer) if last_cnn.shape[1] != 1 or last_cnn.shape[2] != 1: raise ValueError( "Given `conv_filters` ({}) do not result in a [B, 1, " "1, {} (`num_outputs`)] shape (but in {})! Please " "adjust your Conv2D stack such that the dims 1 and 2 " "are both 1.".format(self.model_config["conv_filters"], self.num_outputs, list(last_cnn.shape))) # num_outputs not known -> Flatten, then set self.num_outputs # to the resulting number of nodes. else: self.last_layer_is_flattened = True last_layer = tf.keras.layers.Flatten( data_format="channels_last")(last_layer) # Add (optional) post-fc-stack after last Conv2D layer. for i, out_size in enumerate(post_fcnet_hiddens): last_layer = tf.keras.layers.Dense( out_size, name="post_fcnet_{}".format(i), activation=post_fcnet_activation, kernel_initializer=normc_initializer(1.0))(last_layer) feature_out = last_layer self.num_outputs = last_layer.shape[1] logits_out = last_layer # Build the value layers if vf_share_layers: if not self.last_layer_is_flattened: feature_out = tf.keras.layers.Lambda( lambda x: tf.squeeze(x, axis=[1, 2]))(feature_out) value_out = tf.keras.layers.Dense( 1, name="value_out", activation=None, kernel_initializer=normc_initializer(0.01))(feature_out) else: # build a parallel set of hidden layers for the value net last_layer = inputs for i, (out_size, kernel, stride) in enumerate(filters[:-1], 1): last_layer = tf.keras.layers.Conv2D( out_size, kernel, strides=stride if isinstance(stride, (list, tuple)) else (stride, stride), activation=activation, padding="same", data_format="channels_last", name="conv_value_{}".format(i))(last_layer) out_size, kernel, stride = filters[-1] last_layer = tf.keras.layers.Conv2D( out_size, kernel, strides=stride if isinstance(stride, (list, tuple)) else (stride, stride), activation=activation, padding="valid", data_format="channels_last", name="conv_value_{}".format(len(filters)))(last_layer) last_layer = tf.keras.layers.Conv2D( 1, [1, 1], activation=None, padding="same", data_format="channels_last", name="conv_value_out")(last_layer) value_out = tf.keras.layers.Lambda( lambda x: tf.squeeze(x, axis=[1, 2]))(last_layer) self.base_model = tf.keras.Model(inputs, [logits_out, value_out])
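For reference, each `conv_filters` entry consumed above is an `[out_channels, kernel, stride]` triple; SAME padding is applied to all but the last entry, which is VALID-padded. An illustrative model config for 84x84x3 inputs (the values are the usual Atari-style defaults, not taken from this excerpt):

model_config = {
    "conv_filters": [
        [16, [8, 8], 4],     # 84x84 -> 21x21 (SAME padding)
        [32, [4, 4], 2],     # 21x21 -> 11x11 (SAME padding)
        [256, [11, 11], 1],  # 11x11 -> 1x1 (VALID padding; dims 1 and 2
                             # must end up 1x1 when num_outputs is given)
    ],
    "conv_activation": "relu",
    "post_fcnet_hiddens": [256],
    "post_fcnet_activation": "relu",
    "vf_share_layers": True,
}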
def __init__(self, obs_space: gym.spaces.Space,
             action_space: gym.spaces.Space, num_outputs: int,
             model_config: ModelConfigDict, name: str):
    if not model_config.get("conv_filters"):
        raise ValueError("Config for conv_filters is required")

    TorchModelV2.__init__(self, obs_space, action_space, num_outputs,
                          model_config, name)
    nn.Module.__init__(self)

    activation = self.model_config.get("conv_activation")
    filters = self.model_config["conv_filters"]
    assert len(filters) > 0, \
        "Must provide at least 1 entry in `conv_filters`!"
    no_final_linear = self.model_config.get("no_final_linear")
    vf_share_layers = self.model_config.get("vf_share_layers")

    # Whether the last layer is the output of a Flattened (rather than
    # a n x (1,)-Conv1d).
    self.last_layer_is_flattened = False
    self._logits = None

    layers = []
    # FIXME add stacking here
    (w, in_channels) = obs_space.shape
    in_size = w
    for out_channels, kernel, stride in filters[:-1]:
        padding, out_size = same_padding_1d(in_size, kernel, stride)
        layers.append(
            SlimConv1d(in_channels,
                       out_channels,
                       kernel,
                       stride,
                       padding,
                       activation_fn=activation))
        in_channels = out_channels
        in_size = out_size

    out_channels, kernel, stride = filters[-1]

    # No final linear: Last layer is a Conv1d and uses num_outputs.
    if no_final_linear and num_outputs:
        layers.append(
            SlimConv1d(
                in_channels,
                num_outputs,
                kernel,
                stride,
                None,  # padding=valid
                activation_fn=activation))
        out_channels = num_outputs
    # Finish network normally (w/o overriding last layer size with
    # `num_outputs`), then add another linear one of size `num_outputs`.
    else:
        layers.append(
            SlimConv1d(
                in_channels,
                out_channels,
                kernel,
                stride,
                None,  # padding=valid
                activation_fn=activation))

        # num_outputs defined. Use that to create an exact
        # `num_output`-sized (1,)-Conv1d.
        if num_outputs:
            in_size = np.ceil((in_size - kernel) / stride)
            padding, _ = same_padding_1d(in_size, 1, 1)
            self._logits = SlimConv1d(
                out_channels, num_outputs, 1, 1, padding,
                activation_fn=None)
        # num_outputs not known -> Flatten, then set self.num_outputs
        # to the resulting number of nodes.
        else:
            self.last_layer_is_flattened = True
            layers.append(nn.Flatten())
            self.num_outputs = out_channels

    self._convs = nn.Sequential(*layers)

    # Build the value layers
    self._value_branch_separate = self._value_branch = None
    if vf_share_layers:
        self._value_branch = SlimFC(
            out_channels,
            1,
            initializer=normc_initializer(0.01),
            activation_fn=None)
    else:
        vf_layers = []
        # Was `(h, w, in_channels) = obs_space.shape` with `assert h == 1`,
        # which contradicts the 2D `(w, in_channels)` unpacking above and
        # would raise on any input that passed the first unpack; use the
        # same 1D layout here.
        (w, in_channels) = obs_space.shape
        in_size = w
        for out_channels, kernel, stride in filters[:-1]:
            padding, out_size = same_padding_1d(in_size, kernel, stride)
            vf_layers.append(
                SlimConv1d(in_channels,
                           out_channels,
                           kernel,
                           stride,
                           padding,
                           activation_fn=activation))
            in_channels = out_channels
            in_size = out_size

        out_channels, kernel, stride = filters[-1]
        vf_layers.append(
            SlimConv1d(in_channels,
                       out_channels,
                       kernel,
                       stride,
                       None,
                       activation_fn=activation))
        vf_layers.append(
            SlimConv1d(in_channels=out_channels,
                       out_channels=1,
                       kernel=1,
                       stride=1,
                       padding=None,
                       activation_fn=None))
        self._value_branch_separate = nn.Sequential(*vf_layers)

    # Holds the current "base" output (before logits layer).
    self._features = None
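The helper `same_padding_1d` used above is not part of this excerpt. A sketch consistent with how it is called here, returning a `(left, right)` padding tuple plus the resulting output width (standard SAME-padding arithmetic; treat the exact signature as an assumption):

import numpy as np

def same_padding_1d(in_size, filter_size, stride_size):
    # SAME padding: out = ceil(in / stride); the total padding makes the
    # strided convolution cover the input exactly.
    out_size = int(np.ceil(float(in_size) / float(stride_size)))
    pad_total = max(
        (out_size - 1) * int(stride_size) + int(filter_size) -
        int(in_size), 0)
    pad_left = pad_total // 2
    return (pad_left, pad_total - pad_left), out_size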
def get_model_v2(obs_space: gym.Space,
                 action_space: gym.Space,
                 num_outputs: int,
                 model_config: ModelConfigDict,
                 framework: str = "tf",
                 name: str = "default_model",
                 model_interface: type = None,
                 default_model: type = None,
                 **model_kwargs) -> ModelV2:
    """Returns a suitable model compatible with given spaces and output.

    Args:
        obs_space (Space): Observation space of the target gym env. This
            may have an `original_space` attribute that specifies how to
            unflatten the tensor into a ragged tensor.
        action_space (Space): Action space of the target gym env.
        num_outputs (int): The size of the output vector of the model.
        model_config (ModelConfigDict): The "model" sub-config dict within
            the Trainer's config dict.
        framework (str): One of "tf2", "tf", "tfe", or "torch".
        name (str): Name (scope) for the model.
        model_interface (cls): Interface required for the model.
        default_model (cls): Override the default class for the model.
            This only has an effect when not using a custom model.
        model_kwargs (dict): Args to pass to the ModelV2 constructor.

    Returns:
        model (ModelV2): Model to use for the policy.
    """
    if model_config.get("custom_model"):
        if "custom_options" in model_config and \
                model_config["custom_options"] != DEPRECATED_VALUE:
            deprecation_warning("model.custom_options",
                                "model.custom_model_config",
                                error=False)
            model_config["custom_model_config"] = \
                model_config.pop("custom_options")

        # Allow model kwargs to be overridden / augmented by
        # custom_model_config.
        customized_model_kwargs = dict(
            model_kwargs, **model_config.get("custom_model_config", {}))

        if isinstance(model_config["custom_model"], type):
            model_cls = model_config["custom_model"]
        else:
            model_cls = _global_registry.get(RLLIB_MODEL,
                                             model_config["custom_model"])

        # TODO(sven): Hard-deprecate Model(V1).
        if issubclass(model_cls, ModelV2):
            logger.info("Wrapping {} as {}".format(model_cls,
                                                   model_interface))
            model_cls = ModelCatalog._wrap_if_needed(
                model_cls, model_interface)

            if framework in ["tf", "tfe"]:
                # Track and warn if vars were created but not registered.
                created = set()

                def track_var_creation(next_creator, **kw):
                    v = next_creator(**kw)
                    created.add(v)
                    return v

                with tf.variable_creator_scope(track_var_creation):
                    # Try calling with kwargs first (custom ModelV2 should
                    # accept these as kwargs, not get them from
                    # config["custom_model_config"] anymore).
                    try:
                        instance = model_cls(obs_space, action_space,
                                             num_outputs, model_config,
                                             name,
                                             **customized_model_kwargs)
                    except TypeError as e:
                        # Keyword error: Try old way w/o kwargs.
                        if "__init__() got an unexpected " in e.args[0]:
                            instance = model_cls(obs_space, action_space,
                                                 num_outputs, model_config,
                                                 name, **model_kwargs)
                            logger.warning(
                                "Custom ModelV2 should accept all custom "
                                "options as **kwargs, instead of expecting"
                                " them in config['custom_model_config']!")
                        # Other error -> re-raise.
                        else:
                            raise e

                registered = set(instance.variables())
                not_registered = set()
                for var in created:
                    if var not in registered:
                        not_registered.add(var)
                if not_registered:
                    raise ValueError(
                        "It looks like variables {} were created as part "
                        "of {} but do not appear in model.variables() "
                        "({}). Did you forget to call "
                        "model.register_variables() on the variables in "
                        "question?".format(not_registered, instance,
                                           registered))
            else:
                # PyTorch automatically tracks nn.Modules inside the
                # parent nn.Module's constructor.
                # Try calling with kwargs first (custom ModelV2 should
                # accept these as kwargs, not get them from
                # config["custom_model_config"] anymore).
                try:
                    instance = model_cls(obs_space, action_space,
                                         num_outputs, model_config, name,
                                         **customized_model_kwargs)
                except TypeError as e:
                    # Keyword error: Try old way w/o kwargs.
                    if "__init__() got an unexpected " in e.args[0]:
                        instance = model_cls(obs_space, action_space,
                                             num_outputs, model_config,
                                             name, **model_kwargs)
                        logger.warning(
                            "Custom ModelV2 should accept all custom "
                            "options as **kwargs, instead of expecting"
                            " them in config['custom_model_config']!")
                    # Other error -> re-raise.
                    else:
                        raise e
            return instance

        # TODO(sven): Hard-deprecate Model(V1). This check will be
        # superfluous then.
        elif tf.executing_eagerly():
            raise ValueError(
                "Eager execution requires a TFModelV2 model to be "
                "used, however you specified a custom model {}".format(
                    model_cls))

    if framework in ["tf", "tfe", "tf2"]:
        v2_class = None
        # Try to get a default v2 model.
        if not model_config.get("custom_model"):
            v2_class = default_model or ModelCatalog._get_v2_model_class(
                obs_space, model_config, framework=framework)

        if model_config.get("use_lstm"):
            wrapped_cls = v2_class
            forward = wrapped_cls.forward
            v2_class = ModelCatalog._wrap_if_needed(
                wrapped_cls, LSTMWrapper)
            v2_class._wrapped_forward = forward

        # Fallback to a default v1 model.
        if v2_class is None:
            if tf.executing_eagerly():
                raise ValueError(
                    "Eager execution requires a TFModelV2 model to be "
                    "used, however there is no default V2 model for this "
                    "observation space: {}, use_lstm={}".format(
                        obs_space, model_config.get("use_lstm")))
            v2_class = make_v1_wrapper(ModelCatalog.get_model)
        # Wrap in the requested interface.
        wrapper = ModelCatalog._wrap_if_needed(v2_class, model_interface)
        return wrapper(obs_space, action_space, num_outputs, model_config,
                       name, **model_kwargs)
    elif framework == "torch":
        v2_class = \
            default_model or ModelCatalog._get_v2_model_class(
                obs_space, model_config, framework=framework)
        if model_config.get("use_lstm"):
            from ray.rllib.models.torch.recurrent_net import LSTMWrapper \
                as TorchLSTMWrapper
            wrapped_cls = v2_class
            forward = wrapped_cls.forward
            v2_class = ModelCatalog._wrap_if_needed(
                wrapped_cls, TorchLSTMWrapper)
            v2_class._wrapped_forward = forward
        # Wrap in the requested interface.
        wrapper = ModelCatalog._wrap_if_needed(v2_class, model_interface)
        return wrapper(obs_space, action_space, num_outputs, model_config,
                       name, **model_kwargs)
    else:
        raise NotImplementedError(
            "`framework` must be 'tf|tfe|torch', but is "
            "{}!".format(framework))
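A typical call path for the factory above: register a custom ModelV2 class under a string key, then reference it through `model_config["custom_model"]`. `MyTorchModel`, `env`, and `num_actions` below are placeholders, not names from this excerpt:

from ray.rllib.models import ModelCatalog

ModelCatalog.register_custom_model("my_model", MyTorchModel)

model = ModelCatalog.get_model_v2(
    obs_space=env.observation_space,
    action_space=env.action_space,
    num_outputs=num_actions,
    model_config={
        "custom_model": "my_model",
        # Forwarded to MyTorchModel.__init__ as **kwargs (see above).
        "custom_model_config": {"hidden_dim": 64},
    },
    framework="torch",
)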
def get_action_dist(action_space: gym.Space,
                    config: ModelConfigDict,
                    dist_type: str = None,
                    framework: str = "tf",
                    **kwargs) -> (type, int):
    """Returns a distribution class and size for the given action space.

    Args:
        action_space (Space): Action space of the target gym env.
        config (Optional[dict]): Optional model config.
        dist_type (Optional[str]): Identifier of the action distribution
            interpreted as a hint.
        framework (str): One of "tf", "tfe", or "torch".
        kwargs (dict): Optional kwargs to pass on to the Distribution's
            constructor.

    Returns:
        Tuple:
            - dist_class (ActionDistribution): Python class of the
              distribution.
            - dist_dim (int): The size of the input vector to the
              distribution.
    """
    dist = None
    config = config or MODEL_DEFAULTS
    # Custom distribution given.
    if config.get("custom_action_dist"):
        action_dist_name = config["custom_action_dist"]
        logger.debug("Using custom action distribution {}".format(
            action_dist_name))
        dist = _global_registry.get(RLLIB_ACTION_DIST, action_dist_name)
    # Dist_type is given directly as a class.
    elif type(dist_type) is type and \
            issubclass(dist_type, ActionDistribution) and \
            dist_type not in (
                MultiActionDistribution, TorchMultiActionDistribution):
        dist = dist_type
    # Box space -> DiagGaussian OR Deterministic.
    elif isinstance(action_space, gym.spaces.Box):
        if len(action_space.shape) > 1:
            raise UnsupportedSpaceException(
                "Action space has multiple dimensions "
                "{}. ".format(action_space.shape) +
                "Consider reshaping this into a single dimension, "
                "using a custom action distribution, "
                "using a Tuple action space, or the multi-agent API.")
        # TODO(sven): Check for bounds and return SquashedNormal, etc..
        if dist_type is None:
            dist = TorchDiagGaussian if framework == "torch" \
                else DiagGaussian
        elif dist_type == "deterministic":
            dist = TorchDeterministic if framework == "torch" \
                else Deterministic
    # Discrete Space -> Categorical.
    elif isinstance(action_space, gym.spaces.Discrete):
        dist = TorchCategorical if framework == "torch" else Categorical
    # Tuple/Dict Spaces -> MultiAction.
    elif dist_type in (MultiActionDistribution,
                       TorchMultiActionDistribution) or \
            isinstance(action_space, (gym.spaces.Tuple, gym.spaces.Dict)):
        flat_action_space = flatten_space(action_space)
        child_dists_and_in_lens = tree.map_structure(
            lambda s: ModelCatalog.get_action_dist(
                s, config, framework=framework), flat_action_space)
        child_dists = [e[0] for e in child_dists_and_in_lens]
        input_lens = [int(e[1]) for e in child_dists_and_in_lens]
        return partial(
            (TorchMultiActionDistribution
             if framework == "torch" else MultiActionDistribution),
            action_space=action_space,
            child_distributions=child_dists,
            input_lens=input_lens), int(sum(input_lens))
    # Simplex -> Dirichlet.
    elif isinstance(action_space, Simplex):
        if framework == "torch":
            # TODO(sven): implement
            raise NotImplementedError(
                "Simplex action spaces not supported for torch.")
        dist = Dirichlet
    # MultiDiscrete -> MultiCategorical.
    elif isinstance(action_space, gym.spaces.MultiDiscrete):
        dist = TorchMultiCategorical if framework == "torch" else \
            MultiCategorical
        return partial(dist, input_lens=action_space.nvec), \
            int(sum(action_space.nvec))
    # Unknown type -> Error.
    else:
        raise NotImplementedError("Unsupported args: {} {}".format(
            action_space, dist_type))

    return dist, dist.required_model_output_shape(action_space, config)
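Illustrative dispatches of the resolver above (the sizes follow each distribution's `required_model_output_shape`; treat these as expected behavior, not output captured from this excerpt):

# Discrete(4) -> one logit per action.
dist_cls, dist_dim = ModelCatalog.get_action_dist(
    gym.spaces.Discrete(4), config=None, framework="torch")
# dist_cls == TorchCategorical, dist_dim == 4

# 1D Box(3,) -> DiagGaussian: 3 means + 3 log-stds.
dist_cls, dist_dim = ModelCatalog.get_action_dist(
    gym.spaces.Box(-1.0, 1.0, (3, )), config=None, framework="torch")
# dist_cls == TorchDiagGaussian, dist_dim == 6

# MultiDiscrete([3, 5]) -> partial(TorchMultiCategorical,
# input_lens=[3, 5]), dist_dim == 8.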
def __init__(self, obs_space: gym.spaces.Space,
             action_space: gym.spaces.Space, num_outputs: int,
             model_config: ModelConfigDict, name: str):
    if not model_config.get("conv_filters"):
        model_config["conv_filters"] = get_filter_config(obs_space.shape)

    super(CustomVisionNetwork, self).__init__(obs_space, action_space,
                                              num_outputs, model_config,
                                              name)

    activation = get_activation_fn(
        self.model_config.get("conv_activation"), framework="tf")
    filters = self.model_config["conv_filters"]
    assert len(filters) > 0, \
        "Must provide at least 1 entry in `conv_filters`!"

    # Post FC net config.
    post_fcnet_hiddens = model_config.get("post_fcnet_hiddens", [])
    post_fcnet_activation = get_activation_fn(
        model_config.get("post_fcnet_activation"), framework="tf")

    no_final_linear = self.model_config.get("no_final_linear")
    vf_share_layers = self.model_config.get("vf_share_layers")
    self.traj_view_framestacking = False

    # Perform Atari framestacking via traj. view API.
    if model_config.get("num_framestacks") != "auto" and \
            model_config.get("num_framestacks", 0) > 1:
        input_shape = obs_space.shape + (model_config["num_framestacks"], )
        self.data_format = "channels_first"
        self.traj_view_framestacking = True
    else:
        input_shape = obs_space.shape
        self.data_format = "channels_last"

    inputs = tf.keras.layers.Input(shape=input_shape, name="observations")
    # is_training = tf.keras.layers.Input(
    #     shape=(), dtype=tf.bool, batch_size=1, name="is_training")
    last_layer = inputs
    # Whether the last layer is the output of a Flattened (rather than
    # a n x (1,1) Conv2D).
    self.last_layer_is_flattened = False

    # Build the action layers: a plain first conv, then residual blocks
    # (two convs + skip connection) for every further filter entry.
    for i, (out_size, kernel, stride) in enumerate(filters[:-1], 1):
        if i == 1:
            last_layer = tf.keras.layers.Conv2D(
                out_size,
                kernel,
                strides=(stride, stride),
                padding="same",
                data_format="channels_last",
                name="conv{}".format(i))(last_layer)
            # last_layer = tf.keras.layers.BatchNormalization()(
            #     last_layer, training=is_training[0])
            last_layer = tf.keras.layers.ReLU()(last_layer)
        else:
            input_layer = last_layer
            last_layer = tf.keras.layers.Conv2D(
                out_size,
                kernel,
                strides=(stride, stride),
                padding="same",
                data_format="channels_last",
                name="conv{}".format(i * 2 - 2))(last_layer)
            # last_layer = tf.keras.layers.BatchNormalization()(
            #     last_layer, training=is_training[0])
            last_layer = tf.keras.layers.ReLU()(last_layer)
            last_layer = tf.keras.layers.Conv2D(
                out_size,
                kernel,
                strides=(stride, stride),
                padding="same",
                data_format="channels_last",
                name="conv{}".format(i * 2 - 1))(last_layer)
            # last_layer = tf.keras.layers.BatchNormalization()(
            #     last_layer, training=is_training[0])
            # Residual connection around the two convs.
            last_layer = tf.keras.layers.Add()([input_layer, last_layer])
            last_layer = tf.keras.layers.ReLU()(last_layer)

    out_size, kernel, stride = filters[-1]

    # Policy head.
    p_layer = tf.keras.layers.Conv2D(
        filters=out_size,
        kernel_size=kernel,
        strides=(stride, stride),
        padding="valid",
        data_format="channels_last",
        name="conv{}".format(2 * len(filters)))(last_layer)
    p_layer = tf.keras.layers.ReLU()(p_layer)
    # last_layer = tf1.layers.AveragePooling2D((2, 2), (2, 2))(last_layer)

    # Value head.
    v_layer = tf.keras.layers.Conv2D(
        filters=1,
        kernel_size=kernel,
        strides=(stride, stride),
        padding="valid",
        data_format="channels_last",
        name="conv{}".format(2 * len(filters) + 1))(last_layer)
    v_layer = tf.keras.layers.ReLU()(v_layer)

    p_layer = tf.keras.layers.Flatten(data_format="channels_last")(p_layer)
    v_layer = tf.keras.layers.Flatten(data_format="channels_last")(v_layer)
    self.last_layer_is_flattened = True

    self.num_outputs_p = p_layer.shape[1]
    self.num_outputs_v = v_layer.shape[1]
    logits_out = p_layer
    self._value_out = v_layer

    # NOTE: The default VisionNetwork's (optional) post-fc-stack and its
    # shared/separate value-branch construction are disabled here: this
    # network builds its own policy (p_layer) and value (v_layer) heads
    # directly from the conv trunk.

    self.base_model = tf.keras.Model(inputs, [p_layer, self._value_out])
    self.base_model.summary()
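The two-conv/skip-connection pattern hand-rolled in the layer loop above reads as a plain residual block; a sketch of the same computation as a helper (names are illustrative, not from this excerpt). Note the `Add` only type-checks when the stride is 1 and the channel counts of the shortcut and the second conv match, which the loop above implicitly relies on for `i >= 2`:

def residual_block(x, out_size, kernel, name_prefix):
    # Two SAME-padded convs with a skip connection; ReLU after the add.
    shortcut = x
    y = tf.keras.layers.Conv2D(
        out_size, kernel, strides=(1, 1), padding="same",
        name=name_prefix + "_a")(x)
    y = tf.keras.layers.ReLU()(y)
    y = tf.keras.layers.Conv2D(
        out_size, kernel, strides=(1, 1), padding="same",
        name=name_prefix + "_b")(y)
    y = tf.keras.layers.Add()([shortcut, y])
    return tf.keras.layers.ReLU()(y)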
def _get_v2_model_class(input_space: gym.Space,
                        model_config: ModelConfigDict,
                        framework: str = "tf") -> Type[ModelV2]:
    VisionNet = None
    ComplexNet = None
    Keras_FCNet = None
    Keras_VisionNet = None

    if framework in ["tf2", "tf", "tfe"]:
        from ray.rllib.models.tf.fcnet import \
            FullyConnectedNetwork as FCNet, \
            Keras_FullyConnectedNetwork as Keras_FCNet
        from ray.rllib.models.tf.visionnet import \
            VisionNetwork as VisionNet, \
            Keras_VisionNetwork as Keras_VisionNet
        from ray.rllib.models.tf.complex_input_net import \
            ComplexInputNetwork as ComplexNet
    elif framework == "torch":
        from ray.rllib.models.torch.fcnet import (FullyConnectedNetwork as
                                                  FCNet)
        from ray.rllib.models.torch.visionnet import (VisionNetwork as
                                                      VisionNet)
        from ray.rllib.models.torch.complex_input_net import \
            ComplexInputNetwork as ComplexNet
    elif framework == "jax":
        from ray.rllib.models.jax.fcnet import (FullyConnectedNetwork as
                                                FCNet)
    else:
        raise ValueError(
            "framework={} not supported in `ModelCatalog._get_v2_model_"
            "class`!".format(framework))

    # Complex space, where at least one sub-space is image.
    # -> Complex input model (which auto-flattens everything, but
    # correctly processes image components with default CNN stacks).
    space_to_check = input_space if not hasattr(
        input_space, "original_space") else input_space.original_space
    if isinstance(input_space, (Dict, Tuple)) or (isinstance(
            space_to_check, (Dict, Tuple)) and any(
                isinstance(s, Box) and len(s.shape) >= 2
                for s in tree.flatten(space_to_check.spaces))):
        return ComplexNet

    # Single, flattenable/one-hot-able space -> Simple FCNet.
    if isinstance(input_space, (Discrete, MultiDiscrete)) or \
            len(input_space.shape) == 1 or (
            len(input_space.shape) == 2):
        # Keras native requested AND no auto-rnn-wrapping.
        if model_config.get("_use_default_native_models") and Keras_FCNet:
            return Keras_FCNet
        # Classic ModelV2 FCNet.
        else:
            return FCNet

    elif framework == "jax":
        raise NotImplementedError("No non-FC default net for JAX yet!")

    # Last resort: Conv2D stack for single image spaces.
    if model_config.get("_use_default_native_models") and Keras_VisionNet:
        return Keras_VisionNet
    return VisionNet
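Expected dispatch of the selector above for a few representative spaces (illustrative, using `MODEL_DEFAULTS` as the config):

# Dict with an image component -> ComplexInputNetwork.
ModelCatalog._get_v2_model_class(
    Dict({"img": Box(0.0, 1.0, (84, 84, 3)),
          "vec": Box(-1.0, 1.0, (5, ))}),
    MODEL_DEFAULTS, framework="torch")

# Flat 1D Box or Discrete -> FullyConnectedNetwork.
ModelCatalog._get_v2_model_class(
    Box(-1.0, 1.0, (17, )), MODEL_DEFAULTS, framework="torch")
ModelCatalog._get_v2_model_class(
    Discrete(6), MODEL_DEFAULTS, framework="torch")

# Single 3D Box image -> VisionNetwork (the last-resort branch).
ModelCatalog._get_v2_model_class(
    Box(0.0, 1.0, (84, 84, 3)), MODEL_DEFAULTS, framework="torch")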
def __init__(
    self,
    obs_space: gym.spaces.Space,
    action_space: gym.spaces.Space,
    num_outputs: int,
    model_config: ModelConfigDict,
    name: str,
):
    if not model_config.get("conv_filters"):
        model_config["conv_filters"] = get_filter_config(obs_space.shape)

    TorchModelV2.__init__(
        self, obs_space, action_space, num_outputs, model_config, name
    )
    nn.Module.__init__(self)

    activation = self.model_config.get("conv_activation")
    filters = self.model_config["conv_filters"]
    assert len(filters) > 0, "Must provide at least 1 entry in `conv_filters`!"

    # Post FC net config.
    post_fcnet_hiddens = model_config.get("post_fcnet_hiddens", [])
    post_fcnet_activation = get_activation_fn(
        model_config.get("post_fcnet_activation"), framework="torch"
    )

    no_final_linear = self.model_config.get("no_final_linear")
    vf_share_layers = self.model_config.get("vf_share_layers")

    # Whether the last layer is the output of a Flattened (rather than
    # a n x (1,1) Conv2D).
    self.last_layer_is_flattened = False
    self._logits = None

    layers = []
    (w, h, in_channels) = obs_space.shape
    in_size = [w, h]
    for out_channels, kernel, stride in filters[:-1]:
        padding, out_size = same_padding(in_size, kernel, stride)
        layers.append(
            SlimConv2d(
                in_channels,
                out_channels,
                kernel,
                stride,
                padding,
                activation_fn=activation,
            )
        )
        in_channels = out_channels
        in_size = out_size

    out_channels, kernel, stride = filters[-1]

    # No final linear: Last layer has activation function and exits with
    # num_outputs nodes (this could be a 1x1 conv or a FC layer, depending
    # on `post_fcnet_...` settings).
    if no_final_linear and num_outputs:
        out_channels = out_channels if post_fcnet_hiddens else num_outputs
        layers.append(
            SlimConv2d(
                in_channels,
                out_channels,
                kernel,
                stride,
                None,  # padding=valid
                activation_fn=activation,
            )
        )

        # Add (optional) post-fc-stack after last Conv2D layer.
        layer_sizes = post_fcnet_hiddens[:-1] + (
            [num_outputs] if post_fcnet_hiddens else []
        )
        for i, out_size in enumerate(layer_sizes):
            layers.append(
                SlimFC(
                    in_size=out_channels,
                    out_size=out_size,
                    activation_fn=post_fcnet_activation,
                    initializer=normc_initializer(1.0),
                )
            )
            out_channels = out_size

    # Finish network normally (w/o overriding last layer size with
    # `num_outputs`), then add another linear one of size `num_outputs`.
    else:
        layers.append(
            SlimConv2d(
                in_channels,
                out_channels,
                kernel,
                stride,
                None,  # padding=valid
                activation_fn=activation,
            )
        )

        # num_outputs defined. Use that to create an exact
        # `num_output`-sized (1,1)-Conv2D.
        if num_outputs:
            in_size = [
                np.ceil((in_size[0] - kernel[0]) / stride),
                np.ceil((in_size[1] - kernel[1]) / stride),
            ]
            padding, _ = same_padding(in_size, [1, 1], [1, 1])
            if post_fcnet_hiddens:
                layers.append(nn.Flatten())
                in_size = out_channels
                # Add (optional) post-fc-stack after last Conv2D layer.
                for i, out_size in enumerate(post_fcnet_hiddens + [num_outputs]):
                    layers.append(
                        SlimFC(
                            in_size=in_size,
                            out_size=out_size,
                            activation_fn=post_fcnet_activation
                            if i < len(post_fcnet_hiddens) - 1
                            else None,
                            initializer=normc_initializer(1.0),
                        )
                    )
                    in_size = out_size
                # Last layer is logits layer.
                self._logits = layers.pop()
            else:
                self._logits = SlimConv2d(
                    out_channels,
                    num_outputs,
                    [1, 1],
                    1,
                    padding,
                    activation_fn=None,
                )
        # num_outputs not known -> Flatten, then set self.num_outputs
        # to the resulting number of nodes.
        else:
            self.last_layer_is_flattened = True
            layers.append(nn.Flatten())

    self._convs = nn.Sequential(*layers)

    # If our num_outputs is still unknown, we need to do a test pass to
    # figure out the output dimensions. This could be the case, if we have
    # the Flatten layer at the end.
    if self.num_outputs is None:
        # Create a B=1 dummy sample and push it through our conv-net.
        dummy_in = (
            torch.from_numpy(self.obs_space.sample())
            .permute(2, 0, 1)
            .unsqueeze(0)
            .float()
        )
        dummy_out = self._convs(dummy_in)
        self.num_outputs = dummy_out.shape[1]

    # Build the value layers
    self._value_branch_separate = self._value_branch = None
    if vf_share_layers:
        self._value_branch = SlimFC(
            out_channels, 1, initializer=normc_initializer(0.01), activation_fn=None
        )
    else:
        vf_layers = []
        (w, h, in_channels) = obs_space.shape
        in_size = [w, h]
        for out_channels, kernel, stride in filters[:-1]:
            padding, out_size = same_padding(in_size, kernel, stride)
            vf_layers.append(
                SlimConv2d(
                    in_channels,
                    out_channels,
                    kernel,
                    stride,
                    padding,
                    activation_fn=activation,
                )
            )
            in_channels = out_channels
            in_size = out_size

        out_channels, kernel, stride = filters[-1]
        vf_layers.append(
            SlimConv2d(
                in_channels,
                out_channels,
                kernel,
                stride,
                None,
                activation_fn=activation,
            )
        )

        vf_layers.append(
            SlimConv2d(
                in_channels=out_channels,
                out_channels=1,
                kernel=1,
                stride=1,
                padding=None,
                activation_fn=None,
            )
        )
        self._value_branch_separate = nn.Sequential(*vf_layers)

    # Holds the current "base" output (before logits layer).
    self._features = None
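The dummy-forward trick above generalizes: whenever an output size depends on the input resolution, pushing one fake batch through the stack is simpler than deriving the shape analytically. A standalone sketch (the layer sizes below are illustrative):

import torch
import torch.nn as nn

convs = nn.Sequential(
    nn.Conv2d(3, 16, 8, stride=4), nn.ReLU(),
    nn.Conv2d(16, 32, 4, stride=2), nn.ReLU(),
    nn.Flatten(),
)
with torch.no_grad():
    # (84 - 8) // 4 + 1 = 20, then (20 - 4) // 2 + 1 = 9.
    num_outputs = convs(torch.zeros(1, 3, 84, 84)).shape[1]  # 32 * 9 * 9 = 2592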
def __init__(self, obs_space: gym.spaces.Space,
             action_space: gym.spaces.Space, num_outputs: int,
             model_config: ModelConfigDict, name: str):
    if not model_config.get("conv_filters"):
        model_config["conv_filters"] = get_filter_config(obs_space.shape)

    super(VisionNetwork, self).__init__(obs_space, action_space,
                                        num_outputs, model_config, name)

    activation = get_activation_fn(
        self.model_config.get("conv_activation"), framework="tf")
    filters = self.model_config["conv_filters"]
    assert len(filters) > 0, \
        "Must provide at least 1 entry in `conv_filters`!"
    no_final_linear = self.model_config.get("no_final_linear")
    vf_share_layers = self.model_config.get("vf_share_layers")

    inputs = tf.keras.layers.Input(
        shape=obs_space.shape, name="observations")
    last_layer = inputs
    # Whether the last layer is the output of a Flattened (rather than
    # a n x (1,1) Conv2D).
    self.last_layer_is_flattened = False

    # Build the action layers
    for i, (out_size, kernel, stride) in enumerate(filters[:-1], 1):
        last_layer = tf.keras.layers.Conv2D(
            out_size,
            kernel,
            strides=(stride, stride),
            activation=activation,
            padding="same",
            data_format="channels_last",
            name="conv{}".format(i))(last_layer)

    out_size, kernel, stride = filters[-1]

    # No final linear: Last layer is a Conv2D and uses num_outputs.
    if no_final_linear and num_outputs:
        last_layer = tf.keras.layers.Conv2D(
            num_outputs,
            kernel,
            strides=(stride, stride),
            activation=activation,
            padding="valid",
            data_format="channels_last",
            name="conv_out")(last_layer)
        conv_out = last_layer
    # Finish network normally (w/o overriding last layer size with
    # `num_outputs`), then add another linear one of size `num_outputs`.
    else:
        last_layer = tf.keras.layers.Conv2D(
            out_size,
            kernel,
            strides=(stride, stride),
            activation=activation,
            padding="valid",
            data_format="channels_last",
            name="conv{}".format(len(filters)))(last_layer)

        # num_outputs defined. Use that to create an exact
        # `num_output`-sized (1,1)-Conv2D.
        if num_outputs:
            conv_out = tf.keras.layers.Conv2D(
                num_outputs, [1, 1],
                activation=None,
                padding="same",
                data_format="channels_last",
                name="conv_out")(last_layer)
            if conv_out.shape[1] != 1 or conv_out.shape[2] != 1:
                raise ValueError(
                    "Given `conv_filters` ({}) do not result in a [B, 1, "
                    "1, {} (`num_outputs`)] shape (but in {})! Please "
                    "adjust your Conv2D stack such that the dims 1 and 2 "
                    "are both 1.".format(self.model_config["conv_filters"],
                                         self.num_outputs,
                                         list(conv_out.shape)))
        # num_outputs not known -> Flatten, then set self.num_outputs
        # to the resulting number of nodes.
        else:
            self.last_layer_is_flattened = True
            conv_out = tf.keras.layers.Flatten(
                data_format="channels_last")(last_layer)
            self.num_outputs = conv_out.shape[1]

    # Build the value layers
    if vf_share_layers:
        last_layer = tf.keras.layers.Lambda(
            lambda x: tf.squeeze(x, axis=[1, 2]))(last_layer)
        value_out = tf.keras.layers.Dense(
            1,
            name="value_out",
            activation=None,
            kernel_initializer=normc_initializer(0.01))(last_layer)
    else:
        # Build a parallel set of hidden layers for the value net.
        last_layer = inputs
        for i, (out_size, kernel, stride) in enumerate(filters[:-1], 1):
            last_layer = tf.keras.layers.Conv2D(
                out_size,
                kernel,
                strides=(stride, stride),
                activation=activation,
                padding="same",
                data_format="channels_last",
                name="conv_value_{}".format(i))(last_layer)
        out_size, kernel, stride = filters[-1]
        last_layer = tf.keras.layers.Conv2D(
            out_size,
            kernel,
            strides=(stride, stride),
            activation=activation,
            padding="valid",
            data_format="channels_last",
            name="conv_value_{}".format(len(filters)))(last_layer)
        last_layer = tf.keras.layers.Conv2D(
            1, [1, 1],
            activation=None,
            padding="same",
            data_format="channels_last",
            name="conv_value_out")(last_layer)
        value_out = tf.keras.layers.Lambda(
            lambda x: tf.squeeze(x, axis=[1, 2]))(last_layer)

    self.base_model = tf.keras.Model(inputs, [conv_out, value_out])
    self.register_variables(self.base_model.variables)
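The `[B, 1, 1, num_outputs]` requirement enforced above can be checked by hand with the VALID-padding formula `out = (in - kernel) // stride + 1` applied to the last filter entry; a sketch:

def _valid_conv_out_size(in_size, kernel, stride):
    # Output width/height of one "valid"-padded conv dimension.
    return (in_size - kernel) // stride + 1

# Default Atari stack on 84x84: the SAME-padded layers give
# ceil(84 / 4) = 21 and ceil(21 / 2) = 11; the final VALID layer
# (kernel 11, stride 1) must then collapse dims 1 and 2 to 1.
assert _valid_conv_out_size(11, 11, 1) == 1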
def __init__(self, obs_space: gym.spaces.Space,
             action_space: gym.spaces.Space, num_outputs: int,
             model_config: ModelConfigDict, name: str):
    super(FullyConnectedNetwork, self).__init__(
        obs_space, action_space, num_outputs, model_config, name)

    hiddens = model_config.get("fcnet_hiddens", []) + \
        model_config.get("post_fcnet_hiddens", [])
    activation = model_config.get("fcnet_activation")
    if not model_config.get("fcnet_hiddens", []):
        activation = model_config.get("post_fcnet_activation")
    activation = get_activation_fn(activation)
    no_final_linear = model_config.get("no_final_linear")
    vf_share_layers = model_config.get("vf_share_layers")
    free_log_std = model_config.get("free_log_std")

    # Generate free-floating bias variables for the second half of
    # the outputs.
    if free_log_std:
        assert num_outputs % 2 == 0, (
            "num_outputs must be divisible by two", num_outputs)
        num_outputs = num_outputs // 2
        self.log_std_var = tf.Variable(
            [0.0] * num_outputs, dtype=tf.float32, name="log_std")

    # We are using obs_flat, so take the flattened shape as input.
    inputs = tf.keras.layers.Input(
        shape=(int(np.product(obs_space.shape)), ), name="observations")
    # Last hidden layer output (before logits outputs).
    last_layer = inputs
    # The action distribution outputs.
    logits_out = None
    i = 1

    # Create layers 0 to second-last.
    for size in hiddens[:-1]:
        last_layer = tf.keras.layers.Dense(
            size,
            name="fc_{}".format(i),
            activation=activation,
            kernel_initializer=normc_initializer(1.0))(last_layer)
        i += 1

    # The last layer is adjusted to be of size num_outputs, but it's a
    # layer with activation.
    if no_final_linear and num_outputs:
        logits_out = tf.keras.layers.Dense(
            num_outputs,
            name="fc_out",
            activation=activation,
            kernel_initializer=normc_initializer(1.0))(last_layer)
    # Finish the layers with the provided sizes (`hiddens`), plus -
    # iff num_outputs > 0 - a last linear layer of size num_outputs.
    else:
        if len(hiddens) > 0:
            last_layer = tf.keras.layers.Dense(
                hiddens[-1],
                name="fc_{}".format(i),
                activation=activation,
                kernel_initializer=normc_initializer(1.0))(last_layer)
        if num_outputs:
            logits_out = tf.keras.layers.Dense(
                num_outputs,
                name="fc_out",
                activation=None,
                kernel_initializer=normc_initializer(0.01))(last_layer)
        # Adjust num_outputs to be the number of nodes in the last layer.
        else:
            self.num_outputs = ([int(np.product(obs_space.shape))] +
                                hiddens[-1:])[-1]

    # Concat the log std vars to the end of the state-dependent means.
    if free_log_std and logits_out is not None:

        def tiled_log_std(x):
            return tf.tile(
                tf.expand_dims(self.log_std_var, 0), [tf.shape(x)[0], 1])

        log_std_out = tf.keras.layers.Lambda(tiled_log_std)(inputs)
        logits_out = tf.keras.layers.Concatenate(axis=1)(
            [logits_out, log_std_out])

    last_vf_layer = None
    if not vf_share_layers:
        # Build a parallel set of hidden layers for the value net.
        last_vf_layer = inputs
        i = 1
        for size in hiddens:
            last_vf_layer = tf.keras.layers.Dense(
                size,
                name="fc_value_{}".format(i),
                activation=activation,
                kernel_initializer=normc_initializer(1.0))(last_vf_layer)
            i += 1

    value_out = tf.keras.layers.Dense(
        1,
        name="value_out",
        activation=None,
        kernel_initializer=normc_initializer(0.01))(
            last_vf_layer if last_vf_layer is not None else last_layer)

    self.base_model = tf.keras.Model(
        inputs, [(logits_out
                  if logits_out is not None else last_layer), value_out])
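What `free_log_std` buys above, concretely: for a DiagGaussian over a 2D action space, `num_outputs` arrives as 4, the net only learns the 2 means, and the 2 log-stds come from a single state-independent variable that is tiled across the batch and concatenated onto the means, so exploration noise is shared globally rather than predicted per-observation. A minimal standalone rendering of that tiling (values are illustrative):

import tensorflow as tf

log_std_var = tf.Variable([0.0, 0.0])           # state-independent log-stds
means = tf.constant([[0.3, -0.1], [0.7, 0.2]])  # stand-in for fc_out (batch of 2)
log_stds = tf.tile(
    tf.expand_dims(log_std_var, 0), [tf.shape(means)[0], 1])
logits = tf.concat([means, log_stds], axis=1)   # shape [2, 4]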