def __init__(self, obs_space: gym.spaces.Space, action_space: gym.spaces.Space,
             num_outputs: int, model_config: ModelConfigDict, name: str):
    TorchModelV2.__init__(self, obs_space, action_space, num_outputs,
                          model_config, name)
    nn.Module.__init__(self)

    # Default to an empty dict so missing "custom_model_config" does not
    # crash the `.get()` call below.
    custom_configs = model_config.get("custom_model_config", {})
    self._sensor_seq_len = custom_configs.get("sensor_seq_len", 10)
    activation = model_config.get("fcnet_activation", "tanh")

    encoder_layer = nn.TransformerEncoderLayer(
        d_model=3, nhead=3, batch_first=True, dim_feedforward=128)
    self._transformer_encoder = nn.TransformerEncoder(
        encoder_layer, num_layers=2)

    self._all_fc1 = SlimFC(in_size=3,
                           out_size=64,
                           initializer=normc_initializer(1.0),
                           activation_fn=activation)
    self._all_fc2 = SlimFC(in_size=64,
                           out_size=16,
                           initializer=normc_initializer(1.0),
                           activation_fn=activation)
    self._action_layer = SlimFC(in_size=16,
                                out_size=num_outputs,
                                initializer=normc_initializer(0.01),
                                activation_fn=None)
    self._value_layer = SlimFC(in_size=16,
                               out_size=1,
                               initializer=normc_initializer(0.01),
                               activation_fn=None)
    # Holds the current "base" output (before logits layer).
    self._features = None
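# A minimal, self-contained sketch of the shape flow through the transformer
# encoder configured above (pure PyTorch). The batch size, sequence length,
# and the mean-pool reduction are illustrative assumptions, not part of the
# original model; they only show how a [B, seq_len, 3] observation could be
# reduced to the 3-dim input the 3 -> 64 SlimFC layer expects.
import torch
import torch.nn as nn

encoder_layer = nn.TransformerEncoderLayer(
    d_model=3, nhead=3, batch_first=True, dim_feedforward=128)
encoder = nn.TransformerEncoder(encoder_layer, num_layers=2)

obs = torch.randn(4, 10, 3)    # [batch, sensor_seq_len, features]
encoded = encoder(obs)         # [4, 10, 3]: the encoder preserves the shape
pooled = encoded.mean(dim=1)   # [4, 3]: one plausible reduction over time
print(encoded.shape, pooled.shape)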
def _validate_config(config: ModelConfigDict, framework: str) -> None:
    """Validates a given model config dict.

    Args:
        config (ModelConfigDict): The "model" sub-config dict
            within the Trainer's config dict.
        framework (str): One of "jax", "tf2", "tf", "tfe", or "torch".

    Raises:
        ValueError: If something is wrong with the given config.
    """
    if config.get("use_attention") and config.get("use_lstm"):
        raise ValueError("Only one of `use_lstm` or `use_attention` may "
                         "be set to True!")
    if framework == "jax":
        if config.get("use_attention"):
            raise ValueError("`use_attention` not available for "
                             "framework=jax so far!")
        elif config.get("use_lstm"):
            raise ValueError("`use_lstm` not available for "
                             "framework=jax so far!")
    if config.get("framestack") != DEPRECATED_VALUE:
        # deprecation_warning(
        #     old="framestack", new="num_framestacks (int)", error=False)
        # If old behavior is desired, disable traj. view-style
        # framestacking.
        config["num_framestacks"] = 0
def _get_v2_model_class(input_space: gym.Space,
                        model_config: ModelConfigDict,
                        framework: str = "tf") -> Type[ModelV2]:
    VisionNet = None
    ComplexNet = None
    Keras_FCNet = None
    Keras_VisionNet = None

    if framework in ["tf2", "tf", "tfe"]:
        from ray.rllib.models.tf.fcnet import \
            FullyConnectedNetwork as FCNet, \
            Keras_FullyConnectedNetwork as Keras_FCNet
        from ray.rllib.models.tf.visionnet import \
            VisionNetwork as VisionNet, \
            Keras_VisionNetwork as Keras_VisionNet
        from ray.rllib.models.tf.complex_input_net import \
            ComplexInputNetwork as ComplexNet
    elif framework == "torch":
        from ray.rllib.models.torch.fcnet import (FullyConnectedNetwork as
                                                  FCNet)
        from ray.rllib.models.torch.visionnet import (VisionNetwork as
                                                      VisionNet)
        from ray.rllib.models.torch.complex_input_net import \
            ComplexInputNetwork as ComplexNet
    elif framework == "jax":
        from ray.rllib.models.jax.fcnet import (FullyConnectedNetwork as
                                                FCNet)
    else:
        raise ValueError(
            "framework={} not supported in `ModelCatalog._get_v2_model_"
            "class`!".format(framework))

    orig_space = input_space if not hasattr(
        input_space, "original_space") else input_space.original_space

    # `input_space` is 3D Box -> VisionNet.
    if isinstance(input_space, Box) and len(input_space.shape) == 3:
        if framework == "jax":
            raise NotImplementedError("No non-FC default net for JAX yet!")
        elif model_config.get("_use_default_native_models") and \
                Keras_VisionNet:
            return Keras_VisionNet
        return VisionNet
    # `input_space` is 1D Box -> FCNet.
    elif isinstance(input_space, Box) and len(input_space.shape) == 1 and \
            (not isinstance(orig_space, (Dict, Tuple)) or not any(
                isinstance(s, Box) and len(s.shape) >= 2
                for s in tree.flatten(orig_space.spaces))):
        # Keras native requested AND no auto-rnn-wrapping.
        if model_config.get("_use_default_native_models") and Keras_FCNet:
            return Keras_FCNet
        # Classic ModelV2 FCNet.
        else:
            return FCNet
    # Complex (Dict, Tuple, 2D Box (flatten), Discrete, MultiDiscrete).
    else:
        if framework == "jax":
            raise NotImplementedError("No non-FC default net for JAX yet!")
        return ComplexNet
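# Hedged usage sketch of the dispatch above. It assumes the function is
# importable as shown (in RLlib it lives on ModelCatalog as a static method)
# and that the spaces below are representative; the empty model_config means
# no Keras-native models are requested.
import numpy as np
from gym.spaces import Box, Tuple

image_obs = Box(0.0, 1.0, shape=(84, 84, 3), dtype=np.float32)
vector_obs = Box(-1.0, 1.0, shape=(16,), dtype=np.float32)

# 3D Box -> VisionNetwork, 1D Box -> FullyConnectedNetwork,
# Tuple containing an image -> ComplexInputNetwork.
vision_cls = _get_v2_model_class(image_obs, {}, framework="torch")
fc_cls = _get_v2_model_class(vector_obs, {}, framework="torch")
complex_cls = _get_v2_model_class(
    Tuple([image_obs, vector_obs]), {}, framework="torch")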
def _validate_config(config: ModelConfigDict, framework: str) -> None:
    """Validates a given model config dict.

    Args:
        config (ModelConfigDict): The "model" sub-config dict
            within the Trainer's config dict.
        framework (str): One of "jax", "tf2", "tf", "tfe", or "torch".

    Raises:
        ValueError: If something is wrong with the given config.
    """
    # Soft-deprecate custom preprocessors.
    if config.get("custom_preprocessor") is not None:
        deprecation_warning(
            old="model.custom_preprocessor",
            new="gym.ObservationWrapper around your env or handle complex "
            "inputs inside your Model",
            error=False,
        )

    if config.get("use_attention") and config.get("use_lstm"):
        raise ValueError("Only one of `use_lstm` or `use_attention` may "
                         "be set to True!")
    if framework == "jax":
        if config.get("use_attention"):
            raise ValueError("`use_attention` not available for "
                             "framework=jax so far!")
        elif config.get("use_lstm"):
            raise ValueError("`use_lstm` not available for "
                             "framework=jax so far!")
def __init__(self, obs_space: gym.spaces.Space, action_space: gym.spaces.Space,
             num_outputs: int, model_config: ModelConfigDict, name: str):
    TorchModelV2.__init__(self, obs_space, action_space, num_outputs,
                          model_config, name)
    nn.Module.__init__(self)

    # Nonlinearity for fully connected net (tanh, relu). Default: "tanh".
    activation = model_config.get("fcnet_activation")
    # Number of hidden layers for fully connected net. Default: [256, 256].
    hiddens = [256, 256]  # model_config.get("fcnet_hiddens", [])
    # Whether to skip the final linear layer used to resize the hidden layer
    # outputs to size `num_outputs`. If True, then the last hidden layer
    # should already match num_outputs.
    # no_final_linear = False
    self.vf_share_layers = model_config.get("vf_share_layers")
    self.free_log_std = False

    self._embedd = nn.Embedding(
        int(obs_space.high[0][-1]) + 1, CARD_EMBEDD_SIZE)

    # Player hot-encoded = 3; number of cards played per trick = 4.
    # CARD_EMBEDD_SIZE * number of cards played per trick = 4.
    self._hidden_layers = self._build_hidden_layers(
        first_layer_size=FIRST_LAYER_SIZE,
        hiddens=hiddens,
        activation=activation)

    self._value_branch_separate = None
    self._value_embedding = None
    if not self.vf_share_layers:
        # Build a parallel set of hidden layers for the value net.
        self._value_embedding = nn.Embedding(
            int(obs_space.high[0][-1]) + 1, CARD_EMBEDD_SIZE)
        self._value_branch_separate = self._build_hidden_layers(
            first_layer_size=FIRST_LAYER_SIZE,
            hiddens=hiddens,
            activation=activation)

    self._logits = SlimFC(in_size=hiddens[-1],
                          out_size=num_outputs,
                          initializer=normc_initializer(0.01),
                          activation_fn=None)
    self._value_branch = SlimFC(in_size=hiddens[-1],
                                out_size=1,
                                initializer=normc_initializer(1.0),
                                activation_fn=None)
    # Holds the current "base" output (before logits layer).
    self._features = None
    # Holds the last input, in case value branch is separate.
    self._cards_in = None
    self._players_in = None
def sensorModel(num_actions):
    modelConfig = ModelConfigDict()
    modelConfig["fcnet_hiddens"] = [[5 * 3 + 4, 32], [32, 64], [64, 128],
                                    [128, 64], [64, 32], [32, num_actions]]
    modelConfig["fcnet_activation"] = "relu"
    return modelConfig
def vgg16(num_actions, inChannel):
    modelConfig = ModelConfigDict()
    modelConfig["inChannel"] = inChannel
    # Full VGG16 conv stack, disabled in favor of the truncated stack below.
    # Each entry: [out_channels, kernel_size, stride, padding,
    #              [pooling (max), kernel, stride]]
    # modelConfig["conv_filters"] = [[64, 3, 1, 1, [0]],
    #                                [64, 3, 1, 1, [1, 2, 2]],
    #                                [128, 3, 1, 1, [0]],
    #                                [128, 3, 1, 1, [1, 2, 2]],
    #                                [256, 3, 1, 1, [0]],
    #                                [256, 3, 1, 1, [0]],
    #                                [256, 3, 1, 1, [1, 2, 2]],
    #                                [512, 3, 1, 1, [0]],
    #                                [512, 3, 1, 1, [0]],
    #                                [512, 3, 1, 1, [1, 2, 2]],
    #                                [512, 3, 1, 1, [0]],
    #                                [512, 3, 1, 1, [0]],
    #                                [512, 3, 1, 1, [1, 2, 2]]]
    modelConfig["conv_filters"] = [
        # [out_channels, kernel_size, stride, padding,
        #  [pooling (max), kernel, stride]]
        [64, 3, 1, 1, [0]],
        [64, 3, 1, 1, [1, 2, 2]],
        [128, 3, 1, 1, [0]],
        [128, 3, 1, 1, [1, 2, 2]],
        [256, 3, 1, 1, [0]],
        [256, 3, 1, 1, [1, 2, 2]]
    ]
    modelConfig["conv_activation"] = "relu"
    modelConfig["fcnet_hiddens"] = [[256 * 32 * 32 + 5 * 3 + 4, 64],
                                    [64, num_actions]]
    modelConfig["fcnet_activation"] = "relu"
    return modelConfig
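# Sanity-check sketch for the flatten size above: the stride-1, padding-1,
# kernel-3 convolutions preserve the spatial size, and each of the three
# 2x2/2 max-pools halves it. Assuming a 256x256 input (an assumption; the
# original does not state the input resolution), 256 / 2**3 = 32, which
# matches the 256 * 32 * 32 term in `fcnet_hiddens`.
def conv_out(size, kernel, stride, padding):
    return (size + 2 * padding - kernel) // stride + 1

size = 256  # assumed input width/height
for out_ch, kernel, stride, padding, pool in [
        [64, 3, 1, 1, [0]], [64, 3, 1, 1, [1, 2, 2]],
        [128, 3, 1, 1, [0]], [128, 3, 1, 1, [1, 2, 2]],
        [256, 3, 1, 1, [0]], [256, 3, 1, 1, [1, 2, 2]]]:
    size = conv_out(size, kernel, stride, padding)
    if pool[0]:  # max-pool flag: [1, kernel, stride]
        size = conv_out(size, pool[1], pool[2], 0)
print(size)  # -> 32, so the flattened conv output is 256 * 32 * 32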
def __init__(self, obs_space: gym.spaces.Space, action_space: gym.spaces.Space,
             num_outputs: int, model_config: ModelConfigDict, name: str):
    if not model_config.get("conv_filters"):
        raise ValueError("Config for conv_filters is required")
    TorchModelV2.__init__(self, obs_space, action_space, num_outputs,
                          model_config, name)
    nn.Module.__init__(self)

    activation = self.model_config.get("conv_activation")
    filters = self.model_config["conv_filters"]
    assert len(filters) > 0, \
        "Must provide at least 1 entry in `conv_filters`!"

    # Whether the last layer is the output of a Flattened (rather than
    # a n x (1,1) Conv2D).
    self.last_layer_is_flattened = False
    self._logits = None
    # Holds the current "base" output (before logits layer).
    self._features = None

    self.num_outputs = num_outputs if num_outputs else action_space.shape[0]
    self.filters = filters
    self.activation = activation
    self.obs_space = obs_space
    self._create_model()
def _validate_config(
    config: ModelConfigDict, action_space: gym.spaces.Space, framework: str
) -> None:
    """Validates a given model config dict.

    Args:
        config: The "model" sub-config dict within the Trainer's config
            dict.
        action_space: The action space of the model, whose config are
            validated.
        framework: One of "jax", "tf2", "tf", "tfe", or "torch".

    Raises:
        ValueError: If something is wrong with the given config.
    """
    # Soft-deprecate custom preprocessors.
    if config.get("custom_preprocessor") is not None:
        deprecation_warning(
            old="model.custom_preprocessor",
            new="gym.ObservationWrapper around your env or handle complex "
            "inputs inside your Model",
            error=False,
        )

    if config.get("use_attention") and config.get("use_lstm"):
        raise ValueError(
            "Only one of `use_lstm` or `use_attention` may be set to True!"
        )

    # For complex action spaces, only allow prev-action inputs to
    # LSTMs and attention nets iff `_disable_action_flattening=True`.
    # TODO: `_disable_action_flattening=True` will be the default in
    #  the future.
    if (
        (
            config.get("lstm_use_prev_action")
            or config.get("attention_use_n_prev_actions", 0) > 0
        )
        and not config.get("_disable_action_flattening")
        and isinstance(action_space, (Tuple, Dict))
    ):
        raise ValueError(
            "For your complex action space (Tuple|Dict) and your model's "
            "`prev-actions` setup, you must set "
            "`_disable_action_flattening=True` in your main config dict!"
        )

    if framework == "jax":
        if config.get("use_attention"):
            raise ValueError(
                "`use_attention` not available for framework=jax so far!"
            )
        elif config.get("use_lstm"):
            raise ValueError(
                "`use_lstm` not available for framework=jax so far!"
            )
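# Hedged sketch of the complex-action-space guard above: a Tuple action
# space combined with `lstm_use_prev_action` fails validation unless
# `_disable_action_flattening` is set. Assumes the function is importable
# as shown and that its module imports gym's Tuple/Dict space classes.
import numpy as np
from gym.spaces import Box, Discrete, Tuple

action_space = Tuple(
    [Discrete(3), Box(-1.0, 1.0, shape=(2,), dtype=np.float32)])

bad_config = {"lstm_use_prev_action": True}
try:
    _validate_config(bad_config, action_space, framework="torch")
except ValueError as e:
    print(e)  # complains about `_disable_action_flattening`

ok_config = {"lstm_use_prev_action": True,
             "_disable_action_flattening": True}
_validate_config(ok_config, action_space, framework="torch")  # passes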
def _get_v2_model_class(input_space: gym.Space,
                        model_config: ModelConfigDict,
                        framework: str = "tf") -> Type[ModelV2]:
    VisionNet = None
    ComplexNet = None

    if framework in ["tf2", "tf", "tfe"]:
        from ray.rllib.models.tf.fcnet import \
            FullyConnectedNetwork as FCNet
        from ray.rllib.models.tf.visionnet import \
            VisionNetwork as VisionNet
        from ray.rllib.models.tf.complex_input_net import \
            ComplexInputNetwork as ComplexNet
    elif framework == "torch":
        from ray.rllib.models.torch.fcnet import (FullyConnectedNetwork as
                                                  FCNet)
        from ray.rllib.models.torch.visionnet import (VisionNetwork as
                                                      VisionNet)
        from ray.rllib.models.torch.complex_input_net import \
            ComplexInputNetwork as ComplexNet
    elif framework == "jax":
        from ray.rllib.models.jax.fcnet import (FullyConnectedNetwork as
                                                FCNet)
    else:
        raise ValueError(
            "framework={} not supported in `ModelCatalog._get_v2_model_"
            "class`!".format(framework))

    # Discrete/1D obs-spaces or 2D obs space but traj. view framestacking
    # disabled.
    num_framestacks = model_config.get("num_framestacks", "auto")

    # Tuple space, where at least one sub-space is image.
    # -> Complex input model.
    space_to_check = input_space if not hasattr(
        input_space, "original_space") else input_space.original_space
    if isinstance(input_space,
                  Tuple) or (isinstance(space_to_check, Tuple) and any(
                      isinstance(s, Box) and len(s.shape) >= 2
                      for s in space_to_check.spaces)):
        return ComplexNet

    # Single, flattenable/one-hot-able space -> Simple FCNet.
    if isinstance(input_space, (Discrete, MultiDiscrete)) or \
            len(input_space.shape) == 1 or (
                len(input_space.shape) == 2 and (
                    num_framestacks == "auto" or num_framestacks <= 1)):
        return FCNet
    elif framework == "jax":
        raise NotImplementedError("No non-FC default net for JAX yet!")

    # Last resort: Conv2D stack for single image spaces.
    return VisionNet
def jointModel(num_actions, inChannel):
    modelConfig = ModelConfigDict()
    modelConfig["inChannel"] = inChannel
    modelConfig["conv_filters"] = [[32, 4, 4, 0, [0]],
                                   [64, 4, 2, 0, [0]],
                                   [128, 2, 2, 0, [0]]]
    modelConfig["conv_activation"] = "relu"
    modelConfig["fcnet_hiddens"] = [[128 * 15 * 15 + 5 * 3 + 4, 64],
                                    [64, num_actions]]
    modelConfig["fcnet_activation"] = "relu"
    return modelConfig
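# Shape sketch for the flatten size above. Working the conv arithmetic
# backwards, 128 * 15 * 15 is consistent with a 248x248 input; that input
# resolution is inferred here as an assumption, not stated in the original.
def conv_out(size, kernel, stride, padding=0):
    return (size + 2 * padding - kernel) // stride + 1

size = 248  # assumed input width/height
for kernel, stride in [(4, 4), (4, 2), (2, 2)]:
    size = conv_out(size, kernel, stride)
print(size)  # -> 15, so the flattened conv output is 128 * 15 * 15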
def __init__(self, obs_space: gym.spaces.Space, action_space: gym.spaces.Space,
             num_outputs: int, model_config: ModelConfigDict, name: str):
    nn.Module.__init__(self)
    super().__init__(obs_space, action_space, None, model_config, name)

    self.cell_size = model_config["lstm_cell_size"]
    self.time_major = model_config.get("_time_major", False)
    self.use_prev_action = model_config["lstm_use_prev_action"]
    self.use_prev_reward = model_config["lstm_use_prev_reward"]

    if isinstance(action_space, Discrete):
        self.action_dim = action_space.n
    elif isinstance(action_space, MultiDiscrete):
        # Sum (not product) of the sub-space sizes: prev-actions are fed
        # in as concatenated one-hot vectors.
        self.action_dim = int(np.sum(action_space.nvec))
    elif action_space.shape is not None:
        self.action_dim = int(np.product(action_space.shape))
    else:
        self.action_dim = int(len(action_space))

    # Add prev-action/reward nodes to input to LSTM.
    if self.use_prev_action:
        self.num_outputs += self.action_dim
    if self.use_prev_reward:
        self.num_outputs += 1

    self.lstm = nn.LSTM(
        self.num_outputs, self.cell_size, batch_first=not self.time_major)

    self.num_outputs = num_outputs

    # Postprocess LSTM output with another hidden layer and compute values.
    self._logits_branch = SlimFC(in_size=self.cell_size,
                                 out_size=self.num_outputs,
                                 activation_fn=None,
                                 initializer=torch.nn.init.xavier_uniform_)
    self._value_branch = SlimFC(in_size=self.cell_size,
                                out_size=1,
                                activation_fn=None,
                                initializer=torch.nn.init.xavier_uniform_)

    # Add prev-a/r to this model's view, if required.
    if model_config["lstm_use_prev_action"]:
        self.inference_view_requirements[SampleBatch.PREV_ACTIONS] = \
            ViewRequirement(SampleBatch.ACTIONS, space=self.action_space,
                            data_rel_pos=-1)
    if model_config["lstm_use_prev_reward"]:
        self.inference_view_requirements[SampleBatch.PREV_REWARDS] = \
            ViewRequirement(SampleBatch.REWARDS, data_rel_pos=-1)
def _get_v2_model_class(input_space: gym.Space,
                        model_config: ModelConfigDict,
                        framework: str = "tf") -> Type[ModelV2]:
    VisionNet = None

    if framework in ["tf2", "tf", "tfe"]:
        from ray.rllib.models.tf.fcnet import \
            FullyConnectedNetwork as FCNet
        from ray.rllib.models.tf.visionnet import \
            VisionNetwork as VisionNet
    elif framework == "torch":
        from ray.rllib.models.torch.fcnet import (FullyConnectedNetwork as
                                                  FCNet)
        from ray.rllib.models.torch.visionnet import (VisionNetwork as
                                                      VisionNet)
    elif framework == "jax":
        from ray.rllib.models.jax.fcnet import (FullyConnectedNetwork as
                                                FCNet)
    else:
        raise ValueError(
            "framework={} not supported in `ModelCatalog._get_v2_model_"
            "class`!".format(framework))

    # Discrete/1D obs-spaces or 2D obs space but traj. view framestacking
    # disabled.
    num_framestacks = model_config.get("num_framestacks", "auto")
    if isinstance(input_space, (Discrete, MultiDiscrete)) or \
            len(input_space.shape) == 1 or (
                len(input_space.shape) == 2 and (
                    num_framestacks == "auto" or num_framestacks <= 1)):
        return FCNet
    # Default Conv2D net.
    else:
        if framework == "jax":
            raise NotImplementedError("No Conv2D default net for JAX yet!")
        return VisionNet
def get_model_v2(obs_space: gym.Space,
                 action_space: gym.Space,
                 num_outputs: int,
                 model_config: ModelConfigDict,
                 framework: str = "tf",
                 name: str = "default_model",
                 model_interface: type = None,
                 default_model: type = None,
                 **model_kwargs) -> ModelV2:
    """Returns a suitable model compatible with given spaces and output.

    Args:
        obs_space (Space): Observation space of the target gym env. This
            may have an `original_space` attribute that specifies how to
            unflatten the tensor into a ragged tensor.
        action_space (Space): Action space of the target gym env.
        num_outputs (int): The size of the output vector of the model.
        model_config (ModelConfigDict): The "model" sub-config dict
            within the Trainer's config dict.
        framework (str): One of "tf2", "tf", "tfe", "torch", or "jax".
        name (str): Name (scope) for the model.
        model_interface (cls): Interface required for the model.
        default_model (cls): Override the default class for the model.
            This only has an effect when not using a custom model.
        model_kwargs (dict): Args to pass to the ModelV2 constructor.

    Returns:
        model (ModelV2): Model to use for the policy.
    """
    # Validate the given config dict.
    ModelCatalog._validate_config(config=model_config, framework=framework)

    if model_config.get("custom_model"):
        # Allow model kwargs to be overridden / augmented by
        # custom_model_config.
        customized_model_kwargs = dict(
            model_kwargs, **model_config.get("custom_model_config", {}))

        if isinstance(model_config["custom_model"], type):
            model_cls = model_config["custom_model"]
        else:
            model_cls = _global_registry.get(RLLIB_MODEL,
                                             model_config["custom_model"])

        if not issubclass(model_cls, ModelV2):
            raise ValueError(
                "`model_cls` must be a ModelV2 sub-class, but is"
                " {}!".format(model_cls))

        logger.info("Wrapping {} as {}".format(model_cls, model_interface))
        model_cls = ModelCatalog._wrap_if_needed(model_cls, model_interface)

        if framework in ["tf2", "tf", "tfe"]:
            # Try wrapping custom model with LSTM/attention, if required.
            if model_config.get("use_lstm") or \
                    model_config.get("use_attention"):
                from ray.rllib.models.tf.attention_net import \
                    AttentionWrapper
                from ray.rllib.models.tf.recurrent_net import LSTMWrapper

                wrapped_cls = model_cls
                forward = wrapped_cls.forward
                model_cls = ModelCatalog._wrap_if_needed(
                    wrapped_cls, LSTMWrapper
                    if model_config.get("use_lstm") else AttentionWrapper)
                model_cls._wrapped_forward = forward

            # Obsolete: Track and warn if vars were created but not
            # registered. Only still do this, if users do register their
            # variables. If not (which they shouldn't), don't check here.
            created = set()

            def track_var_creation(next_creator, **kw):
                v = next_creator(**kw)
                created.add(v)
                return v

            with tf.variable_creator_scope(track_var_creation):
                # Try calling with kwargs first (custom ModelV2 should
                # accept these as kwargs, not get them from
                # config["custom_model_config"] anymore).
                try:
                    instance = model_cls(obs_space, action_space,
                                         num_outputs, model_config, name,
                                         **customized_model_kwargs)
                except TypeError as e:
                    # Keyword error: Try old way w/o kwargs.
                    if "__init__() got an unexpected " in e.args[0]:
                        instance = model_cls(obs_space, action_space,
                                             num_outputs, model_config,
                                             name, **model_kwargs)
                        logger.warning(
                            "Custom ModelV2 should accept all custom "
                            "options as **kwargs, instead of expecting"
                            " them in config['custom_model_config']!")
                    # Other error -> re-raise.
                    else:
                        raise e

            # User still registered TFModelV2's variables: Check, whether
            # ok.
            registered = set(instance.var_list)
            if len(registered) > 0:
                not_registered = set()
                for var in created:
                    if var not in registered:
                        not_registered.add(var)
                if not_registered:
                    raise ValueError(
                        "It looks like you are still using "
                        "`{}.register_variables()` to register your "
                        "model's weights. This is no longer required, but "
                        "if you are still calling this method at least "
                        "once, you must make sure to register all created "
                        "variables properly. The missing variables are {},"
                        " and you only registered {}. "
                        "Did you forget to call `register_variables()` on "
                        "some of the variables in question?".format(
                            instance, not_registered, registered))
        elif framework == "torch":
            # Try wrapping custom model with LSTM/attention, if required.
            if model_config.get("use_lstm") or \
                    model_config.get("use_attention"):
                from ray.rllib.models.torch.attention_net import \
                    AttentionWrapper
                from ray.rllib.models.torch.recurrent_net import \
                    LSTMWrapper

                wrapped_cls = model_cls
                forward = wrapped_cls.forward
                model_cls = ModelCatalog._wrap_if_needed(
                    wrapped_cls, LSTMWrapper
                    if model_config.get("use_lstm") else AttentionWrapper)
                model_cls._wrapped_forward = forward

            # PyTorch automatically tracks nn.Modules inside the parent
            # nn.Module's constructor.
            # Try calling with kwargs first (custom ModelV2 should
            # accept these as kwargs, not get them from
            # config["custom_model_config"] anymore).
            try:
                instance = model_cls(obs_space, action_space, num_outputs,
                                     model_config, name,
                                     **customized_model_kwargs)
            except TypeError as e:
                # Keyword error: Try old way w/o kwargs.
                if "__init__() got an unexpected " in e.args[0]:
                    instance = model_cls(obs_space, action_space,
                                         num_outputs, model_config, name,
                                         **model_kwargs)
                    logger.warning(
                        "Custom ModelV2 should accept all custom "
                        "options as **kwargs, instead of expecting"
                        " them in config['custom_model_config']!")
                # Other error -> re-raise.
                else:
                    raise e
        else:
            raise NotImplementedError(
                "`framework` must be 'tf2|tf|tfe|torch', but is "
                "{}!".format(framework))

        return instance

    # Find a default TFModelV2 and wrap with model_interface.
    if framework in ["tf", "tfe", "tf2"]:
        v2_class = None
        # Try to get a default v2 model.
        if not model_config.get("custom_model"):
            v2_class = default_model or ModelCatalog._get_v2_model_class(
                obs_space, model_config, framework=framework)

        if not v2_class:
            raise ValueError("ModelV2 class could not be determined!")

        if model_config.get("use_lstm") or \
                model_config.get("use_attention"):
            from ray.rllib.models.tf.attention_net import \
                AttentionWrapper
            from ray.rllib.models.tf.recurrent_net import LSTMWrapper

            wrapped_cls = v2_class
            forward = wrapped_cls.forward
            if model_config.get("use_lstm"):
                v2_class = ModelCatalog._wrap_if_needed(
                    wrapped_cls, LSTMWrapper)
            else:
                v2_class = ModelCatalog._wrap_if_needed(
                    wrapped_cls, AttentionWrapper)
            v2_class._wrapped_forward = forward

        # Wrap in the requested interface.
        wrapper = ModelCatalog._wrap_if_needed(v2_class, model_interface)
        return wrapper(obs_space, action_space, num_outputs, model_config,
                       name, **model_kwargs)

    # Find a default TorchModelV2 and wrap with model_interface.
    elif framework == "torch":
        # Try to get a default v2 model.
if not model_config.get("custom_model"): v2_class = default_model or ModelCatalog._get_v2_model_class( obs_space, model_config, framework=framework) if not v2_class: raise ValueError("ModelV2 class could not be determined!") if model_config.get("use_lstm") or \ model_config.get("use_attention"): from ray.rllib.models.torch.attention_net import \ AttentionWrapper from ray.rllib.models.torch.recurrent_net import LSTMWrapper wrapped_cls = v2_class forward = wrapped_cls.forward if model_config.get("use_lstm"): v2_class = ModelCatalog._wrap_if_needed( wrapped_cls, LSTMWrapper) else: v2_class = ModelCatalog._wrap_if_needed( wrapped_cls, AttentionWrapper) v2_class._wrapped_forward = forward # Wrap in the requested interface. wrapper = ModelCatalog._wrap_if_needed(v2_class, model_interface) return wrapper(obs_space, action_space, num_outputs, model_config, name, **model_kwargs) # Find a default JAXModelV2 and wrap with model_interface. elif framework == "jax": v2_class = \ default_model or ModelCatalog._get_v2_model_class( obs_space, model_config, framework=framework) # Wrap in the requested interface. wrapper = ModelCatalog._wrap_if_needed(v2_class, model_interface) return wrapper(obs_space, action_space, num_outputs, model_config, name, **model_kwargs) else: raise NotImplementedError( "`framework` must be 'tf2|tf|tfe|torch', but is " "{}!".format(framework))
def get_action_dist(action_space: gym.Space,
                    config: ModelConfigDict,
                    dist_type: Optional[Union[
                        str, Type[ActionDistribution]]] = None,
                    framework: str = "tf",
                    **kwargs) -> (type, int):
    """Returns a distribution class and size for the given action space.

    Args:
        action_space (Space): Action space of the target gym env.
        config (Optional[dict]): Optional model config.
        dist_type (Optional[Union[str, Type[ActionDistribution]]]):
            Identifier of the action distribution (str) interpreted as a
            hint or the actual ActionDistribution class to use.
        framework (str): One of "tf2", "tf", "tfe", "torch", or "jax".
        kwargs (dict): Optional kwargs to pass on to the Distribution's
            constructor.

    Returns:
        Tuple:
            - dist_class (ActionDistribution): Python class of the
              distribution.
            - dist_dim (int): The size of the input vector to the
              distribution.
    """
    dist_cls = None
    config = config or MODEL_DEFAULTS
    # Custom distribution given.
    if config.get("custom_action_dist"):
        custom_action_config = config.copy()
        action_dist_name = custom_action_config.pop("custom_action_dist")
        logger.debug(
            "Using custom action distribution {}".format(action_dist_name))
        dist_cls = _global_registry.get(RLLIB_ACTION_DIST, action_dist_name)
        return ModelCatalog._get_multi_action_distribution(
            dist_cls, action_space, custom_action_config, framework)

    # Dist_type is given directly as a class.
    elif type(dist_type) is type and \
            issubclass(dist_type, ActionDistribution) and \
            dist_type not in (
                MultiActionDistribution, TorchMultiActionDistribution):
        dist_cls = dist_type
    # Box space -> DiagGaussian OR Deterministic.
    elif isinstance(action_space, Box):
        if action_space.dtype.name.startswith("int"):
            low_ = np.min(action_space.low)
            high_ = np.max(action_space.high)
            assert np.all(action_space.low == low_)
            assert np.all(action_space.high == high_)
            dist_cls = TorchMultiCategorical if framework == "torch" \
                else MultiCategorical
            num_cats = int(np.product(action_space.shape))
            return partial(
                dist_cls,
                input_lens=[high_ - low_ + 1 for _ in range(num_cats)],
                action_space=action_space), num_cats * (high_ - low_ + 1)
        else:
            if len(action_space.shape) > 1:
                raise UnsupportedSpaceException(
                    "Action space has multiple dimensions "
                    "{}. ".format(action_space.shape) +
                    "Consider reshaping this into a single dimension, "
                    "using a custom action distribution, "
                    "using a Tuple action space, or the multi-agent API.")
            # TODO(sven): Check for bounds and return SquashedNormal, etc..
            if dist_type is None:
                dist_cls = TorchDiagGaussian if framework == "torch" \
                    else DiagGaussian
            elif dist_type == "deterministic":
                dist_cls = TorchDeterministic if framework == "torch" \
                    else Deterministic
    # Discrete Space -> Categorical.
    elif isinstance(action_space, Discrete):
        dist_cls = TorchCategorical if framework == "torch" else \
            JAXCategorical if framework == "jax" else Categorical
    # Tuple/Dict Spaces -> MultiAction.
    elif dist_type in (MultiActionDistribution,
                       TorchMultiActionDistribution) or \
            isinstance(action_space, (Tuple, Dict)):
        return ModelCatalog._get_multi_action_distribution(
            (MultiActionDistribution
             if framework == "tf" else TorchMultiActionDistribution),
            action_space, config, framework)
    # Simplex -> Dirichlet.
    elif isinstance(action_space, Simplex):
        if framework == "torch":
            # TODO(sven): implement
            raise NotImplementedError(
                "Simplex action spaces not supported for torch.")
        dist_cls = Dirichlet
    # MultiDiscrete -> MultiCategorical.
    elif isinstance(action_space, MultiDiscrete):
        dist_cls = TorchMultiCategorical if framework == "torch" else \
            MultiCategorical
        return partial(dist_cls, input_lens=action_space.nvec), \
            int(sum(action_space.nvec))
    # Unknown type -> Error.
    else:
        raise NotImplementedError("Unsupported args: {} {}".format(
            action_space, dist_type))

    return dist_cls, dist_cls.required_model_output_shape(
        action_space, config)
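# Hedged sketch of `get_action_dist` outputs for two common spaces, following
# the code paths above. Assumes an RLlib version matching this snippet, where
# the method lives on ModelCatalog.
import numpy as np
from gym.spaces import Box, Discrete
from ray.rllib.models import ModelCatalog

# Discrete(4) -> (Torch)Categorical with 4 required model outputs.
dist_cls, dist_dim = ModelCatalog.get_action_dist(
    Discrete(4), config={}, framework="torch")
print(dist_cls, dist_dim)  # -> TorchCategorical, 4

# Float Box(2,) -> (Torch)DiagGaussian with 2 * 2 = 4 outputs
# (means plus log-stds).
box = Box(-1.0, 1.0, shape=(2,), dtype=np.float32)
dist_cls2, dist_dim2 = ModelCatalog.get_action_dist(
    box, config={}, framework="torch")
print(dist_cls2, dist_dim2)  # -> TorchDiagGaussian, 4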
def __init__(self, obs_space: gym.spaces.Space, action_space: gym.spaces.Space,
             num_outputs: int, model_config: ModelConfigDict, name: str):
    if not model_config.get("conv_filters"):
        model_config["conv_filters"] = get_filter_config(obs_space.shape)
    super(CustomVisionNetwork, self).__init__(obs_space, action_space,
                                              num_outputs, model_config,
                                              name)

    activation = get_activation_fn(
        self.model_config.get("conv_activation"), framework="tf")
    filters = self.model_config["conv_filters"]
    assert len(filters) > 0, \
        "Must provide at least 1 entry in `conv_filters`!"

    input_shape = obs_space.shape
    self.data_format = "channels_last"

    inputs = tf.keras.layers.Input(shape=input_shape, name="observations")
    # is_training = tf.keras.layers.Input(
    #     shape=(), dtype=tf.bool, batch_size=1, name="is_training")
    last_layer = inputs
    # Whether the last layer is the output of a Flattened (rather than
    # a n x (1,1) Conv2D).
    self.last_layer_is_flattened = False

    # Build the action layers.
    for i, (out_size, kernel, stride) in enumerate(filters[:-1], 1):
        last_layer = tf.keras.layers.Conv2D(
            out_size,
            kernel,
            strides=(stride, stride),
            padding="same",
            activation=activation,
            data_format="channels_last",
            name="conv{}".format(i))(last_layer)

    out_size, kernel, stride = filters[-1]

    p_layer = tf.keras.layers.Conv2D(
        filters=out_size,
        kernel_size=kernel,
        strides=(stride, stride),
        padding="valid",
        data_format="channels_last",
        name="conv{}".format(len(filters)))(last_layer)
    p_layer = tf.keras.layers.ReLU()(p_layer)

    v_layer = tf.keras.layers.Conv2D(
        filters=out_size,
        kernel_size=kernel,
        strides=(stride, stride),
        padding="valid",
        data_format="channels_last",
        name="conv{}".format(len(filters) + 1))(last_layer)
    v_layer = tf.keras.layers.ReLU()(v_layer)
    # last_layer = tf1.layers.AveragePooling2D((2, 2), (2, 2))(last_layer)

    p_layer = tf.keras.layers.Flatten(data_format="channels_last")(p_layer)
    v_layer = tf.keras.layers.Flatten(data_format="channels_last")(v_layer)
    self.last_layer_is_flattened = True

    self.num_outputs_p = p_layer.shape[1]
    self.num_outputs_v = v_layer.shape[1]
    self._value_out = v_layer

    self.base_model = tf.keras.Model(inputs, [p_layer, self._value_out])
    self.base_model.summary()
def __init__(self, obs_space: gym.spaces.Space, action_space: gym.spaces.Space,
             num_outputs: int, model_config: ModelConfigDict, name: str):
    TorchModelV2.__init__(self, obs_space, action_space, num_outputs,
                          model_config, name)
    nn.Module.__init__(self)

    activation = model_config.get("fcnet_activation")
    hiddens = model_config.get("fcnet_hiddens", [])
    no_final_linear = model_config.get("no_final_linear")
    self.vf_share_layers = model_config.get("vf_share_layers")
    self.free_log_std = model_config.get("free_log_std")

    # Generate free-floating bias variables for the second half of
    # the outputs.
    if self.free_log_std:
        assert num_outputs % 2 == 0, (
            "num_outputs must be divisible by two", num_outputs)
        num_outputs = num_outputs // 2

    layers = []
    prev_layer_size = int(np.product(obs_space.shape))
    self._logits = None

    # Create layers 0 to second-last.
    for size in hiddens[:-1]:
        layers.append(
            SlimFC(in_size=prev_layer_size,
                   out_size=size,
                   initializer=normc_initializer(1.0),
                   activation_fn=activation))
        prev_layer_size = size

    # The last layer is adjusted to be of size num_outputs, but it's a
    # layer with activation.
    if no_final_linear and num_outputs:
        layers.append(
            SlimFC(in_size=prev_layer_size,
                   out_size=num_outputs,
                   initializer=normc_initializer(1.0),
                   activation_fn=activation))
        prev_layer_size = num_outputs
    # Finish the layers with the provided sizes (`hiddens`), plus -
    # iff num_outputs > 0 - a last linear layer of size num_outputs.
    else:
        if len(hiddens) > 0:
            layers.append(
                SlimFC(in_size=prev_layer_size,
                       out_size=hiddens[-1],
                       initializer=normc_initializer(1.0),
                       activation_fn=activation))
            prev_layer_size = hiddens[-1]
        if num_outputs:
            self._logits = SlimFC(in_size=prev_layer_size,
                                  out_size=num_outputs,
                                  initializer=normc_initializer(0.01),
                                  activation_fn=None)
        else:
            self.num_outputs = ([int(np.product(obs_space.shape))] +
                                hiddens[-1:])[-1]

    # Layer to add the log std vars to the state-dependent means.
    if self.free_log_std and self._logits:
        self._append_free_log_std = AppendBiasLayer(num_outputs)

    self._hidden_layers = nn.Sequential(*layers)

    self._value_branch_separate = None
    if not self.vf_share_layers:
        # Build a parallel set of hidden layers for the value net.
        prev_vf_layer_size = int(np.product(obs_space.shape))
        vf_layers = []
        for size in hiddens:
            vf_layers.append(
                SlimFC(in_size=prev_vf_layer_size,
                       out_size=size,
                       activation_fn=activation,
                       initializer=normc_initializer(1.0)))
            prev_vf_layer_size = size
        self._value_branch_separate = nn.Sequential(*vf_layers)

    self._value_branch = SlimFC(in_size=prev_layer_size,
                                out_size=1,
                                initializer=normc_initializer(1.0),
                                activation_fn=None)
    # Holds the current "base" output (before logits layer).
    self._features = None
    # Holds the last input, in case value branch is separate.
    self._last_flat_in = None
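# Hedged instantiation sketch for the fully connected net above. Assumes the
# torch FullyConnectedNetwork from an RLlib version matching this snippet;
# the spaces, hidden sizes, and batch size are illustrative.
import numpy as np
import torch
from gym.spaces import Box, Discrete
from ray.rllib.models.torch.fcnet import FullyConnectedNetwork

obs_space = Box(-1.0, 1.0, shape=(8,), dtype=np.float32)
model = FullyConnectedNetwork(
    obs_space, Discrete(4), num_outputs=4,
    model_config={"fcnet_activation": "tanh",
                  "fcnet_hiddens": [64, 64],
                  "no_final_linear": False,
                  "vf_share_layers": False,
                  "free_log_std": False},
    name="fc_example")

# ModelV2.__call__ handles flattening/restoring of the obs for us.
logits, _ = model({"obs": torch.randn(32, 8)}, [], None)
print(logits.shape)           # torch.Size([32, 4])
print(model.value_function().shape)  # torch.Size([32])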
def __init__(
        self,
        *,
        env_creator: Callable[[EnvContext], EnvType],
        validate_env: Optional[Callable[[EnvType, EnvContext],
                                        None]] = None,
        policy_spec: Union[type, Dict[str, Tuple[
            Optional[type], gym.Space, gym.Space,
            PartialTrainerConfigDict]]] = None,
        policy_mapping_fn: Optional[Callable[[AgentID], PolicyID]] = None,
        policies_to_train: Optional[List[PolicyID]] = None,
        tf_session_creator: Optional[Callable[[], "tf1.Session"]] = None,
        rollout_fragment_length: int = 100,
        batch_mode: str = "truncate_episodes",
        episode_horizon: int = None,
        preprocessor_pref: str = "deepmind",
        sample_async: bool = False,
        compress_observations: bool = False,
        num_envs: int = 1,
        observation_fn: "ObservationFunction" = None,
        observation_filter: str = "NoFilter",
        clip_rewards: bool = None,
        clip_actions: bool = True,
        env_config: EnvConfigDict = None,
        model_config: ModelConfigDict = None,
        policy_config: TrainerConfigDict = None,
        worker_index: int = 0,
        num_workers: int = 0,
        monitor_path: str = None,
        log_dir: str = None,
        log_level: str = None,
        callbacks: Type["DefaultCallbacks"] = None,
        input_creator: Callable[
            [IOContext],
            InputReader] = lambda ioctx: ioctx.default_sampler_input(),
        input_evaluation: List[str] = frozenset([]),
        output_creator: Callable[
            [IOContext], OutputWriter] = lambda ioctx: NoopOutput(),
        remote_worker_envs: bool = False,
        remote_env_batch_wait_ms: int = 0,
        soft_horizon: bool = False,
        no_done_at_end: bool = False,
        seed: int = None,
        extra_python_environs: dict = None,
        fake_sampler: bool = False,
        spaces: Optional[Dict[PolicyID, Tuple[gym.spaces.Space,
                                              gym.spaces.Space]]] = None,
        policy: Union[type, Dict[str, Tuple[
            Optional[type], gym.Space, gym.Space,
            PartialTrainerConfigDict]]] = None,
):
    """Initialize a rollout worker.

    Args:
        env_creator (Callable[[EnvContext], EnvType]): Function that
            returns a gym.Env given an EnvContext wrapped configuration.
        validate_env (Optional[Callable[[EnvType, EnvContext], None]]):
            Optional callable to validate the generated environment (only
            on worker=0).
        policy_spec (Union[type, Dict[str, Tuple[Type[Policy], gym.Space,
            gym.Space, PartialTrainerConfigDict]]]): Either a Policy class
            or a dict of policy id strings to
            (Policy class, obs_space, action_space, config)-tuples. If a
            dict is specified, then we are in multi-agent mode and a
            policy_mapping_fn can also be set (if not, will map all agents
            to DEFAULT_POLICY_ID).
        policy_mapping_fn (Optional[Callable[[AgentID], PolicyID]]): A
            callable that maps agent ids to policy ids in multi-agent
            mode. This function will be called each time a new agent
            appears in an episode, to bind that agent to a policy for the
            duration of the episode. If not provided, will map all agents
            to DEFAULT_POLICY_ID.
        policies_to_train (Optional[List[PolicyID]]): Optional list of
            policies to train, or None for all policies.
        tf_session_creator (Optional[Callable[[], tf1.Session]]): A
            function that returns a TF session. This is optional and only
            useful with TFPolicy.
        rollout_fragment_length (int): The target number of env
            transitions to include in each sample batch returned from this
            worker.
        batch_mode (str): One of the following batch modes:
            "truncate_episodes": Each call to sample() will return a batch
                of at most `rollout_fragment_length * num_envs` in size.
                The batch will be exactly
                `rollout_fragment_length * num_envs` in size if
                postprocessing does not change batch sizes. Episodes may
                be truncated in order to meet this size requirement.
"complete_episodes": Each call to sample() will return a batch of at least `rollout_fragment_length * num_envs` in size. Episodes will not be truncated, but multiple episodes may be packed within one batch to meet the batch size. Note that when `num_envs > 1`, episode steps will be buffered until the episode completes, and hence batches may contain significant amounts of off-policy data. episode_horizon (int): Whether to stop episodes at this horizon. preprocessor_pref (str): Whether to prefer RLlib preprocessors ("rllib") or deepmind ("deepmind") when applicable. sample_async (bool): Whether to compute samples asynchronously in the background, which improves throughput but can cause samples to be slightly off-policy. compress_observations (bool): If true, compress the observations. They can be decompressed with rllib/utils/compression. num_envs (int): If more than one, will create multiple envs and vectorize the computation of actions. This has no effect if if the env already implements VectorEnv. observation_fn (ObservationFunction): Optional multi-agent observation function. observation_filter (str): Name of observation filter to use. clip_rewards (bool): Whether to clip rewards to [-1, 1] prior to experience postprocessing. Setting to None means clip for Atari only. clip_actions (bool): Whether to clip action values to the range specified by the policy action space. env_config (EnvConfigDict): Config to pass to the env creator. model_config (ModelConfigDict): Config to use when creating the policy model. policy_config (TrainerConfigDict): Config to pass to the policy. In the multi-agent case, this config will be merged with the per-policy configs specified by `policy_spec`. worker_index (int): For remote workers, this should be set to a non-zero and unique value. This index is passed to created envs through EnvContext so that envs can be configured per worker. num_workers (int): For remote workers, how many workers altogether have been created? monitor_path (str): Write out episode stats and videos to this directory if specified. log_dir (str): Directory where logs can be placed. log_level (str): Set the root log level on creation. callbacks (DefaultCallbacks): Custom training callbacks. input_creator (Callable[[IOContext], InputReader]): Function that returns an InputReader object for loading previous generated experiences. input_evaluation (List[str]): How to evaluate the policy performance. This only makes sense to set when the input is reading offline data. The possible values include: - "is": the step-wise importance sampling estimator. - "wis": the weighted step-wise is estimator. - "simulation": run the environment in the background, but use this data for evaluation only and never for learning. output_creator (Callable[[IOContext], OutputWriter]): Function that returns an OutputWriter object for saving generated experiences. remote_worker_envs (bool): If using num_envs > 1, whether to create those new envs in remote processes instead of in the current process. This adds overheads, but can make sense if your envs remote_env_batch_wait_ms (float): Timeout that remote workers are waiting when polling environments. 0 (continue when at least one env is ready) is a reasonable default, but optimal value could be obtained by measuring your environment step / reset and model inference perf. soft_horizon (bool): Calculate rewards but don't reset the environment when the horizon is hit. no_done_at_end (bool): Ignore the done=True at the end of the episode and instead record done=False. 
        seed (int): Set the seed of both np and tf to this value to
            ensure each remote worker has unique exploration behavior.
        extra_python_environs (dict): Extra python environment variables
            that need to be set.
        fake_sampler (bool): Use a fake (inf speed) sampler for testing.
        spaces (Optional[Dict[PolicyID, Tuple[gym.spaces.Space,
            gym.spaces.Space]]]): An optional space dict mapping policy
            IDs to (obs_space, action_space)-tuples. This is used in case
            no Env is created on this RolloutWorker.
        policy: Obsoleted arg. Use `policy_spec` instead.
    """
    # Deprecated arg.
    if policy is not None:
        deprecation_warning("policy", "policy_spec", error=False)
        policy_spec = policy
    assert policy_spec is not None, "Must provide `policy_spec` when " \
                                    "creating RolloutWorker!"

    self._original_kwargs: dict = locals().copy()
    del self._original_kwargs["self"]

    global _global_worker
    _global_worker = self

    # Set extra environs first.
    if extra_python_environs:
        for key, value in extra_python_environs.items():
            os.environ[key] = str(value)

    def gen_rollouts():
        while True:
            yield self.sample()

    ParallelIteratorWorker.__init__(self, gen_rollouts, False)

    policy_config: TrainerConfigDict = policy_config or {}
    if (tf1 and policy_config.get("framework") in ["tf2", "tfe"]
            # This eager check is necessary for certain all-framework tests
            # that use tf's eager_mode() context generator.
            and not tf1.executing_eagerly()):
        tf1.enable_eager_execution()

    if log_level:
        logging.getLogger("ray.rllib").setLevel(log_level)

    if worker_index > 1:
        disable_log_once_globally()  # only need 1 worker to log
    elif log_level == "DEBUG":
        enable_periodic_logging()

    env_context = EnvContext(env_config or {}, worker_index)
    self.env_context = env_context
    self.policy_config: TrainerConfigDict = policy_config
    if callbacks:
        self.callbacks: "DefaultCallbacks" = callbacks()
    else:
        from ray.rllib.agents.callbacks import DefaultCallbacks
        self.callbacks: "DefaultCallbacks" = DefaultCallbacks()
    self.worker_index: int = worker_index
    self.num_workers: int = num_workers
    model_config: ModelConfigDict = model_config or {}
    policy_mapping_fn = (policy_mapping_fn
                         or (lambda agent_id: DEFAULT_POLICY_ID))
    if not callable(policy_mapping_fn):
        raise ValueError("Policy mapping function not callable?")
    self.env_creator: Callable[[EnvContext], EnvType] = env_creator
    self.rollout_fragment_length: int = rollout_fragment_length * num_envs
    self.batch_mode: str = batch_mode
    self.compress_observations: bool = compress_observations
    self.preprocessing_enabled: bool = True
    self.last_batch: SampleBatchType = None
    self.global_vars: dict = None
    self.fake_sampler: bool = fake_sampler

    # No Env will be used in this particular worker (not needed).
    if worker_index == 0 and num_workers > 0 and \
            policy_config["create_env_on_driver"] is False:
        self.env = None
    # Create an env for this worker.
    else:
        self.env = _validate_env(env_creator(env_context))
        if validate_env is not None:
            validate_env(self.env, self.env_context)

        if isinstance(self.env, (BaseEnv, MultiAgentEnv)):

            def wrap(env):
                return env  # we can't auto-wrap these env types

        elif is_atari(self.env) and \
                not model_config.get("custom_preprocessor") and \
                preprocessor_pref == "deepmind":
            # Deepmind wrappers already handle all preprocessing.
            self.preprocessing_enabled = False

            # If clip_rewards not explicitly set to False, switch it
            # on here (clip between -1.0 and 1.0).
            if clip_rewards is None:
                clip_rewards = True

            def wrap(env):
                env = wrap_deepmind(
                    env,
                    dim=model_config.get("dim"),
                    framestack=model_config.get("framestack"))
                if monitor_path:
                    from gym import wrappers
                    env = wrappers.Monitor(env, monitor_path, resume=True)
                return env

        else:

            def wrap(env):
                if monitor_path:
                    from gym import wrappers
                    env = wrappers.Monitor(env, monitor_path, resume=True)
                return env

        self.env: EnvType = wrap(self.env)

    def make_env(vector_index):
        return wrap(
            env_creator(
                env_context.copy_with_overrides(
                    worker_index=worker_index,
                    vector_index=vector_index,
                    remote=remote_worker_envs)))

    self.make_env_fn = make_env

    self.tf_sess = None
    policy_dict = _validate_and_canonicalize(
        policy_spec, self.env, spaces=spaces)
    self.policies_to_train: List[PolicyID] = policies_to_train or list(
        policy_dict.keys())
    self.policy_map: Dict[PolicyID, Policy] = None
    self.preprocessors: Dict[PolicyID, Preprocessor] = None

    # Set numpy and python seed.
    if seed is not None:
        np.random.seed(seed)
        random.seed(seed)
        if not hasattr(self.env, "seed"):
            logger.info("Env doesn't support env.seed(): {}".format(
                self.env))
        else:
            self.env.seed(seed)
        try:
            assert torch is not None
            torch.manual_seed(seed)
        except AssertionError:
            logger.info("Could not seed torch")

    if _has_tensorflow_graph(policy_dict) and not (
            tf1 and tf1.executing_eagerly()):
        if not tf1:
            raise ImportError("Could not import tensorflow")
        with tf1.Graph().as_default():
            if tf_session_creator:
                self.tf_sess = tf_session_creator()
            else:
                self.tf_sess = tf1.Session(
                    config=tf1.ConfigProto(
                        gpu_options=tf1.GPUOptions(allow_growth=True)))
            with self.tf_sess.as_default():
                # Set graph-level seed.
                if seed is not None:
                    tf1.set_random_seed(seed)
                self.policy_map, self.preprocessors = \
                    self._build_policy_map(policy_dict, policy_config)
    else:
        self.policy_map, self.preprocessors = self._build_policy_map(
            policy_dict, policy_config)

    if (ray.is_initialized()
            and ray.worker._mode() != ray.worker.LOCAL_MODE):
        # Check available number of GPUs.
        if not ray.get_gpu_ids():
            logger.debug("Creating policy evaluation worker {}".format(
                worker_index) +
                         " on CPU (please ignore any CUDA init errors)")
        elif (policy_config["framework"] in ["tf2", "tf", "tfe"] and
              not tf.config.experimental.list_physical_devices("GPU")) or \
                (policy_config["framework"] == "torch"
                 and not torch.cuda.is_available()):
            raise RuntimeError(
                "GPUs were assigned to this worker by Ray, but "
                "your DL framework ({}) reports GPU acceleration is "
                "disabled. This could be due to a bad CUDA- or {} "
                "installation.".format(policy_config["framework"],
                                       policy_config["framework"]))

    self.multiagent: bool = set(
        self.policy_map.keys()) != {DEFAULT_POLICY_ID}
    if self.multiagent and self.env is not None:
        if not ((isinstance(self.env, MultiAgentEnv)
                 or isinstance(self.env, ExternalMultiAgentEnv))
                or isinstance(self.env, BaseEnv)):
            raise ValueError(
                "Have multiple policies {}, but the env ".format(
                    self.policy_map) +
                "{} is not a subclass of BaseEnv, MultiAgentEnv or "
                "ExternalMultiAgentEnv?".format(self.env))

    self.filters: Dict[PolicyID, Filter] = {
        policy_id: get_filter(observation_filter,
                              policy.observation_space.shape)
        for (policy_id, policy) in self.policy_map.items()
    }
    if self.worker_index == 0:
        logger.info("Built filter map: {}".format(self.filters))

    self.num_envs: int = num_envs

    if self.env is None:
        self.async_env = None
    elif "custom_vector_env" in policy_config:
        custom_vec_wrapper = policy_config["custom_vector_env"]
        self.async_env = custom_vec_wrapper(self.env)
    else:
        # Always use vector env for consistency even if num_envs = 1.
        self.async_env: BaseEnv = BaseEnv.to_base_env(
            self.env,
            make_env=make_env,
            num_envs=num_envs,
            remote_envs=remote_worker_envs,
            remote_env_batch_wait_ms=remote_env_batch_wait_ms)

    # `truncate_episodes`: Allow a batch to contain more than one episode
    # (fragments) and always make the batch `rollout_fragment_length`
    # long.
    if self.batch_mode == "truncate_episodes":
        pack = True
    # `complete_episodes`: Never cut episodes and sampler will return
    # exactly one (complete) episode per poll.
    elif self.batch_mode == "complete_episodes":
        rollout_fragment_length = float("inf")
        pack = False
    else:
        raise ValueError("Unsupported batch mode: {}".format(
            self.batch_mode))

    self.io_context: IOContext = IOContext(log_dir, policy_config,
                                           worker_index, self)
    self.reward_estimators: List[OffPolicyEstimator] = []
    for method in input_evaluation:
        if method == "simulation":
            logger.warning(
                "Requested 'simulation' input evaluation method: "
                "will discard all sampler outputs and keep only metrics.")
            sample_async = True
        elif method == "is":
            ise = ImportanceSamplingEstimator.create(self.io_context)
            self.reward_estimators.append(ise)
        elif method == "wis":
            wise = WeightedImportanceSamplingEstimator.create(
                self.io_context)
            self.reward_estimators.append(wise)
        else:
            raise ValueError(
                "Unknown evaluation method: {}".format(method))

    if self.env is None:
        self.sampler = None
    elif sample_async:
        self.sampler = AsyncSampler(
            worker=self,
            env=self.async_env,
            policies=self.policy_map,
            policy_mapping_fn=policy_mapping_fn,
            preprocessors=self.preprocessors,
            obs_filters=self.filters,
            clip_rewards=clip_rewards,
            rollout_fragment_length=rollout_fragment_length,
            callbacks=self.callbacks,
            horizon=episode_horizon,
            multiple_episodes_in_batch=pack,
            tf_sess=self.tf_sess,
            clip_actions=clip_actions,
            blackhole_outputs="simulation" in input_evaluation,
            soft_horizon=soft_horizon,
            no_done_at_end=no_done_at_end,
            observation_fn=observation_fn,
            _use_trajectory_view_api=policy_config.get(
                "_use_trajectory_view_api", False))
        # Start the Sampler thread.
        self.sampler.start()
    else:
        self.sampler = SyncSampler(
            worker=self,
            env=self.async_env,
            policies=self.policy_map,
            policy_mapping_fn=policy_mapping_fn,
            preprocessors=self.preprocessors,
            obs_filters=self.filters,
            clip_rewards=clip_rewards,
            rollout_fragment_length=rollout_fragment_length,
            callbacks=self.callbacks,
            horizon=episode_horizon,
            multiple_episodes_in_batch=pack,
            tf_sess=self.tf_sess,
            clip_actions=clip_actions,
            soft_horizon=soft_horizon,
            no_done_at_end=no_done_at_end,
            observation_fn=observation_fn,
            _use_trajectory_view_api=policy_config.get(
                "_use_trajectory_view_api", False))

    self.input_reader: InputReader = input_creator(self.io_context)
    self.output_writer: OutputWriter = output_creator(self.io_context)

    logger.debug(
        "Created rollout worker with env {} ({}), policies {}".format(
            self.async_env, self.env, self.policy_map))
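# Hedged usage sketch, patterned on RLlib's own RolloutWorker docstring
# example. Assumes an RLlib version matching this snippet; the env and
# policy choices are illustrative and the policy's default config is merged
# in by the worker.
import gym
from ray.rllib.agents.pg.pg_tf_policy import PGTFPolicy
from ray.rllib.evaluation.rollout_worker import RolloutWorker

worker = RolloutWorker(
    env_creator=lambda ctx: gym.make("CartPole-v0"),
    policy_spec=PGTFPolicy)
batch = worker.sample()  # SampleBatch of ~rollout_fragment_length steps
print(batch.count)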
def __init__(self, obs_space: gym.spaces.Space, action_space: gym.spaces.Space,
             num_outputs: int, model_config: ModelConfigDict, name: str):
    nn.Module.__init__(self)
    super().__init__(obs_space, action_space, None, model_config, name)

    # At this point, self.num_outputs is the number of nodes coming
    # from the wrapped (underlying) model. In other words, self.num_outputs
    # is the input size for the LSTM layer.
    # If None, set it to the observation space.
    if self.num_outputs is None:
        self.num_outputs = int(np.product(self.obs_space.shape))

    self.cell_size = model_config["lstm_cell_size"]
    self.time_major = model_config.get("_time_major", False)
    self.use_prev_action = model_config["lstm_use_prev_action"]
    self.use_prev_reward = model_config["lstm_use_prev_reward"]

    if isinstance(action_space, Discrete):
        self.action_dim = action_space.n
    elif isinstance(action_space, MultiDiscrete):
        self.action_dim = np.sum(action_space.nvec)
    elif action_space.shape is not None:
        self.action_dim = int(np.product(action_space.shape))
    else:
        self.action_dim = int(len(action_space))

    # Add prev-action/reward nodes to input to LSTM.
    if self.use_prev_action:
        self.num_outputs += self.action_dim
    if self.use_prev_reward:
        self.num_outputs += 1

    # Define actual LSTM layer (with num_outputs being the nodes coming
    # from the wrapped (underlying) layer).
    self.lstm = nn.LSTM(
        self.num_outputs, self.cell_size, batch_first=not self.time_major)

    # Set self.num_outputs to the number of output nodes desired by the
    # caller of this constructor.
    self.num_outputs = num_outputs

    # Postprocess LSTM output with another hidden layer and compute values.
    self._logits_branch = SlimFC(in_size=self.cell_size,
                                 out_size=self.num_outputs,
                                 activation_fn=None,
                                 initializer=torch.nn.init.xavier_uniform_)
    self._value_branch = SlimFC(in_size=self.cell_size,
                                out_size=1,
                                activation_fn=None,
                                initializer=torch.nn.init.xavier_uniform_)

    # __sphinx_doc_begin__
    # Add prev-a/r to this model's view, if required.
    if model_config["lstm_use_prev_action"]:
        self.view_requirements[SampleBatch.PREV_ACTIONS] = \
            ViewRequirement(SampleBatch.ACTIONS, space=self.action_space,
                            shift=-1)
    if model_config["lstm_use_prev_reward"]:
        self.view_requirements[SampleBatch.PREV_REWARDS] = \
            ViewRequirement(SampleBatch.REWARDS, shift=-1)
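# Shape sketch for the LSTM input size above: the wrapped model's feature
# size plus a one-hot prev-action and a scalar prev-reward. The concrete
# numbers below are illustrative assumptions, not taken from the original.
import numpy as np
from gym.spaces import Discrete, MultiDiscrete

wrapped_out = 256                 # assumed feature size of the wrapped model
action_dim = Discrete(5).n        # -> 5 (one-hot width)
md_dim = int(np.sum(MultiDiscrete([3, 4]).nvec))  # -> 7 (concatenated one-hots)

lstm_in = wrapped_out + action_dim + 1  # prev-action one-hot + prev-reward
print(lstm_in)  # 262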
def __init__(self, obs_space: gym.spaces.Space, action_space: gym.spaces.Space,
             num_outputs: int, model_config: ModelConfigDict,
             name: str = "COMATorchModel", communication: bool = True):
    nn.Module.__init__(self)
    super(COMATorchModel, self).__init__(obs_space, action_space,
                                         num_outputs, model_config, name)
    self.communication = communication
    assert self.is_time_major()
    self.recurrent = True

    if hasattr(self.obs_space, "original_space") and isinstance(
            self.obs_space.original_space, gym.spaces.Dict):
        original_space = self.obs_space.original_space
        self.has_avail_actions = "avail_actions" in original_space.spaces
        self.has_real_state = "state" in original_space.spaces
        self.has_q_value = "q_value" in original_space.spaces
        self.has_value = "value" in original_space.spaces
        self.true_obs_space = original_space["obs"]
        if self.has_real_state:
            self.state_space = original_space["state"]
    else:
        self.has_avail_actions = False
        self.has_real_state = False
        self.has_q_value = False
        self.has_value = False
        self.state_space = None
        self.offsets = None
        self.true_obs_space = self.obs_space

    if not isinstance(self.true_obs_space, Box):
        raise UnsupportedSpaceException(
            "Space {} is not supported as observation.".format(
                self.true_obs_space))

    if not isinstance(action_space, MultiDiscrete):
        raise UnsupportedSpaceException(
            "Space {} is not supported as action.".format(
                self.action_space))

    assert len(self.true_obs_space.shape) == 2, \
        "Observation space is supposed to have 2 dimensions."

    self.nbr_agents = self.true_obs_space.shape[0]
    self.nbr_actions = int(self.action_space.nvec[0])
    self.gru_cell_size = model_config.get("gru_cell_size")

    self.inference_view_requirements.update({
        SampleBatch.OBS: ViewRequirement(shift=0),
        SampleBatch.PREV_ACTIONS: ViewRequirement(
            SampleBatch.ACTIONS, space=action_space, shift=-1),
        SampleBatch.ACTIONS: ViewRequirement(space=action_space),
        "state_in_{}".format(0): ViewRequirement(
            "state_out_{}".format(0),
            space=Box(-1.0, -1.0,
                      shape=(self.nbr_agents, self.gru_cell_size)),
            shift=-1)
    })

    self.stage1, self.gru, self.stage2 = self.create_actor()
    self.critic = self.create_critic()
    self.target_critic = self.create_critic()
    self.target_critic.load_state_dict(self.critic.state_dict())
def __init__(self, obs_space: gym.spaces.Space, action_space: gym.spaces.Space,
             num_outputs: int, model_config: ModelConfigDict, name: str):
    if not model_config.get("conv_filters"):
        model_config["conv_filters"] = get_filter_config(obs_space.shape)
    super(VisionNetwork, self).__init__(obs_space, action_space,
                                        num_outputs, model_config, name)

    activation = get_activation_fn(
        self.model_config.get("conv_activation"), framework="tf")
    filters = self.model_config["conv_filters"]
    assert len(filters) > 0, \
        "Must provide at least 1 entry in `conv_filters`!"

    # Post FC-net config.
    post_fcnet_hiddens = model_config.get("post_fcnet_hiddens", [])
    post_fcnet_activation = get_activation_fn(
        model_config.get("post_fcnet_activation"), framework="tf")

    no_final_linear = self.model_config.get("no_final_linear")
    vf_share_layers = self.model_config.get("vf_share_layers")

    input_shape = obs_space.shape
    self.data_format = "channels_last"

    inputs = tf.keras.layers.Input(shape=input_shape, name="observations")
    last_layer = inputs
    # Whether the last layer is the output of a Flattened (rather than
    # a n x (1,1) Conv2D).
    self.last_layer_is_flattened = False

    # Build the action layers.
    for i, (out_size, kernel, stride) in enumerate(filters[:-1], 1):
        last_layer = tf.keras.layers.Conv2D(
            out_size,
            kernel,
            strides=stride
            if isinstance(stride, (list, tuple)) else (stride, stride),
            activation=activation,
            padding="same",
            data_format="channels_last",
            name="conv{}".format(i))(last_layer)

    out_size, kernel, stride = filters[-1]

    # No final linear: Last layer has activation function and exits with
    # num_outputs nodes (this could be a 1x1 conv or a FC layer, depending
    # on `post_fcnet_...` settings).
    if no_final_linear and num_outputs:
        last_layer = tf.keras.layers.Conv2D(
            out_size if post_fcnet_hiddens else num_outputs,
            kernel,
            strides=stride
            if isinstance(stride, (list, tuple)) else (stride, stride),
            activation=activation,
            padding="valid",
            data_format="channels_last",
            name="conv_out")(last_layer)

        # Add (optional) post-fc-stack after last Conv2D layer.
        layer_sizes = post_fcnet_hiddens[:-1] + (
            [num_outputs] if post_fcnet_hiddens else [])
        feature_out = last_layer

        for i, out_size in enumerate(layer_sizes):
            feature_out = last_layer
            last_layer = tf.keras.layers.Dense(
                out_size,
                name="post_fcnet_{}".format(i),
                activation=post_fcnet_activation,
                kernel_initializer=normc_initializer(1.0))(last_layer)

    # Finish network normally (w/o overriding last layer size with
    # `num_outputs`), then add another linear one of size `num_outputs`.
    else:
        last_layer = tf.keras.layers.Conv2D(
            out_size,
            kernel,
            strides=stride
            if isinstance(stride, (list, tuple)) else (stride, stride),
            activation=activation,
            padding="valid",
            data_format="channels_last",
            name="conv{}".format(len(filters)))(last_layer)

        # num_outputs defined. Use that to create an exact
        # `num_output`-sized (1,1)-Conv2D.
        if num_outputs:
            if post_fcnet_hiddens:
                last_cnn = last_layer = tf.keras.layers.Conv2D(
                    post_fcnet_hiddens[0], [1, 1],
                    activation=post_fcnet_activation,
                    padding="same",
                    data_format="channels_last",
                    name="conv_out")(last_layer)

                # Add (optional) post-fc-stack after last Conv2D layer.
for i, out_size in enumerate(post_fcnet_hiddens[1:] + [num_outputs]): feature_out = last_layer last_layer = tf.keras.layers.Dense( out_size, name="post_fcnet_{}".format(i + 1), activation=post_fcnet_activation if i < len(post_fcnet_hiddens) - 1 else None, kernel_initializer=normc_initializer(1.0))( last_layer) else: feature_out = last_layer last_cnn = last_layer = tf.keras.layers.Conv2D( num_outputs, [1, 1], activation=None, padding="same", data_format="channels_last", name="conv_out")(last_layer) if last_cnn.shape[1] != 1 or last_cnn.shape[2] != 1: raise ValueError( "Given `conv_filters` ({}) do not result in a [B, 1, " "1, {} (`num_outputs`)] shape (but in {})! Please " "adjust your Conv2D stack such that the dims 1 and 2 " "are both 1.".format(self.model_config["conv_filters"], self.num_outputs, list(last_cnn.shape))) # num_outputs not known -> Flatten, then set self.num_outputs # to the resulting number of nodes. else: self.last_layer_is_flattened = True last_layer = tf.keras.layers.Flatten( data_format="channels_last")(last_layer) # Add (optional) post-fc-stack after last Conv2D layer. for i, out_size in enumerate(post_fcnet_hiddens): last_layer = tf.keras.layers.Dense( out_size, name="post_fcnet_{}".format(i), activation=post_fcnet_activation, kernel_initializer=normc_initializer(1.0))(last_layer) feature_out = last_layer self.num_outputs = last_layer.shape[1] logits_out = last_layer # Build the value layers if vf_share_layers: if not self.last_layer_is_flattened: feature_out = tf.keras.layers.Lambda( lambda x: tf.squeeze(x, axis=[1, 2]))(feature_out) value_out = tf.keras.layers.Dense( 1, name="value_out", activation=None, kernel_initializer=normc_initializer(0.01))(feature_out) else: # build a parallel set of hidden layers for the value net last_layer = inputs for i, (out_size, kernel, stride) in enumerate(filters[:-1], 1): last_layer = tf.keras.layers.Conv2D( out_size, kernel, strides=stride if isinstance(stride, (list, tuple)) else (stride, stride), activation=activation, padding="same", data_format="channels_last", name="conv_value_{}".format(i))(last_layer) out_size, kernel, stride = filters[-1] last_layer = tf.keras.layers.Conv2D( out_size, kernel, strides=stride if isinstance(stride, (list, tuple)) else (stride, stride), activation=activation, padding="valid", data_format="channels_last", name="conv_value_{}".format(len(filters)))(last_layer) last_layer = tf.keras.layers.Conv2D( 1, [1, 1], activation=None, padding="same", data_format="channels_last", name="conv_value_out")(last_layer) value_out = tf.keras.layers.Lambda( lambda x: tf.squeeze(x, axis=[1, 2]))(last_layer) self.base_model = tf.keras.Model(inputs, [logits_out, value_out])
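For reference, each `conv_filters` entry consumed above is an `[out_channels, kernel, stride]` triple; SAME padding is applied to all but the last entry, which is VALID-padded. An illustrative model config for 84x84x3 inputs (the values are the usual Atari-style defaults, not taken from this excerpt):

model_config = {
    "conv_filters": [
        [16, [8, 8], 4],     # 84x84 -> 21x21 (SAME padding)
        [32, [4, 4], 2],     # 21x21 -> 11x11 (SAME padding)
        [256, [11, 11], 1],  # 11x11 -> 1x1 (VALID padding; dims 1 and 2
                             # must end up 1x1 when num_outputs is given)
    ],
    "conv_activation": "relu",
    "post_fcnet_hiddens": [256],
    "post_fcnet_activation": "relu",
    "vf_share_layers": True,
}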
def __init__(self, obs_space: gym.spaces.Space,
             action_space: gym.spaces.Space, num_outputs: int,
             model_config: ModelConfigDict, name: str):
    if not model_config.get("conv_filters"):
        raise ValueError("Config for conv_filters is required")

    TorchModelV2.__init__(self, obs_space, action_space, num_outputs,
                          model_config, name)
    nn.Module.__init__(self)

    activation = self.model_config.get("conv_activation")
    filters = self.model_config["conv_filters"]
    assert len(filters) > 0, \
        "Must provide at least 1 entry in `conv_filters`!"
    no_final_linear = self.model_config.get("no_final_linear")
    vf_share_layers = self.model_config.get("vf_share_layers")

    # Whether the last layer is the output of a Flattened (rather than
    # a n x (1,)-Conv1d).
    self.last_layer_is_flattened = False
    self._logits = None

    layers = []
    # FIXME add stacking here
    (w, in_channels) = obs_space.shape
    in_size = w
    for out_channels, kernel, stride in filters[:-1]:
        padding, out_size = same_padding_1d(in_size, kernel, stride)
        layers.append(
            SlimConv1d(in_channels,
                       out_channels,
                       kernel,
                       stride,
                       padding,
                       activation_fn=activation))
        in_channels = out_channels
        in_size = out_size

    out_channels, kernel, stride = filters[-1]

    # No final linear: Last layer is a Conv1d and uses num_outputs.
    if no_final_linear and num_outputs:
        layers.append(
            SlimConv1d(
                in_channels,
                num_outputs,
                kernel,
                stride,
                None,  # padding=valid
                activation_fn=activation))
        out_channels = num_outputs
    # Finish network normally (w/o overriding last layer size with
    # `num_outputs`), then add another linear one of size `num_outputs`.
    else:
        layers.append(
            SlimConv1d(
                in_channels,
                out_channels,
                kernel,
                stride,
                None,  # padding=valid
                activation_fn=activation))

        # num_outputs defined. Use that to create an exact
        # `num_output`-sized (1,)-Conv1d.
        if num_outputs:
            in_size = np.ceil((in_size - kernel) / stride)
            padding, _ = same_padding_1d(in_size, 1, 1)
            self._logits = SlimConv1d(
                out_channels, num_outputs, 1, 1, padding,
                activation_fn=None)
        # num_outputs not known -> Flatten, then set self.num_outputs
        # to the resulting number of nodes.
        else:
            self.last_layer_is_flattened = True
            layers.append(nn.Flatten())
            self.num_outputs = out_channels

    self._convs = nn.Sequential(*layers)

    # Build the value layers
    self._value_branch_separate = self._value_branch = None
    if vf_share_layers:
        self._value_branch = SlimFC(
            out_channels,
            1,
            initializer=normc_initializer(0.01),
            activation_fn=None)
    else:
        vf_layers = []
        # Was `(h, w, in_channels) = obs_space.shape` with `assert h == 1`,
        # which contradicts the 2D `(w, in_channels)` unpacking above and
        # would raise on any input that passed the first unpack; use the
        # same 1D layout here.
        (w, in_channels) = obs_space.shape
        in_size = w
        for out_channels, kernel, stride in filters[:-1]:
            padding, out_size = same_padding_1d(in_size, kernel, stride)
            vf_layers.append(
                SlimConv1d(in_channels,
                           out_channels,
                           kernel,
                           stride,
                           padding,
                           activation_fn=activation))
            in_channels = out_channels
            in_size = out_size

        out_channels, kernel, stride = filters[-1]
        vf_layers.append(
            SlimConv1d(in_channels,
                       out_channels,
                       kernel,
                       stride,
                       None,
                       activation_fn=activation))
        vf_layers.append(
            SlimConv1d(in_channels=out_channels,
                       out_channels=1,
                       kernel=1,
                       stride=1,
                       padding=None,
                       activation_fn=None))
        self._value_branch_separate = nn.Sequential(*vf_layers)

    # Holds the current "base" output (before logits layer).
    self._features = None
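The helper `same_padding_1d` used above is not part of this excerpt. A sketch consistent with how it is called here, returning a `(left, right)` padding tuple plus the resulting output width (standard SAME-padding arithmetic; treat the exact signature as an assumption):

import numpy as np

def same_padding_1d(in_size, filter_size, stride_size):
    # SAME padding: out = ceil(in / stride); the total padding makes the
    # strided convolution cover the input exactly.
    out_size = int(np.ceil(float(in_size) / float(stride_size)))
    pad_total = max(
        (out_size - 1) * int(stride_size) + int(filter_size) -
        int(in_size), 0)
    pad_left = pad_total // 2
    return (pad_left, pad_total - pad_left), out_size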
def get_model_v2(obs_space: gym.Space,
                 action_space: gym.Space,
                 num_outputs: int,
                 model_config: ModelConfigDict,
                 framework: str = "tf",
                 name: str = "default_model",
                 model_interface: type = None,
                 default_model: type = None,
                 **model_kwargs) -> ModelV2:
    """Returns a suitable model compatible with given spaces and output.

    Args:
        obs_space (Space): Observation space of the target gym env. This
            may have an `original_space` attribute that specifies how to
            unflatten the tensor into a ragged tensor.
        action_space (Space): Action space of the target gym env.
        num_outputs (int): The size of the output vector of the model.
        model_config (ModelConfigDict): The "model" sub-config dict within
            the Trainer's config dict.
        framework (str): One of "tf2", "tf", "tfe", or "torch".
        name (str): Name (scope) for the model.
        model_interface (cls): Interface required for the model.
        default_model (cls): Override the default class for the model.
            This only has an effect when not using a custom model.
        model_kwargs (dict): Args to pass to the ModelV2 constructor.

    Returns:
        model (ModelV2): Model to use for the policy.
    """
    if model_config.get("custom_model"):
        if "custom_options" in model_config and \
                model_config["custom_options"] != DEPRECATED_VALUE:
            deprecation_warning("model.custom_options",
                                "model.custom_model_config",
                                error=False)
            model_config["custom_model_config"] = \
                model_config.pop("custom_options")

        # Allow model kwargs to be overridden / augmented by
        # custom_model_config.
        customized_model_kwargs = dict(
            model_kwargs, **model_config.get("custom_model_config", {}))

        if isinstance(model_config["custom_model"], type):
            model_cls = model_config["custom_model"]
        else:
            model_cls = _global_registry.get(RLLIB_MODEL,
                                             model_config["custom_model"])

        # TODO(sven): Hard-deprecate Model(V1).
        if issubclass(model_cls, ModelV2):
            logger.info("Wrapping {} as {}".format(model_cls,
                                                   model_interface))
            model_cls = ModelCatalog._wrap_if_needed(
                model_cls, model_interface)

            if framework in ["tf", "tfe"]:
                # Track and warn if vars were created but not registered.
                created = set()

                def track_var_creation(next_creator, **kw):
                    v = next_creator(**kw)
                    created.add(v)
                    return v

                with tf.variable_creator_scope(track_var_creation):
                    # Try calling with kwargs first (custom ModelV2 should
                    # accept these as kwargs, not get them from
                    # config["custom_model_config"] anymore).
                    try:
                        instance = model_cls(obs_space, action_space,
                                             num_outputs, model_config,
                                             name,
                                             **customized_model_kwargs)
                    except TypeError as e:
                        # Keyword error: Try old way w/o kwargs.
                        if "__init__() got an unexpected " in e.args[0]:
                            instance = model_cls(obs_space, action_space,
                                                 num_outputs, model_config,
                                                 name, **model_kwargs)
                            logger.warning(
                                "Custom ModelV2 should accept all custom "
                                "options as **kwargs, instead of expecting"
                                " them in config['custom_model_config']!")
                        # Other error -> re-raise.
                        else:
                            raise e

                registered = set(instance.variables())
                not_registered = set()
                for var in created:
                    if var not in registered:
                        not_registered.add(var)
                if not_registered:
                    raise ValueError(
                        "It looks like variables {} were created as part "
                        "of {} but do not appear in model.variables() "
                        "({}). Did you forget to call "
                        "model.register_variables() on the variables in "
                        "question?".format(not_registered, instance,
                                           registered))
            else:
                # PyTorch automatically tracks nn.Modules inside the
                # parent nn.Module's constructor.
                # Try calling with kwargs first (custom ModelV2 should
                # accept these as kwargs, not get them from
                # config["custom_model_config"] anymore).
                try:
                    instance = model_cls(obs_space, action_space,
                                         num_outputs, model_config, name,
                                         **customized_model_kwargs)
                except TypeError as e:
                    # Keyword error: Try old way w/o kwargs.
                    if "__init__() got an unexpected " in e.args[0]:
                        instance = model_cls(obs_space, action_space,
                                             num_outputs, model_config,
                                             name, **model_kwargs)
                        logger.warning(
                            "Custom ModelV2 should accept all custom "
                            "options as **kwargs, instead of expecting"
                            " them in config['custom_model_config']!")
                    # Other error -> re-raise.
                    else:
                        raise e
            return instance

        # TODO(sven): Hard-deprecate Model(V1). This check will be
        # superfluous then.
        elif tf.executing_eagerly():
            raise ValueError(
                "Eager execution requires a TFModelV2 model to be "
                "used, however you specified a custom model {}".format(
                    model_cls))

    if framework in ["tf", "tfe", "tf2"]:
        v2_class = None
        # Try to get a default v2 model.
        if not model_config.get("custom_model"):
            v2_class = default_model or ModelCatalog._get_v2_model_class(
                obs_space, model_config, framework=framework)

        if model_config.get("use_lstm"):
            wrapped_cls = v2_class
            forward = wrapped_cls.forward
            v2_class = ModelCatalog._wrap_if_needed(
                wrapped_cls, LSTMWrapper)
            v2_class._wrapped_forward = forward

        # Fallback to a default v1 model.
        if v2_class is None:
            if tf.executing_eagerly():
                raise ValueError(
                    "Eager execution requires a TFModelV2 model to be "
                    "used, however there is no default V2 model for this "
                    "observation space: {}, use_lstm={}".format(
                        obs_space, model_config.get("use_lstm")))
            v2_class = make_v1_wrapper(ModelCatalog.get_model)
        # Wrap in the requested interface.
        wrapper = ModelCatalog._wrap_if_needed(v2_class, model_interface)
        return wrapper(obs_space, action_space, num_outputs, model_config,
                       name, **model_kwargs)
    elif framework == "torch":
        v2_class = \
            default_model or ModelCatalog._get_v2_model_class(
                obs_space, model_config, framework=framework)
        if model_config.get("use_lstm"):
            from ray.rllib.models.torch.recurrent_net import LSTMWrapper \
                as TorchLSTMWrapper
            wrapped_cls = v2_class
            forward = wrapped_cls.forward
            v2_class = ModelCatalog._wrap_if_needed(
                wrapped_cls, TorchLSTMWrapper)
            v2_class._wrapped_forward = forward
        # Wrap in the requested interface.
        wrapper = ModelCatalog._wrap_if_needed(v2_class, model_interface)
        return wrapper(obs_space, action_space, num_outputs, model_config,
                       name, **model_kwargs)
    else:
        raise NotImplementedError(
            "`framework` must be 'tf|tfe|torch', but is "
            "{}!".format(framework))
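A typical call path for the factory above: register a custom ModelV2 class under a string key, then reference it through `model_config["custom_model"]`. `MyTorchModel`, `env`, and `num_actions` below are placeholders, not names from this excerpt:

from ray.rllib.models import ModelCatalog

ModelCatalog.register_custom_model("my_model", MyTorchModel)

model = ModelCatalog.get_model_v2(
    obs_space=env.observation_space,
    action_space=env.action_space,
    num_outputs=num_actions,
    model_config={
        "custom_model": "my_model",
        # Forwarded to MyTorchModel.__init__ as **kwargs (see above).
        "custom_model_config": {"hidden_dim": 64},
    },
    framework="torch",
)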
def get_action_dist(action_space: gym.Space,
                    config: ModelConfigDict,
                    dist_type: str = None,
                    framework: str = "tf",
                    **kwargs) -> (type, int):
    """Returns a distribution class and size for the given action space.

    Args:
        action_space (Space): Action space of the target gym env.
        config (Optional[dict]): Optional model config.
        dist_type (Optional[str]): Identifier of the action distribution
            interpreted as a hint.
        framework (str): One of "tf", "tfe", or "torch".
        kwargs (dict): Optional kwargs to pass on to the Distribution's
            constructor.

    Returns:
        Tuple:
            - dist_class (ActionDistribution): Python class of the
              distribution.
            - dist_dim (int): The size of the input vector to the
              distribution.
    """
    dist = None
    config = config or MODEL_DEFAULTS
    # Custom distribution given.
    if config.get("custom_action_dist"):
        action_dist_name = config["custom_action_dist"]
        logger.debug("Using custom action distribution {}".format(
            action_dist_name))
        dist = _global_registry.get(RLLIB_ACTION_DIST, action_dist_name)
    # Dist_type is given directly as a class.
    elif type(dist_type) is type and \
            issubclass(dist_type, ActionDistribution) and \
            dist_type not in (
                MultiActionDistribution, TorchMultiActionDistribution):
        dist = dist_type
    # Box space -> DiagGaussian OR Deterministic.
    elif isinstance(action_space, gym.spaces.Box):
        if len(action_space.shape) > 1:
            raise UnsupportedSpaceException(
                "Action space has multiple dimensions "
                "{}. ".format(action_space.shape) +
                "Consider reshaping this into a single dimension, "
                "using a custom action distribution, "
                "using a Tuple action space, or the multi-agent API.")
        # TODO(sven): Check for bounds and return SquashedNormal, etc..
        if dist_type is None:
            dist = TorchDiagGaussian if framework == "torch" \
                else DiagGaussian
        elif dist_type == "deterministic":
            dist = TorchDeterministic if framework == "torch" \
                else Deterministic
    # Discrete Space -> Categorical.
    elif isinstance(action_space, gym.spaces.Discrete):
        dist = TorchCategorical if framework == "torch" else Categorical
    # Tuple/Dict Spaces -> MultiAction.
    elif dist_type in (MultiActionDistribution,
                       TorchMultiActionDistribution) or \
            isinstance(action_space, (gym.spaces.Tuple, gym.spaces.Dict)):
        flat_action_space = flatten_space(action_space)
        child_dists_and_in_lens = tree.map_structure(
            lambda s: ModelCatalog.get_action_dist(
                s, config, framework=framework), flat_action_space)
        child_dists = [e[0] for e in child_dists_and_in_lens]
        input_lens = [int(e[1]) for e in child_dists_and_in_lens]
        return partial(
            (TorchMultiActionDistribution
             if framework == "torch" else MultiActionDistribution),
            action_space=action_space,
            child_distributions=child_dists,
            input_lens=input_lens), int(sum(input_lens))
    # Simplex -> Dirichlet.
    elif isinstance(action_space, Simplex):
        if framework == "torch":
            # TODO(sven): implement
            raise NotImplementedError(
                "Simplex action spaces not supported for torch.")
        dist = Dirichlet
    # MultiDiscrete -> MultiCategorical.
    elif isinstance(action_space, gym.spaces.MultiDiscrete):
        dist = TorchMultiCategorical if framework == "torch" else \
            MultiCategorical
        return partial(dist, input_lens=action_space.nvec), \
            int(sum(action_space.nvec))
    # Unknown type -> Error.
    else:
        raise NotImplementedError("Unsupported args: {} {}".format(
            action_space, dist_type))

    return dist, dist.required_model_output_shape(action_space, config)
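Illustrative dispatches of the resolver above (the sizes follow each distribution's `required_model_output_shape`; treat these as expected behavior, not output captured from this excerpt):

# Discrete(4) -> one logit per action.
dist_cls, dist_dim = ModelCatalog.get_action_dist(
    gym.spaces.Discrete(4), config=None, framework="torch")
# dist_cls == TorchCategorical, dist_dim == 4

# 1D Box(3,) -> DiagGaussian: 3 means + 3 log-stds.
dist_cls, dist_dim = ModelCatalog.get_action_dist(
    gym.spaces.Box(-1.0, 1.0, (3, )), config=None, framework="torch")
# dist_cls == TorchDiagGaussian, dist_dim == 6

# MultiDiscrete([3, 5]) -> partial(TorchMultiCategorical,
# input_lens=[3, 5]), dist_dim == 8.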
def __init__(self, obs_space: gym.spaces.Space,
             action_space: gym.spaces.Space, num_outputs: int,
             model_config: ModelConfigDict, name: str):
    if not model_config.get("conv_filters"):
        model_config["conv_filters"] = get_filter_config(obs_space.shape)

    super(CustomVisionNetwork, self).__init__(obs_space, action_space,
                                              num_outputs, model_config,
                                              name)

    activation = get_activation_fn(
        self.model_config.get("conv_activation"), framework="tf")
    filters = self.model_config["conv_filters"]
    assert len(filters) > 0, \
        "Must provide at least 1 entry in `conv_filters`!"

    # Post FC net config.
    post_fcnet_hiddens = model_config.get("post_fcnet_hiddens", [])
    post_fcnet_activation = get_activation_fn(
        model_config.get("post_fcnet_activation"), framework="tf")

    no_final_linear = self.model_config.get("no_final_linear")
    vf_share_layers = self.model_config.get("vf_share_layers")
    self.traj_view_framestacking = False

    # Perform Atari framestacking via traj. view API.
    if model_config.get("num_framestacks") != "auto" and \
            model_config.get("num_framestacks", 0) > 1:
        input_shape = obs_space.shape + (model_config["num_framestacks"], )
        self.data_format = "channels_first"
        self.traj_view_framestacking = True
    else:
        input_shape = obs_space.shape
        self.data_format = "channels_last"

    inputs = tf.keras.layers.Input(shape=input_shape, name="observations")
    # is_training = tf.keras.layers.Input(
    #     shape=(), dtype=tf.bool, batch_size=1, name="is_training")
    last_layer = inputs
    # Whether the last layer is the output of a Flattened (rather than
    # a n x (1,1) Conv2D).
    self.last_layer_is_flattened = False

    # Build the action layers: a plain first conv, then residual blocks
    # (two convs + skip connection) for every further filter entry.
    for i, (out_size, kernel, stride) in enumerate(filters[:-1], 1):
        if i == 1:
            last_layer = tf.keras.layers.Conv2D(
                out_size,
                kernel,
                strides=(stride, stride),
                padding="same",
                data_format="channels_last",
                name="conv{}".format(i))(last_layer)
            # last_layer = tf.keras.layers.BatchNormalization()(
            #     last_layer, training=is_training[0])
            last_layer = tf.keras.layers.ReLU()(last_layer)
        else:
            input_layer = last_layer
            last_layer = tf.keras.layers.Conv2D(
                out_size,
                kernel,
                strides=(stride, stride),
                padding="same",
                data_format="channels_last",
                name="conv{}".format(i * 2 - 2))(last_layer)
            # last_layer = tf.keras.layers.BatchNormalization()(
            #     last_layer, training=is_training[0])
            last_layer = tf.keras.layers.ReLU()(last_layer)
            last_layer = tf.keras.layers.Conv2D(
                out_size,
                kernel,
                strides=(stride, stride),
                padding="same",
                data_format="channels_last",
                name="conv{}".format(i * 2 - 1))(last_layer)
            # last_layer = tf.keras.layers.BatchNormalization()(
            #     last_layer, training=is_training[0])
            # Residual connection around the two convs.
            last_layer = tf.keras.layers.Add()([input_layer, last_layer])
            last_layer = tf.keras.layers.ReLU()(last_layer)

    out_size, kernel, stride = filters[-1]

    # Policy head.
    p_layer = tf.keras.layers.Conv2D(
        filters=out_size,
        kernel_size=kernel,
        strides=(stride, stride),
        padding="valid",
        data_format="channels_last",
        name="conv{}".format(2 * len(filters)))(last_layer)
    p_layer = tf.keras.layers.ReLU()(p_layer)
    # last_layer = tf1.layers.AveragePooling2D((2, 2), (2, 2))(last_layer)

    # Value head.
    v_layer = tf.keras.layers.Conv2D(
        filters=1,
        kernel_size=kernel,
        strides=(stride, stride),
        padding="valid",
        data_format="channels_last",
        name="conv{}".format(2 * len(filters) + 1))(last_layer)
    v_layer = tf.keras.layers.ReLU()(v_layer)

    p_layer = tf.keras.layers.Flatten(data_format="channels_last")(p_layer)
    v_layer = tf.keras.layers.Flatten(data_format="channels_last")(v_layer)
    self.last_layer_is_flattened = True

    self.num_outputs_p = p_layer.shape[1]
    self.num_outputs_v = v_layer.shape[1]
    logits_out = p_layer
    self._value_out = v_layer

    # NOTE: The default VisionNetwork's (optional) post-fc-stack and its
    # shared/separate value-branch construction are disabled here: this
    # network builds its own policy (p_layer) and value (v_layer) heads
    # directly from the conv trunk.

    self.base_model = tf.keras.Model(inputs, [p_layer, self._value_out])
    self.base_model.summary()
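The two-conv/skip-connection pattern hand-rolled in the layer loop above reads as a plain residual block; a sketch of the same computation as a helper (names are illustrative, not from this excerpt). Note the `Add` only type-checks when the stride is 1 and the channel counts of the shortcut and the second conv match, which the loop above implicitly relies on for `i >= 2`:

def residual_block(x, out_size, kernel, name_prefix):
    # Two SAME-padded convs with a skip connection; ReLU after the add.
    shortcut = x
    y = tf.keras.layers.Conv2D(
        out_size, kernel, strides=(1, 1), padding="same",
        name=name_prefix + "_a")(x)
    y = tf.keras.layers.ReLU()(y)
    y = tf.keras.layers.Conv2D(
        out_size, kernel, strides=(1, 1), padding="same",
        name=name_prefix + "_b")(y)
    y = tf.keras.layers.Add()([shortcut, y])
    return tf.keras.layers.ReLU()(y)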
def _get_v2_model_class(input_space: gym.Space,
                        model_config: ModelConfigDict,
                        framework: str = "tf") -> Type[ModelV2]:
    VisionNet = None
    ComplexNet = None
    Keras_FCNet = None
    Keras_VisionNet = None

    if framework in ["tf2", "tf", "tfe"]:
        from ray.rllib.models.tf.fcnet import \
            FullyConnectedNetwork as FCNet, \
            Keras_FullyConnectedNetwork as Keras_FCNet
        from ray.rllib.models.tf.visionnet import \
            VisionNetwork as VisionNet, \
            Keras_VisionNetwork as Keras_VisionNet
        from ray.rllib.models.tf.complex_input_net import \
            ComplexInputNetwork as ComplexNet
    elif framework == "torch":
        from ray.rllib.models.torch.fcnet import (FullyConnectedNetwork as
                                                  FCNet)
        from ray.rllib.models.torch.visionnet import (VisionNetwork as
                                                      VisionNet)
        from ray.rllib.models.torch.complex_input_net import \
            ComplexInputNetwork as ComplexNet
    elif framework == "jax":
        from ray.rllib.models.jax.fcnet import (FullyConnectedNetwork as
                                                FCNet)
    else:
        raise ValueError(
            "framework={} not supported in `ModelCatalog._get_v2_model_"
            "class`!".format(framework))

    # Complex space, where at least one sub-space is image.
    # -> Complex input model (which auto-flattens everything, but
    # correctly processes image components with default CNN stacks).
    space_to_check = input_space if not hasattr(
        input_space, "original_space") else input_space.original_space
    if isinstance(input_space, (Dict, Tuple)) or (isinstance(
            space_to_check, (Dict, Tuple)) and any(
                isinstance(s, Box) and len(s.shape) >= 2
                for s in tree.flatten(space_to_check.spaces))):
        return ComplexNet

    # Single, flattenable/one-hot-able space -> Simple FCNet.
    if isinstance(input_space, (Discrete, MultiDiscrete)) or \
            len(input_space.shape) == 1 or (
            len(input_space.shape) == 2):
        # Keras native requested AND no auto-rnn-wrapping.
        if model_config.get("_use_default_native_models") and Keras_FCNet:
            return Keras_FCNet
        # Classic ModelV2 FCNet.
        else:
            return FCNet

    elif framework == "jax":
        raise NotImplementedError("No non-FC default net for JAX yet!")

    # Last resort: Conv2D stack for single image spaces.
    if model_config.get("_use_default_native_models") and Keras_VisionNet:
        return Keras_VisionNet
    return VisionNet
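Expected dispatch of the selector above for a few representative spaces (illustrative, using `MODEL_DEFAULTS` as the config):

# Dict with an image component -> ComplexInputNetwork.
ModelCatalog._get_v2_model_class(
    Dict({"img": Box(0.0, 1.0, (84, 84, 3)),
          "vec": Box(-1.0, 1.0, (5, ))}),
    MODEL_DEFAULTS, framework="torch")

# Flat 1D Box or Discrete -> FullyConnectedNetwork.
ModelCatalog._get_v2_model_class(
    Box(-1.0, 1.0, (17, )), MODEL_DEFAULTS, framework="torch")
ModelCatalog._get_v2_model_class(
    Discrete(6), MODEL_DEFAULTS, framework="torch")

# Single 3D Box image -> VisionNetwork (the last-resort branch).
ModelCatalog._get_v2_model_class(
    Box(0.0, 1.0, (84, 84, 3)), MODEL_DEFAULTS, framework="torch")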
def __init__(
    self,
    obs_space: gym.spaces.Space,
    action_space: gym.spaces.Space,
    num_outputs: int,
    model_config: ModelConfigDict,
    name: str,
):
    if not model_config.get("conv_filters"):
        model_config["conv_filters"] = get_filter_config(obs_space.shape)

    TorchModelV2.__init__(
        self, obs_space, action_space, num_outputs, model_config, name
    )
    nn.Module.__init__(self)

    activation = self.model_config.get("conv_activation")
    filters = self.model_config["conv_filters"]
    assert len(filters) > 0, "Must provide at least 1 entry in `conv_filters`!"

    # Post FC net config.
    post_fcnet_hiddens = model_config.get("post_fcnet_hiddens", [])
    post_fcnet_activation = get_activation_fn(
        model_config.get("post_fcnet_activation"), framework="torch"
    )

    no_final_linear = self.model_config.get("no_final_linear")
    vf_share_layers = self.model_config.get("vf_share_layers")

    # Whether the last layer is the output of a Flattened (rather than
    # a n x (1,1) Conv2D).
    self.last_layer_is_flattened = False
    self._logits = None

    layers = []
    (w, h, in_channels) = obs_space.shape
    in_size = [w, h]
    for out_channels, kernel, stride in filters[:-1]:
        padding, out_size = same_padding(in_size, kernel, stride)
        layers.append(
            SlimConv2d(
                in_channels,
                out_channels,
                kernel,
                stride,
                padding,
                activation_fn=activation,
            )
        )
        in_channels = out_channels
        in_size = out_size

    out_channels, kernel, stride = filters[-1]

    # No final linear: Last layer has activation function and exits with
    # num_outputs nodes (this could be a 1x1 conv or a FC layer, depending
    # on `post_fcnet_...` settings).
    if no_final_linear and num_outputs:
        out_channels = out_channels if post_fcnet_hiddens else num_outputs
        layers.append(
            SlimConv2d(
                in_channels,
                out_channels,
                kernel,
                stride,
                None,  # padding=valid
                activation_fn=activation,
            )
        )

        # Add (optional) post-fc-stack after last Conv2D layer.
        layer_sizes = post_fcnet_hiddens[:-1] + (
            [num_outputs] if post_fcnet_hiddens else []
        )
        for i, out_size in enumerate(layer_sizes):
            layers.append(
                SlimFC(
                    in_size=out_channels,
                    out_size=out_size,
                    activation_fn=post_fcnet_activation,
                    initializer=normc_initializer(1.0),
                )
            )
            out_channels = out_size

    # Finish network normally (w/o overriding last layer size with
    # `num_outputs`), then add another linear one of size `num_outputs`.
    else:
        layers.append(
            SlimConv2d(
                in_channels,
                out_channels,
                kernel,
                stride,
                None,  # padding=valid
                activation_fn=activation,
            )
        )

        # num_outputs defined. Use that to create an exact
        # `num_output`-sized (1,1)-Conv2D.
        if num_outputs:
            in_size = [
                np.ceil((in_size[0] - kernel[0]) / stride),
                np.ceil((in_size[1] - kernel[1]) / stride),
            ]
            padding, _ = same_padding(in_size, [1, 1], [1, 1])
            if post_fcnet_hiddens:
                layers.append(nn.Flatten())
                in_size = out_channels
                # Add (optional) post-fc-stack after last Conv2D layer.
                for i, out_size in enumerate(post_fcnet_hiddens + [num_outputs]):
                    layers.append(
                        SlimFC(
                            in_size=in_size,
                            out_size=out_size,
                            activation_fn=post_fcnet_activation
                            if i < len(post_fcnet_hiddens) - 1
                            else None,
                            initializer=normc_initializer(1.0),
                        )
                    )
                    in_size = out_size
                # Last layer is logits layer.
                self._logits = layers.pop()
            else:
                self._logits = SlimConv2d(
                    out_channels,
                    num_outputs,
                    [1, 1],
                    1,
                    padding,
                    activation_fn=None,
                )
        # num_outputs not known -> Flatten, then set self.num_outputs
        # to the resulting number of nodes.
        else:
            self.last_layer_is_flattened = True
            layers.append(nn.Flatten())

    self._convs = nn.Sequential(*layers)

    # If our num_outputs is still unknown, we need to do a test pass to
    # figure out the output dimensions. This could be the case, if we have
    # the Flatten layer at the end.
    if self.num_outputs is None:
        # Create a B=1 dummy sample and push it through our conv-net.
        dummy_in = (
            torch.from_numpy(self.obs_space.sample())
            .permute(2, 0, 1)
            .unsqueeze(0)
            .float()
        )
        dummy_out = self._convs(dummy_in)
        self.num_outputs = dummy_out.shape[1]

    # Build the value layers
    self._value_branch_separate = self._value_branch = None
    if vf_share_layers:
        self._value_branch = SlimFC(
            out_channels, 1, initializer=normc_initializer(0.01), activation_fn=None
        )
    else:
        vf_layers = []
        (w, h, in_channels) = obs_space.shape
        in_size = [w, h]
        for out_channels, kernel, stride in filters[:-1]:
            padding, out_size = same_padding(in_size, kernel, stride)
            vf_layers.append(
                SlimConv2d(
                    in_channels,
                    out_channels,
                    kernel,
                    stride,
                    padding,
                    activation_fn=activation,
                )
            )
            in_channels = out_channels
            in_size = out_size

        out_channels, kernel, stride = filters[-1]
        vf_layers.append(
            SlimConv2d(
                in_channels,
                out_channels,
                kernel,
                stride,
                None,
                activation_fn=activation,
            )
        )

        vf_layers.append(
            SlimConv2d(
                in_channels=out_channels,
                out_channels=1,
                kernel=1,
                stride=1,
                padding=None,
                activation_fn=None,
            )
        )
        self._value_branch_separate = nn.Sequential(*vf_layers)

    # Holds the current "base" output (before logits layer).
    self._features = None
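The dummy-forward trick above generalizes: whenever an output size depends on the input resolution, pushing one fake batch through the stack is simpler than deriving the shape analytically. A standalone sketch (the layer sizes below are illustrative):

import torch
import torch.nn as nn

convs = nn.Sequential(
    nn.Conv2d(3, 16, 8, stride=4), nn.ReLU(),
    nn.Conv2d(16, 32, 4, stride=2), nn.ReLU(),
    nn.Flatten(),
)
with torch.no_grad():
    # (84 - 8) // 4 + 1 = 20, then (20 - 4) // 2 + 1 = 9.
    num_outputs = convs(torch.zeros(1, 3, 84, 84)).shape[1]  # 32 * 9 * 9 = 2592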
def __init__(self, obs_space: gym.spaces.Space,
             action_space: gym.spaces.Space, num_outputs: int,
             model_config: ModelConfigDict, name: str):
    if not model_config.get("conv_filters"):
        model_config["conv_filters"] = get_filter_config(obs_space.shape)

    super(VisionNetwork, self).__init__(obs_space, action_space,
                                        num_outputs, model_config, name)

    activation = get_activation_fn(
        self.model_config.get("conv_activation"), framework="tf")
    filters = self.model_config["conv_filters"]
    assert len(filters) > 0, \
        "Must provide at least 1 entry in `conv_filters`!"
    no_final_linear = self.model_config.get("no_final_linear")
    vf_share_layers = self.model_config.get("vf_share_layers")

    inputs = tf.keras.layers.Input(
        shape=obs_space.shape, name="observations")
    last_layer = inputs
    # Whether the last layer is the output of a Flattened (rather than
    # a n x (1,1) Conv2D).
    self.last_layer_is_flattened = False

    # Build the action layers
    for i, (out_size, kernel, stride) in enumerate(filters[:-1], 1):
        last_layer = tf.keras.layers.Conv2D(
            out_size,
            kernel,
            strides=(stride, stride),
            activation=activation,
            padding="same",
            data_format="channels_last",
            name="conv{}".format(i))(last_layer)

    out_size, kernel, stride = filters[-1]

    # No final linear: Last layer is a Conv2D and uses num_outputs.
    if no_final_linear and num_outputs:
        last_layer = tf.keras.layers.Conv2D(
            num_outputs,
            kernel,
            strides=(stride, stride),
            activation=activation,
            padding="valid",
            data_format="channels_last",
            name="conv_out")(last_layer)
        conv_out = last_layer
    # Finish network normally (w/o overriding last layer size with
    # `num_outputs`), then add another linear one of size `num_outputs`.
    else:
        last_layer = tf.keras.layers.Conv2D(
            out_size,
            kernel,
            strides=(stride, stride),
            activation=activation,
            padding="valid",
            data_format="channels_last",
            name="conv{}".format(len(filters)))(last_layer)

        # num_outputs defined. Use that to create an exact
        # `num_output`-sized (1,1)-Conv2D.
        if num_outputs:
            conv_out = tf.keras.layers.Conv2D(
                num_outputs, [1, 1],
                activation=None,
                padding="same",
                data_format="channels_last",
                name="conv_out")(last_layer)
            if conv_out.shape[1] != 1 or conv_out.shape[2] != 1:
                raise ValueError(
                    "Given `conv_filters` ({}) do not result in a [B, 1, "
                    "1, {} (`num_outputs`)] shape (but in {})! Please "
                    "adjust your Conv2D stack such that the dims 1 and 2 "
                    "are both 1.".format(self.model_config["conv_filters"],
                                         self.num_outputs,
                                         list(conv_out.shape)))
        # num_outputs not known -> Flatten, then set self.num_outputs
        # to the resulting number of nodes.
        else:
            self.last_layer_is_flattened = True
            conv_out = tf.keras.layers.Flatten(
                data_format="channels_last")(last_layer)
            self.num_outputs = conv_out.shape[1]

    # Build the value layers
    if vf_share_layers:
        last_layer = tf.keras.layers.Lambda(
            lambda x: tf.squeeze(x, axis=[1, 2]))(last_layer)
        value_out = tf.keras.layers.Dense(
            1,
            name="value_out",
            activation=None,
            kernel_initializer=normc_initializer(0.01))(last_layer)
    else:
        # Build a parallel set of hidden layers for the value net.
        last_layer = inputs
        for i, (out_size, kernel, stride) in enumerate(filters[:-1], 1):
            last_layer = tf.keras.layers.Conv2D(
                out_size,
                kernel,
                strides=(stride, stride),
                activation=activation,
                padding="same",
                data_format="channels_last",
                name="conv_value_{}".format(i))(last_layer)
        out_size, kernel, stride = filters[-1]
        last_layer = tf.keras.layers.Conv2D(
            out_size,
            kernel,
            strides=(stride, stride),
            activation=activation,
            padding="valid",
            data_format="channels_last",
            name="conv_value_{}".format(len(filters)))(last_layer)
        last_layer = tf.keras.layers.Conv2D(
            1, [1, 1],
            activation=None,
            padding="same",
            data_format="channels_last",
            name="conv_value_out")(last_layer)
        value_out = tf.keras.layers.Lambda(
            lambda x: tf.squeeze(x, axis=[1, 2]))(last_layer)

    self.base_model = tf.keras.Model(inputs, [conv_out, value_out])
    self.register_variables(self.base_model.variables)
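The `[B, 1, 1, num_outputs]` requirement enforced above can be checked by hand with the VALID-padding formula `out = (in - kernel) // stride + 1` applied to the last filter entry; a sketch:

def _valid_conv_out_size(in_size, kernel, stride):
    # Output width/height of one "valid"-padded conv dimension.
    return (in_size - kernel) // stride + 1

# Default Atari stack on 84x84: the SAME-padded layers give
# ceil(84 / 4) = 21 and ceil(21 / 2) = 11; the final VALID layer
# (kernel 11, stride 1) must then collapse dims 1 and 2 to 1.
assert _valid_conv_out_size(11, 11, 1) == 1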
def __init__(self, obs_space: gym.spaces.Space,
             action_space: gym.spaces.Space, num_outputs: int,
             model_config: ModelConfigDict, name: str):
    super(FullyConnectedNetwork, self).__init__(
        obs_space, action_space, num_outputs, model_config, name)

    hiddens = model_config.get("fcnet_hiddens", []) + \
        model_config.get("post_fcnet_hiddens", [])
    activation = model_config.get("fcnet_activation")
    if not model_config.get("fcnet_hiddens", []):
        activation = model_config.get("post_fcnet_activation")
    activation = get_activation_fn(activation)
    no_final_linear = model_config.get("no_final_linear")
    vf_share_layers = model_config.get("vf_share_layers")
    free_log_std = model_config.get("free_log_std")

    # Generate free-floating bias variables for the second half of
    # the outputs.
    if free_log_std:
        assert num_outputs % 2 == 0, (
            "num_outputs must be divisible by two", num_outputs)
        num_outputs = num_outputs // 2
        self.log_std_var = tf.Variable(
            [0.0] * num_outputs, dtype=tf.float32, name="log_std")

    # We are using obs_flat, so take the flattened shape as input.
    inputs = tf.keras.layers.Input(
        shape=(int(np.product(obs_space.shape)), ), name="observations")
    # Last hidden layer output (before logits outputs).
    last_layer = inputs
    # The action distribution outputs.
    logits_out = None
    i = 1

    # Create layers 0 to second-last.
    for size in hiddens[:-1]:
        last_layer = tf.keras.layers.Dense(
            size,
            name="fc_{}".format(i),
            activation=activation,
            kernel_initializer=normc_initializer(1.0))(last_layer)
        i += 1

    # The last layer is adjusted to be of size num_outputs, but it's a
    # layer with activation.
    if no_final_linear and num_outputs:
        logits_out = tf.keras.layers.Dense(
            num_outputs,
            name="fc_out",
            activation=activation,
            kernel_initializer=normc_initializer(1.0))(last_layer)
    # Finish the layers with the provided sizes (`hiddens`), plus -
    # iff num_outputs > 0 - a last linear layer of size num_outputs.
    else:
        if len(hiddens) > 0:
            last_layer = tf.keras.layers.Dense(
                hiddens[-1],
                name="fc_{}".format(i),
                activation=activation,
                kernel_initializer=normc_initializer(1.0))(last_layer)
        if num_outputs:
            logits_out = tf.keras.layers.Dense(
                num_outputs,
                name="fc_out",
                activation=None,
                kernel_initializer=normc_initializer(0.01))(last_layer)
        # Adjust num_outputs to be the number of nodes in the last layer.
        else:
            self.num_outputs = ([int(np.product(obs_space.shape))] +
                                hiddens[-1:])[-1]

    # Concat the log std vars to the end of the state-dependent means.
    if free_log_std and logits_out is not None:

        def tiled_log_std(x):
            return tf.tile(
                tf.expand_dims(self.log_std_var, 0), [tf.shape(x)[0], 1])

        log_std_out = tf.keras.layers.Lambda(tiled_log_std)(inputs)
        logits_out = tf.keras.layers.Concatenate(axis=1)(
            [logits_out, log_std_out])

    last_vf_layer = None
    if not vf_share_layers:
        # Build a parallel set of hidden layers for the value net.
        last_vf_layer = inputs
        i = 1
        for size in hiddens:
            last_vf_layer = tf.keras.layers.Dense(
                size,
                name="fc_value_{}".format(i),
                activation=activation,
                kernel_initializer=normc_initializer(1.0))(last_vf_layer)
            i += 1

    value_out = tf.keras.layers.Dense(
        1,
        name="value_out",
        activation=None,
        kernel_initializer=normc_initializer(0.01))(
            last_vf_layer if last_vf_layer is not None else last_layer)

    self.base_model = tf.keras.Model(
        inputs, [(logits_out
                  if logits_out is not None else last_layer), value_out])
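What `free_log_std` buys above, concretely: for a DiagGaussian over a 2D action space, `num_outputs` arrives as 4, the net only learns the 2 means, and the 2 log-stds come from a single state-independent variable that is tiled across the batch and concatenated onto the means, so exploration noise is shared globally rather than predicted per-observation. A minimal standalone rendering of that tiling (values are illustrative):

import tensorflow as tf

log_std_var = tf.Variable([0.0, 0.0])           # state-independent log-stds
means = tf.constant([[0.3, -0.1], [0.7, 0.2]])  # stand-in for fc_out (batch of 2)
log_stds = tf.tile(
    tf.expand_dims(log_std_var, 0), [tf.shape(means)[0], 1])
logits = tf.concat([means, log_stds], axis=1)   # shape [2, 4]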