Example #1
    def get_preprocessor_for_space(observation_space, options=None):
        """Returns a suitable preprocessor for the given observation space.

        Args:
            observation_space (Space): The input observation space.
            options (dict): Options to pass to the preprocessor.

        Returns:
            preprocessor (Preprocessor): Preprocessor for the observations.
        """

        options = options or MODEL_DEFAULTS
        for k in options.keys():
            if k not in MODEL_DEFAULTS:
                raise Exception("Unknown config key `{}`, all keys: {}".format(
                    k, list(MODEL_DEFAULTS)))

        if options.get("custom_preprocessor"):
            preprocessor = options["custom_preprocessor"]
            logger.info("Using custom preprocessor {}".format(preprocessor))
            prep = _global_registry.get(RLLIB_PREPROCESSOR, preprocessor)(
                observation_space, options)
        else:
            cls = get_preprocessor(observation_space)
            prep = cls(observation_space, options)

        logger.debug("Created preprocessor {}: {} -> {}".format(
            prep, observation_space, prep.shape))
        return prep
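
A hedged usage sketch (assuming the helper is exposed as `ModelCatalog.get_preprocessor_for_space`, as in the RLlib release this snippet is taken from): resolve a preprocessor for a simple Box space and flatten a raw observation with it.

    import gym
    from ray.rllib.models import ModelCatalog

    # A rank-1 Box space resolves to a simple flattening preprocessor.
    space = gym.spaces.Box(low=-1.0, high=1.0, shape=(4,))
    prep = ModelCatalog.get_preprocessor_for_space(space)
    flat_obs = prep.transform(space.sample())  # flat_obs.shape == prep.shape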
Example #2
    def _setup(self, config):
        env = self._env_id
        if env:
            config["env"] = env
            if _global_registry.contains(ENV_CREATOR, env):
                self.env_creator = _global_registry.get(ENV_CREATOR, env)
            else:
                import gym  # soft dependency
                self.env_creator = lambda env_config: gym.make(env)
        else:
            self.env_creator = lambda env_config: None

        # Merge the supplied config with the class default
        merged_config = copy.deepcopy(self._default_config)
        merged_config = deep_update(merged_config, config,
                                    self._allow_unknown_configs,
                                    self._allow_unknown_subkeys)
        self.raw_user_config = config
        self.config = merged_config
        Agent._validate_config(self.config)
        if self.config.get("log_level"):
            logging.getLogger("ray.rllib").setLevel(self.config["log_level"])

        # TODO(ekl) setting the graph is unnecessary for PyTorch agents
        with tf.Graph().as_default():
            self._init()
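
For the env-resolution branch above, a minimal sketch (the env name is hypothetical): registering a creator under a name makes `_global_registry.contains(ENV_CREATOR, env)` succeed, so `_setup` uses that creator instead of falling back to `gym.make`.

    import gym
    from ray.tune.registry import register_env

    def make_cartpole(env_config):
        # env_config is the "env_config" sub-dict of the agent config.
        return gym.make("CartPole-v1")

    register_env("my_cartpole", make_cartpole)  # now resolvable by name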
Example #3
    def get_torch_model(obs_space,
                        num_outputs,
                        options=None,
                        default_model_cls=None):
        """Returns a custom model for PyTorch algorithms.

        Args:
            obs_space (Space): The input observation space.
            num_outputs (int): The size of the output vector of the model.
            options (dict): Optional args to pass to the model constructor.
            default_model_cls (cls): Optional class to use if no custom model.

        Returns:
            model (models.Model): Neural network model.
        """
        from ray.rllib.models.pytorch.fcnet import (FullyConnectedNetwork as
                                                    PyTorchFCNet)
        from ray.rllib.models.pytorch.visionnet import (VisionNetwork as
                                                        PyTorchVisionNet)

        options = options or MODEL_DEFAULTS

        if options.get("custom_model"):
            model = options["custom_model"]
            logger.debug("Using custom torch model {}".format(model))
            return _global_registry.get(RLLIB_MODEL,
                                        model)(obs_space, num_outputs, options)

        if options.get("use_lstm"):
            raise NotImplementedError(
                "LSTM auto-wrapping not implemented for torch")

        if default_model_cls:
            return default_model_cls(obs_space, num_outputs, options)

        if isinstance(obs_space, gym.spaces.Discrete):
            obs_rank = 1
        else:
            obs_rank = len(obs_space.shape)

        if obs_rank > 1:
            return PyTorchVisionNet(obs_space, num_outputs, options)

        return PyTorchFCNet(obs_space, num_outputs, options)
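
A usage sketch, assuming the function is exposed as `ModelCatalog.get_torch_model`, as in the RLlib release this snippet comes from: rank-1 observations fall through to the fully connected net, image-like ones to the vision net.

    import gym
    from ray.rllib.models import ModelCatalog

    vector_space = gym.spaces.Box(-1.0, 1.0, shape=(8,))
    fc_model = ModelCatalog.get_torch_model(vector_space, num_outputs=2)

    image_space = gym.spaces.Box(0.0, 1.0, shape=(84, 84, 3))
    vision_model = ModelCatalog.get_torch_model(image_space, num_outputs=2)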
Example #4
    def _get_model(input_dict, obs_space, action_space, num_outputs, options,
                   state_in, seq_lens):
        if options.get("custom_model"):
            model = options["custom_model"]
            logger.debug("Using custom model {}".format(model))
            return _global_registry.get(RLLIB_MODEL, model)(
                input_dict,
                obs_space,
                action_space,
                num_outputs,
                options,
                state_in=state_in,
                seq_lens=seq_lens)

        obs_rank = len(input_dict["obs"].shape) - 1

        if obs_rank > 1:
            return VisionNetwork(input_dict, obs_space, action_space,
                                 num_outputs, options)

        return FullyConnectedNetwork(input_dict, obs_space, action_space,
                                     num_outputs, options)
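
The dispatch above keys on the rank of the batched observation tensor (batch dimension excluded); a standalone sketch of that computation:

    import numpy as np

    batched_vector = np.zeros((32, 16))        # (batch, features)
    batched_image = np.zeros((32, 84, 84, 3))  # (batch, H, W, C)

    assert len(batched_vector.shape) - 1 == 1  # -> FullyConnectedNetwork
    assert len(batched_image.shape) - 1 == 3   # -> VisionNetwork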
Example #5
    def get_model_v2(obs_space: gym.Space,
                     action_space: gym.Space,
                     num_outputs: int,
                     model_config: ModelConfigDict,
                     framework: str = "tf",
                     name: str = "default_model",
                     model_interface: type = None,
                     default_model: type = None,
                     **model_kwargs) -> ModelV2:
        """Returns a suitable model compatible with given spaces and output.

        Args:
            obs_space (Space): Observation space of the target gym env. This
                may have an `original_space` attribute that specifies how to
                unflatten the tensor into a ragged tensor.
            action_space (Space): Action space of the target gym env.
            num_outputs (int): The size of the output vector of the model.
            model_config (ModelConfigDict): The "model" sub-config dict
                within the Trainer's config dict.
            framework (str): One of "tf2", "tf", "tfe", "torch", or "jax".
            name (str): Name (scope) for the model.
            model_interface (cls): Interface required for the model.
            default_model (cls): Override the default class for the model. This
                only has an effect when not using a custom model.
            model_kwargs (dict): Args to pass to the ModelV2 constructor.

        Returns:
            model (ModelV2): Model to use for the policy.
        """

        # Validate the given config dict.
        ModelCatalog._validate_config(config=model_config,
                                      action_space=action_space,
                                      framework=framework)

        if model_config.get("custom_model"):
            # Allow model kwargs to be overridden / augmented by
            # custom_model_config.
            customized_model_kwargs = dict(
                model_kwargs, **model_config.get("custom_model_config", {}))

            if isinstance(model_config["custom_model"], type):
                model_cls = model_config["custom_model"]
            else:
                model_cls = _global_registry.get(RLLIB_MODEL,
                                                 model_config["custom_model"])

            # Only allow ModelV2 or native keras Models.
            if not issubclass(model_cls, ModelV2):
                if framework not in [
                        "tf", "tf2", "tfe"
                ] or not issubclass(model_cls, tf.keras.Model):
                    raise ValueError(
                        "`model_cls` must be a ModelV2 sub-class, but is"
                        " {}!".format(model_cls))

            logger.info("Wrapping {} as {}".format(model_cls, model_interface))
            model_cls = ModelCatalog._wrap_if_needed(model_cls,
                                                     model_interface)

            if framework in ["tf2", "tf", "tfe"]:
                # Try wrapping custom model with LSTM/attention, if required.
                if model_config.get("use_lstm") or model_config.get(
                        "use_attention"):
                    from ray.rllib.models.tf.attention_net import (
                        AttentionWrapper,
                        Keras_AttentionWrapper,
                    )
                    from ray.rllib.models.tf.recurrent_net import (
                        LSTMWrapper,
                        Keras_LSTMWrapper,
                    )

                    wrapped_cls = model_cls
                    # Wrapped (custom) model is itself a keras Model ->
                    # wrap with keras LSTM/GTrXL (attention) wrappers.
                    if issubclass(wrapped_cls, tf.keras.Model):
                        model_cls = (Keras_LSTMWrapper
                                     if model_config.get("use_lstm") else
                                     Keras_AttentionWrapper)
                        model_config["wrapped_cls"] = wrapped_cls
                    # Wrapped (custom) model is ModelV2 ->
                    # wrap with ModelV2 LSTM/GTrXL (attention) wrappers.
                    else:
                        forward = wrapped_cls.forward
                        model_cls = ModelCatalog._wrap_if_needed(
                            wrapped_cls,
                            LSTMWrapper if model_config.get("use_lstm") else
                            AttentionWrapper,
                        )
                        model_cls._wrapped_forward = forward

                # Obsolete: Track and warn if vars were created but not
                # registered. We only still check this if users actually
                # register their variables; if they don't (which they
                # shouldn't anymore), the check is skipped.
                created = set()

                def track_var_creation(next_creator, **kw):
                    v = next_creator(**kw)
                    created.add(v)
                    return v

                with tf.variable_creator_scope(track_var_creation):
                    if issubclass(model_cls, tf.keras.Model):
                        instance = model_cls(
                            input_space=obs_space,
                            action_space=action_space,
                            num_outputs=num_outputs,
                            name=name,
                            **customized_model_kwargs,
                        )
                    else:
                        # Try calling with kwargs first (custom ModelV2 should
                        # accept these as kwargs, not get them from
                        # config["custom_model_config"] anymore).
                        try:
                            instance = model_cls(
                                obs_space,
                                action_space,
                                num_outputs,
                                model_config,
                                name,
                                **customized_model_kwargs,
                            )
                        except TypeError as e:
                            # Keyword error: Try old way w/o kwargs.
                            if "__init__() got an unexpected " in e.args[0]:
                                instance = model_cls(
                                    obs_space,
                                    action_space,
                                    num_outputs,
                                    model_config,
                                    name,
                                    **model_kwargs,
                                )
                                logger.warning(
                                    "Custom ModelV2 should accept all custom "
                                    "options as **kwargs, instead of expecting"
                                    " them in config['custom_model_config']!")
                            # Other error -> re-raise.
                            else:
                                raise e

                # If the user still registered the TFModelV2's variables
                # manually, check that all created variables are covered.
                registered = []
                if not isinstance(instance, tf.keras.Model):
                    registered = set(instance.var_list)
                if len(registered) > 0:
                    not_registered = set()
                    for var in created:
                        if var not in registered:
                            not_registered.add(var)
                    if not_registered:
                        raise ValueError(
                            "It looks like you are still using "
                            "`{}.register_variables()` to register your "
                            "model's weights. This is no longer required, but "
                            "if you are still calling this method at least "
                            "once, you must make sure to register all created "
                            "variables properly. The missing variables are {},"
                            " and you only registered {}. "
                            "Did you forget to call `register_variables()` on "
                            "some of the variables in question?".format(
                                instance, not_registered, registered))
            elif framework == "torch":
                # Try wrapping custom model with LSTM/attention, if required.
                if model_config.get("use_lstm") or model_config.get(
                        "use_attention"):
                    from ray.rllib.models.torch.attention_net import AttentionWrapper
                    from ray.rllib.models.torch.recurrent_net import LSTMWrapper

                    wrapped_cls = model_cls
                    forward = wrapped_cls.forward
                    model_cls = ModelCatalog._wrap_if_needed(
                        wrapped_cls,
                        LSTMWrapper
                        if model_config.get("use_lstm") else AttentionWrapper,
                    )
                    model_cls._wrapped_forward = forward

                # PyTorch automatically tracks nn.Modules inside the parent
                # nn.Module's constructor.
                # Try calling with kwargs first (custom ModelV2 should
                # accept these as kwargs, not get them from
                # config["custom_model_config"] anymore).
                try:
                    instance = model_cls(
                        obs_space,
                        action_space,
                        num_outputs,
                        model_config,
                        name,
                        **customized_model_kwargs,
                    )
                except TypeError as e:
                    # Keyword error: Try old way w/o kwargs.
                    if "__init__() got an unexpected " in e.args[0]:
                        instance = model_cls(
                            obs_space,
                            action_space,
                            num_outputs,
                            model_config,
                            name,
                            **model_kwargs,
                        )
                        logger.warning(
                            "Custom ModelV2 should accept all custom "
                            "options as **kwargs, instead of expecting"
                            " them in config['custom_model_config']!")
                    # Other error -> re-raise.
                    else:
                        raise e
            else:
                raise NotImplementedError(
                    "`framework` must be 'tf2|tf|tfe|torch', but is "
                    "{}!".format(framework))

            return instance

        # Find a default TFModelV2 and wrap with model_interface.
        if framework in ["tf", "tfe", "tf2"]:
            v2_class = None
            # Try to get a default v2 model.
            if not model_config.get("custom_model"):
                v2_class = default_model or ModelCatalog._get_v2_model_class(
                    obs_space, model_config, framework=framework)

            if not v2_class:
                raise ValueError("ModelV2 class could not be determined!")

            if model_config.get("use_lstm") or model_config.get(
                    "use_attention"):

                from ray.rllib.models.tf.attention_net import (
                    AttentionWrapper,
                    Keras_AttentionWrapper,
                )
                from ray.rllib.models.tf.recurrent_net import (
                    LSTMWrapper,
                    Keras_LSTMWrapper,
                )

                wrapped_cls = v2_class
                if model_config.get("use_lstm"):
                    if issubclass(wrapped_cls, tf.keras.Model):
                        v2_class = Keras_LSTMWrapper
                        model_config["wrapped_cls"] = wrapped_cls
                    else:
                        v2_class = ModelCatalog._wrap_if_needed(
                            wrapped_cls, LSTMWrapper)
                        v2_class._wrapped_forward = wrapped_cls.forward
                else:
                    if issubclass(wrapped_cls, tf.keras.Model):
                        v2_class = Keras_AttentionWrapper
                        model_config["wrapped_cls"] = wrapped_cls
                    else:
                        v2_class = ModelCatalog._wrap_if_needed(
                            wrapped_cls, AttentionWrapper)
                        v2_class._wrapped_forward = wrapped_cls.forward

            # Wrap in the requested interface.
            wrapper = ModelCatalog._wrap_if_needed(v2_class, model_interface)

            if issubclass(wrapper, tf.keras.Model):
                model = wrapper(
                    input_space=obs_space,
                    action_space=action_space,
                    num_outputs=num_outputs,
                    name=name,
                    **dict(model_kwargs, **model_config),
                )
                return model

            return wrapper(obs_space, action_space, num_outputs, model_config,
                           name, **model_kwargs)

        # Find a default TorchModelV2 and wrap with model_interface.
        elif framework == "torch":
            # Try to get a default v2 model.
            if not model_config.get("custom_model"):
                v2_class = default_model or ModelCatalog._get_v2_model_class(
                    obs_space, model_config, framework=framework)

            if not v2_class:
                raise ValueError("ModelV2 class could not be determined!")

            if model_config.get("use_lstm") or model_config.get(
                    "use_attention"):

                from ray.rllib.models.torch.attention_net import AttentionWrapper
                from ray.rllib.models.torch.recurrent_net import LSTMWrapper

                wrapped_cls = v2_class
                forward = wrapped_cls.forward
                if model_config.get("use_lstm"):
                    v2_class = ModelCatalog._wrap_if_needed(
                        wrapped_cls, LSTMWrapper)
                else:
                    v2_class = ModelCatalog._wrap_if_needed(
                        wrapped_cls, AttentionWrapper)

                v2_class._wrapped_forward = forward

            # Wrap in the requested interface.
            wrapper = ModelCatalog._wrap_if_needed(v2_class, model_interface)
            return wrapper(obs_space, action_space, num_outputs, model_config,
                           name, **model_kwargs)

        # Find a default JAXModelV2 and wrap with model_interface.
        elif framework == "jax":
            v2_class = default_model or ModelCatalog._get_v2_model_class(
                obs_space, model_config, framework=framework)
            # Wrap in the requested interface.
            wrapper = ModelCatalog._wrap_if_needed(v2_class, model_interface)
            return wrapper(obs_space, action_space, num_outputs, model_config,
                           name, **model_kwargs)
        else:
            raise NotImplementedError(
                "`framework` must be 'tf2|tf|tfe|torch', but is "
                "{}!".format(framework))
Example #6
    def get_model_v2(obs_space: gym.Space,
                     action_space: gym.Space,
                     num_outputs: int,
                     model_config: ModelConfigDict,
                     framework: str = "tf",
                     name: str = "default_model",
                     model_interface: type = None,
                     default_model: type = None,
                     **model_kwargs) -> ModelV2:
        """Returns a suitable model compatible with given spaces and output.

        Args:
            obs_space (Space): Observation space of the target gym env. This
                may have an `original_space` attribute that specifies how to
                unflatten the tensor into a ragged tensor.
            action_space (Space): Action space of the target gym env.
            num_outputs (int): The size of the output vector of the model.
            model_config (ModelConfigDict): The "model" sub-config dict
                within the Trainer's config dict.
            framework (str): One of "tf2", "tf", "tfe", or "torch".
            name (str): Name (scope) for the model.
            model_interface (cls): Interface required for the model.
            default_model (cls): Override the default class for the model. This
                only has an effect when not using a custom model.
            model_kwargs (dict): Args to pass to the ModelV2 constructor.

        Returns:
            model (ModelV2): Model to use for the policy.
        """

        if model_config.get("custom_model"):

            # Allow model kwargs to be overridden / augmented by
            # custom_model_config.
            customized_model_kwargs = dict(
                model_kwargs, **model_config.get("custom_model_config", {}))

            if isinstance(model_config["custom_model"], type):
                model_cls = model_config["custom_model"]
            else:
                model_cls = _global_registry.get(RLLIB_MODEL,
                                                 model_config["custom_model"])

            if not issubclass(model_cls, ModelV2):
                raise ValueError(
                    "`model_cls` must be a ModelV2 sub-class, but is"
                    " {}!".format(model_cls))

            logger.info("Wrapping {} as {}".format(model_cls, model_interface))
            model_cls = ModelCatalog._wrap_if_needed(model_cls,
                                                     model_interface)

            if framework in ["tf2", "tf", "tfe"]:
                # Track and warn if vars were created but not registered.
                created = set()

                def track_var_creation(next_creator, **kw):
                    v = next_creator(**kw)
                    created.add(v)
                    return v

                with tf.variable_creator_scope(track_var_creation):
                    # Try calling with kwargs first (custom ModelV2 should
                    # accept these as kwargs, not get them from
                    # config["custom_model_config"] anymore).
                    try:
                        instance = model_cls(obs_space, action_space,
                                             num_outputs, model_config, name,
                                             **customized_model_kwargs)
                    except TypeError as e:
                        # Keyword error: Try old way w/o kwargs.
                        if "__init__() got an unexpected " in e.args[0]:
                            instance = model_cls(obs_space, action_space,
                                                 num_outputs, model_config,
                                                 name, **model_kwargs)
                            logger.warning(
                                "Custom ModelV2 should accept all custom "
                                "options as **kwargs, instead of expecting"
                                " them in config['custom_model_config']!")
                        # Other error -> re-raise.
                        else:
                            raise e
                registered = set(instance.variables())
                not_registered = set()
                for var in created:
                    if var not in registered:
                        not_registered.add(var)
                if not_registered:
                    raise ValueError(
                        "It looks like variables {} were created as part "
                        "of {} but does not appear in model.variables() "
                        "({}). Did you forget to call "
                        "model.register_variables() on the variables in "
                        "question?".format(not_registered, instance,
                                           registered))
            else:
                # PyTorch automatically tracks nn.Modules inside the parent
                # nn.Module's constructor.
                # Try calling with kwargs first (custom ModelV2 should
                # accept these as kwargs, not get them from
                # config["custom_model_config"] anymore).
                try:
                    instance = model_cls(obs_space, action_space, num_outputs,
                                         model_config, name,
                                         **customized_model_kwargs)
                except TypeError as e:
                    # Keyword error: Try old way w/o kwargs.
                    if "__init__() got an unexpected " in e.args[0]:
                        instance = model_cls(obs_space, action_space,
                                             num_outputs, model_config, name,
                                             **model_kwargs)
                        logger.warning(
                            "Custom ModelV2 should accept all custom "
                            "options as **kwargs, instead of expecting"
                            " them in config['custom_model_config']!")
                    # Other error -> re-raise.
                    else:
                        raise e
            return instance

        if framework in ["tf", "tfe", "tf2"]:
            v2_class = None
            # Try to get a default v2 model.
            if not model_config.get("custom_model"):
                v2_class = default_model or ModelCatalog._get_v2_model_class(
                    obs_space, model_config, framework=framework)

            if not v2_class:
                raise ValueError("ModelV2 class could not be determined!")

            if model_config.get("use_lstm"):
                wrapped_cls = v2_class
                forward = wrapped_cls.forward
                v2_class = ModelCatalog._wrap_if_needed(
                    wrapped_cls, LSTMWrapper)
                v2_class._wrapped_forward = forward

            # Wrap in the requested interface.
            wrapper = ModelCatalog._wrap_if_needed(v2_class, model_interface)
            return wrapper(obs_space, action_space, num_outputs, model_config,
                           name, **model_kwargs)
        elif framework == "torch":
            v2_class = \
                default_model or ModelCatalog._get_v2_model_class(
                    obs_space, model_config, framework=framework)
            if model_config.get("use_lstm"):
                from ray.rllib.models.torch.recurrent_net import LSTMWrapper \
                    as TorchLSTMWrapper
                wrapped_cls = v2_class
                forward = wrapped_cls.forward
                v2_class = ModelCatalog._wrap_if_needed(
                    wrapped_cls, TorchLSTMWrapper)
                v2_class._wrapped_forward = forward
            # Wrap in the requested interface.
            wrapper = ModelCatalog._wrap_if_needed(v2_class, model_interface)
            return wrapper(obs_space, action_space, num_outputs, model_config,
                           name, **model_kwargs)
        else:
            raise NotImplementedError(
                "`framework` must be 'tf2|tf|tfe|torch', but is "
                "{}!".format(framework))
Example #7
    def __init__(self, observation_space, action_space, config):
        # config = dict(ray.rllib.agents.ppo.ppo.DEFAULT_CONFIG, **config)
        self.sess = tf.get_default_session()
        self.action_space = action_space
        self.config = config
        self.kl_coeff_val = self.config["kl_coeff"]
        self.kl_target = self.config["kl_target"]
        self.inner_lr = self.config["inner_lr"]
        self.outer_lr = self.config["outer_lr"]
        self.mode = self.config["mode"]
        assert self.mode in ["local", "remote"]
        assert self.kl_coeff_val == 0.0

        dist_cls, logit_dim = ModelCatalog.get_action_dist(
            action_space, self.config["model"])

        with tf.name_scope("inputs"):
            obs_ph = tf.placeholder(tf.float32,
                                    shape=(None, ) + observation_space.shape,
                                    name="obs")
            adv_ph = tf.placeholder(tf.float32,
                                    shape=(None, ),
                                    name="advantages")
            act_ph = ModelCatalog.get_action_placeholder(action_space)
            logits_ph = tf.placeholder(tf.float32,
                                       shape=(None, logit_dim),
                                       name="logits")
            vf_preds_ph = tf.placeholder(tf.float32,
                                         shape=(None, ),
                                         name="vf_preds")
            value_targets_ph = tf.placeholder(tf.float32,
                                              shape=(None, ),
                                              name="value_targets")
            prev_actions_ph = ModelCatalog.get_action_placeholder(action_space)
            prev_rewards_ph = tf.placeholder(tf.float32,
                                             shape=(None, ),
                                             name="prev_rewards")
            existing_state_in = None
            existing_seq_lens = None

        self.observations = obs_ph

        self.a3c_loss_in = [("obs", obs_ph), ("advantages", adv_ph),
                            ("actions", act_ph),
                            ("value_targets", value_targets_ph),
                            ("vf_preds", vf_preds_ph),
                            ("prev_actions", prev_actions_ph),
                            ("prev_rewards", prev_rewards_ph)]
        # self.a3c_loss_in = [
        #     ("obs", obs_ph),
        #     ("advantages", adv_ph),
        #     ("actions", act_ph),
        #     ("prev_actions", prev_actions_ph),
        #     ("prev_rewards", prev_rewards_ph)]
        self.ppo_loss_in = list(self.a3c_loss_in) \
            + [("logits", logits_ph)]

        assert self.config["model"]["custom_model"]
        logger.info(
            f'Using custom model {self.config["model"]["custom_model"]}')
        model_cls = _global_registry.get(RLLIB_MODEL,
                                         self.config["model"]["custom_model"])
        new_variables, grad_placeholders, custom_variables, dummy_variables = \
            model_cls.prepare(observation_space,
                              (logit_dim // 2
                               if self.config["model"]["free_log_std"]
                               else logit_dim),
                              self.config["model"],
                              func=lambda x, y: x - self.inner_lr * y)
        self._new_variables = new_variables
        self._grad_placeholders = grad_placeholders
        self._custom_variables = custom_variables
        self._dummy_variables = dummy_variables
        self._inner_variables = nest.flatten(custom_variables)
        # For Meta-SGD, the outer variables are `custom_variables` plus
        # the adaptive learning rates.
        self._outer_variables = nest.flatten(custom_variables)

        self._variables = {var.op.name: var for var in self._outer_variables}

        self._grad_phs_loss_inputs = []
        for i in range(len(grad_placeholders)):
            for key, ph in grad_placeholders[i].items():
                self._grad_phs_loss_inputs.append(
                    (custom_variables[i][key].op.name, ph))
        self._grad_phs_loss_input_dict = dict(self._grad_phs_loss_inputs)

        self.model = model_cls(
            {
                "obs": obs_ph,
                "prev_actions": prev_actions_ph,
                "prev_rewards": prev_actions_ph
            },
            observation_space,
            logit_dim,
            self.config["model"],
            state_in=existing_state_in,
            seq_lens=existing_seq_lens,
            custom_params=new_variables)

        self.logits = self.model.outputs
        with tf.name_scope("sampler"):
            curr_action_dist = dist_cls(self.logits)
            self.sampler = curr_action_dist.sample()

        assert self.config["use_gae"] and self.config["vf_share_layers"]
        self.value_function = self.model.value_function()

        if self.model.state_in:
            raise NotImplementedError
        else:
            mask = None

        with tf.name_scope("a3c_loss"):
            self.a3c_loss_obj = A3CLoss(
                action_dist=curr_action_dist,
                actions=act_ph,
                advantages=adv_ph,
                value_targets=value_targets_ph,
                vf_preds=vf_preds_ph,
                value_function=self.value_function,
                vf_loss_coeff=self.config["vf_loss_coeff"],
                entropy_coeff=self.config["entropy_coeff"],
                vf_clip_param=self.config["vf_clip_param"])
            # self.a3c_loss_obj = PGLoss(
            #     curr_action_dist, act_ph, adv_ph)
        with tf.name_scope(
                "ppo_loss"
        ):  # write own PPO loss, boolean_mask -> dynamic_partition
            self.ppo_loss_obj = PPOLoss(
                action_dist=curr_action_dist,
                action_space=action_space,
                logits=logits_ph,
                actions=act_ph,
                advantages=adv_ph,
                value_targets=value_targets_ph,
                vf_preds=vf_preds_ph,
                value_function=self.value_function,
                valid_mask=mask,
                kl_coeff=self.kl_coeff_val,
                clip_param=self.config["clip_param"],
                vf_clip_param=self.config["vf_clip_param"],
                vf_loss_coeff=self.config["vf_loss_coeff"],
                entropy_coeff=self.config["entropy_coeff"],
                use_gae=self.config["use_gae"])

        BaseMAMLPolicyGraph.__init__(
            self,
            observation_space,
            action_space,
            self.sess,
            obs_input=obs_ph,
            action_sampler=self.sampler,
            inner_loss=self.a3c_loss_obj.loss,
            inner_loss_inputs=self.a3c_loss_in,
            outer_loss=self.ppo_loss_obj.loss,
            outer_loss_inputs=self.ppo_loss_in,
            state_inputs=self.model.state_in,
            state_outputs=self.model.state_out,
            prev_action_input=prev_actions_ph,
            prev_reward_input=prev_rewards_ph,
            seq_lens=self.model.seq_lens,
            max_seq_len=self.config["model"]["max_seq_len"])

        self.a3c_stats_fetches = {
            "total_loss": self.a3c_loss_obj.loss,
            "policy_loss": self.a3c_loss_obj.mean_policy_loss,
            "vf_loss": self.a3c_loss_obj.mean_vf_loss,
            "entropy": self.a3c_loss_obj.mean_entropy
        }
        self.ppo_stats_fetches = {
            "total_loss": self.ppo_loss_obj.loss,
            "policy_Loss": self.ppo_loss_obj.mean_policy_loss,
            "vf_loss": self.ppo_loss_obj.mean_vf_loss,
            "entropy": self.ppo_loss_obj.mean_entropy,
            "kl": self.ppo_loss_obj.mean_kl
        }

        self.sess.run(tf.global_variables_initializer())
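
The `func=lambda x, y: x - self.inner_lr * y` passed to `model_cls.prepare` above is the MAML inner-loop adaptation step, i.e. one SGD update on the task loss; a NumPy sketch with hypothetical values:

    import numpy as np

    inner_lr = 0.1
    theta = np.array([1.0, -2.0])  # pre-adaptation parameters (x)
    grad = np.array([0.5, -0.5])   # inner-loss gradient (y)
    theta_adapted = theta - inner_lr * grad  # the lambda applied per tensor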
Example #8
    def get_action_dist(action_space: gym.Space,
                        config: ModelConfigDict,
                        dist_type: Optional[Union[
                            str, Type[ActionDistribution]]] = None,
                        framework: str = "tf",
                        **kwargs) -> (type, int):
        """Returns a distribution class and size for the given action space.

        Args:
            action_space (Space): Action space of the target gym env.
            config (Optional[dict]): Optional model config.
            dist_type (Optional[Union[str, Type[ActionDistribution]]]):
                Identifier of the action distribution (str) interpreted as a
                hint or the actual ActionDistribution class to use.
            framework (str): One of "tf2", "tf", "tfe", "torch", or "jax".
            kwargs (dict): Optional kwargs to pass on to the Distribution's
                constructor.

        Returns:
            Tuple:
                - dist_class (ActionDistribution): Python class of the
                    distribution.
                - dist_dim (int): The size of the input vector to the
                    distribution.
        """

        dist_cls = None
        config = config or MODEL_DEFAULTS
        # Custom distribution given.
        if config.get("custom_action_dist"):
            custom_action_config = config.copy()
            action_dist_name = custom_action_config.pop("custom_action_dist")
            logger.debug(
                "Using custom action distribution {}".format(action_dist_name))
            dist_cls = _global_registry.get(RLLIB_ACTION_DIST,
                                            action_dist_name)
            return ModelCatalog._get_multi_action_distribution(
                dist_cls, action_space, custom_action_config, framework)

        # Dist_type is given directly as a class.
        elif (type(dist_type) is type
              and issubclass(dist_type, ActionDistribution) and dist_type
              not in (MultiActionDistribution, TorchMultiActionDistribution)):
            dist_cls = dist_type
        # Box space -> DiagGaussian OR Deterministic.
        elif isinstance(action_space, Box):
            if action_space.dtype.name.startswith("int"):
                low_ = np.min(action_space.low)
                high_ = np.max(action_space.high)
                dist_cls = (TorchMultiCategorical
                            if framework == "torch" else MultiCategorical)
                num_cats = int(np.prod(action_space.shape))
                return (
                    partial(
                        dist_cls,
                        input_lens=[high_ - low_ + 1 for _ in range(num_cats)],
                        action_space=action_space,
                    ),
                    num_cats * (high_ - low_ + 1),
                )
            else:
                if len(action_space.shape) > 1:
                    raise UnsupportedSpaceException(
                        "Action space has multiple dimensions "
                        "{}. ".format(action_space.shape) +
                        "Consider reshaping this into a single dimension, "
                        "using a custom action distribution, "
                        "using a Tuple action space, or the multi-agent API.")
                # TODO(sven): Check for bounds and return SquashedNormal, etc..
                if dist_type is None:
                    return (
                        partial(
                            TorchDiagGaussian
                            if framework == "torch" else DiagGaussian,
                            action_space=action_space,
                        ),
                        DiagGaussian.required_model_output_shape(
                            action_space, config),
                    )
                elif dist_type == "deterministic":
                    dist_cls = (TorchDeterministic
                                if framework == "torch" else Deterministic)
        # Discrete Space -> Categorical.
        elif isinstance(action_space, Discrete):
            dist_cls = (TorchCategorical if framework == "torch" else
                        JAXCategorical if framework == "jax" else Categorical)
        # Tuple/Dict Spaces -> MultiAction.
        elif (dist_type in (
                MultiActionDistribution,
                TorchMultiActionDistribution,
        ) or isinstance(action_space, (Tuple, Dict))):
            return ModelCatalog._get_multi_action_distribution(
                (MultiActionDistribution
                 if framework == "tf" else TorchMultiActionDistribution),
                action_space,
                config,
                framework,
            )
        # Simplex -> Dirichlet.
        elif isinstance(action_space, Simplex):
            if framework == "torch":
                # TODO(sven): implement
                raise NotImplementedError(
                    "Simplex action spaces not supported for torch.")
            dist_cls = Dirichlet
        # MultiDiscrete -> MultiCategorical.
        elif isinstance(action_space, MultiDiscrete):
            dist_cls = (TorchMultiCategorical
                        if framework == "torch" else MultiCategorical)
            return partial(dist_cls, input_lens=action_space.nvec), int(
                sum(action_space.nvec))
        # Unknown type -> Error.
        else:
            raise NotImplementedError("Unsupported args: {} {}".format(
                action_space, dist_type))

        return dist_cls, dist_cls.required_model_output_shape(
            action_space, config)
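
A usage sketch for the continuous Box branch (`config=None` falls back to MODEL_DEFAULTS; `get_action_dist` is assumed to be exposed on `ModelCatalog`, as in RLlib):

    import gym
    from ray.rllib.models import ModelCatalog

    dist_cls, dist_dim = ModelCatalog.get_action_dist(
        gym.spaces.Box(-1.0, 1.0, shape=(3,)), config=None)
    # DiagGaussian needs a mean and a log-std per action dim -> dist_dim == 6.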
Example #9
    def get_action_dist(action_space,
                        config,
                        dist_type=None,
                        framework="tf",
                        **kwargs):
        """Returns a distribution class and size for the given action space.

        Args:
            action_space (Space): Action space of the target gym env.
            config (Optional[dict]): Optional model config.
            dist_type (Optional[str]): Identifier of the action distribution
                interpreted as a hint.
            framework (str): One of "tf", "tfe", or "torch".
            kwargs (dict): Optional kwargs to pass on to the Distribution's
                constructor.

        Returns:
            Tuple:
                - dist_class (ActionDistribution): Python class of the
                    distribution.
                - dist_dim (int): The size of the input vector to the
                    distribution.
        """

        dist = None
        config = config or MODEL_DEFAULTS
        # Custom distribution given.
        if config.get("custom_action_dist"):
            action_dist_name = config["custom_action_dist"]
            logger.debug(
                "Using custom action distribution {}".format(action_dist_name))
            dist = _global_registry.get(RLLIB_ACTION_DIST, action_dist_name)
        # Dist_type is given directly as a class.
        elif type(dist_type) is type and \
                issubclass(dist_type, ActionDistribution) and \
                dist_type not in (
                MultiActionDistribution, TorchMultiActionDistribution):
            dist = dist_type
        # Box space -> DiagGaussian OR Deterministic.
        elif isinstance(action_space, gym.spaces.Box):
            if len(action_space.shape) > 1:
                raise UnsupportedSpaceException(
                    "Action space has multiple dimensions "
                    "{}. ".format(action_space.shape) +
                    "Consider reshaping this into a single dimension, "
                    "using a custom action distribution, "
                    "using a Tuple action space, or the multi-agent API.")
            # TODO(sven): Check for bounds and return SquashedNormal, etc..
            if dist_type is None:
                dist = TorchDiagGaussian if framework == "torch" \
                    else DiagGaussian
            elif dist_type == "deterministic":
                dist = TorchDeterministic if framework == "torch" \
                    else Deterministic
        # Discrete Space -> Categorical.
        elif isinstance(action_space, gym.spaces.Discrete):
            dist = TorchCategorical if framework == "torch" else Categorical
        # Tuple/Dict Spaces -> MultiAction.
        elif dist_type in (MultiActionDistribution,
                           TorchMultiActionDistribution) or \
                isinstance(action_space, (gym.spaces.Tuple, gym.spaces.Dict)):
            flat_action_space = flatten_space(action_space)
            child_dists_and_in_lens = tree.map_structure(
                lambda s: ModelCatalog.get_action_dist(
                    s, config, framework=framework), flat_action_space)
            child_dists = [e[0] for e in child_dists_and_in_lens]
            input_lens = [int(e[1]) for e in child_dists_and_in_lens]
            return partial((TorchMultiActionDistribution if framework
                            == "torch" else MultiActionDistribution),
                           action_space=action_space,
                           child_distributions=child_dists,
                           input_lens=input_lens), int(sum(input_lens))
        # Simplex -> Dirichlet.
        elif isinstance(action_space, Simplex):
            if framework == "torch":
                # TODO(sven): implement
                raise NotImplementedError(
                    "Simplex action spaces not supported for torch.")
            dist = Dirichlet
        # MultiDiscrete -> MultiCategorical.
        elif isinstance(action_space, gym.spaces.MultiDiscrete):
            dist = TorchMultiCategorical if framework == "torch" else \
                MultiCategorical
            return partial(dist, input_lens=action_space.nvec), \
                int(sum(action_space.nvec))
        # Unknown type -> Error.
        else:
            raise NotImplementedError("Unsupported args: {} {}".format(
                action_space, dist_type))

        return dist, dist.required_model_output_shape(action_space, config)
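
For the Tuple/Dict branch above, a sketch of the recursive resolution: each child distribution is looked up per sub-space and the input sizes are summed.

    import gym
    from ray.rllib.models import ModelCatalog

    space = gym.spaces.Tuple((gym.spaces.Discrete(4),
                              gym.spaces.Box(-1.0, 1.0, shape=(2,))))
    dist_cls, dist_dim = ModelCatalog.get_action_dist(space, config=None)
    # 4 Categorical logits + 4 DiagGaussian outputs (mean, log-std) -> 8.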
Example #10
    def get_model_v2(obs_space,
                     action_space,
                     num_outputs,
                     model_config,
                     framework="tf",
                     name="default_model",
                     model_interface=None,
                     default_model=None,
                     **model_kwargs):
        """Returns a suitable model compatible with given spaces and output.

        Args:
            obs_space (Space): Observation space of the target gym env. This
                may have an `original_space` attribute that specifies how to
                unflatten the tensor into a ragged tensor.
            action_space (Space): Action space of the target gym env.
            num_outputs (int): The size of the output vector of the model.
            model_config (dict): The "model" sub-config dict within the
                Trainer's config dict.
            framework (str): One of "tf", "tfe", or "torch".
            name (str): Name (scope) for the model.
            model_interface (cls): Interface required for the model.
            default_model (cls): Override the default class for the model. This
                only has an effect when not using a custom model.
            model_kwargs (dict): Args to pass to the ModelV2 constructor.

        Returns:
            model (ModelV2): Model to use for the policy.
        """

        if model_config.get("custom_model"):

            if "custom_options" in model_config and \
                    model_config["custom_options"] != DEPRECATED_VALUE:
                deprecation_warning("model.custom_options",
                                    "model.custom_model_config",
                                    error=False)
                model_config["custom_model_config"] = \
                    model_config.pop("custom_options")

            if isinstance(model_config["custom_model"], type):
                model_cls = model_config["custom_model"]
            else:
                model_cls = _global_registry.get(RLLIB_MODEL,
                                                 model_config["custom_model"])

            # TODO(sven): Hard-deprecate Model(V1).
            if issubclass(model_cls, ModelV2):
                logger.info("Wrapping {} as {}".format(model_cls,
                                                       model_interface))
                model_cls = ModelCatalog._wrap_if_needed(
                    model_cls, model_interface)

                if framework in ["tf", "tfe"]:
                    # Track and warn if vars were created but not registered.
                    created = set()

                    def track_var_creation(next_creator, **kw):
                        v = next_creator(**kw)
                        created.add(v)
                        return v

                    with tf.variable_creator_scope(track_var_creation):
                        # Try calling with kwargs first (custom ModelV2 should
                        # accept these as kwargs, not get them from
                        # config["custom_model_config"] anymore).
                        try:
                            instance = model_cls(obs_space, action_space,
                                                 num_outputs, model_config,
                                                 name, **model_kwargs)
                        except TypeError as e:
                            # Keyword error: Try old way w/o kwargs.
                            if "__init__() got an unexpected " in e.args[0]:
                                logger.warning(
                                    "Custom ModelV2 should accept all custom "
                                    "options as **kwargs, instead of expecting"
                                    " them in config['custom_model_config']!")
                                instance = model_cls(obs_space, action_space,
                                                     num_outputs, model_config,
                                                     name)
                            # Other error -> re-raise.
                            else:
                                raise e
                    registered = set(instance.variables())
                    not_registered = set()
                    for var in created:
                        if var not in registered:
                            not_registered.add(var)
                    if not_registered:
                        raise ValueError(
                            "It looks like variables {} were created as part "
                            "of {} but does not appear in model.variables() "
                            "({}). Did you forget to call "
                            "model.register_variables() on the variables in "
                            "question?".format(not_registered, instance,
                                               registered))
                else:
                    # PyTorch automatically tracks nn.Modules inside the parent
                    # nn.Module's constructor.
                    # TODO(sven): Do this for TF as well.
                    instance = model_cls(obs_space, action_space, num_outputs,
                                         model_config, name, **model_kwargs)
                return instance
            # TODO(sven): Hard-deprecate Model(V1). This check will be
            #   superfluous then.
            elif tf.executing_eagerly():
                raise ValueError(
                    "Eager execution requires a TFModelV2 model to be "
                    "used, however you specified a custom model {}".format(
                        model_cls))

        if framework in ["tf", "tfe"]:
            v2_class = None
            # Try to get a default v2 model.
            if not model_config.get("custom_model"):
                v2_class = default_model or ModelCatalog._get_v2_model_class(
                    obs_space, model_config, framework=framework)

            if model_config.get("use_lstm"):
                wrapped_cls = v2_class
                forward = wrapped_cls.forward
                v2_class = ModelCatalog._wrap_if_needed(
                    wrapped_cls, LSTMWrapper)
                v2_class._wrapped_forward = forward

            # fallback to a default v1 model
            if v2_class is None:
                if tf.executing_eagerly():
                    raise ValueError(
                        "Eager execution requires a TFModelV2 model to be "
                        "used, however there is no default V2 model for this "
                        "observation space: {}, use_lstm={}".format(
                            obs_space, model_config.get("use_lstm")))
                v2_class = make_v1_wrapper(ModelCatalog.get_model)
            # Wrap in the requested interface.
            wrapper = ModelCatalog._wrap_if_needed(v2_class, model_interface)
            return wrapper(obs_space, action_space, num_outputs, model_config,
                           name, **model_kwargs)
        elif framework == "torch":
            v2_class = \
                default_model or ModelCatalog._get_v2_model_class(
                    obs_space, model_config, framework=framework)
            if model_config.get("use_lstm"):
                from ray.rllib.models.torch.recurrent_net import LSTMWrapper \
                    as TorchLSTMWrapper
                wrapped_cls = v2_class
                forward = wrapped_cls.forward
                v2_class = ModelCatalog._wrap_if_needed(
                    wrapped_cls, TorchLSTMWrapper)
                v2_class._wrapped_forward = forward
            # Wrap in the requested interface.
            wrapper = ModelCatalog._wrap_if_needed(v2_class, model_interface)
            return wrapper(obs_space, action_space, num_outputs, model_config,
                           name, **model_kwargs)
        else:
            raise NotImplementedError(
                "`framework` must be 'tf|tfe|torch', but is "
                "{}!".format(framework))
Example #11
    def get_model_v2(obs_space,
                     action_space,
                     num_outputs,
                     model_config,
                     framework,
                     name="default_model",
                     model_interface=None,
                     default_model=None,
                     **model_kwargs):
        """Returns a suitable model compatible with given spaces and output.

        Args:
            obs_space (Space): Observation space of the target gym env. This
                may have an `original_space` attribute that specifies how to
                unflatten the tensor into a ragged tensor.
            action_space (Space): Action space of the target gym env.
            num_outputs (int): The size of the output vector of the model.
            model_config (dict): The "model" sub-config dict within the
                Trainer's config dict.
            framework (str): Either "tf" or "torch".
            name (str): Name (scope) for the model.
            model_interface (cls): Interface required for the model.
            default_model (cls): Override the default class for the model. This
                only has an effect when not using a custom model.
            model_kwargs (dict): Args to pass to the ModelV2 constructor.

        Returns:
            model (ModelV2): Model to use for the policy.
        """

        if model_config.get("custom_model"):
            model_cls = _global_registry.get(RLLIB_MODEL,
                                             model_config["custom_model"])
            if issubclass(model_cls, ModelV2):
                if model_interface and not issubclass(model_cls,
                                                      model_interface):
                    raise ValueError("The given model must subclass",
                                     model_interface)

                if framework == "tf":
                    created = set()

                    # Track and warn if vars were created but not registered
                    def track_var_creation(next_creator, **kw):
                        v = next_creator(**kw)
                        created.add(v)
                        return v

                    with tf.variable_creator_scope(track_var_creation):
                        instance = model_cls(obs_space, action_space,
                                             num_outputs, model_config, name,
                                             **model_kwargs)
                    registered = set(instance.variables())
                    not_registered = set()
                    for var in created:
                        if var not in registered:
                            not_registered.add(var)
                    if not_registered:
                        raise ValueError(
                            "It looks like variables {} were created as part "
                            "of {} but does not appear in model.variables() "
                            "({}). Did you forget to call "
                            "model.register_variables() on the variables in "
                            "question?".format(not_registered, instance,
                                               registered))
                else:
                    # No variable tracking needed for non-tf frameworks.
                    instance = model_cls(obs_space, action_space, num_outputs,
                                         model_config, name, **model_kwargs)
                return instance
            elif tf.executing_eagerly():
                raise ValueError(
                    "Eager execution requires a TFModelV2 model to be "
                    "used, but you specified the non-TFModelV2 custom "
                    "model {}.".format(model_cls))

        if framework == "tf":
            v2_class = None
            # Try to get a default V2 model.
            if not model_config.get("custom_model"):
                v2_class = default_model or ModelCatalog._get_v2_model(
                    obs_space, model_config)
            # Fall back to a default V1 model.
            if v2_class is None:
                if tf.executing_eagerly():
                    raise ValueError(
                        "Eager execution requires a TFModelV2 model to be "
                        "used, but there is no default V2 model for this "
                        "observation space: {}, use_lstm={}".format(
                            obs_space, model_config.get("use_lstm")))
                v2_class = make_v1_wrapper(ModelCatalog.get_model)
            # Wrap in the requested interface.
            wrapper = ModelCatalog._wrap_if_needed(v2_class, model_interface)
            return wrapper(obs_space, action_space, num_outputs, model_config,
                           name, **model_kwargs)
        elif framework == "torch":
            if default_model:
                return default_model(obs_space, action_space, num_outputs,
                                     model_config, name)
            return ModelCatalog._get_default_torch_model_v2(
                obs_space, action_space, num_outputs, model_config, name)
        else:
            raise NotImplementedError(
                "Framework must be 'tf' or 'torch': {}".format(framework))
Example #12
    def _setup(self, config):
        env = self._env_id
        if env:
            config["env"] = env
            if _global_registry.contains(ENV_CREATOR, env):
                self.env_creator = _global_registry.get(ENV_CREATOR, env)
            else:
                import gym  # soft dependency
                self.env_creator = lambda env_config: gym.make(env)
        else:
            self.env_creator = lambda env_config: None

        # Merge the supplied config with the class default, but store the
        # user-provided one.
        self.raw_user_config = config
        self.config = Trainer.merge_trainer_configs(self._default_config,
                                                    config)

        # Check and resolve DL framework settings.
        if "use_pytorch" in self.config and \
                self.config["use_pytorch"] != DEPRECATED_VALUE:
            deprecation_warning("use_pytorch", "framework=torch", error=False)
            if self.config["use_pytorch"]:
                self.config["framework"] = "torch"
            self.config.pop("use_pytorch")
        if "eager" in self.config and self.config["eager"] != DEPRECATED_VALUE:
            deprecation_warning("eager", "framework=tfe", error=False)
            if self.config["eager"]:
                self.config["framework"] = "tfe"
            self.config.pop("eager")

        # Check all dependencies and resolve "auto" framework.
        self.config["framework"] = check_framework(self.config["framework"])
        # Notify about eager/tracing support.
        if tf and self.config["framework"] == "tfe":
            if not tf.executing_eagerly():
                tf.enable_eager_execution()
            logger.info("Executing eagerly, with eager_tracing={}".format(
                self.config["eager_tracing"]))
        if tf and not tf.executing_eagerly() and \
                self.config["framework"] != "torch":
            logger.info("Tip: set framework=tfe or the --eager flag to enable "
                        "TensorFlow eager execution")

        if self.config["normalize_actions"]:
            inner = self.env_creator

            def normalize(env):
                import gym  # soft dependency
                if not isinstance(env, gym.Env):
                    raise ValueError(
                        "Cannot apply NormalizeActionWrapper to env of "
                        "type {}, which does not subclass gym.Env.".format(
                            type(env)))
                return NormalizeActionWrapper(env)

            self.env_creator = lambda env_config: normalize(inner(env_config))

        Trainer._validate_config(self.config)
        if not callable(self.config["callbacks"]):
            raise ValueError(
                "`callbacks` must be a callable that returns an instance "
                "of DefaultCallbacks, got {}.".format(
                    self.config["callbacks"]))
        self.callbacks = self.config["callbacks"]()
        log_level = self.config.get("log_level")
        if log_level in ["WARN", "ERROR"]:
            logger.info("Current log_level is {}. For more information, "
                        "set 'log_level': 'INFO' / 'DEBUG' or use the -v and "
                        "-vv flags.".format(log_level))
        if self.config.get("log_level"):
            logging.getLogger("ray.rllib").setLevel(self.config["log_level"])

        def get_scope():
            if tf and not tf.executing_eagerly():
                return tf.Graph().as_default()
            else:
                return open(os.devnull)  # fake a no-op scope

        with get_scope():
            self._init(self.config, self.env_creator)

            # Evaluation setup.
            if self.config.get("evaluation_interval"):
                # Update env_config with evaluation settings:
                extra_config = copy.deepcopy(self.config["evaluation_config"])
                # Assert that user has not unset "in_evaluation".
                assert "in_evaluation" not in extra_config or \
                    extra_config["in_evaluation"] is True
                extra_config.update({
                    "batch_mode": "complete_episodes",
                    "rollout_fragment_length": 1,
                    "in_evaluation": True,
                })
                logger.debug(
                    "using evaluation_config: {}".format(extra_config))

                self.evaluation_workers = self._make_workers(
                    self.env_creator,
                    self._policy,
                    merge_dicts(self.config, extra_config),
                    num_workers=self.config["evaluation_num_workers"])
                self.evaluation_metrics = {}
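
The deprecation handling above simply maps the old boolean keys onto the newer `framework` key. A standalone mirror of that logic (with `DEPRECATED_VALUE` redefined locally so the sketch runs on its own; in RLlib it lives in `ray.rllib.utils.deprecation`):

    DEPRECATED_VALUE = -1  # sentinel meaning "key not overridden by user"

    def resolve_framework_keys(config):
        """Map deprecated `use_pytorch`/`eager` keys onto `framework`."""
        if config.get("use_pytorch", DEPRECATED_VALUE) != DEPRECATED_VALUE:
            if config["use_pytorch"]:
                config["framework"] = "torch"
            config.pop("use_pytorch")
        if config.get("eager", DEPRECATED_VALUE) != DEPRECATED_VALUE:
            if config["eager"]:
                config["framework"] = "tfe"
            config.pop("eager")
        return config

    assert resolve_framework_keys({"use_pytorch": True}) == {
        "framework": "torch"
    }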
Example #13
    def get_action_dist(action_space,
                        config,
                        dist_type=None,
                        torch=None,
                        framework="tf"):
        """Returns a distribution class and size for the given action space.

        Args:
            action_space (Space): Action space of the target gym env.
            config (dict): Optional model config.
            dist_type (Optional[str]): Identifier of the action distribution.
            torch (bool): Deprecated; whether to return a PyTorch
                distribution (use `framework="torch"` instead).
            framework (str): One of "tf" or "torch".

        Returns:
            dist_class (ActionDistribution): Python class of the distribution.
            dist_dim (int): The size of the input vector to the distribution.
        """
        # Handle the deprecated `torch` parameter:
        if torch is not None:
            deprecation_warning("`torch` parameter", "`framework`='tf|torch'")
            framework = "torch" if torch else "tf"

        dist = None
        config = config or MODEL_DEFAULTS
        if config.get("custom_action_dist"):
            action_dist_name = config["custom_action_dist"]
            logger.debug(
                "Using custom action distribution {}".format(action_dist_name))
            dist = _global_registry.get(RLLIB_ACTION_DIST, action_dist_name)

        elif isinstance(action_space, gym.spaces.Box):
            if len(action_space.shape) > 1:
                raise UnsupportedSpaceException(
                    "Action space has multiple dimensions "
                    "{}. ".format(action_space.shape) +
                    "Consider reshaping this into a single dimension, "
                    "using a custom action distribution, "
                    "using a Tuple action space, or the multi-agent API.")
            if dist_type is None:
                dist = DiagGaussian if framework == "tf" else TorchDiagGaussian
            elif dist_type == "deterministic":
                dist = Deterministic
        elif isinstance(action_space, gym.spaces.Discrete):
            dist = Categorical if framework == "tf" else TorchCategorical
        elif isinstance(action_space, gym.spaces.Tuple):
            if framework == "torch":
                # TODO(sven): implement
                raise NotImplementedError(
                    "Tuple action spaces not supported for Pytorch.")
            child_dist = []
            input_lens = []
            for action in action_space.spaces:
                dist, action_size = ModelCatalog.get_action_dist(
                    action, config)
                child_dist.append(dist)
                input_lens.append(action_size)
            return partial(MultiActionDistribution,
                           child_distributions=child_dist,
                           action_space=action_space,
                           input_lens=input_lens), sum(input_lens)
        elif isinstance(action_space, Simplex):
            if framework == "torch":
                # TODO(sven): implement
                raise NotImplementedError(
                    "Simplex action spaces not supported for torch.")
            dist = Dirichlet
        elif isinstance(action_space, gym.spaces.MultiDiscrete):
            if framework == "torch":
                # TODO(sven): implement
                raise NotImplementedError(
                    "MultiDiscrete action spaces not supported for Pytorch.")
            return partial(MultiCategorical, input_lens=action_space.nvec), \
                int(sum(action_space.nvec))
        elif isinstance(action_space, gym.spaces.Dict):
            # TODO(sven): implement
            raise NotImplementedError(
                "Dict action spaces are not supported, consider using "
                "gym.spaces.Tuple instead")
        else:
            raise NotImplementedError("Unsupported args: {} {}".format(
                action_space, dist_type))

        return dist, dist.required_model_output_shape(action_space, config)
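
Two representative calls (a sketch; the sizes follow from `required_model_output_shape()`): a Discrete space needs one logit per action, and a 1-D Box needs a mean plus a log-std per dimension.

    import gym
    from ray.rllib.models import ModelCatalog

    # Discrete(4) -> Categorical with 4 logits.
    dist_class, dist_dim = ModelCatalog.get_action_dist(
        gym.spaces.Discrete(4), config=None, framework="tf")
    assert dist_dim == 4

    # Box of shape (3,) -> DiagGaussian: 3 means + 3 log-stds = 6 inputs.
    dist_class, dist_dim = ModelCatalog.get_action_dist(
        gym.spaces.Box(-1.0, 1.0, shape=(3, )), config=None, framework="tf")
    assert dist_dim == 6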
Example #14
    def get_action_dist(action_space, config, dist_type=None, torch=False):
        """Returns action distribution class and size for the given action space.

        Args:
            action_space (Space): Action space of the target gym env.
            config (dict): Optional model config.
            dist_type (str): Optional identifier of the action distribution.
            torch (bool): Whether to return a PyTorch distribution.

        Returns:
            dist_class (ActionDistribution): Python class of the distribution.
            dist_dim (int): The size of the input vector to the distribution.
        """

        config = config or MODEL_DEFAULTS
        if config.get("custom_action_dist"):
            action_dist_name = config["custom_action_dist"]
            logger.debug(
                "Using custom action distribution {}".format(action_dist_name))
            dist = _global_registry.get(RLLIB_ACTION_DIST, action_dist_name)

        elif isinstance(action_space, gym.spaces.Box):
            if len(action_space.shape) > 1:
                raise UnsupportedSpaceException(
                    "Action space has multiple dimensions "
                    "{}. ".format(action_space.shape) +
                    "Consider reshaping this into a single dimension, "
                    "using a custom action distribution, "
                    "using a Tuple action space, or the multi-agent API.")
            if dist_type is None:
                dist = TorchDiagGaussian if torch else DiagGaussian
            elif dist_type == "deterministic":
                dist = Deterministic
        elif isinstance(action_space, gym.spaces.Discrete):
            dist = TorchCategorical if torch else Categorical
        elif isinstance(action_space, gym.spaces.Tuple):
            if torch:
                raise NotImplementedError("Tuple action spaces not supported "
                                          "for Pytorch.")
            child_dist = []
            input_lens = []
            for action in action_space.spaces:
                dist, action_size = ModelCatalog.get_action_dist(
                    action, config)
                child_dist.append(dist)
                input_lens.append(action_size)
            return partial(MultiActionDistribution,
                           child_distributions=child_dist,
                           action_space=action_space,
                           input_lens=input_lens), sum(input_lens)
        elif isinstance(action_space, Simplex):
            if torch:
                raise NotImplementedError("Simplex action spaces not "
                                          "supported for Pytorch.")
            dist = Dirichlet
        elif isinstance(action_space, gym.spaces.MultiDiscrete):
            if torch:
                raise NotImplementedError("MultiDiscrete action spaces not "
                                          "supported for PyTorch.")
            return partial(MultiCategorical, input_lens=action_space.nvec), \
                int(sum(action_space.nvec))
        else:
            # Unknown space type -> error out.
            raise NotImplementedError("Unsupported args: {} {}".format(
                action_space, dist_type))

        return dist, dist.required_model_output_shape(action_space, config)
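
For the Tuple branch, the returned class is a `functools.partial` over `MultiActionDistribution` whose input size is the sum of the child sizes. A sketch against this version's signature:

    import gym
    from ray.rllib.models import ModelCatalog

    tuple_space = gym.spaces.Tuple(
        [gym.spaces.Discrete(3),
         gym.spaces.Box(-1.0, 1.0, shape=(2, ))])
    dist_class, dist_dim = ModelCatalog.get_action_dist(
        tuple_space, config=None)
    # 3 Categorical logits + 2 * 2 DiagGaussian params = 7 inputs total.
    assert dist_dim == 7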
Example #15
    def get_action_dist(action_space,
                        config,
                        dist_type=None,
                        framework="tf",
                        **kwargs):
        """Returns a distribution class and size for the given action space.

        Args:
            action_space (Space): Action space of the target gym env.
            config (Optional[dict]): Optional model config.
            dist_type (Optional[str]): Identifier of the action distribution.
            framework (str): One of "tf" or "torch".
            kwargs (dict): Optional kwargs to pass on to the Distribution's
                constructor.

        Returns:
            dist_class (ActionDistribution): Python class of the distribution.
            dist_dim (int): The size of the input vector to the distribution.
        """
        dist = None
        config = config or MODEL_DEFAULTS
        # Custom distribution given.
        if config.get("custom_action_dist"):
            action_dist_name = config["custom_action_dist"]
            logger.debug(
                "Using custom action distribution {}".format(action_dist_name))
            dist = _global_registry.get(RLLIB_ACTION_DIST, action_dist_name)
        # Dist_type is given directly as a class.
        elif type(dist_type) is type and \
                issubclass(dist_type, ActionDistribution) and \
                dist_type is not MultiActionDistribution:
            dist = dist_type
        # Box space -> DiagGaussian OR Deterministic.
        elif isinstance(action_space, gym.spaces.Box):
            if len(action_space.shape) > 1:
                raise UnsupportedSpaceException(
                    "Action space has multiple dimensions "
                    "{}. ".format(action_space.shape) +
                    "Consider reshaping this into a single dimension, "
                    "using a custom action distribution, "
                    "using a Tuple action space, or the multi-agent API.")
            # TODO(sven): Check for bounds and return SquashedNormal, etc..
            if dist_type is None:
                dist = DiagGaussian if framework == "tf" else TorchDiagGaussian
            elif dist_type == "deterministic":
                dist = Deterministic
        # Discrete Space -> Categorical.
        elif isinstance(action_space, gym.spaces.Discrete):
            dist = Categorical if framework == "tf" else TorchCategorical
        # Tuple Space -> MultiAction.
        elif dist_type is MultiActionDistribution or \
                isinstance(action_space, gym.spaces.Tuple):
            if framework == "torch":
                # TODO(sven): implement
                raise NotImplementedError(
                    "Tuple action spaces not supported for Pytorch.")
            child_dist = []
            input_lens = []
            for action in action_space.spaces:
                dist, action_size = ModelCatalog.get_action_dist(
                    action, config)
                child_dist.append(dist)
                input_lens.append(action_size)
            return partial(
                MultiActionDistribution,
                action_space=action_space,
                child_distributions=child_dist,
                input_lens=input_lens), sum(input_lens)
        # Simplex -> Dirichlet.
        elif isinstance(action_space, Simplex):
            if framework == "torch":
                # TODO(sven): implement
                raise NotImplementedError(
                    "Simplex action spaces not supported for torch.")
            dist = Dirichlet
        # MultiDiscrete -> MultiCategorical.
        elif isinstance(action_space, gym.spaces.MultiDiscrete):
            if framework == "torch":
                # TODO(sven): implement
                raise NotImplementedError(
                    "MultiDiscrete action spaces not supported for Pytorch.")
            return partial(MultiCategorical, input_lens=action_space.nvec), \
                int(sum(action_space.nvec))
        # Dict -> TODO(sven)
        elif isinstance(action_space, gym.spaces.Dict):
            # TODO(sven): implement
            raise NotImplementedError(
                "Dict action spaces are not supported, consider using "
                "gym.spaces.Tuple instead")
        # Unknown type -> Error.
        else:
            raise NotImplementedError("Unsupported args: {} {}".format(
                action_space, dist_type))

        return dist, dist.required_model_output_shape(action_space, config)
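
Unlike the earlier variants, this version also accepts an `ActionDistribution` subclass directly as `dist_type`. A sketch (the `tf_action_dist` import path is assumed for this Ray version):

    import gym
    from ray.rllib.models import ModelCatalog
    from ray.rllib.models.tf.tf_action_dist import Deterministic

    dist_class, dist_dim = ModelCatalog.get_action_dist(
        gym.spaces.Box(-1.0, 1.0, shape=(2, )),
        config=None,
        dist_type=Deterministic,
        framework="tf")
    assert dist_class is Deterministic and dist_dim == 2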
Example #16
    def _setup(self, config):
        env = self._env_id
        if env:
            config["env"] = env
            if _global_registry.contains(ENV_CREATOR, env):
                self.env_creator = _global_registry.get(ENV_CREATOR, env)
            else:
                import gym  # soft dependency
                self.env_creator = lambda env_config: gym.make(env)
        else:
            self.env_creator = lambda env_config: None

        # Merge the supplied config with the class default, but store the
        # user-provided one.
        self.raw_user_config = config
        self.config = Trainer.merge_trainer_configs(self._default_config,
                                                    config)

        if self.config["normalize_actions"]:
            inner = self.env_creator

            def normalize(env):
                import gym  # soft dependency
                if not isinstance(env, gym.Env):
                    raise ValueError(
                        "Cannot apply NormalizeActionWrapper to env of "
                        "type {}, which does not subclass gym.Env.".format(
                            type(env)))
                return NormalizeActionWrapper(env)

            self.env_creator = lambda env_config: normalize(inner(env_config))

        Trainer._validate_config(self.config)
        log_level = self.config.get("log_level")
        if log_level in ["WARN", "ERROR"]:
            logger.info("Current log_level is {}. For more information, "
                        "set 'log_level': 'INFO' / 'DEBUG' or use the -v and "
                        "-vv flags.".format(log_level))
        if log_level:
            logging.getLogger("ray.rllib").setLevel(log_level)

        def get_scope():
            if tf and not tf.executing_eagerly():
                return tf.Graph().as_default()
            else:
                return open("/dev/null")  # fake a no-op scope

        with get_scope():
            self._init(self.config, self.env_creator)

            # Evaluation setup.
            if self.config.get("evaluation_interval"):
                # Update env_config with evaluation settings:
                extra_config = copy.deepcopy(self.config["evaluation_config"])
                # Assert that user has not unset "in_evaluation".
                assert "in_evaluation" not in extra_config or \
                    extra_config["in_evaluation"] is True
                extra_config.update({
                    "batch_mode": "complete_episodes",
                    "batch_steps": 1,
                    "in_evaluation": True,
                })
                logger.debug(
                    "using evaluation_config: {}".format(extra_config))

                self.evaluation_workers = self._make_workers(
                    self.env_creator,
                    self._policy,
                    merge_dicts(self.config, extra_config),
                    num_workers=self.config["evaluation_num_workers"])
                self.evaluation_metrics = {}
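
The `open(os.devnull)` trick fakes a context manager when no tf graph scope is needed. On Python 3.7+, `contextlib.nullcontext()` is a cleaner, file-free alternative (a sketch, with `tf` passed in to keep it self-contained):

    import contextlib

    def get_scope(tf):
        """Return a graph scope under graph-mode tf, else a no-op scope."""
        if tf and not tf.executing_eagerly():
            return tf.Graph().as_default()
        return contextlib.nullcontext()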
Example #17
def get_env_creator(env_id):
    """Return the environment creator funtion for the given environment id."""
    if not _global_registry.contains(ENV_CREATOR, env_id):
        raise ValueError(f"Environment id {env_id} not registered in Tune")
    return _global_registry.get(ENV_CREATOR, env_id)
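
Typical usage pairs this helper with `ray.tune.registry.register_env` (the env name here is illustrative):

    import gym
    from ray.tune.registry import register_env

    register_env("my_cartpole", lambda env_config: gym.make("CartPole-v0"))
    creator = get_env_creator("my_cartpole")
    env = creator({})  # the dict is passed through as env_config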
Example #18
    def get_model_v2(obs_space,
                     action_space,
                     num_outputs,
                     model_config,
                     framework="tf",
                     name=None,
                     model_interface=None,
                     **model_kwargs):
        """Returns a suitable model compatible with given spaces and output.

        Args:
            obs_space (Space): Observation space of the target gym env. This
                may have an `original_space` attribute that specifies how to
                unflatten the tensor into a ragged tensor.
            action_space (Space): Action space of the target gym env.
            num_outputs (int): The size of the output vector of the model.
            framework (str): Either "tf" or "torch".
            name (str): Name (scope) for the model.
            model_interface (cls): Interface required for the model.
            model_kwargs (dict): Args to pass to the ModelV2 constructor.

        Returns:
            model (ModelV2): Model to use for the policy.
        """

        if model_config.get("custom_model"):
            model_cls = _global_registry.get(RLLIB_MODEL,
                                             model_config["custom_model"])
            if issubclass(model_cls, ModelV2):
                if model_interface and not issubclass(model_cls,
                                                      model_interface):
                    raise ValueError("The given model must subclass",
                                     model_interface)
                created = set()

                # Track and warn if variables were created but not registered
                def track_var_creation(next_creator, **kw):
                    v = next_creator(**kw)
                    created.add(v)
                    return v

                with tf.variable_creator_scope(track_var_creation):
                    instance = model_cls(obs_space, action_space, num_outputs,
                                         model_config, name, **model_kwargs)
                registered = set(instance.variables())
                not_registered = set()
                for var in created:
                    if var not in registered:
                        not_registered.add(var)
                if not_registered:
                    raise ValueError(
                        "It looks like variables {} were created as part of "
                        "{} but does not appear in model.variables() ({}). "
                        "Did you forget to call model.register_variables() "
                        "on the variables in question?".format(
                            not_registered, instance, registered))
                return instance

        if framework == "tf":
            legacy_model_cls = ModelCatalog.get_model
            wrapper = ModelCatalog._wrap_if_needed(
                make_v1_wrapper(legacy_model_cls), model_interface)
            return wrapper(obs_space, action_space, num_outputs, model_config,
                           name, **model_kwargs)

        raise NotImplementedError("TODO: support {} models".format(framework))
Example #19
def rollout(agent,
            env_name,
            num_steps,
            num_episodes=0,
            saver=None,
            no_render=True,
            video_dir=None):
    policy_agent_mapping = default_policy_agent_mapping

    if saver is None:
        saver = RolloutSaver()

    # Normal case: Agent was set up correctly with an evaluation WorkerSet,
    # which we will now use for the rollout.
    if hasattr(agent, "evaluation_workers") and isinstance(
            agent.evaluation_workers, WorkerSet):
        steps = 0
        episodes = 0
        while keep_going(steps, num_steps, episodes, num_episodes):
            saver.begin_rollout()
            eval_result = agent.evaluate()["evaluation"]
            # Increase timestep and episode counters.
            eps = agent.config["evaluation_num_episodes"]
            episodes += eps
            steps += eps * eval_result["episode_len_mean"]
            # Print out results and continue.
            print("Episode #{}: reward: {}".format(
                episodes, eval_result["episode_reward_mean"]))
            saver.end_rollout()
        return

    # Agent has no evaluation workers, but RolloutWorkers.
    elif hasattr(agent, "workers") and isinstance(agent.workers, WorkerSet):
        env = agent.workers.local_worker().env
        multiagent = isinstance(env, MultiAgentEnv)
        if agent.workers.local_worker().multiagent:
            policy_agent_mapping = agent.config["multiagent"][
                "policy_mapping_fn"]
        policy_map = agent.workers.local_worker().policy_map
        state_init = {p: m.get_initial_state() for p, m in policy_map.items()}
        use_lstm = {p: len(s) > 0 for p, s in state_init.items()}
    # Agent has neither evaluation- nor rollout workers.
    else:
        from gym import envs
        if envs.registry.env_specs.get(agent.config["env"]):
            # If the environment is a gym environment, load it from gym.
            env = gym.make(agent.config["env"])
        else:
            # If the environment was registered with Ray, load it via the
            # Tune registry.
            env_creator = _global_registry.get(ENV_CREATOR,
                                               agent.config["env"])
            env_context = EnvContext(agent.config["env_config"] or {},
                                     worker_index=0)
            env = env_creator(env_context)
        multiagent = False
        try:
            policy_map = {DEFAULT_POLICY_ID: agent.policy}
        except AttributeError:
            raise AttributeError(
                "Agent ({}) does not have a `policy` property! This is needed "
                "for performing (trained) agent rollouts.".format(agent))
        use_lstm = {DEFAULT_POLICY_ID: False}

    action_init = {
        p: flatten_to_single_ndarray(m.action_space.sample())
        for p, m in policy_map.items()
    }

    # If monitoring has been requested, manually wrap our environment with a
    # gym monitor, which is set to record every episode.
    if video_dir:
        env = gym_wrappers.Monitor(env=env,
                                   directory=video_dir,
                                   video_callable=lambda _: True,
                                   force=True)

    steps = 0
    episodes = 0
    while keep_going(steps, num_steps, episodes, num_episodes):
        mapping_cache = {}  # in case policy_agent_mapping is stochastic
        saver.begin_rollout()
        obs = env.reset()
        agent_states = DefaultMapping(
            lambda agent_id: state_init[mapping_cache[agent_id]])
        prev_actions = DefaultMapping(
            lambda agent_id: action_init[mapping_cache[agent_id]])
        prev_rewards = collections.defaultdict(lambda: 0.)
        done = False
        reward_total = 0.0
        while not done and keep_going(steps, num_steps, episodes,
                                      num_episodes):
            multi_obs = obs if multiagent else {_DUMMY_AGENT_ID: obs}
            action_dict = {}
            for agent_id, a_obs in multi_obs.items():
                if a_obs is not None:
                    policy_id = mapping_cache.setdefault(
                        agent_id, policy_agent_mapping(agent_id))
                    p_use_lstm = use_lstm[policy_id]
                    if p_use_lstm:
                        a_action, p_state, _ = agent.compute_action(
                            a_obs,
                            state=agent_states[agent_id],
                            prev_action=prev_actions[agent_id],
                            prev_reward=prev_rewards[agent_id],
                            policy_id=policy_id)
                        agent_states[agent_id] = p_state
                    else:
                        a_action = agent.compute_action(
                            a_obs,
                            prev_action=prev_actions[agent_id],
                            prev_reward=prev_rewards[agent_id],
                            policy_id=policy_id)
                    a_action = flatten_to_single_ndarray(a_action)
                    action_dict[agent_id] = a_action
                    prev_actions[agent_id] = a_action
            action = action_dict

            action = action if multiagent else action[_DUMMY_AGENT_ID]
            next_obs, reward, done, info = env.step(action)
            if multiagent:
                for agent_id, r in reward.items():
                    prev_rewards[agent_id] = r
            else:
                prev_rewards[_DUMMY_AGENT_ID] = reward

            if multiagent:
                done = done["__all__"]
                reward_total += sum(r for r in reward.values()
                                    if r is not None)
            else:
                reward_total += reward
            if not no_render:
                env.render()
            saver.append_step(obs, action, next_obs, reward, done, info)
            steps += 1
            obs = next_obs
        saver.end_rollout()
        print("Episode #{}: reward: {}".format(episodes, reward_total))
        if done:
            episodes += 1
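
A hedged sketch of driving this helper directly (the trainer class and config keys are assumed for a Ray version of the same era; normally the `rllib rollout` CLI builds the agent and saver for you):

    import ray
    from ray.rllib.agents.ppo import PPOTrainer

    ray.init()
    agent = PPOTrainer(env="CartPole-v0", config={"framework": "tf"})
    # agent.restore("/path/to/checkpoint")  # optionally load trained weights
    rollout(agent, "CartPole-v0", num_steps=200, num_episodes=2)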