Example #1
    def _get_model(input_dict, obs_space, num_outputs, options, state_in,
                   seq_lens):
        if options.get("custom_model"):
            model = options["custom_model"]
            logger.info("Using custom model {}".format(model))
            return _global_registry.get(RLLIB_MODEL, model)(
                input_dict,
                obs_space,
                num_outputs,
                options,
                state_in=state_in,
                seq_lens=seq_lens)

        obs_rank = len(input_dict["obs"].shape) - 1

        if obs_rank > 1:
            return VisionNetwork(input_dict, obs_space, num_outputs, options)

        return FullyConnectedNetwork(input_dict, obs_space, num_outputs,
                                     options)
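The `custom_model` branch above resolves the model by name from the Tune object registry. A minimal sketch of how such a model gets registered and selected, assuming the older RLlib `Model`/`ModelCatalog` API these snippets come from (the class `MyCustomModel` and the name "my_fcnet" are illustrative, not part of the original code):

    # Sketch only; import paths follow the older RLlib API.
    from ray.rllib.models import Model, ModelCatalog

    class MyCustomModel(Model):
        # In the old Model API, subclasses implement _build_layers(); the layer
        # setup is omitted here because it is not part of the original snippet.
        def _build_layers(self, inputs, num_outputs, options):
            raise NotImplementedError

    # Register under a name, then request it via the model options so the
    # "custom_model" branch above is taken instead of the default networks.
    ModelCatalog.register_custom_model("my_fcnet", MyCustomModel)
    model_options = {"custom_model": "my_fcnet"}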
Example #2
    def _get_model(inputs, num_outputs, options):
        if "custom_model" in options:
            model = options["custom_model"]
            print("Using custom model {}".format(model))
            return _global_registry.get(RLLIB_MODEL,
                                        model)(inputs, num_outputs, options)

        obs_rank = len(inputs.shape) - 1

        # num_outputs > 1 used to avoid hitting this with the value function
        if isinstance(
                options.get("custom_options", {}).get(
                    "multiagent_fcnet_hiddens", 1), list) and num_outputs > 1:
            return MultiAgentFullyConnectedNetwork(inputs, num_outputs,
                                                   options)

        if obs_rank > 1:
            return VisionNetwork(inputs, num_outputs, options)

        return FullyConnectedNetwork(inputs, num_outputs, options)
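The branching above reduces to a shape/type check on the observation and the options dict. A self-contained sketch of that selection logic (the helper name `select_network` and the returned strings are illustrative):

    def select_network(obs_shape, num_outputs, options):
        # obs_shape is the per-sample observation shape (batch dim excluded).
        obs_rank = len(obs_shape)
        multiagent_hiddens = options.get("custom_options", {}).get(
            "multiagent_fcnet_hiddens", 1)
        # A list of per-agent hidden sizes selects the multi-agent network;
        # num_outputs > 1 keeps the scalar value function out of this branch.
        if isinstance(multiagent_hiddens, list) and num_outputs > 1:
            return "MultiAgentFullyConnectedNetwork"
        if obs_rank > 1:
            return "VisionNetwork"          # e.g. (84, 84, 4) image frames
        return "FullyConnectedNetwork"      # e.g. (17,) flat feature vectors

    assert select_network((84, 84, 4), 6, {}) == "VisionNetwork"
    assert select_network((17,), 4, {}) == "FullyConnectedNetwork"
    assert select_network((17,), 4, {"custom_options": {
        "multiagent_fcnet_hiddens": [[32], [32]]
    }}) == "MultiAgentFullyConnectedNetwork"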
Example #3
File: catalog.py  Project: the-sea/ray
    def get_model(inputs, num_outputs, options=None):
        """Returns a suitable model conforming to given input and output specs.

        Args:
            inputs (Tensor): The input tensor to the model.
            num_outputs (int): The size of the output vector of the model.
            options (dict): Optional args to pass to the model constructor.

        Returns:
            model (Model): Neural network model.
        """

        if options is None:
            options = {}

        obs_rank = len(inputs.get_shape()) - 1

        if obs_rank > 1:
            return VisionNetwork(inputs, num_outputs, options)

        return FullyConnectedNetwork(inputs, num_outputs, options)
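A hedged usage sketch for this signature (TF1-style placeholders; it assumes the function is exposed as `ModelCatalog.get_model`, as in the older Ray versions these listings come from, and the shapes are illustrative):

    import tensorflow as tf

    # Image-like observations: per-sample rank is 3, so get_model would return
    # a VisionNetwork.
    image_obs = tf.placeholder(tf.float32, [None, 84, 84, 4])
    vision_model = ModelCatalog.get_model(image_obs, num_outputs=6)

    # Flat observations: per-sample rank is 1, so a FullyConnectedNetwork is
    # returned instead.
    vector_obs = tf.placeholder(tf.float32, [None, 17])
    fc_model = ModelCatalog.get_model(vector_obs, num_outputs=4)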
Example #4
File: catalog.py  Project: cathywu/ray
    def get_model(registry, inputs, num_outputs, options=dict()):
        """Returns a suitable model conforming to given input and output specs.

        Args:
            registry (obj): Registry of named objects (ray.tune.registry).
            inputs (Tensor): The input tensor to the model.
            num_outputs (int): The size of the output vector of the model.
            options (dict): Optional args to pass to the model constructor.

        Returns:
            model (Model): Neural network model.
        """

        if "custom_model" in options:
            model = options["custom_model"]
            print("Using custom model {}".format(model))
            return registry.get(RLLIB_MODEL, model)(inputs, num_outputs,
                                                    options)

        obs_rank = len(inputs.shape) - 1

        # num_outputs > 1 used to avoid hitting this with the value function
        if isinstance(
                options.get("custom_options", {}).get(
                    "multiagent_fcnet_hiddens", 1), list) and num_outputs > 1:
            return MultiAgentFullyConnectedNetwork(inputs, num_outputs,
                                                   options)

        if obs_rank > 1:
            return VisionNetwork(inputs, num_outputs, options)

        # Use two-level network if the hidden sizes are a nested list
        if "hierarchical_fcnet_hiddens" in options.get("custom_options",
                                                       {}) and num_outputs > 1:
            return TwoLevelFCNetwork(inputs, num_outputs, options)

        return FullyConnectedNetwork(inputs, num_outputs, options)
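The extra branches in this variant are driven by keys under "custom_options". Hypothetical options dicts that would exercise each branch (the hidden-size values and the model name are illustrative):

    # Selects MultiAgentFullyConnectedNetwork: the value is a list (one hidden
    # layout per agent) and num_outputs > 1 keeps the value function out.
    multiagent_options = {
        "custom_options": {"multiagent_fcnet_hiddens": [[32, 32], [32, 32]]},
    }

    # Selects TwoLevelFCNetwork (for rank-1 observations): the key is present
    # and num_outputs > 1.
    hierarchical_options = {
        "custom_options": {"hierarchical_fcnet_hiddens": [[64, 64], [32]]},
    }

    # Selects a registered custom model by name, bypassing the other branches.
    custom_model_options = {"custom_model": "my_fcnet"}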
    def __init__(self, env_creator, config, is_ext_train=False):
        self.local_steps = 0
        self.config = config
        self.summarize = config.get("summarize")
        env = ModelCatalog.get_preprocessor_as_wrapper(
            env_creator(self.config["env_config"]), self.config["model"])

        if is_ext_train:
            train_dataset = input_fn(
                self.config["inverse_model"]["ext_train_file_path"])
            valid_dataset = input_fn(
                self.config["inverse_model"]["ext_valid_file_path"])
            iterator = tf.data.Iterator.from_structure(
                train_dataset.output_types, train_dataset.output_shapes)
            next_element = iterator.get_next()
            self.x = next_element[0]
            self.ac = next_element[1]

            self.training_init_op = iterator.make_initializer(train_dataset)
            self.validation_init_op = iterator.make_initializer(valid_dataset)
        else:
            self.x = tf.placeholder(
                tf.float32,
                shape=[
                    None,
                    numpy.prod([2] + list(env.observation_space.shape))
                ])
            if isinstance(env.action_space, gym.spaces.Box):
                self.ac = tf.placeholder(tf.float32,
                                         [None] + list(env.action_space.shape),
                                         name="ac")
            elif isinstance(env.action_space, gym.spaces.Discrete):
                self.ac = tf.placeholder(tf.int64, [None], name="ac")
            else:
                raise NotImplementedError("action space " +
                                          str(type(env.action_space)) +
                                          " currently not supported")

        # Setup graph
        dist_class, logit_dim = ModelCatalog.get_action_dist(
            env.action_space, self.config["model"])
        self._model = FullyConnectedNetwork(self.x, logit_dim, {})
        self.logits = self._model.outputs
        self.curr_dist = dist_class(self.logits)
        self.sample = self.curr_dist.sample()
        self.var_list = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                                          tf.get_variable_scope().name)

        # Setup loss
        log_prob = self.curr_dist.logp(self.ac)
        self.pi_loss = -tf.reduce_sum(log_prob)
        self.loss = self.pi_loss
        self.optimizer = tf.train.AdamOptimizer(self.config["lr"]).minimize(
            self.loss)

        # Setup similarity -> cosine similarity
        normalize_sample = tf.nn.l2_normalize(self.sample, 1)
        normalize_ac = tf.nn.l2_normalize(self.ac, 1)
        self.similarity = 1 - tf.losses.cosine_distance(
            normalize_sample, normalize_ac, dim=1)

        # Initialize
        self.initialize()
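The similarity term above is one minus `tf.losses.cosine_distance` on L2-normalized tensors, i.e. a batch-averaged cosine similarity between sampled and target actions. A minimal NumPy sketch of the per-pair quantity (standalone helper, not part of the original class):

    import numpy as np

    def cosine_similarity(a, b):
        a = a / np.linalg.norm(a)      # l2-normalize, like tf.nn.l2_normalize
        b = b / np.linalg.norm(b)
        return float(np.dot(a, b))     # 1.0 parallel, 0.0 orthogonal, -1.0 opposite

    print(cosine_similarity(np.array([1.0, 0.0]), np.array([1.0, 0.0])))  # 1.0
    print(cosine_similarity(np.array([1.0, 0.0]), np.array([0.0, 1.0])))  # 0.0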