Example #1
    def __init__(self,
                 network_spec,
                 value_weights_spec=None,
                 value_biases_spec=None,
                 value_activation=None,
                 value_fold_time_rank=False,
                 value_unfold_time_rank=False,
                 scope="shared-value-function-policy",
                 **kwargs):
        super(SharedValueFunctionPolicy, self).__init__(network_spec,
                                                        scope=scope,
                                                        **kwargs)

        # Create the extra value dense layer with 1 node.
        self.value_unfold_time_rank = value_unfold_time_rank
        self.value_network = NeuralNetwork(
            DenseLayer(
                units=1,
                activation=value_activation,
                weights_spec=value_weights_spec,
                biases_spec=value_biases_spec,
            ),
            fold_time_rank=value_fold_time_rank,
            unfold_time_rank=value_unfold_time_rank,
            scope="value-function-node")

        self.add_components(self.value_network)
Example #2
    def test_time_rank_folding_for_large_dense_nn(self):
        vector_dim = 256
        input_space = FloatBox(shape=(vector_dim, ),
                               add_batch_rank=True,
                               add_time_rank=True)
        base_config = config_from_path("configs/test_large_dense_nn.json")
        neural_net_wo_folding = NeuralNetwork.from_spec(base_config)

        test = ComponentTest(component=neural_net_wo_folding,
                             input_spaces=dict(nn_input=input_space))

        # Pull a large batch+time ranked sample.
        sample_shape = (256, 200)
        inputs = input_space.sample(sample_shape)

        start = time.monotonic()
        runs = 10
        for _ in range(runs):
            print(".", flush=True, end="")
            test.test(("call", inputs), expected_outputs=None)
        runtime_wo_folding = time.monotonic() - start

        print(
            "\nTesting large dense NN w/o time-rank folding: {}x pass through with {}-data took "
            "{}s".format(runs, sample_shape, runtime_wo_folding))

        neural_net_w_folding = NeuralNetwork.from_spec(base_config)

        # Folded space (time rank already merged into the batch rank).
        input_space_folded = FloatBox(shape=(vector_dim, ),
                                      add_batch_rank=True)
        inputs = input_space_folded.sample(sample_shape[0] * sample_shape[1])

        test = ComponentTest(component=neural_net_w_folding,
                             input_spaces=dict(nn_input=input_space_folded))

        start = time.monotonic()
        for _ in range(runs):
            print(".", flush=True, end="")
            test.test(("call", inputs), expected_outputs=None)
        runtime_w_folding = time.monotonic() - start

        print(
            "\nTesting large dense NN w/ time-rank folding: {}x pass through with {}-data took "
            "{}s".format(runs, sample_shape, runtime_w_folding))

        recursive_assert_almost_equal(runtime_w_folding,
                                      runtime_wo_folding,
                                      decimals=0)
Example #3
    def test_time_rank_folding_for_large_cnn_nn(self):
        width = 86
        height = 86
        time_rank = 20
        input_space = FloatBox(shape=(width, height, 3),
                               add_batch_rank=True,
                               add_time_rank=True,
                               time_major=True)
        base_config = config_from_path("configs/test_3x_cnn_nn.json")
        base_config.insert(0, {"type": "reshape", "fold_time_rank": True})
        base_config.append({
            "type": "reshape",
            "unfold_time_rank": time_rank,
            "time_major": True
        })
        neural_net = NeuralNetwork.from_spec(base_config)

        test = ComponentTest(component=neural_net,
                             input_spaces=dict(nn_input=input_space))

        # Pull a large batch+time ranked sample.
        sample_shape = (time_rank, 256)
        inputs = input_space.sample(sample_shape)

        out = test.test(("call", inputs), expected_outputs=None)["output"]

        self.assertEqual(out.shape, (time_rank, 256, 7 * 7 * 64))
        self.assertEqual(out.dtype, np.float32)
Example #4
    def __init__(self, input_network_specs, post_network_spec=None, **kwargs):
        """
        Args:
            input_network_specs (Union[Dict[str,dict],Tuple[dict]]): A specification dict or tuple with values being
                the spec dicts for the individual streams. The `call` method expects a dict input or a single tuple
                input (not as *args) in its first parameter.

            post_network_spec (Optional[Union[dict,NeuralNetwork]]): The specification dict of the post-concat
                network or the post-concat network object itself.
        """
        super(MultiInputStreamNeuralNetwork,
              self).__init__(scope="multi-input-stream-nn", **kwargs)

        # Create all streams' networks.
        if isinstance(input_network_specs, dict):
            self.input_stream_nns = {}
            for i, (flat_key, nn_spec) in enumerate(
                    flatten_op(input_network_specs).items()):
                self.input_stream_nns[flat_key] = NeuralNetwork.from_spec(
                    nn_spec, scope="input-stream-nn-{}".format(i))
            # Create the concat layer to merge all streams.
            self.concat_layer = ConcatLayer(dict_keys=list(
                self.input_stream_nns.keys()),
                                            axis=-1)
        else:
            assert isinstance(input_network_specs, (list, tuple)),\
                "ERROR: `input_network_specs` must be dict or tuple/list!"
            self.input_stream_nns = []
            for i, nn_spec in enumerate(input_network_specs):
                self.input_stream_nns.append(
                    NeuralNetwork.from_spec(
                        nn_spec, scope="input-stream-nn-{}".format(i)))
            # Create the concat layer to merge all streams.
            self.concat_layer = ConcatLayer(axis=-1)

        # Create the post-network (after the concat).
        self.post_nn = NeuralNetwork.from_spec(
            post_network_spec, scope="post-concat-nn")  # type: NeuralNetwork

        # Add all sub-Components.
        self.add_components(
            self.post_nn, self.concat_layer,
            *list(self.input_stream_nns.values() if isinstance(
                input_network_specs, dict) else self.input_stream_nns))
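
A hedged construction sketch for the dict case above; the stream names and layer specs are hypothetical, using the dense layer-spec dict format that appears elsewhere in these examples:

multi_stream_nn = MultiInputStreamNeuralNetwork(
    input_network_specs=dict(
        camera=[{"type": "dense", "units": 64}],    # one stream-NN per input
        sensors=[{"type": "dense", "units": 32}],
    ),
    post_network_spec=[{"type": "dense", "units": 16}],  # applied after the ConcatLayer
)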
Example #5
    def __init__(self,
                 action_space=None,
                 final_shape=None,
                 weights_spec=None,
                 biases_spec=None,
                 activation=None,
                 pre_network_spec=None,
                 scope="action-adapter",
                 **kwargs):
        """
        Args:
            action_space (Optional[Space]): The action Space within which this Component will create actions.
                NOTE: Exactly one of `action_space` or `final_shape` must be provided.

            final_shape (Optional[Tuple[int]]): An optional final output shape (in case `action_space` is not
                provided). If None, will calculate the shape automatically from the given `action_space`.
                NOTE: Exactly one of `action_space` or `final_shape` must be provided.

            weights_spec (Optional[any]): An optional RLGraph Initializer spec that will be used to initialize the
                weights of `self.action_layer`. Default: None (use default initializer).

            biases_spec (Optional[any]): An optional RLGraph Initializer spec that will be used to initialize the
                biases of `self.action_layer`. Default: None (use default initializer, which is usually 0.0).

            activation (Optional[str]): The activation function to use for `self.action_layer`.
                Default: None (=linear).

            pre_network_spec (Optional[Union[dict,NeuralNetwork]]): A spec dict for a neural network coming before
                the last action layer. If None, only the action layer itself is applied.
        """
        # Build the action layer for this adapter based on the given action-space.
        self.action_space = None
        if action_space is not None:
            self.action_space = action_space.with_batch_rank()
            assert not isinstance(self.action_space, ContainerSpace),\
                "ERROR: ActionAdapter cannot handle ContainerSpaces!"

        units, self.final_shape = self.get_units_and_shape()

        action_layer = DenseLayer(units=units,
                                  activation=activation,
                                  weights_spec=weights_spec,
                                  biases_spec=biases_spec,
                                  scope="action-layer")

        # Do we have a pre-NN?
        self.network = NeuralNetwork.from_spec(
            pre_network_spec, scope="action-network")  # type: NeuralNetwork
        self.network.add_layer(action_layer)

        # Add the reshape layer to match the action space's shape.
        self.network.add_layer(ReShape(new_shape=self.final_shape))

        super(ActionAdapter, self).__init__(self.network,
                                            scope=scope,
                                            **kwargs)
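
A hedged sketch of the "exactly one of `action_space` or `final_shape`" contract from the docstring above (`my_action_space` is a placeholder; the pre-network is left at its None default):

adapter_from_space = ActionAdapter(action_space=my_action_space)  # output shape derived from the space
adapter_from_shape = ActionAdapter(final_shape=(2, 3))            # explicit output shape instead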
Example #6
    def __init__(self, z_units, encoder_network_spec, decoder_network_spec,
                 **kwargs):
        """
        Args:
            z_units (int): Number of units of the latent (z) vectors that the encoder will produce.

            encoder_network_spec (Union[dict,NeuralNetwork]): Specification dict to construct an encoder
                NeuralNetwork object from or a NeuralNetwork Component directly.

            decoder_network_spec (Union[dict,NeuralNetwork]): Specification dict to construct a decoder
                NeuralNetwork object from or a NeuralNetwork Component directly.
        """
        super(VariationalAutoEncoder,
              self).__init__(scope="variational-auto-encoder", **kwargs)

        self.z_units = z_units

        # Create encoder and decoder networks.
        self.encoder_network = NeuralNetwork.from_spec(encoder_network_spec,
                                                       scope="encoder-network")
        self.decoder_network = NeuralNetwork.from_spec(decoder_network_spec,
                                                       scope="decoder-network")

        # Create the two Gaussian layers.
        self.mean_layer = DenseLayer(units=self.z_units, scope="mean-layer")
        self.stddev_layer = DenseLayer(units=self.z_units,
                                       scope="stddev-layer")

        # Create the Normal Distribution from which to sample.
        self.normal_distribution = Normal()

        # A concat layer to concat mean and stddev before passing it to the Normal distribution.
        # No longer needed: Pass Tuple (mean + stddev) into API-method instead of concat'd tensor.
        #self.concat_layer = ConcatLayer(axis=-1)

        # Add all sub-Components.
        self.add_components(
            self.encoder_network,
            self.decoder_network,
            self.mean_layer,
            self.stddev_layer,
            self.normal_distribution  #, self.concat_layer
        )
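
The mean/stddev layers parameterize the Normal from which the latent z is drawn. A minimal numpy sketch (not RLGraph code) of that sampling step for batch size 8 and `z_units=16`:

import numpy as np

mean = np.zeros((8, 16), dtype=np.float32)    # mean-layer output: (batch, z_units)
stddev = np.ones((8, 16), dtype=np.float32)   # stddev-layer output: (batch, z_units)
z = mean + stddev * np.random.standard_normal(mean.shape).astype(np.float32)  # latent sample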
Example #7
    def __init__(self,
                 network_spec,
                 action_space=None,
                 action_adapter_spec=None,
                 deterministic=True,
                 scope="policy",
                 distributions_spec=None,
                 **kwargs):
        """
        Args:
            network_spec (Union[NeuralNetwork,dict]): The NeuralNetwork Component or a specification dict to build
                one.

            action_space (Union[dict,Space]): A specification dict to create the Space within which this Component
                will create actions or the action Space object directly.

            action_adapter_spec (Optional[dict]): A spec-dict to create an ActionAdapter. Use None for the default
                ActionAdapter object.

            deterministic (bool): Whether to pick actions according to the max-likelihood value or via sampling.
                Default: True.

            distributions_spec (dict): Specifies bounded and discrete distribution types, and optionally additional
                configuration parameters such as temperature.

            batch_apply (bool): Whether to wrap both the NN and the ActionAdapter with a BatchApply Component in order
                to fold time rank into batch rank before a forward pass.
        """
        super(Policy, self).__init__(scope=scope, **kwargs)

        self.neural_network = NeuralNetwork.from_spec(
            network_spec)  # type: NeuralNetwork
        self.deterministic = deterministic
        self.action_adapters = {}
        self.distributions = {}

        self.distributions_spec = distributions_spec if distributions_spec is not None else {}
        self.bounded_distribution_type = self.distributions_spec.get(
            "bounded_distribution_type", "beta")
        self.discrete_distribution_type = self.distributions_spec.get(
            "discrete_distribution_type", "categorical")
        # For discrete approximations.
        self.gumbel_softmax_temperature = self.distributions_spec.get(
            "gumbel_softmax_temperature", 1.0)

        self._create_action_adapters_and_distributions(
            action_space=action_space, action_adapter_spec=action_adapter_spec)

        self.add_components(*[self.neural_network] +
                            list(self.action_adapters.values()) +
                            list(self.distributions.values()))
        self.flat_action_space = None
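
A hedged construction sketch; `my_network_spec` and `my_action_space` are placeholders, the dict keys mirror the `distributions_spec.get(...)` lookups above, and the "gumbel-softmax" type string is an assumption (see the reparameterization note in Example #9):

policy = Policy(
    network_spec=my_network_spec,
    action_space=my_action_space,
    distributions_spec=dict(
        bounded_distribution_type="beta",             # the default looked up above
        discrete_distribution_type="gumbel-softmax",  # assumed type string; see Example #9
        gumbel_softmax_temperature=0.5,               # temperature of the discrete approximation
    ),
)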
Example #8
    def __init__(self, network_spec, scope="value-function", **kwargs):
        """
        Args:
            network_spec (list): Layer specification list for the baseline network.
        """
        super(ValueFunction, self).__init__(scope=scope, **kwargs)

        # Attach VF output to hidden layers.
        value_layer = {
            "type": "dense",
            "units": 1,
            "activation": "linear",
            "scope": "value-function-output"
        }
        network_spec.append(value_layer)
        self.neural_network = NeuralNetwork.from_spec(network_spec)
        self.add_components(self.neural_network)
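
A minimal construction sketch (hypothetical hidden-layer spec, same dict format as `value_layer` above). Note that the constructor appends the value layer to the very list it receives, so the caller's spec is mutated in place:

vf = ValueFunction(network_spec=[
    {"type": "dense", "units": 64, "activation": "relu"},  # hidden layer; the VF output layer gets appended
])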
Example #9
    def __init__(self, network_spec, action_space=None, action_adapter_spec=None,
                 deterministic=True, scope="policy", bounded_distribution_type="beta",
                 discrete_distribution_type="categorical", **kwargs):
        """
        Args:
            network_spec (Union[NeuralNetwork,dict]): The NeuralNetwork Component or a specification dict to build
                one.

            action_space (Space): The action Space within which this Component will create actions.

            action_adapter_spec (Optional[dict]): A spec-dict to create an ActionAdapter. Use None for the default
                ActionAdapter object.

            deterministic (bool): Whether to pick actions according to the max-likelihood value or via sampling.
                Default: True.

            bounded_distribution_type (str): The class of distributions to use for bounded action spaces. For options
                check the components.distributions package. Default: beta.

            discrete_distribution_type (str): The class of distributions to use for discrete action spaces. For
                options check the components.distributions package. Default: categorical. Agents requiring
                reparameterization may require a GumbelSoftmax distribution instead.

            batch_apply (bool): Whether to wrap both the NN and the ActionAdapter with a BatchApply Component in order
                to fold time rank into batch rank before a forward pass.
        """
        super(Policy, self).__init__(scope=scope, **kwargs)

        self.neural_network = NeuralNetwork.from_spec(network_spec)  # type: NeuralNetwork
        self.deterministic = deterministic
        self.action_adapters = dict()
        self.distributions = dict()
        self.bounded_distribution_type = bounded_distribution_type
        self.discrete_distribution_type = discrete_distribution_type

        self._create_action_adapters_and_distributions(
            action_space=action_space, action_adapter_spec=action_adapter_spec
        )

        self.add_components(
            *[self.neural_network] + list(self.action_adapters.values()) + list(self.distributions.values())
        )
Example #10
class SharedValueFunctionPolicy(Policy):
    def __init__(self,
                 network_spec,
                 value_weights_spec=None,
                 value_biases_spec=None,
                 value_activation=None,
                 value_fold_time_rank=False,
                 value_unfold_time_rank=False,
                 scope="shared-value-function-policy",
                 **kwargs):
        super(SharedValueFunctionPolicy, self).__init__(network_spec,
                                                        scope=scope,
                                                        **kwargs)

        # Create the extra value dense layer with 1 node.
        self.value_unfold_time_rank = value_unfold_time_rank
        self.value_network = NeuralNetwork(
            DenseLayer(
                units=1,
                activation=value_activation,
                weights_spec=value_weights_spec,
                biases_spec=value_biases_spec,
            ),
            fold_time_rank=value_fold_time_rank,
            unfold_time_rank=value_unfold_time_rank,
            scope="value-function-node")

        self.add_components(self.value_network)

    @rlgraph_api
    def get_state_values(self, nn_input, internal_states=None):
        """
        Returns the state value node's output.

        Args:
            nn_input (any): The input to our neural network.
            internal_states (Optional[any]): The initial internal states going into an RNN-based neural network.

        Returns:
            Dict:
                state_values: The single (but batched) value function node output.
        """
        nn_output = self.get_nn_output(nn_input, internal_states)
        if self.value_unfold_time_rank is True:
            state_values = self.value_network.apply(nn_output["output"],
                                                    nn_input)
        else:
            state_values = self.value_network.apply(nn_output["output"])

        return dict(state_values=state_values["output"],
                    last_internal_states=nn_output.get("last_internal_states"))

    @rlgraph_api
    def get_state_values_logits_probabilities_log_probs(
            self, nn_input, internal_states=None):
        """
        Similar to `get_values_logits_probabilities_log_probs`, but additionally returns, under the key
        `state_values`, the output of our state-value function node.

        Args:
            nn_input (any): The input to our neural network.
            internal_states (Optional[any]): The initial internal states going into an RNN-based neural network.

        Returns:
            Dict:
                state_values: The single (but batched) value function node output.
                logits: The (reshaped) logits from the ActionAdapter.
                probabilities: The probabilities gained from the softmaxed logits.
                log_probs: The log(probabilities) values.
                last_internal_states: The last internal states (if network is RNN-based).
        """
        nn_output = self.get_nn_output(nn_input, internal_states)
        logits, probabilities, log_probs = self._graph_fn_get_action_adapter_logits_probabilities_log_probs(
            nn_output["output"], nn_input)
        if self.value_unfold_time_rank is True:
            state_values = self.value_network.apply(nn_output["output"],
                                                    nn_input)
        else:
            state_values = self.value_network.apply(nn_output["output"])

        return dict(state_values=state_values["output"],
                    logits=logits,
                    probabilities=probabilities,
                    log_probs=log_probs,
                    last_internal_states=nn_output.get("last_internal_states"))
Example #11
 def build_value_function(self):
     # Attach VF output to hidden layers.
     self.network_spec.append(self.value_layer)
     self.neural_network = NeuralNetwork.from_spec(self.network_spec)
     self.add_components(self.neural_network)
Example #12
    def __init__(self,
                 network_spec,
                 action_space=None,
                 action_adapter_spec=None,
                 deterministic=True,
                 scope="policy",
                 **kwargs):
        """
        Args:
            network_spec (Union[NeuralNetwork,dict]): The NeuralNetwork Component or a specification dict to build
                one.

            action_space (Space): The action Space within which this Component will create actions.

            action_adapter_spec (Optional[dict]): A spec-dict to create an ActionAdapter. Use None for the default
                ActionAdapter object.

            deterministic (bool): Whether to pick actions according to the max-likelihood value or via sampling.
                Default: True.

            batch_apply (bool): Whether to wrap both the NN and the ActionAdapter with a BatchApply Component in order
                to fold time rank into batch rank before a forward pass.
        """
        super(Policy, self).__init__(scope=scope, **kwargs)

        self.neural_network = NeuralNetwork.from_spec(
            network_spec)  # type: NeuralNetwork

        # Create the necessary action adapters for this Policy. One for each action space component.
        self.action_adapters = dict()
        if action_space is None:
            self.action_adapters[""] = ActionAdapter.from_spec(
                action_adapter_spec)
            self.action_space = self.action_adapters[""].action_space
            # Assert single component action space.
            assert len(self.action_space.flatten()) == 1,\
                "ERROR: Action space must not be ContainerSpace if no `action_space` is given in Policy c'tor!"
        else:
            self.action_space = Space.from_spec(action_space)
            for i, (flat_key, action_component) in enumerate(
                    self.action_space.flatten().items()):
                if action_adapter_spec is not None:
                    aa_spec = action_adapter_spec.get(flat_key,
                                                      action_adapter_spec)
                    aa_spec["action_space"] = action_component
                else:
                    aa_spec = dict(action_space=action_component)
                self.action_adapters[flat_key] = ActionAdapter.from_spec(
                    aa_spec, scope="action-adapter-{}".format(i))

        self.deterministic = deterministic

        # Figure out our Distributions.
        self.distributions = dict()
        for i, (flat_key, action_component) in enumerate(
                self.action_space.flatten().items()):
            if isinstance(action_component, IntBox):
                self.distributions[flat_key] = Categorical(
                    scope="categorical-{}".format(i))
            # Continuous action space -> Normal distribution (each action needs mean and variance from network).
            elif isinstance(action_component, FloatBox):
                self.distributions[flat_key] = Normal(
                    scope="normal-{}".format(i))
            else:
                raise RLGraphError(
                    "ERROR: `action_component` is of type {} and not allowed in {} Component!"
                    .format(type(action_component).__name__, self.name))

        self.add_components(*[self.neural_network] +
                            list(self.action_adapters.values()) +
                            list(self.distributions.values()))
Example #13
class SharedValueFunctionPolicy(Policy):
    def __init__(self,
                 network_spec,
                 value_weights_spec=None,
                 value_biases_spec=None,
                 value_activation=None,
                 value_fold_time_rank=False,
                 value_unfold_time_rank=False,
                 scope="shared-value-function-policy",
                 **kwargs):
        super(SharedValueFunctionPolicy, self).__init__(network_spec,
                                                        scope=scope,
                                                        **kwargs)

        # Create the extra value dense layer with 1 node.
        self.value_unfold_time_rank = value_unfold_time_rank
        self.value_network = NeuralNetwork(
            DenseLayer(
                units=1,
                activation=value_activation,
                weights_spec=value_weights_spec,
                biases_spec=value_biases_spec,
            ),
            fold_time_rank=value_fold_time_rank,
            unfold_time_rank=value_unfold_time_rank,
            scope="value-function-node")

        self.add_components(self.value_network)

    @rlgraph_api
    def get_state_values(self, nn_inputs):  # , internal_states=None
        """
        Returns the state value node's output.

        Args:
            nn_inputs (any): The input to our neural network.
            #internal_states (Optional[any]): The initial internal states going into an RNN-based neural network.

        Returns:
            Dict:
                state_values: The single (but batched) value function node output.
        """
        nn_outputs = self.get_nn_outputs(nn_inputs)
        #if self.value_unfold_time_rank is True:
        #    state_values = self.value_network.call(nn_outputs, nn_inputs)
        #else:
        state_values = self.value_network.call(nn_outputs)

        return dict(state_values=state_values, nn_outputs=nn_outputs)

    @rlgraph_api
    def get_state_values_adapter_outputs_and_parameters(
            self, nn_inputs):  #, internal_states=None
        """
        Similar to `get_values_logits_probabilities_log_probs`, but additionally returns, under the key
        `state_values`, the output of our state-value function node.

        Args:
            nn_inputs (any): The input to our neural network.
            #internal_states (Optional[any]): The initial internal states going into an RNN-based neural network.

        Returns:
            Dict:
                nn_outputs: The raw NN outputs.
                state_values: The single (but batched) value function node output.
                adapter_outputs: The (reshaped) logits from the ActionAdapter.
                parameters: The parameters for the distribution (gained from the softmaxed logits or interpreting
                    logits as mean and stddev for a normal distribution).
                log_probs: The log(probabilities) values.
        """
        nn_outputs = self.get_nn_outputs(nn_inputs)
        adapter_outputs, parameters, log_probs = self._graph_fn_get_adapter_outputs_and_parameters(
            nn_outputs)
        #if self.value_unfold_time_rank is True:
        #    state_values = self.value_network.call(nn_outputs, nn_inputs)
        #else:
        state_values = self.value_network.call(nn_outputs)

        return dict(nn_outputs=nn_outputs,
                    state_values=state_values,
                    adapter_outputs=adapter_outputs,
                    parameters=parameters,
                    log_probs=log_probs)

    def get_state_values_logits_probabilities_log_probs(
            self, nn_input, internal_states=None):
        raise RLGraphObsoletedError(
            "API-method", "get_state_values_logits_probabilities_log_probs",
            "get_state_values_adapter_outputs_and_parameters")

    def get_state_values_logits_parameters_log_probs(self,
                                                     nn_input,
                                                     internal_states=None):
        raise RLGraphObsoletedError(
            "API-method", "get_state_values_logits_parameters_log_probs",
            "get_state_values_adapter_outputs_and_parameters")
Example #14
    def __init__(self,
                 action_space,
                 add_units=0,
                 units=None,
                 weights_spec=None,
                 biases_spec=None,
                 activation=None,
                 pre_network_spec=None,
                 scope="action-adapter",
                 **kwargs):
        """
        Args:
            action_space (Space): The action Space within which this Component will create actions.

            add_units (Optional[int]): An optional number of units to add to the auto-calculated number of action-
                layer nodes. Can be negative to subtract units from the auto-calculated value.
                NOTE: Only one of `add_units` and `units` may be provided.

            units (Optional[int]): An optional number of units to use for the action-layer. If None, will calculate
                the number of units automatically from the given `action_space`.
                NOTE: Only one of `add_units` and `units` may be provided.

            weights_spec (Optional[any]): An optional RLGraph Initializer spec that will be used to initialize the
                weights of `self.action_layer`. Default: None (use default initializer).

            biases_spec (Optional[any]): An optional RLGraph Initializer spec that will be used to initialize the
                biases of `self.action_layer`. Default: None (use default initializer, which is usually 0.0).

            activation (Optional[str]): The activation function to use for `self.action_layer`.
                Default: None (=linear).

            pre_network_spec (Optional[Union[dict,NeuralNetwork]]): A spec dict for a neural network coming before
                the last action layer. If None, only the action layer itself is applied.
        """
        # Build the action layer for this adapter based on the given action-space.
        self.action_space = action_space.with_batch_rank()
        assert not isinstance(
            self.action_space, ContainerSpace
        ), "ERROR: ActionAdapter cannot handle ContainerSpaces!"
        # Calculate the number of nodes in the action layer (DenseLayer object) depending on our action Space
        # or using a given fixed number (`units`).
        # Also generate the ReShape sub-Component and give it the new_shape.
        if isinstance(self.action_space, IntBox):
            if units is None:
                units = add_units + self.action_space.flat_dim_with_categories
            new_shape = self.action_space.get_shape(with_category_rank=True)
        else:
            if units is None:
                units = add_units + 2 * self.action_space.flat_dim  # Those two dimensions are the mean and log sd
            # Manually add moments after batch/time ranks.
            new_shape = tuple([2] + list(self.action_space.shape))

        assert units > 0, "ERROR: Number of nodes for action-layer calculated as {}! Must be larger than 0.".format(
            units)

        action_layer = DenseLayer(units=units,
                                  activation=activation,
                                  weights_spec=weights_spec,
                                  biases_spec=biases_spec,
                                  scope="action-layer")

        # Do we have a pre-NN?
        self.network = NeuralNetwork.from_spec(
            pre_network_spec, scope="action-network")  # type: NeuralNetwork
        self.network.add_layer(action_layer)

        # Add the reshape layer to match the action space's shape.
        self.network.add_layer(ReShape(new_shape=new_shape))

        super(ActionAdapter, self).__init__(self.network,
                                            scope=scope,
                                            **kwargs)
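
A worked instance of the node-count arithmetic above, in plain Python (both action spaces are hypothetical):

# IntBox action space: shape (2,) with 4 categories per action.
num_actions, num_categories = 2, 4
units_int = num_actions * num_categories       # flat_dim_with_categories -> 8 action-layer nodes
new_shape_int = (num_actions, num_categories)  # (2, 4): category rank appended

# FloatBox action space: shape (3,).
units_float = 2 * 3                            # mean + log-sd per dimension -> 6 nodes
new_shape_float = (2, 3)                       # moments rank first, then the action shape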
Example #15
    def __init__(self,
                 network_spec,
                 action_space=None,
                 action_adapter_spec=None,
                 max_likelihood=True,
                 scope="policy",
                 **kwargs):
        """
        Args:
            network_spec (Union[NeuralNetwork,dict]): The NeuralNetwork Component or a specification dict to build
                one.

            action_space (Space): The action Space within which this Component will create actions.

            action_adapter_spec (Optional[dict]): A spec-dict to create an ActionAdapter. Use None for the default
                ActionAdapter object.

            max_likelihood (bool): Whether to pick actions according to the max-likelihood value or via sampling.
                Default: True.
        """
        super(Policy, self).__init__(scope=scope, **kwargs)

        self.neural_network = NeuralNetwork.from_spec(network_spec)
        if action_space is None:
            self.action_adapter = ActionAdapter.from_spec(action_adapter_spec)
            action_space = self.action_adapter.action_space
        else:
            self.action_adapter = ActionAdapter.from_spec(
                action_adapter_spec, action_space=action_space)
        self.action_space = action_space
        self.max_likelihood = max_likelihood

        # TODO: Hacky trick to implement IMPALA post-LSTM256 time-rank folding and unfolding.
        # TODO: Replace entirely via sonnet-like BatchApply Component.
        is_impala = "IMPALANetwork" in type(self.neural_network).__name__

        # Add API-method to get baseline output (if we use an extra value function baseline node).
        if isinstance(self.action_adapter, BaselineActionAdapter):
            # TODO: IMPALA attempt to speed up final pass after LSTM.
            if is_impala:
                self.time_rank_folder = ReShape(fold_time_rank=True,
                                                scope="time-rank-fold")
                self.time_rank_unfolder_v = ReShape(unfold_time_rank=True,
                                                    time_major=True,
                                                    scope="time-rank-unfold-v")
                self.time_rank_unfolder_a_probs = ReShape(
                    unfold_time_rank=True,
                    time_major=True,
                    scope="time-rank-unfold-a-probs")
                self.time_rank_unfolder_logits = ReShape(
                    unfold_time_rank=True,
                    time_major=True,
                    scope="time-rank-unfold-logits")
                self.time_rank_unfolder_log_probs = ReShape(
                    unfold_time_rank=True,
                    time_major=True,
                    scope="time-rank-unfold-log-probs")
                self.add_components(self.time_rank_folder,
                                    self.time_rank_unfolder_v,
                                    self.time_rank_unfolder_a_probs,
                                    self.time_rank_unfolder_log_probs,
                                    self.time_rank_unfolder_logits)

            @rlgraph_api(component=self)
            def get_state_values_logits_probabilities_log_probs(
                    self, nn_input, internal_states=None):
                nn_output = self.neural_network.apply(nn_input,
                                                      internal_states)
                last_internal_states = nn_output.get("last_internal_states")
                nn_output = nn_output["output"]

                # TODO: IMPALA attempt to speed up final pass after LSTM.
                if is_impala:
                    nn_output = self.time_rank_folder.apply(nn_output)

                out = self.action_adapter.get_logits_probabilities_log_probs(
                    nn_output)

                # TODO: IMPALA attempt to speed up final pass after LSTM.
                if is_impala:
                    state_values = self.time_rank_unfolder_v.apply(
                        out["state_values"], nn_output)
                    logits = self.time_rank_unfolder_logits.apply(
                        out["logits"], nn_output)
                    probs = self.time_rank_unfolder_a_probs.apply(
                        out["probabilities"], nn_output)
                    log_probs = self.time_rank_unfolder_log_probs.apply(
                        out["log_probs"], nn_output)
                else:
                    state_values = out["state_values"]
                    logits = out["logits"]
                    probs = out["probabilities"]
                    log_probs = out["log_probs"]

                return dict(state_values=state_values,
                            logits=logits,
                            probabilities=probs,
                            log_probs=log_probs,
                            last_internal_states=last_internal_states)

        # Figure out our Distribution.
        if isinstance(action_space, IntBox):
            self.distribution = Categorical()
        # Continuous action space -> Normal distribution (each action needs mean and variance from network).
        elif isinstance(action_space, FloatBox):
            self.distribution = Normal()
        else:
            raise RLGraphError(
                "ERROR: `action_space` is of type {} and not allowed in {} Component!"
                .format(type(action_space).__name__, self.name))

        self.add_components(self.neural_network, self.action_adapter,
                            self.distribution)

        if is_impala:
            self.add_components(self.time_rank_folder,
                                self.time_rank_unfolder_v,
                                self.time_rank_unfolder_a_probs,
                                self.time_rank_unfolder_log_probs,
                                self.time_rank_unfolder_logits)
Example #16
    def __init__(
            self, action_space, world_option_model_network, encoder_network, num_features, num_mixtures, beta=0.2,
            post_phi_concat_network=None,
            reward_clipping=1.0,
            intrinsic_rewards_weight=0.1,
            concat_with_command_vector=False,
            optimizer=None, deterministic=False, scope="intrinsic-curiosity-world-option-model",
            **kwargs
    ):
        """
        Args:
            action_space (Space): The action Space to be fed into the model together with the latent feature vector
                for the states. Will be flattened automatically and then concatenated by this component.

            world_option_model_network (Union[NeuralNetwork,dict]): A specification dict (or NN object directly) to
                construct the world-option-model's neural network.

            encoder_network (Union[NeuralNetwork,dict]): A specification dict (or NN object directly) to
                construct the inverse dynamics model's encoder network leading from s to phi (feature vector).

            num_features (int): The size of the feature vectors phi.

            num_mixtures (int): The number of mixture Normals to use for the next-state distribution output.

            beta (float): The weight for the phi' loss (action loss is then 1.0 - beta).

            post_phi_concat_network (Optional[Union[NeuralNetwork,dict]]): A specification dict (or NN object
                directly) for the network that processes the concatenated phi and phi' before predicting the action.

            reward_clipping (float): 0.0 for no clipping, some other value for +/- reward value clipping.
                Default: 1.0.

            intrinsic_rewards_weight (float): The weight with which to scale the intrinsic rewards. Default: 0.1.

            concat_with_command_vector (bool): If True, this model needs an additional command vector (coming from the
                policy above) to concat it together with the latent state vector.

            optimizer (Optional[Optimizer]): The optimizer to use for supervised learning of the two networks
                (ICM and WOM).
        """
        self.num_features = num_features
        self.num_mixtures = num_mixtures
        self.deterministic = deterministic
        self.beta = beta
        assert 0.0 < self.beta < 1.0, "ERROR: `beta` must be between 0 and 1!"
        self.reward_clipping = reward_clipping
        self.intrinsic_rewards_weight = intrinsic_rewards_weight

        # Create the encoder network inside a SupervisedPredictor (so we get the adapter + distribution with it).
        self.state_encoder = SupervisedPredictor(
            network_spec=encoder_network, output_space=FloatBox(shape=(num_features,), add_batch_rank=True),
            scope="state-encoder"
        )

        # Create the container loss function for the two prediction tasks:
        # a) Action prediction and b) next-state prediction, each of them using a simple neg log likelihood loss
        # comparing the actual action and s' with their log-likelihood value vs the respective distributions.
        self.loss_functions = dict(
            # Action prediction loss (neg log likelihood of observed action vs the parameterized distribution).
            predicted_actions=NegativeLogLikelihoodLoss(
                distribution_spec=get_default_distribution_from_space(action_space),
                scope="action-loss"
            ),
            # s' prediction loss (neg log likelihood of observed s' vs the parameterized mixed normal distribution).
            predicted_phi_=NegativeLogLikelihoodLoss(distribution_spec=dict(type="mixture", _args=[
                "multi-variate-normal" for _ in range(num_mixtures)
            ]), scope="phi-loss")
        )

        # TODO: Support for command vector concatenation.
        #self.concat_with_command_vector = concat_with_command_vector

        # Define the Model's network's custom call method.
        def custom_call(self, inputs):
            phi = inputs["phi"]
            actions = inputs["actions"]
            phi_ = inputs["phi_"]
            actions_flat = self.get_sub_component_by_name("action-flattener").call(actions)
            concat_phis = self.get_sub_component_by_name("concat-phis").call(phi, phi_)
            # Predict the action that led from s to s'.
            predicted_actions = self.get_sub_component_by_name("post-phi-concat-nn").call(concat_phis)

            # Concat phi with flattened actions.
            phi_and_actions = self.get_sub_component_by_name("concat-states-and-actions").call(
                phi, actions_flat
            )
            # Add stop-gradient to phi here before predicting phi'
            # (the phis should only be trained by the inverse dynamics model, not by the world option model).
            # NOT DONE IN ORIGINAL PAPER's CODE AND ALSO NOT IN MLAGENTS EQUIVALENT.
            # phi_and_actions = self.get_sub_component_by_name("stop-gradient").stop(phi_and_actions)
            # Predict phi' (through a mixture gaussian distribution).
            predicted_phi_ = self.get_sub_component_by_name("wom-nn").call(phi_and_actions)

            return dict(
                # Predictions (actions and next-state-features (mixture distribution)).
                predicted_actions=predicted_actions,
                predicted_phi_=predicted_phi_
                ## Also return the two feature vectors for s and s'.
                #phi=phi, phi_=phi_
            )

        # Create the SupervisedPredictor's neural network.
        predictor_network = NeuralNetwork(
            # The world option model network taking action-cat-phi and mapping them to the predicted phi'.
            NeuralNetwork.from_spec(world_option_model_network, scope="wom-nn"),
            # The concat component concatenating both latent state vectors (phi and phi').
            ConcatLayer(scope="concat-phis"),
            # The NN mapping from phi-cat-phi' to the action prediction.
            NeuralNetwork.from_spec(post_phi_concat_network, scope="post-phi-concat-nn"),
            # The ReShape component for flattening all actions in arbitrary action spaces.
            ReShape(flatten=True, flatten_categories=True, flatten_containers=True, scope="action-flattener"),
            # The concat component concatenating latent state feature vector and incoming (flattened) actions.
            ConcatLayer(scope="concat-states-and-actions"),
            # Set the `call` method.
            api_methods={("call", custom_call)}
        )

        if optimizer is None:
            optimizer = dict(type="adam", learning_rate=3e-4)

        super(IntrinsicCuriosityWorldOptionModel, self).__init__(
            predictor=dict(
                network_spec=predictor_network,
                output_space=Dict({
                    "predicted_actions": action_space,
                    "predicted_phi_": FloatBox(shape=(self.num_features,))
                }, add_batch_rank=action_space.has_batch_rank, add_time_rank=action_space.has_time_rank),
                distribution_adapter_spec=dict(
                    # for `predicted_actions`: use default adapter
                    # for predicted_phi': use normal-mixture adapter & distribution.
                    predicted_phi_={"type": "normal-mixture-adapter", "num_mixtures": num_mixtures}
                ),
                deterministic=deterministic
            ),
            loss_function=self.loss_functions["predicted_actions"],
            optimizer=optimizer, scope=scope, **kwargs
        )

        self.add_components(self.state_encoder, self.loss_functions["predicted_phi_"])
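
The `beta` weighting described in the docstring, as a plain-Python sketch (the loss values are placeholders):

beta = 0.2
action_loss, phi_prime_loss = 1.3, 0.7                           # placeholder loss values
total_loss = beta * phi_prime_loss + (1.0 - beta) * action_loss  # phi'-loss weighted by beta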