Example #2
    def __init__(self,
                 preprocessor_spec,
                 policy_spec,
                 exploration_spec=None,
                 **kwargs):
        """
        Args:
            preprocessor_spec (Union[list,dict,PreprocessorSpec]):
                - A dict if the state from the Env will come in as a ContainerSpace (e.g. Dict). In this case, each
                    key in this dict specifies which value in the incoming dict should go through which PreprocessorStack.
                - A list with layer specs.
                - A PreprocessorStack object.

            policy_spec (Union[dict,Policy]): A specification dict for a Policy object or a Policy object directly.

            exploration_spec (Union[dict,Exploration]): A specification dict for an Exploration object or an Exploration
                object directly.
        """
        super(ActorComponent,
              self).__init__(scope=kwargs.pop("scope", "actor-component"),
                             **kwargs)

        self.preprocessor = PreprocessorStack.from_spec(preprocessor_spec)
        self.policy = Policy.from_spec(policy_spec)
        self.num_nn_inputs = self.policy.neural_network.num_inputs
        self.exploration = Exploration.from_spec(exploration_spec)

        self.tuple_merger = ContainerMerger(is_tuple=True,
                                            merge_tuples_into_one=True)
        self.tuple_splitter = ContainerSplitter(
            tuple_length=self.num_nn_inputs)

        self.add_components(self.policy, self.exploration, self.preprocessor,
                            self.tuple_merger, self.tuple_splitter)
class DummyNNWithDictInput(NeuralNetwork):
    """
    Dummy NN with dict input: takes a dict with keys "a" and "b", passes each value through its own (parallel,
    not otherwise connected) dense layer, and then concatenates the two outputs to yield the final output.
    """

    def __init__(self, num_units_a=3, num_units_b=2, scope="dummy-nn-with-dict-input", **kwargs):
        super(DummyNNWithDictInput, self).__init__(scope=scope, **kwargs)

        self.num_units_a = num_units_a
        self.num_units_b = num_units_b

        # Splits the input into two streams.
        self.splitter = ContainerSplitter("a", "b")
        self.stack_a = DenseLayer(units=self.num_units_a, scope="dense-a")
        self.stack_b = DenseLayer(units=self.num_units_b, scope="dense-b")
        self.concat_layer = ConcatLayer()

        # Add all sub-components to this one.
        self.add_components(self.splitter, self.stack_a, self.stack_b, self.concat_layer)

    @rlgraph_api
    def call(self, input_dict):
        # Split the input dict into two streams.
        input_a, input_b = self.splitter.call(input_dict)

        # Get the two stack outputs.
        output_a = self.stack_a.call(input_a)
        output_b = self.stack_b.call(input_b)

        # Concat everything together, that's the output.
        concatenated_data = self.concat_layer.call(output_a, output_b)

        return concatenated_data
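
A minimal usage sketch (not part of the original source; the `input_dict` key in `input_spaces` is an assumption mirroring the `call` argument name): feeding a sampled Dict space through the network via the same ComponentTest pattern used in the tests below.

input_space = Dict(a=FloatBox(shape=(4,)), b=FloatBox(shape=(4,)), add_batch_rank=True)
nn = DummyNNWithDictInput(num_units_a=3, num_units_b=2)
test = ComponentTest(component=nn, input_spaces=dict(input_dict=input_space))
# Push a batch of 2 samples through; the result should be the concatenation of both dense outputs: shape (2, 3 + 2).
out = test.test(("call", input_space.sample(size=2)), expected_outputs=None)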
Example #4
    def __init__(self, worker_sample_size=100, scope="impala-network", **kwargs):
        """
        Args:
            worker_sample_size (int): How many time-steps an IMPALA actor will have performed in one rollout.
        """
        super(IMPALANetwork, self).__init__(scope=scope, **kwargs)

        self.worker_sample_size = worker_sample_size

        # Create all needed sub-components.

        # ContainerSplitter for the Env signal (a dict with 4 keys: env image, env text, previous action and previous reward).
        self.splitter = ContainerSplitter("RGB_INTERLEAVED", "INSTR", "previous_action", "previous_reward",
                                          scope="input-splitter")

        # ReShape Components to fold the time rank into the batch rank and to unfold it again right before the main LSTM.
        self.time_rank_fold_before_lstm = ReShape(fold_time_rank=True, scope="time-rank-fold-before-lstm")
        self.time_rank_unfold_before_lstm = ReShape(unfold_time_rank=True, time_major=True,
                                                    scope="time-rank-unfold-before-lstm")

        # The Image Processing Stack (left side of "Large Architecture" Figure 3 in [1]).
        # Conv2D column + ReLU + fc(256) + ReLU.
        self.image_processing_stack = self.build_image_processing_stack()

        # The text processing pipeline: Takes a batch of string tensors as input, creates a hash-bucket thereof,
        # and passes the output of the hash bucket through an embedding-lookup(20) layer. The output of the embedding
        # lookup is then passed through an LSTM(64).
        self.text_processing_stack = self.build_text_processing_stack()

        #self.debug_slicer = Slice(scope="internal-states-slicer", squeeze=True)

        # The concatenation layer (concatenates outputs from image/text processing stacks, previous action/reward).
        self.concat_layer = ConcatLayer()

        # The main LSTM (going into the ActionAdapter (next in the Policy Component that uses this NN Component)).
        # Use time-major as it's faster (according to the tf docs).
        self.main_lstm = LSTMLayer(units=256, scope="lstm-256", time_major=True, static_loop=self.worker_sample_size)

        # Add all sub-components to this one.
        self.add_components(
            self.splitter, self.image_processing_stack, self.text_processing_stack,
            self.concat_layer,
            self.main_lstm,
            self.time_rank_fold_before_lstm, self.time_rank_unfold_before_lstm,
            #self.debug_slicer
        )
Example #5
    def build_text_processing_stack():
        """
        Helper function to build the text processing pipeline for both the large and small architectures, consisting of:
        - ReShape preprocessor to fold the incoming time rank into the batch rank.
        - StringToHashBucket Layer taking a batch of sentences and converting them to an indices-table with dimensions:
          cols=length of the longest sentence in the input
          rows=number of items in the batch
          The cols dimension can be interpreted as the time rank for a subsequent LSTM. The StringToHashBucket
          Component returns the sequence length of each batch item for exactly that purpose.
        - Embedding Lookup Layer of embedding size 20 and number of rows == num_hash_buckets (see previous layer).
        - LSTM processing the batched sequences of words coming from the embedding layer as batches of rows.
        """
        num_hash_buckets = 1000

        # Create a hash bucket from the sentences and use that bucket to do an embedding lookup (instead of
        # a vocabulary).
        string_to_hash_bucket = StringToHashBucket(num_hash_buckets=num_hash_buckets)
        embedding = EmbeddingLookup(embed_dim=20, vocab_size=num_hash_buckets, pad_empty=True)
        # The time rank for the LSTM is now the sequence of words in a sentence, NOT the original env time rank.
        # We will only use the last output of the LSTM-64 for further processing as that is the output after having
        # seen all words in the sentence.
        # The original env stepping time rank is currently folded into the batch rank and must be unfolded again before
        # passing it into the main LSTM.
        lstm64 = LSTMLayer(units=64, scope="lstm-64", time_major=False)

        tuple_splitter = ContainerSplitter(tuple_length=2, scope="tuple-splitter")

        def custom_call(self, inputs):
            hash_bucket, lengths = self.sub_components["string-to-hash-bucket"].call(inputs)

            embedding_output = self.sub_components["embedding-lookup"].call(hash_bucket)

            # Keep only the last LSTM output (we are not interested in intermediate results produced before the
            # LSTM has seen the entire sentence).
            # That last output is the final internal h-state (slot 1 in the returned LSTM tuple; slot 0 is the final c-state).
            lstm_output = self.sub_components["lstm-64"].call(embedding_output, sequence_length=lengths)
            lstm_final_internals = lstm_output["last_internal_states"]

            # Need to split once more because the LSTM state is always a tuple of final c- and h-states.
            _, lstm_final_h_state = self.sub_components["tuple-splitter"].call(lstm_final_internals)

            return lstm_final_h_state

        text_processing_stack = Stack(
            string_to_hash_bucket, embedding, lstm64, tuple_splitter,
            api_methods={("call", custom_call)}, scope="text-stack"
        )

        return text_processing_stack
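
Conceptually, the hash-bucket plus embedding-lookup combination can be sketched with raw TensorFlow ops (an illustrative sketch only, not RLGraph code; the sentence tensor and sizes are made up):

import tensorflow as tf

# A batch of 2 sentences, padded to 3 words each.
words = tf.constant([["hello", "world", ""], ["hi", "", ""]])
# Hash every word into one of 1000 buckets (no explicit vocabulary needed).
ids = tf.strings.to_hash_bucket_fast(words, num_buckets=1000)
# Look the bucket ids up in a trainable embedding table with embed_dim=20.
embedding_table = tf.Variable(tf.random.normal([1000, 20]))
embedded = tf.nn.embedding_lookup(embedding_table, ids)  # shape: (2, 3, 20)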
    def test_tuple_splitter(self):
        space = Tuple(FloatBox(shape=()),
                      bool,
                      IntBox(low=0, high=255),
                      add_batch_rank=True)
        # Define the output-order.
        splitter = ContainerSplitter(tuple_length=len(space))
        test = ComponentTest(component=splitter,
                             input_spaces=dict(inputs=space))

        # A batch of 6 samples.
        input_ = space.sample(size=6)
        expected_outputs = [input_[0], input_[1], input_[2]]

        test.test(("call", (input_, )), expected_outputs=expected_outputs)
    def test_dict_splitter_with_different_input_space(self):
        space = Dict(a=Tuple(bool, FloatBox(shape=())),
                     b=FloatBox(shape=()),
                     c=bool,
                     d=IntBox(low=0, high=255),
                     e=dict(ea=float),
                     f=FloatBox(shape=(3, 2)),
                     add_batch_rank=False)
        # Define the output-order.
        splitter = ContainerSplitter("b", "c", "d", "a", "f", "e")
        test = ComponentTest(component=splitter,
                             input_spaces=dict(inputs=space))

        # Single sample (no batch rank).
        input_ = space.sample()
        expected_outputs = [
            input_["b"], input_["c"], input_["d"], input_["a"], input_["f"],
            input_["e"]
        ]

        test.test(("call", input_), expected_outputs=expected_outputs)
    def test_dict_splitter(self):
        space = Dict(a=dict(aa=bool, ab=float),
                     b=dict(ba=bool),
                     c=float,
                     d=IntBox(low=0, high=255),
                     e=IntBox(2),
                     f=FloatBox(shape=(3, 2)),
                     g=Tuple(bool, FloatBox(shape=())),
                     add_batch_rank=True)
        # Define the output-order.
        splitter = ContainerSplitter("g", "a", "b", "c", "d", "e", "f")
        test = ComponentTest(component=splitter,
                             input_spaces=dict(inputs=space))

        # Get a batch of samples.
        input_ = space.sample(size=3)
        expected_output = [
            input_["g"], input_["a"], input_["b"], input_["c"], input_["d"],
            input_["e"], input_["f"]
        ]
        test.test(("call", input_), expected_outputs=expected_output)
    def test_functional_api_multi_stream_nn(self):
        # Input Space of the network.
        input_space = Dict(
            {
                "img": FloatBox(shape=(6, 6, 3)),  # some RGB img
                "txt": TextBox()  # some text
            },
            add_batch_rank=True,
            add_time_rank=True)

        img, txt = ContainerSplitter("img", "txt")(input_space)
        # Complex NN assembly via our Keras-style functional API.
        # Fold text input into single batch rank.
        folded_text = ReShape(fold_time_rank=True)(txt)
        # String layer will create batched AND time-ranked (individual words) hash outputs (int64).
        string_bucket_out, lengths = StringToHashBucket(
            num_hash_buckets=5)(folded_text)
        # Batched and time-ranked embedding output (floats) with embed dim=n.
        embedding_out = EmbeddingLookup(embed_dim=10,
                                        vocab_size=5)(string_bucket_out)
        # Pass embeddings through a text LSTM and use last output (reduce time-rank).
        string_lstm_out, _ = LSTMLayer(units=2,
                                       return_sequences=False,
                                       scope="lstm-layer-txt")(
                                           embedding_out,
                                           sequence_length=lengths)
        # Unfold to get original time-rank back.
        string_lstm_out_unfolded = ReShape(unfold_time_rank=True)(
            string_lstm_out, txt)

        # Parallel image stream via 1 CNN layer plus dense.
        folded_img = ReShape(fold_time_rank=True, scope="img-fold")(img)
        cnn_out = Conv2DLayer(filters=1, kernel_size=2, strides=2)(folded_img)
        unfolded_cnn_out = ReShape(unfold_time_rank=True,
                                   scope="img-unfold")(cnn_out, img)
        unfolded_cnn_out_flattened = ReShape(
            flatten=True, scope="img-flat")(unfolded_cnn_out)
        dense_out = DenseLayer(units=2,
                               scope="dense-0")(unfolded_cnn_out_flattened)

        # Concat everything.
        concat_out = ConcatLayer()(string_lstm_out_unfolded, dense_out)

        # LSTM output has batch+time.
        main_lstm_out, internal_states = LSTMLayer(
            units=2, scope="lstm-layer-main")(concat_out)

        dense1_after_lstm_out = DenseLayer(units=3,
                                           scope="dense-1")(main_lstm_out)
        dense2_after_lstm_out = DenseLayer(
            units=2, scope="dense-2")(dense1_after_lstm_out)
        dense3_after_lstm_out = DenseLayer(
            units=1, scope="dense-3")(dense2_after_lstm_out)

        # A NN with 2 outputs.
        neural_net = NeuralNetwork(
            outputs=[dense3_after_lstm_out, main_lstm_out, internal_states])

        test = ComponentTest(component=neural_net,
                             input_spaces=dict(inputs=input_space))

        # Batch of size=4 with a time rank of 2.
        sample_shape = (4, 2)
        input_ = input_space.sample(sample_shape)

        out = test.test(("call", input_), expected_outputs=None)
        # Main output (Dense out after LSTM).
        self.assertTrue(out[0].shape == sample_shape +
                        (1, ))  # 1=1 unit in dense layer
        self.assertTrue(out[0].dtype == np.float32)
        # main-LSTM out.
        self.assertTrue(out[1].shape == sample_shape + (2, ))  # 2=2 LSTM units
        self.assertTrue(out[1].dtype == np.float32)
        # main-LSTM internal-states.
        self.assertTrue(out[2][0].shape == sample_shape[:1] +
                        (2, ))  # 2=2 LSTM units
        self.assertTrue(out[2][0].dtype == np.float32)
        self.assertTrue(out[2][1].shape == sample_shape[:1] +
                        (2, ))  # 2=2 LSTM units
        self.assertTrue(out[2][1].dtype == np.float32)

        test.terminate()
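
The same Keras-style assembly pattern in its simplest form, with a single flat input Space (a hypothetical sketch following the test above, not part of the original code):

input_space = FloatBox(shape=(8,), add_batch_rank=True)
hidden_out = DenseLayer(units=4, scope="dense-hidden")(input_space)
final_out = DenseLayer(units=2, scope="dense-final")(hidden_out)
simple_net = NeuralNetwork(outputs=final_out)

test = ComponentTest(component=simple_net, input_spaces=dict(inputs=input_space))
out = test.test(("call", input_space.sample(size=5)), expected_outputs=None)  # expected shape: (5, 2)
test.terminate()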
Example #10
class IMPALANetwork(NeuralNetwork):
    """
    The base class for both the "large architecture" and "small architecture" versions of the networks used in [1].

    [1] IMPALA: Scalable Distributed Deep-RL with Importance Weighted Actor-Learner Architectures - Espeholt, Soyer,
        Munos et al. - 2018 (https://arxiv.org/abs/1802.01561)
    """
    def __init__(self, worker_sample_size=100, scope="impala-network", **kwargs):
        """
        Args:
            worker_sample_size (int): How many time-steps an IMPALA actor will have performed in one rollout.
        """
        super(IMPALANetwork, self).__init__(scope=scope, **kwargs)

        self.worker_sample_size = worker_sample_size

        # Create all needed sub-components.

        # ContainerSplitter for the Env signal (a dict with 4 keys: env image, env text, previous action and previous reward).
        self.splitter = ContainerSplitter("RGB_INTERLEAVED", "INSTR", "previous_action", "previous_reward",
                                          scope="input-splitter")

        # ReShape Components to fold the time rank into the batch rank and to unfold it again right before the main LSTM.
        self.time_rank_fold_before_lstm = ReShape(fold_time_rank=True, scope="time-rank-fold-before-lstm")
        self.time_rank_unfold_before_lstm = ReShape(unfold_time_rank=True, time_major=True,
                                                    scope="time-rank-unfold-before-lstm")

        # The Image Processing Stack (left side of "Large Architecture" Figure 3 in [1]).
        # Conv2D column + ReLU + fc(256) + ReLU.
        self.image_processing_stack = self.build_image_processing_stack()

        # The text processing pipeline: Takes a batch of string tensors as input, creates a hash-bucket thereof,
        # and passes the output of the hash bucket through an embedding-lookup(20) layer. The output of the embedding
        # lookup is then passed through an LSTM(64).
        self.text_processing_stack = self.build_text_processing_stack()

        #self.debug_slicer = Slice(scope="internal-states-slicer", squeeze=True)

        # The concatenation layer (concatenates outputs from image/text processing stacks, previous action/reward).
        self.concat_layer = ConcatLayer()

        # The main LSTM (going into the ActionAdapter (next in the Policy Component that uses this NN Component)).
        # Use time-major as it's faster (according to the tf docs).
        self.main_lstm = LSTMLayer(units=256, scope="lstm-256", time_major=True, static_loop=self.worker_sample_size)

        # Add all sub-components to this one.
        self.add_components(
            self.splitter, self.image_processing_stack, self.text_processing_stack,
            self.concat_layer,
            self.main_lstm,
            self.time_rank_fold_before_lstm, self.time_rank_unfold_before_lstm,
            #self.debug_slicer
        )

    @staticmethod
    def build_image_processing_stack():
        """
        Builds the image processing pipeline for IMPALA and returns it.
        """
        raise NotImplementedError

    @staticmethod
    def build_text_processing_stack():
        """
        Helper function to build the text processing pipeline for both the large and small architectures, consisting of:
        - ReShape preprocessor to fold the incoming time rank into the batch rank.
        - StringToHashBucket Layer taking a batch of sentences and converting them to an indices-table with dimensions:
          cols=length of the longest sentence in the input
          rows=number of items in the batch
          The cols dimension can be interpreted as the time rank for a subsequent LSTM. The StringToHashBucket
          Component returns the sequence length of each batch item for exactly that purpose.
        - Embedding Lookup Layer of embedding size 20 and number of rows == num_hash_buckets (see previous layer).
        - LSTM processing the batched sequences of words coming from the embedding layer as batches of rows.
        """
        num_hash_buckets = 1000

        # Create a hash bucket from the sentences and use that bucket to do an embedding lookup (instead of
        # a vocabulary).
        string_to_hash_bucket = StringToHashBucket(num_hash_buckets=num_hash_buckets)
        embedding = EmbeddingLookup(embed_dim=20, vocab_size=num_hash_buckets, pad_empty=True)
        # The time rank for the LSTM is now the sequence of words in a sentence, NOT the original env time rank.
        # We will only use the last output of the LSTM-64 for further processing as that is the output after having
        # seen all words in the sentence.
        # The original env stepping time rank is currently folded into the batch rank and must be unfolded again before
        # passing it into the main LSTM.
        lstm64 = LSTMLayer(units=64, scope="lstm-64", time_major=False)

        tuple_splitter = ContainerSplitter(tuple_length=2, scope="tuple-splitter")

        def custom_call(self, inputs):
            hash_bucket, lengths = self.sub_components["string-to-hash-bucket"].call(inputs)

            embedding_output = self.sub_components["embedding-lookup"].call(hash_bucket)

            # Keep only the last LSTM output (we are not interested in intermediate results produced before the
            # LSTM has seen the entire sentence).
            # That last output is the final internal h-state (slot 1 in the returned LSTM tuple; slot 0 is the final c-state).
            lstm_output = self.sub_components["lstm-64"].call(embedding_output, sequence_length=lengths)
            lstm_final_internals = lstm_output["last_internal_states"]

            # Need to split once more because the LSTM state is always a tuple of final c- and h-states.
            _, lstm_final_h_state = self.sub_components["tuple-splitter"].call(lstm_final_internals)

            return lstm_final_h_state

        text_processing_stack = Stack(
            string_to_hash_bucket, embedding, lstm64, tuple_splitter,
            api_methods={("call", custom_call)}, scope="text-stack"
        )

        return text_processing_stack

    @rlgraph_api
    def call(self, input_dict, internal_states=None):
        # Split the input dict coming directly from the Env.
        _, _, _, orig_previous_reward = self.splitter.call(input_dict)

        folded_input = self.time_rank_fold_before_lstm.call(input_dict)
        image, text, previous_action, previous_reward = self.splitter.call(folded_input)

        # Get the left-stack (image) and right-stack (text) output (see [1] for details).
        text_processing_output = self.text_processing_stack.call(text)
        image_processing_output = self.image_processing_stack.call(image)

        # Concat everything together.
        concatenated_data = self.concat_layer.call(
            image_processing_output, text_processing_output, previous_action, previous_reward
        )

        unfolded_concatenated_data = self.time_rank_unfold_before_lstm.call(concatenated_data, orig_previous_reward)

        # Feed concat'd input into main LSTM(256).
        lstm_output = self.main_lstm.call(unfolded_concatenated_data, internal_states)

        return lstm_output
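
What the time-rank fold/unfold around the main LSTM amounts to can be sketched with plain numpy reshapes (illustrative only; the shapes are assumptions):

import numpy as np

time_steps, batch_size, feature_dim = 100, 4, 256
x = np.zeros((time_steps, batch_size, feature_dim))                # time-major input
folded = x.reshape((-1, feature_dim))                              # fold the time rank into the batch rank: (400, 256)
unfolded = folded.reshape((time_steps, batch_size, feature_dim))   # unfold again right before the main LSTM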
Example #11
    def __init__(self, *layers, **kwargs):
        """
        Args:
            *layers (Component): Same as `sub_components` argument of Stack. Can be used to add Layer Components
                (or any other Components) to this Network.

        Keyword Args:
            layers (Optional[list]): An optional list of Layer objects or spec-dicts to overwrite(!)
                *layers.

            inputs (Optional[List[Space]]): A list of Spaces or a single Space object defining the input spaces for
                the `call` method of this network. Must be provided if `call` needs more than one input arg, in order
                to determine the order in which these inputs will come in.

            outputs (Optional[List[NNCallOutput]]): A list or single output NNCallOutput object,
                indicating that we have to infer the `call` method from the graph given by these outputs.
                This is used iff a NN is constructed by the Keras-style functional API.

            num_inputs (Optional[int]): An optional number of inputs the `call` method will take as `*inputs`.
                If not given, NN will try to infer this value automatically.

            fold_time_rank (bool): Whether to overwrite the `fold_time_rank` option for the `call` method.
                Only for auto-generated `call` method. Default: None.

            unfold_time_rank (bool): Whether to overwrite the `unfold_time_rank` option for the `call` method.
                Only for auto-generated `call` method. Default: None.
        """
        # In case layers come in via a spec dict -> push it into *layers.
        layers_args = kwargs.pop("layers", layers)
        # Add a default scope (if not given) and pass on via kwargs.
        kwargs["scope"] = kwargs.get("scope", "neural-network")
        self.keras_style_api_outputs = force_list(kwargs.pop("outputs", None))
        self.keras_style_api_inputs = force_list(kwargs.pop("inputs", []))
        # If Keras-style inputs are given, just count those, otherwise allow for `num_inputs` hint (default: 1).
        self.num_inputs = len(self.keras_style_api_inputs)
        if self.num_inputs == 0:
            self.num_inputs = kwargs.pop("num_inputs", 1)
        self.num_outputs = min(len(self.keras_style_api_outputs), 1)

        # Force the only API-method to be `call`. No matter whether custom-API or auto-generated (via Stack).
        self.custom_call_given = True
        if not hasattr(self, "call"):
            # Automatically create the `call` stack.
            if "api_methods" not in kwargs:
                kwargs["api_methods"] = [
                    dict(api="call_shadowed_", component_api="call")
                ]
                self.custom_call_given = False
            # Sanity check `api_methods` to contain only specifications on `call`.
            else:
                assert len(kwargs["api_methods"]) == 1, \
                    "ERROR: Only 0 or 1 given API-methods are allowed in NeuralNetwork ctor! You provided " \
                    "'{}'.".format(kwargs["api_methods"])
                # Make sure the only allowed api_method is `call`.
                assert next(iter(kwargs["api_methods"]))[0] == "call", \
                    "ERROR: NeuralNetwork's custom API-method must be called `call`! You named it '{}'.". \
                    format(next(iter(kwargs["api_methods"]))[0])

            # Follow given options.
            fold_time_rank = kwargs.pop("fold_time_rank", None)
            if fold_time_rank is not None:
                kwargs["api_methods"][0]["fold_time_rank"] = fold_time_rank
            unfold_time_rank = kwargs.pop("unfold_time_rank", None)
            if unfold_time_rank is not None:
                kwargs["api_methods"][0]["unfold_time_rank"] = unfold_time_rank

        assert len(self.keras_style_api_outputs) == 0 or self.custom_call_given is False, \
            "ERROR: If functional API is used to construct network, a custom `call` method must not be provided!"

        # Pytorch specific objects.
        self.network_obj = None
        self.non_layer_components = None

        super(NeuralNetwork, self).__init__(*layers_args, **kwargs)

        # In case we have more than one input (and not using Keras-style assembly),
        # add another input splitter here.
        self.inputs_splitter = None
        if self.num_inputs > 1:
            self.inputs_splitter = ContainerSplitter(
                tuple_length=self.num_inputs, scope=".helper-inputs-splitter")
            self.add_components(self.inputs_splitter)
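
For reference, the two construction styles this constructor supports might look roughly like this (a hedged sketch; the layer choices are arbitrary):

# Plain Stack-style: layers are passed positionally and `call` is auto-generated.
sequential_net = NeuralNetwork(
    DenseLayer(units=64, scope="dense-0"),
    DenseLayer(units=32, scope="dense-1"),
    scope="sequential-net"
)

# Keras-style: the outputs of a layer-call graph are handed in via `outputs`
# (see the functional-API test further above).
in_space = FloatBox(shape=(8,), add_batch_rank=True)
out = DenseLayer(units=2, scope="dense-out")(in_space)
functional_net = NeuralNetwork(outputs=out, inputs=in_space, scope="functional-net")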
Example #12
class NeuralNetwork(Stack):
    """
    A NeuralNetwork is a Stack, in which the `call` method is defined either by custom-API-method OR by connecting
    through all sub-Components' `call` methods. The signature of the `call` method is always (self, *inputs).
    In all cases, 1 or more values may be returned by `call`.
    No API methods other than `call` should be defined/used.
    """
    def __init__(self, *layers, **kwargs):
        """
        Args:
            *layers (Component): Same as `sub_components` argument of Stack. Can be used to add Layer Components
                (or any other Components) to this Network.

        Keyword Args:
            layers (Optional[list]): An optional list of Layer objects or spec-dicts to overwrite(!)
                *layers.

            inputs (Optional[List[Space]]): A list of Spaces or a single Space object defining the input spaces for
                the `call` method of this network. Must be provided if `call` needs more than one input arg, in order
                to determine the order in which these inputs will come in.

            outputs (Optional[List[NNCallOutput]]): A list or single output NNCallOutput object,
                indicating that we have to infer the `call` method from the graph given by these outputs.
                This is used iff a NN is constructed by the Keras-style functional API.

            num_inputs (Optional[int]): An optional number of inputs the `call` method will take as `*inputs`.
                If not given, NN will try to infer this value automatically.

            fold_time_rank (bool): Whether to overwrite the `fold_time_rank` option for the `call` method.
                Only for auto-generated `call` method. Default: None.

            unfold_time_rank (bool): Whether to overwrite the `unfold_time_rank` option for the `call` method.
                Only for auto-generated `call` method. Default: None.
        """
        # In case layers come in via a spec dict -> push it into *layers.
        layers_args = kwargs.pop("layers", layers)
        # Add a default scope (if not given) and pass on via kwargs.
        kwargs["scope"] = kwargs.get("scope", "neural-network")
        self.keras_style_api_outputs = force_list(kwargs.pop("outputs", None))
        self.keras_style_api_inputs = force_list(kwargs.pop("inputs", []))
        # If Keras-style inputs are given, just count those, otherwise allow for `num_inputs` hint (default: 1).
        self.num_inputs = len(self.keras_style_api_inputs)
        if self.num_inputs == 0:
            self.num_inputs = kwargs.pop("num_inputs", 1)
        self.num_outputs = min(len(self.keras_style_api_outputs), 1)

        # Force the only API-method to be `call`. No matter whether custom-API or auto-generated (via Stack).
        self.custom_call_given = True
        if not hasattr(self, "call"):
            # Automatically create the `call` stack.
            if "api_methods" not in kwargs:
                kwargs["api_methods"] = [
                    dict(api="call_shadowed_", component_api="call")
                ]
                self.custom_call_given = False
            # Sanity check `api_methods` to contain only specifications on `call`.
            else:
                assert len(kwargs["api_methods"]) == 1, \
                    "ERROR: Only 0 or 1 given API-methods are allowed in NeuralNetwork ctor! You provided " \
                    "'{}'.".format(kwargs["api_methods"])
                # Make sure the only allowed api_method is `call`.
                assert next(iter(kwargs["api_methods"]))[0] == "call", \
                    "ERROR: NeuralNetwork's custom API-method must be called `call`! You named it '{}'.". \
                    format(next(iter(kwargs["api_methods"]))[0])

            # Follow given options.
            fold_time_rank = kwargs.pop("fold_time_rank", None)
            if fold_time_rank is not None:
                kwargs["api_methods"][0]["fold_time_rank"] = fold_time_rank
            unfold_time_rank = kwargs.pop("unfold_time_rank", None)
            if unfold_time_rank is not None:
                kwargs["api_methods"][0]["unfold_time_rank"] = unfold_time_rank

        assert len(self.keras_style_api_outputs) == 0 or self.custom_call_given is False, \
            "ERROR: If functional API is used to construct network, a custom `call` method must not be provided!"

        # Pytorch specific objects.
        self.network_obj = None
        self.non_layer_components = None

        super(NeuralNetwork, self).__init__(*layers_args, **kwargs)

        # In case we have more than one input (and not using Keras-style assembly),
        # add another input splitter here.
        self.inputs_splitter = None
        if self.num_inputs > 1:
            self.inputs_splitter = ContainerSplitter(
                tuple_length=self.num_inputs, scope=".helper-inputs-splitter")
            self.add_components(self.inputs_splitter)

    def build_auto_api_method(self,
                              stack_api_method_name,
                              component_api_method_name,
                              fold_time_rank=False,
                              unfold_time_rank=False,
                              ok_to_overwrite=False):

        if get_backend(
        ) == "pytorch" and self.execution_mode == "define_by_run":

            @rlgraph_api(name=stack_api_method_name,
                         component=self,
                         ok_to_overwrite=ok_to_overwrite)
            def method(self, nn_input, *nn_inputs, **kwargs):
                # Avoid jumping back between layers and calls at runtime.
                return self._pytorch_fast_path_exec(
                    *([nn_input] + list(nn_inputs)), **kwargs)

        # Functional API (Keras Style assembly). TODO: Add support for pytorch.
        elif len(self.keras_style_api_outputs) > 0:
            self._build_call_via_keras_style_functional_api(
                *self.keras_style_api_outputs)

        # Auto call-API -> Handle LSTMs correctly.
        elif self.custom_call_given is False:
            self._build_auto_call_method(fold_time_rank, unfold_time_rank)

        # Have super class (Stack) handle registration of given custom `call` method.
        else:
            super(NeuralNetwork,
                  self).build_auto_api_method(stack_api_method_name,
                                              component_api_method_name,
                                              fold_time_rank, unfold_time_rank,
                                              True)

    def _unfold(self, original_input, *args_, **kwargs_):
        if args_ == ():
            assert len(kwargs_) == 1, \
                "ERROR: time-rank-unfolding not supported for more than one NN-return value!"
            key = next(iter(kwargs_))
            kwargs_ = {key: self.unfolder.call(kwargs_[key], original_input)}
        else:
            assert len(args_) == 1, \
                "ERROR: time-rank-unfolding not supported for more than one NN-return value!"
        args_ = (self.unfolder.call(args_[0], original_input), )
        return args_, kwargs_

    def _fold(self, *args_, **kwargs_):
        if args_ == ():
            assert len(kwargs_) == 1, \
                "ERROR: time-rank-unfolding not supported for more than one NN-return value!"
            key = next(iter(kwargs_))
            kwargs_ = {key: self.folder.call(kwargs_[key])}
        else:
            args_ = (self.folder.call(args_[0]), )
        return args_, kwargs_

    def add_layer(self, layer_component):
        """
        Adds an additional Layer Component (even after c'tor execution) to this NN.
        TODO: Currently, layers are always added to the end.

        Args:
            layer_component (Layer): The Layer object to be added to this NN.
        """
        assert self.custom_call_given is False,\
            "ERROR: Cannot add layer to neural network if `call` API-method is a custom one!"
        assert hasattr(layer_component, self.map_api_to_sub_components_api["call_shadowed_"]), \
            "ERROR: Layer to be added ({}) does not have an API-method called '{}'!".format(
                layer_component.scope, self.map_api_to_sub_components_api["call_shadowed_"]
            )
        self.add_components(layer_component)
        self.build_auto_api_method(
            "call_shadowed_",
            self.map_api_to_sub_components_api["call_shadowed_"],
            ok_to_overwrite=True)

    def _pytorch_fast_path_exec(self, *inputs, **kwargs):
        """
        Builds a fast-path execution method for pytorch / eager.
        """
        inputs = inputs[0]
        forward_inputs = []
        for v in inputs:
            if v is not None:
                if isinstance(v, tuple):
                    # Unitary tuples
                    forward_inputs.append(v[0])
                else:
                    forward_inputs.append(v)
        result = self.network_obj.forward(*forward_inputs)
        # Problem: Not everything in the neural network stack is a true layer.
        for c in self.non_layer_components:
            result = getattr(c, "call")(*force_list(result))
        return result

    def post_define_by_run_build(self):
        # Layer objects only exist after build - define torch neural network.
        layer_objects = []
        self.non_layer_components = []
        for component in self.sub_components.values():
            if hasattr(component, "layer"):
                # Store Layer object itself.
                layer_objects.append(component.layer)

                # Append activation fn if needed.
                # N.b. linear returns None here.
                if component.activation_fn is not None:
                    layer_objects.append(component.activation_fn)
            else:
                self.non_layer_components.append(component)
        self.network_obj = torch.nn.Sequential(*layer_objects)

    def has_rnn(self):
        """
        Returns:
            True if one of our sub-Components is an LSTMLayer, False otherwise.
        """
        # TODO: Maybe it would be better to create a child class (RecurrentNeuralNetwork with has_rnn=True and
        # TODO: other available information for its API-clients such as internal_states_space, etc.).
        return any(
            isinstance(sc, LSTMLayer) for sc in self.get_all_sub_components())

    def _build_call_via_keras_style_functional_api(self, *layer_call_outputs):
        """
        Automatically builds our `call` method by traversing the given graph depth first via the following iterative
        procedure:

        Add the given `layer_call_outputs` to a set.
        While there are still items in the set that are not Spaces:
            For o in the set:
                If o is the lone output of its call OR all of that call's outputs are in the set:
                    write the call to the code
                    erase the outs from the set
                    add the ins to the set
        Write `def call(self, ...)` from the given Spaces.
        """
        output_set = set(layer_call_outputs)
        output_id = 0
        sub_components = set()

        def _all_siblings_in_set(output, set_):
            siblings = []
            need_to_find = output.num_outputs
            for o in set_:
                if o.component == output.component:
                    siblings.append(o)
            return len(siblings) == need_to_find, sorted(
                siblings, key=lambda s: s.output_slot)

        # Initialize var names for final outputs.
        for out in sorted(output_set):
            out.var_name = "out{}".format(output_id)
            output_id += 1

        # Write this NN's `call` API-method code dynamically, then execute it.
        call_code = "\treturn {}\n".format(", ".join(
            [o.var_name for o in layer_call_outputs]))

        prev_output_set = None

        # Input Space-IDs that we know will be used.
        functional_api_input_ids = [
            space.id for space in self.keras_style_api_inputs
        ]
        # If no inputs given -> Allow only a single-input arg setup (otherwise, there would be
        # ambiguity).
        auto_functional_api_single_input = None

        # Loop through all nodes.
        while len(output_set) > 0:
            output_list = list(output_set)

            output = next(iter(sorted(output_list)))

            # If only one output OR all outputs are in set -> Write the call.
            found_all, siblings = _all_siblings_in_set(output, output_set)
            if found_all is True:
                siblings_str = ", ".join([o.var_name for o in siblings])
            # Nothing has changed and it's the only output in the list.
            # Some output(s) may be dead ends (assign those to `_`).
            elif prev_output_set == output_set or (prev_output_set is None
                                                   and len(output_set) == 1):
                indices = [s.output_slot for s in siblings]
                siblings_str = ""
                for i in range(output.num_outputs):
                    siblings_str += ", " + (siblings[indices.index(i)].var_name
                                            if i in indices else "_")
                siblings_str = siblings_str[2:]  # cut preceding ", "
            else:
                continue

            # Remove outs from set.
            for sibling in siblings:
                output_set.remove(sibling)
            # Add `ins` to the set (or set them to one of the `inputs[?]` args of the `call` method).
            for pos, in_ in enumerate(output.inputs):
                # This input is a Space -> If we can find it in `self.keras_style_api_inputs`, use the correct
                # `inputs[?]` reference here; if not, it may be a child of a container input, in which case we
                # tag it for now with `inputs[Space.id]`.
                if in_.space is not None:
                    # Given Space is in this NeuralNetwork's given inputs. Use its `*inputs`-index directly.
                    if in_.space.id in functional_api_input_ids:
                        in_.var_name = "inputs[{}]".format(
                            functional_api_input_ids.index(in_.space.id))
                    # A child of an input container space. Add the necessary ContainerSplitter and `inputs`-index
                    # automatically.
                    else:
                        if len(functional_api_input_ids) == 0:
                            top_level_container_space = in_.space.get_top_level_container(
                            )
                            # Make sure it's always the same top-level container (only single input allowed in this
                            # case, due to arg-order ambiguity otherwise).
                            if auto_functional_api_single_input is not None:
                                if top_level_container_space != auto_functional_api_single_input:
                                    raise RLGraphKerasStyleAssemblyError(
                                        "When creating NeuralNetwork '{}' in Keras-style assembly and not providing "
                                        "the `inputs` arg, only one single input into the Network is allowed! You have "
                                        "{} and {}.".format(
                                            self.global_scope,
                                            auto_functional_api_single_input,
                                            top_level_container_space))
                            else:
                                auto_functional_api_single_input = top_level_container_space
                        # Look for this Space in `self.keras_style_api_inputs`.
                        index_chain = []
                        if self._get_container_space_index_chain(
                                self.keras_style_api_inputs
                                if len(self.keras_style_api_inputs) > 0 else
                            [auto_functional_api_single_input], in_.space.id,
                                index_chain) is False:
                            raise RLGraphKerasStyleAssemblyError(
                                "Input '{}' into NeuralNetwork '{}' was not found in any of the provided `inputs` "
                                "(or in the auto-derived input)!".format(
                                    in_.space, self.global_scope))
                        in_.var_name = "inputs[{}]".format(
                            "][".join(index_chain))
                elif in_.var_name is None:
                    in_.var_name = "out{}".format(output_id)
                    output_id += 1
                    output_set.add(in_)

            inputs_str = ", ".join([
                k + i.var_name
                for i, k in zip(output.inputs, output.kwarg_strings)
            ])
            call_code = "\t{} = self.get_sub_component_by_name('{}').call({})\n".format(
                siblings_str, output.component.scope, inputs_str) + call_code
            sub_components.add(output.component)

            # Store previous state of our set.
            prev_output_set = output_set

        # Prepend inputs from left-over Space objects in set.
        call_code = \
            "@rlgraph_api(component=self, ok_to_overwrite=True)\n" + \
            "def call(self, *inputs):\n" + \
            call_code

        # Add all sub-components to this NN.
        self.add_components(*list(sub_components))

        # Execute the code and assign self.call to it.
        print("`call_code` for NN:")
        print(call_code)
        exec(call_code, globals(), locals())

    def _build_auto_call_method(self, fold_time_rank, unfold_time_rank):
        @rlgraph_api(component=self, ok_to_overwrite=True)
        def call(self_, *inputs):
            # Everything is lumped together in inputs[0] but is supposed to be split -> Do this here.
            if len(inputs) == 1 and self.num_inputs > 1:
                inputs = self.inputs_splitter.call(inputs[0])

            inputs = list(inputs)
            original_input = inputs[0]

            # Keep track of the folding status.
            fold_status = "unfolded" if self.has_rnn() else None
            # Fold time rank? For now only support 1st arg folding/unfolding.
            if fold_time_rank is True:
                args_ = tuple([self.folder.call(original_input)] +
                              list(inputs[1:]))
                fold_status = "folded"
            else:
                # TODO: If only unfolding: Assume for now that 2nd input is the original one (so we can infer
                # TODO: batch/time dims).
                if unfold_time_rank is True:
                    assert len(inputs) >= 2, \
                        "ERROR: In Stack: If unfolding w/o folding, second arg must be the original input!"
                    original_input = inputs[1]
                    args_ = tuple([inputs[0]] + list(inputs[2:]))
                else:
                    args_ = inputs

            kwargs_ = {}

            # TODO: keep track of LSTMLayers that only return the last time-step (outputs after these Layers
            # TODO: can no longer be folded; their time-rank is gone for the rest of the NN).
            for i, sub_component in enumerate(
                    self_.sub_components.values()):  # type: Component
                if re.search(r'^\.helper-', sub_component.scope):
                    continue

                # Unfold before an LSTM.
                if isinstance(sub_component,
                              LSTMLayer) and fold_status != "unfolded":
                    args_, kwargs_ = self._unfold(original_input, *args_,
                                                  **kwargs_)
                    fold_status = "unfolded"
                # Fold before a non-LSTM if not already done so.
                elif not isinstance(sub_component,
                                    LSTMLayer) and fold_status == "unfolded":
                    args_, kwargs_ = self._fold(*args_, **kwargs_)
                    fold_status = "folded"

                results = sub_component.call(*args_, **kwargs_)

                # Recycle args_, kwargs_ for reuse in next sub-Component's API-method call.
                if isinstance(results, dict):
                    args_ = ()
                    kwargs_ = results
                else:
                    args_ = force_tuple(results)
                    kwargs_ = {}

            if unfold_time_rank:
                args_, kwargs_ = self._unfold(original_input, *args_,
                                              **kwargs_)
            if args_ == ():
                return kwargs_
            elif len(args_) == 1:
                return args_[0]
            else:
                self.num_outputs = len(args_)
                return args_

    @staticmethod
    def _get_container_space_index_chain(spaces, space_id, _index_chain=None):
        """
        Finds `space_id` in `spaces` and returns the actual path from the top-level Space down to the child-Space
        with id=space_id.

        Args:
            spaces (Union[List[Space],Tuple[Space],Dict[str,Space]]): The container Space or list of Spaces to look
                through.

            space_id (int): The ID of the Space, we are trying to find in `spaces`.

            _index_chain (List[str,int]): The indexing chain so far. Starts with the index of the matching parent Space
                in `spaces`. E.g. given:
                spaces=(Tuple([spaceA(id=0),Dict(a=SpaceB(id=2), b=SpaceC(id=5))]))
                space_id=5
                -> index chain: [1, "b"] -> pick index 1 in the Tuple, then key "b" in the Dict.

        Returns:
            bool: True if `space_id` was found, in which case `_index_chain` is filled in place with the index path
                (e.g. ["0", "'img'", "2"]) leading from the top-level Space in `spaces` to the given Space's id;
                False otherwise.
        """
        assert isinstance(spaces, (tuple, list, dict)), \
            "ERROR: `spaces` must be tuple/list (Tuple Space) OR dict (Dict Space)!"

        for idx, in_space in (spaces.items() if isinstance(spaces, dict) else
                              enumerate(spaces)):
            index_chain_copy = copy.deepcopy(_index_chain)
            # Found the ID.
            if in_space.id == space_id:
                _index_chain.append(
                    str(idx) if isinstance(idx, int) else "\"" + idx + "\"")
                return True
            # Another container -> recurse.
            elif isinstance(in_space, ContainerSpace):
                index_chain_copy.append(
                    str(idx) if isinstance(idx, int) else "\"" + idx + "\"")
                if NeuralNetwork._get_container_space_index_chain(
                        in_space, space_id, index_chain_copy):
                    _index_chain[:] = index_chain_copy
                    return True

        # Not found -> Return False.
        return False
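
The index-chain search can be illustrated with a small, self-contained version over plain nested dicts and tuples (an illustrative sketch, not the RLGraph implementation above):

def find_index_chain(container, target, _chain=None):
    """Depth-first search for `target`; returns (found, chain of indices/keys leading to it)."""
    if _chain is None:
        _chain = []
    items = container.items() if isinstance(container, dict) else enumerate(container)
    for idx, value in items:
        if value == target:
            return True, _chain + [idx]
        if isinstance(value, (dict, tuple, list)):
            found, result = find_index_chain(value, target, _chain + [idx])
            if found:
                return True, result
    return False, _chain

# find_index_chain((("A", {"a": "B", "b": "C"}),), "C") -> (True, [0, 1, "b"])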
Example #13
class NeuralNetwork(Stack):
    """
    A NeuralNetwork is a Stack, in which the `call` method is defined either by custom-API-method OR by connecting
    through all sub-Components' `call` methods. The signature of the `call` method is always (self, *inputs).
    In all cases, 1 or more values may be returned by `call`.
    No API methods other than `call` should be defined/used.
    """
    def __init__(self, *layers, **kwargs):
        """
        Args:
            *layers (Component): Same as `sub_components` argument of Stack. Can be used to add Layer Components
                (or any other Components) to this Network.

        Keyword Args:
            layers (Optional[list]): An optional list of Layer objects or spec-dicts to overwrite(!)
                *layers.

            outputs (Optional[List[NNCallOutput]]): A list or single output NNCallOutput object,
                indicating that we have to infer the `call` method from the graph given by these outputs.
                This is used iff a NN is constructed by the Keras-style functional API.

            num_inputs (Optional[int]): An optional number of inputs the `call` method will take as `*inputs`.
                If not given, NN will try to infer this value automatically.

            fold_time_rank (bool): Whether to overwrite the `fold_time_rank` option for the apply method.
                Only for auto-generated `call` method. Default: None.

            unfold_time_rank (bool): Whether to overwrite the `unfold_time_rank` option for the apply method.
                Only for auto-generated `call` method. Default: None.
        """
        # In case layers come in via a spec dict -> push it into *layers.
        layers_args = kwargs.pop("layers", layers)
        # Add a default scope (if not given) and pass on via kwargs.
        kwargs["scope"] = kwargs.get("scope", "neural-network")
        self.functional_api_outputs = force_list(kwargs.pop("outputs", None))
        self.num_inputs = kwargs.pop("num_inputs", 1)
        self.num_outputs = min(len(self.functional_api_outputs), 1)

        # Force the only API-method to be `call`. No matter whether custom-API or auto-generated (via Stack).
        self.custom_call_given = True
        if not hasattr(self, "call"):
            # Automatically create the `call` stack.
            if "api_methods" not in kwargs:
                kwargs["api_methods"] = [dict(api="apply_shadowed_", component_api="call")]
                self.custom_call_given = False
            # Sanity check `api_methods` to contain only specifications on `call`.
            else:
                assert len(kwargs["api_methods"]) == 1, \
                    "ERROR: Only 0 or 1 given API-methods are allowed in NeuralNetwork ctor! You provided " \
                    "'{}'.".format(kwargs["api_methods"])
                # Make sure the only allowed api_method is `call`.
                assert next(iter(kwargs["api_methods"]))[0] == "call", \
                    "ERROR: NeuralNetwork's custom API-method must be called `call`! You named it '{}'.". \
                    format(next(iter(kwargs["api_methods"]))[0])

            # Follow given options.
            fold_time_rank = kwargs.pop("fold_time_rank", None)
            if fold_time_rank is not None:
                kwargs["api_methods"][0]["fold_time_rank"] = fold_time_rank
            unfold_time_rank = kwargs.pop("unfold_time_rank", None)
            if unfold_time_rank is not None:
                kwargs["api_methods"][0]["unfold_time_rank"] = unfold_time_rank

        assert len(self.functional_api_outputs) == 0 or self.custom_call_given is False, \
            "ERROR: If functional API is used to construct network, a custom `call` method must not be provided!"

        # Pytorch specific objects.
        self.network_obj = None
        self.non_layer_components = None

        super(NeuralNetwork, self).__init__(*layers_args, **kwargs)

        self.inputs_splitter = ContainerSplitter(tuple_length=self.num_inputs, scope=".helper-inputs-splitter")
        self.add_components(self.inputs_splitter)

    def build_auto_api_method(self, stack_api_method_name, component_api_method_name, fold_time_rank=False,
                              unfold_time_rank=False, ok_to_overwrite=False):

        if get_backend() == "pytorch" and self.execution_mode == "define_by_run":
            @rlgraph_api(name=stack_api_method_name, component=self, ok_to_overwrite=ok_to_overwrite)
            def method(self, nn_input, *nn_inputs, **kwargs):
                # Avoid jumping back between layers and calls at runtime.
                return self._pytorch_fast_path_exec(*([nn_input] + list(nn_inputs)), **kwargs)

        # Functional API (Keras Style assembly). TODO: Add support for pytorch.
        elif len(self.functional_api_outputs) > 0:
            self._build_call_via_keras_style_functional_api(*self.functional_api_outputs)

        # Auto apply-API -> Handle LSTMs correctly.
        elif self.custom_call_given is False:
            self._build_auto_call_method(fold_time_rank, unfold_time_rank)

        # Have super class (Stack) handle registration of given custom `call` method.
        else:
            super(NeuralNetwork, self).build_auto_api_method(
                stack_api_method_name, component_api_method_name, fold_time_rank, unfold_time_rank, True
            )

    def _unfold(self, original_input, *args_, **kwargs_):
        if args_ == ():
            assert len(kwargs_) == 1, \
                "ERROR: time-rank-unfolding not supported for more than one NN-return value!"
            key = next(iter(kwargs_))
            kwargs_ = {key: self.unfolder.call(kwargs_[key], original_input)}
        else:
            assert len(args_) == 1, \
                "ERROR: time-rank-unfolding not supported for more than one NN-return value!"
        args_ = (self.unfolder.call(args_[0], original_input),)
        return args_, kwargs_

    def _fold(self, *args_, **kwargs_):
        if args_ == ():
            assert len(kwargs_) == 1, \
                "ERROR: time-rank-unfolding not supported for more than one NN-return value!"
            key = next(iter(kwargs_))
            kwargs_ = {key: self.folder.call(kwargs_[key])}
        else:
            args_ = (self.folder.call(args_[0]),)
        return args_, kwargs_

    def add_layer(self, layer_component):
        """
        Adds an additional Layer Component (even after c'tor execution) to this NN.
        TODO: Currently, layers are always added to the end.

        Args:
            layer_component (Layer): The Layer object to be added to this NN.
        """
        assert self.custom_call_given is False,\
            "ERROR: Cannot add layer to neural network if `call` API-method is a custom one!"
        assert hasattr(layer_component, self.map_api_to_sub_components_api["apply_shadowed_"]), \
            "ERROR: Layer to be added ({}) does not have an API-method called '{}'!".format(
                layer_component.scope, self.map_api_to_sub_components_api["apply_shadowed_"]
            )
        self.add_components(layer_component)
        self.build_auto_api_method("apply_shadowed_", self.map_api_to_sub_components_api["apply_shadowed_"],
                                   ok_to_overwrite=True)
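
    # Hypothetical usage sketch (layer names and units are assumptions for illustration only):
    #
    #   nn = NeuralNetwork(DenseLayer(units=10, scope="dense-1"))
    #   nn.add_layer(DenseLayer(units=5, scope="dense-2"))  # regenerates the auto `call` to include the new layer
    #
    # Note that adding layers this way only works while the default (auto-generated) `call` is in use.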

    def _pytorch_fast_path_exec(self, *inputs, **kwargs):
        """
        Builds a fast-path execution method for pytorch / eager.
        """
        inputs = inputs[0]
        forward_inputs = []
        for v in inputs:
            if v is not None:
                if isinstance(v, tuple):
                    # Unwrap unitary (1-element) tuples.
                    forward_inputs.append(v[0])
                else:
                    forward_inputs.append(v)
        result = self.network_obj.forward(*forward_inputs)
        # Problem: Not everything in the neural network stack is a true layer.
        for c in self.non_layer_components:
            result = getattr(c, "call")(*force_list(result))
        return result

    def post_define_by_run_build(self):
        # Layer objects only exist after build - define torch neural network.
        layer_objects = []
        self.non_layer_components = []
        for component in self.sub_components.values():
            if hasattr(component, "layer"):
                # Store Layer object itself.
                layer_objects.append(component.layer)

                # Append activation fn if needed.
                # N.b. linear returns None here.
                if component.activation_fn is not None:
                    layer_objects.append(component.activation_fn)
            else:
                self.non_layer_components.append(component)
        self.network_obj = torch.nn.Sequential(*layer_objects)
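
    # Rough sketch of what the loop above produces for a stack of two DenseLayers (assumed configuration,
    # PyTorch backend); the exact torch classes depend on each Layer's spec:
    #
    #   layer_objects ~ [torch.nn.Linear(16, 3), torch.nn.ReLU(), torch.nn.Linear(3, 2)]
    #   self.network_obj = torch.nn.Sequential(*layer_objects)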

    def has_rnn(self):
        """
        Returns:
            True if one of our sub-Components is an LSTMLayer, False otherwise.
        """
        # TODO: Maybe it would be better to create a child class (RecurrentNeuralNetwork with has_rnn=True and
        # TODO: other available information for its API-clients such as internal_states_space, etc.).
        return any(isinstance(sc, LSTMLayer) for sc in self.get_all_sub_components())

    def _build_call_via_keras_style_functional_api(self, *layer_call_outputs):
        """
        Automatically builds our `call` method by traversing the given graph depth first via the following iterative
        procedure:

        Add the given `layer_call_outputs` to a set.
        While there are still items in the set that are not Spaces:
            For each output o in the set:
                If o is the lone output of its call OR all of that call's outputs are in the set:
                    Write the call to the generated code.
                    Erase that call's outputs from the set.
                    Add that call's inputs to the set.
        Write the `def call(self, ...)` signature from the given (left-over) Spaces.
        """
        output_set = set(layer_call_outputs)
        output_id = 0
        sub_components = set()
        num_inputs = 0

        def _all_siblings_in_set(output, set_):
            siblings = []
            need_to_find = output.num_outputs
            for o in set_:
                if o.component == output.component:
                    siblings.append(o)
            return len(siblings) == need_to_find, sorted(siblings, key=lambda s: s.output_slot)

        # Initialize var names for final outputs.
        for out in sorted(output_set):
            out.var_name = "out{}".format(output_id)
            output_id += 1

        # Write this NN's `call` code dynamically, then execute it.
        apply_code = "\treturn {}\n".format(", ".join([o.var_name for o in layer_call_outputs]))

        prev_output_set = None

        # Loop through all nodes.
        while len(output_set) > 0:
            output_list = list(output_set)

            output = next(iter(sorted(output_list)))

            # If only one output OR all outputs are in set -> Write the call.
            found_all, siblings = _all_siblings_in_set(output, output_set)
            if found_all is True:
                siblings_str = ", ".join([o.var_name for o in siblings])
            # Nothing has changed since the last pass, or this is the only output in the set.
            # Some output(s) may be dead ends (construct those as `_`).
            elif prev_output_set == output_set or (prev_output_set is None and len(output_set) == 1):
                indices = [s.output_slot for s in siblings]
                siblings_str = ""
                for i in range(output.num_outputs):
                    siblings_str += ", " + (siblings[indices.index(i)].var_name if i in indices else "_")
                siblings_str = siblings_str[2:]  # cut preceding ", "
            else:
                continue

            # Remove outs from set.
            for sibling in siblings:
                output_set.remove(sibling)
            # Add `ins` to set or to `apply_inputs` (if `in` is a Space).
            for pos, in_ in enumerate(output.inputs):
                if in_.space is not None:
                    in_.var_name = "inputs[{}]".format(pos)
                    if pos + 1 > num_inputs:
                        num_inputs = pos + 1
                elif in_.var_name is None:
                    in_.var_name = "out{}".format(output_id)
                    output_id += 1
                    output_set.add(in_)

            inputs_str = ", ".join([k + i.var_name for i, k in zip(output.inputs, output.kwarg_strings)])
            apply_code = "\t{} = self.get_sub_component_by_name('{}').call({})\n".format(
                siblings_str, output.component.scope, inputs_str) + apply_code
            sub_components.add(output.component)

            # Store previous state of our set.
            prev_output_set = output_set

        # Prepend inputs from left-over Space objects in set.
        apply_code = "@rlgraph_api(component=self, ok_to_overwrite=True)\n" + \
                     "def call(self, *inputs):\n" + \
                     apply_code

        # Add all sub-components to this NN.
        self.add_components(*list(sub_components))

        self.num_inputs = num_inputs

        # Execute the code and assign self.call to it.
        print("`apply_code` for NN:")
        print(apply_code)
        exec(apply_code, globals(), locals())

    def _build_auto_call_method(self, fold_time_rank, unfold_time_rank):
        @rlgraph_api(component=self, ok_to_overwrite=True)
        def call(self_, *inputs):
            # Everything is lumped together in inputs[0] but is supposed to be split -> Do this here.
            if len(inputs) == 1 and self.num_inputs > 1:
                inputs = self.inputs_splitter.call(inputs[0])

            inputs = list(inputs)
            original_input = inputs[0]

            # Keep track of the folding status.
            fold_status = "unfolded" if self.has_rnn() else None
            # Fold time rank? For now only support 1st arg folding/unfolding.
            if fold_time_rank is True:
                args_ = tuple([self.folder.call(original_input)] + list(inputs[1:]))
                fold_status = "folded"
            else:
                # TODO: If only unfolding: Assume for now that 2nd input is the original one (so we can infer
                # TODO: batch/time dims).
                if unfold_time_rank is True:
                    assert len(inputs) >= 2, \
                        "ERROR: In Stack: If unfolding w/o folding, second arg must be the original input!"
                    original_input = inputs[1]
                    args_ = tuple([inputs[0]] + list(inputs[2:]))
                else:
                    args_ = inputs

            kwargs_ = {}

            # TODO: keep track of LSTMLayers that only return the last time-step (outputs after these Layers
            # TODO: can no longer be folded; their time-rank is gone for the rest of the NN).
            for i, sub_component in enumerate(self_.sub_components.values()):  # type: Component
                if re.search(r'^\.helper-', sub_component.scope):
                    continue

                # Unfold before an LSTM.
                if isinstance(sub_component, LSTMLayer) and fold_status != "unfolded":
                    args_, kwargs_ = self._unfold(original_input, *args_, **kwargs_)
                    fold_status = "unfolded"
                # Fold before a non-LSTM if not already done so.
                elif not isinstance(sub_component, LSTMLayer) and fold_status == "unfolded":
                    args_, kwargs_ = self._fold(*args_, **kwargs_)
                    fold_status = "folded"

                results = sub_component.call(*args_, **kwargs_)

                # Recycle args_, kwargs_ for reuse in next sub-Component's API-method call.
                if isinstance(results, dict):
                    args_ = ()
                    kwargs_ = results
                else:
                    args_ = force_tuple(results)
                    kwargs_ = {}

            if unfold_time_rank:
                args_, kwargs_ = self._unfold(original_input, *args_, **kwargs_)
            if args_ == ():
                return kwargs_
            elif len(args_) == 1:
                return args_[0]
            else:
                self.num_outputs = len(args_)
                return args_
Exemplo n.º 14
0
    def __init__(self,
                 discount=0.99,
                 fifo_queue_spec=None,
                 architecture="large",
                 environment_spec=None,
                 feed_previous_action_through_nn=True,
                 feed_previous_reward_through_nn=True,
                 weight_pg=None,
                 weight_baseline=None,
                 weight_entropy=None,
                 worker_sample_size=100,
                 **kwargs):
        """
        Args:
            discount (float): The discount factor gamma.
            architecture (str): Which IMPALA architecture to use. One of "small" or "large". Will be ignored if
                `network_spec` is given explicitly in kwargs. Default: "large".
            fifo_queue_spec (Optional[dict,FIFOQueue]): The spec for the FIFOQueue to use for the IMPALA algorithm.
            environment_spec (dict): The spec for constructing an Environment object for an actor-type IMPALA agent.
            feed_previous_action_through_nn (bool): Whether to add the previous action as another input channel to the
                ActionComponent's (NN's) input at each step. This is only possible if the state space is already a Dict.
                It will be added under the key "previous_action". Default: True.
            feed_previous_reward_through_nn (bool): Whether to add the previous reward as another input channel to the
                ActionComponent's (NN's) input at each step. This is only possible if the state space is already a Dict.
                It will be added under the key "previous_reward". Default: True.
            weight_pg (float): See IMPALALossFunction Component.
            weight_baseline (float): See IMPALALossFunction Component.
            weight_entropy (float): See IMPALALossFunction Component.
            worker_sample_size (int): How many steps the actor will perform in the environment each sample-run.

        Keyword Args:
            type (str): One of "single", "actor" or "learner". Default: "single".
        """
        type_ = kwargs.pop("type", "single")
        assert type_ in ["single", "actor", "learner"]
        self.type = type_
        self.worker_sample_size = worker_sample_size

        # Network-spec by default is a "large architecture" IMPALA network.
        self.network_spec = kwargs.pop(
            "network_spec",
            dict(type="rlgraph.components.neural_networks.impala.impala_networks.{}IMPALANetwork".format(
                "Large" if architecture == "large" else "Small"
            ))
        )
        if isinstance(self.network_spec, dict) and "type" in self.network_spec and \
                "IMPALANetwork" in self.network_spec["type"]:
            self.network_spec = default_dict(
                self.network_spec,
                dict(worker_sample_size=1 if self.type == "actor" else self.worker_sample_size + 1)
            )

        # Depending on the job-type, remove the pieces from the Agent-spec/graph we won't need.
        self.exploration_spec = kwargs.pop("exploration_spec", None)
        optimizer_spec = kwargs.pop("optimizer_spec", None)
        observe_spec = kwargs.pop("observe_spec", None)

        self.feed_previous_action_through_nn = feed_previous_action_through_nn
        self.feed_previous_reward_through_nn = feed_previous_reward_through_nn

        # Run everything in a single process.
        if self.type == "single":
            environment_spec = environment_spec or self.default_environment_spec
            update_spec = kwargs.pop("update_spec", None)
        # Actors won't need to learn (no optimizer needed in graph).
        elif self.type == "actor":
            optimizer_spec = None
            update_spec = kwargs.pop("update_spec", dict(do_updates=False))
            environment_spec = environment_spec or self.default_environment_spec
        # Learners won't need to explore (act) or observe (insert into Queue).
        else:
            observe_spec = None
            update_spec = kwargs.pop("update_spec", None)
            environment_spec = None

        # Add previous-action/reward preprocessors to env-specific preprocessor spec.
        # TODO: remove this empty hard-coded preprocessor.
        self.preprocessing_spec = kwargs.pop(
            "preprocessing_spec",
            dict(
                type="dict-preprocessor-stack",
                preprocessors=dict(
                    # Flatten actions.
                    previous_action=[
                        dict(type="reshape",
                             flatten=True,
                             flatten_categories=kwargs.get(
                                 "action_space").num_categories)
                    ],
                    # Bump reward and convert to float32, so that it can be concatenated by the Concat layer.
                    previous_reward=[dict(type="reshape", new_shape=(1, ))])))

        # Limit communication in distributed mode between each actor and the learner (never between actors).
        execution_spec = kwargs.pop("execution_spec", None)
        if execution_spec is not None and execution_spec.get("mode") == "distributed":
            default_dict(execution_spec["session_config"], dict(
                type="monitored-training-session",
                allow_soft_placement=True,
                device_filters=["/job:learner/task:0"] + (
                    ["/job:actor/task:{}".format(execution_spec["distributed_spec"]["task_index"])]
                    if self.type == "actor" else ["/job:learner/task:0"]
                )
            ))
            # If Actor, make non-chief in either case (even if task idx == 0).
            if self.type == "actor":
                execution_spec["distributed_spec"]["is_chief"] = False
                # Hard-set device to the CPU for actors.
                execution_spec["device_strategy"] = "custom"
                execution_spec["default_device"] = "/job:{}/task:{}/cpu".format(
                    self.type, execution_spec["distributed_spec"]["task_index"]
                )

        self.policy_spec = kwargs.pop("policy_spec", dict())
        # TODO: Create some auto-setting based on LSTM inside the NN.
        default_dict(
            self.policy_spec,
            dict(type="shared-value-function-policy",
                 deterministic=False,
                 reuse_variable_scope="shared-policy",
                 action_space=kwargs.get("action_space")))

        # Now that we fixed the Agent's spec, call the super constructor.
        super(IMPALAAgent, self).__init__(
            discount=discount,
            preprocessing_spec=self.preprocessing_spec,
            network_spec=self.network_spec,
            policy_spec=self.policy_spec,
            exploration_spec=self.exploration_spec,
            optimizer_spec=optimizer_spec,
            observe_spec=observe_spec,
            update_spec=update_spec,
            execution_spec=execution_spec,
            name=kwargs.pop("name", "impala-{}-agent".format(self.type)),
            **kwargs
        )
        # Always use 1st learner as the parameter server for all policy variables.
        if self.execution_spec["mode"] == "distributed" and self.execution_spec[
                "distributed_spec"]["cluster_spec"]:
            self.policy.propagate_sub_component_properties(
                dict(device=dict(variables="/job:learner/task:0/cpu")))

        # Check whether we have an RNN.
        self.has_rnn = self.policy.neural_network.has_rnn()
        # Check, whether we are running with GPU.
        self.has_gpu = self.execution_spec["gpu_spec"]["gpus_enabled"] is True and \
            self.execution_spec["gpu_spec"]["num_gpus"] > 0

        # Some FIFO-queue specs.
        self.fifo_queue_keys = ["terminals", "states"] + \
                               (["actions"] if not self.feed_previous_action_through_nn else []) + \
                               (["rewards"] if not self.feed_previous_reward_through_nn else []) + \
                               ["action_probs"] + \
                               (["initial_internal_states"] if self.has_rnn else [])
        # Define FIFO record space.
        # Note that only states and internal_states (RNN) contain num-steps+1 items, all other sub-records only contain
        # num-steps items.
        self.fifo_record_space = Dict(
            {
                "terminals": bool,
                "action_probs": FloatBox(shape=(self.action_space.num_categories,)),
            },
            add_batch_rank=False,
            add_time_rank=self.worker_sample_size
        )
        self.fifo_record_space["states"] = self.state_space.with_time_rank(self.worker_sample_size + 1)
        # Add previous action/reward to the state (as extra dict keys) or give them their own record channels?
        if self.feed_previous_action_through_nn:
            self.fifo_record_space["states"]["previous_action"] = \
                self.action_space.with_time_rank(self.worker_sample_size + 1)
        else:
            self.fifo_record_space["actions"] = self.action_space.with_time_rank(self.worker_sample_size)
        if self.feed_previous_reward_through_nn:
            self.fifo_record_space["states"]["previous_reward"] = FloatBox(
                add_time_rank=self.worker_sample_size + 1)
        else:
            self.fifo_record_space["rewards"] = FloatBox(add_time_rank=self.worker_sample_size)

        if self.has_rnn:
            self.fifo_record_space["initial_internal_states"] = \
                self.internal_states_space.with_time_rank(False)

        # Create our FIFOQueue (actors will enqueue, learner(s) will dequeue).
        self.fifo_queue = FIFOQueue.from_spec(
            fifo_queue_spec or dict(capacity=1),
            reuse_variable_scope="shared-fifo-queue",
            only_insert_single_records=True,
            record_space=self.fifo_record_space,
            device=("/job:learner/task:0/cpu" if self.execution_spec["mode"] == "distributed" and
                    self.execution_spec["distributed_spec"]["cluster_spec"] else None)
        )
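
        # For orientation, with the default flags the record space assembled above has roughly this layout
        # (T = worker_sample_size; shapes are illustrative):
        #
        #   terminals:                  bool, time-rank T
        #   action_probs:               FloatBox(shape=(num_categories,)), time-rank T
        #   states:                     Dict(..., previous_action, previous_reward), time-rank T + 1
        #   [initial_internal_states]:  only present if the policy network contains an RNN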

        # Remove `states` key from input_spaces: not needed.
        del self.input_spaces["states"]

        # Add all our sub-components to the core.
        if self.type == "single":
            pass

        elif self.type == "actor":
            # No learning, no loss function.
            self.loss_function = None
            # A Dict Splitter to split things from the EnvStepper.
            self.env_output_splitter = ContainerSplitter(
                tuple_length=4, scope="env-output-splitter")

            self.states_dict_splitter = None

            # Slice some data from the EnvStepper (e.g only first internal states are needed).
            self.internal_states_slicer = Slice(scope="internal-states-slicer",
                                                squeeze=True)
            # Merge back to insert into FIFO.
            self.fifo_input_merger = ContainerMerger(*self.fifo_queue_keys)

            # Dummy Flattener to calculate action-probs space.
            dummy_flattener = ReShape(
                flatten=True,
                flatten_categories=self.action_space.num_categories)
            self.environment_stepper = EnvironmentStepper(
                environment_spec=environment_spec,
                actor_component_spec=ActorComponent(self.preprocessor, self.policy, self.exploration),
                state_space=self.state_space.with_batch_rank(),
                reward_space=float,  # TODO <- float64 for deepmind? may not work for other envs
                internal_states_space=self.internal_states_space,
                num_steps=self.worker_sample_size,
                add_previous_action_to_state=True,
                add_previous_reward_to_state=True,
                add_action_probs=True,
                action_probs_space=dummy_flattener.get_preprocessed_space(self.action_space)
            )
            sub_components = [
                self.environment_stepper, self.env_output_splitter,
                self.internal_states_slicer, self.fifo_input_merger,
                self.fifo_queue
            ]
        # Learner.
        else:
            self.environment_stepper = None

            # A Dict splitter to split up items from the queue.
            self.fifo_input_merger = None
            self.fifo_output_splitter = ContainerSplitter(
                *self.fifo_queue_keys, scope="fifo-output-splitter")
            self.states_dict_splitter = ContainerSplitter(
                *list(self.fifo_record_space["states"].keys()),
                scope="states-dict-splitter")
            self.internal_states_slicer = None

            self.transposer = Transpose(
                scope="transposer", device=dict(ops="/job:learner/task:0/cpu"))
            self.staging_area = StagingArea(num_data=len(self.fifo_queue_keys))

            # Create an IMPALALossFunction with some parameters.
            self.loss_function = IMPALALossFunction(
                discount=self.discount,
                weight_pg=weight_pg,
                weight_baseline=weight_baseline,
                weight_entropy=weight_entropy,
                slice_actions=self.feed_previous_action_through_nn,
                slice_rewards=self.feed_previous_reward_through_nn,
                device="/job:learner/task:0/gpu")

            self.policy.propagate_sub_component_properties(
                dict(device=dict(variables="/job:learner/task:0/cpu",
                                 ops="/job:learner/task:0/gpu")))
            for component in [
                    self.staging_area, self.preprocessor, self.optimizer
            ]:
                component.propagate_sub_component_properties(
                    dict(device="/job:learner/task:0/gpu"))

            sub_components = [
                self.fifo_output_splitter, self.fifo_queue,
                self.states_dict_splitter, self.transposer, self.staging_area,
                self.preprocessor, self.policy, self.loss_function,
                self.optimizer
            ]

        if self.type != "single":
            # Add all the agent's sub-components to the root.
            self.root_component.add_components(*sub_components)

            # Define the Agent's (root Component's) API.
            self.define_graph_api(*sub_components)

        if self.type != "single" and self.auto_build:
            if self.type == "learner":
                build_options = dict(
                    build_device_context="/job:learner/task:0/cpu",
                    pin_global_variable_device="/job:learner/task:0/cpu")
                self._build_graph([self.root_component],
                                  self.input_spaces,
                                  optimizer=self.optimizer,
                                  build_options=build_options)
            else:
                self._build_graph([self.root_component],
                                  self.input_spaces,
                                  optimizer=self.optimizer,
                                  build_options=None)

            self.graph_built = True

            if self.has_gpu:
                # Get 1st return op of API-method `stage` of sub-component `staging-area` (which is the stage-op).
                self.stage_op = self.root_component.sub_components["staging-area"].api_methods["stage"]. \
                    out_op_columns[0].op_records[0].op
                # Initialize the stage.
                self.graph_executor.monitored_session.run_step_fn(
                    lambda step_context: step_context.session.run(self.stage_op)
                )

                # TODO remove after full refactor.
                self.dequeue_op = self.root_component.sub_components["fifo-queue"].api_methods["get_records"]. \
                    out_op_columns[0].op_records[0].op
            if self.type == "actor":
                self.enqueue_op = self.root_component.sub_components["fifo-queue"].api_methods["insert_records"]. \
                    out_op_columns[0].op_records[0].op
Exemplo n.º 15
0
    def __init__(self,
                 discount=0.99,
                 fifo_queue_spec=None,
                 architecture="large",
                 environment_spec=None,
                 feed_previous_action_through_nn=True,
                 feed_previous_reward_through_nn=True,
                 weight_pg=None,
                 weight_baseline=None,
                 weight_entropy=None,
                 num_workers=1,
                 worker_sample_size=100,
                 dynamic_batching=False,
                 visualize=False,
                 **kwargs):
        """
        Args:
            discount (float): The discount factor gamma.
            architecture (str): Which IMPALA architecture to use. One of "small" or "large". Will be ignored if
                `network_spec` is given explicitly in kwargs. Default: "large".
            fifo_queue_spec (Optional[dict,FIFOQueue]): The spec for the FIFOQueue to use for the IMPALA algorithm.
            environment_spec (dict): The spec for constructing an Environment object for an actor-type IMPALA agent.
            feed_previous_action_through_nn (bool): Whether to add the previous action as another input channel to the
                ActionComponent's (NN's) input at each step. This is only possible if the state space is already a Dict.
                It will be added under the key "previous_action". Default: True.
            feed_previous_reward_through_nn (bool): Whether to add the previous reward as another input channel to the
                ActionComponent's (NN's) input at each step. This is only possible if the state space is already a Dict.
                It will be added under the key "previous_reward". Default: True.
            weight_pg (float): See IMPALALossFunction Component.
            weight_baseline (float): See IMPALALossFunction Component.
            weight_entropy (float): See IMPALALossFunction Component.
            num_workers (int): How many actors (workers) should be run in separate threads.
            worker_sample_size (int): How many steps the actor will perform in the environment each sample-run.
            dynamic_batching (bool): Whether to use the deepmind's custom dynamic batching op for wrapping the
                optimizer's step call. The batcher.so file must be compiled for this to work (see Docker file).
                Default: False.
            visualize (Union[int,bool]): Whether and how many workers to visualize.
                Default: False (no visualization).
        """
        # Call the IMPALAAgent super constructor with type="single".
        super(SingleIMPALAAgent, self).__init__(
            type="single",
            discount=discount,
            architecture=architecture,
            fifo_queue_spec=fifo_queue_spec,
            environment_spec=environment_spec,
            feed_previous_action_through_nn=feed_previous_action_through_nn,
            feed_previous_reward_through_nn=feed_previous_reward_through_nn,
            weight_pg=weight_pg,
            weight_baseline=weight_baseline,
            weight_entropy=weight_entropy,
            worker_sample_size=worker_sample_size,
            name=kwargs.pop("name", "impala-single-agent"),
            **kwargs)
        self.dynamic_batching = dynamic_batching
        self.num_workers = num_workers
        self.visualize = visualize

        # If we use dynamic batching, wrap the dynamic batcher around the policy's graph_fn that we
        # actually call below during our build.
        if self.dynamic_batching:
            self.policy = DynamicBatchingPolicy(policy_spec=self.policy,
                                                scope="")

        self.env_output_splitter = ContainerSplitter(
            tuple_length=3 if self.has_rnn is False else 4, scope="env-output-splitter"
        )
        self.fifo_output_splitter = ContainerSplitter(*self.fifo_queue_keys, scope="fifo-output-splitter")
        self.states_dict_splitter = ContainerSplitter(
            *list(self.fifo_record_space["states"].keys() if isinstance(self.state_space, Dict) else "dummy"),
            scope="states-dict-splitter"
        )

        self.staging_area = StagingArea(num_data=len(self.fifo_queue_keys))

        # Slice some data from the EnvStepper (e.g only first internal states are needed).
        if self.has_rnn:
            internal_states_slicer = Slice(scope="internal-states-slicer",
                                           squeeze=True)
        else:
            internal_states_slicer = None

        self.transposer = Transpose(scope="transposer")

        # Create an IMPALALossFunction with some parameters.
        self.loss_function = IMPALALossFunction(
            discount=self.discount,
            weight_pg=weight_pg,
            weight_baseline=weight_baseline,
            weight_entropy=weight_entropy,
            slice_actions=self.feed_previous_action_through_nn,
            slice_rewards=self.feed_previous_reward_through_nn)

        # Merge back to insert into FIFO.
        self.fifo_input_merger = ContainerMerger(*self.fifo_queue_keys)

        # Dummy Flattener to calculate action-probs space.
        dummy_flattener = ReShape(
            flatten=True, flatten_categories=self.action_space.num_categories)

        self.environment_steppers = list()
        for i in range(self.num_workers):
            environment_spec_ = copy.deepcopy(environment_spec)
            if self.visualize is True or (isinstance(self.visualize, int)
                                          and i + 1 <= self.visualize):
                environment_spec_["visualize"] = True

            # Force worker_sample_size for IMPALA NNs (LSTM) in env-stepper to be 1.
            policy_spec = copy.deepcopy(self.policy_spec)
            if isinstance(policy_spec, dict) and isinstance(policy_spec["network_spec"], dict) and \
                    "type" in policy_spec["network_spec"] and "IMPALANetwork" in policy_spec["network_spec"]["type"]:
                policy_spec["network_spec"]["worker_sample_size"] = 1

            env_stepper = EnvironmentStepper(
                environment_spec=environment_spec_,
                actor_component_spec=ActorComponent(
                    preprocessor_spec=self.preprocessing_spec,
                    policy_spec=policy_spec,
                    exploration_spec=self.exploration_spec
                ),
                state_space=self.state_space.with_batch_rank(),
                action_space=self.action_space.with_batch_rank(),
                reward_space=float,
                internal_states_space=self.internal_states_space,
                num_steps=self.worker_sample_size,
                add_action=not self.feed_previous_action_through_nn,
                add_reward=not self.feed_previous_reward_through_nn,
                add_previous_action_to_state=self.feed_previous_action_through_nn,
                add_previous_reward_to_state=self.feed_previous_reward_through_nn,
                add_action_probs=True,
                action_probs_space=dummy_flattener.get_preprocessed_space(self.action_space),
                scope="env-stepper-{}".format(i)
            )
            if self.dynamic_batching:
                env_stepper.actor_component.policy.parent_component = None
                env_stepper.actor_component.policy = DynamicBatchingPolicy(
                    policy_spec=env_stepper.actor_component.policy, scope="")
                env_stepper.actor_component.add_components(
                    env_stepper.actor_component.policy)

            self.environment_steppers.append(env_stepper)

        # Create the QueueRunners (one for each env-stepper).
        self.queue_runner = QueueRunner(
            self.fifo_queue,
            "step",
            -1,  # -1: Take entire return value of API-method `step` as record to insert.
            self.env_output_splitter,
            self.fifo_input_merger,
            internal_states_slicer,
            *self.environment_steppers)

        sub_components = [
            self.fifo_output_splitter, self.fifo_queue, self.queue_runner,
            self.transposer, self.staging_area, self.preprocessor,
            self.states_dict_splitter, self.policy, self.loss_function,
            self.optimizer
        ]

        # Add all the agent's sub-components to the root.
        self.root_component.add_components(*sub_components)

        # Define the Agent's (root Component's) API.
        self.define_graph_api()

        if self.auto_build:
            self._build_graph([self.root_component],
                              self.input_spaces,
                              optimizer=self.optimizer,
                              build_options=None)
            self.graph_built = True

            if self.has_gpu:
                # Get 1st return op of API-method `stage` of sub-component `staging-area` (which is the stage-op).
                self.stage_op = self.root_component.sub_components["staging-area"].api_methods["stage"]. \
                    out_op_columns[0].op_records[0].op
                # Initialize the stage.
                self.graph_executor.monitored_session.run_step_fn(
                    lambda step_context: step_context.session.run(self.stage_op)
                )
                # TODO remove after full refactor.
                self.dequeue_op = self.root_component.sub_components["fifo-queue"].api_methods["get_records"]. \
                    out_op_columns[0].op_records[0].op