class SACValueNetwork(ValueFunction):
    """
    Value network for SAC which must be able to merge different input types.

    States are optionally passed through an image stack first; the result (or the raw
    states) is concatenated with the actions and sent through a dense stack whose last
    component is `self.value_layer`.
    """
    def __init__(self, scope="sac-value-network", **kwargs):
        """
        Args:
            scope (str): Scope name of this component.
            **kwargs: Forwarded unchanged to the parent constructor.
        """
        # NOTE(review): `self.image_stack` and `self.dense_stack` are read right after this
        # call, so the parent constructor presumably triggers `build_value_function` (and
        # initializes `image_stack` to None when no image stack is used) - confirm.
        super(SACValueNetwork, self).__init__(scope=scope, **kwargs)

        # Add all sub-components to this one.
        if self.image_stack is not None:
            self.add_components(self.image_stack)
        self.concat_layer = ConcatLayer()
        self.add_components(self.concat_layer, self.dense_stack)

    def build_value_function(self):
        """
        Builds a dense stack and optionally an image stack.

        With `self.use_image_stack` set, all "conv2d"/"reshape" specs in `self.network_spec`
        form the image stack and the specs following them must all be "dense". Otherwise,
        every spec must be "dense". In both cases `self.value_layer` is appended as the
        last component of the dense stack.

        Raises:
            AssertionError: If a layer spec that is expected to be "dense" is not.
        """
        dense_components = []

        if self.use_image_stack:
            image_components = [
                Layer.from_spec(layer_spec)
                for layer_spec in self.network_spec
                if layer_spec["type"] in ["conv2d", "reshape"]
            ]
            self.image_stack = Stack(image_components, scope="image-stack")

            # Remaining layers should be dense.
            for layer_spec in self.network_spec[len(image_components):]:
                assert layer_spec["type"] == "dense", "Only expecting dense layers after image " \
                                                      "stack but found spec: {}.".format(layer_spec)
                # Build the layer from its spec, exactly as the dense-only branch does,
                # instead of handing the raw spec on to the Stack.
                dense_components.append(Layer.from_spec(layer_spec))
        else:
            # Assume dense network otherwise -> only a single stack.
            for layer_spec in self.network_spec:
                assert layer_spec["type"] == "dense", "Only dense layers allowed if not using" \
                                                      " image stack in this network."
                dense_components.append(Layer.from_spec(layer_spec))

        dense_components.append(self.value_layer)
        self.dense_stack = Stack(dense_components, scope="dense-stack")

    @rlgraph_api
    def state_action_value(self, states, actions, internal_states=None):
        """
        Computes Q(s,a) by passing states and actions through one or multiple processing stacks.

        Args:
            states: The state inputs; sent through the image stack first if one is used.
            actions: The action inputs; concatenated with the (processed) states.
            internal_states: Not used by this method.

        Returns:
            The output of the dense stack for the concatenated state/action inputs.
        """
        if self.use_image_stack:
            # Only the states are image-processed; actions are merged in afterwards.
            state_features = self.image_stack.call(states)
        else:
            state_features = states

        # Concat states and actions, then pass through.
        state_actions = self.concat_layer.call(state_features, actions)
        return self.dense_stack.call(state_actions)
def build_image_processing_stack():
    """
    Assembles the image-processing Stack (scope "image-stack").

    The stack consists of, in this order:
    - A Divide component (divisor 255).
    - Three "conv-unit-{i}" Stacks using 16, 32 and 32 filters. Each one holds:
        - a Conv2D (3x3, stride 1, "same" padding) followed by a MaxPool2D
          (pool size 3, stride 2, "same" padding),
        - a RepeaterStack (repeats=2) over a ResidualLayer (repeats=2) whose residual
          unit is ReLU + Conv2D (3x3, stride 1, "same" padding).
    - Flatten + ReLU + fc(256) + ReLU.

    Returns:
        Stack: The assembled image-processing stack.
    """
    def conv_unit(index, filters):
        # Conv2D followed by MaxPool2D.
        conv_and_pool = Stack(
            Conv2DLayer(filters=filters, kernel_size=3, strides=1, padding="same"),
            MaxPool2DLayer(pool_size=3, strides=2, padding="same"),
            scope="conv-max"
        )
        # Building block of the residual layer: a single ReLU, then a 3x3 Conv2D.
        relu_then_conv = Stack(
            NNLayer(activation="relu"),
            Conv2DLayer(filters=filters, kernel_size=3, strides=1, padding="same"),
            scope="relu-conv"
        )
        residual = ResidualLayer(residual_unit=relu_then_conv, repeats=2)
        # The same residual layer is applied twice in sequence.
        repeated_residual = RepeaterStack(sub_component=residual, repeats=2)
        return Stack(conv_and_pool, repeated_residual, scope="conv-unit-{}".format(index))

    # Input scaling comes first.
    layers = [Divide(divisor=255, scope="divide-255")]

    filters_per_unit = [16, 32, 32]
    for index, filters in enumerate(filters_per_unit):
        layers.append(conv_unit(index, filters))

    # Flatten the Conv2D output, then an fc layer surrounded by ReLUs.
    layers.append(ReShape(flatten=True, scope="flatten"))
    layers.append(NNLayer(activation="relu", scope="relu-1"))
    layers.append(DenseLayer(units=256))
    layers.append(NNLayer(activation="relu", scope="relu-2"))

    return Stack(layers, scope="image-stack")
def build_image_processing_stack():
    """
    Assembles the (small) image-processing Stack (scope "image-stack").

    The stack consists of, in this order:
    - A Divide component (divisor 255).
    - Two Conv2D layers with ReLU activation and "same" padding:
      16 filters / kernel 8 / stride 4, then 32 filters / kernel 4 / stride 2.
    - Flatten + fc(256) + ReLU.

    Returns:
        Stack: The assembled image-processing stack.
    """
    # (filters, kernel_size, strides) for each Conv2D layer, in order.
    conv_settings = [(16, 8, 4), (32, 4, 2)]

    # Input scaling comes first.
    layers = [Divide(divisor=255, scope="divide-255")]

    for index, (filters, kernel, stride) in enumerate(conv_settings):
        layers.append(Conv2DLayer(
            filters=filters,
            kernel_size=kernel,
            strides=stride,
            padding="same",
            activation="relu",
            scope="conv2d-{}".format(index)
        ))

    # Flatten the Conv2D output for the fc layer, which is followed by a ReLU.
    layers.append(ReShape(flatten=True, scope="flatten"))
    layers.append(DenseLayer(units=256))
    layers.append(NNLayer(activation="relu", scope="relu-before-lstm"))

    return Stack(layers, scope="image-stack")
def test_reshape_with_batch_and_time_ranks_with_folding_and_unfolding_0D_shape(self):
    """
    Folds the time rank of a time-major space with shape=() and unfolds it again.
    The round trip is expected to return the inputs unchanged.
    """
    space = FloatBox(shape=(), add_batch_rank=True, add_time_rank=True, time_major=True)
    fold = ReShape(fold_time_rank=True, scope="fold-time-rank")
    unfold = ReShape(unfold_time_rank=True, scope="unfold-time-rank", time_major=True)

    def custom_apply(self_, inputs):
        # The original inputs are handed to the unfolder as a second argument.
        return unfold.apply(fold.apply(inputs), inputs)

    round_trip = Stack(fold, unfold, api_methods={("apply", custom_apply)})
    test = ComponentTest(component=round_trip, input_spaces={"inputs": space})

    # seq-len=16, batch-size=8
    sample = space.sample(size=(16, 8))
    test.test(("apply", sample), expected_outputs=sample)
def test_reshape_with_batch_and_time_ranks_with_folding_and_explicit_unfolding(self):
    """
    Folds the time rank of a time-major (2, 3) space and unfolds it again, giving the
    unfolder an explicit time-rank size instead of the original inputs.
    The round trip is expected to return the inputs unchanged.
    """
    seq_len = 8
    space = FloatBox(shape=(2, 3), add_batch_rank=True, add_time_rank=True, time_major=True)
    fold = ReShape(fold_time_rank=True, scope="fold-time-rank")
    unfold = ReShape(unfold_time_rank=seq_len, scope="unfold-time-rank", time_major=True)

    def custom_call(self_, inputs):
        # Unfolding is explicit, so the original inputs are not passed to the unfolder.
        return unfold.call(fold.call(inputs))

    round_trip = Stack(fold, unfold, api_methods={("call", custom_call)})
    test = ComponentTest(component=round_trip, input_spaces={"inputs": space})

    # seq-len=seq_len, batch-size=12
    sample = space.sample(size=(seq_len, 12))
    test.test(("call", sample), expected_outputs=sample)
def test_reshape_with_batch_and_time_ranks_and_with_folding_and_unfolding(self):
    """
    Folds the time rank of a batch-major (3, 2) space and unfolds it again.
    The round trip is expected to return the inputs unchanged.
    """
    space = FloatBox(shape=(3, 2), add_batch_rank=True, add_time_rank=True, time_major=False)
    fold = ReShape(fold_time_rank=True)
    unfold = ReShape(unfold_time_rank=True, time_major=False)

    def custom_call(self_, inputs):
        # The original inputs are handed to the unfolder as a second argument.
        return unfold.call(fold.call(inputs), inputs)

    round_trip = Stack(fold, unfold, api_methods={("call", custom_call)})
    test = ComponentTest(component=round_trip, input_spaces={"inputs": space})

    # batch-size=4, seq-len=2
    sample = space.sample(size=(4, 2))
    test.test(("call", sample), expected_outputs=sample)
def test_reshape_with_batch_vs_time_flipping_with_folding_and_unfolding(self):
    """
    Folds the time rank of a batch-major (3, 2) space, then unfolds it with
    `flip_batch_and_time_rank=True`. The expected output is the input with its first
    two dimensions swapped.
    """
    space = FloatBox(shape=(3, 2), add_batch_rank=True, add_time_rank=True, time_major=False)
    fold = ReShape(fold_time_rank=True, scope="fold-time-rank")
    unfold_and_flip = ReShape(
        unfold_time_rank=True,
        flip_batch_and_time_rank=True,
        scope="unfold-time-rank-with-flip",
        time_major=True
    )

    def custom_apply(self, inputs):
        # The original inputs are handed to the unfolder as a second argument.
        return unfold_and_flip.apply(fold.apply(inputs), inputs)

    flipper = Stack(fold, unfold_and_flip, api_methods={("apply", custom_apply)})
    test = ComponentTest(component=flipper, input_spaces={"inputs": space})

    # batch-size=4, seq-len=2
    sample = space.sample(size=(4, 2))
    # Swap the first two dimensions; the remaining two stay in place.
    flipped = np.transpose(sample, axes=(1, 0, 2, 3))
    test.test(("apply", sample), expected_outputs=flipped)
def build_text_processing_stack():
    """
    Assembles the text-processing Stack (scope "text-stack").

    The stack holds four components that its custom "apply" API chains together:
    - A StringToHashBucket (1000 hash buckets) whose `apply` output is unpacked into the
      hashed inputs and the per-item sequence lengths.
    - An EmbeddingLookup with embedding size 20 and vocab size == number of hash buckets.
    - An LSTM with 64 units (batch-major), fed with the embeddings and sequence lengths.
    - A ContainerSplitter that splits the LSTM's last internal states into two parts,
      of which only the second (the final h-state) is returned.

    Returns:
        Stack: The assembled text-processing stack.
    """
    bucket_count = 1000

    # Hash the sentences into buckets and use the bucket indices for the embedding lookup
    # (instead of a vocabulary).
    hasher = StringToHashBucket(num_hash_buckets=bucket_count)
    embedder = EmbeddingLookup(embed_dim=20, vocab_size=bucket_count, pad_empty=True)

    # The time rank for this LSTM is the sequence of words in a sentence, NOT the original
    # env time rank. Only its output after having seen the whole sentence is used further.
    sentence_lstm = LSTMLayer(units=64, scope="lstm-64", time_major=False)
    state_splitter = ContainerSplitter(tuple_length=2, scope="tuple-splitter")

    def custom_apply(self, inputs):
        # NOTE(review): "string-to-hash-bucket" and "embedding-lookup" are not set as scopes
        # in this function; presumably they are those components' default scopes - confirm.
        components = self.sub_components

        hashed, seq_lengths = components["string-to-hash-bucket"].apply(inputs)
        embedded = components["embedding-lookup"].apply(hashed)

        # Intermediate LSTM outputs (sentence only partially seen) are of no interest here;
        # only the last internal states are used.
        lstm_result = components["lstm-64"].apply(embedded, sequence_length=seq_lengths)
        final_internals = lstm_result["last_internal_states"]

        # The LSTM state is a tuple of final c- and h-states: slot 0 is the c-state,
        # slot 1 the h-state.
        _, final_h_state = components["tuple-splitter"].split(final_internals)
        return final_h_state

    return Stack(
        hasher, embedder, sentence_lstm, state_splitter,
        api_methods={("apply", custom_apply)}, scope="text-stack"
    )
def build_value_function(self):
    """
    Builds a dense stack and optionally an image stack.

    With `self.use_image_stack` set, all "conv2d"/"reshape" specs in `self.network_spec`
    form the image stack and the specs following them must all be "dense". Otherwise,
    every spec must be "dense". In both cases `self.value_layer` is appended as the
    last component of the dense stack.

    Raises:
        AssertionError: If a layer spec that is expected to be "dense" is not.
    """
    dense_components = []

    if self.use_image_stack:
        image_components = [
            Layer.from_spec(layer_spec)
            for layer_spec in self.network_spec
            if layer_spec["type"] in ["conv2d", "reshape"]
        ]
        self.image_stack = Stack(image_components, scope="image-stack")

        # Remaining layers should be dense.
        for layer_spec in self.network_spec[len(image_components):]:
            assert layer_spec["type"] == "dense", "Only expecting dense layers after image " \
                                                  "stack but found spec: {}.".format(layer_spec)
            # Build the layer from its spec, exactly as the dense-only branch does,
            # instead of handing the raw spec on to the Stack.
            dense_components.append(Layer.from_spec(layer_spec))
    else:
        # Assume dense network otherwise -> only a single stack.
        for layer_spec in self.network_spec:
            assert layer_spec["type"] == "dense", "Only dense layers allowed if not using" \
                                                  " image stack in this network."
            dense_components.append(Layer.from_spec(layer_spec))

    dense_components.append(self.value_layer)
    self.dense_stack = Stack(dense_components, scope="dense-stack")