class ResidualBlock(Layer):

    def __init__(self,
                 dilation_rate,
                 nb_filters,
                 kernel_size,
                 padding,
                 activation='relu',
                 dropout_rate=0,
                 kernel_initializer='he_normal',
                 use_batch_norm=False,
                 last_block=True,
                 **kwargs):
        # type: (int, int, int, str, str, float, str, bool, bool, dict) -> None
        """Defines the residual block for the WaveNet TCN.

        Args:
            dilation_rate: The dilation power of 2 we are using for this residual block.
            nb_filters: The number of convolutional filters to use in this block.
            kernel_size: The size of the convolutional kernel.
            padding: The padding used in the convolutional layers, 'same' or 'causal'.
            activation: The final activation used in o = Activation(x + F(x)).
            dropout_rate: Float between 0 and 1. Fraction of the input units to drop.
            kernel_initializer: Initializer for the kernel weights matrix (Conv1D).
            use_batch_norm: Whether to use batch normalization in the residual layers or not.
            last_block: Whether this is the last residual block of the network.
            kwargs: Any initializers for Layer class.
        """

        self.dilation_rate = dilation_rate
        self.nb_filters = nb_filters
        self.kernel_size = kernel_size
        self.padding = padding
        self.activation = activation
        self.dropout_rate = dropout_rate
        self.use_batch_norm = use_batch_norm
        self.kernel_initializer = kernel_initializer
        self.last_block = last_block

        super(ResidualBlock, self).__init__(**kwargs)

    def _add_and_activate_layer(self, layer):
        """Helper function for building a layer.

        Args:
            layer: Appends layer to the internal layer list and builds it based on the
                   current output shape of ResidualBlock. Updates the current output shape.
        """
        self.residual_layers.append(layer)
        self.residual_layers[-1].build(self.res_output_shape)
        self.res_output_shape = self.residual_layers[-1].compute_output_shape(self.res_output_shape)

    def build(self, input_shape):

        with K.name_scope(self.name):  # name scope used to make sure weights get unique names
            self.residual_layers = list()
            self.res_output_shape = input_shape

            for k in range(2):
                name = 'conv1D_{}'.format(k)
                with K.name_scope(name):  # name scope used to make sure weights get unique names
                    self._add_and_activate_layer(Conv1D(filters=self.nb_filters,
                                                        kernel_size=self.kernel_size,
                                                        dilation_rate=self.dilation_rate,
                                                        padding=self.padding,
                                                        name=name,
                                                        kernel_initializer=self.kernel_initializer))

                if self.use_batch_norm:
                    # TODO should be WeightNorm here, but using batchNorm instead
                    self._add_and_activate_layer(BatchNormalization())

                self._add_and_activate_layer(Activation('relu'))
                self._add_and_activate_layer(SpatialDropout1D(rate=self.dropout_rate))

            if not self.last_block:
                # 1x1 conv to match the shapes (channel dimension).
                name = 'conv1D_{}'.format(k + 1)
                with K.name_scope(name):
                    # make and build this layer separately because it directly uses input_shape
                    self.shape_match_conv = Conv1D(filters=self.nb_filters,
                                                   kernel_size=1,
                                                   padding='same',
                                                   name=name,
                                                   kernel_initializer=self.kernel_initializer)
            else:
                self.shape_match_conv = Lambda(lambda x: x, name='identity')

            self.shape_match_conv.build(input_shape)
            self.res_output_shape = self.shape_match_conv.compute_output_shape(input_shape)

            self.final_activation = Activation(self.activation)
            self.final_activation.build(self.res_output_shape)  # probably isn't necessary

            # this is done to force keras to add the layers in the list to self._layers
            for layer in self.residual_layers:
                self.__setattr__(layer.name, layer)

            super(ResidualBlock, self).build(input_shape)  # done to make sure self.built is set True

    def call(self, inputs, training=None):
        """
        Returns: A tuple where the first element is the residual model tensor, and the second
                 is the skip connection tensor.
        """
        x = inputs
        for layer in self.residual_layers:
            if isinstance(layer, SpatialDropout1D):
                x = layer(x, training=training)
            else:
                x = layer(x)
        x2 = self.shape_match_conv(inputs)
        res_x = add([x2, x])
        return [self.final_activation(res_x), x]

    def compute_output_shape(self, input_shape):
        return [self.res_output_shape, self.res_output_shape]
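
# Minimal usage sketch for ResidualBlock (illustrative only, not part of the library API).
# It assumes the standalone `keras` package backs this module; swap the imports for
# `tensorflow.keras` if that is what the module-level imports use. The input channel
# count matches nb_filters so the default identity shape-match branch is valid.
def _residual_block_example():
    """Applies a single ResidualBlock to a toy sequence and returns both outputs."""
    from keras.layers import Input
    from keras.models import Model
    inp = Input(shape=(100, 32))  # (timesteps, channels); batch dim is implicit
    # call() returns [residual_output, skip_connection]; the residual output is
    # o = Activation(x + F(x)) as described in the docstring above.
    res, skip = ResidualBlock(dilation_rate=2,
                              nb_filters=32,
                              kernel_size=3,
                              padding='causal',
                              dropout_rate=0.1)(inp)
    return Model(inp, [res, skip])
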
class TCN(Layer):
    """Creates a TCN layer.

    Input shape:
        A tensor of shape (batch_size, timesteps, input_dim).

    Args:
        nb_filters: The number of filters to use in the convolutional layers.
        kernel_size: The size of the kernel to use in each convolutional layer.
        dilations: The list of the dilations. Example is: [1, 2, 4, 8, 16, 32, 64].
        nb_stacks: The number of stacks of residual blocks to use.
        padding: The padding to use in the convolutional layers, 'causal' or 'same'.
        use_skip_connections: Boolean. If we want to add skip connections from input to each residual block.
        return_sequences: Boolean. Whether to return the last output in the output sequence, or the full sequence.
        activation: The activation used in the residual blocks o = Activation(x + F(x)).
        dropout_rate: Float between 0 and 1. Fraction of the input units to drop.
        kernel_initializer: Initializer for the kernel weights matrix (Conv1D).
        use_batch_norm: Whether to use batch normalization in the residual layers or not.
        kwargs: Any other arguments for configuring parent class Layer. For example "name=str",
                the name of the model. Use unique names when using multiple TCNs.

    Returns:
        A TCN layer.
    """

    def __init__(self,
                 nb_filters=64,
                 kernel_size=2,
                 nb_stacks=1,
                 dilations=(1, 2, 4, 8, 16, 32),
                 padding='causal',
                 use_skip_connections=True,
                 dropout_rate=0.0,
                 return_sequences=False,
                 activation='linear',
                 kernel_initializer='he_normal',
                 use_batch_norm=False,
                 **kwargs):

        self.return_sequences = return_sequences
        self.dropout_rate = dropout_rate
        self.use_skip_connections = use_skip_connections
        self.dilations = dilations
        self.nb_stacks = nb_stacks
        self.kernel_size = kernel_size
        self.nb_filters = nb_filters
        self.activation = activation
        self.padding = padding
        self.kernel_initializer = kernel_initializer
        self.use_batch_norm = use_batch_norm

        if padding != 'causal' and padding != 'same':
            raise ValueError("Only 'causal' or 'same' padding is compatible with this layer.")

        if not isinstance(nb_filters, int):
            print('An interface change occurred after the version 2.1.2.')
            print('Before: tcn.TCN(x, return_sequences=False, ...)')
            print('Now should be: tcn.TCN(return_sequences=False, ...)(x)')
            print('The alternative is to downgrade to 2.1.2 (pip install keras-tcn==2.1.2).')
            raise Exception()

        # initialize parent class
        super(TCN, self).__init__(**kwargs)

    def build(self, input_shape):
        self.main_conv1D = Conv1D(filters=self.nb_filters,
                                  kernel_size=1,
                                  padding=self.padding,
                                  kernel_initializer=self.kernel_initializer)
        self.main_conv1D.build(input_shape)

        # member to hold current output shape of the layer for building purposes
        self.build_output_shape = self.main_conv1D.compute_output_shape(input_shape)

        # list to hold all the member ResidualBlocks
        self.residual_blocks = list()
        total_num_blocks = self.nb_stacks * len(self.dilations)
        if not self.use_skip_connections:
            total_num_blocks += 1  # cheap way to do a false case for below

        for s in range(self.nb_stacks):
            for d in self.dilations:
                self.residual_blocks.append(ResidualBlock(dilation_rate=d,
                                                          nb_filters=self.nb_filters,
                                                          kernel_size=self.kernel_size,
                                                          padding=self.padding,
                                                          activation=self.activation,
                                                          dropout_rate=self.dropout_rate,
                                                          use_batch_norm=self.use_batch_norm,
                                                          kernel_initializer=self.kernel_initializer,
                                                          last_block=len(self.residual_blocks) + 1 == total_num_blocks,
                                                          name='residual_block_{}'.format(len(self.residual_blocks))))
                # build newest residual block
                self.residual_blocks[-1].build(self.build_output_shape)
                self.build_output_shape = self.residual_blocks[-1].res_output_shape

        # this is done to force keras to add the layers in the list to self._layers
        for layer in self.residual_blocks:
            self.__setattr__(layer.name, layer)

        self.lambda_layer = Lambda(lambda tt: tt[:, -1, :])
        self.lambda_output_shape = self.lambda_layer.compute_output_shape(self.build_output_shape)

    def compute_output_shape(self, input_shape):
        """
        Overridden in case keras uses it somewhere... no idea. Just trying to avoid future errors.
        """
        if not self.built:
            self.build(input_shape)
        if not self.return_sequences:
            return self.lambda_output_shape
        else:
            return self.build_output_shape

    def call(self, inputs, training=None):
        x = inputs
        x = self.main_conv1D(x)
        skip_connections = list()
        for layer in self.residual_blocks:
            x, skip_out = layer(x, training=training)
            skip_connections.append(skip_out)

        if self.use_skip_connections:
            x = add(skip_connections)
        if not self.return_sequences:
            x = self.lambda_layer(x)
        return x

    def get_config(self):
        """
        Returns the config of the layer. This is used for saving and loading from a model.
        :return: python dictionary with specs to rebuild layer
        """
        config = super(TCN, self).get_config()
        config['nb_filters'] = self.nb_filters
        config['kernel_size'] = self.kernel_size
        config['nb_stacks'] = self.nb_stacks
        config['dilations'] = self.dilations
        config['padding'] = self.padding
        config['use_skip_connections'] = self.use_skip_connections
        config['dropout_rate'] = self.dropout_rate
        config['return_sequences'] = self.return_sequences
        config['activation'] = self.activation
        config['use_batch_norm'] = self.use_batch_norm
        config['kernel_initializer'] = self.kernel_initializer
        return config
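
# Minimal usage sketch for the TCN layer (illustrative only, not shipped with this
# module; same import assumption as above: standalone `keras`, or `tensorflow.keras`
# if that is what the module uses).
def _tcn_example():
    """Builds a small many-to-one regression model on variable-length univariate sequences."""
    from keras.layers import Input, Dense
    from keras.models import Model
    inp = Input(shape=(None, 1))  # (batch_size, timesteps, input_dim), variable length
    # return_sequences=False keeps only the last timestep, so the Dense head maps the
    # final TCN features to a single value per sequence.
    x = TCN(nb_filters=32,
            kernel_size=2,
            dilations=(1, 2, 4, 8),
            return_sequences=False)(inp)
    out = Dense(1)(x)
    model = Model(inp, out)
    model.compile(optimizer='adam', loss='mse')
    return model
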
class ClockworkRNN(Layer):
    """Clockwork RNN ([Koutnik et al., 2014](https://arxiv.org/abs/1402.3511)).

    Constructs a CW-RNN from RNNs of a given type.

    # Arguments
        periods: List of positive integers. The periods of each internal RNN.
        units_per_period: Positive integer or list of positive integers.
            Number of units for each internal RNN. If list, it must have the
            same length as `periods`.
        input_shape: Shape of the input data.
        output_units: Positive integer. Dimensionality of the output space.
        output_activation: String or callable. Activation function to use. If
            you don't specify anything, no activation is applied (i.e., "linear"
            activation: `a(x) = x`).
        return_sequences: Boolean (default False). Whether to return the last
            output in the output sequence, or the full sequence.
        sort_ascending: Boolean (default False). Whether to sort the periods in
            ascending or descending order (default, as in the original paper).
        include_top: Whether to include the fully-connected layer at the top of
            the network.
        dense_kwargs: Dictionary. Optional arguments for the trailing Dense unit
            (`activation` and `units` keys will be ignored).
        rnn_dtype: The type of RNN to use as clockwork layer. Can be a string
            ("SimpleRNN", "GRU", "LSTM", "CuDNNGRU", "CuDNNLSTM") or any RNN
            subclass.
        rnn_kwargs: Dictionary. Optional arguments for the internal RNNs
            (`return_sequences` and `return_state` will be ignored).
    """

    def __init__(self,
                 periods,
                 units_per_period,
                 output_units,
                 output_activation='linear',
                 return_sequences=False,
                 sort_ascending=False,
                 include_top=True,
                 dense_kwargs=None,
                 rnn_dtype="SimpleRNN",
                 rnn_kwargs=None,
                 **kwargs):
        if type(rnn_dtype) is str:
            self.rnn_dtype = getattr(layers, rnn_dtype)
        else:
            self.rnn_dtype = rnn_dtype

        ClockworkRNN.__name__ = "Clockwork" + self.rnn_dtype.__name__
        super(ClockworkRNN, self).__init__(**kwargs)

        if type(units_per_period) is list:
            self.units_per_period = units_per_period
        else:
            self.units_per_period = [units_per_period] * len(periods)

        self.periods = periods

        self.rnn_kwargs = rnn_kwargs or {}
        self.rnn_kwargs['return_sequences'] = True
        self.rnn_kwargs['return_state'] = False
        self.rnn_kwargs.pop("units", True)

        self.dense_kwargs = dense_kwargs or {}
        self.dense_kwargs['activation'] = output_activation
        self.dense_kwargs['units'] = output_units

        self.include_top = include_top
        self.return_sequences = return_sequences
        self.sort_ascending = sort_ascending

        self.blocks = []

    def build(self, input_shape):
        last_shape = input_shape
        output_shapes = []
        for period, units in sorted(zip(self.periods, self.units_per_period),
                                    reverse=not self.sort_ascending):
            block, output_shape, last_shape = self._build_clockwork_block(units, period, last_shape)
            output_shapes.append(output_shape)
            self.blocks.append(block)

        self.concat_all = Concatenate()
        self.concat_all.build(output_shapes)
        last_shape = self.concat_all.compute_output_shape(output_shapes)

        if not self.return_sequences:
            self.lambda_last = Lambda(lambda x: x[:, -1])
            self.lambda_last.build(last_shape)
            last_shape = self.lambda_last.compute_output_shape(last_shape)

        if self.include_top:
            if self.return_sequences:
                self.dense = TimeDistributed(Dense(**self.dense_kwargs))
            else:
                self.dense = Dense(**self.dense_kwargs)

            self.dense.build(last_shape)
            self._trainable_weights.extend(self.dense.trainable_weights)
            last_shape = self.dense.compute_output_shape(last_shape)

        super(ClockworkRNN, self).build(input_shape)

    def call(self, x):
        rnns = []
        to_next_block = x
        for block in self.blocks:
            to_dense, to_next_block = self._call_clockwork_block(to_next_block, *block)
            rnns.append(to_dense)
        out = self.concat_all(rnns)

        if not self.return_sequences:
            out = self.lambda_last(out)

        if self.include_top:
            out = self.dense(out)

        return out

    def compute_output_shape(self, input_shape):
        if self.include_top:
            out_dim = self.dense_kwargs['units']
        else:
            out_dim = np.sum(self.units_per_period)

        if self.return_sequences:
            return input_shape[:-1] + (out_dim,)
        else:
            return input_shape[:-2] + (out_dim,)

    def _delay(self, x):
        return K.temporal_padding(x, (1, 0))[:, :-1]

    def _crop(self, x, timesteps):
        return x[:, :K.cast(timesteps, "int32")]

    def _build_clockwork_block(self, units, period, input_shape):
        output_shape = input_shape[:-1] + (units,)

        pool = MaxPooling1D(1, period)
        rnn = self.rnn_dtype(units=units, **self.rnn_kwargs)
        unpool = UpSampling1D(period)
        crop = Lambda(lambda x: self._crop(x[0], x[1]),
                      output_shape=output_shape[1:])
        delay = Lambda(lambda x: self._delay(x),
                       output_shape=output_shape[1:])
        concat = Concatenate()
        block = (pool, rnn, unpool, crop, delay, concat)

        pool.build(input_shape)
        pool_output_shape = pool.compute_output_shape(input_shape)

        rnn.build(pool_output_shape)
        self._trainable_weights.extend(rnn.trainable_weights)
        rnn_output_shape = rnn.compute_output_shape(pool_output_shape)

        unpool.build(rnn_output_shape)

        crop.build([unpool.compute_output_shape(rnn_output_shape), ()])
        delay.build(output_shape)
        concat.build([input_shape, output_shape])

        return block, output_shape, \
            concat.compute_output_shape([input_shape, output_shape])

    def _call_clockwork_block(self, x, pool, rnn, unpool, crop, delay, concat):
        pooled = pool(x)
        rnn_out = rnn(pooled)
        unpooled = unpool(rnn_out)
        to_dense = crop([unpooled, K.shape(x)[1]])
        delayed = delay(to_dense)
        to_next_block = concat([x, delayed])
        return to_dense, to_next_block

    def get_config(self):
        config = super(ClockworkRNN, self).get_config()
        config['units_per_period'] = self.units_per_period
        config['periods'] = self.periods
        config['rnn_dtype'] = self.rnn_dtype.__name__
        config['rnn_kwargs'] = self.rnn_kwargs
        config['dense_kwargs'] = self.dense_kwargs
        config['include_top'] = self.include_top
        config['return_sequences'] = self.return_sequences
        config['sort_ascending'] = self.sort_ascending
        return config
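
# Minimal usage sketch for ClockworkRNN (illustrative only; the hyperparameters are
# arbitrary, and the imports assume standalone `keras` as in the sketches above).
def _clockwork_rnn_example():
    """Builds a CW-RNN with three GRU sub-modules ticking at periods 1, 2 and 4."""
    from keras.layers import Input
    from keras.models import Model
    inp = Input(shape=(32, 8))  # (timesteps, features), fixed-length sequences
    # Each period gets its own GRU; faster modules also receive the delayed outputs
    # of the slower ones, following Koutnik et al., 2014.
    out = ClockworkRNN(periods=[1, 2, 4],
                       units_per_period=16,
                       output_units=1,
                       rnn_dtype="GRU")(inp)
    return Model(inp, out)
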