def __call__(self,
             inputs: tf.Tensor,
             state: PhasedUGRNNStateTuple,
             scope: Optional[str] = None) -> Tuple[PhasedUGRNNOutputTuple, PhasedUGRNNStateTuple]:
    """
    Executes a single step of the phased UGRNN cell.

    Args:
        inputs: The input features for this step.
        state: A pair holding the previous hidden state and the current time counter.
        scope: Optional name of the variable scope. Defaults to the name of this class.
    Returns:
        A pair of (output tuple, state tuple). The output tuple carries the next hidden
        state and the time gate. The state tuple carries the next hidden state and the
        time counter advanced by one.
    """
    prev_hidden, step = state

    if scope is None:
        scope = type(self).__name__

    with tf.compat.v1.variable_scope(scope):
        # Candidate state from the standard UGRNN update, [B, D]
        candidate = ugrnn(inputs=inputs,
                          state=prev_hidden,
                          W_transform=self.W_transform,
                          b_transform=self.b_transform,
                          activation=self._activation)

        # Regularization noise on the candidate state
        candidate = apply_noise(candidate, scale=self._recurrent_noise)

        # Time oscillation gate for the current step
        gate = time_gate(time=step,
                         period=self.period,
                         on_fraction=self._on_fraction,
                         shift=self.shift,
                         leak_rate=self._leak_rate)

        # Blend the candidate with the previous state according to the gate
        updated_part = gate * candidate
        carried_part = (1 - gate) * prev_hidden
        next_hidden = updated_part + carried_part

        next_state = PhasedUGRNNStateTuple(next_hidden, step + 1)
        cell_output = PhasedUGRNNOutputTuple(next_hidden, gate)

    return cell_output, next_state
def __call__(self, inputs: tf.Tensor, state: tf.Tensor, scope=None) -> Tuple[tf.Tensor, tf.Tensor]:
    """
    Executes a single step of the standard UGRNN cell.

    Args:
        inputs: The input features for this step.
        state: The previous hidden state.
        scope: Optional name of the variable scope. Defaults to the name of this class.
    Returns:
        A pair (output, state). Both entries are the same next-state tensor.
    """
    if scope is None:
        scope = type(self).__name__

    with tf.compat.v1.variable_scope(scope):
        # Standard UGRNN update, [B, D]
        updated = ugrnn(inputs=inputs,
                        state=state,
                        W_transform=self.W_transform,
                        b_transform=self.b_transform,
                        activation=self._activation)

        # Regularization noise on the new state
        updated = apply_noise(updated, scale=self._recurrent_noise)

    return updated, updated
def __call__(self,
             inputs: tf.Tensor,
             state: SkipUGRNNStateTuple,
             scope=None) -> Tuple[SkipUGRNNOutputTuple, SkipUGRNNStateTuple]:
    """
    Executes a single step of the Skip UGRNN cell.

    Args:
        inputs: The input features for this step.
        state: A pair holding the previous hidden state and the previous cumulative
            state update probability.
        scope: Optional name of the variable scope. Defaults to the name of this class.
    Returns:
        A pair of (output tuple, state tuple). The output tuple carries the next hidden
        state, the binary state update gate and the update probability increment. The
        state tuple carries the next hidden state and the next cumulative update probability.
    """
    prev_hidden, prev_cum_prob = state

    if scope is None:
        scope = type(self).__name__

    with tf.compat.v1.variable_scope(scope):
        # Candidate state from the standard UGRNN update, [B, D]
        candidate = ugrnn(inputs=inputs,
                          state=prev_hidden,
                          W_transform=self.W_transform,
                          b_transform=self.b_transform,
                          activation=self._activation)

        # Regularization noise on the candidate state
        candidate = apply_noise(candidate, scale=self._recurrent_noise)

        # The Skip portion: binarize the cumulative update probability into a [B, 1] gate
        update_gate = binarize(prev_cum_prob)

        # Either take the candidate (gate on) or copy the previous state (gate off), [B, D]
        taken_part = update_gate * candidate
        copied_part = (1 - update_gate) * prev_hidden
        next_hidden = taken_part + copied_part

        # Increment of the update probability derived from the new state, [B, 1]
        delta_logits = tf.matmul(next_hidden, self.W_state) + self.b_state
        delta_prob = tf.math.sigmoid(delta_logits)

        # Accumulated probability, with the increment capped so the sum stays at most one
        capped_delta = tf.minimum(delta_prob, 1.0 - prev_cum_prob)
        accumulated_prob = prev_cum_prob + capped_delta

        # Restart from the increment after an update; otherwise keep accumulating
        restarted_part = update_gate * delta_prob
        accumulated_part = (1 - update_gate) * accumulated_prob
        next_cum_prob = restarted_part + accumulated_part

        next_state = SkipUGRNNStateTuple(next_hidden, next_cum_prob)
        cell_output = SkipUGRNNOutputTuple(next_hidden, update_gate, delta_prob)

    return cell_output, next_state
def __call__(self, inputs: tf.Tensor, state: tf.Tensor, scope=None) -> Tuple[BudgetOutput, tf.Tensor]:
    """
    Executes a single step of the Budget UGRNN cell.

    Args:
        inputs: The step inputs. The last axis is split into two equal halves: the input
            features followed by a previous state to fuse into the current state.
        state: The current recurrent state.
        scope: Optional name of the variable scope. Defaults to the name of this class.
    Returns:
        A pair of (BudgetOutput, next state). The BudgetOutput holds the next state as
        `output` and the fused state as `fusion`.
    """
    if scope is None:
        scope = type(self).__name__

    with tf.compat.v1.variable_scope(scope):
        # Separate the features from the attached previous state, two [B, D] tensors
        features, attached_state = tf.split(inputs, num_or_size_splits=2, axis=-1)

        # Fusion gate computed from both states
        joined_states = tf.concat([state, attached_state], axis=-1)  # [B, 2 * D]
        gate_logits = tf.matmul(joined_states, self.W_fusion)  # [B, D]
        fusion_gate = self._fusion_mask * (1.0 - tf.math.sigmoid(gate_logits + self.b_fusion))  # [B, D]

        # Mix the current state with the attached previous state
        kept_part = (1.0 - fusion_gate) * state
        fused_part = fusion_gate * attached_state
        fused = kept_part + fused_part

        # Standard UGRNN update starting from the fused state, [B, D]
        updated = ugrnn(inputs=features,
                        state=fused,
                        W_transform=self.W_transform,
                        b_transform=self.b_transform,
                        activation=self._activation)

        # Regularization noise on the new state
        updated = apply_noise(updated, scale=self._recurrent_noise)

    return BudgetOutput(output=updated, fusion=fused), updated
def _make_model(self, is_train: bool):
    """
    Builds the computation graph for this model.

    The inputs are perturbed with noise, embedded, passed through the transformation
    layer selected by `self.model_type` (NBOW, CONV, RNN, SKIP_RNN or PHASED_RNN) and
    finally through an output MLP. The resulting tensors are stored in `self._ops`.

    Args:
        is_train: Whether the graph is built for training. Not read by this method.
    Raises:
        ValueError: If `self.model_type` is not one of the supported model types.
    """
    model_params = self.hypers.model_params

    state_size = model_params['state_size']
    batch_size = tf.shape(self._placeholders[INPUTS])[0]
    activation_noise = self._placeholders[ACTIVATION_NOISE]
    dropout_keep_rate = self._placeholders[DROPOUT_KEEP_RATE]

    # Apply input noise
    inputs = apply_noise(self._placeholders[INPUTS], scale=activation_noise)

    # Embed the input sequence into a [B, T, D] tensor
    embeddings, _ = dense(inputs=inputs,
                          units=state_size,
                          activation=model_params['embedding_activation'],
                          use_bias=True,
                          activation_noise=activation_noise,
                          name=EMBEDDING_NAME)

    # Apply the transformation layer. The output is a [B, T, D] tensor of transformed inputs
    # for each model type.
    if self.model_type == SequenceModelType.NBOW:
        # Apply the MLP transformation. Result is a [B, T, D] tensor
        transformed, _ = mlp(inputs=embeddings,
                             output_size=state_size,
                             hidden_sizes=model_params['mlp_hidden_units'],
                             activations=model_params['mlp_activation'],
                             dropout_keep_rate=dropout_keep_rate,
                             activation_noise=activation_noise,
                             should_activate_final=True,
                             should_bias_final=True,
                             should_dropout_final=True,
                             name=TRANSFORM_NAME)

        # Compute weights for aggregation layer, [B, T, 1]
        aggregation_weights, _ = dense(inputs=transformed,
                                       units=1,
                                       activation='sigmoid',
                                       activation_noise=activation_noise,
                                       use_bias=True,
                                       name=AGGREGATION_NAME)

        # Pool the data in a successive fashion, [B, T, D]
        transformed = successive_pooling(inputs=transformed,
                                         aggregation_weights=aggregation_weights,
                                         name='{0}-pool'.format(AGGREGATION_NAME),
                                         seq_length=self.metadata[SEQ_LENGTH])
    elif self.model_type == SequenceModelType.CONV:
        # Apply the convolution filter, [B, T, D]
        filtered = conv_1d(inputs=embeddings,
                           filter_width=model_params['conv_filter_width'],
                           stride=1,
                           activation=model_params['conv_activation'],
                           activation_noise=activation_noise,
                           dropout_keep_rate=dropout_keep_rate,
                           use_dropout=True,
                           name=TRANSFORM_NAME)

        # Compute the aggregation weights, [B, T, 1]
        aggregation_weights, _ = dense(inputs=filtered,
                                       units=1,
                                       activation='sigmoid',
                                       activation_noise=activation_noise,
                                       use_bias=True,
                                       name=AGGREGATION_NAME)

        # Pool the data in a successive fashion, [B, T, D]
        transformed = successive_pooling(inputs=filtered,
                                         aggregation_weights=aggregation_weights,
                                         name='{0}-pool'.format(AGGREGATION_NAME),
                                         seq_length=self.metadata[SEQ_LENGTH])
    elif self.model_type == SequenceModelType.RNN:
        cell = make_rnn_cell(cell_class=CellClass.STANDARD,
                             cell_type=CellType[model_params['rnn_cell_type'].upper()],
                             units=state_size,
                             activation=model_params['rnn_activation'],
                             recurrent_noise=activation_noise,
                             name=RNN_CELL_NAME)

        initial_state = cell.zero_state(batch_size=batch_size, dtype=tf.float32)

        # The final state is not needed; only the per-step outputs are used
        rnn_outputs, _ = tf.compat.v1.nn.dynamic_rnn(cell=cell,
                                                     inputs=embeddings,
                                                     initial_state=initial_state,
                                                     dtype=tf.float32,
                                                     scope=TRANSFORM_NAME)
        transformed = rnn_outputs  # [B, T, D]
    elif self.model_type == SequenceModelType.SKIP_RNN:
        cell = make_rnn_cell(cell_class=CellClass.SKIP,
                             cell_type=CellType[model_params['rnn_cell_type'].upper()],
                             units=state_size,
                             activation=model_params['rnn_activation'],
                             recurrent_noise=activation_noise,
                             name=RNN_CELL_NAME)

        initial_state = cell.get_initial_state(inputs=embeddings, batch_size=batch_size, dtype=tf.float32)

        # Apply RNN. The final state is not needed.
        rnn_outputs, _ = tf.compat.v1.nn.dynamic_rnn(cell=cell,
                                                     inputs=embeddings,
                                                     initial_state=initial_state,
                                                     dtype=tf.float32,
                                                     scope=TRANSFORM_NAME)
        transformed = rnn_outputs.output  # [B, T, D]
        self._ops[SKIP_GATES] = tf.squeeze(rnn_outputs.state_update_gate, axis=-1)  # [B, T]
    elif self.model_type == SequenceModelType.PHASED_RNN:
        period_init = self.metadata[SEQ_LENGTH]

        # The leak rate is read from `self._placeholders`, the same dictionary used for
        # every other placeholder in this method.
        cell = make_rnn_cell(cell_class=CellClass.PHASED,
                             cell_type=CellType[model_params['rnn_cell_type'].upper()],
                             units=state_size,
                             activation=model_params['rnn_activation'],
                             recurrent_noise=activation_noise,
                             on_fraction=model_params['on_fraction'],
                             period_init=period_init,
                             leak_rate=self._placeholders[LEAK_RATE],
                             name=RNN_CELL_NAME)

        initial_state = cell.get_initial_state(inputs=embeddings, batch_size=batch_size, dtype=tf.float32)

        # The final state is not needed; only the per-step outputs are used
        rnn_outputs, _ = tf.compat.v1.nn.dynamic_rnn(cell=cell,
                                                     inputs=embeddings,
                                                     initial_state=initial_state,
                                                     dtype=tf.float32,
                                                     scope=TRANSFORM_NAME)
        transformed = rnn_outputs.output  # [B, T, D]
        self._ops[PHASE_GATES] = tf.squeeze(rnn_outputs.time_gate, axis=-1)  # [B, T]
    else:
        raise ValueError('Unknown standard model: {0}'.format(self.model_type))

    # Reshape the output to match the sequence length. The output is tiled along the sequence length
    # automatically via broadcasting rules.
    if model_params.get('has_single_output', False):
        transformed = transformed[:, -1, :]  # Take the final transformed state, [B, D]
        expected_output = self._placeholders[OUTPUT]
    else:
        expected_output = tf.expand_dims(self._placeholders[OUTPUT], axis=-1)  # [B, 1, 1]

    # NOTE(review): `output_size` is computed but never used; the output layer below is sized with
    # `self.num_output_features`. Confirm that attribute accounts for MULTI_CLASSIFICATION
    # (NUM_CLASSES), otherwise `output_size` should be passed to the MLP instead.
    output_size = self.metadata[NUM_OUTPUT_FEATURES] if self.output_type != OutputType.MULTI_CLASSIFICATION \
        else self.metadata[NUM_CLASSES]

    # Create the output layer, result is a [B, T, C] tensor or a [B, C] tensor depending on the output type
    output, _ = mlp(inputs=transformed,
                    output_size=self.num_output_features,
                    hidden_sizes=model_params['output_hidden_units'],
                    activations=model_params['output_hidden_activation'],
                    dropout_keep_rate=dropout_keep_rate,
                    activation_noise=activation_noise,
                    should_bias_final=True,
                    should_activate_final=False,
                    should_dropout_final=False,
                    name=OUTPUT_LAYER_NAME)

    if self.output_type == OutputType.BINARY_CLASSIFICATION:
        classification_output = compute_binary_classification_output(model_output=output,
                                                                      labels=expected_output)

        self._ops[LOGITS] = classification_output.logits
        self._ops[PREDICTION] = classification_output.predictions
        self._ops[ACCURACY] = classification_output.accuracy
    elif self.output_type == OutputType.MULTI_CLASSIFICATION:
        classification_output = compute_multi_classification_output(model_output=output,
                                                                     labels=expected_output)

        self._ops[LOGITS] = classification_output.logits
        self._ops[PREDICTION] = classification_output.predictions
        self._ops[ACCURACY] = classification_output.accuracy
    else:
        # Non-classification outputs are exposed directly as the prediction
        self._ops[PREDICTION] = output
def _make_rnn_model(self, is_train: bool):
    """
    Builds an Adaptive RNN Model.

    The inputs are perturbed with noise and embedded. When the stride length is one, a
    standard cell processes the whole sequence and the outputs at the end of every chunk
    are collected. Otherwise a budget cell processes one strided sub-sequence per output
    level, feeding each level the outputs of the previous level. Stop predictions and the
    pooled model predictions are stored in `self._ops`.

    Args:
        is_train: Whether the graph is built for training. Not read by this method.
    """
    params = self.hypers.model_params

    hidden_units = params['state_size']
    num_samples = tf.shape(self._placeholders[INPUTS])[0]
    noise_scale = self._placeholders[ACTIVATION_NOISE]
    keep_rate = self._placeholders[DROPOUT_KEEP_RATE]

    # Perturb the raw inputs with noise
    noisy_inputs = apply_noise(self._placeholders[INPUTS], scale=noise_scale)

    # Input embedding features, a [B, T, D] tensor
    embedded, _ = dense(inputs=noisy_inputs,
                        units=hidden_units,
                        activation=params['embedding_activation'],
                        activation_noise=noise_scale,
                        use_bias=True,
                        name=EMBEDDING_NAME)

    # A stride of one uses the standard cell; any other stride uses the budget cell
    if self.stride_length == 1:
        cell_class = CellClass.STANDARD
    else:
        cell_class = CellClass.BUDGET

    cell = make_rnn_cell(cell_class=cell_class,
                         cell_type=CellType[params['rnn_cell_type'].upper()],
                         units=hidden_units,
                         activation=params['rnn_activation'],
                         recurrent_noise=noise_scale,
                         name=RNN_CELL_NAME)

    # Run the RNN. Both branches leave a [B, L, D] tensor in `transformed`.
    if self.stride_length == 1:
        start_state = cell.get_initial_state(inputs=embedded, batch_size=num_samples, dtype=tf.float32)
        sequence_outputs, _ = tf.compat.v1.nn.dynamic_rnn(cell=cell,
                                                          inputs=embedded,
                                                          initial_state=start_state,
                                                          dtype=tf.float32,
                                                          scope=TRANSFORM_NAME)

        # Keep only the outputs at the last position of every chunk
        chunk_size = int(self.seq_length / self.num_outputs)
        chunk_ends = list(range(chunk_size - 1, self.seq_length, chunk_size))
        transformed = tf.gather(sequence_outputs, indices=chunk_ends, axis=1)  # [B, L, D]
        stop_states = transformed  # [B, L, D]
    else:
        # Non-trainable zero states fed to the first level, tiled to [B, S, D]
        carried_states = tf.compat.v1.get_variable(name='prev-states',
                                                   initializer=tf.zeros_initializer(),
                                                   shape=[1, 1, hidden_units],
                                                   dtype=tf.float32,
                                                   trainable=False)
        carried_states = tf.tile(carried_states, multiples=(num_samples, self.samples_per_seq, 1))

        final_states: List[tf.Tensor] = []
        first_states: List[tf.Tensor] = []
        for level in range(self.num_outputs):
            # Inputs of this sub-sequence; S is the number of samples per sub-sequence
            sample_indices = list(range(level, self.seq_length, self.stride_length))
            level_embedded = tf.gather(embedded, indices=sample_indices, axis=1)  # [B, S, D]

            # Attach the previous level's states to the inputs, [B, S, 2 * D]
            level_inputs = tf.concat([level_embedded, carried_states], axis=-1)

            # The fusion mask is zero for the first level and one for all later levels
            cell.set_fusion_mask(mask_value=1 if level > 0 else 0)

            # Run the RNN over this sub-sequence, producing [B, S, D] outputs
            start_state = cell.get_initial_state(inputs=level_inputs,
                                                 batch_size=num_samples,
                                                 dtype=tf.float32)
            level_result, level_final = tf.compat.v1.nn.dynamic_rnn(cell=cell,
                                                                    inputs=level_inputs,
                                                                    initial_state=start_state,
                                                                    dtype=tf.float32,
                                                                    scope=TRANSFORM_NAME)

            final_states.append(tf.expand_dims(level_final, axis=1))
            first_states.append(tf.expand_dims(level_result.output[:, 0, :], axis=1))

            # The outputs of this level become the previous states of the next level
            carried_states = level_result.output

        # Stack the final states and the first states of every level into [B, L, D] tensors
        transformed = tf.concat(final_states, axis=1)
        stop_states = tf.concat(first_states, axis=1)

    # Stop output, a [B, L, 1] tensor
    stop_output, _ = mlp(inputs=stop_states,
                         output_size=1,
                         hidden_sizes=params['stop_output_hidden_units'],
                         activations=params['stop_output_activation'],
                         activation_noise=noise_scale,
                         should_bias_final=True,
                         should_activate_final=False,
                         dropout_keep_rate=keep_rate,
                         name=STOP_PREDICTION)

    stop_logits = tf.squeeze(stop_output, axis=-1)  # [B, L]
    self._ops[STOP_OUTPUT_LOGITS] = stop_logits
    self._ops[STOP_OUTPUT_NAME] = tf.math.sigmoid(stop_logits)  # [B, L]

    # Predictions for every level, a [B, L, K] tensor
    output, _ = mlp(inputs=transformed,
                    output_size=self.num_output_features,
                    hidden_sizes=params['output_hidden_units'],
                    activations=params['output_hidden_activation'],
                    activation_noise=noise_scale,
                    should_bias_final=True,
                    should_activate_final=False,
                    dropout_keep_rate=keep_rate,
                    name=OUTPUT_LAYER_NAME)

    # Pooling layer parameters used to mix the outputs from each level
    pool_kernel = tf.compat.v1.get_variable(name='{0}-kernel'.format(AGGREGATION_NAME),
                                            shape=[hidden_units * 2, 1],
                                            initializer=tf.compat.v1.initializers.glorot_uniform(),
                                            trainable=True)
    pool_bias = tf.compat.v1.get_variable(name='{0}-bias'.format(AGGREGATION_NAME),
                                          shape=[1, 1],
                                          initializer=tf.compat.v1.initializers.random_uniform(minval=-0.7,
                                                                                               maxval=0.7),
                                          trainable=True)

    # The pooling weights returned alongside the pooled output are not used here
    output, _ = pool_predictions(pred=output,
                                 states=transformed,
                                 W=pool_kernel,
                                 b=pool_bias,
                                 seq_length=self.num_outputs,
                                 activation_noise=noise_scale,
                                 name=AGGREGATION_NAME)

    # Reshape the labels to [B, 1, 1]
    expected_output = tf.expand_dims(self._placeholders[OUTPUT], axis=-1)

    # Select how the output values are computed from the output type
    if self.output_type == OutputType.BINARY_CLASSIFICATION:
        compute_classification = compute_binary_classification_output
    elif self.output_type == OutputType.MULTI_CLASSIFICATION:
        compute_classification = compute_multi_classification_output
    else:
        compute_classification = None

    if compute_classification is None:
        # Non-classification outputs are exposed directly as the prediction
        self._ops[PREDICTION] = output
    else:
        result = compute_classification(model_output=output, labels=expected_output)
        self._ops[LOGITS] = result.logits
        self._ops[PREDICTION] = result.predictions
        self._ops[ACCURACY] = result.accuracy
def dense(inputs: tf.Tensor,
          units: int,
          activation: Optional[str],
          activation_noise: tf.Tensor,
          name: str,
          use_bias: bool,
          dropout_keep_rate: Optional[Union[float, tf.Tensor]] = None) -> Tuple[tf.Tensor, tf.Tensor]:
    """
    Builds a dense (fully-connected) feed-forward layer.

    Args:
        inputs: The input tensor of shape [B, ..., D].
        units: The number of output units (K).
        activation: Name of the activation function. If the name resolves to no
            function, the layer is linear.
        activation_noise: The noise scale applied to the activations.
        name: Prefix for the names of the created trainable variables.
        use_bias: Whether to add a bias term to the linear transformation.
        dropout_keep_rate: Optional dropout keep rate. When None, no dropout is applied.
    Returns:
        A tuple of two elements: (1) the layer output as a [B, ..., K] tensor and
        (2) the pre-activation values (after the bias, before the activation, noise and
        dropout), which are included for debugging purposes.
    """
    # Number of input features (D)
    feature_dim = inputs.get_shape()[-1]

    # Trainable weight matrix, [D, K]
    kernel = tf.compat.v1.get_variable(name='{0}-kernel'.format(name),
                                       shape=[feature_dim, units],
                                       initializer=tf.compat.v1.initializers.glorot_uniform(),
                                       trainable=True)

    linear = tf.matmul(inputs, kernel)  # [B, ..., K]

    if use_bias:
        # Trainable bias of shape [1, K]
        bias = tf.compat.v1.get_variable(name='{0}-bias'.format(name),
                                         shape=[1, units],
                                         initializer=tf.compat.v1.initializers.random_uniform(minval=-0.7,
                                                                                              maxval=0.7),
                                         trainable=True)
        linear = linear + bias

    # Activation function (if any)
    activation_fn = get_activation(activation)
    activated = linear if activation_fn is None else activation_fn(linear)

    # Noise regularization
    result = apply_noise(activated, scale=activation_noise)

    if dropout_keep_rate is None:
        return result, linear

    return tf.nn.dropout(result, rate=1.0 - dropout_keep_rate), linear
def conv_1d(inputs: tf.Tensor,
            filter_width: int,
            stride: int,
            activation: Optional[str],
            activation_noise: float,
            dropout_keep_rate: tf.Tensor,
            use_dropout: bool,
            name: str) -> tf.Tensor:
    """
    Performs a 1d convolution over the given inputs.

    This function uses the same TensorFlow API surface as the `dense` layer
    (`tf.compat.v1` variable utilities and the `rate` argument of `tf.nn.dropout`)
    rather than TF1-only endpoints.

    Args:
        inputs: A [B, T, D] tensor of features (D) for each seq element (T) and batch sample (B)
        filter_width: The width of the convolution filter. Must be at least one.
        stride: The convolution stride. Must be at least one.
        activation: The name of the activation function. If the name resolves to no
            function, then we apply a linear activation.
        activation_noise: The noise scale to apply to the final activations.
        dropout_keep_rate: The dropout keep rate to apply to the transformed representation.
        use_dropout: Whether to apply dropout.
        name: The name of this layer. Used as the variable scope.
    Returns:
        A tensor that is the result of applying the 1d convolution filter to the inputs. The
        convolution uses 'SAME' padding and D output channels, so the result is [B, T, D]
        when the stride is one.
    """
    assert filter_width >= 1, 'Must have a filter width of at least one. Got: {0}'.format(filter_width)
    assert stride >= 1, 'Must have a stride length of at least one. Got: {0}'.format(stride)

    with tf.compat.v1.variable_scope(name):
        # Create the (trainable) convolution filter
        num_features = inputs.get_shape()[-1]  # D
        conv_filter = tf.compat.v1.get_variable(shape=[filter_width, num_features, num_features],
                                                initializer=tf.compat.v1.initializers.glorot_uniform(),
                                                name='filter',
                                                dtype=tf.float32)

        # Create the (trainable) bias
        bias = tf.compat.v1.get_variable(shape=[1, 1, num_features],
                                         initializer=tf.compat.v1.initializers.random_uniform(minval=-0.7,
                                                                                              maxval=0.7),
                                         name='bias',
                                         dtype=tf.float32)

        # Apply the convolution filter. The inputs are passed positionally because the
        # first argument is named `value` in the TF1 signature and `input` in the TF2 one.
        transformed = tf.nn.conv1d(inputs,
                                   filters=conv_filter,
                                   stride=stride,
                                   padding='SAME',
                                   data_format='NWC')
        transformed = transformed + bias

        # Apply the activation function
        activation_fn = get_activation(activation)
        if activation_fn is not None:
            transformed = activation_fn(transformed)

        # Apply the activation noise
        transformed = apply_noise(transformed, scale=activation_noise)

        # Apply dropout if specified. `rate` is the drop probability, i.e. one minus the keep rate.
        if use_dropout:
            transformed = tf.nn.dropout(transformed, rate=1.0 - dropout_keep_rate)

    return transformed