Exemplo n.º 1
0
    def __call__(
        self,
        inputs: tf.Tensor,
        state: PhasedUGRNNStateTuple,
        scope: Optional[str] = None
    ) -> Tuple[PhasedUGRNNOutputTuple, PhasedUGRNNStateTuple]:
        """
        Executes a single step of the phased UGRNN cell.

        Args:
            inputs: The input features for this step.
            state: A tuple holding the previous hidden state and the current time value.
            scope: Optional variable scope name. Falls back to the name of this class.
        Returns:
            A pair of (output tuple, state tuple). The output tuple carries the blended
            hidden state and the time gate value; the state tuple carries the blended
            hidden state and the time value incremented by one.
        """
        hidden, step = state

        if scope is None:
            scope = type(self).__name__

        with tf.compat.v1.variable_scope(scope):
            # Candidate state from the plain UGRNN transition, [B, D]
            candidate = ugrnn(inputs=inputs,
                              state=hidden,
                              W_transform=self.W_transform,
                              b_transform=self.b_transform,
                              activation=self._activation)

            # Regularize the candidate with noise
            candidate = apply_noise(candidate, scale=self._recurrent_noise)

            # Time oscillation gate for the current step
            gate = time_gate(time=step,
                             period=self.period,
                             on_fraction=self._on_fraction,
                             shift=self.shift,
                             leak_rate=self._leak_rate)

            # Interpolate between the candidate and the previous state using the gate
            updated_part = gate * candidate
            retained_part = (1 - gate) * hidden
            blended = updated_part + retained_part

            new_state = PhasedUGRNNStateTuple(blended, step + 1)
            new_output = PhasedUGRNNOutputTuple(blended, gate)

        return new_output, new_state
Exemplo n.º 2
0
    def __call__(self, inputs: tf.Tensor, state: tf.Tensor, scope=None) -> Tuple[tf.Tensor, tf.Tensor]:
        """
        Executes a single step of the UGRNN cell.

        Args:
            inputs: The input features for this step.
            state: The previous hidden state.
            scope: Optional variable scope name. Falls back to the name of this class.
        Returns:
            The (noised) next state twice: once as the cell output and once as the new state.
        """
        if scope is None:
            scope = type(self).__name__

        with tf.compat.v1.variable_scope(scope):
            # Plain UGRNN transition, [B, D]
            updated = ugrnn(inputs=inputs,
                            state=state,
                            W_transform=self.W_transform,
                            b_transform=self.b_transform,
                            activation=self._activation)

            # Regularize the new state with noise
            updated = apply_noise(updated, scale=self._recurrent_noise)

        return updated, updated
Exemplo n.º 3
0
    def __call__(
            self,
            inputs: tf.Tensor,
            state: SkipUGRNNStateTuple,
            scope=None) -> Tuple[SkipUGRNNOutputTuple, SkipUGRNNStateTuple]:
        """
        Executes a single step of the skip UGRNN cell.

        Args:
            inputs: The input features for this step.
            state: A tuple holding the previous hidden state and the previous
                cumulative state update probability.
            scope: Optional variable scope name. Falls back to the name of this class.
        Returns:
            A pair of (output tuple, state tuple). The output tuple carries the next state,
            the binary state update gate and the update probability increment. The state
            tuple carries the next state and the new cumulative update probability.
        """
        hidden, cum_prob = state

        if scope is None:
            scope = type(self).__name__

        with tf.compat.v1.variable_scope(scope):
            # Candidate state from the plain UGRNN transition, [B, D]
            candidate = ugrnn(inputs=inputs,
                              state=hidden,
                              W_transform=self.W_transform,
                              b_transform=self.b_transform,
                              activation=self._activation)

            # Regularize the candidate with noise
            candidate = apply_noise(candidate, scale=self._recurrent_noise)

            # The skip gate is the binarized cumulative update probability, [B, 1]
            gate = binarize(cum_prob)

            # Select the candidate where the gate is on and the old state elsewhere, [B, D]
            taken = gate * candidate
            skipped = (1 - gate) * hidden
            next_state = taken + skipped

            # Increment of the update probability computed from the new state, [B, 1]
            prob_logits = tf.matmul(next_state, self.W_state)
            prob_logits = prob_logits + self.b_state
            delta_prob = tf.math.sigmoid(prob_logits)

            # Accumulate the increment without letting the total exceed one
            headroom = 1.0 - cum_prob
            increment = tf.minimum(delta_prob, headroom)
            accumulated = cum_prob + increment

            # After an update the probability restarts from the increment; otherwise it accumulates
            restarted = gate * delta_prob
            carried = (1 - gate) * accumulated
            next_cum_prob = restarted + carried

            new_state = SkipUGRNNStateTuple(next_state, next_cum_prob)
            new_output = SkipUGRNNOutputTuple(next_state, gate, delta_prob)

        return new_output, new_state
Exemplo n.º 4
0
    def __call__(self, inputs: tf.Tensor, state: tf.Tensor, scope=None) -> Tuple[BudgetOutput, tf.Tensor]:
        """
        Executes a single step of the budget UGRNN cell.

        Args:
            inputs: The step inputs. The last axis is split into two equal halves: the
                input features and a second state that is fused with `state`.
            state: The previous hidden state of this cell.
            scope: Optional variable scope name. Falls back to the name of this class.
        Returns:
            A pair of (BudgetOutput, next state). The BudgetOutput holds the next state
            as `output` and the fused state as `fusion`.
        """
        if scope is None:
            scope = type(self).__name__

        with tf.compat.v1.variable_scope(scope):
            # Separate the features from the state carried in the inputs, two [B, D] tensors
            features, carried_state = tf.split(inputs, num_or_size_splits=2, axis=-1)

            # Compute the fusion gate from both states
            both_states = tf.concat([state, carried_state], axis=-1)  # [B, 2 * D]
            fusion_logits = tf.matmul(both_states, self.W_fusion)  # [B, D]
            mask = self._fusion_mask
            gate = mask * (1.0 - tf.math.sigmoid(fusion_logits + self.b_fusion))  # [B, D]

            # Mix the two states using the gate
            own_part = (1.0 - gate) * state
            carried_part = gate * carried_state
            fused = own_part + carried_part

            # Plain UGRNN transition starting from the fused state, [B, D]
            updated = ugrnn(inputs=features,
                            state=fused,
                            W_transform=self.W_transform,
                            b_transform=self.b_transform,
                            activation=self._activation)

            # Regularize the new state with noise
            updated = apply_noise(updated, scale=self._recurrent_noise)

        result = BudgetOutput(output=updated, fusion=fused)
        return result, updated
Exemplo n.º 5
0
    def _make_model(self, is_train: bool) -> None:
        """
        Builds the computation graph for this model.

        The graph is assembled in four stages:
            (1) noise is applied to the INPUTS placeholder and the result is embedded with a dense layer,
            (2) the embeddings go through a transformation layer chosen by `self.model_type`
                (NBOW, CONV, RNN, SKIP_RNN or PHASED_RNN),
            (3) an output MLP is applied to the transformed states,
            (4) the prediction ops (and logits / accuracy for classification) are stored in `self._ops`.

        The SKIP_RNN and PHASED_RNN branches additionally store their gate values in
        `self._ops` under SKIP_GATES and PHASE_GATES respectively.

        Args:
            is_train: Whether the graph is built for training.
                NOTE(review): this argument is not read anywhere in this method.
        Raises:
            ValueError: If `self.model_type` is not one of the handled sequence model types.
        """
        state_size = self.hypers.model_params['state_size']
        # Dynamic batch size, taken from the first axis of the input placeholder
        batch_size = tf.shape(self._placeholders[INPUTS])[0]
        activation_noise = self._placeholders[ACTIVATION_NOISE]
        dropout_keep_rate = self._placeholders[DROPOUT_KEEP_RATE]

        # Apply input noise
        inputs = apply_noise(self._placeholders[INPUTS],
                             scale=activation_noise)

        # Embed the input sequence into a [B, T, D] tensor
        embeddings, _ = dense(
            inputs=inputs,
            units=state_size,
            activation=self.hypers.model_params['embedding_activation'],
            use_bias=True,
            activation_noise=activation_noise,
            name=EMBEDDING_NAME)

        # Apply the transformation layer. The output is a [B, T, D] tensor of transformed inputs for each model type.
        if self.model_type == SequenceModelType.NBOW:
            # Apply the MLP transformation. Result is a [B, T, D] tensor
            transformed, _ = mlp(
                inputs=embeddings,
                output_size=state_size,
                hidden_sizes=self.hypers.model_params['mlp_hidden_units'],
                activations=self.hypers.model_params['mlp_activation'],
                dropout_keep_rate=dropout_keep_rate,
                activation_noise=activation_noise,
                should_activate_final=True,
                should_bias_final=True,
                should_dropout_final=True,
                name=TRANSFORM_NAME)

            # Compute weights for aggregation layer, [B, T, 1]
            aggregation_weights, _ = dense(inputs=transformed,
                                           units=1,
                                           activation='sigmoid',
                                           activation_noise=activation_noise,
                                           use_bias=True,
                                           name=AGGREGATION_NAME)

            # Pool the data in a successive fashion, [B, T, D]
            transformed = successive_pooling(
                inputs=transformed,
                aggregation_weights=aggregation_weights,
                name='{0}-pool'.format(AGGREGATION_NAME),
                seq_length=self.metadata[SEQ_LENGTH])
        elif self.model_type == SequenceModelType.CONV:
            # Apply the convolution filter, [B, T, D]
            filtered = conv_1d(
                inputs=embeddings,
                filter_width=self.hypers.model_params['conv_filter_width'],
                stride=1,
                activation=self.hypers.model_params['conv_activation'],
                activation_noise=activation_noise,
                dropout_keep_rate=dropout_keep_rate,
                use_dropout=True,
                name=TRANSFORM_NAME)

            # Compute the aggregation weights, [B, T, 1]
            aggregation_weights, _ = dense(inputs=filtered,
                                           units=1,
                                           activation='sigmoid',
                                           activation_noise=activation_noise,
                                           use_bias=True,
                                           name=AGGREGATION_NAME)

            # Pool the data in a successive fashion, [B, T, D]
            transformed = successive_pooling(
                inputs=filtered,
                aggregation_weights=aggregation_weights,
                name='{0}-pool'.format(AGGREGATION_NAME),
                seq_length=self.metadata[SEQ_LENGTH])
        elif self.model_type == SequenceModelType.RNN:
            # Standard recurrent cell; the cell type is looked up by upper-cased name in CellType
            cell = make_rnn_cell(
                cell_class=CellClass.STANDARD,
                cell_type=CellType[
                    self.hypers.model_params['rnn_cell_type'].upper()],
                units=state_size,
                activation=self.hypers.model_params['rnn_activation'],
                recurrent_noise=activation_noise,
                name=RNN_CELL_NAME)

            initial_state = cell.zero_state(batch_size=batch_size,
                                            dtype=tf.float32)
            # The final state returned by dynamic_rnn is not used below
            rnn_outputs, state = tf.compat.v1.nn.dynamic_rnn(
                cell=cell,
                inputs=embeddings,
                initial_state=initial_state,
                dtype=tf.float32,
                scope=TRANSFORM_NAME)
            transformed = rnn_outputs  # [B, T, D]
        elif self.model_type == SequenceModelType.SKIP_RNN:
            cell = make_rnn_cell(
                cell_class=CellClass.SKIP,
                cell_type=CellType[
                    self.hypers.model_params['rnn_cell_type'].upper()],
                units=state_size,
                activation=self.hypers.model_params['rnn_activation'],
                recurrent_noise=activation_noise,
                name=RNN_CELL_NAME)

            initial_state = cell.get_initial_state(inputs=embeddings,
                                                   batch_size=batch_size,
                                                   dtype=tf.float32)
            # Apply RNN
            rnn_outputs, states = tf.compat.v1.nn.dynamic_rnn(
                cell=cell,
                inputs=embeddings,
                initial_state=initial_state,
                dtype=tf.float32,
                scope=TRANSFORM_NAME)
            transformed = rnn_outputs.output  # [B, T, D]
            # Expose the per-step update gates so they can be fetched alongside the predictions
            self._ops[SKIP_GATES] = tf.squeeze(rnn_outputs.state_update_gate,
                                               axis=-1)  # [B, T]
        elif self.model_type == SequenceModelType.PHASED_RNN:
            # The sequence length from the metadata is handed to the cell as `period_init`
            period_init = self.metadata[SEQ_LENGTH]

            # NOTE(review): this branch reads `self.placeholders` while the rest of the method uses
            # `self._placeholders` -- confirm that a `placeholders` attribute/property exists.
            cell = make_rnn_cell(
                cell_class=CellClass.PHASED,
                cell_type=CellType[
                    self.hypers.model_params['rnn_cell_type'].upper()],
                units=state_size,
                activation=self.hypers.model_params['rnn_activation'],
                recurrent_noise=activation_noise,
                on_fraction=self.hypers.model_params['on_fraction'],
                period_init=period_init,
                leak_rate=self.placeholders[LEAK_RATE],
                name=RNN_CELL_NAME)

            initial_state = cell.get_initial_state(inputs=embeddings,
                                                   batch_size=batch_size,
                                                   dtype=tf.float32)

            rnn_outputs, state = tf.compat.v1.nn.dynamic_rnn(
                cell=cell,
                inputs=embeddings,
                initial_state=initial_state,
                dtype=tf.float32,
                scope=TRANSFORM_NAME)
            transformed = rnn_outputs.output  # [B, T, D]
            # Expose the per-step time gates so they can be fetched alongside the predictions
            self._ops[PHASE_GATES] = tf.squeeze(rnn_outputs.time_gate,
                                                axis=-1)  # [B, T]
        else:
            raise ValueError('Unknown standard model: {0}'.format(
                self.model_type))

        # Reshape the output to match the sequence length. The output is tiled along the sequence length
        # automatically via broadcasting rules.
        if self.hypers.model_params.get('has_single_output', False):
            transformed = transformed[:,
                                      -1, :]  # Take the final transformed state, [B, D]
            expected_output = self._placeholders[OUTPUT]
        else:
            expected_output = tf.expand_dims(self._placeholders[OUTPUT],
                                             axis=-1)  # [B, 1, 1]

        # Create the output layer, result is a [B, T, C] tensor or a [B, C] tensor depending on the output type
        # NOTE(review): `output_size` is computed here but never used; the MLP below is sized with
        # `self.num_output_features` instead. Confirm which of the two is intended for
        # multi-class outputs, then either pass `output_size` to the MLP or drop this local.
        output_size = self.metadata[
            NUM_OUTPUT_FEATURES] if self.output_type != OutputType.MULTI_CLASSIFICATION else self.metadata[
                NUM_CLASSES]
        output, _ = mlp(
            inputs=transformed,
            output_size=self.num_output_features,
            hidden_sizes=self.hypers.model_params['output_hidden_units'],
            activations=self.hypers.model_params['output_hidden_activation'],
            dropout_keep_rate=dropout_keep_rate,
            activation_noise=activation_noise,
            should_bias_final=True,
            should_activate_final=False,
            should_dropout_final=False,
            name=OUTPUT_LAYER_NAME)

        # Register the prediction ops. Classification outputs also expose logits and accuracy;
        # every other output type uses the raw MLP output as the prediction.
        if self.output_type == OutputType.BINARY_CLASSIFICATION:
            classification_output = compute_binary_classification_output(
                model_output=output, labels=expected_output)
            self._ops[LOGITS] = classification_output.logits
            self._ops[PREDICTION] = classification_output.predictions
            self._ops[ACCURACY] = classification_output.accuracy
        elif self.output_type == OutputType.MULTI_CLASSIFICATION:
            classification_output = compute_multi_classification_output(
                model_output=output, labels=expected_output)
            self._ops[LOGITS] = classification_output.logits
            self._ops[PREDICTION] = classification_output.predictions
            self._ops[ACCURACY] = classification_output.accuracy
        else:
            self._ops[PREDICTION] = output
Exemplo n.º 6
0
    def _make_rnn_model(self, is_train: bool) -> None:
        """
        Builds an Adaptive RNN Model.

        The inputs are noised and embedded, then processed by a recurrent cell in one of two ways:
            * `self.stride_length == 1`: a STANDARD cell runs once over the whole sequence and the
              outputs at the end of every chunk are gathered.
            * otherwise: a BUDGET cell runs once per output level over a strided sub-sequence. Each
              run receives the sub-sequence embeddings concatenated with the outputs of the previous
              level.

        Two heads are applied to the collected states: a stop-prediction MLP (stored under
        STOP_OUTPUT_LOGITS and STOP_OUTPUT_NAME) and an output MLP whose per-level predictions
        are mixed by `pool_predictions`. The final prediction ops are stored in `self._ops`.

        Args:
            is_train: Whether the graph is built for training.
                NOTE(review): this argument is not read anywhere in this method.
        """
        state_size = self.hypers.model_params['state_size']
        # Dynamic batch size, taken from the first axis of the input placeholder
        batch_size = tf.shape(self._placeholders[INPUTS])[0]
        activation_noise = self._placeholders[ACTIVATION_NOISE]
        dropout_keep_rate = self._placeholders[DROPOUT_KEEP_RATE]

        # Apply noise to the inputs
        inputs = apply_noise(self._placeholders[INPUTS],
                             scale=activation_noise)

        # Compute the input embedding features, result is a [B, T, D] tensor
        embeddings, _ = dense(
            inputs=inputs,
            units=state_size,
            activation=self.hypers.model_params['embedding_activation'],
            activation_noise=activation_noise,
            use_bias=True,
            name=EMBEDDING_NAME)

        # Create the RNN Cell
        # A stride of one needs no state fusion between levels, so a standard cell is used
        rnn_cell_class = CellClass.STANDARD if self.stride_length == 1 else CellClass.BUDGET
        rnn_cell = make_rnn_cell(
            cell_class=rnn_cell_class,
            cell_type=CellType[
                self.hypers.model_params['rnn_cell_type'].upper()],
            units=state_size,
            activation=self.hypers.model_params['rnn_activation'],
            recurrent_noise=activation_noise,
            name=RNN_CELL_NAME)

        # Execute the RNN, outputs consist of a [B, L, D] tensor in the variable `transformed`
        if self.stride_length == 1:
            initial_state = rnn_cell.get_initial_state(inputs=embeddings,
                                                       batch_size=batch_size,
                                                       dtype=tf.float32)
            rnn_outputs, _ = tf.compat.v1.nn.dynamic_rnn(
                cell=rnn_cell,
                inputs=embeddings,
                initial_state=initial_state,
                dtype=tf.float32,
                scope=TRANSFORM_NAME)

            # Collect the outputs at the end of every chunk
            # NOTE(review): int() truncates the quotient; if seq_length is not a multiple of
            # num_outputs the trailing elements are never selected -- confirm this is intended.
            output_stride = int(self.seq_length / self.num_outputs)
            output_indices = list(
                range(output_stride - 1, self.seq_length, output_stride))
            transformed = tf.gather(rnn_outputs,
                                    indices=output_indices,
                                    axis=1)  # [B, L, D]
            stop_states = transformed  # [B, L, D]
        else:
            # Non-trainable zero tensor that stands in for the "previous level" states of the first level
            prev_states = tf.compat.v1.get_variable(
                name='prev-states',
                initializer=tf.zeros_initializer(),
                shape=[1, 1, state_size],
                dtype=tf.float32,
                trainable=False)
            prev_states = tf.tile(prev_states,
                                  multiples=(batch_size, self.samples_per_seq,
                                             1))  # [B, S, D]

            level_outputs: List[tf.Tensor] = []
            level_stop_states: List[tf.Tensor] = []
            for i in range(self.num_outputs):
                # Get the inputs for the current sub-sequence, S is the number of samples per
                # sub-sequence
                level_indices = list(
                    range(i, self.seq_length, self.stride_length))
                level_embeddings = tf.gather(embeddings,
                                             indices=level_indices,
                                             axis=1)  # [B, S, D]

                # Construct the RNN inputs by concatenating the inputs with the previous states, [B, S, 2*D]
                rnn_inputs = tf.concat([level_embeddings, prev_states],
                                       axis=-1)

                # Apply the RNN to each sub-sequence, result is a [B, S, D] tensor
                # The fusion mask is 0 for the first level and 1 for every later level
                fusion_mask = int(i > 0)
                rnn_cell.set_fusion_mask(mask_value=fusion_mask)

                initial_state = rnn_cell.get_initial_state(
                    inputs=rnn_inputs, batch_size=batch_size, dtype=tf.float32)
                # NOTE(review): dynamic_rnn is invoked once per level with the same scope name;
                # presumably the enclosing variable scope enables reuse -- verify against the caller.
                rnn_outputs, final_state = tf.compat.v1.nn.dynamic_rnn(
                    cell=rnn_cell,
                    inputs=rnn_inputs,
                    initial_state=initial_state,
                    dtype=tf.float32,
                    scope=TRANSFORM_NAME)

                # The final state feeds the prediction head; the first step's output feeds the stop head
                level_outputs.append(tf.expand_dims(final_state, axis=1))
                level_stop_states.append(
                    tf.expand_dims(rnn_outputs.output[:, 0, :], axis=1))

                # Set sequence of previous states
                prev_states = rnn_outputs.output

            # Concatenate the outputs and first states from each sub-sequence into [B, L, D] tensors
            transformed = tf.concat(level_outputs, axis=1)
            stop_states = tf.concat(level_stop_states, axis=1)

        # Compute the stop output, Result is a [B, L, 1] tensor.
        stop_output, _ = mlp(
            inputs=stop_states,
            output_size=1,
            hidden_sizes=self.hypers.model_params['stop_output_hidden_units'],
            activations=self.hypers.model_params['stop_output_activation'],
            activation_noise=activation_noise,
            should_bias_final=True,
            should_activate_final=False,
            dropout_keep_rate=dropout_keep_rate,
            name=STOP_PREDICTION)

        # Store both the raw logits and their sigmoid
        stop_output_logits = tf.squeeze(stop_output, axis=-1)  # [B, L]
        self._ops[STOP_OUTPUT_LOGITS] = stop_output_logits
        self._ops[STOP_OUTPUT_NAME] = tf.math.sigmoid(
            stop_output_logits)  # [B, L]

        # Compute the predictions, Result is a [B, L, K] tensor
        output, _ = mlp(
            inputs=transformed,
            output_size=self.num_output_features,
            hidden_sizes=self.hypers.model_params['output_hidden_units'],
            activations=self.hypers.model_params['output_hidden_activation'],
            activation_noise=activation_noise,
            should_bias_final=True,
            should_activate_final=False,
            dropout_keep_rate=dropout_keep_rate,
            name=OUTPUT_LAYER_NAME)

        # Apply the pooling layer to mix outputs from each level.
        pool_W = tf.compat.v1.get_variable(
            name='{0}-kernel'.format(AGGREGATION_NAME),
            shape=[state_size * 2, 1],
            initializer=tf.compat.v1.initializers.glorot_uniform(),
            trainable=True)
        pool_b = tf.compat.v1.get_variable(
            name='{0}-bias'.format(AGGREGATION_NAME),
            shape=[1, 1],
            initializer=tf.compat.v1.initializers.random_uniform(minval=-0.7,
                                                                 maxval=0.7),
            trainable=True)
        # NOTE(review): the returned `weights` are not used after this call.
        output, weights = pool_predictions(pred=output,
                                           states=transformed,
                                           W=pool_W,
                                           b=pool_b,
                                           seq_length=self.num_outputs,
                                           activation_noise=activation_noise,
                                           name=AGGREGATION_NAME)

        # Reshape to [B, 1, 1]
        expected_output = tf.expand_dims(self._placeholders[OUTPUT], axis=-1)

        # Compute the output values
        # Classification outputs also expose logits and accuracy; every other output type
        # uses the pooled MLP output as the prediction.
        if self.output_type == OutputType.BINARY_CLASSIFICATION:
            classification_output = compute_binary_classification_output(
                model_output=output, labels=expected_output)
            self._ops[LOGITS] = classification_output.logits
            self._ops[PREDICTION] = classification_output.predictions
            self._ops[ACCURACY] = classification_output.accuracy
        elif self.output_type == OutputType.MULTI_CLASSIFICATION:
            classification_output = compute_multi_classification_output(
                model_output=output, labels=expected_output)
            self._ops[LOGITS] = classification_output.logits
            self._ops[PREDICTION] = classification_output.predictions
            self._ops[ACCURACY] = classification_output.accuracy
        else:
            self._ops[PREDICTION] = output
Exemplo n.º 7
0
def dense(
    inputs: tf.Tensor,
    units: int,
    activation: Optional[str],
    activation_noise: tf.Tensor,
    name: str,
    use_bias: bool,
    dropout_keep_rate: Optional[Union[float, tf.Tensor]] = None
) -> Tuple[tf.Tensor, tf.Tensor]:
    """
    Builds a fully-connected layer: a linear map, an optional bias, an optional
    activation, activation noise and optional dropout (applied in that order).

    Args:
        inputs: The input tensor of shape [B, ..., D].
        units: The number of output units (K).
        activation: Name of the activation function. The layer stays linear when the
            activation lookup returns None.
        activation_noise: Scale of the noise applied after the activation.
        name: Prefix for the names of the created variables ('<name>-kernel', '<name>-bias').
        use_bias: Whether a bias term is added to the linear map.
        dropout_keep_rate: Optional keep rate. When given, dropout is applied with
            rate `1.0 - dropout_keep_rate`.
    Returns:
        A tuple of two tensors: (1) the layer output of shape [B, ..., K] and (2) the values
        before the activation function (linear map plus bias), kept for debugging.
    """
    # Size of the trailing feature axis, D
    feature_dim = inputs.get_shape()[-1]

    # Kernel of shape [D, K]
    kernel = tf.compat.v1.get_variable(
        name='{0}-kernel'.format(name),
        shape=[feature_dim, units],
        initializer=tf.compat.v1.initializers.glorot_uniform(),
        trainable=True)

    # Linear map, [B, ..., K]
    linear = tf.matmul(inputs, kernel)

    if use_bias:
        # Bias of shape [1, K], broadcast over the leading axes
        bias = tf.compat.v1.get_variable(
            name='{0}-bias'.format(name),
            shape=[1, units],
            initializer=tf.compat.v1.initializers.random_uniform(minval=-0.7,
                                                                 maxval=0.7),
            trainable=True)
        linear = linear + bias

    # Non-linearity, skipped when no activation function is resolved
    activation_fn = get_activation(activation)
    activated = linear if activation_fn is None else activation_fn(linear)

    # Noise regularization on the activations
    activated = apply_noise(activated, scale=activation_noise)

    if dropout_keep_rate is None:
        return activated, linear

    dropped = tf.nn.dropout(activated, rate=1.0 - dropout_keep_rate)
    return dropped, linear
Exemplo n.º 8
0
def conv_1d(inputs: tf.Tensor, filter_width: int, stride: int,
            activation: Optional[str], activation_noise: float,
            dropout_keep_rate: tf.Tensor, use_dropout: bool,
            name: str) -> tf.Tensor:
    """
    Performs a 1d convolution over the given inputs.

    The layer applies, in order: the convolution, a bias, the optional activation,
    activation noise and (optionally) dropout. Variables are created through the
    `tf.compat.v1` API and dropout uses the `rate` argument, matching the other
    layers in this code base, so the layer also builds when the TF1-only top-level
    aliases (`tf.variable_scope`, `tf.get_variable`, ...) are unavailable.

    Args:
        inputs: A [B, T, D] tensor of features (D) for each seq element (T) and batch sample (B)
        filter_width: The width of the convolution filter. Must be at least one.
        stride: The convolution stride. Must be at least one.
        activation: The name of the activation function. If none, then we apply a linear activation.
        activation_noise: The noise to apply to the final activations.
        dropout_keep_rate: The dropout keep rate to apply to the transformed representation.
        use_dropout: Whether to apply dropout.
        name: The name of this layer. Used as the variable scope for the filter and bias.
    Returns:
        A [B, T', D] tensor that is the result of applying the 1d convolution filter
            to the inputs. With 'SAME' padding, T' equals T when the stride is one.
    Raises:
        AssertionError: If `filter_width` or `stride` is smaller than one.
    """
    assert filter_width >= 1, 'Must have a filter width of at least one. Got: {0}'.format(
        filter_width)
    assert stride >= 1, 'Must have a stride length of at least one. Got: {0}'.format(
        stride)

    with tf.compat.v1.variable_scope(name):
        # Create the (trainable) convolution filter
        num_features = inputs.get_shape()[-1]  # D
        conv_filter = tf.compat.v1.get_variable(
            shape=[filter_width, num_features, num_features],
            initializer=tf.compat.v1.initializers.glorot_uniform(),
            name='filter',
            dtype=tf.float32)

        # Create the (trainable) bias
        bias = tf.compat.v1.get_variable(
            shape=[1, 1, num_features],
            initializer=tf.compat.v1.initializers.random_uniform(minval=-0.7,
                                                                 maxval=0.7),
            name='bias',
            dtype=tf.float32)

        # Apply the convolution filter, [B, T, D]. The input is passed positionally because
        # its keyword is `value` in the TF1 signature and `input` in the TF2 signature.
        transformed = tf.nn.conv1d(inputs,
                                   filters=conv_filter,
                                   stride=stride,
                                   padding='SAME',
                                   data_format='NWC')

        transformed = transformed + bias  # [B, T, D]

        # Apply the activation function, [B, T, D]
        activation_fn = get_activation(activation)
        if activation_fn is not None:
            transformed = activation_fn(transformed)

        # Apply the activation noise
        transformed = apply_noise(transformed, scale=activation_noise)

        # Apply dropout if specified, [B, T, D]. `rate` is the drop probability, i.e. the
        # complement of the keep rate (the `keep_prob` keyword does not exist in TF2).
        if use_dropout:
            transformed = tf.nn.dropout(transformed,
                                        rate=1.0 - dropout_keep_rate)

        return transformed