Example #1
    def decoder_teacher_forcing(
            self,
            encoder_output,
            target=None,
            encoder_end_state=None
    ):
        # ================ Setup ================
        batch_size = encoder_output.shape[0]

        # Prepare target for decoding
        target_sequence_length = sequence_length_2D(target)
        start_tokens = tf.tile([self.GO_SYMBOL], [batch_size])
        end_tokens = tf.tile([self.END_SYMBOL], [batch_size])
        if self.is_timeseries:
            start_tokens = tf.cast(start_tokens, tf.float32)
            end_tokens = tf.cast(end_tokens, tf.float32)
        targets_with_go_and_eos = tf.concat([
            tf.expand_dims(start_tokens, 1),
            target,  # todo tf2: right now cast to tf.int32, fails if tf.int64
            tf.expand_dims(end_tokens, 1)], 1)
        target_sequence_length_with_eos = target_sequence_length + 1

        # Decoder Embeddings
        decoder_emb_inp = self.decoder_embedding(targets_with_go_and_eos)

        # Setting up decoder memory from encoder output
        if self.attention_mechanism is not None:
            encoder_sequence_length = sequence_length_3D(encoder_output)
            self.attention_mechanism.setup_memory(
                encoder_output,
                memory_sequence_length=encoder_sequence_length
            )

        decoder_initial_state = self.build_decoder_initial_state(
            batch_size,
            encoder_state=encoder_end_state,
            dtype=tf.float32
        )

        decoder = tfa.seq2seq.BasicDecoder(
            self.decoder_rnncell,
            sampler=self.sampler,
            output_layer=self.dense_layer
        )

        # BasicDecoderOutput
        outputs, final_state, generated_sequence_lengths = decoder(
            decoder_emb_inp,
            initial_state=decoder_initial_state,
            sequence_length=target_sequence_length_with_eos
        )

        logits = outputs.rnn_output
        mask = tf.sequence_mask(
            generated_sequence_lengths,
            maxlen=logits.shape[1],
            dtype=tf.float32
        )
        logits = logits * mask[:, :, tf.newaxis]
        return logits  # , outputs, final_state, generated_sequence_lengths
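
Every example on this page leans on two helpers, `sequence_length_2D` and `sequence_length_3D`, whose definitions are not shown. A minimal sketch of what they typically compute, assuming padding positions are all zeros (this mirrors Ludwig's helpers but is written from scratch here):

import tensorflow as tf

def sequence_length_2D(sequence):
    # sequence: int tensor of shape [batch_size, seq_size], zero-padded.
    # Counts the non-zero tokens in each row.
    used = tf.sign(tf.abs(sequence))
    return tf.cast(tf.reduce_sum(used, 1), tf.int32)

def sequence_length_3D(sequence):
    # sequence: float tensor of shape [batch_size, seq_size, hidden_size].
    # Counts the timesteps whose feature vector is not all zeros.
    used = tf.sign(tf.reduce_max(tf.abs(sequence), 2))
    return tf.cast(tf.reduce_sum(used, 1), tf.int32)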
Example #2
    def call(self, y_true, y_pred):
        # y_true: shape [batch_size, sequence_size]
        # y_pred: shape [batch_size, sequence_size, num_classes]

        y_pred_tensor = y_pred[LOGITS]
        y_true_tensor = tf.cast(y_true, dtype=tf.int64)

        # pad the shorter sequence
        y_pred_seq_len = tf.shape(y_pred_tensor)[1]
        y_true_seq_len = tf.shape(y_true_tensor)[1]

        y_pred_pad_len = tf.maximum(0, y_true_seq_len - y_pred_seq_len)
        y_true_pad_len = tf.maximum(0, y_pred_seq_len - y_true_seq_len)

        y_pred_tensor = tf.pad(y_pred_tensor,
                               [[0, 0], [0, y_pred_pad_len], [0, 0]])
        y_true_tensor = tf.pad(y_true_tensor, [[0, 0], [0, y_true_pad_len]])

        longest_sequence_length = tf.maximum(sequence_length_2D(y_true_tensor),
                                             sequence_length_3D(y_pred_tensor))
        longest_sequence_length += 1  # for EOS
        longest_sequence_length = tf.minimum(longest_sequence_length,
                                             tf.shape(y_true_tensor)[1])
        mask = tf.sequence_mask(longest_sequence_length,
                                maxlen=tf.shape(y_true_tensor)[1],
                                dtype=tf.float32)
        # compute loss based on valid time steps
        loss = self.loss_function(y_true_tensor, y_pred_tensor)
        loss = loss * mask
        loss = tf.reduce_sum(loss) / tf.reduce_sum(mask)
        return loss
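
The core of this loss is the mask: `tf.sequence_mask` turns per-example lengths into a 0/1 matrix, so padded timesteps contribute nothing to the average. A toy check with assumed lengths:

import tensorflow as tf

lengths = tf.constant([2, 3]) + 1                  # +1 for the EOS position
mask = tf.sequence_mask(lengths, maxlen=4, dtype=tf.float32)
# mask -> [[1., 1., 1., 0.],
#          [1., 1., 1., 1.]]
# the per-timestep losses are multiplied by this mask, and the sum is
# divided by tf.reduce_sum(mask), i.e. averaged over valid positions only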
Example #3
def reduce_last(sequence, **kwargs):
    batch_size = tf.shape(sequence)[0]
    sequence_length = sequence_length_3D(sequence)
    # gather the correct outputs from the RNN outputs (the outputs after sequence_length are all 0s)
    return tf.gather_nd(
        sequence,
        tf.stack([tf.range(batch_size),
                  tf.maximum(sequence_length - 1, 0)],
                 axis=1))
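
A quick usage sketch with toy values (it assumes the `sequence_length_3D` sketch above): the second sequence is zero-padded after its first timestep, so its first output is the one gathered:

import tensorflow as tf

sequence = tf.constant([
    [[1., 1.], [2., 2.]],    # length 2 -> gathers [2., 2.]
    [[3., 3.], [0., 0.]],    # length 1 -> gathers [3., 3.]
])
last = reduce_last(sequence)
# last -> [[2., 2.], [3., 3.]]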
Example #4
 def call(self, inputs, training=None, mask=None):
     batch_size = tf.shape(inputs)[0]
     sequence_length = sequence_length_3D(inputs)
     # gather the correct outputs from the RNN outputs (the outputs after sequence_length are all 0s)
     gathered = tf.gather_nd(
         inputs,
         tf.stack(
             [tf.range(batch_size),
              tf.maximum(sequence_length - 1, 0)],
             axis=1))
     return gathered
Example #5
    def call(self, y_true, y_pred):
        # y_true: shape [batch_size, sequence_size]
        # y_pred: shape [batch_size, sequence_size, num_classes]

        y_pred = y_pred[LOGITS]
        y_true = tf.convert_to_tensor(y_true, dtype=tf.int64)

        # pad the shorter sequence
        if y_true.shape[1] > y_pred.shape[1]:
            pad = tf.zeros([
                y_pred.shape[0], y_true.shape[1] - y_pred.shape[1],
                y_pred.shape[2]
            ],
                           dtype=y_pred.dtype)
            y_pred = tf.concat([y_pred, pad], axis=1)
        elif y_pred.shape[1] > y_true.shape[1]:
            pad = tf.zeros([
                y_true.shape[0],
                y_pred.shape[1] - y_true.shape[1],
            ],
                           dtype=y_true.dtype)
            y_true = tf.concat([y_true, pad], axis=1)

        longest_sequence_length = tf.maximum(sequence_length_2D(y_true),
                                             sequence_length_3D(y_pred))
        longest_sequence_length += 1  # for EOS
        longest_sequence_length = tf.minimum(longest_sequence_length,
                                             y_true.shape[1])
        mask = tf.sequence_mask(longest_sequence_length,
                                maxlen=y_true.shape[1],
                                dtype=tf.float32)
        # compute loss based on valid time steps
        loss = self.loss_function(y_true, y_pred)
        loss = loss * mask
        loss = tf.reduce_sum(loss) / tf.reduce_sum(mask)
        return loss
Example #6
    def decoder_teacher_forcing(
            self,
            encoder_output,
            target=None,
            encoder_end_state=None
    ):
        # ================ Setup ================
        batch_size = tf.shape(encoder_output)[0]

        # Prepare target for decoding
        target_sequence_length = sequence_length_2D(target)
        start_tokens = tf.tile([self.GO_SYMBOL], [batch_size])
        end_tokens = tf.tile([self.END_SYMBOL], [batch_size])
        if self.is_timeseries:
            start_tokens = tf.cast(start_tokens, tf.float32)
            end_tokens = tf.cast(end_tokens, tf.float32)
        targets_with_go_and_eos = tf.concat([
            tf.expand_dims(start_tokens, 1),
            target,  # right now cast to tf.int32, fails if tf.int64
            tf.expand_dims(end_tokens, 1)], 1)
        target_sequence_length_with_eos = target_sequence_length + 1

        # Decoder Embeddings
        decoder_emb_inp = self.decoder_embedding(targets_with_go_and_eos)

        # Setting up decoder memory from encoder output
        if self.attention_mechanism is not None:
            encoder_sequence_length = sequence_length_3D(encoder_output)
            self.attention_mechanism.setup_memory(
                encoder_output,
                memory_sequence_length=encoder_sequence_length
            )

        decoder_initial_state = self.build_decoder_initial_state(
            batch_size,
            encoder_state=encoder_end_state,
            dtype=tf.float32
        )

        # use Ludwig custom BasicDecoder
        decoder = BasicDecoder(
            self.decoder_rnncell,
            sampler=self.sampler,
            output_layer=self.dense_layer
        )

        # BasicDecoderOutput
        outputs, final_state, generated_sequence_lengths = decoder(
            decoder_emb_inp,
            initial_state=decoder_initial_state,
            sequence_length=target_sequence_length_with_eos
        )

        logits = outputs.rnn_output
        # mask = tf.sequence_mask(
        #    generated_sequence_lengths,
        #    maxlen=tf.shape(logits)[1],
        #    dtype=tf.float32
        # )
        # logits = logits * mask[:, :, tf.newaxis]

        # append a trailing 0, useful for
        # those datapoints that reach maximum length
        # and don't have an EOS at the end
        logits = tf.pad(
            logits,
            [[0, 0], [0, 1], [0, 0]]
        )

        # EXPECTED SIZE OF RETURNED TENSORS
        # logits: shape [batch_size, seq_size, num_classes], used for evaluation
        # projection_input: shape [batch_size, seq_size, state_size], for sampled softmax
        return {
            LOGITS: logits,
            PROJECTION_INPUT: outputs.projection_input
        }
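
The `tf.pad` call appends one extra timestep of all-zero logits, so sequences that reach maximum length without emitting an EOS still have a harmless final position. A toy shape check:

import tensorflow as tf

logits = tf.random.normal([4, 10, 7])              # [batch, seq, classes]
padded = tf.pad(logits, [[0, 0], [0, 1], [0, 0]])  # pad the time axis only
print(padded.shape)                                # (4, 11, 7)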
Example #7
    def __call__(self,
                 output_feature,
                 targets,
                 hidden,
                 hidden_size,
                 regularizer,
                 is_timeseries=False):
        logging.info('  hidden shape: {0}'.format(hidden.shape))
        if len(hidden.shape) != 3:
            raise ValueError(
                'Decoder inputs rank is {}, but should be 3 [batch x sequence x hidden] '
                'when using a tagger sequential decoder. '
                'Consider setting reduce_output to null / None if a sequential encoder / combiner is used.'
                .format(len(hidden.shape)))

        if is_timeseries:
            output_feature['num_classes'] = 1

        if not self.regularize:
            regularizer = None

        sequence_length = tf.shape(hidden)[1]

        if self.attention:
            hidden, hidden_size = feed_forward_memory_attention(
                hidden, hidden, hidden_size)
        targets_sequence_length = sequence_length_2D(targets)

        initializer_obj = get_initializer(self.initializer)
        class_weights = tf.get_variable('weights',
                                        initializer=initializer_obj([
                                            hidden_size,
                                            output_feature['num_classes']
                                        ]),
                                        regularizer=regularizer)
        logging.debug('  weights: {0}'.format(class_weights))

        class_biases = tf.get_variable('biases',
                                       [output_feature['num_classes']])
        logging.debug('  biases: {0}'.format(class_biases))

        hidden_reshape = tf.reshape(hidden, [-1, hidden_size])
        logits_to_reshape = tf.matmul(hidden_reshape,
                                      class_weights) + class_biases
        logits = tf.reshape(
            logits_to_reshape,
            [-1, sequence_length, output_feature['num_classes']])
        logging.debug('  logits: {0}'.format(logits))

        if is_timeseries:
            probabilities_sequence = tf.zeros_like(logits)
            predictions_sequence = tf.reshape(logits, [-1, sequence_length])
        else:
            probabilities_sequence = tf.nn.softmax(
                logits, name='probabilities_{}'.format(output_feature['name']))
            predictions_sequence = tf.argmax(logits,
                                             -1,
                                             name='predictions_{}'.format(
                                                 output_feature['name']),
                                             output_type=tf.int32)

        predictions_sequence_length = sequence_length_3D(hidden)

        return predictions_sequence, probabilities_sequence, \
               predictions_sequence_length, \
               probabilities_sequence, targets_sequence_length, \
               logits, hidden, class_weights, class_biases
Example #8
    def __call__(
            self,
            inputs,  # encoder outputs
            training=None,
            mask=None,
            **kwargs):
        if (self.main_sequence_feature is None
                or self.main_sequence_feature not in inputs):
            for if_name, if_outputs in inputs.items():
                # todo: when https://github.com/uber/ludwig/issues/810 is closed
                #       convert following test from using shape to use explicit
                #       if_outputs['type'] values for sequence features
                if len(if_outputs['encoder_output'].shape) == 3:
                    self.main_sequence_feature = if_name
                    break

        if self.main_sequence_feature is None:
            raise Exception(
                'No sequence feature available for sequence combiner')

        main_sequence_feature_encoding = inputs[self.main_sequence_feature]

        representation = main_sequence_feature_encoding['encoder_output']
        representations = [representation]

        sequence_max_length = representation.shape[1]
        sequence_length = sequence_length_3D(representation)

        # ================ Concat ================
        for if_name, if_outputs in inputs.items():
            if if_name != self.main_sequence_feature:
                if_representation = if_outputs['encoder_output']
                if len(if_representation.shape) == 3:
                    # The following check makes sense when
                    # both representations have a specified
                    # sequence length dimension. If they do not,
                    # then this check is simply checking if None == None
                    # and will not catch discrepancies in the different
                    # feature length dimension. Those errors will show up
                    # at training time. Possible solutions to this is
                    # to enforce a length second dimension in
                    # sequential feature placeholders, but that
                    # does not work with BucketedBatcher that requires
                    # the second dimension to be undefined in order to be
                    # able to trim the data points and speed up computation.
                    # So for now we are keeping things like this, make sure
                    # to write in the documentation that training time
                    # dimensions mismatch may occur if the sequential
                    # features have different lengths for some data points.
                    if if_representation.shape[1] != representation.shape[1]:
                        raise ValueError(
                            'The sequence length of the input feature {} '
                            'is {} and is different from the sequence '
                            'length of the main sequence feature {} which '
                            'is {}.\n Shape of {}: {}, shape of {}: {}.\n'
                            'Sequence lengths of all sequential features '
                            'must be the same in order to be concatenated '
                            'by the sequence concat combiner. '
                            'Try to impose the same max sequence length '
                            'as a preprocessing parameter to both features '
                            'or to reduce the output of {}.'.format(
                                if_name, if_representation.shape[1],
                                self.main_sequence_feature,
                                representation.shape[1], if_name,
                                if_representation.shape, if_name,
                                representation.shape, if_name))
                    # this assumes all sequence representations have the
                    # same sequence length, 2nd dimension
                    representations.append(if_representation)

                elif len(if_representation.shape) == 2:
                    multipliers = tf.constant([1, sequence_max_length, 1])
                    tiled_representation = tf.tile(
                        tf.expand_dims(if_representation, 1), multipliers)
                    representations.append(tiled_representation)

                else:
                    raise ValueError(
                        'The representation of {} has rank {} and cannot be'
                        ' concatenated by a sequence concat combiner. '
                        'Only rank 2 and rank 3 tensors are supported.'.format(
                            if_outputs['name'], len(if_representation.shape)))

        hidden = tf.concat(representations, 2)
        logger.debug('  concat_hidden: {0}'.format(hidden))

        # ================ Mask ================
        # todo future: maybe modify this with TF2 mask mechanics
        sequence_mask = tf.sequence_mask(sequence_length, sequence_max_length)
        hidden = tf.multiply(
            hidden, tf.cast(tf.expand_dims(sequence_mask, -1),
                            dtype=tf.float32))

        # ================ Reduce ================
        hidden = self.reduce_sequence(hidden)

        return_data = {'combiner_output': hidden}

        if len(inputs) == 1:
            for key, value in [d for d in inputs.values()][0].items():
                if key != 'encoder_output':
                    return_data[key] = value

        return return_data
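
The rank-2 branch is the subtle one: a per-example vector is tiled along the time axis so it can be concatenated with the rank-3 main representation, then masked past each sequence's real length. A toy version with assumed values:

import tensorflow as tf

vector = tf.constant([[1., 2.], [3., 4.]])             # [batch, hidden]
sequence_max_length = 3
tiled = tf.tile(tf.expand_dims(vector, 1),
                [1, sequence_max_length, 1])           # [batch, seq, hidden]
mask = tf.sequence_mask([2, 3], sequence_max_length)   # hypothetical lengths
tiled = tiled * tf.cast(mask[:, :, tf.newaxis], tf.float32)
# row 0 is zeroed at its third (padding) timestep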
Example #9
def recurrent_decoder(encoder_outputs,
                      targets,
                      max_sequence_length,
                      vocab_size,
                      cell_type='rnn',
                      state_size=256,
                      embedding_size=50,
                      num_layers=1,
                      attention_mechanism=None,
                      beam_width=1,
                      projection=True,
                      tied_target_embeddings=True,
                      embeddings=None,
                      initializer=None,
                      regularizer=None,
                      is_timeseries=False):
    with tf.variable_scope('rnn_decoder',
                           reuse=tf.AUTO_REUSE,
                           regularizer=regularizer):

        # ================ Setup ================
        if beam_width > 1 and is_timeseries:
            raise ValueError('Invalid beam_width: {}'.format(beam_width))

        GO_SYMBOL = vocab_size
        END_SYMBOL = 0
        batch_size = tf.shape(encoder_outputs)[0]

        # ================ Projection ================
        # Project the encoder outputs to the size of the decoder state
        encoder_outputs_size = encoder_outputs.shape[-1]
        if projection and encoder_outputs_size != state_size:
            with tf.variable_scope('projection'):
                encoder_output_rank = len(encoder_outputs.shape)
                if encoder_output_rank > 2:
                    sequence_length = tf.shape(encoder_outputs)[1]
                    encoder_outputs = tf.reshape(encoder_outputs,
                                                 [-1, encoder_outputs_size])
                    encoder_outputs = fc_layer(encoder_outputs,
                                               encoder_outputs.shape[-1],
                                               state_size,
                                               activation=None,
                                               initializer=initializer)
                    encoder_outputs = tf.reshape(
                        encoder_outputs, [-1, sequence_length, state_size])
                else:
                    encoder_outputs = fc_layer(encoder_outputs,
                                               encoder_outputs.shape[-1],
                                               state_size,
                                               activation=None,
                                               initializer=initializer)

        # ================ Targets sequence ================
        # Calculate the length of inputs and the batch size
        with tf.variable_scope('sequence'):
            targets_sequence_length = sequence_length_2D(targets)
            start_tokens = tf.tile([GO_SYMBOL], [batch_size])
            end_tokens = tf.tile([END_SYMBOL], [batch_size])
            if is_timeseries:
                start_tokens = tf.cast(start_tokens, tf.float32)
                end_tokens = tf.cast(end_tokens, tf.float32)
            targets_with_go_and_eos = tf.concat([
                tf.expand_dims(start_tokens, 1), targets,
                tf.expand_dims(end_tokens, 1)
            ], 1)
            logging.debug(
                '  targets_with_go: {0}'.format(targets_with_go_and_eos))
            targets_sequence_length_with_eos = targets_sequence_length + 1  # the EOS symbol is 0 so it's not increasing the real length of the sequence

        # ================ Embeddings ================
        if is_timeseries:
            targets_embedded = tf.expand_dims(targets_with_go_and_eos, -1)
            targets_embeddings = None
        else:
            with tf.variable_scope('embedding'):
                if embeddings is not None:
                    embedding_size = embeddings.shape.as_list()[-1]
                    if tied_target_embeddings:
                        state_size = embedding_size
                elif tied_target_embeddings:
                    embedding_size = state_size

                if embeddings is not None:
                    embedding_go = tf.get_variable(
                        'embedding_GO',
                        initializer=tf.random_uniform([1, embedding_size],
                                                      -1.0, 1.0))
                    targets_embeddings = tf.concat([embeddings, embedding_go],
                                                   axis=0)
                else:
                    initializer_obj = get_initializer(initializer)
                    targets_embeddings = tf.get_variable(
                        'embeddings',
                        initializer=initializer_obj(
                            [vocab_size + 1, embedding_size]),
                        regularizer=regularizer)
                logging.debug(
                    '  targets_embeddings: {0}'.format(targets_embeddings))

                targets_embedded = tf.nn.embedding_lookup(
                    targets_embeddings,
                    targets_with_go_and_eos,
                    name='decoder_input_embeddings')
        logging.debug('  targets_embedded: {0}'.format(targets_embedded))

        # ================ Class prediction ================
        if tied_target_embeddings:
            class_weights = tf.transpose(targets_embeddings)
        else:
            initializer_obj = get_initializer(initializer)
            class_weights = tf.get_variable('class_weights',
                                            initializer=initializer_obj(
                                                [state_size, vocab_size + 1]),
                                            regularizer=regularizer)
        logging.debug('  class_weights: {0}'.format(class_weights))
        class_biases = tf.get_variable('class_biases', [vocab_size + 1])
        logging.debug('  class_biases: {0}'.format(class_biases))
        projection_layer = Projection(class_weights, class_biases)

        # ================ RNN ================
        initial_state = encoder_outputs
        with tf.variable_scope('rnn_cells') as vs:
            # Cell
            cell_fun = get_cell_fun(cell_type)

            if num_layers == 1:
                cell = cell_fun(state_size)
                if cell_type.startswith('lstm'):
                    initial_state = LSTMStateTuple(c=initial_state,
                                                   h=initial_state)
            elif num_layers > 1:
                cell = MultiRNNCell(
                    [cell_fun(state_size) for _ in range(num_layers)],
                    state_is_tuple=True)
                if cell_type.startswith('lstm'):
                    initial_state = LSTMStateTuple(c=initial_state,
                                                   h=initial_state)
                initial_state = tuple([initial_state] * num_layers)
            else:
                raise ValueError(
                    'num_layers in recurrent decoder: {}. '
                    'Number of layers in a recurrent decoder cannot be <= 0'.
                    format(num_layers))

            # Attention
            if attention_mechanism is not None:
                if attention_mechanism == 'bahdanau':
                    attention_mechanism = tf.contrib.seq2seq.BahdanauAttention(
                        num_units=state_size,
                        memory=encoder_outputs,
                        memory_sequence_length=sequence_length_3D(
                            encoder_outputs))
                elif attention_mechanism == 'luong':
                    attention_mechanism = tf.contrib.seq2seq.LuongAttention(
                        num_units=state_size,
                        memory=encoder_outputs,
                        memory_sequence_length=sequence_length_3D(
                            encoder_outputs))
                else:
                    raise ValueError(
                        'Attention mechanism {} not supported'.format(
                            attention_mechanism))
                cell = tf.contrib.seq2seq.AttentionWrapper(
                    cell, attention_mechanism, attention_layer_size=state_size)
                initial_state = cell.zero_state(dtype=tf.float32,
                                                batch_size=batch_size)
                initial_state = initial_state.clone(
                    cell_state=reduce_sequence(encoder_outputs, 'last'))

            for v in tf.global_variables():
                if v.name.startswith(vs.name):
                    logging.debug('  {}: {}'.format(v.name, v))

        # ================ Decoding ================
        def decode(initial_state,
                   cell,
                   helper,
                   beam_width=1,
                   projection_layer=None):
            # The decoder itself
            if beam_width > 1:
                # Tile inputs for beam search decoder
                beam_initial_state = tf.contrib.seq2seq.tile_batch(
                    initial_state, beam_width)
                decoder = tf.contrib.seq2seq.BeamSearchDecoder(
                    cell=cell,
                    embedding=targets_embeddings,
                    start_tokens=start_tokens,
                    end_token=END_SYMBOL,
                    initial_state=beam_initial_state,
                    beam_width=beam_width,
                    output_layer=projection_layer)
            else:
                decoder = BasicDecoder(cell=cell,
                                       helper=helper,
                                       initial_state=initial_state,
                                       output_layer=projection_layer)

            # The decoding operation
            outputs = tf.contrib.seq2seq.dynamic_decode(
                decoder=decoder,
                output_time_major=False,
                impute_finished=(beam_width <= 1),
                maximum_iterations=max_sequence_length)

            return outputs

        # ================ Decoding helpers ================
        if is_timeseries:
            train_helper = TimeseriesTrainingHelper(
                inputs=targets_embedded,
                sequence_length=targets_sequence_length_with_eos)
            final_outputs_pred, final_state_pred, final_sequence_lengths_pred = decode(
                initial_state,
                cell,
                train_helper,
                projection_layer=projection_layer)
            eval_logits = final_outputs_pred.rnn_output
            train_logits = final_outputs_pred.projection_input
            predictions_sequence = tf.reshape(eval_logits, [batch_size, -1])
            predictions_sequence_length_with_eos = final_sequence_lengths_pred

        else:
            train_helper = tf.contrib.seq2seq.TrainingHelper(
                inputs=targets_embedded,
                sequence_length=targets_sequence_length_with_eos)
            final_outputs_train, final_state_train, final_sequence_lengths_train = decode(
                initial_state,
                cell,
                train_helper,
                projection_layer=projection_layer)
            eval_logits = final_outputs_train.rnn_output
            train_logits = final_outputs_train.projection_input
            # train_predictions = final_outputs_train.sample_id

            pred_helper = tf.contrib.seq2seq.GreedyEmbeddingHelper(
                embedding=targets_embeddings,
                start_tokens=start_tokens,
                end_token=END_SYMBOL)
            final_outputs_pred, final_state_pred, final_sequence_lengths_pred = decode(
                initial_state,
                cell,
                pred_helper,
                beam_width,
                projection_layer=projection_layer)

            if beam_width > 1:
                predictions_sequence = \
                    final_outputs_pred.beam_search_decoder_output.predicted_ids[:, :, 0]
                # final_outputs_pred.predicted_ids[:, :, 0] would work too,
                # but it contains -1s for padding
                predictions_sequence_scores = \
                    final_outputs_pred.beam_search_decoder_output.scores[:, :, 0]
                predictions_sequence_length_with_eos = \
                    final_sequence_lengths_pred[:, 0]
            else:
                predictions_sequence = final_outputs_pred.sample_id
                predictions_sequence_scores = final_outputs_pred.rnn_output
                predictions_sequence_length_with_eos = final_sequence_lengths_pred

    logging.debug('  train_logits: {0}'.format(train_logits))
    logging.debug('  eval_logits: {0}'.format(eval_logits))
    logging.debug('  predictions_sequence: {0}'.format(predictions_sequence))
    logging.debug('  predictions_sequence_scores: {0}'.format(
        predictions_sequence_scores))

    return predictions_sequence, predictions_sequence_scores, predictions_sequence_length_with_eos, \
           targets_sequence_length_with_eos, eval_logits, train_logits, class_weights, class_biases
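
One detail worth calling out from the targets-sequence block: the EOS symbol is 0, the same value as padding, so appending it does not change the computed sequence length, which is why only +1 is added. A toy illustration with assumed values:

import tensorflow as tf

targets = tf.constant([[5, 6, 0], [7, 0, 0]])   # zero-padded targets
GO_SYMBOL, END_SYMBOL = 9, 0                    # assuming vocab_size == 9
batch_size = tf.shape(targets)[0]
start_tokens = tf.tile([GO_SYMBOL], [batch_size])
end_tokens = tf.tile([END_SYMBOL], [batch_size])
targets_with_go_and_eos = tf.concat([
    tf.expand_dims(start_tokens, 1),
    targets,
    tf.expand_dims(end_tokens, 1)], 1)
# -> [[9, 5, 6, 0, 0],
#     [9, 7, 0, 0, 0]]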
Example #10
    def __call__(self,
                 input_sequence,
                 regularizer,
                 dropout_rate,
                 is_training=True):
        if not self.regularize:
            regularizer = None

        # Calculate the length of input_sequence and the batch size
        sequence_length = sequence_length_3D(input_sequence)

        # RNN cell
        cell_fn = get_cell_fun(self.cell_type)

        # initial state
        # init_state = tf.get_variable(
        #   'init_state',
        #   [1, state_size],
        #   initializer=tf.constant_initializer(0.0),
        # )
        # init_state = tf.tile(init_state, [batch_size, 1])

        # main RNN operation
        with tf.variable_scope('rnn_stack',
                               reuse=tf.AUTO_REUSE,
                               regularizer=regularizer) as vs:
            if self.bidirectional:
                # forward direction cell
                fw_cell = lambda state_size: cell_fn(state_size)
                bw_cell = lambda state_size: cell_fn(state_size)
                fw_cells = [
                    fw_cell(self.state_size) for _ in range(self.num_layers)
                ]
                bw_cells = [
                    bw_cell(self.state_size) for _ in range(self.num_layers)
                ]
                rnn_outputs, final_state_fw, final_state_bw = tf.contrib.rnn.stack_bidirectional_dynamic_rnn(
                    cells_fw=fw_cells,
                    cells_bw=bw_cells,
                    dtype=tf.float32,
                    sequence_length=sequence_length,
                    inputs=input_sequence)

            else:
                cell = lambda state_size: cell_fn(state_size)
                cells = MultiRNNCell(
                    [cell(self.state_size) for _ in range(self.num_layers)],
                    state_is_tuple=True)
                rnn_outputs, final_state = tf.nn.dynamic_rnn(
                    cells,
                    input_sequence,
                    sequence_length=sequence_length,
                    dtype=tf.float32)
                # initial_state=init_state)

            for v in tf.global_variables():
                if v.name.startswith(vs.name):
                    logging.debug('  {}: {}'.format(v.name, v))
            logging.debug('  rnn_outputs: {0}'.format(rnn_outputs))

            rnn_output = reduce_sequence(rnn_outputs, self.reduce_output)
            logging.debug('  reduced_rnn_output: {0}'.format(rnn_output))

        # dropout
        if self.dropout and dropout_rate is not None:
            rnn_output = tf.layers.dropout(rnn_output,
                                           rate=dropout_rate,
                                           training=is_training)
            logging.debug('  dropout_rnn: {0}'.format(rnn_output))

        return rnn_output, rnn_output.shape.as_list()[-1]
Example #11
    def concat_dependencies(self, hidden, other_features_hidden):
        if len(self.dependencies) > 0:
            dependencies_hidden = []
            for dependency in self.dependencies:
                # the dependent feature is ensured to be present in final_hidden
                # because we did the topological sort of the features before
                dependency_final_hidden = other_features_hidden[dependency]

                if len(hidden.shape) > 2:
                    if len(dependency_final_hidden.shape) > 2:
                        # matrix matrix -> concat
                        assert hidden.shape[1] == \
                               dependency_final_hidden.shape[1]
                        dependencies_hidden.append(dependency_final_hidden)
                    else:
                        # matrix vector -> tile concat
                        sequence_max_length = hidden.shape[1]
                        multipliers = tf.concat(
                            [[1], [sequence_max_length], [1]],
                            0
                        )
                        tiled_representation = tf.tile(
                            tf.expand_dims(dependency_final_hidden, 1),
                            multipliers
                        )

                        # todo future: maybe modify this with TF2 mask mechanics
                        sequence_length = sequence_length_3D(hidden)
                        mask = tf.sequence_mask(
                            sequence_length,
                            sequence_max_length
                        )
                        tiled_representation = tf.multiply(
                            tiled_representation,
                            tf.cast(mask[:, :, tf.newaxis], dtype=tf.float32)
                        )

                        dependencies_hidden.append(tiled_representation)

                else:
                    if len(dependency_final_hidden.shape) > 2:
                        # vector matrix -> reduce concat
                        reducer = self.dependency_reducers[dependency]
                        dependencies_hidden.append(
                            reducer(dependency_final_hidden)
                        )
                    else:
                        # vector vector -> concat
                        dependencies_hidden.append(dependency_final_hidden)

            try:
                hidden = tf.concat([hidden] + dependencies_hidden, -1)
            except Exception:
                raise ValueError(
                    'Shape mismatch while concatenating dependent features of '
                    '{}: {}. Concatenating the feature activations tensor {} '
                    'with activation tensors of dependencies: {}. The error is '
                    'likely due to a mismatch of the second dimension (sequence'
                    ' length) or a difference in ranks. Likely solutions are '
                    'setting the maximum_sequence_length of all sequential '
                    'features to be the same, reducing the output of some '
                    'features, or disabling bucketing by setting '
                    'bucketing_field to None / null, as activating it will '
                    'reduce the length of the field the bucketing is performed '
                    'on.'.format(
                        self.feature_name,
                        self.dependencies,
                        hidden,
                        dependencies_hidden
                    )
                )

        return hidden
Example #12
    def decoder_beam_search(self,
                            encoder_output,
                            encoder_end_state=None,
                            training=None):
        # ================ Setup ================
        batch_size = encoder_output.shape[0]
        encoder_sequence_length = sequence_length_3D(encoder_output)

        # ================ predictions =================
        # decoder_input = tf.expand_dims([self.GO_SYMBOL] * batch_size, 1)
        start_tokens = tf.fill([batch_size], self.GO_SYMBOL)
        end_token = self.END_SYMBOL

        # code sequence based on example found here
        # https://www.tensorflow.org/addons/api_docs/python/tfa/seq2seq/BeamSearchDecoder
        tiled_encoder_output = tfa.seq2seq.tile_batch(
            encoder_output, multiplier=self.beam_width)

        tiled_encoder_end_state = tfa.seq2seq.tile_batch(
            encoder_end_state, multiplier=self.beam_width)

        tiled_encoder_sequence_length = tfa.seq2seq.tile_batch(
            encoder_sequence_length, multiplier=self.beam_width)

        if self.attention_mechanism is not None:
            self.attention_mechanism.setup_memory(
                tiled_encoder_output,
                memory_sequence_length=tiled_encoder_sequence_length)

        decoder_initial_state = self.build_decoder_initial_state(
            batch_size * self.beam_width,
            encoder_state=tiled_encoder_end_state,
            dtype=tf.float32)

        decoder = tfa.seq2seq.beam_search_decoder.BeamSearchDecoder(
            cell=self.decoder_rnncell,
            beam_width=self.beam_width,
            output_layer=self.dense_layer)
        # ================ generate logits ==================
        maximum_iterations = self.max_sequence_length

        # initialize inference decoder
        decoder_embedding_matrix = self.decoder_embedding.variables[0]
        (
            first_finished,
            first_inputs,
            first_state
        ) = decoder.initialize(
            decoder_embedding_matrix,
            start_tokens=start_tokens,
            end_token=end_token,
            # following construct required to work around inconsistent handling
            # of encoder_end_state by tfa
            initial_state=decoder_initial_state \
                if len(decoder_initial_state) != 1 \
                else decoder_initial_state[0]
        )

        inputs = first_inputs
        state = first_state

        # create empty logits tensor
        logits = tf.convert_to_tensor(np.array([]).reshape(
            [batch_size, 0, self.num_classes]),
                                      dtype=tf.float32)
        # create empty predictions tensor
        predictions = tf.convert_to_tensor(
            np.array([]).reshape([batch_size, 0]),
            dtype=tf.int32  # todo tf2 need to change to tf.int64
        )
        # create lengths tensor
        lengths = tf.zeros([batch_size], dtype=tf.int32)

        # beam search
        for j in range(maximum_iterations):
            outputs, next_state, next_inputs, finished = decoder.step(
                j, inputs, state, training=training)
            inputs = next_inputs
            state = next_state
            # logits don't work, temporary workaround
            one_logit = tf.zeros([batch_size, 1, self.num_classes])
            logits = tf.concat([logits, one_logit], axis=1)
            one_predicted_token = tf.expand_dims(outputs.predicted_ids[:, 0],
                                                 axis=1)
            predictions = tf.concat([predictions, one_predicted_token], axis=1)

        # todo tf2: we should first run all the iterations and only at the end
        #  collect logits and predictions. The current implementation is WRONG

        # todo tf2: solve cases when predictions become 0 and then return
        #  to be a number, which confuses the last_predictions later

        last_predictions = tf.gather_nd(predictions,
                                        tf.stack([
                                            tf.range(tf.shape(predictions)[0]),
                                            tf.maximum(lengths - 1, 0)
                                        ],
                                                 axis=1),
                                        name='last_predictions_{}'.format(
                                            self.name))

        probabilities = tf.zeros_like(logits)

        return logits, lengths, predictions, last_predictions, probabilities
Example #13
    def decoder_beam_search(self,
                            encoder_output,
                            encoder_end_state=None,
                            training=None):
        # ================ Setup ================
        batch_size = encoder_output.shape[0]
        encoder_sequence_length = sequence_length_3D(encoder_output)

        # ================ predictions =================
        # decoder_input = tf.expand_dims([self.GO_SYMBOL] * batch_size, 1)
        start_tokens = tf.fill([batch_size], self.GO_SYMBOL)
        end_token = self.END_SYMBOL

        # code sequence based on example found here
        # https://www.tensorflow.org/addons/api_docs/python/tfa/seq2seq/BeamSearchDecoder
        tiled_encoder_output = tfa.seq2seq.tile_batch(
            encoder_output, multiplier=self.beam_width)

        tiled_encoder_end_state = tfa.seq2seq.tile_batch(
            encoder_end_state, multiplier=self.beam_width)

        tiled_encoder_sequence_length = tfa.seq2seq.tile_batch(
            encoder_sequence_length, multiplier=self.beam_width)

        if self.attention_mechanism is not None:
            self.attention_mechanism.setup_memory(
                tiled_encoder_output,
                memory_sequence_length=tiled_encoder_sequence_length)

        decoder_initial_state = self.build_decoder_initial_state(
            batch_size * self.beam_width,
            encoder_state=tiled_encoder_end_state,
            dtype=tf.float32)

        decoder = tfa.seq2seq.beam_search_decoder.BeamSearchDecoder(
            cell=self.decoder_rnncell,
            beam_width=self.beam_width,
            output_layer=self.dense_layer,
            output_all_scores=True,
        )
        # ================ generate logits ==================
        maximum_iterations = self.max_sequence_length

        # initialize inference decoder
        decoder_embedding_matrix = self.decoder_embedding.variables[0]

        # beam search
        decoder_output, decoder_state, decoder_lengths = tfa.seq2seq.dynamic_decode(
            decoder=decoder,
            output_time_major=False,
            impute_finished=False,
            maximum_iterations=maximum_iterations,
            decoder_init_input=decoder_embedding_matrix,
            decoder_init_kwargs=dict(
                start_tokens=start_tokens,
                end_token=end_token,
                # following construct required to work around inconsistent handling
                # of encoder_end_state by tfa
                initial_state=decoder_initial_state \
                    if len(decoder_initial_state) != 1 \
                    else decoder_initial_state[0]
            ),
        )

        predictions = \
            decoder_output.beam_search_decoder_output.predicted_ids[:, :, 0]
        logits = decoder_output.beam_search_decoder_output.scores[:, :, 0, :]
        lengths = decoder_lengths[:, 0]

        last_predictions = tf.gather_nd(predictions,
                                        tf.stack([
                                            tf.range(tf.shape(predictions)[0]),
                                            tf.maximum(lengths - 1, 0)
                                        ],
                                                 axis=1),
                                        name='last_predictions_{}'.format(
                                            self.name))

        probabilities = tf.nn.softmax(logits)

        return logits, lengths, predictions, last_predictions, probabilities
Example #14
    def __call__(self, feature_encodings, regularizer, dropout_rate, **kwargs):
        if (self.main_sequence_feature is None
                or self.main_sequence_feature not in feature_encodings):
            for fe_name, fe_properties in feature_encodings.items():
                if fe_properties['type'] in SEQUENCE_TYPES:
                    self.main_sequence_feature = fe_name
                    break

        if self.main_sequence_feature is None:
            raise Exception(
                'No sequence feature available for sequence combiner')

        main_sequence_feature_encoding = \
            feature_encodings[self.main_sequence_feature]

        representation = main_sequence_feature_encoding['representation']
        representations_size = representation.shape[2].value
        representations = [representation]

        scope_name = 'sequence_concat_combiner'
        sequence_length = sequence_length_3D(representation)

        with tf.variable_scope(scope_name):
            # ================ Concat ================
            for fe_name, fe_properties in feature_encodings.items():
                if fe_name != self.main_sequence_feature:
                    if fe_properties['type'] in SEQUENCE_TYPES and \
                            len(fe_properties['representation'].shape) == 3:
                        # The following check makes sense when
                        # both representations have a specified
                        # sequence length dimension. If they do not,
                        # then this check is simply checking if None == None
                        # and will not catch discrepancies in the different
                        # feature length dimension. Those errors will show up
                        # at training time. Possible solutions to this is
                        # to enforce a length second dimension in
                        # sequential feature placeholders, but that
                        # does not work with BucketedBatcher that requires
                        # the second dimension to be undefined in order to be
                        # able to trim the data points and speed up computation.
                        # So for now we are keeping things like this, make sure
                        # to write in the documentation that training time
                        # dimensions mismatch may occur if the sequential
                        # features have different lengths for some data points.
                        if fe_properties['representation'].shape[1] != \
                                representation.shape[1]:

                            raise ValueError(
                                'The sequence length of the input feature {} '
                                'is {} and is different from the sequence '
                                'length of the main sequence feature {} which '
                                'is {}.\n Shape of {}: {}, shape of {}: {}.\n'
                                'Sequence lengths of all sequential features '
                                'must be the same in order to be concatenated '
                                'by the sequence concat combiner. '
                                'Try to impose the same max sequence length '
                                'as a preprocessing parameter to both features '
                                'or to reduce the output of {}.'.format(
                                    fe_properties['name'],
                                    fe_properties['representation'].shape[1],
                                    self.main_sequence_feature,
                                    representation.shape[1],
                                    fe_properties['name'],
                                    fe_properties['representation'].shape,
                                    fe_properties['name'],
                                    representation.shape,
                                    fe_properties['name']))
                        # this assumes all sequence representations have the
                        # same sequence length, 2nd dimension
                        representations.append(fe_properties['representation'])

                    elif len(fe_properties['representation'].shape) == 2:
                        sequence_max_length = tf.shape(representation)[1]
                        multipliers = tf.concat(
                            [[1],
                             tf.expand_dims(sequence_max_length, -1), [1]], 0)
                        tiled_representation = tf.tile(
                            tf.expand_dims(fe_properties['representation'], 1),
                            multipliers)
                        logger.debug('  tiled_representation: {0}'.format(
                            tiled_representation))

                        mask = tf.sequence_mask(sequence_length,
                                                sequence_max_length)
                        tiled_representation = tf.multiply(
                            tiled_representation,
                            tf.cast(tf.expand_dims(mask, -1),
                                    dtype=tf.float32))

                        representations.append(tiled_representation)

                    else:
                        raise ValueError(
                            'The representation of {} has rank {} and cannot be'
                            ' concatenated by a sequence concat combiner. '
                            'Only rank 2 and rank 3 tensors are supported.'.
                            format(fe_properties['name'],
                                   len(fe_properties['representation'].shape)))

                    representations_size += fe_properties['size']

            hidden = tf.concat(representations, 2)
            logger.debug('  concat_hidden: {0}'.format(hidden))
            hidden_size = representations_size

            # ================ Mask ================
            mask_matrix = tf.cast(tf.sign(
                tf.reduce_sum(tf.abs(representation), -1, keep_dims=True)),
                                  dtype=tf.float32)
            hidden = tf.multiply(hidden, mask_matrix)

            # ================ Reduce ================
            hidden = reduce_sequence(hidden, self.reduce_output)
            logger.debug('  reduced_concat_hidden: {0}'.format(hidden))

            hidden = tf.identity(hidden, name=scope_name)

        return hidden, hidden_size
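
The mask step in this combiner derives valid positions from the main representation itself: timesteps whose feature vector is all zeros (padding) get weight 0. A toy check, using TF2's `keepdims` spelling of the older `keep_dims` argument:

import tensorflow as tf

rep = tf.constant([[[1., -2.], [0., 0.]]])   # [1, 2, 2], second step padded
mask_matrix = tf.cast(
    tf.sign(tf.reduce_sum(tf.abs(rep), -1, keepdims=True)),
    dtype=tf.float32)
# mask_matrix -> [[[1.], [0.]]]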
Example #15
    def decoder_greedy(self,
                       encoder_output,
                       encoder_end_state=None,
                       training=None):
        # ================ Setup ================
        batch_size = encoder_output.shape[0]

        # ================ predictions =================
        greedy_sampler = tfa.seq2seq.GreedyEmbeddingSampler()

        decoder_input = tf.expand_dims([self.GO_SYMBOL] * batch_size, 1)
        start_tokens = tf.fill([batch_size], self.GO_SYMBOL)
        end_token = self.END_SYMBOL
        decoder_inp_emb = self.decoder_embedding(decoder_input)

        if self.attention_mechanism is not None:
            encoder_sequence_length = sequence_length_3D(encoder_output)
            self.attention_mechanism.setup_memory(
                encoder_output, memory_sequence_length=encoder_sequence_length)

        decoder_initial_state = self.build_decoder_initial_state(
            batch_size, encoder_state=encoder_end_state, dtype=tf.float32)

        decoder = tfa.seq2seq.BasicDecoder(cell=self.decoder_rnncell,
                                           sampler=greedy_sampler,
                                           output_layer=self.dense_layer)

        # ================ generate logits ==================
        maximum_iterations = self.max_sequence_length

        # initialize inference decoder
        decoder_embedding_matrix = self.decoder_embedding.variables[0]
        (first_finished, first_inputs,
         first_state) = decoder.initialize(decoder_embedding_matrix,
                                           start_tokens=start_tokens,
                                           end_token=end_token,
                                           initial_state=decoder_initial_state)

        inputs = first_inputs
        state = first_state

        # create empty logits tensor
        logits = tf.convert_to_tensor(np.array([]).reshape(
            [batch_size, 0, self.num_classes]),
                                      dtype=tf.float32)
        # create empty predictions tensor
        predictions = tf.convert_to_tensor(
            np.array([]).reshape([batch_size, 0]),
            dtype=tf.int32  # todo tf2 need to change to tf.int64
        )
        # create lengths tensor
        lengths = tf.zeros([batch_size], dtype=tf.int32)
        already_finished = tf.cast(tf.zeros([batch_size], dtype=tf.int8),
                                   dtype=tf.bool)

        # build up logits
        for j in range(maximum_iterations):
            outputs, next_state, next_inputs, finished = decoder.step(
                j, inputs, state, training=training)
            inputs = next_inputs
            state = next_state
            one_logit = tf.expand_dims(outputs.rnn_output, axis=1)
            logits = tf.concat([logits, one_logit], axis=1)
            one_prediction = tf.expand_dims(outputs.sample_id, axis=1)
            predictions = tf.concat([predictions, one_prediction], axis=1)

            already_finished = tf.logical_or(already_finished, finished)
            lengths += tf.cast(tf.logical_not(already_finished),
                               dtype=tf.int32)

        probabilities = tf.nn.softmax(logits,
                                      name='probabilities_{}'.format(
                                          self.name))

        predictions = tf.cast(predictions,
                              tf.int64,
                              name='predictions_{}'.format(self.name))

        last_predictions = tf.gather_nd(predictions,
                                        tf.stack([
                                            tf.range(tf.shape(predictions)[0]),
                                            tf.maximum(lengths - 1, 0)
                                        ],
                                                 axis=1),
                                        name='last_predictions_{}'.format(
                                            self.name))

        # mask logits
        mask = tf.sequence_mask(lengths,
                                maxlen=logits.shape[1],
                                dtype=tf.float32)

        logits = logits * mask[:, :, tf.newaxis]

        return logits, lengths, predictions, last_predictions, probabilities
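
The length bookkeeping in the greedy loop is easy to miss: `finished` flips to True once a row emits the end token, and `lengths` only increments rows that have not finished yet. A toy version of one update step with assumed values:

import tensorflow as tf

already_finished = tf.constant([False, False])
finished = tf.constant([True, False])          # row 0 just emitted EOS
lengths = tf.constant([3, 3])
already_finished = tf.logical_or(already_finished, finished)
lengths += tf.cast(tf.logical_not(already_finished), tf.int32)
# lengths -> [3, 4]: the unfinished row keeps counting, row 0 stops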
Example #16
    def decoder_beam_search(
            self,
            encoder_output,
            encoder_end_state=None,
            training=None
    ):
        # ================ Setup ================
        batch_size = encoder_output.shape[0]
        encoder_sequence_length = sequence_length_3D(encoder_output)

        # ================ predictions =================
        decoder_input = tf.expand_dims([self.GO_SYMBOL] * batch_size, 1)
        start_tokens = tf.fill([batch_size], self.GO_SYMBOL)
        end_token = self.END_SYMBOL
        decoder_inp_emb = self.decoder_embedding(decoder_input)

        # code sequence based on example found here
        # https://www.tensorflow.org/addons/api_docs/python/tfa/seq2seq/BeamSearchDecoder
        tiled_encoder_output = tfa.seq2seq.tile_batch(
            encoder_output,
            multiplier=self.beam_width
        )

        tiled_encoder_end_state = tfa.seq2seq.tile_batch(
            encoder_end_state,
            multiplier=self.beam_width
        )

        tiled_encoder_sequence_length = tfa.seq2seq.tile_batch(
            encoder_sequence_length,
            multiplier=self.beam_width
        )

        if self.attention_mechanism is not None:
            self.attention_mechanism.setup_memory(
                tiled_encoder_output,
                memory_sequence_length=tiled_encoder_sequence_length
            )

        decoder_initial_state = self.build_decoder_initial_state(
            batch_size * self.beam_width,
            encoder_state=tiled_encoder_end_state,
            dtype=tf.float32
        )

        decoder = tfa.seq2seq.beam_search_decoder.BeamSearchDecoder(
            cell=self.decoder_rnncell,
            beam_width=self.beam_width,
            output_layer=self.dense_layer,
            output_all_scores=True,
        )
        # ================ generate logits ==================
        maximum_iterations = self.max_sequence_length

        # initialize inference decoder
        decoder_embedding_matrix = self.decoder_embedding.weights[0]

        # beam search
        decoder_output, decoder_state, decoder_lengths = tfa.seq2seq.dynamic_decode(
            decoder=decoder,
            output_time_major=False,
            impute_finished=False,
            maximum_iterations=maximum_iterations,
            decoder_init_input=decoder_embedding_matrix,
            decoder_init_kwargs=dict(
                start_tokens=start_tokens,
                end_token=end_token,
                initial_state=decoder_initial_state
            ),
        )

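        # beam 0 is the highest-scoring hypothesis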
        sequence_id = 0
        predictions = decoder_output.predicted_ids[:, :, sequence_id]
        probabilities = extract_sequence_probabilities(
            decoder_output, self.beam_width, sequence_id=sequence_id
        )

        seq_len_diff = self.max_sequence_length - tf.shape(predictions)[1]
        if seq_len_diff > 0:
            predictions = tf.pad(
                predictions,
                [[0, 0], [0, seq_len_diff]]
            )
            probabilities = tf.pad(
                probabilities,
                [[0, 0], [0, seq_len_diff], [0, 0]],
                constant_values=1.0 / self.vocab_size
            )

        # -1 because the returned lengths include the pad symbol
        lengths = decoder_lengths[:, 0] - 1

        last_predictions = tf.gather_nd(
            predictions,
            tf.stack(
                [tf.range(tf.shape(predictions)[0]),
                 tf.maximum(lengths - 1, 0)],
                axis=1
            ),
            name='last_predictions_{}'.format(self.name)
        )


        # EXPECTED SIZE OF RETURNED TENSORS
        # lengths: shape [batch_size]
        # predictions: shape [batch_size, seq_size]
        # last_predictions: shape [batch_size]
        # probabilities: shape [batch_size, seq_size, num_classes]
        return None, lengths, predictions, last_predictions, probabilities
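For reference, a standalone sketch of what tfa.seq2seq.tile_batch does to the encoder tensors before beam search; the shapes are toy values, not the class's real dimensions:

import tensorflow as tf
import tensorflow_addons as tfa

beam_width = 3
# toy encoder output: [batch_size=2, seq_size=4, state_size=5]
encoder_output = tf.random.normal([2, 4, 5])

# each batch row is repeated beam_width times along the batch axis,
# so every beam hypothesis gets its own copy of the encoder memory
tiled = tfa.seq2seq.tile_batch(encoder_output, multiplier=beam_width)
print(tiled.shape)  # (6, 4, 5)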
Example No. 17
    def decoder_greedy(
            self,
            encoder_output,
            encoder_end_state=None,
            training=None
    ):
        # ================ Setup ================
        batch_size = encoder_output.shape[0]

        # ================ predictions =================
        greedy_sampler = tfa.seq2seq.GreedyEmbeddingSampler()

        start_tokens = tf.fill([batch_size], self.GO_SYMBOL)
        end_token = self.END_SYMBOL

        if self.attention_mechanism is not None:
            encoder_sequence_length = sequence_length_3D(encoder_output)
            self.attention_mechanism.setup_memory(
                encoder_output,
                memory_sequence_length=encoder_sequence_length
            )

        decoder_initial_state = self.build_decoder_initial_state(
            batch_size,
            encoder_state=encoder_end_state,
            dtype=tf.float32
        )

        decoder = tfa.seq2seq.BasicDecoder(
            cell=self.decoder_rnncell,
            sampler=greedy_sampler,
            output_layer=self.dense_layer
        )

        # ================ generate sequence ==================
        maximum_iterations = self.max_sequence_length

        # initialize inference decoder
        decoder_embedding_matrix = self.decoder_embedding.weights[0]
        decoder_output, decoder_state, decoder_lengths = tfa.seq2seq.dynamic_decode(
            decoder=decoder,
            output_time_major=False,
            impute_finished=False,
            maximum_iterations=maximum_iterations,
            decoder_init_input=decoder_embedding_matrix,
            decoder_init_kwargs=dict(
                start_tokens=start_tokens,
                end_token=end_token,
                initial_state=decoder_initial_state,
            ),
        )

        predictions = decoder_output.sample_id
        seq_len_diff = self.max_sequence_length - tf.shape(predictions)[1]
        if seq_len_diff > 0:
            predictions = tf.pad(
                predictions,
                [[0, 0], [0, seq_len_diff]]
            )
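        # seq_len_diff is never negative (maximum_iterations caps the decoded
        # length) and a zero-width pad is a no-op, so logits can be padded
        # unconditionally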
        logits = tf.pad(
            decoder_output.rnn_output,
            [[0, 0], [0, seq_len_diff], [0, 0]]
        )

        # -1 because they include the EOS symbol
        lengths = decoder_lengths - 1

        probabilities = tf.nn.softmax(
            logits,
            name='probabilities_{}'.format(self.name)
        )

        predictions = tf.cast(
            predictions,
            tf.int64,
            name='predictions_{}'.format(self.name)
        )

        last_predictions = tf.gather_nd(
            predictions,
            tf.stack(
                [tf.range(tf.shape(predictions)[0]),
                 tf.maximum(lengths - 1, 0)],  # -1 because of EOS
                axis=1
            ),
            name='last_predictions_{}'.format(self.name)
        )

        # EXPECTED SIZE OF RETURNED TENSORS
        # logits: shape [batch_size, seq_size, num_classes]
        # lengths: shape [batch_size]
        # predictions: shape [batch_size, seq_size]
        # last_predictions: shape [batch_size]
        # probabilities: shape [batch_size, seq_size, num_classes]
        return logits, lengths, predictions, last_predictions, probabilities
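A standalone sketch of the right-padding done above, with toy shapes (all values invented for illustration):

import tensorflow as tf

max_sequence_length = 5
# toy decoded predictions: [batch_size=2, decoded_len=3]
predictions = tf.constant([[4, 2, 0],
                           [7, 7, 2]], dtype=tf.int64)

seq_len_diff = max_sequence_length - tf.shape(predictions)[1]  # 2
# paddings [[0, 0], [0, seq_len_diff]]: nothing on the batch axis,
# seq_len_diff zeros appended on the time axis
padded = tf.pad(predictions, [[0, 0], [0, seq_len_diff]])
print(padded.shape)  # (2, 5)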
Example No. 18
    def decoder_greedy(self,
                       encoder_output,
                       encoder_end_state=None,
                       training=None):
        # ================ Setup ================
        batch_size = encoder_output.shape[0]

        # ================ predictions =================
        greedy_sampler = tfa.seq2seq.GreedyEmbeddingSampler()

        start_tokens = tf.fill([batch_size], self.GO_SYMBOL)
        end_token = self.END_SYMBOL

        if self.attention_mechanism is not None:
            encoder_sequence_length = sequence_length_3D(encoder_output)
            self.attention_mechanism.setup_memory(
                encoder_output, memory_sequence_length=encoder_sequence_length)

        decoder_initial_state = self.build_decoder_initial_state(
            batch_size, encoder_state=encoder_end_state, dtype=tf.float32)

        decoder = tfa.seq2seq.BasicDecoder(cell=self.decoder_rnncell,
                                           sampler=greedy_sampler,
                                           output_layer=self.dense_layer)

        # ================ generate logits ==================
        maximum_iterations = self.max_sequence_length

        # initialize inference decoder
        decoder_embedding_matrix = self.decoder_embedding.variables[0]
        decoder_output, decoder_state, decoder_lengths = tfa.seq2seq.dynamic_decode(
            decoder=decoder,
            output_time_major=False,
            impute_finished=False,
            maximum_iterations=maximum_iterations,
            decoder_init_input=decoder_embedding_matrix,
            decoder_init_kwargs=dict(
                start_tokens=start_tokens,
                end_token=end_token,
                initial_state=decoder_initial_state,
            ),
        )

        predictions = decoder_output.sample_id
        logits = decoder_output.rnn_output
        lengths = decoder_lengths

        probabilities = tf.nn.softmax(logits,
                                      name='probabilities_{}'.format(
                                          self.name))

        predictions = tf.cast(predictions,
                              tf.int64,
                              name='predictions_{}'.format(self.name))

        last_predictions = tf.gather_nd(predictions,
                                        tf.stack([
                                            tf.range(tf.shape(predictions)[0]),
                                            tf.maximum(lengths - 1, 0)
                                        ],
                                                 axis=1),
                                        name='last_predictions_{}'.format(
                                            self.name))

        # mask logits
        mask = tf.sequence_mask(lengths,
                                maxlen=logits.shape[1],
                                dtype=tf.float32)

        logits = logits * mask[:, :, tf.newaxis]

        return logits, lengths, predictions, last_predictions, probabilities
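Finally, to show the moving parts these decoder_greedy variants share, here is a self-contained toy run of the GreedyEmbeddingSampler / BasicDecoder / dynamic_decode pipeline; all sizes, symbols, and layers below are invented for the sketch, while the real methods wire in the class's own embedding, RNN cell, and projection layer:

import tensorflow as tf
import tensorflow_addons as tfa

vocab_size, emb_size, state_size, batch_size = 10, 8, 16, 4
GO_SYMBOL, END_SYMBOL = 1, 2  # toy start/end token ids

decoder_embedding = tf.keras.layers.Embedding(vocab_size, emb_size)
decoder_embedding.build((None,))  # force creation of the weight matrix
decoder_rnncell = tf.keras.layers.LSTMCell(state_size)
dense_layer = tf.keras.layers.Dense(vocab_size)

decoder = tfa.seq2seq.BasicDecoder(
    cell=decoder_rnncell,
    sampler=tfa.seq2seq.GreedyEmbeddingSampler(),
    output_layer=dense_layer
)

decoder_output, decoder_state, decoder_lengths = tfa.seq2seq.dynamic_decode(
    decoder=decoder,
    output_time_major=False,
    impute_finished=False,
    maximum_iterations=6,
    decoder_init_input=decoder_embedding.weights[0],
    decoder_init_kwargs=dict(
        start_tokens=tf.fill([batch_size], GO_SYMBOL),
        end_token=END_SYMBOL,
        initial_state=decoder_rnncell.get_initial_state(
            batch_size=batch_size, dtype=tf.float32
        ),
    ),
)

print(decoder_output.sample_id.shape)   # (4, t): greedy token ids, t <= 6
print(decoder_output.rnn_output.shape)  # (4, t, 10): per-step logits
print(decoder_lengths)                  # per-example decoded lengths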