Example #1
    def __init__(self, dataset, parameters):

        self.verbose = False

        self.input_token_indices = tf.placeholder(tf.int32, [None], name="input_token_indices")
        self.input_label_indices_vector = tf.placeholder(tf.float32, [None, dataset.number_of_classes],
                                                         name="input_label_indices_vector")
        self.input_label_indices_flat = tf.placeholder(tf.int32, [None], name="input_label_indices_flat")

        self.input_token_character_indices = tf.placeholder(tf.int32, [None, None], name="input_token_character_indices")

        self.input_token_lengths = tf.placeholder(tf.int32, [None], name="input_token_lengths")

        self.dropout_keep_prob = tf.placeholder(tf.float32, name="dropout_keep_prob")
        # xavier_initializer draws weights from a uniform distribution scaled by sqrt(6 / (fan_in + fan_out))
        initializer = tf.contrib.layers.xavier_initializer()

        if parameters['use_character_lstm']:
            # Character embedding layer
            with tf.variable_scope("character_embedding"):
                self.character_embedding_weights = tf.get_variable(
                    "character_embedding_weights",
                    shape=[dataset.alphabet_size, parameters['character_embedding_dimension']],
                    initializer=initializer)
                embedded_characters = tf.nn.embedding_lookup(self.character_embedding_weights,
                                                             self.input_token_character_indices,
                                                             name='embedded_characters')
                if self.verbose: print("embedded_characters: {0}".format(embedded_characters))
                utils_tf.variable_summaries(self.character_embedding_weights)

            # Character LSTM layer
            with tf.variable_scope('character_lstm') as vs:
                character_lstm_output = bidirectional_LSTM(embedded_characters,
                                                           parameters['character_lstm_hidden_state_dimension'],
                                                           initializer,
                                                           sequence_length=self.input_token_lengths,
                                                           output_sequence=False)
                self.character_lstm_variables = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                                                                  scope=vs.name)

        # Token embedding layer
        with tf.variable_scope("token_embedding"):
            self.token_embedding_weights = tf.get_variable(
                "token_embedding_weights",
                shape=[dataset.vocabulary_size, parameters['token_embedding_dimension']],
                initializer=initializer,
                trainable=not parameters['freeze_token_embeddings'])
            embedded_tokens = tf.nn.embedding_lookup(self.token_embedding_weights, self.input_token_indices)
            utils_tf.variable_summaries(self.token_embedding_weights)

        # Concatenate character LSTM outputs and token embeddings
        if parameters['use_character_lstm']:
            with tf.variable_scope("concatenate_token_and_character_vectors"):
                if self.verbose: print('embedded_tokens: {0}'.format(embedded_tokens))
                token_lstm_input = tf.concat([character_lstm_output, embedded_tokens], axis=1, name='token_lstm_input')
                if self.verbose: print("token_lstm_input: {0}".format(token_lstm_input))
        else:
            token_lstm_input = embedded_tokens

        # Add dropout
        with tf.variable_scope("dropout"):
            token_lstm_input_drop = tf.nn.dropout(token_lstm_input,
                                                  self.dropout_keep_prob,
                                                  name='token_lstm_input_drop')
            if self.verbose: print("token_lstm_input_drop: {0}".format(token_lstm_input_drop))
            token_lstm_input_drop_expanded = tf.expand_dims(token_lstm_input_drop,
                                                            axis=0,
                                                            name='token_lstm_input_drop_expanded')
            if self.verbose: print("token_lstm_input_drop_expanded: {0}".format(token_lstm_input_drop_expanded))

        # Token LSTM layer
        with tf.variable_scope('token_lstm') as vs:
            token_lstm_output = bidirectional_LSTM(token_lstm_input_drop_expanded,
                                                   parameters['token_lstm_hidden_state_dimension'],
                                                   initializer, output_sequence=True)
            token_lstm_output_squeezed = tf.squeeze(token_lstm_output, axis=0, name='token_lstm_output_squeezed')
            self.token_lstm_variables = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope=vs.name)

        # Needed only if Bidirectional LSTM is used for token level
        with tf.variable_scope("feedforward_after_lstm") as vs:
            W = tf.get_variable(
                "W",
                shape=[2 * parameters['token_lstm_hidden_state_dimension'],
                       parameters['token_lstm_hidden_state_dimension']],
                initializer=initializer)
            b = tf.Variable(tf.constant(0.0, shape=[parameters['token_lstm_hidden_state_dimension']]), name="bias")
            # Fully-connected layer with tanh activation
            outputs = tf.nn.xw_plus_b(token_lstm_output_squeezed, W, b, name="output_before_tanh")
            outputs = tf.nn.tanh(outputs, name="output_after_tanh")
            self.output_after_lstm = outputs
            utils_tf.variable_summaries(W)
            utils_tf.variable_summaries(b)
            self.token_lstm_variables += tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope=vs.name)

        with tf.variable_scope("feedforward_before_crf") as vs:
            self.W_before_crf = tf.get_variable(
                "W",
                shape=[parameters['token_lstm_hidden_state_dimension'], dataset.number_of_classes],
                initializer=initializer)
            self.b_before_crf = tf.Variable(tf.constant(0.0, shape=[dataset.number_of_classes]), name="bias")
            scores = tf.nn.xw_plus_b(outputs, self.W_before_crf, self.b_before_crf, name="scores")
            self.unary_scores = scores
            self.predictions = tf.argmax(self.unary_scores, 1, name="predictions")
            utils_tf.variable_summaries(self.W_before_crf)
            utils_tf.variable_summaries(self.b_before_crf)
            self.feedforward_variables = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope=vs.name)

        # CRF layer
        if parameters['use_crf']:
            with tf.variable_scope("crf") as vs:
                small_score = -1000.0
                large_score = 0.0
                sequence_length = tf.shape(self.unary_scores)[0]
                unary_scores_with_start_and_end = tf.concat(
                    [
                        self.unary_scores,
                        tf.tile(tf.constant(small_score, shape=[1, 2]), [sequence_length, 1])
                    ], 1)
                start_unary_scores = [[small_score] * dataset.number_of_classes + [large_score, small_score]]
                end_unary_scores = [[small_score] * dataset.number_of_classes + [small_score, large_score]]
                self.unary_scores = tf.concat([start_unary_scores, unary_scores_with_start_and_end, end_unary_scores],
                                              0)
                start_index = dataset.number_of_classes
                end_index = dataset.number_of_classes + 1
                input_label_indices_flat_with_start_and_end = tf.concat(
                    [
                        tf.constant(start_index, shape=[1]),
                        self.input_label_indices_flat,
                        tf.constant(end_index, shape=[1])
                    ], 0)

                # Apply CRF layer
                sequence_length = tf.shape(self.unary_scores)[0]
                sequence_lengths = tf.expand_dims(sequence_length, axis=0, name='sequence_lengths')
                unary_scores_expanded = tf.expand_dims(self.unary_scores, axis=0, name='unary_scores_expanded')
                input_label_indices_flat_batch = tf.expand_dims(input_label_indices_flat_with_start_and_end, axis=0,
                                                                name='input_label_indices_flat_batch')

                if self.verbose: print('unary_scores_expanded: {0}'.format(unary_scores_expanded))
                if self.verbose: print('input_label_indices_flat_batch: {0}'.format(input_label_indices_flat_batch))
                if self.verbose: print("sequence_lengths: {0}".format(sequence_lengths))
                # https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/crf
                log_likelihood, self.transition_parameters = tf.contrib.crf.crf_log_likelihood(
                    unary_scores_expanded, input_label_indices_flat_batch, sequence_lengths)
                utils_tf.variable_summaries(self.transition_parameters)
                self.loss = tf.reduce_mean(-log_likelihood, name='cross_entropy_mean_loss')
                self.accuracy = tf.constant(1)

                self.crf_variables = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope=vs.name)
        # Do not use CRF layer
        else:
            with tf.variable_scope("crf") as vs:
                self.transition_parameters = tf.get_variable(
                    "transitions",
                    shape=[dataset.number_of_classes + 2, dataset.number_of_classes + 2],
                    initializer=initializer)
                utils_tf.variable_summaries(self.transition_parameters)
                self.crf_variables = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope=vs.name)

            # Calculate mean cross-entropy loss
            with tf.variable_scope("loss"):
                losses = tf.nn.softmax_cross_entropy_with_logits(logits=self.unary_scores,
                                                                 labels=self.input_label_indices_vector,
                                                                 name='softmax')
                self.loss = tf.reduce_mean(losses, name='cross_entropy_mean_loss')
            with tf.variable_scope("accuracy"):
                correct_predictions = tf.equal(self.predictions,
                                               tf.argmax(self.input_label_indices_vector, 1))
                self.accuracy = tf.reduce_mean(tf.cast(correct_predictions, 'float'), name='accuracy')

        self.define_training_procedure(parameters, dataset)
        self.summary_op = tf.summary.merge_all()
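These examples call a bidirectional_LSTM helper that is not shown on this page. Below is a minimal sketch of what such a helper could look like, assuming TensorFlow 1.x and tf.nn.bidirectional_dynamic_rnn; the project's actual implementation may build the cells and initial states differently.

import tensorflow as tf

def bidirectional_LSTM(inputs, hidden_state_dimension, initializer,
                       sequence_length=None, output_sequence=True):
    # One LSTM cell per direction; variables are created under the caller's variable scope.
    cell_forward = tf.contrib.rnn.LSTMCell(hidden_state_dimension, initializer=initializer)
    cell_backward = tf.contrib.rnn.LSTMCell(hidden_state_dimension, initializer=initializer)
    (outputs_forward, outputs_backward), (state_forward, state_backward) = \
        tf.nn.bidirectional_dynamic_rnn(cell_forward, cell_backward, inputs,
                                        sequence_length=sequence_length, dtype=tf.float32)
    if output_sequence:
        # Per-step outputs, shape [batch_size, time, 2 * hidden_state_dimension];
        # used for the token-level LSTM whose outputs feed the feedforward layers.
        return tf.concat([outputs_forward, outputs_backward], axis=-1)
    # Final hidden states, shape [batch_size, 2 * hidden_state_dimension];
    # used to summarize each token's character sequence into a single vector.
    return tf.concat([state_forward.h, state_backward.h], axis=-1)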
Example #2
    def __init__(self, dataset, parameters):

        self.verbose = False

        # Placeholders for input, output and dropout
        self.input_token_indices = tf.placeholder(tf.int32, [None],
                                                  name="input_token_indices")
        self.input_label_indices_vector = tf.placeholder(
            tf.float32, [None, dataset.number_of_classes],
            name="input_label_indices_vector")
        self.input_label_indices_flat = tf.placeholder(
            tf.int32, [None], name="input_label_indices_flat")
        self.input_token_character_indices = tf.placeholder(
            tf.int32, [None, None], name="input_token_character_indices")
        self.input_token_lengths = tf.placeholder(tf.int32, [None],
                                                  name="input_token_lengths")
        self.dropout_keep_prob = tf.placeholder(tf.float32,
                                                name="dropout_keep_prob")

        if parameters['use_pos']:
            self.input_pos_tag_indices = tf.placeholder(
                tf.int32, [None, dataset.number_of_POS_types],
                name="input_pos_tag_indices")
            #self.input_pos_tag_indices_vector = tf.placeholder(tf.float32, [None, dataset.number_of_POS_types], name="input_pos_tag_indices_vector")
            #self.input_pos_tag_indices_flat = tf.placeholder(tf.int32, [None], name="input_pos_tag_indices_flat")
        if parameters['use_gaz']:
            self.input_gaz_indices = tf.placeholder(tf.int32, [None, 1],
                                                    name="input_gaz_indices")
        if parameters['use_aff']:
            self.input_aff_indices = tf.placeholder(tf.int32, [None, 1],
                                                    name="input_aff_indices")

        # Internal parameters
        initializer = tf.contrib.layers.xavier_initializer()

        if parameters['use_character_lstm']:
            # Character-level LSTM
            # Idea: reshape so that we have a tensor [number_of_token, max_token_length, token_embeddings_size], which we pass to the LSTM
            # Character embedding layer
            with tf.variable_scope("character_embedding"):
                self.character_embedding_weights = tf.get_variable(
                    "character_embedding_weights",
                    shape=[
                        dataset.alphabet_size,
                        parameters['character_embedding_dimension']
                    ],
                    initializer=initializer)
                embedded_characters = tf.nn.embedding_lookup(
                    self.character_embedding_weights,
                    self.input_token_character_indices,
                    name='embedded_characters')
                if self.verbose:
                    print(
                        "embedded_characters: {0}".format(embedded_characters))
                utils_tf.variable_summaries(self.character_embedding_weights)

            # Character LSTM layer
            with tf.variable_scope('character_lstm') as vs:
                character_lstm_output = bidirectional_LSTM(
                    embedded_characters,
                    parameters['character_lstm_hidden_state_dimension'],
                    initializer,
                    sequence_length=self.input_token_lengths,
                    output_sequence=False)
                self.character_lstm_variables = tf.get_collection(
                    tf.GraphKeys.TRAINABLE_VARIABLES, scope=vs.name)

        # Token embedding layer
        with tf.variable_scope("token_embedding"):
            self.token_embedding_weights = tf.get_variable(
                "token_embedding_weights",
                shape=[
                    dataset.vocabulary_size,
                    parameters['token_embedding_dimension']
                ],
                initializer=initializer,
                trainable=not parameters['freeze_token_embeddings'])
            embedded_tokens = tf.nn.embedding_lookup(
                self.token_embedding_weights, self.input_token_indices)
            utils_tf.variable_summaries(self.token_embedding_weights)

        if parameters['use_pos']:
            # POS embedding layer
            # Idea: reshape so that we have a tensor [number_of_tokens, max_token_length, token_embedding_size, number_of_pos_tag_types], which we pass to the LSTM
            with tf.variable_scope("pos_tag_embedding"):
                self.pos_tag_embedding_weights = tf.get_variable(
                    "pos_tag_embedding_weights",
                    shape=[dataset.number_of_POS_types
                           ],  #, parameters['character_embedding_dimension']],
                    initializer=initializer,
                    trainable=not parameters['freeze_pos'])
                embedded_pos_tags = tf.nn.embedding_lookup(
                    self.pos_tag_embedding_weights,
                    self.input_pos_tag_indices,
                    name='embedded_pos_tags')
                if self.verbose:
                    print("embedded_pos_tags: {0}".format(embedded_pos_tags))
                utils_tf.variable_summaries(self.pos_tag_embedding_weights)

        if parameters['use_gaz']:
            # GAZ embedding layer
            with tf.variable_scope("gaz_embedding"):
                self.gaz_embedding_weights = tf.get_variable(
                    "gaz_embedding_weights",
                    shape=[2],  #[1],
                    initializer=initializer,
                    trainable=not parameters['freeze_gaz'])
                embedded_gazs = tf.nn.embedding_lookup(
                    self.gaz_embedding_weights,
                    self.input_gaz_indices,
                    name='embedded_gazs')
                if self.verbose:
                    print("embedded_gazs: {0}".format(embedded_gazs))
                utils_tf.variable_summaries(self.gaz_embedding_weights)

        if parameters['use_aff']:
            # affix embedding layer
            with tf.variable_scope("aff_embedding"):
                self.aff_embedding_weights = tf.get_variable(
                    "aff_embedding_weights",
                    shape=[2],  #[1],
                    initializer=initializer,
                    trainable=not parameters['freeze_aff'])
                embedded_affs = tf.nn.embedding_lookup(
                    self.aff_embedding_weights,
                    self.input_aff_indices,
                    name='embedded_affs')
                if self.verbose:
                    print("embedded_affs: {0}".format(embedded_affs))
                utils_tf.variable_summaries(self.aff_embedding_weights)
        '''
        # POS LSTM layer
        with tf.variable_scope('pos_tag_lstm') as vs:
            pos_tag_lstm_output = bidirectional_LSTM(embedded_pos_tags, 1, initializer,
                                                     sequence_length=self.input_token_lengths, output_sequence=False)
            self.pos_tag_lstm_variables = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope=vs.name)
        '''

        # Concatenate character LSTM outputs, token embeddings, and (optionally) POS, gazetteer, and affix features
        # Should be refactored
        if not parameters['use_aff']:
            if not parameters['use_gaz']:
                if parameters[
                        'use_character_lstm'] and not parameters['use_pos']:
                    with tf.variable_scope(
                            "concatenate_token_and_character_vectors"):
                        if self.verbose:
                            print(
                                'embedded_tokens: {0}'.format(embedded_tokens))
                        token_lstm_input = tf.concat(
                            [character_lstm_output, embedded_tokens],
                            axis=1,
                            name='token_lstm_input')
                        if self.verbose:
                            print("token_lstm_input: {0}".format(
                                token_lstm_input))
                elif parameters['use_character_lstm'] and parameters['use_pos']:
                    with tf.variable_scope(
                            "concatenate_token_and_character_and_pos_vectors"):
                        if self.verbose:
                            print(
                                'embedded_tokens: {0}'.format(embedded_tokens))
                        if self.verbose:
                            print('embedded_pos_tags: {0}'.format(
                                embedded_pos_tags))
                        token_lstm_input = tf.concat([
                            embedded_pos_tags, character_lstm_output,
                            embedded_tokens
                        ],
                                                     axis=1,
                                                     name='token_lstm_input')
                        if self.verbose:
                            print("token_lstm_input: {0}".format(
                                token_lstm_input))
                else:
                    token_lstm_input = embedded_tokens
            else:
                if parameters[
                        'use_character_lstm'] and not parameters['use_pos']:
                    with tf.variable_scope(
                            "concatenate_token_and_character_and_gaz_vectors"):
                        if self.verbose:
                            print(
                                'embedded_tokens: {0}'.format(embedded_tokens))
                        if self.verbose:
                            print('embedded_gazs: {0}'.format(embedded_gazs))
                        token_lstm_input = tf.concat([
                            embedded_gazs, character_lstm_output,
                            embedded_tokens
                        ],
                                                     axis=1,
                                                     name='token_lstm_input')
                        if self.verbose:
                            print("token_lstm_input: {0}".format(
                                token_lstm_input))
                elif parameters['use_character_lstm'] and parameters['use_pos']:
                    with tf.variable_scope(
                            "concatenate_token_and_character_and_pos_and_gaz_vectors"
                    ):
                        if self.verbose:
                            print(
                                'embedded_tokens: {0}'.format(embedded_tokens))
                        if self.verbose:
                            print('embedded_pos_tags: {0}'.format(
                                embedded_pos_tags))
                        if self.verbose:
                            print('embedded_gazs: {0}'.format(embedded_gazs))
                        token_lstm_input = tf.concat([
                            embedded_gazs, embedded_pos_tags,
                            character_lstm_output, embedded_tokens
                        ],
                                                     axis=1,
                                                     name='token_lstm_input')
                        if self.verbose:
                            print("token_lstm_input: {0}".format(
                                token_lstm_input))
                else:
                    with tf.variable_scope(
                            "concatenate_token_and_gaz_vectors"):
                        if self.verbose:
                            print(
                                'embedded_tokens: {0}'.format(embedded_tokens))
                        if self.verbose:
                            print('embedded_gazs: {0}'.format(
                                embedded_gazs))
                        token_lstm_input = tf.concat(
                            [embedded_gazs, embedded_tokens],
                            axis=1,
                            name='token_lstm_input')
                        if self.verbose:
                            print("token_lstm_input: {0}".format(
                                token_lstm_input))
        else:
            if not parameters['use_gaz']:
                if parameters[
                        'use_character_lstm'] and not parameters['use_pos']:
                    with tf.variable_scope(
                            "concatenate_token_and_character_and_aff_vectors"):
                        if self.verbose:
                            print(
                                'embedded_tokens: {0}'.format(embedded_tokens))
                        token_lstm_input = tf.concat([
                            embedded_affs, character_lstm_output,
                            embedded_tokens
                        ],
                                                     axis=1,
                                                     name='token_lstm_input')
                        if self.verbose:
                            print("token_lstm_input: {0}".format(
                                token_lstm_input))
                elif parameters['use_character_lstm'] and parameters['use_pos']:
                    with tf.variable_scope(
                            "concatenate_token_and_character_and_pos_and_aff_vectors"):
                        if self.verbose:
                            print(
                                'embedded_tokens: {0}'.format(embedded_tokens))
                        if self.verbose:
                            print('embedded_pos_tags: {0}'.format(
                                embedded_pos_tags))
                        token_lstm_input = tf.concat([
                            embedded_affs, embedded_pos_tags,
                            character_lstm_output, embedded_tokens
                        ],
                                                     axis=1,
                                                     name='token_lstm_input')
                        if self.verbose:
                            print("token_lstm_input: {0}".format(
                                token_lstm_input))
                else:
                    token_lstm_input = embedded_tokens
            else:
                if parameters[
                        'use_character_lstm'] and not parameters['use_pos']:
                    with tf.variable_scope(
                            "concatenate_token_and_character_and_gaz_and_aff_vectors"):
                        if self.verbose:
                            print(
                                'embedded_tokens: {0}'.format(embedded_tokens))
                        if self.verbose:
                            print('embedded_gazs: {0}'.format(embedded_gazs))
                        token_lstm_input = tf.concat([
                            embedded_affs, embedded_gazs,
                            character_lstm_output, embedded_tokens
                        ],
                                                     axis=1,
                                                     name='token_lstm_input')
                        if self.verbose:
                            print("token_lstm_input: {0}".format(
                                token_lstm_input))
                elif parameters['use_character_lstm'] and parameters['use_pos']:
                    with tf.variable_scope(
                            "concatenate_token_and_character_and_pos_and_gaz_and_aff_vectors"
                    ):
                        if self.verbose:
                            print(
                                'embedded_tokens: {0}'.format(embedded_tokens))
                        if self.verbose:
                            print('embedded_pos_tags: {0}'.format(
                                embedded_pos_tags))
                        if self.verbose:
                            print('embedded_gazs: {0}'.format(embedded_gazs))
                        token_lstm_input = tf.concat([
                            embedded_affs, embedded_gazs, embedded_pos_tags,
                            character_lstm_output, embedded_tokens
                        ],
                                                     axis=1,
                                                     name='token_lstm_input')
                        if self.verbose:
                            print("token_lstm_input: {0}".format(
                                token_lstm_input))
                else:
                    with tf.variable_scope(
                            "concatenate_token_and_gaz_and_aff_vectors"):
                        if self.verbose:
                            print(
                                'embedded_tokens: {0}'.format(embedded_tokens))
                        if self.verbose:
                            print('embedded_gazs: {0}'.format(
                                embedded_gazs))
                        token_lstm_input = tf.concat(
                            [embedded_affs, embedded_gazs, embedded_tokens],
                            axis=1,
                            name='token_lstm_input')
                        if self.verbose:
                            print("token_lstm_input: {0}".format(
                                token_lstm_input))

        # Add dropout
        with tf.variable_scope("dropout"):
            token_lstm_input_drop = tf.nn.dropout(token_lstm_input,
                                                  self.dropout_keep_prob,
                                                  name='token_lstm_input_drop')
            if self.verbose:
                print(
                    "token_lstm_input_drop: {0}".format(token_lstm_input_drop))
            # https://www.tensorflow.org/api_guides/python/contrib.rnn
            # Prepare data shape to match `rnn` function requirements
            # Current data input shape: (batch_size, n_steps, n_input)
            # Required shape: 'n_steps' tensors list of shape (batch_size, n_input)
            token_lstm_input_drop_expanded = tf.expand_dims(
                token_lstm_input_drop,
                axis=0,
                name='token_lstm_input_drop_expanded')
            if self.verbose:
                print("token_lstm_input_drop_expanded: {0}".format(
                    token_lstm_input_drop_expanded))

        # Token LSTM layer
        with tf.variable_scope('token_lstm') as vs:
            token_lstm_output = bidirectional_LSTM(
                token_lstm_input_drop_expanded,
                parameters['token_lstm_hidden_state_dimension'],
                initializer,
                output_sequence=True)
            token_lstm_output_squeezed = tf.squeeze(token_lstm_output, axis=0)
            self.token_lstm_variables = tf.get_collection(
                tf.GraphKeys.TRAINABLE_VARIABLES, scope=vs.name)

        # Needed only if Bidirectional LSTM is used for token level
        with tf.variable_scope("feedforward_after_lstm") as vs:
            W = tf.get_variable(
                "W",
                shape=[
                    2 * parameters['token_lstm_hidden_state_dimension'],
                    parameters['token_lstm_hidden_state_dimension']
                ],
                initializer=initializer)
            b = tf.Variable(tf.constant(
                0.0, shape=[parameters['token_lstm_hidden_state_dimension']]),
                            name="bias")
            outputs = tf.nn.xw_plus_b(token_lstm_output_squeezed,
                                      W,
                                      b,
                                      name="output_before_tanh")
            outputs = tf.nn.tanh(outputs, name="output_after_tanh")
            utils_tf.variable_summaries(W)
            utils_tf.variable_summaries(b)
            self.token_lstm_variables += tf.get_collection(
                tf.GraphKeys.TRAINABLE_VARIABLES, scope=vs.name)

        with tf.variable_scope("feedforward_before_crf") as vs:
            W = tf.get_variable(
                "W",
                shape=[
                    parameters['token_lstm_hidden_state_dimension'],
                    dataset.number_of_classes
                ],
                initializer=initializer)
            b = tf.Variable(tf.constant(0.0,
                                        shape=[dataset.number_of_classes]),
                            name="bias")
            scores = tf.nn.xw_plus_b(outputs, W, b, name="scores")
            self.unary_scores = scores
            self.predictions = tf.argmax(self.unary_scores,
                                         1,
                                         name="predictions")
            utils_tf.variable_summaries(W)
            utils_tf.variable_summaries(b)
            self.feedforward_variables = tf.get_collection(
                tf.GraphKeys.TRAINABLE_VARIABLES, scope=vs.name)

        # CRF layer
        if parameters['use_crf']:
            with tf.variable_scope("crf") as vs:
                # Add start and end tokens
                small_score = -1000.0
                large_score = 0.0
                sequence_length = tf.shape(self.unary_scores)[0]
                unary_scores_with_start_and_end = tf.concat([
                    self.unary_scores,
                    tf.tile(tf.constant(small_score, shape=[1, 2]),
                            [sequence_length, 1])
                ], 1)
                start_unary_scores = [
                    [small_score] * dataset.number_of_classes +
                    [large_score, small_score]
                ]
                end_unary_scores = [[small_score] * dataset.number_of_classes +
                                    [small_score, large_score]]
                self.unary_scores = tf.concat([
                    start_unary_scores, unary_scores_with_start_and_end,
                    end_unary_scores
                ], 0)
                start_index = dataset.number_of_classes
                end_index = dataset.number_of_classes + 1
                input_label_indices_flat_with_start_and_end = tf.concat([
                    tf.constant(start_index, shape=[1]),
                    self.input_label_indices_flat,
                    tf.constant(end_index, shape=[1])
                ], 0)

                # Apply CRF layer
                sequence_length = tf.shape(self.unary_scores)[0]
                sequence_lengths = tf.expand_dims(sequence_length,
                                                  axis=0,
                                                  name='sequence_lengths')
                unary_scores_expanded = tf.expand_dims(
                    self.unary_scores, axis=0, name='unary_scores_expanded')
                input_label_indices_flat_batch = tf.expand_dims(
                    input_label_indices_flat_with_start_and_end,
                    axis=0,
                    name='input_label_indices_flat_batch')
                if self.verbose:
                    print('unary_scores_expanded: {0}'.format(
                        unary_scores_expanded))
                if self.verbose:
                    print('input_label_indices_flat_batch: {0}'.format(
                        input_label_indices_flat_batch))
                if self.verbose:
                    print("sequence_lengths: {0}".format(sequence_lengths))
                # https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/crf
                # Compute the log-likelihood of the gold sequences and keep the transition params for inference at test time.
                self.transition_parameters = tf.get_variable(
                    "transitions",
                    shape=[
                        dataset.number_of_classes + 2,
                        dataset.number_of_classes + 2
                    ],
                    initializer=initializer)
                utils_tf.variable_summaries(self.transition_parameters)
                log_likelihood, _ = tf.contrib.crf.crf_log_likelihood(
                    unary_scores_expanded,
                    input_label_indices_flat_batch,
                    sequence_lengths,
                    transition_params=self.transition_parameters)
                self.loss = tf.reduce_mean(-log_likelihood,
                                           name='cross_entropy_mean_loss')
                self.accuracy = tf.constant(1)

                self.crf_variables = tf.get_collection(
                    tf.GraphKeys.TRAINABLE_VARIABLES, scope=vs.name)

        # Do not use CRF layer
        else:
            with tf.variable_scope("crf") as vs:
                self.transition_parameters = tf.get_variable(
                    "transitions",
                    shape=[
                        dataset.number_of_classes + 2,
                        dataset.number_of_classes + 2
                    ],
                    initializer=initializer)
                utils_tf.variable_summaries(self.transition_parameters)
                self.crf_variables = tf.get_collection(
                    tf.GraphKeys.TRAINABLE_VARIABLES, scope=vs.name)

            # Calculate mean cross-entropy loss
            with tf.variable_scope("loss"):
                losses = tf.nn.softmax_cross_entropy_with_logits(
                    logits=self.unary_scores,
                    labels=self.input_label_indices_vector,
                    name='softmax')
                self.loss = tf.reduce_mean(losses,
                                           name='cross_entropy_mean_loss')
            with tf.variable_scope("accuracy"):
                correct_predictions = tf.equal(
                    self.predictions,
                    tf.argmax(self.input_label_indices_vector, 1))
                self.accuracy = tf.reduce_mean(tf.cast(correct_predictions,
                                                       'float'),
                                               name='accuracy')

        self.define_training_procedure(parameters)
        self.summary_op = tf.summary.merge_all()
        self.saver = tf.train.Saver(
            max_to_keep=parameters['maximum_number_of_epochs']
        )  # defaults to saving all variables
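The comment above notes that the transition parameters are kept for inference at test time. A hedged sketch of how decoding could then be done with tf.contrib.crf.viterbi_decode follows; the sess, model, and feed_dict names are assumptions and not part of the code on this page.

# Run the graph to obtain emission scores and the learned transition matrix,
# then decode the best label sequence outside the graph.
unary_scores, transition_params = sess.run(
    [model.unary_scores, model.transition_parameters], feed_dict=feed_dict)
# viterbi_decode expects a single [sequence_length, num_tags] score matrix.
viterbi_sequence, viterbi_score = tf.contrib.crf.viterbi_decode(
    unary_scores, transition_params)
# Drop the artificial start and end positions that were prepended/appended
# before crf_log_likelihood.
predicted_label_indices = viterbi_sequence[1:-1]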
Example #3
    def __init__(self, dataset, parameters):

        self.verbose = False

        # Placeholders for input, output and dropout
        self.input_token_indices = tf.placeholder(tf.int32, [None,None], name="input_token_indices") #[batch, sequence_length]
        self.input_sequence_lengths = tf.placeholder(tf.int32, [None], name="input_sequence_lengths") #[batch_size]
        self.input_label_indices_vector = tf.placeholder(tf.float32, [None, None, dataset.number_of_classes], name="input_label_indices_vector")
        self.input_label_indices_flat = tf.placeholder(tf.int32, [None,None], name="input_label_indices_flat") #[batch_size, max_sentence_length]
        self.input_token_character_indices = tf.placeholder(tf.int32, [None,None, None], name="input_token_character_indices")# [batch, sequence_length, token_length]
        self.input_token_lengths = tf.placeholder(tf.int32, [None,None], name="input_token_lengths") # [batch, sequence_length]
        self.dropout_keep_prob = tf.placeholder(tf.float32, name="dropout_keep_prob")

        self.embedding_dim = parameters['embedding_dimension']
        batch_size = tf.shape(self.input_token_character_indices)[0]
        sentence_size = tf.shape(self.input_token_character_indices)[1]
        token_size = tf.shape(self.input_token_character_indices)[2]
        # Internal parameters
        initializer = tf.contrib.layers.xavier_initializer()

        if parameters['use_character_lstm']:
            # Character-level LSTM
            # Idea: reshape so that we have a tensor [number_of_token, max_token_length, token_embeddings_size], which we pass to the LSTM

            # Character embedding layer
            with tf.variable_scope("character_embedding"):
                self.character_embedding_weights = tf.get_variable(
                    "character_embedding_weights",
                    shape=[dataset.alphabet_size, parameters['character_embedding_dimension']],
                    initializer=initializer)
                embedded_characters = tf.nn.embedding_lookup(self.character_embedding_weights, self.input_token_character_indices, name='embedded_characters')
                if self.verbose: print("embedded_characters: {0}".format(embedded_characters))
                utils_tf.variable_summaries(self.character_embedding_weights)

            # Character LSTM layer
            with tf.variable_scope('character_lstm') as vs:
                #batch_size = tf.shape(embedded_characters)[0]
                #sentence_size = tf.shape(embedded_characters)[1]
                #token_size = tf.shape(embedded_characters)[2]
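                # Collapse the batch and sentence dimensions so that every token becomes one
                # character sequence: [batch*sentence, max_token_length, character_embedding_dimension].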
                embedded_characters = tf.reshape(embedded_characters,
                                                 [batch_size*sentence_size, token_size, parameters['character_embedding_dimension']])
                input_token_lengths = tf.reshape(self.input_token_lengths, [-1])
                if parameters['gru_neuron']:
                    character_lstm_output = bidirectional_GRU(embedded_characters,
                                                               parameters['character_lstm_hidden_state_dimension'],
                                                               parameters['character_hidden_layer'],
                                                               initializer,
                                                               sequence_length=input_token_lengths,
                                                               output_sequence=False)

                else:
                    character_lstm_output = bidirectional_LSTM(embedded_characters, parameters['character_lstm_hidden_state_dimension'], initializer,
                                                           sequence_length=input_token_lengths, output_sequence=False)
                self.character_lstm_variables = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope=vs.name)

                character_lstm_output = tf.reshape(character_lstm_output, [batch_size, sentence_size, 2*parameters['character_lstm_hidden_state_dimension']])

        # Token embedding layer
        with tf.variable_scope("token_embedding"):
            self.token_embedding_weights = tf.get_variable(
                "token_embedding_weights",
                shape=[dataset.vocabulary_size, parameters['embedding_dimension']],
                initializer=initializer,
                trainable=not parameters['freeze_token_embeddings'])
            embedded_tokens = tf.nn.embedding_lookup(self.token_embedding_weights, self.input_token_indices)
            utils_tf.variable_summaries(self.token_embedding_weights)

        # Concatenate character LSTM outputs and token embeddings
        if parameters['use_character_lstm']:
            with tf.variable_scope("concatenate_token_and_character_vectors"):
                if self.verbose: print('embedded_tokens: {0}'.format(embedded_tokens))
                token_lstm_input = tf.concat([character_lstm_output, embedded_tokens], axis=-1, name='token_lstm_input')
                if self.verbose: print("token_lstm_input: {0}".format(token_lstm_input))
        else:
            token_lstm_input = embedded_tokens

        # Add dropout
        with tf.variable_scope("dropout"):
            token_lstm_input_drop = tf.nn.dropout(token_lstm_input, self.dropout_keep_prob, name='token_lstm_input_drop')
            if self.verbose: print("token_lstm_input_drop: {0}".format(token_lstm_input_drop))
            # https://www.tensorflow.org/api_guides/python/contrib.rnn
            # Prepare data shape to match `rnn` function requirements
            # Current data input shape: (batch_size, n_steps, n_input)
            # Required shape: 'n_steps' tensors list of shape (batch_size, n_input)
            # token_lstm_input_drop_expanded = tf.expand_dims(token_lstm_input_drop, axis=0, name='token_lstm_input_drop_expanded')
            # In this batched version the input already has a batch dimension, so no expand_dims is needed here.
            token_lstm_input_drop_expanded = token_lstm_input_drop
            if self.verbose: print("token_lstm_input_drop_expanded: {0}".format(token_lstm_input_drop_expanded))

        # Token LSTM layer
        with tf.variable_scope('token_lstm') as vs:
            if parameters['gru_neuron']:
                token_lstm_output = bidirectional_GRU(token_lstm_input_drop_expanded,
                                                       parameters['token_lstm_hidden_state_dimension'],
                                                       parameters['token_hidden_layer'],
                                                       initializer=initializer,
                                                       sequence_length = self.input_sequence_lengths,
                                                       output_sequence=True)
            else:
                token_lstm_output = bidirectional_LSTM(token_lstm_input_drop_expanded,
                                                       parameters['token_lstm_hidden_state_dimension'],
                                                       initializer=initializer,
                                                       sequence_length = self.input_sequence_lengths,
                                                       output_sequence=True)
            self.token_lstm_variables = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope=vs.name)

        # Needed only if Bidirectional LSTM is used for token level
        with tf.variable_scope("feedforward_after_lstm") as vs:
            token_lstm_output_squeezed = tf.reshape(token_lstm_output, [batch_size * sentence_size,2*parameters['token_lstm_hidden_state_dimension']])

            W = tf.get_variable(
                "W",
                shape=[2 * parameters['token_lstm_hidden_state_dimension'], parameters['token_lstm_hidden_state_dimension']],
                initializer=initializer)
            b = tf.Variable(tf.constant(0.0, shape=[parameters['token_lstm_hidden_state_dimension']]), name="bias")
            outputs = tf.nn.xw_plus_b(token_lstm_output_squeezed, W, b, name="output_before_tanh")
            outputs = tf.nn.tanh(outputs, name="output_after_tanh")
            utils_tf.variable_summaries(W)
            utils_tf.variable_summaries(b)
            self.token_lstm_variables += tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope=vs.name)

        with tf.variable_scope("feedforward_before_crf") as vs:
            W = tf.get_variable(
                "W",
                shape=[parameters['token_lstm_hidden_state_dimension'], dataset.number_of_classes],
                initializer=initializer)
            b = tf.Variable(tf.constant(0.0, shape=[dataset.number_of_classes]), name="bias")
            scores = tf.nn.xw_plus_b(outputs, W, b, name="scores")
            self.unary_scores = tf.reshape(scores, [batch_size, sentence_size, dataset.number_of_classes])
            self.predictions = tf.argmax(self.unary_scores, 2, name="predictions")
            utils_tf.variable_summaries(W)
            utils_tf.variable_summaries(b)
            self.feedforward_variables = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope=vs.name)

        # (Optional) dropout on the unary scores, currently disabled
        #with tf.variable_scope("dropout"):
        #    self.unary_scores = tf.nn.dropout(self.unary_scores, self.dropout_keep_prob,
        #                                          name='crf_input_drop')
        #    if self.verbose: print("crf_input_drop: {0}".format(self.unary_scores))
        # CRF layer
        if parameters['use_crf']:
            with tf.variable_scope("crf") as vs:
                # Add start and end tokens
                #small_score = -1000.0
                #large_score = 0.0
                #sequence_length = tf.shape(self.unary_scores)[1]
                #tmp = tf.tile( tf.constant(small_score, shape=[1, 2]) , [sentence_size, 1])
                #unary_scores_with_start_and_end = tf.concat([self.unary_scores, tmp], 1)
                #start_unary_scores = [[small_score] * dataset.number_of_classes + [large_score, small_score]]
                #end_unary_scores = [[small_score] * dataset.number_of_classes + [small_score, large_score]]
                #self.unary_scores = tf.concat([start_unary_scores, unary_scores_with_start_and_end, end_unary_scores], 0)
                #start_index = dataset.number_of_classes
                #end_index = dataset.number_of_classes + 1
                #input_label_indices_flat_with_start_and_end = tf.concat([ tf.constant(start_index, shape=[1]), self.input_label_indices_flat, tf.constant(end_index, shape=[1]) ], 0)

                # Apply CRF layer
                #sequence_length = tf.shape(self.unary_scores)[0]
                #sequence_lengths = tf.expand_dims(sequence_length, axis=0, name='sequence_lengths')
                #unary_scores_expanded = tf.expand_dims(self.unary_scores, axis=0, name='unary_scores_expanded')
                #input_label_indices_flat_batch = tf.expand_dims(input_label_indices_flat_with_start_and_end, axis=0, name='input_label_indices_flat_batch')
                #if self.verbose: print('unary_scores_expanded: {0}'.format(unary_scores_expanded))
                #if self.verbose: print('input_label_indices_flat_batch: {0}'.format(input_label_indices_flat_batch))
                #if self.verbose: print("sequence_lengths: {0}".format(sequence_lengths))
                # https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/crf
                # Compute the log-likelihood of the gold sequences and keep the transition params for inference at test time.

                self.transition_parameters=tf.get_variable(
                    "transitions",
                    #shape=[dataset.number_of_classes+2, dataset.number_of_classes+2],
                    shape=[dataset.number_of_classes, dataset.number_of_classes],
                    initializer=initializer)
                utils_tf.variable_summaries(self.transition_parameters)
                #log_likelihood, _ = tf.contrib.crf.crf_log_likelihood(
                #    unary_scores_expanded, input_label_indices_flat_batch, sequence_lengths, transition_params=self.transition_parameters)

                log_likelihood, _ = tf.contrib.crf.crf_log_likelihood(
                         self.unary_scores, self.input_label_indices_flat, self.input_sequence_lengths, transition_params=self.transition_parameters)

                #regularizer = tf.nn.l2_loss(W)
                self.loss =  tf.reduce_mean(-log_likelihood, name='cross_entropy_mean_loss')
                self.accuracy = tf.constant(1)

                self.crf_variables = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope=vs.name)

        # Do not use CRF layer
        else:
            #with tf.variable_scope("crf") as vs:
            #    self.transition_parameters = tf.get_variable(
            #        "transitions",
            #        shape=[dataset.number_of_classes+2, dataset.number_of_classes+2],
            #        initializer=initializer)
            #    utils_tf.variable_summaries(self.transition_parameters)
            #    self.crf_variables = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope=vs.name)

            # Calculate mean cross-entropy loss
            with tf.variable_scope("loss"):
                self.unary_scores = tf.reshape(self.unary_scores, [-1,dataset.number_of_classes])
                self.input_label_indices_vector = tf.reshape(self.input_label_indices_vector, [-1,dataset.number_of_classes])
                losses = tf.nn.softmax_cross_entropy_with_logits(logits=self.unary_scores, labels=self.input_label_indices_vector, name='softmax')
                mask = tf.sequence_mask(self.input_sequence_lengths)
                losses = tf.boolean_mask(losses, mask)
                self.loss = tf.reduce_mean(losses, name='cross_entropy_mean_loss')
            with tf.variable_scope("accuracy"):
                correct_predictions = tf.equal(tf.reshape(self.predictions, [-1]), tf.argmax(self.input_label_indices_vector, 1))
                self.accuracy = tf.reduce_mean(tf.cast(correct_predictions, 'float'), name='accuracy')
                self.unary_scores = tf.reshape(self.unary_scores, [batch_size, -1, dataset.number_of_classes])
                self.input_label_indices_vector = tf.reshape(self.input_label_indices_vector,
                                                             [batch_size, -1, dataset.number_of_classes])
        self.define_training_procedure(parameters)
        self.summary_op = tf.summary.merge_all()
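Example #3 switches to truly batched placeholders ([batch, sentence_length] token indices, [batch, sentence_length, token_length] character indices, plus per-sentence and per-token lengths). A minimal sketch of how such a feed could be padded with NumPy is shown below; pad_batch and its argument names are illustrative assumptions, not part of the project.

import numpy as np

def pad_batch(token_indices, char_indices, label_indices, num_classes):
    # token_indices: list of sentences, each a list of token ids
    # char_indices: list of sentences, each a list of per-token character id lists
    # label_indices: list of sentences, each a list of label ids
    batch_size = len(token_indices)
    max_sentence = max(len(sentence) for sentence in token_indices)
    max_token = max(len(token) for sentence in char_indices for token in sentence)

    tokens = np.zeros((batch_size, max_sentence), dtype=np.int32)
    chars = np.zeros((batch_size, max_sentence, max_token), dtype=np.int32)
    token_lengths = np.zeros((batch_size, max_sentence), dtype=np.int32)
    labels_flat = np.zeros((batch_size, max_sentence), dtype=np.int32)
    labels_vector = np.zeros((batch_size, max_sentence, num_classes), dtype=np.float32)
    sequence_lengths = np.array([len(sentence) for sentence in token_indices], dtype=np.int32)

    for i, sentence in enumerate(token_indices):
        tokens[i, :len(sentence)] = sentence
        labels_flat[i, :len(sentence)] = label_indices[i]
        for j, label in enumerate(label_indices[i]):
            labels_vector[i, j, label] = 1.0  # one-hot labels for the softmax loss
        for j, token_chars in enumerate(char_indices[i]):
            chars[i, j, :len(token_chars)] = token_chars
            token_lengths[i, j] = len(token_chars)

    # The *_lengths arrays let tf.sequence_mask and the CRF ignore the zero padding.
    return tokens, chars, token_lengths, labels_flat, labels_vector, sequence_lengths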