    def make_model(self, is_train: bool = False) -> tf.Tensor:
        with tf.variable_scope("self_attention_encoder"):
            self._make_placeholders()

            # Configure a BERT-style Transformer encoder from the model's
            # hyperparameters.
            config = BertConfig(
                vocab_size=self.get_hyper('token_vocab_size'),
                hidden_size=self.get_hyper('self_attention_hidden_size'),
                num_hidden_layers=self.get_hyper('self_attention_num_layers'),
                num_attention_heads=self.get_hyper('self_attention_num_heads'),
                intermediate_size=self.get_hyper(
                    'self_attention_intermediate_size'))

            model = BertModel(config=config,
                              is_training=is_train,
                              input_ids=self.placeholders['tokens'],
                              input_mask=self.placeholders['tokens_mask'],
                              use_one_hot_embeddings=False)

            output_pool_mode = self.get_hyper(
                'self_attention_pool_mode').lower()
            # In 'bert' mode, use BERT's pooled output (a transform of the
            # first token's representation); otherwise pool over all per-token
            # outputs.
            if output_pool_mode == 'bert':
                return model.get_pooled_output()
            else:
                seq_token_embeddings = model.get_sequence_output()
                seq_token_masks = self.placeholders['tokens_mask']
                seq_token_lengths = tf.reduce_sum(seq_token_masks, axis=1)  # B
                return pool_sequence_embedding(
                    output_pool_mode,
                    sequence_token_embeddings=seq_token_embeddings,
                    sequence_lengths=seq_token_lengths,
                    sequence_token_masks=seq_token_masks)
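
All of the encoders below funnel their per-token representations through pool_sequence_embedding, whose definition is not shown here. The following is a minimal sketch assuming the signature used in the calls above and supporting only 'mean' and 'max' pooling; the real helper may support additional modes (e.g. a weighted mean).

import tensorflow as tf

def pool_sequence_embedding(pool_mode, sequence_token_embeddings,
                            sequence_lengths, sequence_token_masks):
    # Collapse B x T x D token embeddings into a single B x D vector,
    # ignoring padded positions via the 0/1 token mask.
    mask = tf.expand_dims(tf.cast(sequence_token_masks, tf.float32), -1)  # B x T x 1
    if pool_mode == 'mean':
        summed = tf.reduce_sum(sequence_token_embeddings * mask, axis=1)  # B x D
        lengths = tf.expand_dims(tf.cast(sequence_lengths, tf.float32), -1)  # B x 1
        return summed / tf.maximum(lengths, 1.0)
    elif pool_mode == 'max':
        # Push padded positions to a large negative value so they never win the max.
        return tf.reduce_max(sequence_token_embeddings + (1.0 - mask) * -1e9, axis=1)
    else:
        raise ValueError('Unknown pool mode: %s' % pool_mode)
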
    def make_model(self, is_train: bool = False) -> tf.Tensor:
        with tf.variable_scope("self_attention_encoder"):
            self._make_placeholders()

            seq_tokens_embeddings = self.embedding_layer(
                self.placeholders['tokens'])

            activation_fun = get_activation(self.get_hyper('1dcnn_activation'))
            current_embeddings = seq_tokens_embeddings
            num_filters_and_width = zip(self.get_hyper('1dcnn_layer_list'),
                                        self.get_hyper('1dcnn_kernel_width'))
            # Stack of 1-D convolutions over the token sequence, one layer per
            # (num_filters, kernel_width) pair.
            for layer_idx, (num_filters, kernel_width) in enumerate(
                    num_filters_and_width):
                next_embeddings = tf.layers.conv1d(inputs=current_embeddings,
                                                   filters=num_filters,
                                                   kernel_size=kernel_width,
                                                   padding="same")

                # Add residual connections past the first layer.
                if self.get_hyper(
                        '1dcnn_add_residual_connections') and layer_idx > 0:
                    next_embeddings += current_embeddings

                current_embeddings = activation_fun(next_embeddings)

                current_embeddings = tf.nn.dropout(
                    current_embeddings,
                    keep_prob=self.placeholders['dropout_keep_rate'])

            config = BertConfig(
                vocab_size=self.get_hyper('token_vocab_size'),
                hidden_size=self.get_hyper('self_attention_hidden_size'),
                num_hidden_layers=self.get_hyper('self_attention_num_layers'),
                num_attention_heads=self.get_hyper('self_attention_num_heads'),
                intermediate_size=self.get_hyper(
                    'self_attention_intermediate_size'))

            # Feed the convolved token representations into the Transformer in
            # place of BERT's own embedding lookup ('embedded_input' is not an
            # argument of the upstream BertModel; it presumably comes from a
            # locally modified copy).
            model = BertModel(config=config,
                              is_training=is_train,
                              input_ids=self.placeholders['tokens'],
                              input_mask=self.placeholders['tokens_mask'],
                              use_one_hot_embeddings=False,
                              embedded_input=current_embeddings)

            output_pool_mode = self.get_hyper(
                'self_attention_pool_mode').lower()
            if output_pool_mode == 'bert':
                return model.get_pooled_output()
            else:
                seq_token_embeddings = model.get_sequence_output()
                seq_token_masks = self.placeholders['tokens_mask']
                seq_token_lengths = tf.reduce_sum(seq_token_masks, axis=1)  # B
                return pool_sequence_embedding(
                    output_pool_mode,
                    sequence_token_embeddings=seq_token_embeddings,
                    sequence_lengths=seq_token_lengths,
                    sequence_token_masks=seq_token_masks)
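
The get_hyper calls above read from the model's hyperparameter dictionary. For reference, a plausible set of entries for this convolutional self-attention encoder is sketched below; the key names are taken from the code, but the values are illustrative assumptions only.

conv_self_attention_hypers = {
    'token_vocab_size': 10000,
    '1dcnn_activation': 'tanh',
    '1dcnn_layer_list': [128, 128, 128],          # filters per conv layer
    '1dcnn_kernel_width': [8, 8, 8],              # kernel width per conv layer
    '1dcnn_add_residual_connections': True,
    'self_attention_hidden_size': 128,
    'self_attention_num_layers': 2,
    'self_attention_num_heads': 8,
    'self_attention_intermediate_size': 512,
    'self_attention_pool_mode': 'weighted_mean',  # or 'bert' for the pooled output
}
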
    def make_model(self, is_train: bool = False) -> tf.Tensor:
        with tf.variable_scope("nbow_encoder"):
            self._make_placeholders()

            seq_tokens_embeddings = self.embedding_layer(
                self.placeholders['tokens'])
            seq_token_mask = self.placeholders['tokens_mask']
            seq_token_lengths = tf.reduce_sum(seq_token_mask, axis=1)  # B
            return pool_sequence_embedding(
                self.get_hyper('nbow_pool_mode').lower(),
                sequence_token_embeddings=seq_tokens_embeddings,
                sequence_lengths=seq_token_lengths,
                sequence_token_masks=seq_token_mask)
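
self.embedding_layer is defined elsewhere in the encoder base class. A minimal sketch, assuming a single learned embedding matrix (the vocabulary size and dimension below are placeholders):

import tensorflow as tf

def embedding_layer(token_ids, vocab_size=10000, embedding_dim=128):
    # Map B x T integer token ids to B x T x D learned embeddings.
    token_embeddings = tf.get_variable(
        'token_embeddings',
        shape=[vocab_size, embedding_dim],
        initializer=tf.glorot_uniform_initializer())
    return tf.nn.embedding_lookup(token_embeddings, token_ids)
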
    def make_model(self, is_train: bool = False) -> tf.Tensor:
        with tf.variable_scope("rnn_encoder"):
            self._make_placeholders()

            self.placeholders['tokens_lengths'] = \
                tf.placeholder(tf.int32,
                               shape=[None],
                               name='tokens_lengths')

            self.placeholders['rnn_dropout_keep_rate'] = \
                tf.placeholder(tf.float32,
                               shape=[],
                               name='rnn_dropout_keep_rate')

            self.placeholders['rnn_recurrent_dropout_keep_rate'] = \
                tf.placeholder(tf.float32,
                               shape=[],
                               name='rnn_recurrent_dropout_keep_rate')

            seq_tokens = self.placeholders['tokens']
            seq_tokens_embeddings = self.embedding_layer(seq_tokens)
            seq_tokens_lengths = self.placeholders['tokens_lengths']

            rnn_final_state, token_embeddings = self._encode_with_rnn(
                seq_tokens_embeddings, seq_tokens_lengths)

            output_pool_mode = self.get_hyper('rnn_pool_mode').lower()
            if output_pool_mode == 'rnn_final':
                return rnn_final_state
            else:
                # Build a B x T 0/1 mask marking the positions that fall
                # before each sequence's true length.
                token_mask = tf.expand_dims(tf.range(tf.shape(seq_tokens)[1]),
                                            axis=0)  # 1 x T
                token_mask = tf.tile(
                    token_mask,
                    multiples=(tf.shape(seq_tokens_lengths)[0], 1))  # B x T
                token_mask = tf.cast(
                    token_mask < tf.expand_dims(seq_tokens_lengths, axis=-1),
                    dtype=tf.float32)  # B x T
                return pool_sequence_embedding(
                    output_pool_mode,
                    sequence_token_embeddings=token_embeddings,
                    sequence_lengths=seq_tokens_lengths,
                    sequence_token_masks=token_mask)
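
self._encode_with_rnn is not shown above. The sketch below, a plain bidirectional LSTM, matches the (final state, per-token outputs) return shape that the code expects; the choice of LSTM cell and the hidden size are assumptions.

import tensorflow as tf

def encode_with_birnn(inputs, lengths, hidden_dim=128):
    # inputs: B x T x D token embeddings; lengths: B true sequence lengths.
    cell_fw = tf.nn.rnn_cell.LSTMCell(hidden_dim)
    cell_bw = tf.nn.rnn_cell.LSTMCell(hidden_dim)
    (out_fw, out_bw), (state_fw, state_bw) = tf.nn.bidirectional_dynamic_rnn(
        cell_fw, cell_bw, inputs, sequence_length=lengths, dtype=tf.float32)
    final_state = tf.concat([state_fw.h, state_bw.h], axis=-1)  # B x 2H
    token_outputs = tf.concat([out_fw, out_bw], axis=-1)        # B x T x 2H
    return final_state, token_outputs
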
    def make_model(self, is_train: bool = False) -> tf.Tensor:
        with tf.variable_scope("1dcnn_encoder"):
            self._make_placeholders()

            seq_tokens_embeddings = self.embedding_layer(
                self.placeholders['tokens'])
            seq_tokens_embeddings = self.__add_position_encoding(
                seq_tokens_embeddings)

            activation_fun = get_activation(self.get_hyper('1dcnn_activation'))
            current_embeddings = seq_tokens_embeddings
            num_filters_and_width = zip(self.get_hyper('1dcnn_layer_list'),
                                        self.get_hyper('1dcnn_kernel_width'))
            for layer_idx, (num_filters, kernel_width) in enumerate(
                    num_filters_and_width):
                next_embeddings = tf.layers.conv1d(inputs=current_embeddings,
                                                   filters=num_filters,
                                                   kernel_size=kernel_width,
                                                   padding="same")

                # Add residual connections past the first layer.
                if self.get_hyper(
                        '1dcnn_add_residual_connections') and layer_idx > 0:
                    next_embeddings += current_embeddings

                current_embeddings = activation_fun(next_embeddings)

                current_embeddings = tf.nn.dropout(
                    current_embeddings,
                    keep_prob=self.placeholders['dropout_keep_rate'])

            seq_token_mask = self.placeholders['tokens_mask']
            seq_token_lengths = tf.reduce_sum(seq_token_mask, axis=1)  # B
            return pool_sequence_embedding(
                self.get_hyper('1dcnn_pool_mode').lower(),
                sequence_token_embeddings=current_embeddings,
                sequence_lengths=seq_token_lengths,
                sequence_token_masks=seq_token_mask)
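
self.__add_position_encoding is likewise defined elsewhere. A common choice is the fixed sinusoidal signal from the Transformer paper; the sketch below assumes that choice and a static, even embedding dimension.

import tensorflow as tf

def add_position_encoding(seq_embeddings):
    # Add fixed sinusoidal position signals to B x T x D token embeddings.
    dim = seq_embeddings.get_shape().as_list()[-1]
    seq_len = tf.shape(seq_embeddings)[1]
    positions = tf.cast(tf.range(seq_len), tf.float32)                    # T
    channels = tf.cast(tf.range(dim // 2), tf.float32)                    # D/2
    inv_freq = tf.pow(10000.0, -2.0 * channels / dim)                     # D/2
    angles = tf.expand_dims(positions, 1) * tf.expand_dims(inv_freq, 0)   # T x D/2
    signal = tf.concat([tf.sin(angles), tf.cos(angles)], axis=1)          # T x D
    return seq_embeddings + tf.expand_dims(signal, 0)                     # broadcast over B
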