Example #1
    def make_model(self, is_train: bool = False) -> tf.Tensor:
        with tf.variable_scope('graph_encoder'):
            self._make_placeholders()

            node_tokens = self.token_embedding_layer(self.placeholders['node_token_ids'], suffix='_node')
            print('node tokens', node_tokens.shape)
            node_token_masks = self.placeholders['node_masks']
            print('node token masks', node_token_masks.shape)
            node_token_lens = tf.reduce_sum(node_token_masks, axis=1)  # B

            token_encoding = pool_sequence_embedding('weighted_mean',
                                                     sequence_token_embeddings=node_tokens,
                                                     sequence_lengths=node_token_lens,
                                                     sequence_token_masks=node_token_masks)

            print('token encoding', token_encoding.shape)

            node_encodings = self._build_stack(node_tokens, is_train)

            if node_encodings is not None:
                print('node encoding', node_encodings.shape)
                graph_encoding = pool_sequence_embedding('mean',
                                                         sequence_token_embeddings=node_encodings,
                                                         sequence_lengths=node_token_lens,
                                                         sequence_token_masks=node_token_masks)

        if node_encodings is None:
            return token_encoding
        if self.get_hyper('is_plain'):
            return graph_encoding

        return token_encoding + graph_encoding
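Every encoder in these examples delegates pooling to pool_sequence_embedding. For reference, here is a minimal sketch of what masked 'mean' and 'weighted_mean' pooling over B x T x D token embeddings and a B x T 0/1 mask can look like; the function name, the softmax-based weighting, and the epsilon are illustrative assumptions rather than the project's actual implementation.

# Hedged sketch of masked sequence pooling, not the project's pool_sequence_embedding.
import tensorflow as tf

def pool_sequence_embedding_sketch(pool_mode, sequence_token_embeddings,
                                   sequence_lengths, sequence_token_masks):
    mask = tf.expand_dims(sequence_token_masks, axis=-1)                  # B x T x 1
    if pool_mode == 'mean':
        summed = tf.reduce_sum(sequence_token_embeddings * mask, axis=1)  # B x D
        lengths = tf.cast(tf.expand_dims(sequence_lengths, axis=-1), tf.float32)
        return summed / (lengths + 1e-8)                                  # normalise by true length
    elif pool_mode == 'weighted_mean':
        # One learned scalar score per token, softmax-normalised over the unmasked positions.
        scores = tf.layers.dense(sequence_token_embeddings, units=1, use_bias=False)  # B x T x 1
        scores += (1.0 - mask) * -1e9                                     # exclude padding positions
        weights = tf.nn.softmax(scores, axis=1)                           # B x T x 1
        return tf.reduce_sum(sequence_token_embeddings * weights, axis=1) # B x D
    raise ValueError('Unknown pool mode: %s' % pool_mode)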
Example #2
    def make_model(self, is_train: bool = False) -> tf.Tensor:
        with tf.variable_scope("self_attention_encoder"):
            self._make_placeholders()

            config = BertConfig(
                vocab_size=self.get_hyper('token_vocab_size'),
                hidden_size=self.get_hyper('self_attention_hidden_size'),
                num_hidden_layers=self.get_hyper('self_attention_num_layers'),
                num_attention_heads=self.get_hyper('self_attention_num_heads'),
                intermediate_size=self.get_hyper(
                    'self_attention_intermediate_size'))

            model = BertModel(config=config,
                              is_training=is_train,
                              input_ids=self.placeholders['tokens'],
                              input_mask=self.placeholders['tokens_mask'],
                              use_one_hot_embeddings=False)

            output_pool_mode = self.get_hyper(
                'self_attention_pool_mode').lower()
            if output_pool_mode == 'bert':
                return model.get_pooled_output()
            else:
                seq_token_embeddings = model.get_sequence_output()
                seq_token_masks = self.placeholders['tokens_mask']
                seq_token_lengths = tf.reduce_sum(seq_token_masks, axis=1)  # B
                return pool_sequence_embedding(
                    output_pool_mode,
                    sequence_token_embeddings=seq_token_embeddings,
                    sequence_lengths=seq_token_lengths,
                    sequence_token_masks=seq_token_masks,
                    is_train=is_train)
Example #3
    def make_model(self, is_train: bool = False) -> tf.Tensor:
        with tf.variable_scope("1dcnn_encoder"):
            self._make_placeholders()

            seq_tokens_embeddings = self.embedding_layer(self.placeholders['tokens'])
            seq_tokens_embeddings = self.__add_position_encoding(seq_tokens_embeddings)

            activation_fun = get_activation(self.get_hyper('1dcnn_activation'))
            current_embeddings = seq_tokens_embeddings
            num_filters_and_width = zip(self.get_hyper('1dcnn_layer_list'), self.get_hyper('1dcnn_kernel_width'))
            for (layer_idx, (num_filters, kernel_width)) in enumerate(num_filters_and_width):
                next_embeddings = tf.layers.conv1d(
                    inputs=current_embeddings,
                    filters=num_filters,
                    kernel_size=kernel_width,
                    padding="same")

                # Add residual connections past the first layer.
                if self.get_hyper('1dcnn_add_residual_connections') and layer_idx > 0:
                    next_embeddings += current_embeddings

                current_embeddings = activation_fun(next_embeddings)

                current_embeddings = tf.nn.dropout(current_embeddings,
                                                   keep_prob=self.placeholders['dropout_keep_rate'])

            seq_token_mask = self.placeholders['tokens_mask']
            seq_token_lengths = tf.reduce_sum(seq_token_mask, axis=1)  # B
            return pool_sequence_embedding(self.get_hyper('1dcnn_pool_mode').lower(),
                                           sequence_token_embeddings=current_embeddings,
                                           sequence_lengths=seq_token_lengths,
                                           sequence_token_masks=seq_token_mask)
Example #4
    def make_model(self, is_train: bool = False) -> tf.Tensor:
        with tf.variable_scope("cbow_encoder"):
            self._make_placeholders()

            self.seq_tokens_embeddings = self.embedding_layer(
                self.placeholders['tokens']
            )  # batch size x max seq len x emb dim
            seq_token_mask = self.placeholders['tokens_mask']
            seq_token_lengths = tf.reduce_sum(seq_token_mask, axis=1)  # B

            batch_seq_len = self.seq_tokens_embeddings.get_shape(
            ).dims[1].value

            # pad seqs
            paddings = tf.constant([[0, 0], [2, 2], [0, 0]])
            self.seq_tokens_embeddings = tf.pad(self.seq_tokens_embeddings,
                                                paddings, "CONSTANT")

            self.seq_tokens_embeddings = tf.map_fn(
                self.token_sums,
                tf.range(0, batch_seq_len, 1),
                parallel_iterations=1,
                dtype=(tf.float32))  # max seq len x batch size x emb dim

            # perm dims
            self.seq_tokens_embeddings = tf.transpose(
                self.seq_tokens_embeddings,
                perm=[1, 0, 2])  # batch size x max seq len x emb dim

            return pool_sequence_embedding(
                self.get_hyper('cbow_pool_mode').lower(),
                sequence_token_embeddings=self.seq_tokens_embeddings,
                sequence_lengths=seq_token_lengths,
                sequence_token_masks=seq_token_mask,
                is_train=is_train)
Example #5
    def make_model(self, is_train: bool = False):
        # with tf.compat.v1.variable_scope("gpt2_encoder_" + name):
        self._make_placeholders()
        """
        GPT-2 uses Transformer's decoder as a building block, excluding the encoder-decoder attention module.
        Thus, the only difference with Bert's building blocks(Transformer's encoder) is the masked attention.
        However, in this implementation the masked attention is used for the BertEncoder.
        Therefore the BertModel will be used and adjust the hyper-parameters to be the same of those of the
        pretrained GPT-2 models.
        """
        cache_dir = "../resources/hugging_face/gpt2/"
        model = TFGPT2Model.from_pretrained('gpt2',
                                            cache_dir=cache_dir,
                                            return_dict=True)

        output = model(self.placeholders['tokens'], training=is_train)

        seq_token_embeddings = output.last_hidden_state

        seq_token_masks = self.placeholders['tokens_mask']
        seq_token_lengths = tf.reduce_sum(input_tensor=seq_token_masks,
                                          axis=1)  # B
        return pool_sequence_embedding(
            "weighted_mean",
            sequence_token_embeddings=seq_token_embeddings,
            sequence_lengths=seq_token_lengths,
            sequence_token_masks=seq_token_masks)
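The GPT-2 encoder above expects token ids and a padding mask in its 'tokens' and 'tokens_mask' placeholders. Below is a hedged preparation sketch using the matching Hugging Face tokenizer; the example string and max_length=200 are assumptions, and the EOS token is reused for padding because GPT-2 ships without a pad token.

# Hedged sketch: preparing inputs for a GPT-2 based encoder; max_length=200 is an assumption.
from transformers import GPT2Tokenizer

tokenizer = GPT2Tokenizer.from_pretrained('gpt2',
                                          cache_dir="../resources/hugging_face/gpt2/")
tokenizer.pad_token = tokenizer.eos_token  # GPT-2 has no dedicated pad token

encoded = tokenizer("def add(a, b):\n    return a + b",
                    padding='max_length', truncation=True, max_length=200)
token_ids = encoded['input_ids']        # -> placeholders['tokens']
token_mask = encoded['attention_mask']  # -> placeholders['tokens_mask']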
Example #6
    def make_model(self, is_train: bool = False) -> tf.Tensor:
        with tf.variable_scope("self_attention_encoder"):
            self._make_placeholders()

            seq_tokens_embeddings = self.embedding_layer(
                self.placeholders['tokens'])

            activation_fun = get_activation(self.get_hyper('1dcnn_activation'))
            current_embeddings = seq_tokens_embeddings
            num_filters_and_width = zip(self.get_hyper('1dcnn_layer_list'),
                                        self.get_hyper('1dcnn_kernel_width'))
            for (layer_idx,
                 (num_filters,
                  kernel_width)) in enumerate(num_filters_and_width):
                next_embeddings = tf.layers.conv1d(inputs=current_embeddings,
                                                   filters=num_filters,
                                                   kernel_size=kernel_width,
                                                   padding="same")

                # Add residual connections past the first layer.
                if self.get_hyper(
                        '1dcnn_add_residual_connections') and layer_idx > 0:
                    next_embeddings += current_embeddings

                current_embeddings = activation_fun(next_embeddings)

                current_embeddings = tf.nn.dropout(
                    current_embeddings,
                    keep_prob=self.placeholders['dropout_keep_rate'])

            config = BertConfig(
                vocab_size=self.get_hyper('token_vocab_size'),
                hidden_size=self.get_hyper('self_attention_hidden_size'),
                num_hidden_layers=self.get_hyper('self_attention_num_layers'),
                num_attention_heads=self.get_hyper('self_attention_num_heads'),
                intermediate_size=self.get_hyper(
                    'self_attention_intermediate_size'))

            model = BertModel(config=config,
                              is_training=is_train,
                              input_ids=self.placeholders['tokens'],
                              input_mask=self.placeholders['tokens_mask'],
                              use_one_hot_embeddings=False,
                              embedded_input=current_embeddings)

            output_pool_mode = self.get_hyper(
                'self_attention_pool_mode').lower()
            if output_pool_mode == 'bert':
                return model.get_pooled_output()
            else:
                seq_token_embeddings = model.get_sequence_output()
                seq_token_masks = self.placeholders['tokens_mask']
                seq_token_lengths = tf.reduce_sum(seq_token_masks, axis=1)  # B
                return pool_sequence_embedding(
                    output_pool_mode,
                    sequence_token_embeddings=seq_token_embeddings,
                    sequence_lengths=seq_token_lengths,
                    sequence_token_masks=seq_token_masks,
                    is_train=is_train)
Example #7
    def make_model(self, is_train: bool = True) -> tf.Tensor:
        with tf.variable_scope("elmo_encoder"):
            self._make_placeholders()

            self.placeholders['tokens_lengths'] = \
                tf.placeholder(tf.int32,
                               shape=[None],
                               name='tokens_lengths')

            self.placeholders['tokens'] = \
                tf.placeholder(tf.int32,
                               shape=[None, self.get_hyper('max_num_tokens')],
                               name='tokens')

            self.placeholders['tokens_str'] = \
                tf.placeholder(tf.string,
                               shape=[None, self.get_hyper('max_num_tokens')],
                               name='tokens_str')

            seq_tokens_tokens = self.placeholders['tokens']
            seq_tokens = self.placeholders['tokens_str']
            seq_tokens_lengths = self.placeholders['tokens_lengths']

            # ## DEBUGGING: OUTPUT SHAPES
            # print("Sequence Tokens Shape: %s" % seq_tokens.shape)
            # print("Sequence Tokens Lengths: %s" % seq_tokens_lengths)

            ## pull elmo model from tensorflow hub
            elmo = hub.Module("https://tfhub.dev/google/elmo/2",
                              trainable=is_train)
            token_embeddings = elmo(
                {
                    "tokens": seq_tokens_tokens,
                    "sequence_len": seq_tokens_lengths
                },
                signature='tokens',
                as_dict=True)['elmo']  ## [batch_size, max_length, 1024 or 512]

            ## add the elmo model to the trainable variables

            output_pool_mode = self.get_hyper('elmo_pool_mode').lower()
            if output_pool_mode == ELMO_FINAL:
                return token_embeddings
            else:
                token_mask = tf.expand_dims(tf.range(tf.shape(seq_tokens)[1]),
                                            axis=0)  # 1 x T
                token_mask = tf.tile(
                    token_mask,
                    multiples=(tf.shape(seq_tokens_lengths)[0], 1))  # B x T
                token_mask = tf.cast(
                    token_mask < tf.expand_dims(seq_tokens_lengths, axis=-1),
                    dtype=tf.float32)  # B x T
                return pool_sequence_embedding(
                    output_pool_mode,
                    sequence_token_embeddings=token_embeddings,
                    sequence_lengths=seq_tokens_lengths,
                    sequence_token_masks=token_mask)
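The range/tile/cast block above reconstructs a B x T padding mask from the lengths tensor by hand. A hedged equivalent using tf.sequence_mask is sketched below, where the length cap of 30 is an illustrative assumption:

# Hedged equivalent of the manual mask construction above; the length cap of 30 is an assumption.
import tensorflow as tf

seq_tokens_lengths = tf.placeholder(tf.int32, shape=[None], name='tokens_lengths')
max_num_tokens = 30
token_mask = tf.cast(tf.sequence_mask(seq_tokens_lengths, maxlen=max_num_tokens),
                     dtype=tf.float32)  # B x T, 1.0 at positions before each sequence length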
Example #8
    def make_model(self, is_train: bool = False) -> tf.Tensor:
        with tf.variable_scope('ast_tokens_encoder'):
            self._make_placeholders()

            node_tokens = self.embedding_layer(self.placeholders['tokens'])
            node_token_masks = self.placeholders['node_masks']
            node_token_lens = tf.reduce_sum(node_token_masks, axis=1)  # B
            token_encoding = pool_sequence_embedding('mean',
                                                     sequence_token_embeddings=node_tokens,
                                                     sequence_lengths=node_token_lens,
                                                     sequence_token_masks=node_token_masks)
        return token_encoding
Example #9
    def make_model(self, is_train: bool = False) -> tf.Tensor:
        with tf.variable_scope("nbow_encoder"):
            self._make_placeholders()

            seq_tokens_embeddings = self.pretrained_embedding_layer(self.placeholders['tokens'])
            seq_token_mask = self.placeholders['tokens_mask']
            seq_token_lengths = tf.reduce_sum(seq_token_mask, axis=1)  # B
            return pool_sequence_embedding(
                self.get_hyper('nbow_pool_mode').lower(),
                sequence_token_embeddings=seq_tokens_embeddings,
                sequence_lengths=seq_token_lengths,
                sequence_token_masks=seq_token_mask)
Example #10
    def make_model(self, is_train: bool = False) -> tf.Tensor:
        with tf.variable_scope("rnn_encoder"):
            self._make_placeholders()

            self.placeholders['tokens_lengths'] = \
                tf.placeholder(tf.int32,
                               shape=[None],
                               name='tokens_lengths')

            self.placeholders['rnn_dropout_keep_rate'] = \
                tf.placeholder(tf.float32,
                               shape=[],
                               name='rnn_dropout_keep_rate')

            self.placeholders['rnn_recurrent_dropout_keep_rate'] = \
                tf.placeholder(tf.float32,
                               shape=[],
                               name='rnn_recurrent_dropout_keep_rate')

            seq_tokens = self.placeholders['tokens']
            seq_tokens_embeddings = self.embedding_layer(seq_tokens)
            seq_tokens_lengths = self.placeholders['tokens_lengths']

            trans_seq_tokens_embeddings = tf.transpose(seq_tokens_embeddings,
                                                       [1, 0, 2])
            lstm = tf.contrib.cudnn_rnn.CudnnLSTM(1, 64)

            rnn_token_embeddings, rnn_final_state = lstm(
                trans_seq_tokens_embeddings)
            #rnn_final_state, token_embeddings = self._encode_with_rnn(seq_tokens_embeddings, seq_tokens_lengths)
            token_embeddings = tf.transpose(rnn_token_embeddings, [1, 0, 2])

            output_pool_mode = self.get_hyper('rnn_pool_mode').lower()
            if output_pool_mode == 'rnn_final':
                return rnn_final_state
            else:
                token_mask = tf.expand_dims(tf.range(tf.shape(seq_tokens)[1]),
                                            axis=0)  # 1 x T
                token_mask = tf.tile(
                    token_mask,
                    multiples=(tf.shape(seq_tokens_lengths)[0], 1))  # B x T
                token_mask = tf.cast(
                    token_mask < tf.expand_dims(seq_tokens_lengths, axis=-1),
                    dtype=tf.float32)  # B x T
                return pool_sequence_embedding(
                    output_pool_mode,
                    sequence_token_embeddings=token_embeddings,
                    sequence_lengths=seq_tokens_lengths,
                    sequence_token_masks=token_mask)
Example #11
    def make_model(self, is_train: bool = False) -> tf.Tensor:
        with tf.variable_scope("rnn_encoder"):
            self._make_placeholders()

            self.placeholders['tokens_lengths'] = \
                tf.placeholder(tf.int32,
                               shape=[None],
                               name='tokens_lengths')

            self.placeholders['rnn_dropout_keep_rate'] = \
                tf.placeholder(tf.float32,
                               shape=[],
                               name='rnn_dropout_keep_rate')

            self.placeholders['rnn_recurrent_dropout_keep_rate'] = \
                tf.placeholder(tf.float32,
                               shape=[],
                               name='rnn_recurrent_dropout_keep_rate')

            self.seq_tokens = self.placeholders['tokens']
            seq_tokens_embeddings = self.embedding_layer(self.seq_tokens)
            seq_tokens_lengths = self.placeholders['tokens_lengths']

            rnn_final_state, self.token_embeddings = self._encode_with_rnn(
                seq_tokens_embeddings, seq_tokens_lengths)

            # TODO: Add call for Attention code.
            # Try to use batch queries so you can do bmm (TensorFlow equivalent)
            # Dim: batch_size, max_seq_len, emb_dim
            # Iterate over max_seq_len. For each token in sequence, do Attention
            #tf.map_fn -> runs a function over a set of values

            embeds = self.token_embeddings
            if self.get_hyper('rnn_do_attention'):
                self.batch_seq_len = self.seq_tokens.get_shape().dims[1].value
                # self.attention = BahdanauAttention(self.batch_seq_len)
                # Do attention on each timestep
                batch_num = 100
                # print("Starting Attention Setup")
                self.weights = tf.zeros([batch_num, 1, self.batch_seq_len])
                # print("Set up Weights")
                # self.ctx_v = tf.zeros(tf.shape(self.token_embeddings[:, 0:1, :]))
                # print("Set up Context Vector")

                # run attention_hw_style on all tokens
                # print("Running Attention")
                context_list = tf.map_fn(self.attention_hw_style,
                                         tf.range(0, self.batch_seq_len, 1),
                                         dtype=(tf.float32))

                # print("Concatenating Context Vectors with Token Embeddings")

                context = context_list
                # squeeze out the extra singleton dimension if map_fn produced a rank-4 tensor
                if len(context_list.shape.dims) == 4:
                    context = tf.squeeze(context_list)

                context = tf.concat(context, 1)

                # if (context.shape.dims != None):
                '''
                if (tf.rank(context)[:] > 2):
                    context = tf.transpose(context, tf.concat([1, 0], tf.range(2, tf.rank(context)[:]), 0))
                else:
                    context = tf.transpose(context, [1, 0])
                '''
                '''
                    if (len(context.shape.dims) == 3):
                        context = tf.transpose(context, perm=[1, 0, 2])
                    if (len(context.shape.dims) == 2):
                        context = tf.transpose(context, perm=[1, 0])
                '''
                context = tf.transpose(context, [1, 0, 2])

                # Concat context vectors and token_embeddings
                # ctx = self.ctx_v
                # print("Token Embeddings: ", self.token_embeddings.shape)
                # print("Context Vectors: ", context.shape)
                embeds = tf.concat((context, self.token_embeddings), 1)

                # print("Running the rest of the model")

            output_pool_mode = self.get_hyper('rnn_pool_mode').lower()
            if output_pool_mode == 'rnn_final':
                return rnn_final_state
            else:
                token_mask = tf.expand_dims(tf.range(
                    tf.shape(self.seq_tokens)[1]),
                                            axis=0)  # 1 x T
                if (self.get_hyper("rnn_do_attention") == True):
                    token_mask = tf.expand_dims(tf.range(
                        tf.shape(self.seq_tokens)[1] * 2),
                                                axis=0)  # 1 x T        # 1 x T
                token_mask = tf.tile(
                    token_mask,
                    multiples=(tf.shape(seq_tokens_lengths)[0], 1))  # B x T
                token_mask = tf.cast(
                    token_mask < tf.expand_dims(seq_tokens_lengths, axis=-1),
                    dtype=tf.float32)  # B x T
                return pool_sequence_embedding(
                    output_pool_mode,
                    sequence_token_embeddings=embeds,
                    sequence_lengths=seq_tokens_lengths,
                    sequence_token_masks=token_mask,
                    is_train=is_train)
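The attention branch above builds a context vector per timestep by running attention_hw_style under tf.map_fn. As a reference point, here is a hedged sketch of additive (Bahdanau-style) attention for a single query timestep; the function name and the 64 hidden units are assumptions, not the encoder's actual attention_hw_style.

# Hedged sketch of additive (Bahdanau-style) attention for one query timestep.
import tensorflow as tf

def bahdanau_context(query, keys, units=64):
    # score_t = v^T tanh(W1 q + W2 k_t); softmax over the T key positions;
    # the context is the attention-weighted sum of the keys.
    w_query = tf.layers.dense(tf.expand_dims(query, 1), units)  # B x 1 x U
    w_keys = tf.layers.dense(keys, units)                       # B x T x U
    scores = tf.layers.dense(tf.tanh(w_query + w_keys), 1)      # B x T x 1
    weights = tf.nn.softmax(scores, axis=1)                     # B x T x 1
    return tf.reduce_sum(weights * keys, axis=1)                # B x D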
Example #12
    def _make_model(self, is_train: bool) -> None:
        """
        Create the actual model.

        Note: This has to create self.ops['code_representations'] and self.ops['query_representations'],
        tensors of the same shape and rank 2.
        """
        self._placeholders['dropout_keep_rate'] = tf.placeholder(
            tf.float32, shape=(), name='dropout_keep_rate')
        self._placeholders['sample_loss_weights'] = \
            tf.placeholder_with_default(
                input=np.ones(shape=[self.hyperparameters['batch_size']], dtype=np.float32),
                shape=[self.hyperparameters['batch_size']],
                name='sample_loss_weights')

        with tf.variable_scope("code_encoder"):
            language_encoders = []
            language_encoder_masks = []

            for (language, language_metadata) in sorted(
                    self._per_code_language_metadata.items(),
                    key=lambda kv: kv[0]):
                with tf.variable_scope(language):
                    self._code_encoders[language] = self._code_encoder_type(
                        label="code",
                        hyperparameters=self.hyperparameters,
                        metadata=language_metadata)
                    language_encoder, language_encoder_mask, language_encoder_lens = self._code_encoders[
                        language].build_model(is_train=is_train)

                    language_encoders.append(language_encoder)
                    language_encoder_masks.append(language_encoder_mask)
            self.ops['code_representations'] = tf.concat(language_encoders,
                                                         axis=0)
            self.ops['code_representation_masks'] = tf.concat(
                language_encoder_masks, axis=0)
        with tf.variable_scope("query_encoder"):
            self._query_encoder = self._query_encoder_type(
                label="query",
                hyperparameters=self.hyperparameters,
                metadata=self._query_metadata)
            self.ops['query_representations'], self.ops[
                'query_representation_masks'], query_sequence_lengths = self._query_encoder.build_model(
                    is_train=is_train)
        '''
        code_representation_size = next(
            iter(self.__code_encoders.values())).output_representation_size
        query_representation_size = self.__query_encoder.output_representation_size
        assert code_representation_size == query_representation_size, \
            f'Representations produced for code ({code_representation_size}) and query ({query_representation_size}) cannot differ!'
        '''
        # There is a trick here: we generate negative samples from the positive samples in the batch.
        # query: [B,F,H] -> [B,B,F,H] -> [B*B,F,H]
        # code:  [B,T,H] -> [B,B,T,H] -> [B*B,T,H]
        query_shape = tf.shape(self.ops['query_representations'])
        code_shape = tf.shape(self.ops['code_representations'])
        #print(self.ops['query_representations'].shape, self.ops['code_representations'].shape)
        self.ops['query_representations'] = tf.reshape(
            tf.tile(tf.expand_dims(self.ops['query_representations'], 0),
                    [code_shape[0], 1, 1, 1]), [-1, 30, 128])
        self.ops['query_representation_masks'] = tf.reshape(
            tf.tile(tf.expand_dims(self.ops['query_representation_masks'], 0),
                    [code_shape[0], 1, 1]), [-1, 30])

        self.ops['code_representations'] = tf.reshape(
            tf.tile(tf.expand_dims(self.ops['code_representations'], 1),
                    [1, code_shape[0], 1, 1]), [-1, 200, 128])
        self.ops['code_representation_masks'] = tf.reshape(
            tf.tile(tf.expand_dims(self.ops['code_representation_masks'], 1),
                    [1, code_shape[0], 1]), [-1, 200])
        #print(self.ops['query_representations'].shape, self.ops['code_representations'].shape)

        with tf.variable_scope("cross_encoder"):
            # Create attention mask [B,F,T]
            # [B,F] -> [B,F,T], [B,T] -> [B,F,T], [B,F,T]*[B,F,T]
            query_mask_shape = tf.shape(self.ops['query_representation_masks'])
            code_mask_shape = tf.shape(self.ops['code_representation_masks'])
            print(self.ops['query_representations'].shape,
                  self.ops['code_representations'].shape)
            attention_mask = tf.tile(
                tf.expand_dims(self.ops['query_representation_masks'], 2),
                [1, 1, code_mask_shape[-1]]) * tf.tile(
                    tf.expand_dims(self.ops['code_representation_masks'], 1),
                    [1, query_mask_shape[-1], 1])
            # print(attention_mask)
            # [B,F,H]
            with tf.variable_scope("attention_layer"):
                output_layer = self.attention_layer(
                    self.ops['query_representations'],
                    self.ops['code_representations'], attention_mask)
                pool_output = pool_sequence_embedding(
                    "weighted_mean",
                    sequence_token_embeddings=output_layer,
                    sequence_lengths=query_sequence_lengths,
                    sequence_token_masks=self.ops['query_representation_masks']
                )

            output_weights = tf.get_variable(
                'output_weights', [128, 1],
                initializer=tf.truncated_normal_initializer(stddev=0.02))
            output_bias = tf.get_variable(
                'output_bias', [1],
                initializer=tf.truncated_normal_initializer(stddev=0.02))

            # [B,1]
            # print(output_weights, output_bias)
            self.ops['logits'] = tf.nn.bias_add(
                tf.matmul(pool_output, output_weights), output_bias)
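The tile/reshape block in the middle of this example is the in-batch negative sampling trick described in its comments: every code snippet is paired with every query in the batch, so each query gets one positive and B-1 negatives. A standalone sketch of the same pairing, with toy sizes (B=2, F=3, T=4, H=5 are made-up values):

# Hedged sketch of the in-batch pairing trick used above; all sizes are illustrative.
import tensorflow as tf

B, F, T, H = 2, 3, 4, 5
query = tf.random.normal([B, F, H])  # B x F x H query token representations
code = tf.random.normal([B, T, H])   # B x T x H code token representations

# query: [B,F,H] -> [B,B,F,H] -> [B*B,F,H]; the query index varies fastest after the reshape.
query_pairs = tf.reshape(tf.tile(tf.expand_dims(query, 0), [B, 1, 1, 1]), [-1, F, H])
# code: [B,T,H] -> [B,B,T,H] -> [B*B,T,H]; the code index varies slowest after the reshape.
code_pairs = tf.reshape(tf.tile(tf.expand_dims(code, 1), [1, B, 1, 1]), [-1, T, H])
# Pair k = i * B + j holds (code_i, query_j); the diagonal pairs (i == j) are the positives.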
Example #13
    def _complex_model(self, is_train: bool = False) -> tf.Tensor:
        models = ['nbow', 'rnn']  # nbow, cnn, rnn, bert
        attention = False
        embeddings = list()
        with tf.variable_scope("tree_encoder"):
            self._make_placeholders()

            self.placeholders['tokens_lengths'] = \
                tf.placeholder(tf.int32, shape=[None], name='tokens_lengths')
            self.placeholders['rnn_dropout_keep_rate'] = \
                tf.placeholder(tf.float32, shape=[], name='rnn_dropout_keep_rate')
            self.placeholders['rnn_recurrent_dropout_keep_rate'] = \
                tf.placeholder(tf.float32, shape=[], name='rnn_recurrent_dropout_keep_rate')

            common_flag = True
            if 'nbow' in models and 'rnn' in models:
                seq_tokens = self.placeholders['tokens']
                seq_tokens_embeddings = self.embedding_layer(seq_tokens)
                common_flag = False
            if 'nbow' in models:
                if common_flag:
                    seq_tokens_embeddings = self.embedding_layer(
                        self.placeholders['tokens'])
                seq_token_mask = self.placeholders['tokens_mask']
                seq_token_lengths = tf.reduce_sum(seq_token_mask, axis=1)  # B

                embedding = pool_sequence_embedding(
                    self.get_hyper('nbow_pool_mode').lower(),
                    sequence_token_embeddings=seq_tokens_embeddings,
                    sequence_lengths=seq_token_lengths,
                    sequence_token_masks=seq_token_mask)
                embeddings.append(embedding)
            if 'cnn' in models:
                if common_flag:
                    seq_tokens_embeddings = self.embedding_layer(
                        self.placeholders['tokens'])
                seq_tokens_embeddings = self.__add_position_encoding(
                    seq_tokens_embeddings)

                activation_fun = get_activation(
                    self.get_hyper('1dcnn_activation'))
                current_embeddings = seq_tokens_embeddings
                num_filters_and_width = zip(
                    self.get_hyper('1dcnn_layer_list'),
                    self.get_hyper('1dcnn_kernel_width'))
                for (layer_idx,
                     (num_filters,
                      kernel_width)) in enumerate(num_filters_and_width):
                    next_embeddings = tf.layers.conv1d(
                        inputs=current_embeddings,
                        filters=num_filters,
                        kernel_size=kernel_width,
                        padding="same")

                    # Add residual connections past the first layer.
                    if self.get_hyper('1dcnn_add_residual_connections'
                                      ) and layer_idx > 0:
                        next_embeddings += current_embeddings

                    current_embeddings = activation_fun(next_embeddings)
                    current_embeddings = tf.nn.dropout(
                        current_embeddings,
                        keep_prob=self.placeholders['dropout_keep_rate'])

                seq_token_mask = self.placeholders['tokens_mask']
                seq_token_lengths = tf.reduce_sum(seq_token_mask, axis=1)  # B
                embedding = pool_sequence_embedding(
                    self.get_hyper('1dcnn_pool_mode').lower(),
                    sequence_token_embeddings=current_embeddings,
                    sequence_lengths=seq_token_lengths,
                    sequence_token_masks=seq_token_mask)
                embeddings.append(embedding)
            if 'rnn' in models:
                if common_flag:
                    seq_tokens = self.placeholders['tokens']
                    seq_tokens_embeddings = self.embedding_layer(seq_tokens)
                seq_tokens_lengths = self.placeholders['tokens_lengths']
                rnn_final_state, token_embeddings = self._encode_with_rnn(
                    seq_tokens_embeddings, seq_tokens_lengths)

                output_pool_mode = self.get_hyper('rnn_pool_mode').lower()
                if output_pool_mode == 'rnn_final':
                    embedding = rnn_final_state
                else:
                    token_mask = tf.expand_dims(tf.range(
                        tf.shape(seq_tokens)[1]),
                                                axis=0)  # 1 x T
                    token_mask = tf.tile(
                        token_mask,
                        multiples=(tf.shape(seq_tokens_lengths)[0],
                                   1))  # B x T
                    token_mask = tf.cast(token_mask < tf.expand_dims(
                        seq_tokens_lengths, axis=-1),
                                         dtype=tf.float32)  # B x T
                    embedding = pool_sequence_embedding(
                        output_pool_mode,
                        sequence_token_embeddings=token_embeddings,
                        sequence_lengths=seq_tokens_lengths,
                        sequence_token_masks=token_mask)
                embeddings.append(embedding)
            if 'bert' in models:
                config = BertConfig(
                    vocab_size=self.get_hyper('token_vocab_size'),
                    hidden_size=self.get_hyper('self_attention_hidden_size'),
                    num_hidden_layers=self.get_hyper(
                        'self_attention_num_layers'),
                    num_attention_heads=self.get_hyper(
                        'self_attention_num_heads'),
                    intermediate_size=self.get_hyper(
                        'self_attention_intermediate_size'))

                model = BertModel(config=config,
                                  is_training=is_train,
                                  input_ids=self.placeholders['tokens'],
                                  input_mask=self.placeholders['tokens_mask'],
                                  use_one_hot_embeddings=False)

                output_pool_mode = self.get_hyper(
                    'self_attention_pool_mode').lower()
                if output_pool_mode == 'bert':
                    embedding = model.get_pooled_output()
                else:
                    seq_token_embeddings = model.get_sequence_output()
                    seq_token_masks = self.placeholders['tokens_mask']
                    seq_token_lengths = tf.reduce_sum(seq_token_masks,
                                                      axis=1)  # B
                    embedding = pool_sequence_embedding(
                        output_pool_mode,
                        sequence_token_embeddings=seq_token_embeddings,
                        sequence_lengths=seq_token_lengths,
                        sequence_token_masks=seq_token_masks)
                embeddings.append(embedding)

            embeddings = tf.concat(embeddings, axis=-1)
            if attention:
                embeddings = Common.self_attention_layer(embeddings)
            # "concat one-hot" is equivalent to "accumulate embedding":
            # [v1^T, v2^T, v3^T] * W = [v1^T, v2^T, v3^T] * [w1, w2, w3]^T = v1^T*w1 + v2^T*w2 + v3^T*w3
            # Returning the concatenation keeps the per-sample B x D shape expected of an encoder
            # output; reducing over axis 0 would collapse the batch dimension.
            return embeddings
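The comment above argues that concatenating the per-model embeddings and applying one stacked projection is equivalent to projecting each embedding separately and summing. A small numeric check of that identity (all sizes are made up):

# Hedged numeric check: concat-then-project equals project-then-sum when the weight
# matrix is the vertical stack of the per-encoder projections.
import numpy as np

rng = np.random.default_rng(0)
v1, v2 = rng.normal(size=(4, 8)), rng.normal(size=(4, 16))    # two encoder outputs, batch of 4
w1, w2 = rng.normal(size=(8, 32)), rng.normal(size=(16, 32))  # per-encoder projection matrices

concat_then_project = np.concatenate([v1, v2], axis=-1) @ np.vstack([w1, w2])
project_then_sum = v1 @ w1 + v2 @ w2
assert np.allclose(concat_then_project, project_then_sum)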
Example #14
    def _single_model(self, is_train: bool = False) -> tf.Tensor:
        model = 'nbow'  # nbow, cnn, rnn, bert
        attention = False
        embedding = None
        with tf.variable_scope("tree_encoder"):
            self._make_placeholders()

            self.placeholders['tokens_lengths'] = \
                tf.placeholder(tf.int32, shape=[None], name='tokens_lengths')
            self.placeholders['rnn_dropout_keep_rate'] = \
                tf.placeholder(tf.float32, shape=[], name='rnn_dropout_keep_rate')
            self.placeholders['rnn_recurrent_dropout_keep_rate'] = \
                tf.placeholder(tf.float32, shape=[], name='rnn_recurrent_dropout_keep_rate')

            if model == 'nbow':
                seq_tokens_embeddings = self.embedding_layer(
                    self.placeholders['tokens'])
                seq_token_mask = self.placeholders['tokens_mask']
                seq_token_lengths = tf.reduce_sum(seq_token_mask, axis=1)  # B

                if attention:
                    embedding = Common.yet_attention_layer(
                        seq_tokens_embeddings)
                else:
                    embedding = pool_sequence_embedding(
                        self.get_hyper('nbow_pool_mode').lower(),
                        sequence_token_embeddings=seq_tokens_embeddings,
                        sequence_lengths=seq_token_lengths,
                        sequence_token_masks=seq_token_mask)
            elif model == 'cnn':
                seq_tokens_embeddings = self.embedding_layer(
                    self.placeholders['tokens'])
                seq_tokens_embeddings = self.__add_position_encoding(
                    seq_tokens_embeddings)

                activation_fun = get_activation(
                    self.get_hyper('1dcnn_activation'))
                current_embeddings = seq_tokens_embeddings
                num_filters_and_width = zip(
                    self.get_hyper('1dcnn_layer_list'),
                    self.get_hyper('1dcnn_kernel_width'))
                for (layer_idx,
                     (num_filters,
                      kernel_width)) in enumerate(num_filters_and_width):
                    next_embeddings = tf.layers.conv1d(
                        inputs=current_embeddings,
                        filters=num_filters,
                        kernel_size=kernel_width,
                        padding="same")

                    # Add residual connections past the first layer.
                    if self.get_hyper('1dcnn_add_residual_connections'
                                      ) and layer_idx > 0:
                        next_embeddings += current_embeddings

                    current_embeddings = activation_fun(next_embeddings)
                    current_embeddings = tf.nn.dropout(
                        current_embeddings,
                        keep_prob=self.placeholders['dropout_keep_rate'])

                if attention:
                    embedding = Common.yet_attention_layer(current_embeddings)
                else:
                    seq_token_mask = self.placeholders['tokens_mask']
                    seq_token_lengths = tf.reduce_sum(seq_token_mask,
                                                      axis=1)  # B
                    embedding = pool_sequence_embedding(
                        self.get_hyper('1dcnn_pool_mode').lower(),
                        sequence_token_embeddings=current_embeddings,
                        sequence_lengths=seq_token_lengths,
                        sequence_token_masks=seq_token_mask)
            elif model == 'rnn':
                seq_tokens = self.placeholders['tokens']
                seq_tokens_embeddings = self.embedding_layer(seq_tokens)
                seq_tokens_lengths = self.placeholders['tokens_lengths']
                rnn_final_state, token_embeddings = self._encode_with_rnn(
                    seq_tokens_embeddings, seq_tokens_lengths)

                output_pool_mode = self.get_hyper('rnn_pool_mode').lower()
                if output_pool_mode == 'rnn_final':
                    embedding = rnn_final_state
                else:
                    if attention:
                        embedding = Common.yet_attention_layer(
                            token_embeddings)
                    else:
                        token_mask = tf.expand_dims(tf.range(
                            tf.shape(seq_tokens)[1]),
                                                    axis=0)  # 1 x T
                        token_mask = tf.tile(
                            token_mask,
                            multiples=(tf.shape(seq_tokens_lengths)[0],
                                       1))  # B x T
                        token_mask = tf.cast(token_mask < tf.expand_dims(
                            seq_tokens_lengths, axis=-1),
                                             dtype=tf.float32)  # B x T
                        embedding = pool_sequence_embedding(
                            output_pool_mode,
                            sequence_token_embeddings=token_embeddings,
                            sequence_lengths=seq_tokens_lengths,
                            sequence_token_masks=token_mask)
            elif model == 'bert':
                config = BertConfig(
                    vocab_size=self.get_hyper('token_vocab_size'),
                    hidden_size=self.get_hyper('self_attention_hidden_size'),
                    num_hidden_layers=self.get_hyper(
                        'self_attention_num_layers'),
                    num_attention_heads=self.get_hyper(
                        'self_attention_num_heads'),
                    intermediate_size=self.get_hyper(
                        'self_attention_intermediate_size'))

                model = BertModel(config=config,
                                  is_training=is_train,
                                  input_ids=self.placeholders['tokens'],
                                  input_mask=self.placeholders['tokens_mask'],
                                  use_one_hot_embeddings=False)

                output_pool_mode = self.get_hyper(
                    'self_attention_pool_mode').lower()
                if output_pool_mode == 'bert':
                    embedding = model.get_pooled_output()
                else:
                    seq_token_embeddings = model.get_sequence_output()
                    # only when it is not pooled out, then we consider attention
                    if attention:
                        embedding = Common.yet_attention_layer(
                            seq_token_embeddings)
                    else:
                        seq_token_masks = self.placeholders['tokens_mask']
                        seq_token_lengths = tf.reduce_sum(seq_token_masks,
                                                          axis=1)  # B
                        embedding = pool_sequence_embedding(
                            output_pool_mode,
                            sequence_token_embeddings=seq_token_embeddings,
                            sequence_lengths=seq_token_lengths,
                            sequence_token_masks=seq_token_masks)
            else:
                raise ValueError('Undefined Config')
            return embedding