def make_model(self, is_train: bool = False) -> tf.Tensor:
    with tf.variable_scope('graph_encoder'):
        self._make_placeholders()

        node_tokens = self.token_embedding_layer(self.placeholders['node_token_ids'], suffix='_node')
        print('node tokens', node_tokens.shape)
        node_token_masks = self.placeholders['node_masks']
        print('node token masks', node_token_masks.shape)
        node_token_lens = tf.reduce_sum(node_token_masks, axis=1)  # B
        token_encoding = pool_sequence_embedding('weighted_mean',
                                                 sequence_token_embeddings=node_tokens,
                                                 sequence_lengths=node_token_lens,
                                                 sequence_token_masks=node_token_masks)
        print('token encoding', token_encoding.shape)

        node_encodings = self._build_stack(node_tokens, is_train)
        if node_encodings is not None:
            print('node encoding', node_encodings.shape)
            graph_encoding = pool_sequence_embedding('mean',
                                                     sequence_token_embeddings=node_encodings,
                                                     sequence_lengths=node_token_lens,
                                                     sequence_token_masks=node_token_masks)

        if node_encodings is None:
            return token_encoding
        if self.get_hyper('is_plain'):
            return graph_encoding
        return token_encoding + graph_encoding
def make_model(self, is_train: bool = False) -> tf.Tensor: with tf.variable_scope("self_attention_encoder"): self._make_placeholders() config = BertConfig( vocab_size=self.get_hyper('token_vocab_size'), hidden_size=self.get_hyper('self_attention_hidden_size'), num_hidden_layers=self.get_hyper('self_attention_num_layers'), num_attention_heads=self.get_hyper('self_attention_num_heads'), intermediate_size=self.get_hyper( 'self_attention_intermediate_size')) model = BertModel(config=config, is_training=is_train, input_ids=self.placeholders['tokens'], input_mask=self.placeholders['tokens_mask'], use_one_hot_embeddings=False) output_pool_mode = self.get_hyper( 'self_attention_pool_mode').lower() if output_pool_mode == 'bert': return model.get_pooled_output() else: seq_token_embeddings = model.get_sequence_output() seq_token_masks = self.placeholders['tokens_mask'] seq_token_lengths = tf.reduce_sum(seq_token_masks, axis=1) # B return pool_sequence_embedding( output_pool_mode, sequence_token_embeddings=seq_token_embeddings, sequence_lengths=seq_token_lengths, sequence_token_masks=seq_token_masks, is_train=is_train)
def make_model(self, is_train: bool = False) -> tf.Tensor:
    with tf.variable_scope("1dcnn_encoder"):
        self._make_placeholders()

        seq_tokens_embeddings = self.embedding_layer(self.placeholders['tokens'])
        seq_tokens_embeddings = self.__add_position_encoding(seq_tokens_embeddings)

        activation_fun = get_activation(self.get_hyper('1dcnn_activation'))
        current_embeddings = seq_tokens_embeddings
        num_filters_and_width = zip(self.get_hyper('1dcnn_layer_list'),
                                    self.get_hyper('1dcnn_kernel_width'))
        for (layer_idx, (num_filters, kernel_width)) in enumerate(num_filters_and_width):
            next_embeddings = tf.layers.conv1d(inputs=current_embeddings,
                                               filters=num_filters,
                                               kernel_size=kernel_width,
                                               padding="same")

            # Add residual connections past the first layer.
            if self.get_hyper('1dcnn_add_residual_connections') and layer_idx > 0:
                next_embeddings += current_embeddings

            current_embeddings = activation_fun(next_embeddings)
            current_embeddings = tf.nn.dropout(current_embeddings,
                                               keep_prob=self.placeholders['dropout_keep_rate'])

        seq_token_mask = self.placeholders['tokens_mask']
        seq_token_lengths = tf.reduce_sum(seq_token_mask, axis=1)  # B
        return pool_sequence_embedding(self.get_hyper('1dcnn_pool_mode').lower(),
                                       sequence_token_embeddings=current_embeddings,
                                       sequence_lengths=seq_token_lengths,
                                       sequence_token_masks=seq_token_mask)
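# The 1D CNN encoder above adds a position encoding before the convolution stack.
# The sketch below is a minimal, self-contained illustration of a standard sinusoidal
# scheme (Vaswani et al., 2017); it is an assumption about what __add_position_encoding
# might compute, not a copy of the actual implementation, and the function name
# sinusoidal_position_encoding is illustrative only.
import numpy as np

def sinusoidal_position_encoding(seq_len: int, dim: int) -> np.ndarray:
    # Even channels get sin, odd channels get cos, with geometrically spaced frequencies.
    positions = np.arange(seq_len)[:, None]          # T x 1
    channels = np.arange(dim)[None, :]               # 1 x D
    angle_rates = 1.0 / np.power(10000.0, (2 * (channels // 2)) / float(dim))
    angles = positions * angle_rates                 # T x D
    return np.where(channels % 2 == 0, np.sin(angles), np.cos(angles)).astype(np.float32)

# Usage sketch: broadcast-add to a batch of token embeddings of shape B x T x D, e.g.
# seq_tokens_embeddings = seq_tokens_embeddings + sinusoidal_position_encoding(max_seq_len, emb_dim)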
def make_model(self, is_train: bool = False) -> tf.Tensor: with tf.variable_scope("cbow_encoder"): self._make_placeholders() self.seq_tokens_embeddings = self.embedding_layer( self.placeholders['tokens'] ) # batch size x max seq len x emb dim seq_token_mask = self.placeholders['tokens_mask'] seq_token_lengths = tf.reduce_sum(seq_token_mask, axis=1) # B batch_seq_len = self.seq_tokens_embeddings.get_shape( ).dims[1].value # pad seqs paddings = tf.constant([[0, 0], [2, 2], [0, 0]]) self.seq_tokens_embeddings = tf.pad(self.seq_tokens_embeddings, paddings, "CONSTANT") self.seq_tokens_embeddings = tf.map_fn( self.token_sums, tf.range(0, batch_seq_len, 1), parallel_iterations=1, dtype=(tf.float32)) # max seq len x batch size x emb dim # perm dims self.seq_tokens_embeddings = tf.transpose( self.seq_tokens_embeddings, perm=[1, 0, 2]) # batch size x max seq len x emb dim return pool_sequence_embedding( self.get_hyper('cbow_pool_mode').lower(), sequence_token_embeddings=self.seq_tokens_embeddings, sequence_lengths=seq_token_lengths, sequence_token_masks=seq_token_mask, is_train=is_train)
def make_model(self, is_train: bool = False): # with tf.compat.v1.variable_scope("gpt2_encoder_" + name): self._make_placeholders() """ GPT-2 uses Transformer's decoder as a building block, excluding the encoder-decoder attention module. Thus, the only difference with Bert's building blocks(Transformer's encoder) is the masked attention. However, in this implementation the masked attention is used for the BertEncoder. Therefore the BertModel will be used and adjust the hyper-parameters to be the same of those of the pretrained GPT-2 models. """ cache_dir = "../resources/hugging_face/gpt2/" model = TFGPT2Model.from_pretrained('gpt2', cache_dir=cache_dir, return_dict=True) output = model(self.placeholders['tokens'], training=is_train) seq_token_embeddings = output.last_hidden_state seq_token_masks = self.placeholders['tokens_mask'] seq_token_lengths = tf.reduce_sum(input_tensor=seq_token_masks, axis=1) # B return pool_sequence_embedding( "weighted_mean", sequence_token_embeddings=seq_token_embeddings, sequence_lengths=seq_token_lengths, sequence_token_masks=seq_token_masks)
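# The docstring above notes that the only difference from BERT's encoder block is the
# masked (causal) self-attention. A minimal NumPy sketch of such a causal mask follows:
# position i may only attend to positions <= i. This is illustrative only and is not
# taken from the BertEncoder implementation referenced above.
import numpy as np

def causal_attention_mask(seq_len: int) -> np.ndarray:
    # Lower-triangular matrix: mask[i, j] == 1.0 iff token i may attend to token j.
    return np.tril(np.ones((seq_len, seq_len), dtype=np.float32))

print(causal_attention_mask(4))
# [[1. 0. 0. 0.]
#  [1. 1. 0. 0.]
#  [1. 1. 1. 0.]
#  [1. 1. 1. 1.]]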
def make_model(self, is_train: bool = False) -> tf.Tensor: with tf.variable_scope("self_attention_encoder"): self._make_placeholders() seq_tokens_embeddings = self.embedding_layer( self.placeholders['tokens']) activation_fun = get_activation(self.get_hyper('1dcnn_activation')) current_embeddings = seq_tokens_embeddings num_filters_and_width = zip(self.get_hyper('1dcnn_layer_list'), self.get_hyper('1dcnn_kernel_width')) for (layer_idx, (num_filters, kernel_width)) in enumerate(num_filters_and_width): next_embeddings = tf.layers.conv1d(inputs=current_embeddings, filters=num_filters, kernel_size=kernel_width, padding="same") # Add residual connections past the first layer. if self.get_hyper( '1dcnn_add_residual_connections') and layer_idx > 0: next_embeddings += current_embeddings current_embeddings = activation_fun(next_embeddings) current_embeddings = tf.nn.dropout( current_embeddings, keep_prob=self.placeholders['dropout_keep_rate']) config = BertConfig( vocab_size=self.get_hyper('token_vocab_size'), hidden_size=self.get_hyper('self_attention_hidden_size'), num_hidden_layers=self.get_hyper('self_attention_num_layers'), num_attention_heads=self.get_hyper('self_attention_num_heads'), intermediate_size=self.get_hyper( 'self_attention_intermediate_size')) model = BertModel(config=config, is_training=is_train, input_ids=self.placeholders['tokens'], input_mask=self.placeholders['tokens_mask'], use_one_hot_embeddings=False, embedded_input=current_embeddings) output_pool_mode = self.get_hyper( 'self_attention_pool_mode').lower() if output_pool_mode == 'bert': return model.get_pooled_output() else: seq_token_embeddings = model.get_sequence_output() seq_token_masks = self.placeholders['tokens_mask'] seq_token_lengths = tf.reduce_sum(seq_token_masks, axis=1) # B return pool_sequence_embedding( output_pool_mode, sequence_token_embeddings=seq_token_embeddings, sequence_lengths=seq_token_lengths, sequence_token_masks=seq_token_masks, is_train=is_train)
def make_model(self, is_train: bool = True) -> tf.Tensor:
    with tf.variable_scope("elmo_encoder"):
        self._make_placeholders()

        self.placeholders['tokens_lengths'] = \
            tf.placeholder(tf.int32, shape=[None], name='tokens_lengths')
        self.placeholders['tokens'] = \
            tf.placeholder(tf.int32,
                           shape=[None, self.get_hyper('max_num_tokens')],
                           name='tokens')
        self.placeholders['tokens_str'] = \
            tf.placeholder(tf.string,
                           shape=[None, self.get_hyper('max_num_tokens')],
                           name='tokens_str')

        seq_tokens_tokens = self.placeholders['tokens']
        seq_tokens = self.placeholders['tokens_str']
        seq_tokens_lengths = self.placeholders['tokens_lengths']

        # ## DEBUGGING: OUTPUT SHAPES
        # print("Sequence Tokens Shape: %s" % seq_tokens.shape)
        # print("Sequence Tokens Lengths: %s" % seq_tokens_lengths)

        # Pull the ELMo model from TensorFlow Hub; setting trainable=True adds its
        # variables to the trainable variables.
        elmo = hub.Module("https://tfhub.dev/google/elmo/2", trainable=is_train)
        token_embeddings = elmo(
            {
                "tokens": seq_tokens_tokens,
                "sequence_len": seq_tokens_lengths
            },
            signature='tokens',
            as_dict=True)['elmo']  # [batch_size, max_length, 1024 or 512]

        output_pool_mode = self.get_hyper('elmo_pool_mode').lower()
        if output_pool_mode == ELMO_FINAL:  # use '==' here; identity comparison with 'is' is unreliable
            return token_embeddings
        else:
            token_mask = tf.expand_dims(tf.range(tf.shape(seq_tokens)[1]), axis=0)  # 1 x T
            token_mask = tf.tile(token_mask,
                                 multiples=(tf.shape(seq_tokens_lengths)[0], 1))  # B x T
            token_mask = tf.cast(token_mask < tf.expand_dims(seq_tokens_lengths, axis=-1),
                                 dtype=tf.float32)  # B x T
            return pool_sequence_embedding(output_pool_mode,
                                           sequence_token_embeddings=token_embeddings,
                                           sequence_lengths=seq_tokens_lengths,
                                           sequence_token_masks=token_mask)
def make_model(self, is_train: bool = False) -> tf.Tensor:
    with tf.variable_scope('ast_tokens_encoder'):
        self._make_placeholders()

        node_tokens = self.embedding_layer(self.placeholders['tokens'])
        node_token_masks = self.placeholders['node_masks']
        node_token_lens = tf.reduce_sum(node_token_masks, axis=1)  # B
        token_encoding = pool_sequence_embedding('mean',
                                                 sequence_token_embeddings=node_tokens,
                                                 sequence_lengths=node_token_lens,
                                                 sequence_token_masks=node_token_masks)
        return token_encoding
def make_model(self, is_train: bool = False) -> tf.Tensor: with tf.variable_scope("nbow_encoder"): self._make_placeholders() seq_tokens_embeddings = self.pretrained_embedding_layer(self.placeholders['tokens']) seq_token_mask = self.placeholders['tokens_mask'] seq_token_lengths = tf.reduce_sum(seq_token_mask, axis=1) # B return pool_sequence_embedding( self.get_hyper('nbow_pool_mode').lower(), sequence_token_embeddings=seq_tokens_embeddings, sequence_lengths=seq_token_lengths, sequence_token_masks=seq_token_mask)
def make_model(self, is_train: bool = False) -> tf.Tensor: with tf.variable_scope("rnn_encoder"): self._make_placeholders() self.placeholders['tokens_lengths'] = \ tf.placeholder(tf.int32, shape=[None], name='tokens_lengths') self.placeholders['rnn_dropout_keep_rate'] = \ tf.placeholder(tf.float32, shape=[], name='rnn_dropout_keep_rate') self.placeholders['rnn_recurrent_dropout_keep_rate'] = \ tf.placeholder(tf.float32, shape=[], name='rnn_recurrent_dropout_keep_rate') seq_tokens = self.placeholders['tokens'] seq_tokens_embeddings = self.embedding_layer(seq_tokens) seq_tokens_lengths = self.placeholders['tokens_lengths'] trans_seq_tokens_embeddings = tf.transpose(seq_tokens_embeddings, [1, 0, 2]) lstm = tf.contrib.cudnn_rnn.CudnnLSTM(1, 64) rnn_token_embeddings, rnn_final_state = lstm( trans_seq_tokens_embeddings) #rnn_final_state, token_embeddings = self._encode_with_rnn(seq_tokens_embeddings, seq_tokens_lengths) token_embeddings = tf.transpose(rnn_token_embeddings, [1, 0, 2]) output_pool_mode = self.get_hyper('rnn_pool_mode').lower() if output_pool_mode == 'rnn_final': return rnn_final_state else: token_mask = tf.expand_dims(tf.range(tf.shape(seq_tokens)[1]), axis=0) # 1 x T token_mask = tf.tile( token_mask, multiples=(tf.shape(seq_tokens_lengths)[0], 1)) # B x T token_mask = tf.cast( token_mask < tf.expand_dims(seq_tokens_lengths, axis=-1), dtype=tf.float32) # B x T return pool_sequence_embedding( output_pool_mode, sequence_token_embeddings=token_embeddings, sequence_lengths=seq_tokens_lengths, sequence_token_masks=token_mask)
def make_model(self, is_train: bool = False) -> tf.Tensor: with tf.variable_scope("rnn_encoder"): self._make_placeholders() self.placeholders['tokens_lengths'] = \ tf.placeholder(tf.int32, shape=[None], name='tokens_lengths') self.placeholders['rnn_dropout_keep_rate'] = \ tf.placeholder(tf.float32, shape=[], name='rnn_dropout_keep_rate') self.placeholders['rnn_recurrent_dropout_keep_rate'] = \ tf.placeholder(tf.float32, shape=[], name='rnn_recurrent_dropout_keep_rate') self.seq_tokens = self.placeholders['tokens'] seq_tokens_embeddings = self.embedding_layer(self.seq_tokens) seq_tokens_lengths = self.placeholders['tokens_lengths'] rnn_final_state, self.token_embeddings = self._encode_with_rnn( seq_tokens_embeddings, seq_tokens_lengths) # TODO: Add call for Attention code. # Try to use batch queries so you can do bmm (TensorFlow equivalent) # Dim: batch_size, max_seq_len, emb_dim # Iterate over max_seq_len. For each token in sequence, do Attention #tf.map_fn -> runs a function over a set of values embeds = self.token_embeddings if (self.get_hyper('rnn_do_attention') == True): self.batch_seq_len = self.seq_tokens.get_shape().dims[1].value # self.attention = BahdanauAttention(self.batch_seq_len) # Do attention on each timestep batch_num = 100 # print("Starting Attention Setup") self.weights = tf.zeros([batch_num, 1, self.batch_seq_len]) # print("Set up Weights") # self.ctx_v = tf.zeros(tf.shape(self.token_embeddings[:, 0:1, :])) # print("Set up Context Vector") # run attention_hw_style on all tokens # print("Running Attention") context_list = tf.map_fn(self.attention_hw_style, tf.range(0, self.batch_seq_len, 1), dtype=(tf.float32)) # print("Concatenating Context Vectors with Token Embeddings") context = context_list # if (size == 4), squeeze, else dont if (len(context_list.shape.dims) == 4): context = tf.squeeze(context_list) context = tf.concat(context, 1) # if (context.shape.dims != None): ''' if (tf.rank(context)[:] > 2): context = tf.transpose(context, tf.concat([1, 0], tf.range(2, tf.rank(context)[:]), 0)) else: context = tf.transpose(context, [1, 0]) ''' ''' if (len(context.shape.dims) == 3): context = tf.transpose(context, perm=[1, 0, 2]) if (len(context.shape.dims) == 2): context = tf.transpose(context, perm=[1, 0]) ''' context = tf.transpose(context, [1, 0, 2]) # Concat context vectors and token_embeddings # ctx = self.ctx_v # print("Token Embeddings: ", self.token_embeddings.shape) # print("Context Vectors: ", context.shape) embeds = tf.concat((context, self.token_embeddings), 1) # print("Running the rest of the model") output_pool_mode = self.get_hyper('rnn_pool_mode').lower() if output_pool_mode == 'rnn_final': return rnn_final_state else: token_mask = tf.expand_dims(tf.range( tf.shape(self.seq_tokens)[1]), axis=0) # 1 x T if (self.get_hyper("rnn_do_attention") == True): token_mask = tf.expand_dims(tf.range( tf.shape(self.seq_tokens)[1] * 2), axis=0) # 1 x T # 1 x T token_mask = tf.tile( token_mask, multiples=(tf.shape(seq_tokens_lengths)[0], 1)) # B x T token_mask = tf.cast( token_mask < tf.expand_dims(seq_tokens_lengths, axis=-1), dtype=tf.float32) # B x T return pool_sequence_embedding( output_pool_mode, sequence_token_embeddings=embeds, sequence_lengths=seq_tokens_lengths, sequence_token_masks=token_mask, is_train=is_train)
def _make_model(self, is_train: bool) -> None:
    """
    Create the actual model.

    Note: This has to create self.ops['code_representations'] and
    self.ops['query_representations'], tensors of the same shape and rank 2.
    """
    self._placeholders['dropout_keep_rate'] = tf.placeholder(
        tf.float32, shape=(), name='dropout_keep_rate')
    self._placeholders['sample_loss_weights'] = \
        tf.placeholder_with_default(
            input=np.ones(shape=[self.hyperparameters['batch_size']], dtype=np.float32),
            shape=[self.hyperparameters['batch_size']],
            name='sample_loss_weights')

    with tf.variable_scope("code_encoder"):
        language_encoders = []
        language_encoder_masks = []
        for (language, language_metadata) in sorted(
                self._per_code_language_metadata.items(), key=lambda kv: kv[0]):
            with tf.variable_scope(language):
                self._code_encoders[language] = self._code_encoder_type(
                    label="code",
                    hyperparameters=self.hyperparameters,
                    metadata=language_metadata)
                language_encoder, language_encoder_mask, language_encoder_lens = \
                    self._code_encoders[language].build_model(is_train=is_train)
                language_encoders.append(language_encoder)
                language_encoder_masks.append(language_encoder_mask)
        self.ops['code_representations'] = tf.concat(language_encoders, axis=0)
        self.ops['code_representation_masks'] = tf.concat(language_encoder_masks, axis=0)

    with tf.variable_scope("query_encoder"):
        self._query_encoder = self._query_encoder_type(
            label="query",
            hyperparameters=self.hyperparameters,
            metadata=self._query_metadata)
        self.ops['query_representations'], self.ops['query_representation_masks'], \
            query_sequence_lengths = self._query_encoder.build_model(is_train=is_train)

    '''
    code_representation_size = next(
        iter(self.__code_encoders.values())).output_representation_size
    query_representation_size = self.__query_encoder.output_representation_size
    assert code_representation_size == query_representation_size, \
        f'Representations produced for code ({code_representation_size}) and query ({query_representation_size}) cannot differ!'
    '''

    # The trick here: negative samples are generated from the positive samples within
    # the batch by pairing every query with every code snippet.
    # query: [B,F,H] -> [B,B,F,H] -> [B*B,F,H]
    # code:  [B,T,H] -> [B,B,T,H] -> [B*B,T,H]
    query_shape = tf.shape(self.ops['query_representations'])
    code_shape = tf.shape(self.ops['code_representations'])
    # print(self.ops['query_representations'].shape, self.ops['code_representations'].shape)

    self.ops['query_representations'] = tf.reshape(
        tf.tile(tf.expand_dims(self.ops['query_representations'], 0),
                [code_shape[0], 1, 1, 1]),
        [-1, 30, 128])
    self.ops['query_representation_masks'] = tf.reshape(
        tf.tile(tf.expand_dims(self.ops['query_representation_masks'], 0),
                [code_shape[0], 1, 1]),
        [-1, 30])
    self.ops['code_representations'] = tf.reshape(
        tf.tile(tf.expand_dims(self.ops['code_representations'], 1),
                [1, code_shape[0], 1, 1]),
        [-1, 200, 128])
    self.ops['code_representation_masks'] = tf.reshape(
        tf.tile(tf.expand_dims(self.ops['code_representation_masks'], 1),
                [1, code_shape[0], 1]),
        [-1, 200])
    # print(self.ops['query_representations'].shape, self.ops['code_representations'].shape)

    with tf.variable_scope("cross_encoder"):
        # Create the attention mask [B,F,T]:
        # [B,F] -> [B,F,T], [B,T] -> [B,F,T], then elementwise [B,F,T] * [B,F,T].
        query_mask_shape = tf.shape(self.ops['query_representation_masks'])
        code_mask_shape = tf.shape(self.ops['code_representation_masks'])
        print(self.ops['query_representations'].shape, self.ops['code_representations'].shape)
        attention_mask = tf.tile(
            tf.expand_dims(self.ops['query_representation_masks'], 2),
            [1, 1, code_mask_shape[-1]]) * tf.tile(
                tf.expand_dims(self.ops['code_representation_masks'], 1),
                [1, query_mask_shape[-1], 1])
        # print(attention_mask)

        # [B,F,H]
        with tf.variable_scope("attention_layer"):
            output_layer = self.attention_layer(
                self.ops['query_representations'],
                self.ops['code_representations'],
                attention_mask)

        pool_output = pool_sequence_embedding(
            "weighted_mean",
            sequence_token_embeddings=output_layer,
            sequence_lengths=query_sequence_lengths,
            sequence_token_masks=self.ops['query_representation_masks'])

        output_weights = tf.get_variable(
            'output_weights', [128, 1],
            initializer=tf.truncated_normal_initializer(stddev=0.02))
        output_bias = tf.get_variable(
            'output_bias', [1],
            initializer=tf.truncated_normal_initializer(stddev=0.02))
        # [B,1]
        # print(output_weights, output_bias)
        self.ops['logits'] = tf.nn.bias_add(
            tf.matmul(pool_output, output_weights), output_bias)
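# A minimal NumPy sketch of the in-batch pairing trick used in _make_model above: every
# code snippet is paired with every query so that, for a batch of size B, B*B (code, query)
# pairs are produced; pairs with matching indices act as positives and the rest as
# negatives. The sizes below are illustrative only.
import numpy as np

B, F, T, H = 3, 5, 7, 8                       # batch, query length, code length, hidden size
queries = np.random.rand(B, F, H)
codes = np.random.rand(B, T, H)

# query: B x F x H -> 1 x B x F x H -> B x B x F x H -> (B*B) x F x H
paired_queries = np.tile(queries[None, :, :, :], (B, 1, 1, 1)).reshape(B * B, F, H)
# code:  B x T x H -> B x 1 x T x H -> B x B x T x H -> (B*B) x T x H
paired_codes = np.tile(codes[:, None, :, :], (1, B, 1, 1)).reshape(B * B, T, H)

# Pair k = i * B + j holds (code_i, query_j); pairs with i == j are the positives.
assert np.allclose(paired_codes[0 * B + 1], codes[0])
assert np.allclose(paired_queries[0 * B + 1], queries[1])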
def _complex_model(self, is_train: bool = False) -> tf.Tensor:
    models = ['nbow', 'rnn']  # nbow, cnn, rnn, bert
    attention = False
    embeddings = list()

    with tf.variable_scope("tree_encoder"):
        self._make_placeholders()

        self.placeholders['tokens_lengths'] = \
            tf.placeholder(tf.int32, shape=[None], name='tokens_lengths')
        self.placeholders['rnn_dropout_keep_rate'] = \
            tf.placeholder(tf.float32, shape=[], name='rnn_dropout_keep_rate')
        self.placeholders['rnn_recurrent_dropout_keep_rate'] = \
            tf.placeholder(tf.float32, shape=[], name='rnn_recurrent_dropout_keep_rate')

        common_flag = True
        if 'nbow' in models and 'rnn' in models:
            seq_tokens = self.placeholders['tokens']
            seq_tokens_embeddings = self.embedding_layer(seq_tokens)
            common_flag = False

        if 'nbow' in models:
            if common_flag:
                seq_tokens_embeddings = self.embedding_layer(self.placeholders['tokens'])
            seq_token_mask = self.placeholders['tokens_mask']
            seq_token_lengths = tf.reduce_sum(seq_token_mask, axis=1)  # B
            embedding = pool_sequence_embedding(
                self.get_hyper('nbow_pool_mode').lower(),
                sequence_token_embeddings=seq_tokens_embeddings,
                sequence_lengths=seq_token_lengths,
                sequence_token_masks=seq_token_mask)
            embeddings.append(embedding)

        if 'cnn' in models:
            if common_flag:
                seq_tokens_embeddings = self.embedding_layer(self.placeholders['tokens'])
            seq_tokens_embeddings = self.__add_position_encoding(seq_tokens_embeddings)
            activation_fun = get_activation(self.get_hyper('1dcnn_activation'))
            current_embeddings = seq_tokens_embeddings
            num_filters_and_width = zip(self.get_hyper('1dcnn_layer_list'),
                                        self.get_hyper('1dcnn_kernel_width'))
            for (layer_idx, (num_filters, kernel_width)) in enumerate(num_filters_and_width):
                next_embeddings = tf.layers.conv1d(inputs=current_embeddings,
                                                   filters=num_filters,
                                                   kernel_size=kernel_width,
                                                   padding="same")
                # Add residual connections past the first layer.
                if self.get_hyper('1dcnn_add_residual_connections') and layer_idx > 0:
                    next_embeddings += current_embeddings
                current_embeddings = activation_fun(next_embeddings)
                current_embeddings = tf.nn.dropout(
                    current_embeddings,
                    keep_prob=self.placeholders['dropout_keep_rate'])
            seq_token_mask = self.placeholders['tokens_mask']
            seq_token_lengths = tf.reduce_sum(seq_token_mask, axis=1)  # B
            embedding = pool_sequence_embedding(
                self.get_hyper('1dcnn_pool_mode').lower(),
                sequence_token_embeddings=current_embeddings,
                sequence_lengths=seq_token_lengths,
                sequence_token_masks=seq_token_mask)
            embeddings.append(embedding)

        if 'rnn' in models:
            if common_flag:
                seq_tokens = self.placeholders['tokens']
                seq_tokens_embeddings = self.embedding_layer(seq_tokens)
            seq_tokens_lengths = self.placeholders['tokens_lengths']
            rnn_final_state, token_embeddings = self._encode_with_rnn(
                seq_tokens_embeddings, seq_tokens_lengths)
            output_pool_mode = self.get_hyper('rnn_pool_mode').lower()
            if output_pool_mode == 'rnn_final':
                embedding = rnn_final_state
            else:
                token_mask = tf.expand_dims(tf.range(tf.shape(seq_tokens)[1]), axis=0)  # 1 x T
                token_mask = tf.tile(token_mask,
                                     multiples=(tf.shape(seq_tokens_lengths)[0], 1))  # B x T
                token_mask = tf.cast(token_mask < tf.expand_dims(seq_tokens_lengths, axis=-1),
                                     dtype=tf.float32)  # B x T
                embedding = pool_sequence_embedding(
                    output_pool_mode,
                    sequence_token_embeddings=token_embeddings,
                    sequence_lengths=seq_tokens_lengths,
                    sequence_token_masks=token_mask)
            embeddings.append(embedding)

        if 'bert' in models:
            config = BertConfig(
                vocab_size=self.get_hyper('token_vocab_size'),
                hidden_size=self.get_hyper('self_attention_hidden_size'),
                num_hidden_layers=self.get_hyper('self_attention_num_layers'),
                num_attention_heads=self.get_hyper('self_attention_num_heads'),
                intermediate_size=self.get_hyper('self_attention_intermediate_size'))
            model = BertModel(config=config,
                              is_training=is_train,
                              input_ids=self.placeholders['tokens'],
                              input_mask=self.placeholders['tokens_mask'],
                              use_one_hot_embeddings=False)
            output_pool_mode = self.get_hyper('self_attention_pool_mode').lower()
            if output_pool_mode == 'bert':
                embedding = model.get_pooled_output()
            else:
                seq_token_embeddings = model.get_sequence_output()
                seq_token_masks = self.placeholders['tokens_mask']
                seq_token_lengths = tf.reduce_sum(seq_token_masks, axis=1)  # B
                embedding = pool_sequence_embedding(
                    output_pool_mode,
                    sequence_token_embeddings=seq_token_embeddings,
                    sequence_lengths=seq_token_lengths,
                    sequence_token_masks=seq_token_masks)
            embeddings.append(embedding)

        embeddings = tf.concat(embeddings, axis=-1)
        if attention:
            embeddings = Common.self_attention_layer(embeddings)

        # "concat one-hot" is equal to "accumulate embedding":
        # [v1^T, v2^T, v3^T] * W = [v1^T, v2^T, v3^T] * [w1, w2, w3]^T = v1^T*w1 + v2^T*w2 + v3^T*w3
        print('*@' * 16)
        print(embeddings)
        print(tf.shape(embeddings))
        return tf.reduce_sum(embeddings, axis=0)
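# The comment at the end of _complex_model argues that concatenating embeddings and
# multiplying by one weight matrix is equivalent to projecting each embedding separately
# and summing the results. A quick NumPy check of that identity; the sizes are
# illustrative only.
import numpy as np

d, h = 4, 6
v1, v2, v3 = (np.random.rand(d) for _ in range(3))
w1, w2, w3 = (np.random.rand(d, h) for _ in range(3))

concat_then_project = np.concatenate([v1, v2, v3]) @ np.vstack([w1, w2, w3])
project_then_sum = v1 @ w1 + v2 @ w2 + v3 @ w3
assert np.allclose(concat_then_project, project_then_sum)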
def _single_model(self, is_train: bool = False) -> tf.Tensor:
    model = 'nbow'  # nbow, cnn, rnn, bert
    attention = False
    embedding = None

    with tf.variable_scope("tree_encoder"):
        self._make_placeholders()

        self.placeholders['tokens_lengths'] = \
            tf.placeholder(tf.int32, shape=[None], name='tokens_lengths')
        self.placeholders['rnn_dropout_keep_rate'] = \
            tf.placeholder(tf.float32, shape=[], name='rnn_dropout_keep_rate')
        self.placeholders['rnn_recurrent_dropout_keep_rate'] = \
            tf.placeholder(tf.float32, shape=[], name='rnn_recurrent_dropout_keep_rate')

        if model == 'nbow':
            seq_tokens_embeddings = self.embedding_layer(self.placeholders['tokens'])
            seq_token_mask = self.placeholders['tokens_mask']
            seq_token_lengths = tf.reduce_sum(seq_token_mask, axis=1)  # B
            if attention:
                embedding = Common.yet_attention_layer(seq_tokens_embeddings)
            else:
                embedding = pool_sequence_embedding(
                    self.get_hyper('nbow_pool_mode').lower(),
                    sequence_token_embeddings=seq_tokens_embeddings,
                    sequence_lengths=seq_token_lengths,
                    sequence_token_masks=seq_token_mask)
        elif model == 'cnn':
            seq_tokens_embeddings = self.embedding_layer(self.placeholders['tokens'])
            seq_tokens_embeddings = self.__add_position_encoding(seq_tokens_embeddings)
            activation_fun = get_activation(self.get_hyper('1dcnn_activation'))
            current_embeddings = seq_tokens_embeddings
            num_filters_and_width = zip(self.get_hyper('1dcnn_layer_list'),
                                        self.get_hyper('1dcnn_kernel_width'))
            for (layer_idx, (num_filters, kernel_width)) in enumerate(num_filters_and_width):
                next_embeddings = tf.layers.conv1d(inputs=current_embeddings,
                                                   filters=num_filters,
                                                   kernel_size=kernel_width,
                                                   padding="same")
                # Add residual connections past the first layer.
                if self.get_hyper('1dcnn_add_residual_connections') and layer_idx > 0:
                    next_embeddings += current_embeddings
                current_embeddings = activation_fun(next_embeddings)
                current_embeddings = tf.nn.dropout(
                    current_embeddings,
                    keep_prob=self.placeholders['dropout_keep_rate'])
            if attention:
                embedding = Common.yet_attention_layer(current_embeddings)
            else:
                seq_token_mask = self.placeholders['tokens_mask']
                seq_token_lengths = tf.reduce_sum(seq_token_mask, axis=1)  # B
                embedding = pool_sequence_embedding(
                    self.get_hyper('1dcnn_pool_mode').lower(),
                    sequence_token_embeddings=current_embeddings,
                    sequence_lengths=seq_token_lengths,
                    sequence_token_masks=seq_token_mask)
        elif model == 'rnn':
            seq_tokens = self.placeholders['tokens']
            seq_tokens_embeddings = self.embedding_layer(seq_tokens)
            seq_tokens_lengths = self.placeholders['tokens_lengths']
            rnn_final_state, token_embeddings = self._encode_with_rnn(
                seq_tokens_embeddings, seq_tokens_lengths)
            output_pool_mode = self.get_hyper('rnn_pool_mode').lower()
            if output_pool_mode == 'rnn_final':
                embedding = rnn_final_state
            else:
                if attention:
                    embedding = Common.yet_attention_layer(token_embeddings)
                else:
                    token_mask = tf.expand_dims(tf.range(tf.shape(seq_tokens)[1]), axis=0)  # 1 x T
                    token_mask = tf.tile(token_mask,
                                         multiples=(tf.shape(seq_tokens_lengths)[0], 1))  # B x T
                    token_mask = tf.cast(token_mask < tf.expand_dims(seq_tokens_lengths, axis=-1),
                                         dtype=tf.float32)  # B x T
                    embedding = pool_sequence_embedding(
                        output_pool_mode,
                        sequence_token_embeddings=token_embeddings,
                        sequence_lengths=seq_tokens_lengths,
                        sequence_token_masks=token_mask)
        elif model == 'bert':
            config = BertConfig(
                vocab_size=self.get_hyper('token_vocab_size'),
                hidden_size=self.get_hyper('self_attention_hidden_size'),
                num_hidden_layers=self.get_hyper('self_attention_num_layers'),
                num_attention_heads=self.get_hyper('self_attention_num_heads'),
                intermediate_size=self.get_hyper('self_attention_intermediate_size'))
            model = BertModel(config=config,
                              is_training=is_train,
                              input_ids=self.placeholders['tokens'],
                              input_mask=self.placeholders['tokens_mask'],
                              use_one_hot_embeddings=False)
            output_pool_mode = self.get_hyper('self_attention_pool_mode').lower()
            if output_pool_mode == 'bert':
                embedding = model.get_pooled_output()
            else:
                seq_token_embeddings = model.get_sequence_output()
                # Only when the output is not BERT-pooled do we consider attention.
                if attention:
                    embedding = Common.yet_attention_layer(seq_token_embeddings)
                else:
                    seq_token_masks = self.placeholders['tokens_mask']
                    seq_token_lengths = tf.reduce_sum(seq_token_masks, axis=1)  # B
                    embedding = pool_sequence_embedding(
                        output_pool_mode,
                        sequence_token_embeddings=seq_token_embeddings,
                        sequence_lengths=seq_token_lengths,
                        sequence_token_masks=seq_token_masks)
        else:
            raise ValueError('Undefined Config')

        return embedding