def encode(self, inputs, attention_bias, training):
  """Generate continuous representation for inputs.

  Args:
    inputs: int tensor with shape [batch_size, input_length].
    attention_bias: float tensor with shape [batch_size, 1, 1, input_length].
    training: boolean, whether in training mode or not.

  Returns:
    float tensor with shape [batch_size, input_length, hidden_size]
  """
  with tf.name_scope("encode"):
    # Prepare inputs to the layer stack by adding positional encodings and
    # applying dropout.
    embedded_inputs = self.embedding_softmax_layer(inputs)
    embedded_inputs = tf.cast(embedded_inputs, self.params["dtype"])
    inputs_padding = model_utils.get_padding(inputs)
    attention_bias = tf.cast(attention_bias, self.params["dtype"])

    with tf.name_scope("add_pos_encoding"):
      length = tf.shape(embedded_inputs)[1]
      pos_encoding = model_utils.get_position_encoding(
          length, self.params["hidden_size"])
      pos_encoding = tf.cast(pos_encoding, self.params["dtype"])
      encoder_inputs = embedded_inputs + pos_encoding

    if training:
      encoder_inputs = tf.nn.dropout(
          encoder_inputs, rate=self.params["layer_postprocess_dropout"])

    return self.encoder_stack(
        encoder_inputs, attention_bias, inputs_padding, training=training)
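# Every encode() variant here calls model_utils.get_position_encoding but the
# helper itself is never shown. Below is a hedged, minimal sketch of the
# standard sinusoidal position encoding from "Attention Is All You Need",
# consistent with the call signature above; the real model_utils
# implementation may differ in details.
import math

import tensorflow as tf


def position_encoding_sketch(length, hidden_size,
                             min_timescale=1.0, max_timescale=1.0e4):
  """Returns a [length, hidden_size] tensor of sinusoidal position signals."""
  position = tf.cast(tf.range(length), tf.float32)
  num_timescales = hidden_size // 2
  # Geometrically spaced wavelengths between min_timescale and max_timescale.
  log_timescale_increment = (
      math.log(float(max_timescale) / float(min_timescale)) /
      (num_timescales - 1))
  inv_timescales = min_timescale * tf.exp(
      tf.cast(tf.range(num_timescales), tf.float32) * -log_timescale_increment)
  # Outer product: each position scaled by each inverse timescale.
  scaled_time = tf.expand_dims(position, 1) * tf.expand_dims(inv_timescales, 0)
  # First half sine, second half cosine, concatenated on the channel axis.
  return tf.concat([tf.sin(scaled_time), tf.cos(scaled_time)], axis=1)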
def encode(self, inputs, attention_bias, pad_num, pos):
  """Generate continuous representation for inputs.

  Args:
    inputs: float tensor whose last dimension is 300 (per-position features);
      it is flattened to [-1, 300] before the embedding projection.
    attention_bias: float tensor with shape [batch_size, 1, 1, input_length].
    pad_num: tensor passed to model_utils.get_padding to mark padded positions.
    pos: precomputed position encoding, broadcast-added to the embeddings.

  Returns:
    float tensor with shape [batch_size, output_dim], the projection of the
    encoder output at position 0.
  """
  with tf.name_scope("encode"):
    # Project per-position features to the model dimension. `hiddensize` is
    # assumed to be a module-level constant equal to the model's hidden size.
    embedded_inputs = tf.reshape(
        self.embedding_layer(tf.reshape(inputs, [-1, 300])),
        [self.batch_size, -1, hiddensize])
    inputs_padding = model_utils.get_padding(pad_num)

    with tf.name_scope("add_pos_encoding"):
      encoder_inputs = embedded_inputs + pos

    encoderout = self.encoder_stack(encoder_inputs, attention_bias,
                                    inputs_padding)
    # Take the encoder output at the first position (e.g. a [CLS]-style
    # token) and project it to the output dimension.
    out = self.outfc(tf.reshape(encoderout[:, 0, :], [-1, hiddensize]))
    return out
def encode(self, inputs, attention_bias, input_types=None):
  """Generate continuous representation for inputs.

  Args:
    inputs: int tensor with shape [batch_size, input_length].
    attention_bias: float tensor with shape [batch_size, 1, 1, input_length].
    input_types: optional int tensor identifying the type of each input
      position; when given, a type encoding is added to the encoder inputs.

  Returns:
    float tensor with shape [batch_size, input_length, hidden_size]
  """
  with tf.name_scope("encode"):
    # Prepare inputs to the layer stack by adding positional encodings and
    # applying dropout.
    embedded_inputs = self.embedding_softmax_layer(inputs)
    inputs_padding = model_utils.get_padding(inputs)

    with tf.name_scope("add_pos_encoding"):
      length = tf.shape(embedded_inputs)[1]
      pos_encoding = model_utils.get_position_encoding(
          length, self.params["hidden_size"])
      encoder_inputs = embedded_inputs + pos_encoding

    if input_types is not None:
      input_types = model_utils.get_input_types(
          input_types, self.params["hidden_size"],
          num_types=self.params["num_types"])
      encoder_inputs = encoder_inputs + input_types

    if self.train:
      # TF1-style dropout: the second argument is keep_prob.
      encoder_inputs = tf.nn.dropout(
          encoder_inputs, 1 - self.params["layer_postprocess_dropout"])

    return self.encoder_stack(encoder_inputs, attention_bias, inputs_padding)
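# model_utils.get_input_types is not shown anywhere in this section. A hedged
# guess, given how it is used above (added elementwise to the encoder inputs),
# is a learned per-type embedding akin to BERT's segment embeddings. The
# sketch below is TF1-style to match the snippet; the name, variable scope,
# and initializer are assumptions.
import tensorflow.compat.v1 as tf

tf.disable_eager_execution()  # the snippet above is TF1 graph-mode code


def get_input_types_sketch(input_types, hidden_size, num_types):
  """Looks up a learned [hidden_size] embedding for each input type id."""
  with tf.variable_scope("input_type_embeddings", reuse=tf.AUTO_REUSE):
    type_table = tf.get_variable(
        "type_table", [num_types, hidden_size],
        initializer=tf.truncated_normal_initializer(stddev=0.02))
  # [batch_size, input_length] -> [batch_size, input_length, hidden_size].
  return tf.gather(type_table, input_types)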
def gen_used_text(word2idx=None, texts=None, max_len=None, idx2word=None,
                  text_idx=None, choice="string"):
    """Return the text as the model actually sees it (with _unk/_pad tokens).

    Usually for sentences of one model. If text_idx is given, the shorter
    texts are restored directly from it; otherwise text_idx is first built
    from texts with word2idx and padded to max_len, then reversed back.

    :param word2idx: dict mapping word -> index; required when text_idx is None.
    :param texts: original texts; required when text_idx is None.
    :param max_len: padding/truncation length; required when text_idx is None.
    :param idx2word: dict mapping index -> word; derived from word2idx if None.
    :param text_idx: precomputed index sequences; skips the conversion step.
    :param choice: "string" to join tokens with spaces, otherwise return
        token lists.
    :return: list of model-used texts with padding/unk tokens applied.
    """
    # Not given text_idx: generate it from texts via word2idx, then pad.
    if text_idx is None:
        if (word2idx is None) or (texts is None) or (max_len is None):
            print("Need more information to gen text_idx")
        else:
            text_idx = sentences_to_idx(texts, word2idx)
            text_idx, _ = get_padding(text_idx, max_len=max_len)
    if idx2word is None:
        idx2word = dict((v, k) for k, v in word2idx.items())
    # Each text now has max_len entries, including paddings/unks.
    if choice == "string":
        shorter_texts = [" ".join([idx2word[idx] for idx in t]) for t in text_idx]
    else:
        shorter_texts = [[idx2word[idx] for idx in t] for t in text_idx]
    return shorter_texts
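# Hedged usage sketch for gen_used_text. The vocabulary and texts below are
# made up, and the exact output depends on the unshown sentences_to_idx and
# get_padding helpers (assumed here to map OOV words to _unk and to pad with
# _pad up to max_len).
word2idx = {"_pad": 0, "_unk": 1, "hello": 2, "world": 3}
texts = ["hello world", "hello there"]
used = gen_used_text(word2idx=word2idx, texts=texts, max_len=4)
# Plausibly: ["hello world _pad _pad", "hello _unk _pad _pad"]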
def encode(self, inputs, attention_bias): """Generate continuous representation for inputs. 流程: Embedding -> 位置编码 -> dropout -> encoder_stack inputs: 原始的输入句子, shape=[batch_size, input_length]. attention_bias: padding的位置标记为-1e9,其余位置标记为0. shape=[batch_size, 1, 1, input_length] 返回encoder提取的特征: shape=[batch_size, input_length, hidden_size] """ with tf.name_scope("encode"): # shape=(batch_size, length, embedding_dim) # embdding的过程中,padding的位置输出的是全0的向量 embedded_inputs = self.embedding_layer_encoder(inputs) # embedding length = tf.shape(embedded_inputs)[1] # 获取 padding 的位置,标记为1, 其余标记为0. shape=(batchsize, length) inputs_padding = model_utils.get_padding(inputs) with tf.name_scope("add_pos_encoding"): # 位置编码,shape=(length, hidden_size) pos_encoding = model_utils.get_position_encoding( length, self.params["hidden_size"]) # 组合, shape=(length, hidden_size) encoder_inputs = embedded_inputs + pos_encoding if self.train: encoder_inputs = tf.nn.dropout( encoder_inputs, rate=self.params["layer_postprocess_dropout"]) # 最后一步:调用 encoder_stack处理 return self.encoder_stack(encoder_inputs, attention_bias, inputs_padding)
def encode(self, inputs, attention_bias): """Generate continuous representation for inputs. Args: inputs: int tensor with shape [batch_size, input_length]. attention_bias: float tensor with shape [batch_size, 1, 1, input_length] Returns: float tensor with shape [batch_size, input_length, hidden_size] """ with tf.name_scope("encode"): # Prepare inputs to the layer stack by adding positional encodings and # applying dropout. embedded_inputs = self.embedding_softmax_layer(inputs) inputs_padding = model_utils.get_padding(inputs) with tf.name_scope("add_pos_encoding"): length = tf.shape(embedded_inputs)[1] pos_encoding = model_utils.get_position_encoding( length, self.params.hidden_size) encoder_inputs = embedded_inputs + pos_encoding if self.train: encoder_inputs = tf.nn.dropout( encoder_inputs, 1 - self.params.layer_postprocess_dropout) # with tf.variable_scope( # self.scope, initializer=tf.truncated_normal_initializer(stddev=0.01), reuse=tf.AUTO_REUSE): # self.embeddings = tf.layers.dense( # self.out1, self.params.output_dim, bias_initializer=tf.constant_initializer(0.1), name="dense") return self.encoder_stack(encoder_inputs, attention_bias, inputs_padding)
def Embedding(self, x):
  """Embed inputs and add position encoding.

  Args:
    x: int tensor with shape [batch_size, length].

  Returns:
    float tensor with shape [batch_size, length, hidden_size].
  """
  hparams = self.hparams
  if hparams['embedding_model'] == 'transformer':
    # Note: the embedding layer is created here rather than in __init__,
    # so this method is expected to be called once when building the model.
    self.embedding_layer = embedding_layer.EmbeddingSharedWeights(
        hparams["vocab_size"], hparams["hidden_size"])
    embedded_inputs = self.embedding_layer(x)

    with tf.name_scope("add_pos_encoding"):
      length = tf.shape(embedded_inputs)[1]
      pos_encoding = model_utils.get_position_encoding(
          length, hparams["hidden_size"])
      encoder_inputs = embedded_inputs + pos_encoding

    if self.hparams['train']:
      encoder_inputs = tf.nn.dropout(
          encoder_inputs, rate=self.hparams["layer_postprocess_dropout"])

    # Cache the padding mask and attention bias for later encoder calls.
    self.inputs_padding = model_utils.get_padding(x)
    self.attention_bias = model_utils.get_padding_bias(x)
    return encoder_inputs
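# model_utils.get_padding_bias, used above, is also never defined in this
# section. From the docstrings here (attention_bias is -1e9 at padded
# positions and 0 elsewhere, shape [batch_size, 1, 1, input_length]), a
# minimal sketch looks like this; the real helper may differ in details.
import tensorflow as tf


def get_padding_bias_sketch(x, padding_value=0):
  """Builds an attention bias of shape [batch_size, 1, 1, length] from ids."""
  padding = tf.cast(tf.equal(x, padding_value), tf.float32)
  attention_bias = padding * -1e9  # large negative value masks attention
  # Insert the two singleton axes expected by multi-head attention.
  return tf.expand_dims(tf.expand_dims(attention_bias, axis=1), axis=1)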
def test_get_padding(self):
  x = tf.constant([[1, 0, 0, 0, 2], [3, 4, 0, 0, 0], [0, 5, 6, 0, 7]])
  padding = model_utils.get_padding(x, padding_value=0)
  with self.test_session() as sess:
    padding = sess.run(padding)

  self.assertAllEqual([[0, 1, 1, 1, 0],
                       [0, 0, 1, 1, 1],
                       [1, 0, 0, 1, 0]], padding)
def build_encoder(self, x, encoder_emb_inp, attention_bias, reuse=False):
  ## x: (batch_size, enc_len)
  padding_bias = attention_bias
  with tf.variable_scope(
      "Encoder",
      reuse=reuse,
      initializer=tf.contrib.layers.xavier_initializer()):
    encoder = Encoder(num_layers=self.num_layers,
                      num_heads=self.num_heads,
                      linear_key_dim=self.linear_key_dim,
                      linear_value_dim=self.linear_value_dim,
                      model_dim=self.hidden_dim,
                      ffn_dim=self.ffn_dim)
    padding = model_utils.get_padding(x)
    return encoder.build(encoder_emb_inp, padding_bias, padding=padding)
def test_get_padding(self):
  x = tf.constant([[1, 0, 0, 0, 2], [3, 4, 0, 0, 0], [0, 5, 6, 0, 7]])
  padding = model_utils.get_padding(x, padding_value=0)

  self.assertAllEqual([[0, 1, 1, 1, 0],
                       [0, 0, 1, 1, 1],
                       [1, 0, 0, 1, 0]], padding)
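# Both tests above pin down model_utils.get_padding exactly: it returns 1.0
# where the input equals padding_value and 0.0 elsewhere. A minimal
# implementation consistent with those tests:
import tensorflow as tf


def get_padding_sketch(x, padding_value=0, dtype=tf.float32):
  """Returns a float tensor, 1 at padded positions and 0 at real tokens."""
  return tf.cast(tf.equal(x, padding_value), dtype)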