def _prepare_modules(self): """Prepare necessary modules """ self.training = tx.context.global_mode_train() ## 判断当前是否在训练 # encode ctx self.transformer_encoder = TransformerEncoder(hparams=self._hparams.transformer_encoder) # encode y self.word_embedder = WordEmbedder( vocab_size = self.vocab.size, hparams=self._hparams.wordEmbedder ) self.self_graph_encoder = SelfGraphTransformerEncoder(hparams=self._hparams.encoder) self.downmlp = MLPTransformConnector(self._hparams.dim_c) self.PRelu = PRelu(self._hparams.prelu) self.rephrase_encoder = UnidirectionalRNNEncoder(hparams=self._hparams.rephrase_encoder) self.rephrase_decoder = DynamicAttentionRNNDecoder( memory_sequence_length = self.sequence_length_yy1-1, ## use yy1's truth length ###check? cell_input_fn = lambda inputs, attention: inputs, vocab_size = self.vocab.size, hparams = self._hparams.rephrase_decoder )
def _prepare_modules(self): """Prepare necessary modules """ self.embedder = WordEmbedder(vocab_size=self.vocab.size, hparams=self._hparams.embedder) self.clas_embedder = WordEmbedder(vocab_size=self.vocab.size, hparams=self._hparams.embedder) self.label_connector = MLPTransformConnector(self._hparams.dim_c) self.self_graph_encoder = SelfGraphTransformerEncoder( hparams=self._hparams.encoder) self.cross_graph_encoder = CrossGraphTransformerFixedLengthDecoder( vocab_size=self.vocab.size, tau=self.gamma, hparams=self._hparams.encoder) self.classifier_graph = Conv1DClassifier( hparams=self._hparams.classifier) self.classifier_sentence = Conv1DClassifier( hparams=self._hparams.classifier) self.rephrase_encoder = UnidirectionalRNNEncoder( hparams=self._hparams.rephrase_encoder) self.rephrase_decoder = DynamicAttentionRNNDecoder( memory_sequence_length=self.sequence_length - 1, cell_input_fn=lambda inputs, attention: inputs, vocab_size=self.vocab.size, hparams=self._hparams.rephrase_decoder)
def _prepare_modules(self): """Prepare necessary modules """ self.training = tx.context.global_mode_train() ## 判断当前是否在训练 # encode ctx self.bert_encoder = BertEncoder(pretrained_model_name="bert-base-uncased", hparams=self._hparams.bert_encoder) # encode y self.word_embedder = WordEmbedder( vocab_size = self.vocab['vocab_size'], hparams=self._hparams.wordEmbedder ) self.downmlp = MLPTransformConnector(self._hparams.dim_c) self.self_transformer = SelfGraphTransformerEncoder(hparams=self._hparams.encoder) self.rephrase_encoder = UnidirectionalRNNEncoder(hparams=self._hparams.rephrase_encoder) ## Build for rephraser self.rephrase_decoder = DynamicAttentionRNNDecoder( memory_sequence_length = self.sequence_length_yy1-1, ## use yy1's truth length ###check? cell_input_fn = lambda inputs, attention: inputs, vocab_size = self.vocab['vocab_size'], hparams = self._hparams.rephrase_decoder )
def _prepare_modules(self): """Prepare necessary modules """ self.embedder = WordEmbedder(vocab_size=self.vocab.size, hparams=self._hparams.embedder) self.clas_embedder = WordEmbedder(vocab_size=self.vocab.size, hparams=self._hparams.embedder) self.label_connector = MLPTransformConnector(self._hparams.dim_c) self.self_graph_encoder = SelfGraphTransformerEncoder( hparams=self._hparams.encoder) self.cross_graph_encoder = CrossGraphTransformerFixedLengthDecoder( vocab_size=self.vocab.size, tau=self.gamma, hparams=self._hparams.encoder) self.classifier_graph = Conv1DClassifier( hparams=self._hparams.classifier) self.classifier_sentence = Conv1DClassifier( hparams=self._hparams.classifier) self.rephrase_encoder = UnidirectionalRNNEncoder( hparams=self._hparams.rephrase_encoder) self.rephrase_decoder = DynamicAttentionRNNDecoder( memory_sequence_length=self.sequence_length - 1, cell_input_fn=lambda inputs, attention: inputs, vocab_size=self.vocab.size, hparams=self._hparams.rephrase_decoder) self.adj_embedder = WordEmbedder(vocab_size=self.vocab.size, hparams=self._hparams.embedder) self.adj_encoder = BidirectionalRNNEncoder( hparams=self._hparams.adj_encoder) self.conv1d_1 = tf.layers.Conv1D(128, kernel_size=3, strides=1, padding='same') self.conv1d_2 = tf.layers.Conv1D(256, kernel_size=3, strides=1, padding='same') self.bn1 = tf.layers.BatchNormalization() self.conv1d_3 = tf.layers.Conv1D(512, kernel_size=3, strides=1, padding='same') self.bn2 = tf.layers.BatchNormalization() self.conv1d_4 = tf.layers.Conv1D(512, kernel_size=3, strides=1, padding='same') self.bn3 = tf.layers.BatchNormalization() self.conv1d_5 = tf.layers.Conv1D(1024, kernel_size=3, strides=1, padding='same')
def test_mlp_transform_connector(self):
    """Tests the logic of
    :class:`~texar.modules.connectors.MLPTransformConnector`.
    """
    connector = MLPTransformConnector(self._decoder_cell.state_size)
    output = connector(tf.zeros([5, 10]))
    nest.assert_same_structure(output, self._decoder_cell.state_size)

    with self.test_session() as sess:
        sess.run(tf.global_variables_initializer())
        output_ = sess.run(output)
        nest.assert_same_structure(output_, self._decoder_cell.state_size)
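# The snippet below is a minimal, self-contained sketch (not from the test suite) of the
# pattern the test above exercises: MLPTransformConnector builds an MLP whose output is
# packed into an arbitrary (possibly nested) structure such as an RNN cell state, so that
# a flat feature batch can serve, e.g., as a decoder's initial state. It assumes the same
# Texar(-TF1) version as the surrounding code; the cell size and inputs are illustrative.
import tensorflow as tf
from texar.modules import MLPTransformConnector

cell = tf.nn.rnn_cell.LSTMCell(256)
# Target structure: LSTMStateTuple(c=256, h=256)
connector = MLPTransformConnector(cell.state_size)
features = tf.zeros([5, 10])           # e.g. a (batch, dim) label / latent code
initial_state = connector(features)    # same nesting as cell.state_size

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    c_state, h_state = sess.run(initial_state)
    print(c_state.shape, h_state.shape)  # (5, 256) (5, 256)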
def _build_model(self, inputs, vocab, gamma, lambda_g):
    """Builds the model.
    """
    embedder = WordEmbedder(
        vocab_size=vocab.size, hparams=self._hparams.embedder)
    encoder = UnidirectionalRNNEncoder(hparams=self._hparams.encoder)

    # text_ids for encoder, with BOS token removed
    enc_text_ids = inputs['text_ids'][:, 1:]
    enc_outputs, final_state = encoder(
        embedder(enc_text_ids), sequence_length=inputs['length'] - 1)
    z = final_state[:, self._hparams.dim_c:]

    # Encodes label
    label_connector = MLPTransformConnector(self._hparams.dim_c)

    # Gets the sentence representation: h = (c, z)
    labels0 = tf.to_float(tf.reshape(inputs['labels0'], [-1, 1]))
    labels1 = tf.to_float(tf.reshape(inputs['labels1'], [-1, 1]))
    labels2 = tf.to_float(tf.reshape(inputs['labels2'], [-1, 1]))
    labels3 = tf.to_float(tf.reshape(inputs['labels3'], [-1, 1]))
    labels = tf.concat([labels0, labels1, labels2, labels3], axis=1)
    print('labels', labels)
    sys.stdout.flush()
    c = label_connector(labels)
    c_ = label_connector(1 - labels)
    h = tf.concat([c, z], 1)
    h_ = tf.concat([c_, z], 1)

    # Teacher-force decoding and the auto-encoding loss for G
    decoder = AttentionRNNDecoder(
        memory=enc_outputs,
        memory_sequence_length=inputs['length'] - 1,
        cell_input_fn=lambda inputs, attention: inputs,
        vocab_size=vocab.size,
        hparams=self._hparams.decoder)
    connector = MLPTransformConnector(decoder.state_size)

    g_outputs, _, _ = decoder(
        initial_state=connector(h),
        inputs=inputs['text_ids'],
        embedding=embedder,
        sequence_length=inputs['length'] - 1)
    print('labels shape', inputs['text_ids'][:, 1:],
          'logits shape', g_outputs.logits)
    print(inputs['length'] - 1)

    loss_g_ae = tx.losses.sequence_sparse_softmax_cross_entropy(
        labels=inputs['text_ids'][:, 1:],
        logits=g_outputs.logits,
        sequence_length=inputs['length'] - 1,
        average_across_timesteps=True,
        sum_over_timesteps=False)

    # Gumbel-softmax decoding, used in training
    start_tokens = tf.ones_like(inputs['labels0']) * vocab.bos_token_id
    end_token = vocab.eos_token_id
    gumbel_helper = GumbelSoftmaxEmbeddingHelper(
        embedder.embedding, start_tokens, end_token, gamma)

    soft_outputs_, _, soft_length_ = decoder(
        helper=gumbel_helper, initial_state=connector(h_))
    print(g_outputs, soft_outputs_)

    # Greedy decoding, used in eval
    outputs_, _, length_ = decoder(
        decoding_strategy='infer_greedy',
        initial_state=connector(h_),
        embedding=embedder,
        start_tokens=start_tokens,
        end_token=end_token)

    # Creates classifier
    classifier0 = Conv1DClassifier(hparams=self._hparams.classifier)
    classifier1 = Conv1DClassifier(hparams=self._hparams.classifier)
    classifier2 = Conv1DClassifier(hparams=self._hparams.classifier)
    classifier3 = Conv1DClassifier(hparams=self._hparams.classifier)
    clas_embedder = WordEmbedder(
        vocab_size=vocab.size, hparams=self._hparams.embedder)

    clas_logits, clas_preds = self._high_level_classifier(
        [classifier0, classifier1, classifier2, classifier3],
        clas_embedder, inputs, vocab, gamma, lambda_g,
        inputs['text_ids'][:, 1:], None, inputs['length'] - 1)
    loss_d_clas = tf.nn.sigmoid_cross_entropy_with_logits(
        labels=tf.to_float(labels), logits=clas_logits)
    loss_d_clas = tf.reduce_mean(loss_d_clas)
    accu_d = tx.evals.accuracy(labels, preds=clas_preds)

    # Classification loss for the generator, based on soft samples
    # soft_logits, soft_preds = classifier(
    #     inputs=clas_embedder(soft_ids=soft_outputs_.sample_id),
    #     sequence_length=soft_length_)
    soft_logits, soft_preds = self._high_level_classifier(
        [classifier0, classifier1, classifier2, classifier3],
        clas_embedder, inputs, vocab, gamma, lambda_g,
        None, soft_outputs_.sample_id, soft_length_)
    print(soft_logits.shape, soft_preds.shape)
    loss_g_clas = tf.nn.sigmoid_cross_entropy_with_logits(
        labels=tf.to_float(1 - labels), logits=soft_logits)
    loss_g_clas = tf.reduce_mean(loss_g_clas)

    # Accuracy on soft samples, for training progress monitoring
    accu_g = tx.evals.accuracy(labels=1 - labels, preds=soft_preds)

    # Accuracy on greedy-decoded samples, for training progress monitoring
    # _, gdy_preds = classifier(
    #     inputs=clas_embedder(ids=outputs_.sample_id),
    #     sequence_length=length_)
    _, gdy_preds = self._high_level_classifier(
        [classifier0, classifier1, classifier2, classifier3],
        clas_embedder, inputs, vocab, gamma, lambda_g,
        outputs_.sample_id, None, length_)
    print(gdy_preds.shape)
    accu_g_gdy = tx.evals.accuracy(labels=1 - labels, preds=gdy_preds)

    # Aggregates losses
    loss_g = loss_g_ae + lambda_g * loss_g_clas
    loss_d = loss_d_clas

    # Creates optimizers
    g_vars = collect_trainable_variables(
        [embedder, encoder, label_connector, connector, decoder])
    d_vars = collect_trainable_variables(
        [clas_embedder, classifier0, classifier1, classifier2, classifier3])

    train_op_g = get_train_op(loss_g, g_vars, hparams=self._hparams.opt)
    train_op_g_ae = get_train_op(loss_g_ae, g_vars, hparams=self._hparams.opt)
    train_op_d = get_train_op(loss_d, d_vars, hparams=self._hparams.opt)

    # Interface tensors
    self.predictions = {
        "predictions": clas_preds,
        "ground_truth": labels
    }
    self.losses = {
        "loss_g": loss_g,
        "loss_g_ae": loss_g_ae,
        "loss_g_clas": loss_g_clas,
        "loss_d": loss_d_clas
    }
    self.metrics = {
        "accu_d": accu_d,
        "accu_g": accu_g,
        "accu_g_gdy": accu_g_gdy,
    }
    self.train_ops = {
        "train_op_g": train_op_g,
        "train_op_g_ae": train_op_g_ae,
        "train_op_d": train_op_d
    }
    self.samples = {
        "original": inputs['text_ids'][:, 1:],
        "transferred": outputs_.sample_id
    }

    self.fetches_train_g = {
        "loss_g": self.train_ops["train_op_g"],
        "loss_g_ae": self.losses["loss_g_ae"],
        "loss_g_clas": self.losses["loss_g_clas"],
        "accu_g": self.metrics["accu_g"],
        "accu_g_gdy": self.metrics["accu_g_gdy"],
    }
    self.fetches_train_d = {
        "loss_d": self.train_ops["train_op_d"],
        "accu_d": self.metrics["accu_d"]
    }

    fetches_eval = {"batch_size": get_batch_size(inputs['text_ids'])}
    fetches_eval.update(self.losses)
    fetches_eval.update(self.metrics)
    fetches_eval.update(self.samples)
    fetches_eval.update(self.predictions)
    self.fetches_eval = fetches_eval
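# A minimal sketch (not from the source) of how the interface fetches built above are
# typically driven in a TF1 session. It assumes `gamma` and `lambda_g` are placeholders
# fed per epoch, the data iterator signals the end of an epoch with OutOfRangeError, and
# `model` is the object that owns `_build_model`; all names here are illustrative only.
def _train_epoch(sess, model, gamma_val, lambda_g_val, verbose_every=100):
    step = 0
    while True:
        try:
            feed = {
                gamma: gamma_val,                    # assumed placeholder
                lambda_g: lambda_g_val,              # assumed placeholder
                tx.global_mode(): tf.estimator.ModeKeys.TRAIN,
            }
            # One discriminator step, then one generator step per batch.
            vals_d = sess.run(model.fetches_train_d, feed_dict=feed)
            vals_g = sess.run(model.fetches_train_g, feed_dict=feed)
            step += 1
            if step % verbose_every == 0:
                print('step {}: accu_d={:.3f} accu_g={:.3f}'.format(
                    step, vals_d['accu_d'], vals_g['accu_g']))
        except tf.errors.OutOfRangeError:
            break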
def _build_model(self, inputs, vocab, finputs, minputs, gamma):
    """Builds the model.
    """
    self.inputs = inputs
    self.finputs = finputs
    self.minputs = minputs
    self.vocab = vocab

    self.embedder = WordEmbedder(
        vocab_size=self.vocab.size, hparams=self._hparams.embedder)
    # Maybe later try a BidirectionalLSTMEncoder.
    self.encoder = UnidirectionalRNNEncoder(
        hparams=self._hparams.encoder)  # GRU cell

    # text_ids for the encoder, with the BOS (begin-of-sentence) token removed
    self.enc_text_ids = self.inputs['text_ids'][:, 1:]
    self.enc_outputs, self.final_state = self.encoder(
        self.embedder(self.enc_text_ids),
        sequence_length=self.inputs['length'] - 1)
    h = self.final_state

    # Teacher-force decoding and the auto-encoding loss for G
    self.decoder = AttentionRNNDecoder(
        memory=self.enc_outputs,
        memory_sequence_length=self.inputs['length'] - 1,
        # Default is `lambda inputs, attention: tf.concat([inputs, attention], -1)`,
        # which concatenates the regular RNN cell inputs with the attention.
        cell_input_fn=lambda inputs, attention: inputs,
        vocab_size=self.vocab.size,
        hparams=self._hparams.decoder)
    self.connector = MLPTransformConnector(self.decoder.state_size)

    self.g_outputs, _, _ = self.decoder(
        initial_state=self.connector(h),
        inputs=self.inputs['text_ids'],
        embedding=self.embedder,
        sequence_length=self.inputs['length'] - 1)

    self.loss_g_ae = tx.losses.sequence_sparse_softmax_cross_entropy(
        labels=self.inputs['text_ids'][:, 1:],
        logits=self.g_outputs.logits,
        sequence_length=self.inputs['length'] - 1,
        average_across_timesteps=True,
        sum_over_timesteps=False)

    # Greedy decoding, used in eval (and RL training)
    start_tokens = tf.ones_like(
        self.inputs['labels']) * self.vocab.bos_token_id
    end_token = self.vocab.eos_token_id
    self.outputs, _, length = self.decoder(
        # Could later try switching this to 'infer_sample' and compare.
        decoding_strategy='infer_greedy',
        initial_state=self.connector(h),
        embedding=self.embedder,
        start_tokens=start_tokens,
        end_token=end_token)

    # Creates optimizers
    self.g_vars = collect_trainable_variables(
        [self.embedder, self.encoder, self.connector, self.decoder])
    self.train_op_g_ae = get_train_op(
        self.loss_g_ae, self.g_vars, hparams=self._hparams.opt)

    # Interface tensors
    self.samples = {
        "batch_size": get_batch_size(self.inputs['text_ids']),
        "original": self.inputs['text_ids'][:, 1:],
        "transferred": self.outputs.sample_id  # result of infer_greedy decoding
    }

    ############################ female sentiment regression model
    # Only a convnet is used for now; could later try RNN decoding and check the
    # regression accuracy, or combine the two (concatenate into one vector).
    self.fconvnet = Conv1DNetwork(
        hparams=self._hparams.convnet)  # default input: [batch_size, time_steps, embedding_dim]
    self.freg_embedder = WordEmbedder(
        vocab_size=self.vocab.size,
        hparams=self._hparams.embedder)  # (64, 26, 100): output shape of clas_embedder(ids=inputs['text_ids'][:, 1:])
    self.fconv_output = self.fconvnet(
        inputs=self.freg_embedder(
            ids=self.finputs['text_ids'][:, 1:]))  # (64, 128); TODO: prepare finputs
    p = {"type": "Dense", "kwargs": {'units': 1}}
    self.fdense_layer = tx.core.layers.get_layer(hparams=p)
    self.freg_output = self.fdense_layer(inputs=self.fconv_output)

    # Alternative to consider:
    # self.fenc_text_ids = self.finputs['text_ids'][:, 1:]
    # self.fencoder = UnidirectionalRNNEncoder(hparams=self._hparams.encoder)  # GRU cell
    # self.fenc_outputs, self.ffinal_state = self.fencoder(
    #     self.freg_embedder(self.fenc_text_ids),
    #     sequence_length=self.finputs['length'] - 1)
    # self.freg_output = self.fdense_layer(
    #     inputs=tf.concat([self.fconv_output, self.ffinal_state], -1))

    self.fprediction = tf.reshape(self.freg_output, [-1])
    self.fground_truth = tf.to_float(self.finputs['labels'])
    # Per-example loss, so it can later be used in RL to update over a whole batch.
    self.floss_reg_single = tf.pow(self.fprediction - self.fground_truth, 2)
    # Mean loss over the batch.
    self.floss_reg_batch = tf.reduce_mean(self.floss_reg_single)

    # self.freg_vars = collect_trainable_variables(
    #     [self.freg_embedder, self.fconvnet, self.fencoder, self.fdense_layer])
    self.freg_vars = collect_trainable_variables(
        [self.freg_embedder, self.fconvnet, self.fdense_layer])
    self.ftrain_op_d = get_train_op(
        self.floss_reg_batch, self.freg_vars, hparams=self._hparams.opt)

    self.freg_sample = {
        "fprediction": self.fprediction,
        "fground_truth": self.fground_truth,
        "fsent": self.finputs['text_ids'][:, 1:]
    }

    ############################ male sentiment regression model
    self.mconvnet = Conv1DNetwork(
        hparams=self._hparams.convnet)  # default input: [batch_size, time_steps, embedding_dim]
    self.mreg_embedder = WordEmbedder(
        vocab_size=self.vocab.size,
        hparams=self._hparams.embedder)  # (64, 26, 100)
    self.mconv_output = self.mconvnet(
        inputs=self.mreg_embedder(ids=self.minputs['text_ids'][:, 1:]))  # (64, 128)
    p = {"type": "Dense", "kwargs": {'units': 1}}
    self.mdense_layer = tx.core.layers.get_layer(hparams=p)
    self.mreg_output = self.mdense_layer(inputs=self.mconv_output)

    # Alternative to consider:
    # self.menc_text_ids = self.minputs['text_ids'][:, 1:]
    # self.mencoder = UnidirectionalRNNEncoder(hparams=self._hparams.encoder)  # GRU cell
    # self.menc_outputs, self.mfinal_state = self.mencoder(
    #     self.mreg_embedder(self.menc_text_ids),
    #     sequence_length=self.minputs['length'] - 1)
    # self.mreg_output = self.mdense_layer(
    #     inputs=tf.concat([self.mconv_output, self.mfinal_state], -1))

    self.mprediction = tf.reshape(self.mreg_output, [-1])
    self.mground_truth = tf.to_float(self.minputs['labels'])
    # Per-example loss, so it can later be used in RL to update over a whole batch.
    self.mloss_reg_single = tf.pow(self.mprediction - self.mground_truth, 2)
    # Mean loss over the batch.
    self.mloss_reg_batch = tf.reduce_mean(self.mloss_reg_single)

    # self.mreg_vars = collect_trainable_variables(
    #     [self.mreg_embedder, self.mconvnet, self.mencoder, self.mdense_layer])
    self.mreg_vars = collect_trainable_variables(
        [self.mreg_embedder, self.mconvnet, self.mdense_layer])
    self.mtrain_op_d = get_train_op(
        self.mloss_reg_batch, self.mreg_vars, hparams=self._hparams.opt)

    self.mreg_sample = {
        "mprediction": self.mprediction,
        "mground_truth": self.mground_truth,
        "msent": self.minputs['text_ids'][:, 1:]
    }

    ###### get self.pre_dif when doing RL training (for transferred sents)
    ### pass to the female regression model
    self.RL_fconv_output = self.fconvnet(
        inputs=self.freg_embedder(ids=self.outputs.sample_id))  # (64, 128)
    self.RL_freg_output = self.fdense_layer(inputs=self.RL_fconv_output)
    self.RL_fprediction = tf.reshape(self.RL_freg_output, [-1])
    ### pass to the male regression model
    self.RL_mconv_output = self.mconvnet(
        inputs=self.mreg_embedder(ids=self.outputs.sample_id))  # (64, 128)
    self.RL_mreg_output = self.mdense_layer(inputs=self.RL_mconv_output)
    self.RL_mprediction = tf.reshape(self.RL_mreg_output, [-1])
    self.pre_dif = tf.abs(self.RL_fprediction - self.RL_mprediction)

    ###### get self.Ypre_dif for original sents
    ### pass to the female regression model
    self.YRL_fconv_output = self.fconvnet(
        inputs=self.freg_embedder(ids=self.inputs['text_ids'][:, 1:]))  # (64, 128)
    self.YRL_freg_output = self.fdense_layer(inputs=self.YRL_fconv_output)
    self.YRL_fprediction = tf.reshape(self.YRL_freg_output, [-1])
    ### pass to the male regression model
    self.YRL_mconv_output = self.mconvnet(
        inputs=self.mreg_embedder(ids=self.inputs['text_ids'][:, 1:]))  # (64, 128)
    self.YRL_mreg_output = self.mdense_layer(inputs=self.YRL_mconv_output)
    self.YRL_mprediction = tf.reshape(self.YRL_mreg_output, [-1])
    self.Ypre_dif = tf.abs(self.YRL_fprediction - self.YRL_mprediction)

    ######################## RL training
    # def fil(elem):
    #     return tf.where(elem > 1.3, tf.minimum(elem, 3), 0)
    # def fil_pushsmall(elem):
    #     return tf.add(tf.where(elem < 0.5, 1, 0), tf.where(elem > 1.5, -0.5 * elem, 0))

    # Shrink the prediction gap:
    # def fil1(elem):
    #     return tf.where(elem < 0.5, 1.0, 0.0)
    # def fil2(elem):
    #     return tf.where(elem > 1.5, -0.5 * elem, 0.0)

    # Widen the prediction gap:
    def fil1(elem):
        return tf.where(elem < 0.5, -0.01, 0.0)

    def fil2(elem):
        return tf.where(elem > 1.3, elem, 0.0)

    # Shape (batch_size, time_steps): the loss of every time step of every sample
    # in the batch.
    self.beginning_loss_g_RL2 = tf.nn.sparse_softmax_cross_entropy_with_logits(
        labels=self.outputs.sample_id,
        logits=self.outputs.logits)
    # Shape (batch_size,): per-sentence loss (summed over time steps, not over
    # the batch).
    self.middle_loss_g_RL2 = tf.reduce_sum(self.beginning_loss_g_RL2, axis=1)

    # Trivial "RL" training with all weights set to 1:
    # final_loss_g_RL2 = tf.reduce_sum(self.middle_loss_g_RL2)

    # RL training
    self.filtered = tf.add(tf.map_fn(fil1, self.pre_dif),
                           tf.map_fn(fil2, self.pre_dif))
    self.updated_loss_per_sent = tf.multiply(
        self.filtered,
        self.middle_loss_g_RL2)  # no threshold set yet for the weight update
    self.updated_loss_per_batch = tf.reduce_sum(self.updated_loss_per_sent)

    # NOTE: ideally each sentence's loss would be updated individually, but
    # `train_updated` raises an error in that case, so only the summed loss is
    # updated -- is that equivalent to updating each sentence's loss?
    self.vars_updated = collect_trainable_variables(
        [self.connector, self.decoder])
    self.train_updated = get_train_op(
        self.updated_loss_per_batch, self.vars_updated,
        hparams=self._hparams.opt)

    self.train_updated_interface = {
        "pre_dif": self.pre_dif,
        "updated_loss_per_sent": self.updated_loss_per_sent,
        "updated_loss_per_batch": self.updated_loss_per_batch,
    }

    ### Train AE and RL together
    self.loss_AERL = gamma * self.updated_loss_per_batch + self.loss_g_ae
    self.vars_AERL = collect_trainable_variables(
        [self.connector, self.decoder])
    self.train_AERL = get_train_op(
        self.loss_AERL, self.vars_AERL, hparams=self._hparams.opt)
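# Illustration only (not part of the model): what the piecewise weights `fil1`/`fil2`
# above assign to a batch of female/male prediction gaps. Gaps below 0.5 get a small
# negative weight (discouraged), gaps above 1.3 are weighted by their own magnitude
# (encouraged), and everything in between contributes nothing to the RL loss.
def reward_weight(gap):
    weight = 0.0
    if gap < 0.5:
        weight += -0.01   # fil1
    if gap > 1.3:
        weight += gap     # fil2
    return weight

gaps = [0.1, 0.8, 1.6]
print([reward_weight(g) for g in gaps])   # [-0.01, 0.0, 1.6]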
def generator(text_ids, text_keyword_id, text_keyword_length, labels,
              text_length, temperature, vocab_size, batch_size, seq_len,
              gen_emb_dim, mem_slots, head_size, num_heads, hidden_dim,
              start_token):
    # Source word embedding
    src_word_embedder = tx.modules.WordEmbedder(
        vocab_size=vocab_size, hparams=hparams.embedder)
    src_word_embeds = src_word_embedder(text_keyword_id)

    encoder = UnidirectionalRNNEncoder(hparams=hparams.encoder)
    enc_outputs, final_state = encoder(
        inputs=src_word_embeds, sequence_length=text_keyword_length)

    # modify sentiment label
    label_connector = MLPTransformConnector(output_size=hparams.dim_c)
    state_connector = MLPTransformConnector(output_size=700)
    labels = tf.to_float(tf.reshape(labels, [batch_size, 1]))
    c = label_connector(labels)
    c_ = label_connector(1 - labels)
    h = tf.concat([c, final_state], axis=1)
    h_ = tf.concat([c_, final_state], axis=1)
    state = state_connector(h)
    state_ = state_connector(h_)

    decoder = AttentionRNNDecoder(
        memory=enc_outputs,
        memory_sequence_length=text_keyword_length,
        cell_input_fn=lambda inputs, attention: inputs,
        vocab_size=vocab_size,
        hparams=hparams.decoder)

    # For training
    g_outputs, _, _ = decoder(
        initial_state=state,
        inputs=text_ids,
        embedding=src_word_embedder,
        sequence_length=tf.convert_to_tensor(
            np.array([(seq_len - 1) for i in range(batch_size)],
                     dtype=np.int32)))
    # e = g_outputs.cell_output

    start_tokens = np.ones(batch_size, int)
    end_token = int(2)

    # Greedy decoding, used in eval
    outputs_, _, length_ = decoder(
        decoding_strategy='infer_greedy',
        initial_state=state_,
        embedding=src_word_embedder,
        start_tokens=start_tokens,
        end_token=end_token)

    pretrain_loss = tx.losses.sequence_sparse_softmax_cross_entropy(
        labels=text_ids[:, 1:],
        logits=g_outputs.logits,
        sequence_length=text_length - 1,
        average_across_timesteps=True,
        sum_over_timesteps=False)

    # Gumbel-softmax decoding, used in training
    gumbel_helper = GumbelSoftmaxEmbeddingHelper(
        src_word_embedder.embedding, start_tokens, end_token, temperature)
    gumbel_outputs, _, sequence_lengths = decoder(
        helper=gumbel_helper, initial_state=state_)

    # max_index = tf.argmax(gumbel_outputs.logits, axis=2)
    gen_o = tf.reduce_sum(tf.reduce_max(outputs_.logits, axis=2), 1)

    return gumbel_outputs.logits, outputs_.sample_id, pretrain_loss, gen_o
def _build_model(self, inputs, vocab, gamma, lambda_g, lambda_z, lambda_z1,
                 lambda_z2, lambda_ae):
    embedder = WordEmbedder(
        vocab_size=vocab.size, hparams=self._hparams.embedder)
    encoder = UnidirectionalRNNEncoder(hparams=self._hparams.encoder)

    enc_text_ids = inputs['text_ids'][:, 1:]
    enc_outputs, final_state = encoder(
        embedder(enc_text_ids), sequence_length=inputs['length'] - 1)
    z = final_state[:, self._hparams.dim_c:]

    # -------------------- CLASSIFIER ---------------------
    n_classes = self._hparams.num_classes
    z_classifier_l1 = MLPTransformConnector(
        256, hparams=self._hparams.z_classifier_l1)
    z_classifier_l2 = MLPTransformConnector(
        64, hparams=self._hparams.z_classifier_l2)
    z_classifier_out = MLPTransformConnector(
        n_classes if n_classes > 2 else 1)

    z_logits = z_classifier_l1(z)
    z_logits = z_classifier_l2(z_logits)
    z_logits = z_classifier_out(z_logits)
    z_pred = tf.greater(z_logits, 0)
    z_logits = tf.reshape(z_logits, [-1])
    z_pred = tf.to_int64(tf.reshape(z_pred, [-1]))

    loss_z_clas = tf.nn.sigmoid_cross_entropy_with_logits(
        labels=tf.to_float(inputs['labels']), logits=z_logits)
    loss_z_clas = tf.reduce_mean(loss_z_clas)
    accu_z_clas = tx.evals.accuracy(labels=inputs['labels'], preds=z_pred)
    # -------------------________________---------------------

    label_connector = MLPTransformConnector(self._hparams.dim_c)
    labels = tf.to_float(tf.reshape(inputs['labels'], [-1, 1]))
    c = label_connector(labels)
    c_ = label_connector(1 - labels)
    h = tf.concat([c, z], 1)
    h_ = tf.concat([c_, z], 1)

    # Teacher-force decoding and the auto-encoding loss for G
    decoder = AttentionRNNDecoder(
        memory=enc_outputs,
        memory_sequence_length=inputs['length'] - 1,
        cell_input_fn=lambda inputs, attention: inputs,
        vocab_size=vocab.size,
        hparams=self._hparams.decoder)
    connector = MLPTransformConnector(decoder.state_size)

    g_outputs, _, _ = decoder(
        initial_state=connector(h),
        inputs=inputs['text_ids'],
        embedding=embedder,
        sequence_length=inputs['length'] - 1)

    loss_g_ae = tx.losses.sequence_sparse_softmax_cross_entropy(
        labels=inputs['text_ids'][:, 1:],
        logits=g_outputs.logits,
        sequence_length=inputs['length'] - 1,
        average_across_timesteps=True,
        sum_over_timesteps=False)

    # Gumbel-softmax decoding, used in training
    start_tokens = tf.ones_like(inputs['labels']) * vocab.bos_token_id
    end_token = vocab.eos_token_id
    gumbel_helper = GumbelSoftmaxEmbeddingHelper(
        embedder.embedding, start_tokens, end_token, gamma)

    soft_outputs_, _, soft_length_ = decoder(
        helper=gumbel_helper, initial_state=connector(h_))
    soft_outputs, _, soft_length = decoder(
        helper=gumbel_helper, initial_state=connector(h))

    # ---------------------------- SHIFTED LOSS -------------------------------------
    _, encoder_final_state_ = encoder(
        embedder(soft_ids=soft_outputs_.sample_id),
        sequence_length=inputs['length'] - 1)
    _, encoder_final_state = encoder(
        embedder(soft_ids=soft_outputs.sample_id),
        sequence_length=inputs['length'] - 1)
    new_z_ = encoder_final_state_[:, self._hparams.dim_c:]
    new_z = encoder_final_state[:, self._hparams.dim_c:]

    cos_distance_z_ = tf.abs(
        tf.losses.cosine_distance(tf.nn.l2_normalize(z, axis=1),
                                  tf.nn.l2_normalize(new_z_, axis=1),
                                  axis=1))
    cos_distance_z = tf.abs(
        tf.losses.cosine_distance(tf.nn.l2_normalize(z, axis=1),
                                  tf.nn.l2_normalize(new_z, axis=1),
                                  axis=1))
    # ----------------------------______________-------------------------------------

    # Greedy decoding, used in eval
    outputs_, _, length_ = decoder(
        decoding_strategy='infer_greedy',
        initial_state=connector(h_),
        embedding=embedder,
        start_tokens=start_tokens,
        end_token=end_token)

    # Creates classifier
    classifier = Conv1DClassifier(hparams=self._hparams.classifier)
    clas_embedder = WordEmbedder(
        vocab_size=vocab.size, hparams=self._hparams.embedder)

    # Classification loss for the classifier
    clas_logits, clas_preds = classifier(
        inputs=clas_embedder(ids=inputs['text_ids'][:, 1:]),
        sequence_length=inputs['length'] - 1)
    loss_d_clas = tf.nn.sigmoid_cross_entropy_with_logits(
        labels=tf.to_float(inputs['labels']), logits=clas_logits)
    loss_d_clas = tf.reduce_mean(loss_d_clas)
    accu_d = tx.evals.accuracy(labels=inputs['labels'], preds=clas_preds)

    # Classification loss for the generator, based on soft samples
    soft_logits, soft_preds = classifier(
        inputs=clas_embedder(soft_ids=soft_outputs_.sample_id),
        sequence_length=soft_length_)
    loss_g_clas = tf.nn.sigmoid_cross_entropy_with_logits(
        labels=tf.to_float(1 - inputs['labels']), logits=soft_logits)
    loss_g_clas = tf.reduce_mean(loss_g_clas)

    # Accuracy on soft samples, for training progress monitoring
    accu_g = tx.evals.accuracy(labels=1 - inputs['labels'], preds=soft_preds)

    # Accuracy on greedy-decoded samples, for training progress monitoring
    _, gdy_preds = classifier(
        inputs=clas_embedder(ids=outputs_.sample_id),
        sequence_length=length_)
    accu_g_gdy = tx.evals.accuracy(labels=1 - inputs['labels'],
                                   preds=gdy_preds)

    # Aggregates losses
    loss_g = lambda_ae * loss_g_ae + \
        lambda_g * loss_g_clas + \
        lambda_z1 * cos_distance_z + cos_distance_z_ * lambda_z2 \
        - lambda_z * loss_z_clas
    loss_d = loss_d_clas
    loss_z = loss_z_clas

    # Creates optimizers
    g_vars = collect_trainable_variables(
        [embedder, encoder, label_connector, connector, decoder])
    d_vars = collect_trainable_variables([clas_embedder, classifier])
    z_vars = collect_trainable_variables(
        [z_classifier_l1, z_classifier_l2, z_classifier_out])

    train_op_g = get_train_op(loss_g, g_vars, hparams=self._hparams.opt)
    train_op_g_ae = get_train_op(loss_g_ae, g_vars, hparams=self._hparams.opt)
    train_op_d = get_train_op(loss_d, d_vars, hparams=self._hparams.opt)
    train_op_z = get_train_op(loss_z, z_vars, hparams=self._hparams.opt)

    # Interface tensors
    self.losses = {
        "loss_g": loss_g,
        "loss_g_ae": loss_g_ae,
        "loss_g_clas": loss_g_clas,
        "loss_d": loss_d_clas,
        "loss_z_clas": loss_z_clas,
        "loss_cos_": cos_distance_z_,
        "loss_cos": cos_distance_z
    }
    self.metrics = {
        "accu_d": accu_d,
        "accu_g": accu_g,
        "accu_g_gdy": accu_g_gdy,
        "accu_z_clas": accu_z_clas
    }
    self.train_ops = {
        "train_op_g": train_op_g,
        "train_op_g_ae": train_op_g_ae,
        "train_op_d": train_op_d,
        "train_op_z": train_op_z
    }
    self.samples = {
        "original": inputs['text_ids'][:, 1:],
        "transferred": outputs_.sample_id,
        "z_vector": z,
        "labels_source": inputs['labels'],
        "labels_target": 1 - inputs['labels'],
        "labels_predicted": gdy_preds
    }

    self.fetches_train_g = {
        "loss_g": self.train_ops["train_op_g"],
        "loss_g_ae": self.losses["loss_g_ae"],
        "loss_g_clas": self.losses["loss_g_clas"],
        "loss_shifted_ae1": self.losses["loss_cos"],
        "loss_shifted_ae2": self.losses["loss_cos_"],
        "accu_g": self.metrics["accu_g"],
        "accu_g_gdy": self.metrics["accu_g_gdy"],
        "accu_z_clas": self.metrics["accu_z_clas"]
    }
    self.fetches_train_z = {
        "loss_z": self.train_ops["train_op_z"],
        "accu_z": self.metrics["accu_z_clas"]
    }
    self.fetches_train_d = {
        "loss_d": self.train_ops["train_op_d"],
        "accu_d": self.metrics["accu_d"]
    }

    fetches_eval = {"batch_size": get_batch_size(inputs['text_ids'])}
    fetches_eval.update(self.losses)
    fetches_eval.update(self.metrics)
    fetches_eval.update(self.samples)
    self.fetches_eval = fetches_eval
def generator(text_ids, text_keyword_id, text_keyword_length, labels,
              text_length, temperature, vocab_size, batch_size, seq_len,
              gen_emb_dim, mem_slots, head_size, num_heads, hidden_dim,
              start_token):
    is_target = tf.to_float(tf.not_equal(text_ids[:, 1:], 0))

    # Source word embedding
    src_word_embedder = tx.modules.WordEmbedder(
        vocab_size=vocab_size, hparams=trans_config.emb)
    src_word_embeds = src_word_embedder(text_keyword_id)
    src_word_embeds = src_word_embeds * trans_config.hidden_dim ** 0.5

    # Position embedding (shared b/w source and target)
    pos_embedder = tx.modules.SinusoidsPositionEmbedder(
        position_size=seq_len,
        hparams=trans_config.position_embedder_hparams)
    # src_seq_len = batch_data['text_keyword_length']
    src_pos_embeds = pos_embedder(sequence_length=seq_len)
    src_input_embedding = src_word_embeds + src_pos_embeds

    encoder = TransformerEncoder(hparams=trans_config.encoder)
    encoder_output = encoder(
        inputs=src_input_embedding, sequence_length=text_keyword_length)

    # modify sentiment label
    label_connector = MLPTransformConnector(
        output_size=trans_config.hidden_dim)
    labels = tf.to_float(tf.reshape(labels, [-1, 1]))
    c = tf.reshape(label_connector(labels), [batch_size, 1, 512])
    c_ = tf.reshape(label_connector(1 - labels), [batch_size, 1, 512])
    encoder_output = tf.concat([c, encoder_output[:, 1:, :]], axis=1)
    encoder_output_ = tf.concat([c_, encoder_output[:, 1:, :]], axis=1)

    # The decoder ties the input word embedding with the output logit layer.
    # As the decoder masks out <PAD>'s embedding, which in effect means <PAD>
    # has an all-zero embedding, here we explicitly set <PAD>'s embedding to
    # all-zero.
    tgt_embedding = tf.concat([
        tf.zeros(shape=[1, src_word_embedder.dim]),
        src_word_embedder.embedding[1:, :]
    ], axis=0)
    tgt_embedder = tx.modules.WordEmbedder(tgt_embedding)
    tgt_word_embeds = tgt_embedder(text_ids)
    tgt_word_embeds = tgt_word_embeds * trans_config.hidden_dim ** 0.5
    tgt_seq_len = text_length
    tgt_pos_embeds = pos_embedder(sequence_length=tgt_seq_len)
    tgt_input_embedding = tgt_word_embeds + tgt_pos_embeds

    _output_w = tf.transpose(tgt_embedder.embedding, (1, 0))

    decoder = TransformerDecoder(
        vocab_size=vocab_size,
        output_layer=_output_w,
        hparams=trans_config.decoder)

    # For training
    outputs = decoder(
        memory=encoder_output,
        memory_sequence_length=text_keyword_length,
        inputs=tgt_input_embedding,
        decoding_strategy='train_greedy',
        mode=tf.estimator.ModeKeys.TRAIN)

    mle_loss = transformer_utils.smoothing_cross_entropy(
        outputs.logits[:, :-1, :], text_ids[:, 1:], vocab_size,
        trans_config.loss_label_confidence)
    pretrain_loss = tf.reduce_sum(mle_loss * is_target) / tf.reduce_sum(is_target)

    # Gumbel-softmax decoding, used in training
    start_tokens = np.ones(batch_size, int)
    end_token = int(2)
    gumbel_helper = GumbelSoftmaxEmbeddingHelper(
        tgt_embedding, start_tokens, end_token, temperature)

    gumbel_outputs, sequence_lengths = decoder(
        memory=encoder_output_,
        memory_sequence_length=text_keyword_length,
        helper=gumbel_helper)

    # max_index = tf.argmax(gumbel_outputs.logits, axis=2)
    # gen_x_onehot_adv = tf.one_hot(max_index, vocab_size, 1.0, 0.0)
    gen_o = tf.reduce_sum(tf.reduce_max(gumbel_outputs.logits, axis=2))

    return gumbel_outputs.logits, gumbel_outputs.sample_id, pretrain_loss, gen_o
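# A minimal sketch (not from the source) of pre-training this transformer generator on
# its MLE loss in a TF1 session. The input tensors are assumed to come from a dataset
# pipeline under the names used in the signature, `opt_hparams` is an assumed optimizer
# hparams dict for Texar's get_train_op, and `num_pretrain_steps` is an assumed constant.
gumbel_logits, gumbel_ids, pretrain_loss, gen_o = generator(
    text_ids, text_keyword_id, text_keyword_length, labels, text_length,
    temperature, vocab_size, batch_size, seq_len, gen_emb_dim,
    mem_slots, head_size, num_heads, hidden_dim, start_token)

pretrain_op = get_train_op(pretrain_loss, hparams=opt_hparams)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for _ in range(num_pretrain_steps):
        loss_val, _ = sess.run(
            [pretrain_loss, pretrain_op],
            feed_dict={tx.global_mode(): tf.estimator.ModeKeys.TRAIN})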