Example #1
    def _prepare_modules(self):
        """Prepare necessary modules
        """
        self.training = tx.context.global_mode_train()  ## whether we are currently in training mode
        
        # encode ctx
        self.transformer_encoder = TransformerEncoder(hparams=self._hparams.transformer_encoder)
        
        # encode y
        self.word_embedder = WordEmbedder(
            vocab_size=self.vocab.size,
            hparams=self._hparams.wordEmbedder
        )
        self.self_graph_encoder = SelfGraphTransformerEncoder(hparams=self._hparams.encoder)

        self.downmlp = MLPTransformConnector(self._hparams.dim_c)
        self.PRelu = PRelu(self._hparams.prelu)

        self.rephrase_encoder = UnidirectionalRNNEncoder(hparams=self._hparams.rephrase_encoder)
        self.rephrase_decoder = DynamicAttentionRNNDecoder(
            memory_sequence_length=self.sequence_length_yy1 - 1,  ## use yy1's true length  # TODO: check
            cell_input_fn=lambda inputs, attention: inputs,
            vocab_size=self.vocab.size,
            hparams=self._hparams.rephrase_decoder
        )
Example #2
    def _prepare_modules(self):
        """Prepare necessary modules
        """
        self.embedder = WordEmbedder(vocab_size=self.vocab.size,
                                     hparams=self._hparams.embedder)
        self.clas_embedder = WordEmbedder(vocab_size=self.vocab.size,
                                          hparams=self._hparams.embedder)
        self.label_connector = MLPTransformConnector(self._hparams.dim_c)

        self.self_graph_encoder = SelfGraphTransformerEncoder(
            hparams=self._hparams.encoder)
        self.cross_graph_encoder = CrossGraphTransformerFixedLengthDecoder(
            vocab_size=self.vocab.size,
            tau=self.gamma,
            hparams=self._hparams.encoder)

        self.classifier_graph = Conv1DClassifier(
            hparams=self._hparams.classifier)
        self.classifier_sentence = Conv1DClassifier(
            hparams=self._hparams.classifier)

        self.rephrase_encoder = UnidirectionalRNNEncoder(
            hparams=self._hparams.rephrase_encoder)
        self.rephrase_decoder = DynamicAttentionRNNDecoder(
            memory_sequence_length=self.sequence_length - 1,
            cell_input_fn=lambda inputs, attention: inputs,
            vocab_size=self.vocab.size,
            hparams=self._hparams.rephrase_decoder)
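A note on the connector pattern shared by these `_prepare_modules` variants: `MLPTransformConnector(self._hparams.dim_c)` (the `label_connector` / `downmlp` above) is a plain MLP that maps a style label to a fixed-size style code, which the later `_build_model` examples concatenate with the content vector z. A minimal sketch of that pattern, assuming Texar-TF on TF 1.x; the sizes and placeholder names here are hypothetical:

import tensorflow as tf
from texar.modules import MLPTransformConnector

dim_c, dim_z = 200, 500  # hypothetical sizes

labels = tf.placeholder(tf.float32, shape=[None, 1])  # 0/1 style labels
z = tf.placeholder(tf.float32, shape=[None, dim_z])   # content representation

label_connector = MLPTransformConnector(dim_c)
c = label_connector(labels)        # style code for the original label
c_ = label_connector(1 - labels)   # style code for the flipped label

h = tf.concat([c, z], axis=1)      # (c, z): fed to the decoder's initial state
h_ = tf.concat([c_, z], axis=1)    # used when decoding the transferred style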
Example #3
    def _prepare_modules(self):
        """Prepare necessary modules
        """
        self.training = tx.context.global_mode_train()  ## whether we are currently in training mode
        
        # encode ctx
        self.bert_encoder = BertEncoder(pretrained_model_name="bert-base-uncased",
                                        hparams=self._hparams.bert_encoder)
        
        # encode y
        self.word_embedder = WordEmbedder(
            vocab_size=self.vocab['vocab_size'],
            hparams=self._hparams.wordEmbedder
        )

        self.downmlp = MLPTransformConnector(self._hparams.dim_c)
        self.self_transformer = SelfGraphTransformerEncoder(hparams=self._hparams.encoder)

        self.rephrase_encoder = UnidirectionalRNNEncoder(hparams=self._hparams.rephrase_encoder)  ## builds the rephraser
        self.rephrase_decoder = DynamicAttentionRNNDecoder(
            memory_sequence_length=self.sequence_length_yy1 - 1,  ## use yy1's true length  # TODO: check
            cell_input_fn=lambda inputs, attention: inputs,
            vocab_size=self.vocab['vocab_size'],
            hparams=self._hparams.rephrase_decoder
        )
Example #4
    def _prepare_modules(self):
        """Prepare necessary modules
        """
        self.embedder = WordEmbedder(vocab_size=self.vocab.size,
                                     hparams=self._hparams.embedder)
        self.clas_embedder = WordEmbedder(vocab_size=self.vocab.size,
                                          hparams=self._hparams.embedder)
        self.label_connector = MLPTransformConnector(self._hparams.dim_c)

        self.self_graph_encoder = SelfGraphTransformerEncoder(
            hparams=self._hparams.encoder)
        self.cross_graph_encoder = CrossGraphTransformerFixedLengthDecoder(
            vocab_size=self.vocab.size,
            tau=self.gamma,
            hparams=self._hparams.encoder)

        self.classifier_graph = Conv1DClassifier(
            hparams=self._hparams.classifier)
        self.classifier_sentence = Conv1DClassifier(
            hparams=self._hparams.classifier)

        self.rephrase_encoder = UnidirectionalRNNEncoder(
            hparams=self._hparams.rephrase_encoder)
        self.rephrase_decoder = DynamicAttentionRNNDecoder(
            memory_sequence_length=self.sequence_length - 1,
            cell_input_fn=lambda inputs, attention: inputs,
            vocab_size=self.vocab.size,
            hparams=self._hparams.rephrase_decoder)

        self.adj_embedder = WordEmbedder(vocab_size=self.vocab.size,
                                         hparams=self._hparams.embedder)
        self.adj_encoder = BidirectionalRNNEncoder(
            hparams=self._hparams.adj_encoder)

        self.conv1d_1 = tf.layers.Conv1D(128,
                                         kernel_size=3,
                                         strides=1,
                                         padding='same')
        self.conv1d_2 = tf.layers.Conv1D(256,
                                         kernel_size=3,
                                         strides=1,
                                         padding='same')
        self.bn1 = tf.layers.BatchNormalization()
        self.conv1d_3 = tf.layers.Conv1D(512,
                                         kernel_size=3,
                                         strides=1,
                                         padding='same')
        self.bn2 = tf.layers.BatchNormalization()
        self.conv1d_4 = tf.layers.Conv1D(512,
                                         kernel_size=3,
                                         strides=1,
                                         padding='same')
        self.bn3 = tf.layers.BatchNormalization()
        self.conv1d_5 = tf.layers.Conv1D(1024,
                                         kernel_size=3,
                                         strides=1,
                                         padding='same')
Example #5
    def test_mlp_transform_connector(self):
        """Tests the logic of
        :class:`~texar.modules.connectors.MLPTransformConnector`.
        """
        connector = MLPTransformConnector(self._decoder_cell.state_size)
        output = connector(tf.zeros([5, 10]))
        nest.assert_same_structure(output, self._decoder_cell.state_size)

        with self.test_session() as sess:
            sess.run(tf.global_variables_initializer())

            output_ = sess.run(output)
            nest.assert_same_structure(output_, self._decoder_cell.state_size)
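The test above exercises the property the `_build_model` examples rely on: `MLPTransformConnector` accepts a nested `output_size` such as an RNN cell's `state_size` and packs its outputs into that same structure, so `connector(h)` can be passed directly as a decoder's `initial_state`. A minimal sketch, assuming TF 1.x and a hypothetical two-layer LSTM cell:

import tensorflow as tf
from tensorflow.python.util import nest
from texar.modules import MLPTransformConnector

# hypothetical two-layer LSTM cell; its state_size is a nested structure
cell = tf.nn.rnn_cell.MultiRNNCell(
    [tf.nn.rnn_cell.LSTMCell(256) for _ in range(2)])

connector = MLPTransformConnector(cell.state_size)
h = tf.zeros([64, 700])        # e.g. h = tf.concat([c, z], 1)
initial_state = connector(h)   # packed into the same structure as state_size

nest.assert_same_structure(initial_state, cell.state_size)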
Example #6
    def _build_model(self, inputs, vocab, gamma, lambda_g):
        """Builds the model.
        """
        embedder = WordEmbedder(
            vocab_size=vocab.size,
            hparams=self._hparams.embedder)
        encoder = UnidirectionalRNNEncoder(hparams=self._hparams.encoder)

        # text_ids for encoder, with BOS token removed
        enc_text_ids = inputs['text_ids'][:, 1:]
        enc_outputs, final_state = encoder(embedder(enc_text_ids),
                                           sequence_length=inputs['length']-1)
        z = final_state[:, self._hparams.dim_c:]

        # Encodes label
        label_connector = MLPTransformConnector(self._hparams.dim_c)

        # Gets the sentence representation: h = (c, z)
        labels0 = tf.to_float(tf.reshape(inputs['labels0'], [-1, 1]))
        labels1 = tf.to_float(tf.reshape(inputs['labels1'], [-1, 1]))
        labels2 = tf.to_float(tf.reshape(inputs['labels2'], [-1, 1]))
        labels3 = tf.to_float(tf.reshape(inputs['labels3'], [-1, 1]))
        labels = tf.concat([labels0, labels1, labels2, labels3], axis=1)
        print('labels', labels)
        sys.stdout.flush()
        c = label_connector(labels)
        c_ = label_connector(1 - labels)
        h = tf.concat([c, z], 1)
        h_ = tf.concat([c_, z], 1)

        # Teacher-force decoding and the auto-encoding loss for G
        decoder = AttentionRNNDecoder(
            memory=enc_outputs,
            memory_sequence_length=inputs['length']-1,
            cell_input_fn=lambda inputs, attention: inputs,
            vocab_size=vocab.size,
            hparams=self._hparams.decoder)

        connector = MLPTransformConnector(decoder.state_size)

        g_outputs, _, _ = decoder(
            initial_state=connector(h), inputs=inputs['text_ids'],
            embedding=embedder, sequence_length=inputs['length']-1)

        print('labels shape', inputs['text_ids'][:, 1:], 'logits shape', g_outputs.logits)
        print(inputs['length'] - 1)
        loss_g_ae = tx.losses.sequence_sparse_softmax_cross_entropy(
            labels=inputs['text_ids'][:, 1:],
            logits=g_outputs.logits,
            sequence_length=inputs['length']-1,
            average_across_timesteps=True,
            sum_over_timesteps=False)

        # Gumbel-softmax decoding, used in training
        start_tokens = tf.ones_like(inputs['labels0']) * vocab.bos_token_id
        end_token = vocab.eos_token_id
        gumbel_helper = GumbelSoftmaxEmbeddingHelper(
            embedder.embedding, start_tokens, end_token, gamma)

        soft_outputs_, _, soft_length_, = decoder(
            helper=gumbel_helper, initial_state=connector(h_))

        print(g_outputs, soft_outputs_)

        # Greedy decoding, used in eval
        outputs_, _, length_ = decoder(
            decoding_strategy='infer_greedy', initial_state=connector(h_),
            embedding=embedder, start_tokens=start_tokens, end_token=end_token)
        # Creates classifier
        classifier0 = Conv1DClassifier(hparams=self._hparams.classifier)
        classifier1 = Conv1DClassifier(hparams=self._hparams.classifier)
        classifier2 = Conv1DClassifier(hparams=self._hparams.classifier)
        classifier3 = Conv1DClassifier(hparams=self._hparams.classifier)
        clas_embedder = WordEmbedder(vocab_size=vocab.size,
                                     hparams=self._hparams.embedder)

        clas_logits, clas_preds = self._high_level_classifier(
            [classifier0, classifier1, classifier2, classifier3],
            clas_embedder, inputs, vocab, gamma, lambda_g,
            inputs['text_ids'][:, 1:], None, inputs['length'] - 1)
        loss_d_clas = tf.nn.sigmoid_cross_entropy_with_logits(
            labels=tf.to_float(labels), logits=clas_logits)
        loss_d_clas = tf.reduce_mean(loss_d_clas)
        accu_d = tx.evals.accuracy(labels, preds=clas_preds)

        # Classification loss for the generator, based on soft samples
        # soft_logits, soft_preds = classifier(
        #     inputs=clas_embedder(soft_ids=soft_outputs_.sample_id),
        #     sequence_length=soft_length_)
        soft_logits, soft_preds = self._high_level_classifier(
            [classifier0, classifier1, classifier2, classifier3],
            clas_embedder, inputs, vocab, gamma, lambda_g,
            None, soft_outputs_.sample_id, soft_length_)
        print(soft_logits.shape, soft_preds.shape)
        loss_g_clas = tf.nn.sigmoid_cross_entropy_with_logits(
            labels=tf.to_float(1-labels), logits=soft_logits)
        loss_g_clas = tf.reduce_mean(loss_g_clas)

        # Accuracy on soft samples, for training progress monitoring
        accu_g = tx.evals.accuracy(labels=1-labels, preds=soft_preds)

        # Accuracy on greedy-decoded samples, for training progress monitoring
        # _, gdy_preds = classifier(
        #     inputs=clas_embedder(ids=outputs_.sample_id),
        #     sequence_length=length_)
        _, gdy_preds = self._high_level_classifier(
            [classifier0, classifier1, classifier2, classifier3],
            clas_embedder, inputs, vocab, gamma, lambda_g,
            outputs_.sample_id, None, length_)
        print(gdy_preds.shape)
        accu_g_gdy = tx.evals.accuracy(
            labels=1-labels, preds=gdy_preds)

        # Aggregates losses
        loss_g = loss_g_ae + lambda_g * loss_g_clas
        loss_d = loss_d_clas

        # Creates optimizers
        g_vars = collect_trainable_variables(
            [embedder, encoder, label_connector, connector, decoder])
        d_vars = collect_trainable_variables([clas_embedder, classifier0, classifier1, classifier2, classifier3])

        train_op_g = get_train_op(
            loss_g, g_vars, hparams=self._hparams.opt)
        train_op_g_ae = get_train_op(
            loss_g_ae, g_vars, hparams=self._hparams.opt)
        train_op_d = get_train_op(
            loss_d, d_vars, hparams=self._hparams.opt)

        # Interface tensors
        self.predictions = {
            "predictions": clas_preds,
            "ground_truth": labels
        }
        self.losses = {
            "loss_g": loss_g,
            "loss_g_ae": loss_g_ae,
            "loss_g_clas": loss_g_clas,
            "loss_d": loss_d_clas
        }
        self.metrics = {
            "accu_d": accu_d,
            "accu_g": accu_g,
            "accu_g_gdy": accu_g_gdy,
        }
        self.train_ops = {
            "train_op_g": train_op_g,
            "train_op_g_ae": train_op_g_ae,
            "train_op_d": train_op_d
        }
        self.samples = {
            "original": inputs['text_ids'][:, 1:],
            "transferred": outputs_.sample_id
        }

        self.fetches_train_g = {
            "loss_g": self.train_ops["train_op_g"],
            "loss_g_ae": self.losses["loss_g_ae"],
            "loss_g_clas": self.losses["loss_g_clas"],
            "accu_g": self.metrics["accu_g"],
            "accu_g_gdy": self.metrics["accu_g_gdy"],
        }
        self.fetches_train_d = {
            "loss_d": self.train_ops["train_op_d"],
            "accu_d": self.metrics["accu_d"]
        }
        fetches_eval = {"batch_size": get_batch_size(inputs['text_ids'])}
        fetches_eval.update(self.losses)
        fetches_eval.update(self.metrics)
        fetches_eval.update(self.samples)
        fetches_eval.update(self.predictions)
        self.fetches_eval = fetches_eval
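The key trick in this example is the Gumbel-softmax path: `soft_outputs_.sample_id` is a soft distribution over the vocabulary at every step, and feeding it to the classifier's embedder through `soft_ids=` keeps the classification loss differentiable with respect to the generator. A minimal, self-contained sketch of that `soft_ids` mechanism, with hypothetical sizes and a hand-made soft distribution standing in for the decoder output:

import tensorflow as tf
import texar as tx

vocab_size, batch_size, max_len, tau = 1000, 4, 16, 0.5  # hypothetical

embedder = tx.modules.WordEmbedder(vocab_size=vocab_size)

# stand-in for soft_outputs_.sample_id: one softmax distribution per step
logits = tf.random_normal([batch_size, max_len, vocab_size])
soft_sample_id = tf.nn.softmax(logits / tau, axis=-1)

# soft path: a weighted mixture of embedding rows, differentiable in logits
soft_embeds = embedder(soft_ids=soft_sample_id)   # [batch, max_len, emb_dim]
# hard path, used for greedy-decoded ids: a plain (non-differentiable) lookup
hard_embeds = embedder(ids=tf.argmax(soft_sample_id, axis=-1))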
Example #7
    def _build_model(self, inputs, vocab, finputs, minputs, gamma):
        """Builds the model.
        """
        self.inputs = inputs
        self.finputs = finputs
        self.minputs = minputs
        self.vocab = vocab

        self.embedder = WordEmbedder(vocab_size=self.vocab.size,
                                     hparams=self._hparams.embedder)
        # might try a BidirectionalLSTMEncoder later
        self.encoder = UnidirectionalRNNEncoder(
            hparams=self._hparams.encoder)  # GRU cell

        # text_ids for encoder, with BOS(begin of sentence) token removed
        self.enc_text_ids = self.inputs['text_ids'][:, 1:]
        self.enc_outputs, self.final_state = self.encoder(
            self.embedder(self.enc_text_ids),
            sequence_length=self.inputs['length'] - 1)

        h = self.final_state

        # Teacher-force decoding and the auto-encoding loss for G
        self.decoder = AttentionRNNDecoder(
            memory=self.enc_outputs,
            memory_sequence_length=self.inputs['length'] - 1,
            cell_input_fn=lambda inputs, attention: inputs,
            # default: lambda inputs, attention: tf.concat([inputs, attention], -1), which concatenates the regular RNN cell inputs with the attention.
            vocab_size=self.vocab.size,
            hparams=self._hparams.decoder)

        self.connector = MLPTransformConnector(self.decoder.state_size)

        self.g_outputs, _, _ = self.decoder(
            initial_state=self.connector(h),
            inputs=self.inputs['text_ids'],
            embedding=self.embedder,
            sequence_length=self.inputs['length'] - 1)

        self.loss_g_ae = tx.losses.sequence_sparse_softmax_cross_entropy(
            labels=self.inputs['text_ids'][:, 1:],
            logits=self.g_outputs.logits,
            sequence_length=self.inputs['length'] - 1,
            average_across_timesteps=True,
            sum_over_timesteps=False)

        # Greedy decoding, used in eval (and RL training)
        start_tokens = tf.ones_like(
            self.inputs['labels']) * self.vocab.bos_token_id
        end_token = self.vocab.eos_token_id
        self.outputs, _, length = self.decoder(
            # maybe try switching this to "infer_sample" later to see how it performs
            decoding_strategy='infer_greedy',
            initial_state=self.connector(h),
            embedding=self.embedder,
            start_tokens=start_tokens,
            end_token=end_token)

        # Creates optimizers
        self.g_vars = collect_trainable_variables(
            [self.embedder, self.encoder, self.connector, self.decoder])
        self.train_op_g_ae = get_train_op(self.loss_g_ae,
                                          self.g_vars,
                                          hparams=self._hparams.opt)

        # Interface tensors
        self.samples = {
            "batch_size": get_batch_size(self.inputs['text_ids']),
            "original": self.inputs['text_ids'][:, 1:],
            "transferred": self.outputs.sample_id  #outputs 是infer_greedy的结果
        }

        ############################ female sentiment regression model
        # only a convnet is used for now; could later try RNN encoding and check the regression accuracy, or combine the two (concat into one vector)
        self.fconvnet = Conv1DNetwork(
            hparams=self._hparams.convnet
        )  #[batch_size, time_steps, embedding_dim] (default input)
        #convnet = Conv1DNetwork()
        self.freg_embedder = WordEmbedder(
            vocab_size=self.vocab.size, hparams=self._hparams.embedder
        )  #(64, 26, 100) (output shape of clas_embedder(ids=inputs['text_ids'][:, 1:]))
        self.fconv_output = self.fconvnet(inputs=self.freg_embedder(
            ids=self.finputs['text_ids'][:, 1:]))  # (64, 128)  TODO: prepare finputs!!!
        p = {"type": "Dense", "kwargs": {'units': 1}}
        self.fdense_layer = tx.core.layers.get_layer(hparams=p)
        self.freg_output = self.fdense_layer(inputs=self.fconv_output)
        '''
        # alternative to consider:
        self.fenc_text_ids = self.finputs['text_ids'][:, 1:]
        self.fencoder = UnidirectionalRNNEncoder(hparams=self._hparams.encoder) #GRU cell
        self.fenc_outputs, self.ffinal_state = self.fencoder(self.freg_embedder(self.fenc_text_ids),sequence_length=self.finputs['length']-1)
        self.freg_output = self.fdense_layer(inputs = tf.concat([self.fconv_output, self.ffinal_state], -1))
        '''

        self.fprediction = tf.reshape(self.freg_output, [-1])
        self.fground_truth = tf.to_float(self.finputs['labels'])

        self.floss_reg_single = tf.pow(
            self.fprediction - self.fground_truth,
            2)  # per-example loss, so a whole batch can be updated later in RL
        self.floss_reg_batch = tf.reduce_mean(
            self.floss_reg_single)  # loss averaged over the batch

        #self.freg_vars = collect_trainable_variables([self.freg_embedder, self.fconvnet, self.fencoder, self.fdense_layer])
        self.freg_vars = collect_trainable_variables(
            [self.freg_embedder, self.fconvnet, self.fdense_layer])
        self.ftrain_op_d = get_train_op(self.floss_reg_batch,
                                        self.freg_vars,
                                        hparams=self._hparams.opt)

        self.freg_sample = {
            "fprediction": self.fprediction,
            "fground_truth": self.fground_truth,
            "fsent": self.finputs['text_ids'][:, 1:]
        }

        ############################ male sentiment regression model
        self.mconvnet = Conv1DNetwork(
            hparams=self._hparams.convnet
        )  #[batch_size, time_steps, embedding_dim] (default input)
        #convnet = Conv1DNetwork()
        self.mreg_embedder = WordEmbedder(
            vocab_size=self.vocab.size, hparams=self._hparams.embedder
        )  #(64, 26, 100) (output shape of clas_embedder(ids=inputs['text_ids'][:, 1:]))
        self.mconv_output = self.mconvnet(inputs=self.mreg_embedder(
            ids=self.minputs['text_ids'][:, 1:]))  #(64, 128)
        p = {"type": "Dense", "kwargs": {'units': 1}}
        self.mdense_layer = tx.core.layers.get_layer(hparams=p)
        self.mreg_output = self.mdense_layer(inputs=self.mconv_output)
        '''
        # alternative to consider:
        self.menc_text_ids = self.minputs['text_ids'][:, 1:]
        self.mencoder = UnidirectionalRNNEncoder(hparams=self._hparams.encoder) #GRU cell
        self.menc_outputs, self.mfinal_state = self.mencoder(self.mreg_embedder(self.menc_text_ids),sequence_length=self.minputs['length']-1)
        self.mreg_output = self.mdense_layer(inputs = tf.concat([self.mconv_output, self.mfinal_state], -1))
        '''

        self.mprediction = tf.reshape(self.mreg_output, [-1])
        self.mground_truth = tf.to_float(self.minputs['labels'])

        self.mloss_reg_single = tf.pow(
            self.mprediction - self.mground_truth,
            2)  # per-example loss, so a whole batch can be updated later in RL
        self.mloss_reg_batch = tf.reduce_mean(
            self.mloss_reg_single)  # loss averaged over the batch

        #self.mreg_vars = collect_trainable_variables([self.mreg_embedder, self.mconvnet, self.mencoder, self.mdense_layer])
        self.mreg_vars = collect_trainable_variables(
            [self.mreg_embedder, self.mconvnet, self.mdense_layer])
        self.mtrain_op_d = get_train_op(self.mloss_reg_batch,
                                        self.mreg_vars,
                                        hparams=self._hparams.opt)

        self.mreg_sample = {
            "mprediction": self.mprediction,
            "mground_truth": self.mground_truth,
            "msent": self.minputs['text_ids'][:, 1:]
        }

        ###### get self.pre_dif when doing RL training (for transferred sents)
        ### pass to female regression model
        self.RL_fconv_output = self.fconvnet(inputs=self.freg_embedder(
            ids=self.outputs.sample_id))  # (64, 128)
        self.RL_freg_output = self.fdense_layer(inputs=self.RL_fconv_output)
        self.RL_fprediction = tf.reshape(self.RL_freg_output, [-1])
        ### pass to male regression model
        self.RL_mconv_output = self.mconvnet(inputs=self.mreg_embedder(
            ids=self.outputs.sample_id))  # (64, 128)
        self.RL_mreg_output = self.mdense_layer(inputs=self.RL_mconv_output)
        self.RL_mprediction = tf.reshape(self.RL_mreg_output, [-1])

        self.pre_dif = tf.abs(self.RL_fprediction - self.RL_mprediction)

        ###### get self.Ypre_dif for original sents
        ### pass to female regression model
        self.YRL_fconv_output = self.fconvnet(inputs=self.freg_embedder(
            ids=self.inputs['text_ids'][:, 1:]))  # (64, 128)
        self.YRL_freg_output = self.fdense_layer(inputs=self.YRL_fconv_output)
        self.YRL_fprediction = tf.reshape(self.YRL_freg_output, [-1])
        ### pass to male regression model
        self.YRL_mconv_output = self.mconvnet(inputs=self.mreg_embedder(
            ids=self.inputs['text_ids'][:, 1:]))  # (64, 128)
        self.YRL_mreg_output = self.mdense_layer(inputs=self.YRL_mconv_output)
        self.YRL_mprediction = tf.reshape(self.YRL_mreg_output, [-1])

        self.Ypre_dif = tf.abs(self.YRL_fprediction - self.YRL_mprediction)

        ######################## RL training
        '''
        def fil(elem):
            return tf.where(elem > 1.3, tf.minimum(elem,3), 0)
        def fil_pushsmall(elem):
            return tf.add(tf.where(elem <0.5, 1, 0),tf.where(elem>1.5,-0.5*elem,0))
        '''
        '''
        # shrink the prediction gap
        def fil1(elem):
            return tf.where(elem<0.5,1.0,0.0)
        def fil2(elem):
            return tf.where(elem>1.5,-0.5*elem,0.0)
        '''

        # widen the prediction gap
        def fil1(elem):
            return tf.where(elem < 0.5, -0.01, 0.0)

        def fil2(elem):
            return tf.where(elem > 1.3, elem, 0.0)

        # shape (batch_size, time_steps): the loss for every timestep of every sample in the batch
        self.beginning_loss_g_RL2 = tf.nn.sparse_softmax_cross_entropy_with_logits(
            labels=self.outputs.sample_id,
            logits=self.outputs.logits)
        self.middle_loss_g_RL2 = tf.reduce_sum(
            self.beginning_loss_g_RL2, axis=1
        )  # (batch_size,): per-sentence loss (summed over time_steps, not over the batch)

        #trivial "RL" training with all weight set to 1
        #final_loss_g_RL2 = tf.reduce_sum(self.middle_loss_g_RL2)

        #RL training
        self.filtered = tf.add(tf.map_fn(fil1, self.pre_dif),
                               tf.map_fn(fil2, self.pre_dif))
        self.updated_loss_per_sent = tf.multiply(
            self.filtered,
            self.middle_loss_g_RL2)  # haven't set a threshold for the weight update
        self.updated_loss_per_batch = tf.reduce_sum(
            self.updated_loss_per_sent)
        # NOTE: ideally each sentence's loss would be updated individually, but train_updated
        # raises an error in that case, so only the summed loss is updated; is that equivalent
        # to updating each sentence's loss?

        self.vars_updated = collect_trainable_variables(
            [self.connector, self.decoder])
        self.train_updated = get_train_op(self.updated_loss_per_batch,
                                          self.vars_updated,
                                          hparams=self._hparams.opt)
        self.train_updated_interface = {
            "pre_dif": self.pre_dif,
            "updated_loss_per_sent": self.updated_loss_per_sent,
            "updated_loss_per_batch": self.updated_loss_per_batch,
        }

        ### Train AE and RL together
        self.loss_AERL = gamma * self.updated_loss_per_batch + self.loss_g_ae
        self.vars_AERL = collect_trainable_variables(
            [self.connector, self.decoder])
        self.train_AERL = get_train_op(self.loss_AERL,
                                       self.vars_AERL,
                                       hparams=self._hparams.opt)
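The RL-style update above weights each sentence's cross-entropy on its own greedy samples by a reward-like term derived from the female/male prediction gap, then sums over the batch before handing the scalar to `get_train_op`. A minimal, self-contained sketch of that weighting, with hypothetical shapes and the `fil1`/`fil2` thresholds written as vectorized `tf.where` calls:

import tensorflow as tf

batch_size, max_len, vocab_size = 4, 16, 1000  # hypothetical

sample_id = tf.random_uniform([batch_size, max_len], 0, vocab_size, tf.int32)
logits = tf.random_normal([batch_size, max_len, vocab_size])
pre_dif = tf.random_uniform([batch_size])  # |female pred - male pred|

per_token = tf.nn.sparse_softmax_cross_entropy_with_logits(
    labels=sample_id, logits=logits)             # (batch_size, max_len)
per_sentence = tf.reduce_sum(per_token, axis=1)  # (batch_size,)

# widen the prediction gap: reward large gaps, lightly penalize small ones
weight = (tf.where(pre_dif > 1.3, pre_dif, tf.zeros_like(pre_dif)) +
          tf.where(pre_dif < 0.5, -0.01 * tf.ones_like(pre_dif),
                   tf.zeros_like(pre_dif)))

weighted_loss = tf.reduce_sum(weight * per_sentence)  # scalar for get_train_op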
Example #8
def generator(text_ids, text_keyword_id, text_keyword_length, labels,
              text_length, temperature, vocab_size, batch_size, seq_len,
              gen_emb_dim, mem_slots, head_size, num_heads, hidden_dim,
              start_token):

    # Source word embedding
    src_word_embedder = tx.modules.WordEmbedder(vocab_size=vocab_size,
                                                hparams=hparams.embedder)
    src_word_embeds = src_word_embedder(text_keyword_id)

    encoder = UnidirectionalRNNEncoder(hparams=hparams.encoder)
    enc_outputs, final_state = encoder(inputs=src_word_embeds,
                                       sequence_length=text_keyword_length)

    # modify sentiment label
    label_connector = MLPTransformConnector(output_size=hparams.dim_c)
    state_connector = MLPTransformConnector(output_size=700)

    labels = tf.to_float(tf.reshape(labels, [batch_size, 1]))
    c = label_connector(labels)
    c_ = label_connector(1 - labels)
    h = tf.concat([c, final_state], axis=1)
    h_ = tf.concat([c_, final_state], axis=1)

    state = state_connector(h)
    state_ = state_connector(h_)

    decoder = AttentionRNNDecoder(
        memory=enc_outputs,
        memory_sequence_length=text_keyword_length,
        cell_input_fn=lambda inputs, attention: inputs,
        vocab_size=vocab_size,
        hparams=hparams.decoder)

    # For training
    g_outputs, _, _ = decoder(initial_state=state,
                              inputs=text_ids,
                              embedding=src_word_embedder,
                              sequence_length=tf.convert_to_tensor(
                                  np.array([(seq_len - 1)
                                            for i in range(batch_size)],
                                           dtype=np.int32)))
    # e = g_outputs.cell_output

    start_tokens = np.ones(batch_size, int)  # assumes BOS token id is 1
    end_token = int(2)                       # assumes EOS token id is 2
    # Greedy decoding, used in eval
    outputs_, _, length_ = decoder(decoding_strategy='infer_greedy',
                                   initial_state=state_,
                                   embedding=src_word_embedder,
                                   start_tokens=start_tokens,
                                   end_token=end_token)

    pretrain_loss = tx.losses.sequence_sparse_softmax_cross_entropy(
        labels=text_ids[:, 1:],
        logits=g_outputs.logits,
        sequence_length=text_length - 1,
        average_across_timesteps=True,
        sum_over_timesteps=False)

    # Gumbel-softmax decoding, used in training
    gumbel_helper = GumbelSoftmaxEmbeddingHelper(src_word_embedder.embedding,
                                                 start_tokens, end_token,
                                                 temperature)

    gumbel_outputs, _, sequence_lengths = decoder(helper=gumbel_helper,
                                                  initial_state=state_)

    # max_index = tf.argmax(gumbel_outputs.logits, axis=2)

    gen_o = tf.reduce_sum(tf.reduce_max(outputs_.logits, axis=2), 1)

    return gumbel_outputs.logits, outputs_.sample_id, pretrain_loss, gen_o
Example #9
    def _build_model(self, inputs, vocab, gamma, lambda_g, lambda_z, lambda_z1,
                     lambda_z2, lambda_ae):

        embedder = WordEmbedder(vocab_size=vocab.size,
                                hparams=self._hparams.embedder)

        encoder = UnidirectionalRNNEncoder(hparams=self._hparams.encoder)

        enc_text_ids = inputs['text_ids'][:, 1:]
        enc_outputs, final_state = encoder(embedder(enc_text_ids),
                                           sequence_length=inputs['length'] -
                                           1)

        z = final_state[:, self._hparams.dim_c:]

        # -------------------- CLASSIFIER ---------------------

        n_classes = self._hparams.num_classes
        z_classifier_l1 = MLPTransformConnector(
            256, hparams=self._hparams.z_classifier_l1)
        z_classifier_l2 = MLPTransformConnector(
            64, hparams=self._hparams.z_classifier_l2)
        z_classifier_out = MLPTransformConnector(
            n_classes if n_classes > 2 else 1)

        z_logits = z_classifier_l1(z)
        z_logits = z_classifier_l2(z_logits)
        z_logits = z_classifier_out(z_logits)
        z_pred = tf.greater(z_logits, 0)
        z_logits = tf.reshape(z_logits, [-1])

        z_pred = tf.to_int64(tf.reshape(z_pred, [-1]))

        loss_z_clas = tf.nn.sigmoid_cross_entropy_with_logits(
            labels=tf.to_float(inputs['labels']), logits=z_logits)
        loss_z_clas = tf.reduce_mean(loss_z_clas)

        accu_z_clas = tx.evals.accuracy(labels=inputs['labels'], preds=z_pred)

        # -------------------________________---------------------

        label_connector = MLPTransformConnector(self._hparams.dim_c)

        labels = tf.to_float(tf.reshape(inputs['labels'], [-1, 1]))

        c = label_connector(labels)
        c_ = label_connector(1 - labels)

        h = tf.concat([c, z], 1)
        h_ = tf.concat([c_, z], 1)

        # Teacher-force decoding and the auto-encoding loss for G

        decoder = AttentionRNNDecoder(
            memory=enc_outputs,
            memory_sequence_length=inputs['length'] - 1,
            cell_input_fn=lambda inputs, attention: inputs,
            vocab_size=vocab.size,
            hparams=self._hparams.decoder)

        connector = MLPTransformConnector(decoder.state_size)

        g_outputs, _, _ = decoder(initial_state=connector(h),
                                  inputs=inputs['text_ids'],
                                  embedding=embedder,
                                  sequence_length=inputs['length'] - 1)

        loss_g_ae = tx.losses.sequence_sparse_softmax_cross_entropy(
            labels=inputs['text_ids'][:, 1:],
            logits=g_outputs.logits,
            sequence_length=inputs['length'] - 1,
            average_across_timesteps=True,
            sum_over_timesteps=False)

        # Gumbel-softmax decoding, used in training

        start_tokens = tf.ones_like(inputs['labels']) * vocab.bos_token_id

        end_token = vocab.eos_token_id

        gumbel_helper = GumbelSoftmaxEmbeddingHelper(embedder.embedding,
                                                     start_tokens, end_token,
                                                     gamma)

        soft_outputs_, _, soft_length_, = decoder(helper=gumbel_helper,
                                                  initial_state=connector(h_))

        soft_outputs, _, soft_length, = decoder(helper=gumbel_helper,
                                                initial_state=connector(h))

        # ---------------------------- SHIFTED LOSS -------------------------------------
        _, encoder_final_state_ = encoder(
            embedder(soft_ids=soft_outputs_.sample_id),
            sequence_length=inputs['length'] - 1)
        _, encoder_final_state = encoder(
            embedder(soft_ids=soft_outputs.sample_id),
            sequence_length=inputs['length'] - 1)
        new_z_ = encoder_final_state_[:, self._hparams.dim_c:]
        new_z = encoder_final_state[:, self._hparams.dim_c:]

        cos_distance_z_ = tf.abs(
            tf.losses.cosine_distance(tf.nn.l2_normalize(z, axis=1),
                                      tf.nn.l2_normalize(new_z_, axis=1),
                                      axis=1))
        cos_distance_z = tf.abs(
            tf.losses.cosine_distance(tf.nn.l2_normalize(z, axis=1),
                                      tf.nn.l2_normalize(new_z, axis=1),
                                      axis=1))
        # ----------------------------______________-------------------------------------

        # Greedy decoding, used in eval

        outputs_, _, length_ = decoder(decoding_strategy='infer_greedy',
                                       initial_state=connector(h_),
                                       embedding=embedder,
                                       start_tokens=start_tokens,
                                       end_token=end_token)

        # Creates classifier

        classifier = Conv1DClassifier(hparams=self._hparams.classifier)

        clas_embedder = WordEmbedder(vocab_size=vocab.size,
                                     hparams=self._hparams.embedder)

        # Classification loss for the classifier

        clas_logits, clas_preds = classifier(
            inputs=clas_embedder(ids=inputs['text_ids'][:, 1:]),
            sequence_length=inputs['length'] - 1)

        loss_d_clas = tf.nn.sigmoid_cross_entropy_with_logits(
            labels=tf.to_float(inputs['labels']), logits=clas_logits)

        loss_d_clas = tf.reduce_mean(loss_d_clas)

        accu_d = tx.evals.accuracy(labels=inputs['labels'], preds=clas_preds)

        # Classification loss for the generator, based on soft samples

        soft_logits, soft_preds = classifier(
            inputs=clas_embedder(soft_ids=soft_outputs_.sample_id),
            sequence_length=soft_length_)

        loss_g_clas = tf.nn.sigmoid_cross_entropy_with_logits(
            labels=tf.to_float(1 - inputs['labels']), logits=soft_logits)

        loss_g_clas = tf.reduce_mean(loss_g_clas)

        # Accuracy on soft samples, for training progress monitoring

        accu_g = tx.evals.accuracy(labels=1 - inputs['labels'],
                                   preds=soft_preds)

        # Accuracy on greedy-decoded samples, for training progress monitoring

        _, gdy_preds = classifier(inputs=clas_embedder(ids=outputs_.sample_id),
                                  sequence_length=length_)

        accu_g_gdy = tx.evals.accuracy(labels=1 - inputs['labels'],
                                       preds=gdy_preds)

        # Aggregates losses

        loss_g = lambda_ae * loss_g_ae + \
                 lambda_g * loss_g_clas + \
                 lambda_z1 * cos_distance_z + cos_distance_z_ * lambda_z2 \
                 - lambda_z * loss_z_clas
        loss_d = loss_d_clas
        loss_z = loss_z_clas

        # Creates optimizers

        g_vars = collect_trainable_variables(
            [embedder, encoder, label_connector, connector, decoder])
        d_vars = collect_trainable_variables([clas_embedder, classifier])
        z_vars = collect_trainable_variables(
            [z_classifier_l1, z_classifier_l2, z_classifier_out])

        train_op_g = get_train_op(loss_g, g_vars, hparams=self._hparams.opt)
        train_op_g_ae = get_train_op(loss_g_ae,
                                     g_vars,
                                     hparams=self._hparams.opt)
        train_op_d = get_train_op(loss_d, d_vars, hparams=self._hparams.opt)
        train_op_z = get_train_op(loss_z, z_vars, hparams=self._hparams.opt)

        # Interface tensors
        self.losses = {
            "loss_g": loss_g,
            "loss_g_ae": loss_g_ae,
            "loss_g_clas": loss_g_clas,
            "loss_d": loss_d_clas,
            "loss_z_clas": loss_z_clas,
            "loss_cos_": cos_distance_z_,
            "loss_cos": cos_distance_z
        }
        self.metrics = {
            "accu_d": accu_d,
            "accu_g": accu_g,
            "accu_g_gdy": accu_g_gdy,
            "accu_z_clas": accu_z_clas
        }
        self.train_ops = {
            "train_op_g": train_op_g,
            "train_op_g_ae": train_op_g_ae,
            "train_op_d": train_op_d,
            "train_op_z": train_op_z
        }
        self.samples = {
            "original": inputs['text_ids'][:, 1:],
            "transferred": outputs_.sample_id,
            "z_vector": z,
            "labels_source": inputs['labels'],
            "labels_target": 1 - inputs['labels'],
            "labels_predicted": gdy_preds
        }

        self.fetches_train_g = {
            "loss_g": self.train_ops["train_op_g"],
            "loss_g_ae": self.losses["loss_g_ae"],
            "loss_g_clas": self.losses["loss_g_clas"],
            "loss_shifted_ae1": self.losses["loss_cos"],
            "loss_shifted_ae2": self.losses["loss_cos_"],
            "accu_g": self.metrics["accu_g"],
            "accu_g_gdy": self.metrics["accu_g_gdy"],
            "accu_z_clas": self.metrics["accu_z_clas"]
        }

        self.fetches_train_z = {
            "loss_z": self.train_ops["train_op_z"],
            "accu_z": self.metrics["accu_z_clas"]
        }

        self.fetches_train_d = {
            "loss_d": self.train_ops["train_op_d"],
            "accu_d": self.metrics["accu_d"]
        }
        fetches_eval = {"batch_size": get_batch_size(inputs['text_ids'])}
        fetches_eval.update(self.losses)
        fetches_eval.update(self.metrics)
        fetches_eval.update(self.samples)
        self.fetches_eval = fetches_eval
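The distinctive part of this example is the "shifted loss": the soft transferred sentence is re-encoded with the same encoder, and the cosine distance between the original content vector z and the re-encoded one is added to the generator loss so transfer preserves content. A minimal, self-contained sketch of that distance term, with hypothetical sizes and random tensors standing in for the encoder outputs:

import tensorflow as tf

batch_size, dim_z = 4, 500  # hypothetical

z = tf.random_normal([batch_size, dim_z])       # from encoding the original text
new_z_ = tf.random_normal([batch_size, dim_z])  # from re-encoding the soft samples

cos_distance_z_ = tf.abs(
    tf.losses.cosine_distance(tf.nn.l2_normalize(z, axis=1),
                              tf.nn.l2_normalize(new_z_, axis=1),
                              axis=1))
# added to loss_g with weight lambda_z2 in the example above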
Example #10
def generator(text_ids, text_keyword_id, text_keyword_length, labels,
              text_length, temperature, vocab_size, batch_size, seq_len,
              gen_emb_dim, mem_slots, head_size, num_heads, hidden_dim,
              start_token):

    is_target = tf.to_float(tf.not_equal(text_ids[:, 1:], 0))

    # Source word embedding
    src_word_embedder = tx.modules.WordEmbedder(vocab_size=vocab_size,
                                                hparams=trans_config.emb)
    src_word_embeds = src_word_embedder(text_keyword_id)
    src_word_embeds = src_word_embeds * trans_config.hidden_dim**0.5

    # Position embedding (shared b/w source and target)
    pos_embedder = tx.modules.SinusoidsPositionEmbedder(
        position_size=seq_len, hparams=trans_config.position_embedder_hparams)
    # src_seq_len = batch_data['text_keyword_length']
    src_pos_embeds = pos_embedder(sequence_length=seq_len)

    src_input_embedding = src_word_embeds + src_pos_embeds

    encoder = TransformerEncoder(hparams=trans_config.encoder)
    encoder_output = encoder(inputs=src_input_embedding,
                             sequence_length=text_keyword_length)

    # modify sentiment label
    label_connector = MLPTransformConnector(
        output_size=trans_config.hidden_dim)

    labels = tf.to_float(tf.reshape(labels, [-1, 1]))
    c = tf.reshape(label_connector(labels), [batch_size, 1, 512])
    c_ = tf.reshape(label_connector(1 - labels), [batch_size, 1, 512])
    encoder_output = tf.concat([c, encoder_output[:, 1:, :]], axis=1)
    encoder_output_ = tf.concat([c_, encoder_output[:, 1:, :]], axis=1)

    # The decoder ties the input word embedding with the output logit layer.
    # The decoder masks out <PAD>'s embedding, which in effect means <PAD>
    # has an all-zero embedding, so here we explicitly set <PAD>'s embedding
    # to all-zero.
    tgt_embedding = tf.concat(
        [tf.zeros(shape=[1, src_word_embedder.dim]),
         src_word_embedder.embedding[1:, :]],
        axis=0)
    tgt_embedder = tx.modules.WordEmbedder(tgt_embedding)
    tgt_word_embeds = tgt_embedder(text_ids)
    tgt_word_embeds = tgt_word_embeds * trans_config.hidden_dim**0.5

    tgt_seq_len = text_length
    tgt_pos_embeds = pos_embedder(sequence_length=tgt_seq_len)

    tgt_input_embedding = tgt_word_embeds + tgt_pos_embeds

    _output_w = tf.transpose(tgt_embedder.embedding, (1, 0))

    decoder = TransformerDecoder(vocab_size=vocab_size,
                                 output_layer=_output_w,
                                 hparams=trans_config.decoder)
    # For training
    outputs = decoder(memory=encoder_output,
                      memory_sequence_length=text_keyword_length,
                      inputs=tgt_input_embedding,
                      decoding_strategy='train_greedy',
                      mode=tf.estimator.ModeKeys.TRAIN)

    mle_loss = transformer_utils.smoothing_cross_entropy(
        outputs.logits[:, :-1, :], text_ids[:, 1:], vocab_size,
        trans_config.loss_label_confidence)
    pretrain_loss = tf.reduce_sum(
        mle_loss * is_target) / tf.reduce_sum(is_target)

    # Gumbel-softmax decoding, used in training
    start_tokens = np.ones(batch_size, int)
    end_token = int(2)
    gumbel_helper = GumbelSoftmaxEmbeddingHelper(tgt_embedding, start_tokens,
                                                 end_token, temperature)

    gumbel_outputs, sequence_lengths = decoder(
        memory=encoder_output_,
        memory_sequence_length=text_keyword_length,
        helper=gumbel_helper)

    # max_index = tf.argmax(gumbel_outputs.logits, axis=2)
    # gen_x_onehot_adv = tf.one_hot(max_index, vocab_size, 1.0, 0.0)

    gen_o = tf.reduce_sum(tf.reduce_max(gumbel_outputs.logits, axis=2))

    return gumbel_outputs.logits, gumbel_outputs.sample_id, pretrain_loss, gen_o
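A minimal, self-contained sketch of the embedding tying used above, assuming Texar-TF on TF 1.x with hypothetical sizes: the <PAD> row (id 0) of the target embedding is zeroed out, and the transposed embedding matrix is reused as the TransformerDecoder's output layer:

import tensorflow as tf
import texar as tx

vocab_size, emb_dim = 1000, 512  # hypothetical sizes

src_word_embedder = tx.modules.WordEmbedder(vocab_size=vocab_size,
                                            hparams={'dim': emb_dim})

tgt_embedding = tf.concat(
    [tf.zeros(shape=[1, src_word_embedder.dim]),  # all-zero row for <PAD> (id 0)
     src_word_embedder.embedding[1:, :]],
    axis=0)
tgt_embedder = tx.modules.WordEmbedder(tgt_embedding)

output_w = tf.transpose(tgt_embedder.embedding, (1, 0))  # [emb_dim, vocab_size]
decoder = tx.modules.TransformerDecoder(vocab_size=vocab_size,
                                        output_layer=output_w)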