Example #1
    def __init__(self, config):
        super(Transducer, self).__init__()
        # define encoder
        self.config = config
        self.encoder = build_encoder(config)
        # define decoder
        self.decoder = build_decoder(config)
        # define JointNet
        self.joint = JointNet(
            input_size=config.joint.input_size,
            inner_dim=config.joint.inner_size,
            vocab_size=config.vocab_size
            )

        if config.share_embedding:
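            # Tie the joint network's output projection to the decoder
            # embedding so both share one weight matrix.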
            assert self.decoder.embedding.weight.size() == self.joint.project_layer.weight.size(), \
                '%s != %s' % (self.decoder.embedding.weight.size(), self.joint.project_layer.weight.size())
            self.joint.project_layer.weight = self.decoder.embedding.weight

        self.crit = RNNTLoss(blank=28)  # RNN-T loss; label index 28 is the blank symbol
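
A minimal sketch of how this constructor might be driven, for context. The `SimpleNamespace` config and every value in it are assumptions covering only the fields read above; a real config would also carry whatever `build_encoder`/`build_decoder` expect:

from types import SimpleNamespace

# Hypothetical config mirroring only the attributes the constructor reads.
config = SimpleNamespace(
    joint=SimpleNamespace(input_size=1280, inner_size=512),  # assumed sizes
    vocab_size=29,
    share_embedding=False,  # True requires matching embedding/projection shapes
)
model = Transducer(config)  # wires up encoder, decoder, joint net and RNN-T loss
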
Example #2
    def build_model(self):
        print('building model...')
        with tf.variable_scope('seq2seq_placeholder'):
            self.encoder_inputs = tf.placeholder(tf.int32, [None, None],
                                                 name="encoder_inputs")
            self.decoder_inputs = tf.placeholder(tf.int32, [None, None],
                                                 name="decoder_inputs")
            self.decoder_targets = tf.placeholder(tf.int32, [None, None],
                                                  name="decoder_targets")
            self.decoder_targets_masks = tf.placeholder(tf.float32,
                                                        [None, None],
                                                        name="mask")
            self.encoder_length = tf.placeholder(tf.int32, [None],
                                                 name="encoder_length")
            self.decoder_length = tf.placeholder(tf.int32, [None],
                                                 name="decoder_length")
            self.max_target_sequence_length = tf.reduce_max(
                self.decoder_length, name='max_target_len')

        with tf.variable_scope('seq2seq_embedding'):
            self.embedding = self.init_embedding(self.vocab_size,
                                                 self.embedding_size)

        with tf.variable_scope('seq2seq_encoder'):
            encoder_outputs, encoder_states = build_encoder(
                self.embedding,
                self.encoder_inputs,
                self.encoder_length,
                self.enc_num_layers,
                self.enc_num_units,
                self.enc_cell_type,
                bidir=self.enc_bidir)

        with tf.variable_scope('seq2seq_decoder'):
            encoder_length = self.encoder_length
            if self.beam_search:
                print("use beamsearch decoding..")
                encoder_outputs = tile_batch(encoder_outputs,
                                             multiplier=self.beam_size)
                encoder_states = tile_batch(encoder_states,
                                            multiplier=self.beam_size)
                encoder_length = tile_batch(encoder_length,
                                            multiplier=self.beam_size)

            attention_mechanism = BahdanauAttention(
                num_units=self.attn_num_units,
                memory=encoder_outputs,
                memory_sequence_length=encoder_length)

            decoder_cell = create_rnn_cell(self.dec_num_layers,
                                           self.dec_num_units,
                                           self.dec_cell_type)
            decoder_cell = AttentionWrapper(
                cell=decoder_cell,
                attention_mechanism=attention_mechanism,
                attention_layer_size=self.dec_num_units,
                name='Attention_Wrapper')

            batch_size = (self.batch_size * self.beam_size
                          if self.beam_search else self.batch_size)

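            # Start decoding from the encoder's final state, cloned into the
            # AttentionWrapper's state structure.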
            decoder_initial_state = decoder_cell.zero_state(
                batch_size=batch_size,
                dtype=tf.float32).clone(cell_state=encoder_states)

            output_layer = tf.layers.Dense(self.vocab_size,
                                           use_bias=False,
                                           name='output_projection')

            if self.mode == 'train':
                decoder_inputs_embedded = tf.nn.embedding_lookup(
                    self.embedding, self.decoder_inputs)
                # The training helper feeds the given decoder inputs at each
                # step (teacher forcing) rather than the previous step's output.
                training_helper = tf.contrib.seq2seq.TrainingHelper(
                    inputs=decoder_inputs_embedded,
                    sequence_length=self.decoder_length,
                    name='training_helper')

                training_decoder = tf.contrib.seq2seq.BasicDecoder(
                    cell=decoder_cell,
                    helper=training_helper,
                    initial_state=decoder_initial_state,
                    output_layer=output_layer)

                decoder_outputs, _, _ = tf.contrib.seq2seq.dynamic_decode(
                    decoder=training_decoder,
                    impute_finished=True,
                    maximum_iterations=self.max_target_sequence_length)

                self.decoder_logits_train = decoder_outputs.rnn_output

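                # Masked cross-entropy averaged over the batch; the float mask
                # zeroes out contributions from padding positions.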
                self.loss = tf.contrib.seq2seq.sequence_loss(
                    logits=self.decoder_logits_train,
                    targets=self.decoder_targets,
                    weights=self.decoder_targets_masks)

                optimizer = tf.train.AdamOptimizer(self.learning_rate)
                trainable_params = tf.trainable_variables()
                gradients = tf.gradients(self.loss, trainable_params)
                clip_gradients, _ = tf.clip_by_global_norm(
                    gradients, self.max_gradient_norm)
                self.train_op = optimizer.apply_gradients(
                    zip(clip_gradients, trainable_params))

            elif self.mode == 'infer':
                start_tokens = tf.ones([self.batch_size],
                                       tf.int32) * SOS_ID  # batch_size needs no beam tiling here
                end_token = EOS_ID

                if self.beam_search:
                    inference_decoder = BeamSearchDecoder(
                        cell=decoder_cell,
                        embedding=self.embedding,
                        start_tokens=start_tokens,
                        end_token=end_token,
                        initial_state=decoder_initial_state,
                        beam_width=self.beam_size,
                        output_layer=output_layer)
                else:
                    decoding_helper = GreedyEmbeddingHelper(
                        embedding=self.embedding,
                        start_tokens=start_tokens,
                        end_token=end_token)

                    inference_decoder = BasicDecoder(
                        cell=decoder_cell,
                        helper=decoding_helper,
                        initial_state=decoder_initial_state,
                        output_layer=output_layer)

                decoder_outputs, _, _ = dynamic_decode(
                    decoder=inference_decoder,
                    maximum_iterations=self.infer_max_iter)
                if self.beam_search:
                    infer_outputs = decoder_outputs.predicted_ids  # [batch_size, decoder_targets_length, beam_size]
                    self.infer_outputs = tf.transpose(
                        infer_outputs,
                        [0, 2, 1])  # [batch_size, beam_size, decoder_targets_length]
                else:
                    self.infer_outputs = decoder_outputs.sample_id  # [batch_size, decoder_targets_length]

        self.saver = tf.train.Saver(tf.global_variables(),
                                    max_to_keep=self.max_to_keep)
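
As a usage illustration, a minimal sketch of one training step against these placeholders. The session driver, the `model` instance, and the padded batch arrays (`enc_batch`, `dec_in_batch`, ...) are all assumptions outside the snippet:

# Hypothetical driver: `model` is an instance of the class above, built with
# mode='train'; all batch arrays are pre-padded numpy arrays.
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    feed = {
        model.encoder_inputs: enc_batch,         # [batch, max_enc_len] int32
        model.encoder_length: enc_lens,          # [batch] true source lengths
        model.decoder_inputs: dec_in_batch,      # [batch, max_dec_len], SOS-prefixed
        model.decoder_targets: dec_tgt_batch,    # [batch, max_dec_len], EOS-suffixed
        model.decoder_targets_masks: dec_masks,  # [batch, max_dec_len] 0/1 floats
        model.decoder_length: dec_lens,          # [batch] true target lengths
    }
    _, step_loss = sess.run([model.train_op, model.loss], feed_dict=feed)
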
Example #3
    def build_model(self):
        print('building model...')
        with tf.variable_scope('seq2seq_placeholder'):
            self.encoder_inputs = tf.placeholder(tf.int32, [None, None],
                                                 name="encoder_inputs")
            self.decoder_inputs = tf.placeholder(tf.int32, [None, None],
                                                 name="decoder_inputs")
            self.decoder_targets = tf.placeholder(tf.int32, [None, None],
                                                  name="decoder_targets")
            self.decoder_targets_masks = tf.placeholder(tf.bool, [None, None],
                                                        name="mask")
            self.encoder_length = tf.placeholder(tf.int32, [None],
                                                 name="encoder_length")
            self.decoder_length = tf.placeholder(tf.int32, [None],
                                                 name="decoder_length")
            # ECM placeholder
            self.choice_qs = tf.placeholder(tf.float32, [None, None],
                                            name="choice")
            self.emo_cat = tf.placeholder(tf.int32, [None],
                                          name="emotion_category")
            self.max_target_sequence_length = tf.reduce_max(
                self.decoder_length, name='max_target_len')

        with tf.variable_scope('seq2seq_embedding'):
            self.embedding = self.init_embedding(self.vocab_size,
                                                 self.embedding_size)
            # create emotion category embeddings
            emo_initializer = tf.contrib.layers.xavier_initializer()
            emo_cat_embeddings = tf.get_variable(
                "emo_cat_embeddings",
                [self.num_emotion, self.emo_cat_emb_size],
                initializer=emo_initializer,
                dtype=tf.float32)
            self.emo_internal_memory_embedding = tf.get_variable(
                "emo_internal_memory_embedding",
                [self.num_emotion, self.emo_internal_memory_units],
                initializer=emo_initializer,
                dtype=tf.float32)
            self.emo_cat_embs = tf.nn.embedding_lookup(emo_cat_embeddings,
                                                       self.emo_cat)

        with tf.variable_scope('seq2seq_encoder'):
            encoder_outputs, encoder_states = build_encoder(
                self.embedding,
                self.encoder_inputs,
                self.encoder_length,
                self.enc_num_layers,
                self.enc_num_units,
                self.enc_cell_type,
                bidir=self.enc_bidir)

        with tf.variable_scope('seq2seq_decoder'):
            encoder_length = self.encoder_length
            emo_cat = self.emo_cat
            emo_cat_embs = self.emo_cat_embs
            if self.beam_search:
                print("use beamsearch decoding..")
                encoder_outputs = tile_batch(encoder_outputs,
                                             multiplier=self.beam_size)
                encoder_states = tile_batch(encoder_states,
                                            multiplier=self.beam_size)
                encoder_length = tile_batch(encoder_length,
                                            multiplier=self.beam_size)
                emo_cat = tile_batch(emo_cat, multiplier=self.beam_size)
                emo_cat_embs = tile_batch(emo_cat_embs,
                                          multiplier=self.beam_size)

            attention_mechanism = BahdanauAttention(
                num_units=self.attn_num_units,
                memory=encoder_outputs,
                memory_sequence_length=encoder_length)

            decoder_cell = create_rnn_cell(self.dec_num_layers,
                                           self.dec_num_units,
                                           self.dec_cell_type)

            self.read_g = tf.layers.Dense(self.emo_internal_memory_units,
                                          use_bias=False,
                                          name="internal_read_gate")

            self.write_g = tf.layers.Dense(self.emo_internal_memory_units,
                                           use_bias=False,
                                           name="internal_write_gate")

            decoder_cell = ECMWrapper(
                cell=decoder_cell,
                attention_mechanism=attention_mechanism,
                emo_cat_embs=emo_cat_embs,  # emotion category embedding
                emo_cat=emo_cat,  # emotion category
                emo_internal_memory_units=self.emo_internal_memory_units,  # internal memory size
                emo_internal_memory_embedding=self.emo_internal_memory_embedding,  # internal memory table
                read_gate=self.read_g,
                write_gate=self.write_g,
                attention_layer_size=self.dec_num_units,
                name='ECMWrapper')

            batch_size = (self.batch_size * self.beam_size
                          if self.beam_search else self.batch_size)

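            # As in Example #2, decoding starts from the encoder's final state
            # cloned into the ECM wrapper's state structure.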
            decoder_initial_state = decoder_cell.zero_state(
                batch_size=batch_size,
                dtype=tf.float32).clone(cell_state=encoder_states)

            output_layer = tf.layers.Dense(
                self.vocab_size, use_bias=False,
                name='output_projection')  # generic vocabulary projection

            # ECM external memory module
            emo_output_layer = tf.layers.Dense(
                self.vocab_size, use_bias=False,
                name="emo_output_projection")  # emotion vocabulary projection

            emo_choice_layer = tf.layers.Dense(
                1, use_bias=False,
                name="emo_choice_alpha")  # projects to the emotion-word choice probability

            if self.mode == 'train':
                decoder_inputs_embedded = tf.nn.embedding_lookup(
                    self.embedding, self.decoder_inputs)
                # The training helper feeds the given decoder inputs at each
                # step (teacher forcing) rather than the previous step's output.
                training_helper = TrainingHelper(
                    inputs=decoder_inputs_embedded,
                    sequence_length=self.decoder_length,
                    name='training_helper')

                training_decoder = BasicDecoder(
                    cell=decoder_cell,
                    helper=training_helper,
                    initial_state=decoder_initial_state)

                self.decoder_outputs, self.final_state, self.final_sequence_length = dynamic_decode(
                    decoder=training_decoder,
                    impute_finished=True,
                    maximum_iterations=self.max_target_sequence_length)

                self.decoder_logits_train = tf.identity(
                    self.decoder_outputs.rnn_output)

                with tf.variable_scope('decoder'):
                    self.generic_logits = output_layer(
                        self.decoder_logits_train)  # logits over the generic vocabulary
                    self.emo_ext_logits = emo_output_layer(
                        self.decoder_logits_train)  # logits over the emotion vocabulary
                    self.alphas = tf.nn.sigmoid(
                        emo_choice_layer(
                            self.decoder_logits_train))  # probability of picking an emotion word
                    self.int_M_emo = self.final_state.internal_memory  # final internal memory state

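                # Mix the two distributions: per token, the output probability is
                # alpha * softmax(emo_logits) + (1 - alpha) * softmax(generic_logits).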
                g_probs = tf.nn.softmax(
                    self.generic_logits) * (1 - self.alphas)
                e_probs = tf.nn.softmax(self.emo_ext_logits) * self.alphas
                train_log_probs = tf.log(g_probs + e_probs)

                # compute losses
                self.alphas = tf.squeeze(self.alphas, axis=-1)

                self.g_losses = tf.nn.sparse_softmax_cross_entropy_with_logits(
                    logits=self.generic_logits,
                    labels=self.decoder_targets) - tf.log(1 - self.alphas)

                self.e_losses = tf.nn.sparse_softmax_cross_entropy_with_logits(
                    logits=self.emo_ext_logits,
                    labels=self.decoder_targets) - tf.log(self.alphas)

                losses = self.g_losses * (
                    1 - self.choice_qs) + self.e_losses * self.choice_qs

                # alpha and internal memory regularizations
                self.alpha_reg = tf.reduce_mean(self.choice_qs *
                                                -tf.log(self.alphas))
                self.int_mem_reg = tf.reduce_mean(
                    tf.norm(self.int_M_emo + 1e-7, axis=1))
                losses = tf.boolean_mask(losses, self.decoder_targets_masks)
                self.loss = tf.reduce_mean(
                    losses) + self.alpha_reg + self.int_mem_reg

                # prepare for perplexity computation
                CE = tf.nn.sparse_softmax_cross_entropy_with_logits(
                    logits=train_log_probs, labels=self.decoder_targets)
                CE = tf.boolean_mask(CE, self.decoder_targets_masks)
                self.CE = tf.reduce_mean(CE)

                optimizer = tf.train.AdamOptimizer(self.learning_rate)
                trainable_params = tf.trainable_variables()
                gradients = tf.gradients(self.loss, trainable_params)
                clip_gradients, _ = tf.clip_by_global_norm(
                    gradients, self.max_gradient_norm)
                self.train_op = optimizer.apply_gradients(
                    zip(clip_gradients, trainable_params))

            elif self.mode == 'infer':
                start_tokens = tf.ones([self.batch_size], tf.int32) * SOS_ID
                end_token = EOS_ID

                inference_decoder = ECMBeamSearchDecoder(
                    cell=decoder_cell,
                    embedding=self.embedding,
                    start_tokens=start_tokens,
                    end_token=end_token,
                    initial_state=decoder_initial_state,
                    beam_width=self.beam_size,
                    output_layer=output_layer,
                    emo_output_layer=emo_output_layer,
                    emo_choice_layer=emo_choice_layer)

                decoder_outputs, _, _ = dynamic_decode(
                    decoder=inference_decoder,
                    maximum_iterations=self.infer_max_iter)

                infer_outputs = decoder_outputs.predicted_ids  # [batch_size, decoder_targets_length, beam_size]
                self.infer_outputs = tf.transpose(
                    infer_outputs, [0, 2, 1], name='infer_outputs'
                )  # [batch_size, beam_size, decoder_targets_length]

        self.saver = tf.train.Saver(tf.global_variables(),
                                    max_to_keep=self.max_to_keep)
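
And a matching inference sketch. As before, the session driver, checkpoint path, and input arrays are assumptions; the model is built with mode='infer', so only beam search decoding is wired up:

# Hypothetical inference driver for the ECM model above.
with tf.Session() as sess:
    model.saver.restore(sess, ckpt_path)      # ckpt_path is an assumed checkpoint
    feed = {
        model.encoder_inputs: enc_batch,      # [batch, max_enc_len] int32
        model.encoder_length: enc_lens,       # [batch] true source lengths
        model.emo_cat: emo_ids,               # [batch] target emotion category ids
    }
    beams = sess.run(model.infer_outputs, feed_dict=feed)
    # beams: [batch, beam_size, max_len]; beams[:, 0] is the best hypothesis
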
Example #4

import numpy as np
from keras.datasets import mnist
from keras.models import Model

from encoder import build_encoder
from decoder import build_decoder

(X_train, _), (X_test, _) = mnist.load_data()

image_size = X_train.shape[1]
X_train = np.reshape(X_train, [-1, image_size, image_size, 1])
X_test = np.reshape(X_test, [-1, image_size, image_size, 1])
X_train = X_train.astype('float32') / 255
X_test = X_test.astype('float32') / 255

latent_dim = 16
batch_size = 128
kernel_size = 3
layer_filters = [32, 64]

inputs, encoder, shape = build_encoder(image_size, latent_dim, layer_filters,
                                       kernel_size)
decoder = build_decoder(shape, latent_dim, layer_filters, kernel_size)

autoencoder = Model(inputs, decoder(encoder(inputs)), name='autoencoder')
autoencoder.summary()
autoencoder.compile(loss='mse', optimizer='adam')

autoencoder.fit(X_train,
                X_train,
                validation_data=(X_test, X_test),
                epochs=30,
                batch_size=batch_size)
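
After training, the model can be sanity-checked by reconstructing held-out digits; `predict` returns arrays shaped like the input:

# Reconstruct a handful of test digits and report the reconstruction error.
reconstructed = autoencoder.predict(X_test[:8])
mse = np.mean((reconstructed - X_test[:8]) ** 2)
print('mean reconstruction MSE on 8 test digits: %.5f' % mse)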