Example #1
    def __init__(self,
                 layer_e,
                 hidden1,
                 Zdim,
                 layer_l,
                 hidden3,
                 layer_d,
                 hidden4,
                 logchange=True,
                 Type='ZINB',
                 n_centroids=4,
                 penality="GMM"):

        super(scMVAE_Concat, self).__init__()
        ### function definition
        self.encoder_x = Encoder(layer_e, hidden1, Zdim)
        self.encoder_l = Encoder(layer_l, hidden3, 1)

        if Type == 'ZINB':
            self.decoder_x = Decoder_ZINB(layer_d, hidden4, layer_e[0])

        else:
            self.decoder_x = Decoder(layer_d, hidden4, layer_e[0], Type)

        ### parameter definitions
        self.logchange = logchange
        self.Type = Type
        self.penality = penality
        self.n_centroids = n_centroids

        self.pi = nn.Parameter(torch.ones(n_centroids) / n_centroids)  # pc
        self.mu_c = nn.Parameter(torch.zeros(Zdim, n_centroids))  # mu
        self.var_c = nn.Parameter(torch.ones(Zdim, n_centroids))  # sigma^2
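
The `pi`, `mu_c`, and `var_c` parameters define a Gaussian-mixture (GMM) prior over the latent space. As a rough illustration of how such parameters are typically consumed (a minimal VaDE-style sketch; the helper below and its shapes are assumptions, not code from this repository):

import math
import torch

def gaussian_log_density(z, mu_c, var_c):
    """z: (batch, Zdim); mu_c, var_c: (Zdim, n_centroids).
    Returns (batch, n_centroids) log N(z | mu_c, var_c) per centroid."""
    z = z.unsqueeze(-1)                  # (batch, Zdim, 1)
    mu = mu_c.unsqueeze(0)               # (1, Zdim, n_centroids)
    var = var_c.unsqueeze(0)             # (1, Zdim, n_centroids)
    log_pdf = -0.5 * (math.log(2 * math.pi) + torch.log(var)
                      + (z - mu) ** 2 / var)
    return log_pdf.sum(dim=1)            # sum over latent dimensions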
Example #2
def load_encoder(cfg, enc_type):
    model = Encoder()
    if cfg.disent.load:
        fn = Path("checkpoint_{}.tar".format(cfg.disent.epoch_num))
        model_fp = Path(cfg.disent.model_path) / Path(f"enc_{enc_type}") / fn
        model.load_state_dict(
            torch.load(model_fp, map_location=cfg.disent.device.type))
    model = model.to(cfg.disent.device)
    return model
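
A hypothetical driver for load_encoder (the config fields mirror exactly what the function reads; SimpleNamespace, the paths, and the "audio" encoder type are illustrative assumptions):

import torch
from types import SimpleNamespace

# Minimal config exposing only the fields load_encoder touches.
cfg = SimpleNamespace(disent=SimpleNamespace(load=True,
                                             epoch_num=100,
                                             model_path="runs/exp1",
                                             device=torch.device("cpu")))
# Resolves runs/exp1/enc_audio/checkpoint_100.tar and maps weights to CPU.
encoder = load_encoder(cfg, enc_type="audio")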
Example #3
 def __init__(self, params):
     super(Seq2Seq, self).__init__()
     self.params = params
     self.embedding_matrix = load_embedding_matrix()
     self.encoder = Encoder(params["vocab_size"], params["vector_dim"],
                            params["encoder_units"], self.embedding_matrix)
     self.attention = Attention(params["attn_units"])
     self.decoder = Decoder(params["vocab_size"], params["vector_dim"],
                            params["decoder_units"], self.embedding_matrix)
Example #4
 def __init__(self, params):
     super(PGN, self).__init__()
     self.params = params
     self.encoder = Encoder(params["vocab_size"], params["embed_size"],
                            params["enc_units"], params["batch_size"])
     self.attention = LuongAttention(params["attn_units"])
     self.decoder = Decoder(params["vocab_size"], params["embed_size"],
                            params["dec_units"], params["batch_size"])
     self.pointer = Pointer()
Example #5
 def __init__(self, params, embeddings_matrix):
     super(PGN, self).__init__()
     self.params = params
     self.encoder = Encoder(params["vocab_size"], params["embed_size"],
                            params["enc_units"], params["batch_size"],
                            embeddings_matrix)
     self.attention = BahdanauAttention(params["attn_units"])
     self.decoder = Decoder(params["vocab_size"], params["embed_size"],
                            params["dec_units"], params["batch_size"],
                            embeddings_matrix)
     self.pointer = Pointer()
Example #6
 def __init__(self, params):
     super(PGN, self).__init__()
     self.params = params
     self.encoder = Encoder(params["vocab_size"], params["embed_size"],
                            params["enc_units"], params["batch_size"])
     self.attention = BahdanauAttention(params["attn_units"])
     if params["coverage"]:
         self.coverage = Coverage(params["attn_units"])
     self.decoder = Decoder(params["vocab_size"], params["embed_size"],
                            params["dec_units"], params["batch_size"],
                            params["use_stats"])
     self.pointer = Pointer()
Example #7
    def __init__(
            self,
            src_lang,
            trg_lang,
            max_len=32,
            hid_dim=256,
            enc_layers=3,
            dec_layers=3,
            enc_heads=8,
            dec_heads=8,
            enc_pf_dim=512,
            dec_pf_dim=512,
            enc_dropout=0.1,
            dec_dropout=0.1,
            lr=0.0005,
            **kwargs,  # throwaway
    ):
        super().__init__()

        self.save_hyperparameters()
        del self.hparams["src_lang"]
        del self.hparams["trg_lang"]

        self.src_lang = src_lang
        self.trg_lang = trg_lang

        self.encoder = Encoder(
            src_lang.n_words,
            hid_dim,
            enc_layers,
            enc_heads,
            enc_pf_dim,
            enc_dropout,
            device,  # assumed to be defined at module level in the source
        )

        self.decoder = Decoder(
            trg_lang.n_words,
            hid_dim,
            dec_layers,
            dec_heads,
            dec_pf_dim,
            dec_dropout,
            device,
        )

        self.criterion = nn.CrossEntropyLoss(
            ignore_index=self.trg_lang.PAD_idx)
        self.initialize_weights()
        self.to(device)
Example #8
    def __init__(self,
                 features,
                 adj_lists,
                 ft_size,
                 n_h,
                 activation,
                 num_sample=[10, 10],
                 skip_connection=False,
                 gcn=True):
        super(DGI_ind, self).__init__()
        self.features = features
        self.skip_connection = skip_connection
        self.agg1 = MeanAggregator(features,
                                   cuda=torch.cuda.is_available(),
                                   gcn=gcn,
                                   name='l1')
        self.enc1 = Encoder(features,
                            ft_size,
                            n_h,
                            adj_lists,
                            self.agg1,
                            num_sample=num_sample[0],
                            gcn=gcn,
                            cuda=torch.cuda.is_available(),
                            activation=activation,
                            skip_connection=skip_connection,
                            name='l2')
        self.agg2 = MeanAggregator(lambda nodes: self.enc1(nodes),
                                   cuda=torch.cuda.is_available(),
                                   gcn=gcn,
                                   name='l3')
        self.enc2 = Encoder(lambda nodes: self.enc1(nodes),
                            self.enc1.embed_dim,
                            n_h,
                            adj_lists,
                            self.agg2,
                            num_sample=num_sample[1],
                            base_model=self.enc1,
                            gcn=gcn,
                            cuda=torch.cuda.is_available(),
                            activation=activation,
                            skip_connection=skip_connection,
                            name='l4')
        self.read = AvgReadout()
        self.sigm = nn.Sigmoid()

        if skip_connection:
            self.disc = Discriminator(2 * n_h)
        else:
            self.disc = Discriminator(n_h)
Example #9
    def init_model(self):
        num_enc_layers = self.config['num_enc_layers']
        num_enc_heads = self.config['num_enc_heads']
        num_dec_layers = self.config['num_dec_layers']
        num_dec_heads = self.config['num_dec_heads']

        embed_dim = self.config['embed_dim']
        ff_dim = self.config['ff_dim']
        dropout = self.config['dropout']

        # get encoder, decoder
        self.encoder = Encoder(num_enc_layers,
                               num_enc_heads,
                               embed_dim,
                               ff_dim,
                               dropout=dropout)
        self.decoder = Decoder(num_dec_layers,
                               num_dec_heads,
                               embed_dim,
                               ff_dim,
                               dropout=dropout)

        # leave layer norm alone
        init_func = (nn.init.xavier_normal_
                     if self.config['init_type'] == ac.XAVIER_NORMAL
                     else nn.init.xavier_uniform_)
        for m in [
                self.encoder.self_atts, self.encoder.pos_ffs,
                self.decoder.self_atts, self.decoder.pos_ffs,
                self.decoder.enc_dec_atts
        ]:
            for p in m.parameters():
                if p.dim() > 1:
                    init_func(p)
                else:
                    nn.init.constant_(p, 0.)
Example #10
class Seq2Seq(tf.keras.Model):
    def __init__(self, params):
        super(Seq2Seq, self).__init__()
        self.params = params
        self.embedding_matrix = load_embedding_matrix()
        self.encoder = Encoder(params["vocab_size"], params["vector_dim"],
                               params["encoder_units"], self.embedding_matrix)
        self.attention = Attention(params["attn_units"])
        self.decoder = Decoder(params["vocab_size"], params["vector_dim"],
                               params["decoder_units"], self.embedding_matrix)

    def call_encoder(self, enc_inp):
        enc_hidden = self.encoder.init_hidden(self.params['batch_size'])
        enc_output, enc_hidden = self.encoder(enc_inp, enc_hidden)
        return enc_output, enc_hidden

    def call_decoder(self, dec_input, dec_hidden, enc_output, target):
        predictions = []

        for t in range(1, self.params['max_y_length'] + 2):
            context_vector, _ = self.attention(dec_hidden, enc_output)
            predict, dec_hidden = self.decoder(dec_input, dec_hidden,
                                               enc_output, context_vector)
            dec_input = tf.expand_dims(target[:, t], 1)  # teacher forcing

            predictions.append(predict)

        return tf.stack(predictions, 1), dec_hidden

    def call_one_step_decoder(self, dec_input, dec_hidden, enc_output):
        context_vector, attention_weights = self.attention(
            dec_hidden, enc_output)
        prediction, dec_hidden = self.decoder(dec_input, None, None,
                                              context_vector)
        return prediction, dec_hidden, context_vector, attention_weights
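
A minimal sketch of driving this model with teacher forcing (the params values and the dummy tensors are assumptions; note that call_decoder indexes target up to max_y_length + 1, so the target needs max_y_length + 2 columns):

import tensorflow as tf

params = {"vocab_size": 30000, "vector_dim": 300, "encoder_units": 256,
          "attn_units": 256, "decoder_units": 256, "batch_size": 16,
          "max_y_length": 40}
model = Seq2Seq(params)  # requires load_embedding_matrix() to succeed

enc_inp = tf.zeros((16, 200), dtype=tf.int32)                   # dummy source ids
target = tf.zeros((16, params["max_y_length"] + 2), dtype=tf.int32)

enc_output, enc_hidden = model.call_encoder(enc_inp)
dec_input = tf.expand_dims(target[:, 0], 1)                     # start token
predictions, _ = model.call_decoder(dec_input, enc_hidden, enc_output, target)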
Example #11
    def __init__(self,
                 embedding_dim=256,
                 vocab_size=388 + 2,
                 num_layer=6,
                 max_seq=2048,
                 dropout=0.2,
                 debug=False,
                 loader_path=None,
                 dist=False,
                 writer=None):
        super().__init__()
        self.infer = False
        if loader_path is not None:
            self.load_config_file(loader_path)
        else:
            self._debug = debug
            self.max_seq = max_seq
            self.num_layer = num_layer
            self.embedding_dim = embedding_dim
            self.vocab_size = vocab_size
            self.dist = dist

        self.writer = writer
        # note: the source binds an Encoder stack to an attribute named "Decoder"
        self.Decoder = Encoder(num_layers=self.num_layer,
                               d_model=self.embedding_dim,
                               input_vocab_size=self.vocab_size,
                               rate=dropout,
                               max_len=max_seq)
        self.fc = torch.nn.Linear(self.embedding_dim, self.vocab_size)
Example #12
 def __init__(self, params):
     super(PGN, self).__init__()
     word_model_path = os.path.join(os.path.abspath('../'), 'data',
                                    'w2v.model')
     vocab_path = os.path.join(os.path.abspath('../'), 'data',
                               'words_frequences.txt')
     self.params = params
     self.matrix = get_embedding(vocab_path, word_model_path, params)
     self.encoder = Encoder(params["vocab_size"], params["embed_size"],
                            self.matrix, params["enc_units"],
                            params["batch_size"])
     self.attention = BahdanauAttention(params["attn_units"])
     self.decoder = Decoder(params["vocab_size"], params["embed_size"],
                            self.matrix, params["dec_units"],
                            params["batch_size"])
     self.pointer = Pointer()
Example #13
    def __init__(self, hp):
        super(UTransformer, self).__init__()
        self.hp = hp
        self.encoder = Encoder(num_layers=hp.num_blocks,
                               num_predictor=hp.num_predictor,
                               att_unit=(hp.vunits, hp.MTunits, hp.Tunits,
                                         hp.Munits),
                               value_attr=(hp.V_kernel, hp.V_stride),
                               in_seqlen=hp.in_seqlen,
                               num_heads=hp.num_heads,
                               model_structure=hp.model_structure,
                               d_ff=hp.d_ff,
                               d_model=hp.d_model,
                               drop_rate=hp.dropout_rate)

        self.decoder = Decoder(num_layers=hp.num_blocks,
                               num_predictor=hp.num_predictor,
                               att_unit=(hp.vunits, hp.MTunits, hp.Tunits,
                                         hp.Munits),
                               value_attr=(hp.V_kernel, hp.V_stride),
                               out_seqlen=hp.out_seqlen,
                               num_heads=hp.num_heads,
                               model_structure=hp.model_structure,
                               d_ff=hp.d_ff,
                               d_model=hp.d_model,
                               drop_rate=hp.dropout_rate)
Example #14
 def __init__(self):
     super(PointerEncoder, self).__init__()
     self.encoder = Encoder(config.num_layers, config.n_head, config.d_model,
                            config.vocab_size, config.max_enc_len,
                            d_q, d_k, d_v, config.d_affine,
                            config.embedding_dropout, config.att_dropout, config.fc_dorpout)
     self.W_h = nn.Linear(config.d_model, config.d_model, bias=False)
Example #15
 def __init__(self, num_layers, d_model, num_heads, dff,
              input_vocab_size, target_vocab_size, 
              pe_input, pe_target, rate=0.1):
     super(Transformer, self).__init__()
     
     self.Encoder = Encoder(num_layers, d_model, num_heads, dff, 
                            input_vocab_size, pe_input, rate)
     
     self.Decoder = Decoder(num_layers, d_model, num_heads, dff, 
                            target_vocab_size, pe_target, rate)
     
     self.dense = tf.keras.layers.Dense(target_vocab_size)
Example #16
    def __init__(self, args):
        super(Transformer, self).__init__()
        self.args = args

        embed_dim = args.embed_dim
        fix_norm = args.fix_norm
        joint_vocab_size = args.joint_vocab_size
        lang_vocab_size = args.lang_vocab_size
        use_bias = args.use_bias
        self.scale = embed_dim ** 0.5

        if args.mask_logit:
            # mask logits separately per language
            self.logit_mask = None
        else:
            # otherwise, use the same mask for all
            # this only masks out BOS and PAD
            mask = [1.] * joint_vocab_size
            mask[ac.BOS_ID] = 0.
            mask[ac.PAD_ID] = 0.
            self.logit_mask = torch.tensor(mask).type(torch.uint8)

        self.word_embedding = Parameter(torch.Tensor(joint_vocab_size, embed_dim))
        self.lang_embedding = Parameter(torch.Tensor(lang_vocab_size, embed_dim))
        self.out_bias = Parameter(torch.Tensor(joint_vocab_size)) if use_bias else None

        self.encoder1 = Encoder(args)
        self.encoder2 = Encoder(args)
        self.decoder = PEDecoder(args)

        # initialize
        nn.init.normal_(self.lang_embedding, mean=0, std=embed_dim ** -0.5)
        if fix_norm:
            d = 0.01
            nn.init.uniform_(self.word_embedding, a=-d, b=d)
        else:
            nn.init.normal_(self.word_embedding, mean=0, std=embed_dim ** -0.5)

        if use_bias:
            nn.init.constant_(self.out_bias, 0.)
Example #17
class PGN(tf.keras.Model):
    def __init__(self, params):
        super(PGN, self).__init__()
        word_model_path = os.path.join(os.path.abspath('../'), 'data',
                                       'w2v.model')
        vocab_path = os.path.join(os.path.abspath('../'), 'data',
                                  'words_frequences.txt')
        self.params = params
        self.matrix = get_embedding(vocab_path, word_model_path, params)
        self.encoder = Encoder(params["vocab_size"], params["embed_size"],
                               self.matrix, params["enc_units"],
                               params["batch_size"])
        self.attention = BahdanauAttention(params["attn_units"])
        self.decoder = Decoder(params["vocab_size"], params["embed_size"],
                               self.matrix, params["dec_units"],
                               params["batch_size"])
        self.pointer = Pointer()

    def call_encoder(self, enc_inp):
        enc_hidden = self.encoder.initialize_hidden_state()
        enc_output, enc_hidden = self.encoder(enc_inp, enc_hidden)
        return enc_hidden, enc_output

    def call(self, enc_output, dec_hidden, enc_inp, enc_extended_inp, dec_inp,
             batch_oov_len):
        predictions = []
        attentions = []
        p_gens = []
        context_vector, _ = self.attention(dec_hidden, enc_output)
        for t in range(dec_inp.shape[1]):
            dec_x, pred, dec_hidden = self.decoder(
                tf.expand_dims(dec_inp[:, t], 1), dec_hidden, enc_output,
                context_vector)
            context_vector, attn = self.attention(dec_hidden, enc_output)
            p_gen = self.pointer(context_vector, dec_hidden,
                                 tf.squeeze(dec_x, axis=1))

            predictions.append(pred)
            attentions.append(attn)
            p_gens.append(p_gen)
        final_dists = _calc_final_dist(enc_extended_inp, predictions,
                                       attentions, p_gens, batch_oov_len,
                                       self.params["vocab_size"],
                                       self.params["batch_size"])
        if self.params["mode"] == "train":
            return tf.stack(final_dists, 1), dec_hidden
            # predictions_shape = (batch_size, dec_len, vocab_size) with dec_len = 1 in pred mode
        else:
            return tf.stack(final_dists,
                            1), dec_hidden, context_vector, tf.stack(
                                attentions, 1), tf.stack(p_gens, 1)
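
_calc_final_dist is referenced but not defined in this example. For orientation, here is a sketch of the conventional implementation (modeled on the See et al. (2017) pointer-generator reference code; this repository's exact version may differ):

import tensorflow as tf

def _calc_final_dist(enc_extended_inp, vocab_dists, attn_dists, p_gens,
                     batch_oov_len, vocab_size, batch_size):
    # Weight generation vs. copying by p_gen (each p_gen is (batch, 1)).
    vocab_dists = [p_gen * d for p_gen, d in zip(p_gens, vocab_dists)]
    attn_dists = [(1 - p_gen) * d for p_gen, d in zip(p_gens, attn_dists)]

    # Extend the vocab distributions with zeros for in-batch OOV ids.
    extra_zeros = tf.zeros((batch_size, batch_oov_len))
    vocab_dists = [tf.concat([d, extra_zeros], axis=1) for d in vocab_dists]

    # Scatter attention mass onto the extended-vocab ids of the source tokens.
    ids = tf.cast(enc_extended_inp, tf.int32)              # (batch, attn_len)
    batch_nums = tf.tile(tf.expand_dims(tf.range(batch_size), 1),
                         [1, tf.shape(ids)[1]])
    indices = tf.stack([batch_nums, ids], axis=2)
    shape = [batch_size, vocab_size + batch_oov_len]
    copy_dists = [tf.scatter_nd(indices, d, shape) for d in attn_dists]

    # Final distribution per decoder step: generate + copy.
    return [v + c for v, c in zip(vocab_dists, copy_dists)]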
Example #18
class PGN(tf.keras.Model):
    def __init__(self, params):
        super(PGN, self).__init__()
        self.params = params
        self.encoder = Encoder(params["vocab_size"], params["embed_size"],
                               params["enc_units"], params["batch_size"])
        self.attention = LuongAttention(params["attn_units"])
        self.decoder = Decoder(params["vocab_size"], params["embed_size"],
                               params["dec_units"], params["batch_size"])
        self.pointer = Pointer()

    def call_encoder(self, enc_inp):
        enc_hidden = self.encoder.initialize_hidden_state()
        enc_output, enc_hidden, enc_state = self.encoder(enc_inp, enc_hidden)
        return enc_hidden, enc_state, enc_output

    def call(self, enc_output, dec_hidden, enc_state, enc_inp,
             enc_extended_inp, dec_inp, batch_oov_len):

        predictions = []
        attentions = []
        p_gens = []
        context_vector, _ = self.attention(dec_hidden, enc_output)
        for t in range(dec_inp.shape[1]):
            dec_x, pred, dec_hidden, context_vector, attn = self.decoder(
                tf.expand_dims(dec_inp[:, t], 1), [dec_hidden, enc_state],
                enc_output, context_vector)
            # recomputed here but unused: the decoder already returns
            # context_vector and attn
            context_vector1, attn1 = self.attention(dec_hidden, enc_output)
            p_gen = self.pointer(context_vector, dec_hidden,
                                 tf.squeeze(dec_x, axis=1))

            predictions.append(pred)
            attentions.append(attn)
            p_gens.append(p_gen)
        final_dists = _calc_final_dist(enc_extended_inp, predictions,
                                       attentions, p_gens, batch_oov_len,
                                       self.params["vocab_size"],
                                       self.params["batch_size"])
        if self.params["mode"] == "train":
            return tf.stack(
                final_dists, 1
            ), dec_hidden  # predictions_shape = (batch_size, dec_len, vocab_size) with dec_len = 1 in pred mode
        else:
            return tf.stack(final_dists,
                            1), dec_hidden, context_vector, tf.stack(
                                attentions, 1), tf.stack(p_gens, 1)
Example #19
 def __init__(self,
              input_vocab_size,
              output_vocab_size,
              d_model,
              n_layers,
              n_heads,
              d_ff,
              dropout_rate=0.1):
     super().__init__()
     self.encoder = Encoder(input_vocab_size, d_model, n_layers, n_heads,
                            d_ff, dropout_rate)
     self.decoder = Decoder(output_vocab_size, d_model, n_layers, n_heads,
                            d_ff, dropout_rate)
     self.final_output_dense = tf.keras.layers.Dense(
         output_vocab_size
     )  # map decoder output from d_model to output_vocab_size
Example #20
    def __init__(self, hps, device):
        super(LM, self).__init__()
        self.hps = hps
        self.device = device
        self.emb_size = hps.emb_size
        self.hidden_size = hps.hidden_size
        self.vocab_size = hps.vocab_size

        self.pad_idx = hps.pad_idx

        # components
        self.layers = nn.ModuleDict()
        self.layers['word_embed'] = nn.Embedding(self.vocab_size,
                                                 self.emb_size,
                                                 padding_idx=self.pad_idx)
        self.layers['encoder'] = Encoder(self.emb_size,
                                         self.hidden_size,
                                         drop_ratio=hps.drop_ratio)
        self.layers['out_proj'] = nn.Linear(hps.hidden_size, hps.vocab_size)
Example #21
 def __init__(self,
              num_layers,
              d_model,
              num_heads,
              dff,
              input_vocab_size,
              output_dim,
              maximum_position_encoding,
              rate=0.1):
     super(Transformer, self).__init__()
      # encoder
     self.encoder = Encoder(num_layers, d_model, num_heads, dff,
                            input_vocab_size, maximum_position_encoding,
                            rate)
      # flatten to reduce dimensionality
     self.x_flatten = tf.keras.layers.Flatten()
      # fully connected output layer
     self.final_layer = tf.keras.layers.Dense(output_dim,
                                              activation='sigmoid')
Example #22
class PGN(tf.keras.Model):
    def __init__(self, params):
        super(PGN, self).__init__()
        self.params = params
        self.encoder = Encoder(params["vocab_size"], params["embed_size"],
                               params["enc_units"], params["batch_size"])
        self.attention = BahdanauAttention(params["attn_units"])
        self.decoder = Decoder(params["vocab_size"], params["embed_size"],
                               params["dec_units"], params["batch_size"])
        self.pointer = Pointer()

    def call_encoder(self, enc_inp):
        enc_hidden = self.encoder.initialize_hidden_state()
        enc_output, enc_hidden = self.encoder(enc_inp, enc_hidden)
        return enc_hidden, enc_output

    def call(self, enc_output, dec_hidden, enc_inp, enc_extended_inp, dec_inp,
             batch_oov_len):

        predictions = []
        attentions = []
        p_gens = []
        context_vector, _ = self.attention(dec_hidden, enc_output)
        for t in range(dec_inp.shape[1]):
            dec_x, pred, dec_hidden = self.decoder(
                tf.expand_dims(dec_inp[:, t], 1), dec_hidden, enc_output,
                context_vector)
            context_vector, attn = self.attention(dec_hidden, enc_output)
            p_gen = self.pointer(context_vector, dec_hidden,
                                 tf.squeeze(dec_x, axis=1))

            predictions.append(pred)
            attentions.append(attn)
            p_gens.append(p_gen)
        final_dists = _calc_final_dist(enc_extended_inp, predictions,
                                       attentions, p_gens, batch_oov_len,
                                       self.params["vocab_size"],
                                       self.params["batch_size"])
        return tf.stack(
            final_dists, 1
        ), dec_hidden  # predictions_shape = (batch_size, dec_len, vocab_size) with dec_len = 1 in pred mode
Example #23
    def __init__(self,
                 num_layers,
                 d_model,
                 num_heads,
                 dff,
                 target_vocab_size,
                 pe_input,
                 pe_target,
                 rate=0.1,
                 training=True):
        super(Transformer, self).__init__()

        self.encoder = Encoder(num_layers, d_model, num_heads, dff, pe_input,
                               rate)

        self.decoder = Decoder(num_layers, d_model, num_heads, dff,
                               target_vocab_size, pe_target, rate)

        self.final_layer = tf.keras.layers.Dense(target_vocab_size)

        self.training = training
Example #24
    def __init__(self,
                 vocab_size,
                 embed_size,
                 output_size,
                 filters=128,
                 num_heads=1,
                 ques_limit=50,
                 dropout=0.1,
                 num_blocks=1,
                 num_convs=2,
                 embeddings=None,
                 initializer=tf.variance_scaling_initializer(
                     1, 'fan_in', distribution='normal'),
                 regularizer=l2(3e-7)):
        self.ques_limit = ques_limit
        self.num_blocks = num_blocks
        self.num_convs = num_convs
        self.dropout = dropout
        if embeddings is not None:
            embeddings = [embeddings]
        self.embed_layer = Embedding(vocab_size,
                                     embed_size,
                                     weights=embeddings,
                                     trainable=False)
        self.highway = Highway(embed_size, 2, initializer, regularizer,
                               dropout)
        self.projection = Conv1D(filters,
                                 1,
                                 activation='linear',
                                 kernel_initializer=initializer,
                                 kernel_regularizer=regularizer,
                                 bias_regularizer=regularizer)
        self.encoder = Encoder(filters, 7, num_blocks, num_convs, num_heads,
                               initializer, regularizer, dropout)

        self.output_layer = Conv1D(output_size,
                                   1,
                                   activation='linear',
                                   kernel_regularizer=regularizer)
Example #25
    def __init__(self, hps, device):
        super(Seq2Seq, self).__init__()
        self.hps = hps
        self.device = device

        self.emb_size = hps.emb_size
        self.hidden_size = hps.hidden_size
        self.flow_h_size = hps.flow_h_size
        self.flow_depth = hps.flow_depth
        self.vocab_size = hps.vocab_size
        self.max_len = hps.max_len

        self._infor_nats = hps.infor_nats
        self._infor_groups = hps.infor_groups

        self.pad_idx = hps.pad_idx
        self.bos_idx = hps.bos_idx

        self.bos_tensor = torch.tensor(self.bos_idx,
                                       dtype=torch.long,
                                       device=device).view(1, 1)

        # we directly set the latent size to be the same as that of the sentence representation v_k
        self.latent_size = self.hidden_size * 2

        # components
        self.layers = nn.ModuleDict()
        self.layers['word_embed'] = nn.Embedding(self.vocab_size,
                                                 self.emb_size,
                                                 padding_idx=self.pad_idx)

        self.layers['source_encoder'] = Encoder(self.emb_size,
                                                self.hidden_size,
                                                drop_ratio=hps.drop_ratio)
        self.layers['style_encoder'] = Encoder(self.emb_size,
                                               self.hidden_size,
                                               drop_ratio=hps.drop_ratio)

        # decoder inputs: word embedding, encoder states, and style representation z
        self.layers['decoder'] = Decoder(self.emb_size + self.hidden_size * 2 +
                                         self.latent_size,
                                         self.hidden_size,
                                         drop_ratio=hps.drop_ratio,
                                         attn_drop_ratio=hps.attn_drop_ratio)

        self.layers['out_proj'] = nn.Linear(hps.hidden_size, hps.vocab_size)

        # MLPs to compute the decoder initial state
        self.layers['dec_init_h'] = nn.Sequential(
            nn.Linear(self.hidden_size * 2, self.hidden_size), nn.Tanh())
        self.layers['dec_init_c'] = nn.Sequential(
            nn.Linear(self.hidden_size * 2, self.hidden_size), nn.Tanh())

        self.layers['flow_h_proj'] = nn.Sequential(
            nn.Linear(self.hidden_size * 2, self.flow_h_size), nn.Tanh())

        # the Inverse Autoregressive Flow (IAF) module
        self.layers['iaf'] = IAF(n_z=self.latent_size,
                                 n_h=self.flow_h_size,
                                 n_made=hps.made_size,
                                 flow_depth=self.flow_depth)

        # --------------
        self._log2pi = torch.log(
            torch.tensor(2 * np.pi, dtype=torch.float,
                         device=device))  # log(2pi)
Example #26
def get_full_model(vocab_size=len(hparams.VOCAB),
                   char_embed_size=hparams.CHAR_EMBED_SIZE,
                   sliding_window_size=hparams.SLIDING_WINDOW_SIZE,
                   spk_embed_lstm_units=hparams.SPK_EMBED_LSTM_UNITS,
                   spk_embed_size=hparams.SPK_EMBED_SIZE,
                   spk_embed_num_layers=hparams.SPK_EMBED_NUM_LAYERS,
                   enc_conv1_bank_depth=hparams.ENC_CONV1_BANK_DEPTH,
                   enc_convprojec_filters1=hparams.ENC_CONVPROJEC_FILTERS1,
                   enc_convprojec_filters2=hparams.ENC_CONVPROJEC_FILTERS2,
                   enc_highway_depth=hparams.ENC_HIGHWAY_DEPTH,
                   hidden_size=hparams.HIDDEN_SIZE,
                   post_conv1_bank_depth=hparams.POST_CONV1_BANK_DEPTH,
                   post_convprojec_filters1=hparams.POST_CONVPROJEC_FILTERS1,
                   post_convprojec_filters2=hparams.POST_CONVPROJEC_FILTERS2,
                   post_highway_depth=hparams.POST_HIGHWAY_DEPTH,
                   attention_dim=hparams.ATTENTION_DIM,
                   target_size=hparams.TARGET_MAG_FRAME_SIZE,
                   n_mels=hparams.SYNTHESIZER_N_MELS,
                   output_per_step=hparams.OUTPUT_PER_STEP,
                   embed_mels=hparams.SPK_EMBED_N_MELS,
                   enc_seq_len=None,
                   dec_seq_len=None):
    char_inputs = Input(shape=(enc_seq_len, ), name='char_inputs')
    decoder_inputs = Input(shape=(dec_seq_len, n_mels), name='decoder_inputs')
    spk_inputs = Input(shape=(None, sliding_window_size, embed_mels),
                       name='spk_embed_inputs')

    char_encoder = Encoder(hidden_size=hidden_size // 2,
                           vocab_size=vocab_size,
                           embedding_size=char_embed_size,
                           conv1d_bank_depth=enc_conv1_bank_depth,
                           convprojec_filters1=enc_convprojec_filters1,
                           convprojec_filters2=enc_convprojec_filters2,
                           highway_depth=enc_highway_depth,
                           name='char_encoder')
    speaker_encoder = InferenceSpeakerEmbedding(
        lstm_units=spk_embed_lstm_units,
        proj_size=spk_embed_size,
        num_layers=spk_embed_num_layers,
        trainable=False,
        name='embeddings')
    condition = Conditioning()
    decoder = Decoder(hidden_size=hidden_size,
                      attention_dim=attention_dim,
                      n_mels=n_mels,
                      output_per_step=output_per_step,
                      name='decoder')
    post_processing = PostProcessing(
        hidden_size=hidden_size // 2,
        conv1d_bank_depth=post_conv1_bank_depth,
        convprojec_filters1=post_convprojec_filters1,
        convprojec_filters2=post_convprojec_filters2,
        highway_depth=post_highway_depth,
        n_fft=target_size,
        name='postprocessing')

    char_enc = char_encoder(char_inputs)
    spk_embed = speaker_encoder(spk_inputs)
    conditioned_char_enc = condition([char_enc, spk_embed])
    decoder_pred, alignments = decoder([conditioned_char_enc, decoder_inputs],
                                       initial_state=None)
    postnet_out = post_processing(decoder_pred)

    full_model = Model(
        inputs=[char_inputs, spk_inputs, decoder_inputs],
        outputs=[decoder_pred, postnet_out, alignments, spk_embed])
    return full_model
Example #27
    def __init__(self,
                 vocab_size,
                 embed_size,
                 filters=128,
                 num_heads=8,
                 encoder_num_blocks=1,
                 encoder_num_convs=4,
                 output_num_blocks=7,
                 output_num_convs=2,
                 cont_limit=400,
                 ques_limit=50,
                 dropout=0.1,
                 embeddings=None,
                 initializer=tf.variance_scaling_initializer(
                     1, 'fan_in', distribution='normal'),
                 regularizer=l2(3e-7)):
        self.cont_limit = cont_limit
        self.ques_limit = ques_limit
        self.dropout = dropout
        self.encoder_num_blocks = encoder_num_blocks
        self.encoder_num_convs = encoder_num_convs
        if embeddings is not None:
            embeddings = [embeddings]
        self.embed_layer = Embedding(vocab_size,
                                     embed_size,
                                     weights=embeddings,
                                     trainable=False)
        self.highway = Highway(embed_size, 2, initializer, regularizer,
                               dropout)
        self.projection1 = Conv1D(filters,
                                  1,
                                  activation='linear',
                                  kernel_initializer=initializer,
                                  kernel_regularizer=regularizer,
                                  bias_regularizer=regularizer)

        self.encoder = Encoder(filters, 7, encoder_num_blocks,
                               encoder_num_convs, num_heads, initializer,
                               regularizer, dropout)

        self.coattention = ContextQueryAttention(cont_limit, ques_limit,
                                                 initializer, regularizer,
                                                 dropout)
        self.projection2 = Conv1D(filters,
                                  1,
                                  activation='linear',
                                  kernel_initializer=initializer,
                                  kernel_regularizer=regularizer,
                                  bias_regularizer=regularizer)

        self.output_layer = Encoder(filters, 5, output_num_blocks,
                                    output_num_convs, num_heads, initializer,
                                    regularizer, dropout)

        self.start_layer = Conv1D(1,
                                  1,
                                  activation='linear',
                                  kernel_initializer=initializer,
                                  kernel_regularizer=regularizer,
                                  bias_regularizer=regularizer)
        self.end_layer = Conv1D(1,
                                1,
                                activation='linear',
                                kernel_initializer=initializer,
                                kernel_regularizer=regularizer,
                                bias_regularizer=regularizer)
Example #28
class PGN(tf.keras.Model):
    def __init__(self, params):
        super(PGN, self).__init__()
        self.params = params
        self.encoder = Encoder(params["vocab_size"], params["embed_size"],
                               params["enc_units"], params["batch_size"])
        self.attention = BahdanauAttention(params["attn_units"])
        if params["coverage"]:
            self.coverage = Coverage(params["attn_units"])
        self.decoder = Decoder(params["vocab_size"], params["embed_size"],
                               params["dec_units"], params["batch_size"],
                               params["use_stats"])
        self.pointer = Pointer()

    def call_encoder(self, enc_inp):
        enc_hidden = self.encoder.initialize_hidden_state()
        enc_output, enc_hidden = self.encoder(enc_inp, enc_hidden)
        return enc_hidden, enc_output

    def call(self,
             enc_output,
             dec_hidden,
             enc_inp,
             enc_extended_inp,
             dec_inp,
             batch_oov_len,
             cov_vec,
             stats=None):

        predictions = []
        attentions = []
        p_gens = []
        if self.params["coverage"]:
            cov_features = self.coverage(cov_vec)
        else:
            cov_features = None
        context_vector, _ = self.attention(dec_hidden, enc_output,
                                           cov_features)

        for t in range(dec_inp.shape[1]):
            dec_x, pred, dec_hidden = self.decoder(
                tf.expand_dims(dec_inp[:, t], 1), dec_hidden, enc_output,
                context_vector, stats)

            if self.params["coverage"]:
                cov_features = self.coverage(cov_vec)
            else:
                cov_features = None
            context_vector, attn = self.attention(dec_hidden, enc_output,
                                                  cov_features)
            p_gen = self.pointer(context_vector, dec_hidden,
                                 tf.squeeze(dec_x, axis=1))
            if self.params["coverage"]:
                cov_vec += attn

            attn = tf.squeeze(attn, axis=-1)
            predictions.append(pred)
            attentions.append(attn)
            p_gens.append(p_gen)

        final_dists = _calc_final_dist(enc_extended_inp, predictions,
                                       attentions, p_gens, batch_oov_len,
                                       self.params["vocab_size"],
                                       self.params["batch_size"])
        res = {}
        res["final_dists"] = tf.stack(final_dists, 1)
        res["dec_hidden"] = dec_hidden
        if self.params["coverage"] or self.params["mode"] != "train":
            res["cov_vec"] = cov_vec
            res["attn_weights"] = tf.stack(attentions, 1)
        if self.params["mode"] != "train":
            res["context"] = context_vector
            res["p_gens"] = tf.stack(p_gens, 1)
        return res  # predictions_shape = (batch_size, dec_len, vocab_size) with dec_len = 1 in pred mode
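
The coverage vector accumulated above (cov_vec += attn) is normally paired with a coverage loss during training, which this example does not show. A minimal sketch of the standard See et al. (2017) penalty, with assumed shapes and a statically known decoder length:

import tensorflow as tf

def coverage_loss(attn_weights, dec_padding_mask):
    """attn_weights: (batch, dec_len, attn_len) stacked attention weights;
    dec_padding_mask: (batch, dec_len) with 1.0 on real target tokens."""
    cov = tf.zeros_like(attn_weights[:, 0, :])   # running coverage, (batch, attn_len)
    losses = []
    for t in range(attn_weights.shape[1]):       # assumes static dec_len
        attn = attn_weights[:, t, :]
        losses.append(tf.reduce_sum(tf.minimum(attn, cov), axis=1))
        cov += attn
    step_losses = tf.stack(losses, axis=1) * dec_padding_mask
    return tf.reduce_mean(tf.reduce_sum(step_losses, axis=1))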
Example #29
def get_synthesizer_model(
        vocab_size=len(hparams.VOCAB),
        char_embed_size=hparams.CHAR_EMBED_SIZE,
        spk_embed_size=hparams.SPK_EMBED_SIZE,
        enc_conv1_bank_depth=hparams.ENC_CONV1_BANK_DEPTH,
        enc_convprojec_filters1=hparams.ENC_CONVPROJEC_FILTERS1,
        enc_convprojec_filters2=hparams.ENC_CONVPROJEC_FILTERS2,
        enc_highway_depth=hparams.ENC_HIGHWAY_DEPTH,
        hidden_size=hparams.HIDDEN_SIZE,
        post_conv1_bank_depth=hparams.POST_CONV1_BANK_DEPTH,
        post_convprojec_filters1=hparams.POST_CONVPROJEC_FILTERS1,
        post_convprojec_filters2=hparams.POST_CONVPROJEC_FILTERS2,
        post_highway_depth=hparams.POST_HIGHWAY_DEPTH,
        attention_dim=hparams.ATTENTION_DIM,
        target_size=hparams.TARGET_MAG_FRAME_SIZE,
        n_mels=hparams.SYNTHESIZER_N_MELS,
        output_per_step=hparams.OUTPUT_PER_STEP,
        learning_rate=hparams.LEARNING_RATE,
        clipnorm=hparams.CLIPNORM,
        enc_seq_len=None,
        dec_seq_len=None):
    char_inputs = Input(shape=(enc_seq_len, ), name='char_inputs')
    decoder_inputs = Input(shape=(dec_seq_len, n_mels), name='decoder_inputs')
    spk_embed_inputs = Input(shape=(spk_embed_size, ), name='spk_embed_inputs')

    char_encoder = Encoder(hidden_size=hidden_size // 2,
                           vocab_size=vocab_size,
                           embedding_size=char_embed_size,
                           conv1d_bank_depth=enc_conv1_bank_depth,
                           convprojec_filters1=enc_convprojec_filters1,
                           convprojec_filters2=enc_convprojec_filters2,
                           highway_depth=enc_highway_depth,
                           name='char_encoder')
    condition = Conditioning()
    decoder = Decoder(hidden_size=hidden_size,
                      attention_dim=attention_dim,
                      n_mels=n_mels,
                      output_per_step=output_per_step,
                      name='decoder')
    post_processing = PostProcessing(
        hidden_size=hidden_size // 2,
        conv1d_bank_depth=post_conv1_bank_depth,
        convprojec_filters1=post_convprojec_filters1,
        convprojec_filters2=post_convprojec_filters2,
        highway_depth=post_highway_depth,
        n_fft=target_size,
        name='postprocessing')

    char_enc = char_encoder(char_inputs)
    conditioned_char_enc = condition([char_enc, spk_embed_inputs])
    decoder_pred, alignments = decoder([conditioned_char_enc, decoder_inputs],
                                       initial_state=None)
    postnet_out = post_processing(decoder_pred)

    synthesizer_model = Model(
        inputs=[char_inputs, spk_embed_inputs, decoder_inputs],
        outputs=[decoder_pred, postnet_out, alignments])
    optimizer = Adam(lr=learning_rate, clipnorm=clipnorm)
    synthesizer_model.compile(optimizer=optimizer,
                              loss=['mae', 'mae', None],
                              loss_weights=[1., 1., None])

    return synthesizer_model
Example #30
    def __init__(self,
                 encoder_1,
                 hidden_1,
                 Z_DIMS,
                 decoder_share,
                 share_hidden,
                 decoder_1,
                 hidden_2,
                 encoder_l,
                 hidden3,
                 encoder_2,
                 hidden_4,
                 encoder_l1,
                 hidden3_1,
                 decoder_2,
                 hidden_5,
                 drop_rate,
                 log_variational=True,
                 Type='Bernoulli',
                 device='cpu',
                 n_centroids=19,
                 penality="GMM",
                 model=2):

        super(scMVAE_POE, self).__init__()

        self.X1_encoder = Encoder(encoder_1,
                                  hidden_1,
                                  Z_DIMS,
                                  dropout_rate=drop_rate)
        self.X1_encoder_l = Encoder(encoder_l,
                                    hidden3,
                                    1,
                                    dropout_rate=drop_rate)

        self.X1_decoder = Decoder_ZINB(decoder_1,
                                       hidden_2,
                                       encoder_1[0],
                                       dropout_rate=drop_rate)

        self.X2_encoder = Encoder(encoder_2,
                                  hidden_4,
                                  Z_DIMS,
                                  dropout_rate=drop_rate)

        self.decode_share = build_multi_layers(decoder_share,
                                               dropout_rate=drop_rate)

        if Type == 'ZINB':
            self.X2_encoder_l = Encoder(encoder_l1,
                                        hidden3_1,
                                        1,
                                        dropout_rate=drop_rate)
            self.decoder_x2 = Decoder_ZINB(decoder_2,
                                           hidden_5,
                                           encoder_2[0],
                                           dropout_rate=drop_rate)
        else:
            # 'Bernoulli', 'Possion' (sic), and any other Type share the
            # same decoder construction
            self.decoder_x2 = Decoder(decoder_2,
                                      hidden_5,
                                      encoder_2[0],
                                      Type,
                                      dropout_rate=drop_rate)

        self.experts = ProductOfExperts()
        self.Z_DIMS = Z_DIMS
        self.share_hidden = share_hidden
        self.log_variational = log_variational
        self.Type = Type
        self.decoder_share = decoder_share
        self.decoder_1 = decoder_1
        self.n_centroids = n_centroids
        self.penality = penality
        self.device = device
        self.model = model

        self.pi = nn.Parameter(torch.ones(n_centroids) / n_centroids)  # pc
        self.mu_c = nn.Parameter(torch.zeros(Z_DIMS, n_centroids))  # mu
        self.var_c = nn.Parameter(torch.ones(Z_DIMS, n_centroids))  # sigma^2