def __init__(self, config):
    # Generator side: domain-specific encoders/decoders around shared latent layers.
    gen_hyperparameters = config['hyperparameters']['gen']
    self.encoder_a = layers.Encoder(gen_hyperparameters)
    self.encoder_b = layers.Encoder(gen_hyperparameters)
    self.encoder_shared = layers.EncoderShared(gen_hyperparameters)
    self.downstreamer = layers.Downstreamer(gen_hyperparameters)
    self.decoder_shared = layers.DecoderShared(gen_hyperparameters)
    self.decoder_a = layers.Decoder(gen_hyperparameters)
    self.decoder_b = layers.Decoder(gen_hyperparameters)

    # One discriminator per domain.
    dis_hyperparameters = config['hyperparameters']['dis']
    self.dis_a = layers.Discriminator(dis_hyperparameters)
    self.dis_b = layers.Discriminator(dis_hyperparameters)
def __init__(self, word_vectors, char_vectors, context_max_len, query_max_len,
             d_model, train_cemb=False, pad=0, dropout=0.1, num_head=8):
    """QANet: embedding, embedding encoder, context-query attention,
    stacked model encoder blocks, and an output layer."""
    super(QANet, self).__init__()
    if train_cemb:
        self.char_emb = nn.Embedding.from_pretrained(char_vectors, freeze=False)
        print("Training char_embeddings")
    else:
        self.char_emb = nn.Embedding.from_pretrained(char_vectors)
    self.word_emb = nn.Embedding.from_pretrained(word_vectors)
    self.LC = context_max_len
    self.LQ = query_max_len
    self.num_head = num_head
    self.pad = pad
    self.dropout = dropout
    wemb_dim = word_vectors.size()[1]
    cemb_dim = char_vectors.size()[1]
    # print("Word vector dim-%d, Char vector dim-%d" % (wemb_dim, cemb_dim))

    # Layer declarations
    self.emb = layers.Embedding(wemb_dim, cemb_dim, d_model)
    self.emb_enc = layers.Encoder(num_conv=4, d_model=d_model,
                                  num_head=num_head, k=7, dropout=0.1)
    self.cq_att = layers.CQAttention(d_model=d_model)
    # Forward layer to reduce the dimension of the cq_att output back to d_model.
    self.cq_resizer = layers.Initialized_Conv1d(d_model * 4, d_model)
    self.model_enc_blks = nn.ModuleList([
        layers.Encoder(num_conv=2, d_model=d_model, num_head=num_head,
                       k=5, dropout=0.1) for _ in range(7)
    ])
    self.out = layers.QAOutput(d_model)
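# Hedged usage sketch for the QANet constructor above, assuming the QANet
# class and its layers module are importable. The random tensors stand in for
# real pretrained word/char embedding tables, and all sizes here (30000 x 300
# words, 96 x 64 chars, d_model=128) are illustrative assumptions, not values
# from the source.
import torch

word_vectors = torch.rand(30000, 300)
char_vectors = torch.rand(96, 64)
model = QANet(word_vectors, char_vectors,
              context_max_len=400, query_max_len=50,
              d_model=128, train_cemb=False, num_head=8)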
def __init__(self, in_vocab, hidden_size, n_layers, trg_vocab, drop_prob=0.,
             use_answer=True):
    super(Seq2Seq, self).__init__()
    self.enc = layers.Encoder(
        input_size=in_vocab.vectors.size(1) if not use_answer
        else in_vocab.vectors.size(1) + config.answer_embedding_size,
        hidden_size=hidden_size,
        num_layers=n_layers,
        word_vectors=in_vocab.vectors,
        bidirectional=True,
        drop_prob=drop_prob if n_layers > 1 else 0.)
    self.dec = layers.Decoder(
        input_size=in_vocab.vectors.size(1) + hidden_size,
        hidden_size=hidden_size,
        word_vectors=in_vocab.vectors,
        trg_vocab=trg_vocab,
        n_layers=n_layers,
        dropout=drop_prob if n_layers > 1 else 0.,
        attention=True)
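# Hedged usage sketch for the Seq2Seq constructor above, assuming the class,
# its layers module, and the config module (for config.answer_embedding_size)
# are importable. The SimpleNamespace objects are stand-ins for torchtext-style
# vocabularies whose .vectors tensor holds pretrained embeddings; all sizes are
# illustrative assumptions.
import torch
from types import SimpleNamespace

in_vocab = SimpleNamespace(vectors=torch.rand(5000, 300))
trg_vocab = SimpleNamespace(vectors=torch.rand(5000, 300))
model = Seq2Seq(in_vocab, hidden_size=256, n_layers=2, trg_vocab=trg_vocab,
                drop_prob=0.2, use_answer=True)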
def __init__(self, d_w, d_e, num_classes, hidden_dim, word_emb_weight,
             num_layers=4, num_heads=8, dropout=0.1, max_sen_len=100):
    super(Transformer, self).__init__()
    self.max_sen_len = max_sen_len
    self.w2v = nn.Embedding.from_pretrained(word_emb_weight, freeze=False)
    self.pos_embedding1 = nn.Embedding(2 * self.max_sen_len, d_e)
    self.pos_embedding2 = nn.Embedding(2 * self.max_sen_len, d_e)
    c = copy.deepcopy
    d_model = d_w + 2 * d_e
    self_attn = attention.MultiHeadAttention(h=num_heads, d_model=d_model,
                                             dropout=dropout)
    ff = layers.PositionwiseFeedForward(d_model=d_model, d_ff=hidden_dim,
                                        dropout=dropout)
    # (batch, sen, d_model) => (batch, d_model)
    word_attn = attention.WordAttention(d_model)
    self.model = nn.Sequential(
        layers.Encoder(
            layers.EncoderLayer(d_model, c(self_attn), c(ff), dropout),
            num_layers),
        word_attn,
        nn.Linear(d_model, d_model // 2),
        nn.ReLU(),
        nn.Linear(d_model // 2, num_classes))
    for p in self.model.parameters():
        if p.dim() > 1:  # number of tensor dimensions
            nn.init.xavier_uniform_(p)
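# Hedged usage sketch for the Transformer classifier above, assuming the class
# and its attention/layers modules are importable. The random matrix stands in
# for a pretrained word embedding table; d_w must match its second dimension,
# num_heads must divide d_w + 2 * d_e, and the remaining sizes are illustrative
# assumptions.
import torch

word_emb_weight = torch.rand(20000, 300)  # (vocab_size, d_w)
clf = Transformer(d_w=300, d_e=50, num_classes=5, hidden_dim=1024,
                  word_emb_weight=word_emb_weight,
                  num_layers=4, num_heads=8, dropout=0.1, max_sen_len=100)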
def test_encoder_with_resblock(capsys):
    define_additional_flags(Namespace(disable_residual_block=False))
    input = tf.placeholder(tf.float32, [None, 64, 64, 32])
    output = layers.Encoder(32, FLAGS.hidden_dims, False)(input)
    with capsys.disabled():
        print('encoder with residual blocks output:')
        print(output.shape)
        print('')
def test_encoder(capsys):
    define_additional_flags()
    input = tf.placeholder(tf.float32, [None, 64, 64, 32])
    output = layers.Encoder(32, FLAGS.hidden_dims, False)(input)
    with capsys.disabled():
        print('encoder output:')
        print(output.shape)
        print('')
def __init__(self, hidden_dim, embedding_matrix, train_word_embeddings,
             dropout, pooling_size, number_of_iters, number_of_layers):
    super(CoattentionModel, self).__init__()
    self.Encoder = layers.Encoder(hidden_dim, embedding_matrix,
                                  train_word_embeddings, dropout,
                                  number_of_layers)
    self.Coattention_Encoder = layers.Coattention_Encoder(
        hidden_dim, dropout, number_of_layers)
    self.Decoder = layers.Decoder(hidden_dim, pooling_size, number_of_iters,
                                  dropout)
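# Hedged usage sketch for the CoattentionModel constructor above, assuming the
# class and its layers module are importable. The random matrix stands in for a
# pretrained embedding table; every size below is an illustrative assumption.
import torch

embedding_matrix = torch.rand(40000, 300)
model = CoattentionModel(hidden_dim=200, embedding_matrix=embedding_matrix,
                         train_word_embeddings=False, dropout=0.3,
                         pooling_size=16, number_of_iters=4,
                         number_of_layers=1)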
def __init__(self, d_w, d_e, num_heads, num_layers, hidden_dim, window_sizes,
             num_filter, dropout_p, is_gpu, num_classes=2):
    super(CharAttnModelHelper, self).__init__()
    self.w2v = nn.Embedding(97, d_w)
    self.pos_embedding = nn.Embedding(842, d_e)
    self.is_gpu = is_gpu
    c = copy.deepcopy
    d_model = d_w + d_e
    self.cnn_layer1 = nn.Sequential(
        nn.Conv2d(in_channels=1,
                  out_channels=d_model,
                  kernel_size=(3, d_model),
                  stride=(1, 1),
                  padding=(1, 0))  # (batch, d_model, max_sen_len, 1)
    )
    self.cnn_layer1.apply(self.weights_init)
    self_attn = attention.MultiHeadAttention(h=num_heads, d_model=d_model,
                                             dropout=dropout_p)
    ff = layers.PositionwiseFeedForward(d_model=d_model, d_ff=hidden_dim,
                                        dropout=dropout_p)
    self.self_attn_layer = nn.Sequential(
        layers.Encoder(
            layers.EncoderLayer(d_model, c(self_attn), c(ff), dropout_p),
            num_layers))  # (batch, max_sen_len, d_w + d_e)
    for p in self.self_attn_layer.parameters():
        if p.dim() > 1:  # number of tensor dimensions
            nn.init.xavier_uniform_(p)
    self.cnn_layer2 = CNNLayers(d_model, num_filter, window_sizes, dropout_p,
                                is_gpu)
    # (batch, len(window_sizes), num_filter) => (batch, num_filter)
    self.word_attn = attention.WordAttention(num_filter)
    for p in self.word_attn.parameters():
        if p.dim() > 1:  # number of tensor dimensions
            nn.init.xavier_uniform_(p)
    self.linear_layer = nn.Sequential(
        nn.Linear(num_filter, num_filter // 2),
        nn.Dropout(dropout_p),
        nn.Tanh(),
        nn.Linear(num_filter // 2, num_classes))
    self.linear_layer.apply(self.weights_init)
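# Hedged usage sketch for the character-level classifier above, assuming the
# class and its attention/layers/CNNLayers helpers are importable. The
# character vocabulary (97) and position range (842) are fixed inside the
# constructor; the arguments below are illustrative assumptions, with num_heads
# chosen to divide d_w + d_e.
model = CharAttnModelHelper(d_w=64, d_e=16, num_heads=8, num_layers=2,
                            hidden_dim=256, window_sizes=[3, 4, 5],
                            num_filter=128, dropout_p=0.1, is_gpu=False,
                            num_classes=2)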
def __init__(self, source_vocab_size, target_vocab_size, embedding_dim,
             cell_type, rnn_dim, encoder_rnn_layer_num, bidirectional,
             decoder_rnn_layer_num, attention_dim, go_id, eos_id, pad_id):
    self.GO = go_id
    self.EOS = eos_id
    self.PAD = pad_id
    self.TARGET_VOCAB_SIZE = target_vocab_size
    self.rnn_dim = rnn_dim
    self.source_embedding_layer = layers.Embedding_Layer(
        source_vocab_size, embedding_dim, "source")
    self.target_embedding_layer = layers.Embedding_Layer(
        target_vocab_size, embedding_dim, "target")
    self.encoder = layers.Encoder(cell_type, rnn_dim, encoder_rnn_layer_num,
                                  bidirectional)
    self.decoder = layers.Decoder(cell_type, rnn_dim, decoder_rnn_layer_num)
    self.attention_layer = layers.Attention_Layer(attention_dim)
    self.projection_layer = tf.layers.Dense(embedding_dim, use_bias=False)
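# Hedged usage sketch for the TF1-style seq2seq constructor above. The class
# name Seq2SeqModel is hypothetical (the snippet does not show it), and the
# vocabulary sizes, dimensions, cell type, and special token ids are
# illustrative assumptions; only the argument names come from the signature.
model = Seq2SeqModel(source_vocab_size=8000, target_vocab_size=8000,
                     embedding_dim=256, cell_type='lstm', rnn_dim=512,
                     encoder_rnn_layer_num=2, bidirectional=True,
                     decoder_rnn_layer_num=2, attention_dim=256,
                     go_id=1, eos_id=2, pad_id=0)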
def build_reused_layers(self):
    if not FLAGS.reuse:
        template = no_reuse
    else:
        template = reuse_layer
    self.encoder = template(
        'encoder',
        lambda: layers.Encoder(FLAGS.channel_dims, FLAGS.hidden_dims,
                               self.training))
    self.decoder = template(
        'decoder',
        lambda: layers.Decoder(FLAGS.channel_dims, self.training),
    )
    self.downsampler = template(
        'downsampler',
        lambda: layers.Downsampler(FLAGS.gaussian_kernel_width),
    )
    self.likelihoods = template(
        'likelihoods',
        lambda: layers.LatentDistribution(),
    )
def __init__(self, word_vectors, char_vectors, context_max_len, query_max_len,
             d_model, d_head, mem_len=0, same_length=False, clamp_len=-1,
             train_cemb=False, pad=0, dropout=0.1, num_head=8):
    """QANet variant whose encoder blocks take a per-head dimension (d_head)
    and memory/relative-position options (mem_len, same_length, clamp_len)."""
    super(QANet, self).__init__()
    if train_cemb:
        self.char_emb = nn.Embedding.from_pretrained(char_vectors, freeze=False)
    else:
        self.char_emb = nn.Embedding.from_pretrained(char_vectors)
    self.word_emb = nn.Embedding.from_pretrained(word_vectors)
    self.LC = context_max_len
    self.LQ = query_max_len
    self.num_head = num_head
    self.pad = pad
    self.dropout = dropout
    self.mem_len = mem_len
    self.d_head = d_head
    self.d_model = d_model
    self.same_length = same_length
    self.clamp_len = clamp_len
    self.ext_len = 0
    wemb_dim = word_vectors.size()[1]
    cemb_dim = char_vectors.size()[1]

    # Layer declarations
    self.emb = layers.Embedding(wemb_dim, cemb_dim, d_model)
    self.emb_enc = layers.Encoder(4, num_head, d_model, d_head,
                                  d_inner=d_model * 4, k=7,
                                  dropout=0.1)  # hard-coded
    self.cq_att = layers.CQAttention(d_model=d_model)
    # Forward layer to reduce the dimension of the cq_att output back to d_model.
    self.cq_resizer = layers.Initialized_Conv1d(d_model * 4, d_model)
    self.model_enc_blks = nn.ModuleList([
        layers.Encoder(2, num_head, d_model, d_head, d_inner=d_model * 4,
                       k=5, dropout=0.1) for _ in range(7)
    ])
    self.out = layers.QAOutput(d_model)
    self.drop = nn.Dropout(dropout)
    self._create_parameters()