def __init__(self, config_):
    """Build the en->cn translation model (encoder/decoder + NLL loss).

    All hyper-parameters come from *config_*; vocab sizes are enlarged by
    4 entries (presumably special tokens -- confirm with the corpus code).
    """
    super(Sequence, self).__init__()
    self.config = config_
    cfg = self.config
    self.is_train = cfg.is_train
    self.beam_width = cfg.beam_width
    self.max_decode_step = cfg.max_decode_step

    # Embedding tables for source (en) and target (cn) vocabularies.
    self.en_embedder = Embedder(
        num_embeddings=cfg.en_vocab_size + 4,
        embedding_dim=cfg.embedding_dim).to(cfg.gpu)
    self.cn_embedder = Embedder(
        num_embeddings=cfg.cn_vocab_size + 4,
        embedding_dim=cfg.embedding_dim).to(cfg.gpu)

    self.encoder = RNNEncoder(config_)
    self.decoder = RNNDecoder(config_)
    self.softmax = torch.nn.Softmax(dim=-1)

    # Loss takes (bs, s_len, n_class) scores and (bs, s_len) targets.
    # Class 0 gets zero weight (presumably padding -- TODO confirm).
    nll_weight = torch.ones(cfg.cn_vocab_size + 4, dtype=torch.float32)
    nll_weight[0] = 0
    self.nll_loss = torch.nn.NLLLoss(weight=nll_weight).to(cfg.gpu)
def build_neural_topic_model(self, corpus):
    """Assemble the neural topic model (NTM) and its projection layers."""
    self.build_trans_mat(corpus.OUTPUT.itos, corpus.TOPIC.itos)
    self.topic_embedder = Embedder(num_embeddings=self.topic_vocab_size,
                                   embedding_dim=self.embed_size,
                                   padding_idx=self.padding_idx)
    # Map topic tokens into the decoder hidden space.
    self.topic_layer = nn.Sequential(
        self.topic_embedder,
        nn.Linear(self.embed_size, self.hidden_size),
        nn.Tanh(),
    )
    latent_dim = 50  # size of the Gaussian latent code
    hidden = get_mlp([self.topic_vocab_size, 800, self.embed_size], 'Sigmoid')
    normal = NormalParameter(self.embed_size, latent_dim)
    h_to_z = nn.Sequential(
        nn.Linear(latent_dim, self.topic_num),
        nn.Dropout(0.2),
    )
    topics = Topics(self.topic_num, self.topic_vocab_size)
    self.ntm = NTMR(hidden=hidden,
                    normal=normal,
                    h_to_z=h_to_z,
                    topics=topics,
                    embedding=self.topic_embedder,
                    penalty=0.8)
def __init__(self, vocab, hidden_size, hop=1, attn_mode='dot',
             padding_idx=None):
    """Memory-network encoder with per-hop embedding tables and attentions.

    Registers max_hops + 1 embedding tables C_0..C_hops (one extra beyond
    the hop count -- presumably the output memory; confirm in forward) and
    max_hops attention modules A_0..A_{hops-1}.
    """
    super(EncoderMemNN, self).__init__()
    self.num_vocab = vocab
    self.max_hops = hop
    self.hidden_size = hidden_size
    self.attn_mode = attn_mode
    self.padding_idx = padding_idx

    # Embedding tables, initialized N(0, 0.1).
    for layer_idx in range(self.max_hops + 1):
        table = Embedder(self.num_vocab, self.hidden_size,
                         padding_idx=self.padding_idx)
        table.weight.data.normal_(0, 0.1)
        self.add_module("C_{}".format(layer_idx), table)

    # One attention module per hop (returns attention weights only).
    for layer_idx in range(self.max_hops):
        attn = Attention(query_size=self.hidden_size,
                         memory_size=self.hidden_size,
                         hidden_size=self.hidden_size,
                         mode=self.attn_mode,
                         return_attn_only=True)
        self.add_module("A_{}".format(layer_idx), attn)

    # Indexable views over the registered sub-modules.
    self.C = AttrProxy(self, "C_")
    self.A = AttrProxy(self, "A_")
    self.softmax = nn.Softmax(dim=1)
def __init__(self, src_vocab_size, embed_size, hidden_size, padding_idx=None,
             num_layers=1, bidirectional=True, attn_mode="mlp",
             with_bridge=False, dropout=0.0, use_gpu=False, pretrain_epoch=5):
    """Entity seq2seq: RNN encoder/decoder sharing one embedding table."""
    super(Entity_Seq2Seq, self).__init__()
    self.src_vocab_size = src_vocab_size
    self.embed_size = embed_size
    self.hidden_size = hidden_size
    self.padding_idx = padding_idx
    self.num_layers = num_layers
    self.bidirectional = bidirectional
    self.attn_mode = attn_mode
    self.with_bridge = with_bridge
    self.dropout = dropout
    self.use_gpu = use_gpu
    self.pretrain_epoch = pretrain_epoch

    # NOTE(review): the decoder reuses the encoder embedder below, so
    # source and target share one vocabulary -- confirm this is intended.
    shared_embedder = Embedder(num_embeddings=self.src_vocab_size,
                               embedding_dim=self.embed_size,
                               padding_idx=self.padding_idx)
    self.encoder = RNNEncoder(input_size=self.embed_size,
                              hidden_size=self.hidden_size,
                              embedder=shared_embedder,
                              num_layers=self.num_layers,
                              bidirectional=self.bidirectional,
                              dropout=self.dropout)

    if self.with_bridge:
        # Two independent linear+Tanh bridges over the encoder state.
        self.bridge1 = nn.Sequential(
            nn.Linear(self.hidden_size, self.hidden_size),
            nn.Tanh(),
        )
        self.bridge2 = nn.Sequential(
            nn.Linear(self.hidden_size, self.hidden_size),
            nn.Tanh(),
        )

    self.decoder = RNNDecoder(input_size=self.embed_size,
                              hidden_size=self.hidden_size,
                              embedder=shared_embedder,
                              num_layers=self.num_layers,
                              attn_mode=self.attn_mode,
                              memory_size=self.hidden_size,
                              dropout=self.dropout)

    if self.use_gpu:
        self.cuda()
def __init__(self, vocab_size, hidden_size, embed_units, max_hop=3,
             padding_idx=None, n_layers=1, attn_mode='mlp', dropout=0.0,
             use_gpu=False):
    """Mem2Seq: RNN + memory-network encoders feeding a MemNN decoder.

    Fix: ``self.weight.cuda()`` is now guarded -- the original called it
    unconditionally under ``use_gpu`` and crashed with AttributeError when
    ``padding_idx is None`` (the weight is ``None`` in that case).
    """
    super(Mem2Seq, self).__init__()
    self.vocab_size = vocab_size
    self.hidden_size = hidden_size
    self.embed_units = embed_units
    self.max_hop = max_hop
    self.padding_idx = padding_idx
    self.attn_mode = attn_mode
    self.dropout = dropout
    self.use_gpu = use_gpu
    self.n_layers = n_layers

    enc_embedder = Embedder(num_embeddings=self.vocab_size,
                            embedding_dim=self.embed_units,
                            padding_idx=self.padding_idx)
    self.rnn_encoder = RNNEncoder(input_size=self.embed_units,
                                  hidden_size=self.hidden_size,
                                  embedder=enc_embedder,
                                  dropout=self.dropout)
    self.mem_encoder = EncoderMemNN(vocab=self.vocab_size,
                                    hidden_size=self.hidden_size,
                                    hop=self.max_hop,
                                    attn_mode='general',
                                    padding_idx=self.padding_idx)
    self.decoder = DecoderMemNN(vocab=self.vocab_size,
                                embedding_dim=self.hidden_size,
                                hidden_size=self.hidden_size,
                                hop=self.max_hop,
                                dropout=self.dropout,
                                padding_idx=self.padding_idx)

    # Zero out the padding class so it never contributes to the loss.
    if self.padding_idx is not None:
        self.weight = torch.ones(self.vocab_size)
        self.weight[self.padding_idx] = 0
    else:
        self.weight = None
    self.nll_loss = NLLLoss(weight=self.weight,
                            ignore_index=self.padding_idx,
                            reduction='mean')

    if self.use_gpu:
        self.cuda()
        # FIX: guard -- weight is None when padding_idx is None.
        if self.weight is not None:
            self.weight = self.weight.cuda()
def __init__(self, input_size, hidden_size, dropout, padding_idx, n_layers=1,
             use_gpu=False):
    """Bidirectional GRU context encoder over embedded token ids.

    Fix: the GRU's inter-layer ``dropout`` is only passed when
    ``n_layers > 1`` (it cannot apply with a single layer and triggers a
    PyTorch warning otherwise); this matches DecoderMemNN in this file.
    """
    super(ContextRNN, self).__init__()
    self.input_size = input_size
    self.hidden_size = hidden_size
    self.n_layers = n_layers
    self.dropout = dropout
    self.dropout_layer = nn.Dropout(dropout)
    self.padding_idx = padding_idx
    # Embedding dim equals hidden_size so embeddings feed the GRU directly.
    self.embedding = Embedder(input_size, hidden_size,
                              padding_idx=self.padding_idx)
    self.gru = nn.GRU(hidden_size, hidden_size, n_layers,
                      dropout=dropout if n_layers > 1 else 0,
                      bidirectional=True)
    # Project the concatenated forward/backward states back to hidden_size.
    self.W = nn.Linear(2 * hidden_size, hidden_size)
def __init__(self, vocab, embedding_dim, hidden_size, hop, dropout=0.0,
             num_layers=1, padding_idx=None, attn_mode=None):
    """Memory-network decoder: per-hop embedding tables plus a GRU.

    When *attn_mode* is set, an attention module is added and its context
    vector is concatenated onto the GRU input.
    """
    super(DecoderMemNN, self).__init__()
    self.num_vocab = vocab
    self.max_hops = hop
    self.embedding_dim = embedding_dim
    self.hidden_size = hidden_size
    self.dropout = dropout
    self.num_layers = num_layers
    self.padding_idx = padding_idx
    self.attn_mode = attn_mode
    self.rnn_input_size = self.embedding_dim
    self.out_input_size = self.hidden_size

    # Embedding tables C_0..C_hops, initialized N(0, 0.1).
    for layer_idx in range(self.max_hops + 1):
        table = Embedder(self.num_vocab, embedding_dim,
                         padding_idx=self.padding_idx)
        table.weight.data.normal_(0, 0.1)
        self.add_module("C_{}".format(layer_idx), table)
    self.C = AttrProxy(self, "C_")

    if self.attn_mode is not None:
        self.attention = Attention(query_size=self.hidden_size,
                                   memory_size=self.hidden_size,
                                   hidden_size=self.hidden_size,
                                   mode=self.attn_mode,
                                   project=False)
        # Attention context is concatenated onto the GRU input.
        self.rnn_input_size += self.hidden_size

    self.softmax = nn.Softmax(dim=1)
    self.log_softmax = nn.LogSoftmax(dim=-1)
    self.W = nn.Linear(self.embedding_dim, 1)
    self.W1 = nn.Linear(2 * self.embedding_dim, self.num_vocab)
    # Inter-layer dropout only applies when the GRU is stacked.
    self.gru = nn.GRU(input_size=self.rnn_input_size,
                      hidden_size=self.embedding_dim,
                      num_layers=self.num_layers,
                      dropout=self.dropout if self.num_layers > 1 else 0,
                      batch_first=True)
def __init__(self, src_vocab_size, tgt_vocab_size, embed_size, hidden_size,
             padding_idx=None, num_layers=1, bidirectional=True,
             attn_mode="mlp", attn_hidden_size=None, with_bridge=False,
             tie_embedding=False, dropout=0.0, use_gpu=False, use_bow=False,
             use_kd=False, use_dssm=False, use_posterior=False,
             weight_control=False, use_pg=False, use_gs=False, concat=False,
             pretrain_epoch=0):
    """Knowledge-grounded seq2seq with prior/posterior knowledge attention.

    Fixes relative to the previous version:
      * ``self.weight.cuda()`` is guarded, so a ``None`` weight (when
        ``padding_idx is None``) no longer raises AttributeError on GPU.
      * deprecated ``KLDivLoss(size_average=True)`` replaced by the
        equivalent ``KLDivLoss(reduction='mean')``.
    """
    super(KnowledgeSeq2Seq, self).__init__()
    self.src_vocab_size = src_vocab_size
    self.tgt_vocab_size = tgt_vocab_size
    self.embed_size = embed_size
    self.hidden_size = hidden_size
    self.padding_idx = padding_idx
    self.num_layers = num_layers
    self.bidirectional = bidirectional
    self.attn_mode = attn_mode
    self.attn_hidden_size = attn_hidden_size
    self.with_bridge = with_bridge
    self.tie_embedding = tie_embedding
    self.dropout = dropout
    self.use_gpu = use_gpu
    self.use_bow = use_bow
    self.use_dssm = use_dssm
    self.weight_control = weight_control
    self.use_kd = use_kd
    self.use_pg = use_pg
    self.use_gs = use_gs
    self.use_posterior = use_posterior
    self.pretrain_epoch = pretrain_epoch
    self.baseline = 0  # presumably the policy-gradient baseline -- confirm

    enc_embedder = Embedder(num_embeddings=self.src_vocab_size,
                            embedding_dim=self.embed_size,
                            padding_idx=self.padding_idx)
    self.encoder = RNNEncoder(input_size=self.embed_size,
                              hidden_size=self.hidden_size,
                              embedder=enc_embedder,
                              num_layers=self.num_layers,
                              bidirectional=self.bidirectional,
                              dropout=self.dropout)

    if self.with_bridge:
        self.bridge = nn.Sequential(
            nn.Linear(self.hidden_size, self.hidden_size),
            nn.Tanh())

    # Optionally share one embedding table across source/target/knowledge.
    if self.tie_embedding:
        assert self.src_vocab_size == self.tgt_vocab_size
        dec_embedder = enc_embedder
        knowledge_embedder = enc_embedder
    else:
        dec_embedder = Embedder(num_embeddings=self.tgt_vocab_size,
                                embedding_dim=self.embed_size,
                                padding_idx=self.padding_idx)
        knowledge_embedder = Embedder(num_embeddings=self.tgt_vocab_size,
                                      embedding_dim=self.embed_size,
                                      padding_idx=self.padding_idx)

    self.knowledge_encoder = RNNEncoder(input_size=self.embed_size,
                                        hidden_size=self.hidden_size,
                                        embedder=knowledge_embedder,
                                        num_layers=self.num_layers,
                                        bidirectional=self.bidirectional,
                                        dropout=self.dropout)
    self.prior_attention = Attention(query_size=self.hidden_size,
                                     memory_size=self.hidden_size,
                                     hidden_size=self.hidden_size,
                                     mode="dot")
    self.posterior_attention = Attention(query_size=self.hidden_size,
                                         memory_size=self.hidden_size,
                                         hidden_size=self.hidden_size,
                                         mode="dot")
    self.decoder = RNNDecoder(input_size=self.embed_size,
                              hidden_size=self.hidden_size,
                              output_size=self.tgt_vocab_size,
                              embedder=dec_embedder,
                              num_layers=self.num_layers,
                              attn_mode=self.attn_mode,
                              memory_size=self.hidden_size,
                              feature_size=None,
                              dropout=self.dropout,
                              concat=concat)

    self.log_softmax = nn.LogSoftmax(dim=-1)
    self.softmax = nn.Softmax(dim=-1)
    self.sigmoid = nn.Sigmoid()
    self.softplus = nn.Softplus()

    if self.use_bow:
        # Auxiliary bag-of-words output layer.
        self.bow_output_layer = nn.Sequential(
            nn.Linear(in_features=self.hidden_size,
                      out_features=self.hidden_size),
            nn.Tanh(),
            nn.Linear(in_features=self.hidden_size,
                      out_features=self.tgt_vocab_size),
            nn.LogSoftmax(dim=-1))
    if self.use_dssm:
        self.dssm_project = nn.Linear(in_features=self.hidden_size,
                                      out_features=self.hidden_size)
        self.mse_loss = torch.nn.MSELoss(reduction='mean')
    if self.use_kd:
        self.knowledge_dropout = nn.Dropout()

    # Zero out the padding class so it never contributes to the loss.
    if self.padding_idx is not None:
        self.weight = torch.ones(self.tgt_vocab_size)
        self.weight[self.padding_idx] = 0
    else:
        self.weight = None
    self.nll_loss = NLLLoss(weight=self.weight,
                            ignore_index=self.padding_idx,
                            reduction='mean')
    # FIX: size_average is deprecated; reduction='mean' is equivalent.
    self.kl_loss = torch.nn.KLDivLoss(reduction='mean')

    if self.use_gpu:
        self.cuda()
        # FIX: guard -- weight is None when padding_idx is None.
        if self.weight is not None:
            self.weight = self.weight.cuda()
def __init__(self, src_vocab_size, tgt_vocab_size, embed_size, hidden_size,
             padding_idx=None, num_layers=1, bidirectional=True,
             attn_mode="mlp", attn_hidden_size=None, with_bridge=False,
             tie_embedding=False, dropout=0.0, use_gpu=False, use_bow=False,
             use_kd=False, use_dssm=False, use_posterior=False,
             weight_control=False, use_pg=False, use_gs=False, concat=False,
             pretrain_epoch=0):
    """Knowledge-grounded seq2seq (BERT-embedding experiment variant).

    Fixes relative to the previous version:
      * ``self.weight.cuda()`` is guarded, so a ``None`` weight (when
        ``padding_idx is None``) no longer raises AttributeError on GPU.
      * deprecated ``KLDivLoss(size_average=True)`` replaced by the
        equivalent ``KLDivLoss(reduction='mean')``.
      * Chinese comments translated to English.
    """
    super(KnowledgeSeq2Seq, self).__init__()
    self.src_vocab_size = src_vocab_size
    self.tgt_vocab_size = tgt_vocab_size
    self.embed_size = embed_size
    self.hidden_size = hidden_size
    self.padding_idx = padding_idx
    self.num_layers = num_layers
    self.bidirectional = bidirectional
    self.attn_mode = attn_mode
    self.attn_hidden_size = attn_hidden_size
    self.with_bridge = with_bridge
    self.tie_embedding = tie_embedding
    self.dropout = dropout
    self.use_gpu = use_gpu
    self.use_bow = use_bow
    self.use_dssm = use_dssm
    self.weight_control = weight_control
    self.use_kd = use_kd
    self.use_pg = use_pg
    self.use_gs = use_gs
    self.use_posterior = use_posterior
    self.pretrain_epoch = pretrain_epoch
    self.baseline = 0  # presumably the policy-gradient baseline -- confirm

    # NOTE(review): BertClient.encode returns an array of sentence vectors,
    # not an Embedder module; passing it as `embedder=` to RNNEncoder looks
    # experimental/broken -- confirm before relying on this code path.
    bc = BertClient()
    enc_embedder = bc.encode(['你好', '吃饭了么'])
    self.encoder = RNNEncoder(input_size=self.embed_size,
                              hidden_size=self.hidden_size,
                              embedder=enc_embedder,
                              num_layers=self.num_layers,
                              bidirectional=self.bidirectional,
                              dropout=self.dropout)

    if self.with_bridge:
        # Bridge: a linear layer followed by Tanh; Sequential combines the
        # layer and its activation into one module.
        self.bridge = nn.Sequential(
            nn.Linear(self.hidden_size, self.hidden_size),
            nn.Tanh())

    # Optionally share one embedding table across source/target/knowledge.
    if self.tie_embedding:
        assert self.src_vocab_size == self.tgt_vocab_size
        dec_embedder = enc_embedder
        knowledge_embedder = enc_embedder
    else:
        dec_embedder = Embedder(num_embeddings=self.tgt_vocab_size,
                                embedding_dim=self.embed_size,
                                padding_idx=self.padding_idx)
        knowledge_embedder = Embedder(num_embeddings=self.tgt_vocab_size,
                                      embedding_dim=self.embed_size,
                                      padding_idx=self.padding_idx)

    self.knowledge_encoder = RNNEncoder(input_size=self.embed_size,
                                        hidden_size=self.hidden_size,
                                        embedder=knowledge_embedder,
                                        num_layers=self.num_layers,
                                        bidirectional=self.bidirectional,
                                        dropout=self.dropout)
    self.prior_attention = Attention(query_size=self.hidden_size,
                                     memory_size=self.hidden_size,
                                     hidden_size=self.hidden_size,
                                     mode="dot")
    self.posterior_attention = Attention(query_size=self.hidden_size,
                                         memory_size=self.hidden_size,
                                         hidden_size=self.hidden_size,
                                         mode="dot")
    self.decoder = RNNDecoder(input_size=self.embed_size,
                              hidden_size=self.hidden_size,
                              output_size=self.tgt_vocab_size,
                              embedder=dec_embedder,
                              num_layers=self.num_layers,
                              attn_mode=self.attn_mode,
                              memory_size=self.hidden_size,
                              feature_size=None,
                              dropout=self.dropout,
                              concat=concat)

    self.log_softmax = nn.LogSoftmax(dim=-1)
    self.softmax = nn.Softmax(dim=-1)
    self.sigmoid = nn.Sigmoid()
    # Softplus: a smooth approximation to ReLU; constrains the output of a
    # machine to always be positive.
    self.softplus = nn.Softplus()

    if self.use_bow:
        self.bow_output_layer = nn.Sequential(
            nn.Linear(in_features=self.hidden_size,
                      out_features=self.hidden_size),
            nn.Tanh(),
            nn.Linear(in_features=self.hidden_size,
                      out_features=self.tgt_vocab_size),
            nn.LogSoftmax(dim=-1))
    if self.use_dssm:
        self.dssm_project = nn.Linear(in_features=self.hidden_size,
                                      out_features=self.hidden_size)
        self.mse_loss = torch.nn.MSELoss(reduction='mean')
    if self.use_kd:
        self.knowledge_dropout = nn.Dropout()

    # Zero out the padding class so it never contributes to the loss.
    if self.padding_idx is not None:
        self.weight = torch.ones(self.tgt_vocab_size)
        self.weight[self.padding_idx] = 0
    else:
        self.weight = None
    # NLLLoss (negative log-likelihood): quantifies the difference between
    # the gold reply and the baseline-generated reply.
    self.nll_loss = NLLLoss(weight=self.weight,
                            ignore_index=self.padding_idx,
                            reduction='mean')
    # KLDivLoss: select related background knowledge -> lead the
    # conversation.  FIX: size_average is deprecated; reduction='mean' is
    # the equivalent.
    self.kl_loss = torch.nn.KLDivLoss(reduction='mean')

    if self.use_gpu:
        self.cuda()
        # FIX: guard -- weight is None when padding_idx is None.
        if self.weight is not None:
            self.weight = self.weight.cuda()
def __init__(self, corpus, config):
    """Topic-aware seq2seq built from a corpus and a config object.

    Fixes relative to the previous version:
      * removed a duplicated ``decoder_attention_channels`` assignment;
      * simplified the redundant ``without_fd is not None and ... is True``
        condition to ``without_fd is True`` (identical behavior);
      * the selected decoder class is bound to a local name instead of
        shadowing the module-level ``RNNDecoder``.
    """
    super(Seq2Seq, self).__init__()
    self.src_vocab_size = corpus.SRC.vocab_size
    self.tgt_vocab_size = corpus.TGT.vocab_size
    self.output_vocab_size = corpus.OUTPUT.vocab_size
    self.topic_vocab_size = corpus.TOPIC.vocab_size
    self.padding_idx = corpus.padding_idx
    self.embed_size = config.embed_size
    self.hidden_size = config.hidden_size
    self.num_layers = config.num_layers
    self.bidirectional = config.bidirectional
    self.attn_mode = config.attn_mode
    self.attn_hidden_size = config.attn_hidden_size
    self.with_bridge = config.with_bridge
    self.tie_embedding = config.tie_embedding
    self.dropout = config.dropout
    self.use_gpu = config.use_gpu
    self.decoder_attention_channels = config.decoder_attention_channels
    self.topic_k = config.topic_k
    self.topic_num = config.topic_num
    self.use_ntm = config.use_ntm

    # topic
    self.build_neural_topic_model(corpus)
    self.t_to_feature = nn.Sequential(
        nn.Linear(self.embed_size * len(self.decoder_attention_channels),
                  self.hidden_size),
        nn.Tanh(),
        nn.Dropout(p=self.dropout),
    )

    # encoder
    enc_embedder = Embedder(num_embeddings=self.src_vocab_size,
                            embedding_dim=self.embed_size,
                            padding_idx=self.padding_idx)
    self.encoder = RNNEncoder(input_size=self.embed_size,
                              hidden_size=self.hidden_size,
                              embedder=enc_embedder,
                              num_layers=self.num_layers,
                              bidirectional=self.bidirectional,
                              dropout=self.dropout)

    # bridge
    if self.with_bridge:
        self.bridge = nn.Sequential(
            nn.Linear(self.hidden_size, self.hidden_size),
            nn.Tanh(),
        )

    # decoder
    if self.tie_embedding:
        assert self.src_vocab_size == self.tgt_vocab_size
        dec_embedder = enc_embedder
    else:
        dec_embedder = Embedder(num_embeddings=self.tgt_vocab_size,
                                embedding_dim=self.embed_size,
                                padding_idx=self.padding_idx)

    topic_vocab_size_ = self.topic_vocab_size
    if config.without_topic_project:
        topic_vocab_size_ = None
    # FIX: `x is not None and x is True` reduces to `x is True`.
    if config.without_fd is True:
        decoder_cls = RNNDecoder_noFd
        topic_vocab_size_ = None
    else:
        decoder_cls = RNNDecoder_full
    self.decoder = decoder_cls(
        input_size=self.embed_size,
        hidden_size=self.hidden_size,
        output_size=self.output_vocab_size,
        topic_size=topic_vocab_size_,
        trans_mat=self.trans_mat,
        embedder=dec_embedder,
        num_layers=self.num_layers,
        attn_mode=self.attn_mode,
        attn_hidden_size=self.attn_hidden_size,
        memory_size=self.hidden_size,
        feature_size=None,
        dropout=self.dropout,
        tgt_unk_idx=corpus.OUTPUT.itos.index('<unk>'),
        attention_channels=self.decoder_attention_channels,
    )

    # Loss Definition: zero weight on the padding class.
    if self.padding_idx is not None:
        weight = torch.ones(self.output_vocab_size)
        weight[self.padding_idx] = 0
    else:
        weight = None
    self.nll_loss = NLLLoss(weight=weight,
                            ignore_index=self.padding_idx,
                            reduction='mean')

    if self.use_gpu:
        self.cuda()
def __init__(self, src_vocab_size, tgt_vocab_size, embed_size, hidden_size,
             padding_idx=None, num_layers=1, bidirectional=True,
             attn_mode="mlp", attn_hidden_size=None, with_bridge=False,
             tie_embedding=False, dropout=0.0, use_gpu=False):
    """Hierarchical seq2seq: utterance- and dialogue-level encoders + decoder."""
    super(HSeq2Seq, self).__init__()
    self.src_vocab_size = src_vocab_size
    self.tgt_vocab_size = tgt_vocab_size
    self.embed_size = embed_size
    self.hidden_size = hidden_size
    self.padding_idx = padding_idx
    self.num_layers = num_layers
    self.bidirectional = bidirectional
    self.attn_mode = attn_mode
    self.attn_hidden_size = attn_hidden_size
    self.with_bridge = with_bridge
    self.tie_embedding = tie_embedding
    self.dropout = dropout
    self.use_gpu = use_gpu

    # Token-level encoder feeds a higher-level encoder over its outputs.
    source_embedder = Embedder(num_embeddings=self.src_vocab_size,
                               embedding_dim=self.embed_size,
                               padding_idx=self.padding_idx)
    self.sub_encoder = RNNEncoder(input_size=self.embed_size,
                                  hidden_size=self.hidden_size,
                                  embedder=source_embedder,
                                  num_layers=self.num_layers,
                                  bidirectional=self.bidirectional,
                                  dropout=self.dropout)
    self.hiera_encoder = RNNEncoder(input_size=self.hidden_size,
                                    hidden_size=self.hidden_size,
                                    num_layers=self.num_layers,
                                    bidirectional=self.bidirectional,
                                    dropout=self.dropout)
    self.encoder = HRNNEncoder(self.sub_encoder, self.hiera_encoder)

    if self.with_bridge:
        self.bridge = nn.Sequential(
            nn.Linear(self.hidden_size, self.hidden_size),
            nn.Tanh(),
        )

    # Decoder embedder is shared with the encoder when embeddings are tied.
    if self.tie_embedding:
        assert self.src_vocab_size == self.tgt_vocab_size
        target_embedder = source_embedder
    else:
        target_embedder = Embedder(num_embeddings=self.tgt_vocab_size,
                                   embedding_dim=self.embed_size,
                                   padding_idx=self.padding_idx)
    self.decoder = RNNDecoder(input_size=self.embed_size,
                              hidden_size=self.hidden_size,
                              output_size=self.tgt_vocab_size,
                              embedder=target_embedder,
                              num_layers=self.num_layers,
                              attn_mode=self.attn_mode,
                              attn_hidden_size=self.attn_hidden_size,
                              memory_size=self.hidden_size,
                              feature_size=None,
                              dropout=self.dropout)

    # Loss Definition: zero weight on the padding class.
    if self.padding_idx is not None:
        loss_weight = torch.ones(self.tgt_vocab_size)
        loss_weight[self.padding_idx] = 0
    else:
        loss_weight = None
    self.nll_loss = NLLLoss(weight=loss_weight,
                            ignore_index=self.padding_idx,
                            reduction='mean')

    if self.use_gpu:
        self.cuda()
def __init__(self, corpus, config):
    """Plain attention seq2seq built from a corpus and a config object."""
    super(Seq2Seq, self).__init__()
    self.src_vocab_size = corpus.SRC.vocab_size
    self.tgt_vocab_size = corpus.TGT.vocab_size
    self.embed_size = config.embed_size
    self.hidden_size = config.hidden_size
    self.padding_idx = corpus.padding_idx
    self.num_layers = config.num_layers
    self.bidirectional = config.bidirectional
    self.attn_mode = config.attn_mode
    self.attn_hidden_size = config.attn_hidden_size
    self.with_bridge = config.with_bridge
    self.tie_embedding = config.tie_embedding
    self.dropout = config.dropout
    self.use_gpu = config.use_gpu

    # Encoder over embedded source tokens.
    source_embedder = Embedder(num_embeddings=self.src_vocab_size,
                               embedding_dim=self.embed_size,
                               padding_idx=self.padding_idx)
    self.encoder = RNNEncoder(input_size=self.embed_size,
                              hidden_size=self.hidden_size,
                              embedder=source_embedder,
                              num_layers=self.num_layers,
                              bidirectional=self.bidirectional,
                              dropout=self.dropout)

    if self.with_bridge:
        self.bridge = nn.Sequential(
            nn.Linear(self.hidden_size, self.hidden_size),
            nn.Tanh(),
        )

    # Decoder embedder is shared with the encoder when embeddings are tied.
    if self.tie_embedding:
        assert self.src_vocab_size == self.tgt_vocab_size
        target_embedder = source_embedder
    else:
        target_embedder = Embedder(num_embeddings=self.tgt_vocab_size,
                                   embedding_dim=self.embed_size,
                                   padding_idx=self.padding_idx)
    self.decoder = RNNDecoder(input_size=self.embed_size,
                              hidden_size=self.hidden_size,
                              output_size=self.tgt_vocab_size,
                              embedder=target_embedder,
                              num_layers=self.num_layers,
                              attn_mode=self.attn_mode,
                              attn_hidden_size=self.attn_hidden_size,
                              memory_size=self.hidden_size,
                              feature_size=None,
                              dropout=self.dropout)

    # Loss Definition: zero weight on the padding class.
    if self.padding_idx is not None:
        loss_weight = torch.ones(self.tgt_vocab_size)
        loss_weight[self.padding_idx] = 0
    else:
        loss_weight = None
    self.nll_loss = NLLLoss(weight=loss_weight,
                            ignore_index=self.padding_idx,
                            reduction='mean')

    if self.use_gpu:
        self.cuda()
def __init__(self, src_vocab_size, tgt_vocab_size, embed_size, hidden_size,
             padding_idx=None, num_layers=1, bidirectional=True,
             tie_embedding=False, margin=None, with_project=False,
             dropout=0.0, use_gpu=False):
    """Dual-encoder DSSM: parallel RNN encoders for source and target."""
    super(DSSM, self).__init__()
    self.src_vocab_size = src_vocab_size
    self.tgt_vocab_size = tgt_vocab_size
    self.embed_size = embed_size
    self.hidden_size = hidden_size
    self.padding_idx = padding_idx
    self.num_layers = num_layers
    self.bidirectional = bidirectional
    self.tie_embedding = tie_embedding
    self.dropout = dropout
    self.use_gpu = use_gpu
    self.margin = margin
    self.with_project = with_project

    src_embedder = Embedder(num_embeddings=self.src_vocab_size,
                            embedding_dim=self.embed_size,
                            padding_idx=self.padding_idx)
    self.src_encoder = RNNEncoder(input_size=self.embed_size,
                                  hidden_size=self.hidden_size,
                                  embedder=src_embedder,
                                  num_layers=self.num_layers,
                                  bidirectional=self.bidirectional,
                                  dropout=self.dropout)

    if self.with_project:
        # Bias-free projection of the encoded representation.
        self.project = nn.Linear(in_features=self.hidden_size,
                                 out_features=self.hidden_size,
                                 bias=False)

    # Target side reuses the source embedder when embeddings are tied.
    if self.tie_embedding:
        assert self.src_vocab_size == self.tgt_vocab_size
        tgt_embedder = src_embedder
    else:
        tgt_embedder = Embedder(num_embeddings=self.tgt_vocab_size,
                                embedding_dim=self.embed_size,
                                padding_idx=self.padding_idx)
    self.tgt_encoder = RNNEncoder(input_size=self.embed_size,
                                  hidden_size=self.hidden_size,
                                  embedder=tgt_embedder,
                                  num_layers=self.num_layers,
                                  bidirectional=self.bidirectional,
                                  dropout=self.dropout)

    if self.use_gpu:
        self.cuda()
def __init__(self, vocab_size, embed_units, hidden_size, padding_idx=None,
             num_layers=1, max_hop=3, bidirectional=True, attn_mode='mlp',
             dropout=0.0, use_gpu=False):
    """MemNet: RNN + memory-network encoders with an attentional RNN decoder.

    Fix: ``self.weight.cuda()`` is now guarded -- the original called it
    unconditionally under ``use_gpu`` and crashed with AttributeError when
    ``padding_idx is None`` (the weight is ``None`` in that case).
    """
    super(MemNet, self).__init__()
    self.vocab_size = vocab_size
    self.embed_units = embed_units
    self.padding_idx = padding_idx
    self.hidden_size = hidden_size
    self.num_layers = num_layers
    self.max_hop = max_hop
    self.bidirectional = bidirectional
    self.dropout = dropout
    self.attn_mode = attn_mode
    self.use_gpu = use_gpu

    # Encoder and decoder share a single embedding table.
    enc_embedder = Embedder(num_embeddings=self.vocab_size,
                            embedding_dim=self.embed_units,
                            padding_idx=self.padding_idx)
    dec_embedder = enc_embedder
    self.rnn_encoder = RNNEncoder(input_size=self.embed_units,
                                  hidden_size=self.hidden_size,
                                  embedder=enc_embedder,
                                  num_layers=self.num_layers,
                                  bidirectional=self.bidirectional,
                                  dropout=self.dropout)
    self.mem_encoder = EncoderMemNN(vocab=self.vocab_size,
                                    hidden_size=self.hidden_size,
                                    hop=self.max_hop,
                                    attn_mode='general',
                                    padding_idx=self.padding_idx)
    self.decoder = RNNDecoder(input_size=self.embed_units,
                              hidden_size=self.hidden_size,
                              output_size=self.vocab_size,
                              embedder=dec_embedder,
                              attn_mode=self.attn_mode,
                              attn_hidden_size=self.hidden_size,
                              memory_size=self.hidden_size,
                              feature_size=None,
                              dropout=self.dropout)
    self.softmax = nn.Softmax(dim=-1)

    # Zero out the padding class so it never contributes to the loss.
    if self.padding_idx is not None:
        self.weight = torch.ones(self.vocab_size)
        self.weight[self.padding_idx] = 0
    else:
        self.weight = None
    self.nll_loss = NLLLoss(weight=self.weight,
                            ignore_index=self.padding_idx,
                            reduction='mean')

    if self.use_gpu:
        self.cuda()
        # FIX: guard -- weight is None when padding_idx is None.
        if self.weight is not None:
            self.weight = self.weight.cuda()
def __init__(self, src_vocab_size, tgt_vocab_size, embed_size, hidden_size,
             padding_idx=None, num_layers=1, bidirectional=True,
             attn_mode="mlp", attn_hidden_size=None, with_bridge=False,
             tie_embedding=False, dropout=0.0, use_gpu=False, use_dssm=False,
             weight_control=False, use_pg=False, concat=False,
             pretrain_epoch=0, with_label=False):
    """Two-stage persona seq2seq with a persona encoder and attention.

    Fixes relative to the previous version:
      * a duplicated ``MSELoss`` assignment was removed (the same loss was
        constructed twice back-to-back);
      * ``self.weight.cuda()`` is guarded so a ``None`` weight (when
        ``padding_idx is None``) no longer raises AttributeError on GPU.
    """
    super(TwoStagePersonaSeq2Seq, self).__init__()
    self.src_vocab_size = src_vocab_size
    self.tgt_vocab_size = tgt_vocab_size
    self.embed_size = embed_size
    self.hidden_size = hidden_size
    self.padding_idx = padding_idx
    self.num_layers = num_layers
    self.bidirectional = bidirectional
    self.attn_mode = attn_mode
    self.attn_hidden_size = attn_hidden_size
    self.with_bridge = with_bridge
    self.tie_embedding = tie_embedding
    self.dropout = dropout
    self.use_gpu = use_gpu
    self.use_dssm = use_dssm
    self.weight_control = weight_control
    self.use_pg = use_pg
    self.pretrain_epoch = pretrain_epoch
    self.baseline = 0  # presumably the policy-gradient baseline -- confirm
    self.with_label = with_label
    self.task_id = 1

    enc_embedder = Embedder(num_embeddings=self.src_vocab_size,
                            embedding_dim=self.embed_size,
                            padding_idx=self.padding_idx)
    self.encoder = RNNEncoder(input_size=self.embed_size,
                              hidden_size=self.hidden_size,
                              embedder=enc_embedder,
                              num_layers=self.num_layers,
                              bidirectional=self.bidirectional,
                              dropout=self.dropout)

    if self.with_bridge:
        self.bridge = nn.Sequential(
            nn.Linear(self.hidden_size, self.hidden_size),
            nn.Tanh())

    # Optionally share one embedding table across source/target/persona.
    if self.tie_embedding:
        assert self.src_vocab_size == self.tgt_vocab_size
        dec_embedder = enc_embedder
        persona_embedder = enc_embedder
    else:
        dec_embedder = Embedder(num_embeddings=self.tgt_vocab_size,
                                embedding_dim=self.embed_size,
                                padding_idx=self.padding_idx)
        persona_embedder = Embedder(num_embeddings=self.tgt_vocab_size,
                                    embedding_dim=self.embed_size,
                                    padding_idx=self.padding_idx)

    self.persona_encoder = RNNEncoder(input_size=self.embed_size,
                                      hidden_size=self.hidden_size,
                                      embedder=persona_embedder,
                                      num_layers=self.num_layers,
                                      bidirectional=self.bidirectional,
                                      dropout=self.dropout)
    self.persona_attention = Attention(query_size=self.hidden_size,
                                       memory_size=self.hidden_size,
                                       hidden_size=self.hidden_size,
                                       mode="general")
    self.decoder = RNNDecoder(input_size=self.embed_size,
                              hidden_size=self.hidden_size,
                              output_size=self.tgt_vocab_size,
                              embedder=dec_embedder,
                              num_layers=self.num_layers,
                              attn_mode=self.attn_mode,
                              memory_size=self.hidden_size,
                              feature_size=None,
                              dropout=self.dropout,
                              concat=concat,
                              with_label=self.with_label)
    self.key_linear = nn.Linear(in_features=self.embed_size,
                                out_features=self.hidden_size)

    if self.use_dssm:
        self.dssm_project = nn.Linear(in_features=self.hidden_size,
                                      out_features=self.hidden_size)
        # FIX: this MSELoss was constructed twice in the original.
        self.mse_loss = torch.nn.MSELoss(reduction='mean')

    # Zero out the padding class so it never contributes to the loss.
    if self.padding_idx is not None:
        self.weight = torch.ones(self.tgt_vocab_size)
        self.weight[self.padding_idx] = 0
    else:
        self.weight = None
    self.nll_loss = NLLLoss(weight=self.weight,
                            ignore_index=self.padding_idx,
                            reduction='mean')
    self.persona_loss = NLLLoss(weight=None, reduction='mean')
    self.eps = 1e-7

    if self.use_gpu:
        self.cuda()
        # FIX: guard -- weight is None when padding_idx is None.
        if self.weight is not None:
            self.weight = self.weight.cuda()
def __init__(self, vocab_size, embed_units, hidden_size, padding_idx=None,
             num_layers=1, bidirectional=True, attn_mode='mlp', dropout=0.0,
             with_bridge=True, use_gpu=False):
    """PointerNet: fact/history encoders with a pointer decoder.

    Fixes relative to the previous version:
      * ``self.weight.cuda()`` is guarded so a ``None`` weight (when
        ``padding_idx is None``) no longer raises AttributeError on GPU;
      * deprecated ``KLDivLoss(size_average=True)`` replaced by the
        equivalent ``KLDivLoss(reduction='mean')``.
    """
    super(PointerNet, self).__init__()
    self.vocab_size = vocab_size
    self.embed_units = embed_units
    self.padding_idx = padding_idx
    self.hidden_size = hidden_size
    self.num_layers = num_layers
    self.bidirectional = bidirectional
    self.dropout = dropout
    self.attn_mode = attn_mode
    self.with_bridge = with_bridge
    self.use_gpu = use_gpu

    # One embedding table is shared by both encoders and the decoder.
    embedder = Embedder(num_embeddings=self.vocab_size,
                        embedding_dim=self.embed_units,
                        padding_idx=self.padding_idx)
    self.fact_encoder = RNNEncoder(input_size=self.embed_units,
                                   hidden_size=self.hidden_size,
                                   embedder=embedder,
                                   num_layers=self.num_layers,
                                   bidirectional=self.bidirectional,
                                   dropout=self.dropout)
    self.hist_encoder = RNNEncoder(input_size=self.embed_units,
                                   hidden_size=self.hidden_size,
                                   embedder=embedder,
                                   num_layers=self.num_layers,
                                   bidirectional=self.bidirectional,
                                   dropout=self.dropout)
    self.decoder = PointerDecoder(input_size=self.embed_units,
                                  hidden_size=self.hidden_size,
                                  output_size=self.vocab_size,
                                  embedder=embedder,
                                  attn_mode=self.attn_mode,
                                  dropout=self.dropout)

    if self.with_bridge:
        self.bridge = nn.Sequential(
            nn.Linear(self.hidden_size, self.hidden_size),
            nn.Tanh())

    # Zero out the padding class so it never contributes to the loss.
    if self.padding_idx is not None:
        self.weight = torch.ones(self.vocab_size)
        self.weight[self.padding_idx] = 0
    else:
        self.weight = None
    self.nll_loss = NLLLoss(weight=self.weight,
                            ignore_index=self.padding_idx,
                            reduction='mean')
    # FIX: size_average is deprecated; reduction='mean' is equivalent.
    self.kl_loss = torch.nn.KLDivLoss(reduction='mean')

    if self.use_gpu:
        self.cuda()
        # FIX: guard -- weight is None when padding_idx is None.
        if self.weight is not None:
            self.weight = self.weight.cuda()