def __init__(self, args, pretrained, trainable_weight_idx):
    super(SPNet, self).__init__()
    self.args = args["arch"]["args"]

    # 1. word embedding layer (partially trainable; the fully frozen variant is kept for reference)
    # self.word_emb = nn.Embedding.from_pretrained(pretrained, freeze=True)
    self.word_emb = PartiallyTrainEmbedding(pretrained, trainable_weight_idx)

    # 2. contextual embedding layer
    self.context_lstm = LSTM(input_size=self.args["word_dim"],
                             hidden_size=self.args["hidden_size"],
                             batch_first=True,
                             num_layers=1,
                             bidirectional=True,
                             dropout=self.args["dropout"])

    # 3. co-attention layer
    self.att_weight_co = Linear(self.args["hidden_size"] * 2, 1)

    # 4. fuse layer 1 to combine
    self.fuse_m_layer = Linear(self.args["hidden_size"] * 8, self.args["hidden_size"] * 2)
    self.fuse_g_layer = Linear(self.args["hidden_size"] * 8, self.args["hidden_size"] * 2)

    # 5. self-attention layer
    self.att_weight_s = Linear(1, 1)

    # 6. fuse layer 2 to combine (no parameters defined here)

    # 7. question self-align layer
    self.align_weight_q = Linear(self.args["hidden_size"] * 2, 1)

    # 8. paragraph self-align layer
    self.align_weight_p = Linear(self.args["hidden_size"] * 2, 1)

    # 9. relevance score (for task 1)
    self.score_weight_qp = nn.Bilinear(self.args["hidden_size"] * 2,
                                       self.args["hidden_size"] * 2, 1)

    # 10. shared LSTM layer (for task 2)
    self.shared_lstm = LSTM(input_size=self.args["hidden_size"] * 2,
                            hidden_size=self.args["hidden_size"],
                            batch_first=True,
                            num_layers=1,
                            bidirectional=True,
                            dropout=self.args["dropout"])

    # 11. pointer network (for task 2)
    self.pointer_weight_a1 = Linear(self.args["hidden_size"] * 4, 1)
    self.pointer_weight_a2 = Linear(self.args["hidden_size"] * 4, 1)
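# Hedged usage sketch (not from the original repository): SPNet reads its hyperparameters
# from args["arch"]["args"], so the config has to be nested that way. The sizes below and
# the index format passed to PartiallyTrainEmbedding are assumptions for illustration only.
import torch

sp_config = {"arch": {"args": {"word_dim": 300, "hidden_size": 128, "dropout": 0.2}}}
sp_pretrained = torch.randn(30000, 300)    # stand-in for pretrained word vectors
sp_trainable_idx = torch.arange(1000)      # e.g. keep the 1,000 most frequent words trainable
sp_model = SPNet(sp_config, sp_pretrained, sp_trainable_idx)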
def __init__(self, args, pretrained):
    super(LATTE, self).__init__()
    self.args = args

    # 1. Character Embedding Layer
    self.char_emb = nn.Embedding(args.char_vocab_size, args.char_dim, padding_idx=1)
    nn.init.uniform_(self.char_emb.weight, -0.001, 0.001)
    self.char_conv = nn.Sequential(
        nn.Conv2d(1, args.char_channel_size, (args.char_dim, args.char_channel_width)),
        nn.ReLU())

    # 2. Word Embedding Layer
    # initialize word embedding with GloVe
    self.word_emb = nn.Embedding.from_pretrained(pretrained, freeze=True)

    # highway network
    assert self.args.hidden_size * 2 == (self.args.char_channel_size + self.args.word_dim)
    for i in range(2):
        setattr(self, 'highway_linear{}'.format(i),
                nn.Sequential(Linear(args.hidden_size * 2, args.hidden_size * 2),
                              nn.ReLU()))
        setattr(self, 'highway_gate{}'.format(i),
                nn.Sequential(Linear(args.hidden_size * 2, args.hidden_size * 2),
                              nn.Sigmoid()))

    # 3. Contextual Embedding Layer
    self.context_LSTM = LSTM(input_size=args.hidden_size * 2,
                             hidden_size=args.hidden_size,
                             bidirectional=True,
                             batch_first=True,
                             dropout=args.dropout)

    # 4. Attention Flow Layer
    self.att_weight_c = Linear(args.hidden_size * 2, 1)
    self.att_weight_q = Linear(args.hidden_size * 2, 1)
    self.att_weight_cq = Linear(args.hidden_size * 2, 1)

    # ------------------------------------------------------------------
    # 5. feed-forward network (input 8800 = 11 x 800)
    self.fc1 = Linear(8800, 200)
    self.fc2 = Linear(8800, 200)
    self.relu = nn.ReLU()

    # 6. Distribution Similarity (2200 = 11 x 200; up and uc concatenated, k = 2048)
    self.fc4 = Linear(2200, 2048)
    self.fc3 = Linear(200, 2048)
    # self.cosSi = nn.CosineSimilarity(dim=0, eps=1e-6)  # TODO: confirm the dim argument

    # 7. Known Type Classifier
    self.fc5 = Linear(2048, 3)  # TODO: confirm the number of output classes
    self.fc6 = Linear(2048, 3)

    # 8. ranking score layer
    self.f_weight = Linear(200, 1)
    self.g_weight = Linear(2048, 1)
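# Hedged usage sketch (illustrative values, not from the original code): LATTE reads
# attribute-style hyperparameters, so a SimpleNamespace (or an argparse.Namespace) is
# enough. The values below satisfy the highway-network assertion
# hidden_size * 2 == char_channel_size + word_dim (200 == 100 + 100); the hard-coded
# 8800/2200 dimensions above also suggest hidden_size = 100 (11 x 800 and 11 x 200).
import torch
from types import SimpleNamespace

latte_args = SimpleNamespace(char_vocab_size=262, char_dim=8, char_channel_width=5,
                             char_channel_size=100, word_dim=100, hidden_size=100,
                             dropout=0.2)
glove_stub = torch.randn(30000, latte_args.word_dim)  # stand-in for GloVe vectors
latte = LATTE(latte_args, glove_stub)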
def __init__(self, args, pretrained):
    super(BiDAF, self).__init__()
    self.args = args

    # 1. Character Embedding Layer
    self.char_emb = nn.Embedding(args.char_vocab_size, args.char_dim, padding_idx=1)
    nn.init.uniform_(self.char_emb.weight, -0.001, 0.001)
    self.char_conv = nn.Sequential(
        nn.Conv2d(1, args.char_channel_size, (args.char_dim, args.char_channel_width)),
        nn.ReLU())

    # 2. Word Embedding Layer
    # initialize word embedding with GloVe
    self.word_emb = nn.Embedding.from_pretrained(pretrained, freeze=True)

    # highway network
    assert self.args.hidden_size * 2 == (self.args.char_channel_size + self.args.word_dim)
    for i in range(2):
        setattr(self, 'highway_linear{}'.format(i),
                nn.Sequential(Linear(args.hidden_size * 2, args.hidden_size * 2),
                              nn.ReLU()))
        setattr(self, 'highway_gate{}'.format(i),
                nn.Sequential(Linear(args.hidden_size * 2, args.hidden_size * 2),
                              nn.Sigmoid()))

    # 3. Contextual Embedding Layer
    self.context_LSTM = LSTM(input_size=args.hidden_size * 2,
                             hidden_size=args.hidden_size,
                             bidirectional=True,
                             batch_first=True,
                             dropout=args.dropout)

    # 4. Attention Flow Layer
    self.att_weight_c = Linear(args.hidden_size * 2, 1)
    self.att_weight_q = Linear(args.hidden_size * 2, 1)
    self.att_weight_cq = Linear(args.hidden_size * 2, 1)

    # 5. Modeling Layer
    self.modeling_LSTM1 = LSTM(input_size=args.hidden_size * 8,
                               hidden_size=args.hidden_size,
                               bidirectional=True,
                               batch_first=True,
                               dropout=args.dropout)
    self.modeling_LSTM2 = LSTM(input_size=args.hidden_size * 2,
                               hidden_size=args.hidden_size,
                               bidirectional=True,
                               batch_first=True,
                               dropout=args.dropout)

    # 6. Output Layer
    self.p1_weight_g = Linear(args.hidden_size * 8, 1, dropout=args.dropout)
    self.p1_weight_m = Linear(args.hidden_size * 2, 1, dropout=args.dropout)
    self.p2_weight_g = Linear(args.hidden_size * 8, 1, dropout=args.dropout)
    self.p2_weight_m = Linear(args.hidden_size * 2, 1, dropout=args.dropout)
    self.output_LSTM = LSTM(input_size=args.hidden_size * 2,
                            hidden_size=args.hidden_size,
                            bidirectional=True,
                            batch_first=True,
                            dropout=args.dropout)

    self.dropout = nn.Dropout(p=args.dropout)
def __init__(self, args, pretrained, trainable_weight_idx):
    super(RNet, self).__init__()
    self.args = args["arch"]["args"]

    # 1. Word Embedding Layer
    # self.word_emb = PartiallyTrainEmbedding(pretrained, trainable_weight_idx)
    self.word_emb = nn.Embedding.from_pretrained(pretrained, freeze=True)

    # 2. Question & Passage Embedding Layer
    self.context_rnn = GRU(input_size=self.args["word_dim"],
                           hidden_size=self.args["hidden_size"],
                           bidirectional=True,
                           batch_first=True,
                           dropout=self.args["dropout"])

    # 3. Question-Passage Matching Layer
    self.match_rnn = MatchRNN(mode="GRU",
                              hp_input_size=self.args["hidden_size"] * 2,
                              hq_input_size=self.args["hidden_size"] * 2,
                              hidden_size=self.args["hidden_size"],
                              bidirectional=True,
                              gated_attention=True,
                              dropout_p=self.args["dropout"],
                              enable_layer_norm=True)

    # 4. Passage Self-Matching Layer
    self.self_match_rnn = MatchRNN(mode="GRU",
                                   hp_input_size=self.args["hidden_size"] * 2,
                                   hq_input_size=self.args["hidden_size"] * 2,
                                   hidden_size=self.args["hidden_size"],
                                   bidirectional=True,
                                   gated_attention=True,
                                   dropout_p=self.args["dropout"],
                                   enable_layer_norm=True)

    # 5. Modeling Layer
    self.modeling_rnn1 = GRU(input_size=self.args["hidden_size"] * 2,
                             hidden_size=self.args["hidden_size"],
                             bidirectional=True,
                             batch_first=True,
                             dropout=self.args["dropout"])

    # 6. Pointer Network Layer
    self.p1_weight_g = Linear(self.args["hidden_size"] * 2, 1, dropout=self.args["dropout"])
    self.p1_weight_m = Linear(self.args["hidden_size"] * 2, 1, dropout=self.args["dropout"])
    self.p2_weight_g = Linear(self.args["hidden_size"] * 2, 1, dropout=self.args["dropout"])
    self.p2_weight_m = Linear(self.args["hidden_size"] * 2, 1, dropout=self.args["dropout"])
    self.output_rnn = GRU(input_size=self.args["hidden_size"] * 2,
                          hidden_size=self.args["hidden_size"],
                          bidirectional=True,
                          batch_first=True,
                          dropout=self.args["dropout"])
    self.dropout = nn.Dropout(p=self.args["dropout"])

    # paragraph ranking (for task 2)
    self.score_weight_qp = nn.Bilinear(self.args["hidden_size"] * 2,
                                       self.args["hidden_size"] * 2, 1)
    # self-align layer for the question
    self.align_weight_q = Linear(self.args["hidden_size"] * 2, 1)
    # self-align layer for the paragraphs
    self.align_weight_p = Linear(self.args["hidden_size"] * 2, 1)
def __init__(self, args):
    super(BiDAF, self).__init__()
    self.args = args

    # highway network (character channels only in this variant)
    assert self.args.hidden_size * 2 == self.args.char_channel_size
    for i in range(2):
        setattr(self, f'highway_linear{i}',
                nn.Sequential(Linear(args.hidden_size * 2, args.hidden_size * 2),
                              nn.ReLU()))
        setattr(self, f'highway_gate{i}',
                nn.Sequential(Linear(args.hidden_size * 2, args.hidden_size * 2),
                              nn.Sigmoid()))

    # 3. Contextual Embedding Layer
    self.context_LSTM = LSTM(input_size=args.hidden_size * 2,
                             hidden_size=args.hidden_size,
                             bidirectional=True,
                             batch_first=True,
                             dropout=args.dropout)

    # 4. Attention Flow Layer
    self.att_weight_c = Linear(args.hidden_size * 2, 1)
    self.att_weight_q = Linear(args.hidden_size * 2, 1)
    self.att_weight_cq = Linear(args.hidden_size * 2, 1)

    # 5. Modeling Layer
    self.modeling_LSTM1 = LSTM(input_size=args.hidden_size * 8,
                               hidden_size=args.hidden_size,
                               bidirectional=True,
                               batch_first=True,
                               dropout=args.dropout)
    self.modeling_LSTM2 = LSTM(input_size=args.hidden_size * 2,
                               hidden_size=args.hidden_size,
                               bidirectional=True,
                               batch_first=True,
                               dropout=args.dropout)

    # 6. Output Layer
    self.p1_weight_g = Linear(args.hidden_size * 8, 1, dropout=args.dropout)
    self.p1_weight_m = Linear(args.hidden_size * 2, 1, dropout=args.dropout)
    self.p2_weight_g = Linear(args.hidden_size * 8, 1, dropout=args.dropout)
    self.p2_weight_m = Linear(args.hidden_size * 2, 1, dropout=args.dropout)
    self.output_LSTM = LSTM(input_size=args.hidden_size * 2,
                            hidden_size=args.hidden_size,
                            bidirectional=True,
                            batch_first=True,
                            dropout=args.dropout)

    # prevent overfitting
    self.dropout = nn.Dropout(p=args.dropout)
def __init__(self, hps):
    super(BiDAF, self).__init__()
    self.hps = hps

    # 1. Character Embedding Layer
    self.char_emb = nn.Embedding(hps["char_vocab_size"], hps["char_dim"], padding_idx=1)
    nn.init.uniform_(self.char_emb.weight, -0.001, 0.001)
    # derive the character channel size so that hidden_size * 2 == char_channel_size + word_dim
    hps["char_channel_size"] = hps["hidden_size"] * 2 - hps["word_dim"]
    assert hps["char_channel_size"] > 0
    self.char_conv = nn.Conv2d(1, hps["char_channel_size"],
                               (hps["char_dim"], hps["char_channel_width"]))

    # 2. Word Embedding Layer
    # randomly initialized 50,000 x 50 embedding (pretrained GloVe vectors are not loaded in this variant)
    self.word_emb = nn.Embedding(50000, 50)

    # highway network
    # assert self.hps["hidden_size"] * 2 == (self.hps["char_channel_size"] + self.hps["word_dim"])
    for i in range(2):
        setattr(self, f'highway_linear{i}',
                nn.Sequential(Linear(hps["hidden_size"] * 2, hps["hidden_size"] * 2),
                              nn.ReLU()))
        setattr(self, f'highway_gate{i}',
                nn.Sequential(Linear(hps["hidden_size"] * 2, hps["hidden_size"] * 2),
                              nn.Sigmoid()))

    # 3. Contextual Embedding Layer
    self.context_LSTM = LSTM(input_size=hps["hidden_size"] * 2,
                             hidden_size=hps["hidden_size"],
                             bidirectional=True,
                             batch_first=True,
                             dropout=hps["dropout"])

    # 4. Attention Flow Layer
    self.att_weight_c = Linear(hps["hidden_size"] * 2, 1)
    self.att_weight_q = Linear(hps["hidden_size"] * 2, 1)
    self.att_weight_cq = Linear(hps["hidden_size"] * 2, 1)

    # 5. Modeling Layer
    self.modeling_LSTM1 = LSTM(input_size=hps["hidden_size"] * 8,
                               hidden_size=hps["hidden_size"],
                               bidirectional=True,
                               batch_first=True,
                               dropout=hps["dropout"])
    self.modeling_LSTM2 = LSTM(input_size=hps["hidden_size"] * 2,
                               hidden_size=hps["hidden_size"],
                               bidirectional=True,
                               batch_first=True,
                               dropout=hps["dropout"])

    # 6. Output Layer
    self.p1_weight_g = Linear(hps["hidden_size"] * 8, 1, dropout=hps["dropout"])
    self.p1_weight_m = Linear(hps["hidden_size"] * 2, 1, dropout=hps["dropout"])
    self.p2_weight_g = Linear(hps["hidden_size"] * 8, 1, dropout=hps["dropout"])
    self.p2_weight_m = Linear(hps["hidden_size"] * 2, 1, dropout=hps["dropout"])
    self.output_LSTM = LSTM(input_size=hps["hidden_size"] * 2,
                            hidden_size=hps["hidden_size"],
                            bidirectional=True,
                            batch_first=True,
                            dropout=hps["dropout"])

    self.dropout = nn.Dropout(p=hps["dropout"])
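# Hedged usage sketch (illustrative values only): this variant derives
# char_channel_size = hidden_size * 2 - word_dim inside the constructor, so the hps dict
# only needs the remaining keys. With hidden_size=100 and word_dim=50 the derived channel
# size is 2 * 100 - 50 = 150, which satisfies the `> 0` assertion.
hps_example = {"char_vocab_size": 262, "char_dim": 8, "char_channel_width": 5,
               "word_dim": 50, "hidden_size": 100, "dropout": 0.2}
bidaf_from_hps = BiDAF(hps_example)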
def __init__(self, char_vocab_size, word_vocab_size, pretrained, word_dim=100, char_dim=8,
             char_channel_width=5, char_channel_size=100, dropout_rate=0.2, hidden_size=100):
    super(BiDAF, self).__init__()
    self.word_dim = word_dim
    self.char_dim = char_dim
    self.char_channel_width = char_channel_width
    self.hidden_size = hidden_size
    self.dropout_rate = dropout_rate
    self.char_vocab_size = char_vocab_size
    self.char_channel_size = char_channel_size
    self.word_vocab_size = word_vocab_size

    # 1. Character Embedding Layer
    self.char_emb = nn.Embedding(self.char_vocab_size, self.char_dim, padding_idx=1)
    nn.init.uniform_(self.char_emb.weight, -0.001, 0.001)
    self.char_conv = nn.Conv2d(1, self.char_channel_size,
                               (self.char_dim, self.char_channel_width))

    # 2. Word Embedding Layer
    # initialize word embedding with GloVe; freeze the layer to prevent gradient updates
    self.word_emb = nn.Embedding.from_pretrained(pretrained, freeze=True)

    # highway network: the concatenated char + word features must match the LSTM input size
    assert (self.hidden_size * 2) == (self.char_channel_size + self.word_dim)
    # create 2 highway layers
    for i in range(2):
        setattr(self, 'highway_linear' + str(i),
                nn.Sequential(Linear(self.hidden_size * 2, self.hidden_size * 2),
                              nn.ReLU()))
        setattr(self, 'highway_gate' + str(i),
                nn.Sequential(Linear(self.hidden_size * 2, self.hidden_size * 2),
                              nn.Sigmoid()))

    # 3. Contextual Embedding Layer
    self.context_LSTM = LSTM(input_size=self.hidden_size * 2,
                             hidden_size=self.hidden_size,
                             bidirectional=True,
                             batch_first=True,
                             dropout=self.dropout_rate)

    # 4. Attention Flow Layer
    self.att_weight_c = Linear(self.hidden_size * 2, 1)
    self.att_weight_q = Linear(self.hidden_size * 2, 1)
    self.att_weight_cq = Linear(self.hidden_size * 2, 1)

    # 5. Modeling Layer
    self.modeling_LSTM1 = LSTM(input_size=self.hidden_size * 8,
                               hidden_size=self.hidden_size,
                               bidirectional=True,
                               batch_first=True,
                               dropout=self.dropout_rate)
    self.modeling_LSTM2 = LSTM(input_size=self.hidden_size * 2,
                               hidden_size=self.hidden_size,
                               bidirectional=True,
                               batch_first=True,
                               dropout=self.dropout_rate)

    # 6. Output Layer
    # no softmax is applied here; see
    # https://stackoverflow.com/questions/57516027/does-pytorch-apply-softmax-automatically-in-nn-linear
    self.p1_weight_g = Linear(self.hidden_size * 8, 1, dropout=self.dropout_rate)
    self.p1_weight_m = Linear(self.hidden_size * 2, 1, dropout=self.dropout_rate)
    self.p2_weight_g = Linear(self.hidden_size * 8, 1, dropout=self.dropout_rate)
    self.p2_weight_m = Linear(self.hidden_size * 2, 1, dropout=self.dropout_rate)
    self.output_LSTM = LSTM(input_size=self.hidden_size * 2,
                            hidden_size=self.hidden_size,
                            bidirectional=True,
                            batch_first=True,
                            dropout=self.dropout_rate)

    self.dropout = nn.Dropout(p=self.dropout_rate)
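# Hedged usage sketch (hypothetical sizes): every hyperparameter is an explicit keyword
# argument, and the defaults already satisfy the assertion
# hidden_size * 2 == char_channel_size + word_dim (200 == 100 + 100), so only the vocab
# sizes and a pretrained embedding matrix need to be supplied.
import torch

glove_like = torch.randn(50000, 100)   # stand-in for 100-d GloVe vectors
bidaf = BiDAF(char_vocab_size=262, word_vocab_size=50000, pretrained=glove_like)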