Example #1
    def __init__(self, args, pretrained, trainable_weight_idx):
        super(SPNet, self).__init__()
        self.args = args["arch"]["args"]

        # 1. word embedding layer
        # self.word_emb = nn.Embedding.from_pretrained(pretrained, freeze=True)
        self.word_emb = PartiallyTrainEmbedding(pretrained,
                                                trainable_weight_idx)

        # 2. contextual embedding layer
        self.context_lstm = LSTM(input_size=self.args["word_dim"],
                                 hidden_size=self.args["hidden_size"],
                                 batch_first=True,
                                 num_layers=1,
                                 bidirectional=True,
                                 dropout=self.args["dropout"])
        # 3. co-attention layer
        self.att_weight_co = Linear(self.args["hidden_size"] * 2, 1)

        # 4. fuse layer1 to combine
        self.fuse_m_layer = Linear(self.args["hidden_size"] * 8,
                                   self.args["hidden_size"] * 2)
        self.fuse_g_layer = Linear(self.args["hidden_size"] * 8,
                                   self.args["hidden_size"] * 2)

        # 5. self-attention layer
        self.att_weight_s = Linear(1, 1)

        # 6. fuse layer2 to combine

        # 7. question self-align layer
        self.align_weight_q = Linear(self.args["hidden_size"] * 2, 1)

        # 8. para self-align layer
        self.align_weight_p = Linear(self.args["hidden_size"] * 2, 1)

        # 9. relevance score (for task 1)
        self.score_weight_qp = nn.Bilinear(self.args["hidden_size"] * 2,
                                           self.args["hidden_size"] * 2, 1)

        # 10. shared LSTM layer (for task 2)
        self.shared_lstm = LSTM(input_size=self.args["hidden_size"] * 2,
                                hidden_size=self.args["hidden_size"],
                                batch_first=True,
                                num_layers=1,
                                bidirectional=True,
                                dropout=self.args["dropout"])
        # 11. pointer network (for task 2)
        self.pointer_weight_a1 = Linear(self.args["hidden_size"] * 4, 1)
        self.pointer_weight_a2 = Linear(self.args["hidden_size"] * 4, 1)
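
Example #1 builds its word embeddings with a PartiallyTrainEmbedding module that is referenced but not shown. A minimal sketch of one way such a layer could work, assuming it keeps the pretrained matrix frozen and learns new vectors only for the rows listed in trainable_weight_idx (the class name and constructor signature come from the example; the internals below are an assumption):

import torch
import torch.nn as nn


class PartiallyTrainEmbedding(nn.Module):
    """Frozen pretrained embedding with trainable vectors for a chosen
    subset of rows (sketch, not the original implementation)."""

    def __init__(self, pretrained, trainable_weight_idx):
        super().__init__()
        # frozen copy of the full pretrained matrix
        self.frozen = nn.Embedding.from_pretrained(pretrained, freeze=True)
        # trainable rows, initialized from the same pretrained vectors
        self.trainable = nn.Parameter(pretrained[trainable_weight_idx].clone())
        # map vocabulary id -> row in self.trainable (-1 means "use frozen row")
        index_map = torch.full((pretrained.size(0),), -1, dtype=torch.long)
        index_map[trainable_weight_idx] = torch.arange(len(trainable_weight_idx))
        self.register_buffer("index_map", index_map)

    def forward(self, token_ids):
        out = self.frozen(token_ids)
        local = self.index_map[token_ids]      # -1 where the row stays frozen
        mask = local >= 0
        if mask.any():
            out = out.clone()
            out[mask] = self.trainable[local[mask]]
        return out
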
Example #2
    def __init__(self, args, pretrained):
        super(LATTE, self).__init__()
        self.args = args

        # 1. Character Embedding Layer
        self.char_emb = nn.Embedding(args.char_vocab_size,
                                     args.char_dim,
                                     padding_idx=1)
        nn.init.uniform_(self.char_emb.weight, -0.001, 0.001)

        self.char_conv = nn.Sequential(
            nn.Conv2d(1, args.char_channel_size,
                      (args.char_dim, args.char_channel_width)), nn.ReLU())

        # 2. Word Embedding Layer
        # initialize word embedding with GloVe
        self.word_emb = nn.Embedding.from_pretrained(pretrained, freeze=True)

        # highway network
        assert self.args.hidden_size * 2 == (self.args.char_channel_size +
                                             self.args.word_dim)
        for i in range(2):
            setattr(
                self, 'highway_linear{}'.format(i),
                nn.Sequential(
                    Linear(args.hidden_size * 2, args.hidden_size * 2),
                    nn.ReLU()))
            setattr(
                self, 'highway_gate{}'.format(i),
                nn.Sequential(
                    Linear(args.hidden_size * 2, args.hidden_size * 2),
                    nn.Sigmoid()))

        # 3. Contextual Embedding Layer
        self.context_LSTM = LSTM(input_size=args.hidden_size * 2,
                                 hidden_size=args.hidden_size,
                                 bidirectional=True,
                                 batch_first=True,
                                 dropout=args.dropout)

        # 4. Attention Flow Layer
        self.att_weight_c = Linear(args.hidden_size * 2, 1)
        self.att_weight_q = Linear(args.hidden_size * 2, 1)
        self.att_weight_cq = Linear(args.hidden_size * 2, 1)

        #-------------------------------------------------------------------------------------
        # 5. feed_forward_network
        self.fc1 = Linear(8800, 200)
        self.fc2 = Linear(8800, 200)  # 8800 = 11 x 800
        self.relu = nn.ReLU()

        # 6. Distribution Similarity
        self.fc4 = Linear(2200, 2048)  # 2200 = 11 * 200 (u_p and u_c concatenated); k = 2048
        self.fc3 = Linear(200, 2048)
        self.cosSi = nn.CosineSimilarity(dim=0, eps=1e-6)  # TODO: double-check the dim value

        # 7. Known Type Classifier
        self.fc5 = Linear(2048, 3)  # TODO: confirm output size
        self.fc6 = Linear(2048, 3)

        # 8. ranking score layer
        self.f_weight = Linear(200, 1)
        self.g_weight = Linear(2048, 1)
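
Examples #2 and #3 (and the later BiDAF variants) register highway layers with setattr, but the snippets end before the forward pass. A minimal sketch of the standard highway update those highway_linear{i}/highway_gate{i} pairs imply (the helper name highway_network is an assumption):

def highway_network(module, x, num_layers=2):
    """Highway update x <- g * H(x) + (1 - g) * x with a sigmoid gate g,
    using the highway_linear{i}/highway_gate{i} modules registered above."""
    for i in range(num_layers):
        h = getattr(module, f'highway_linear{i}')(x)  # candidate transform H(x)
        g = getattr(module, f'highway_gate{i}')(x)    # gate values in (0, 1)
        x = g * h + (1 - g) * x                       # mix transform and carry
    return x
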
Example #3
    def __init__(self, args, pretrained):
        super(BiDAF, self).__init__()
        self.args = args

        # 1. Character Embedding Layer
        self.char_emb = nn.Embedding(args.char_vocab_size, args.char_dim, padding_idx=1)
        nn.init.uniform_(self.char_emb.weight, -0.001, 0.001)

        self.char_conv = nn.Sequential(
            nn.Conv2d(1, args.char_channel_size, (args.char_dim, args.char_channel_width)),
            nn.ReLU()
            )

        # 2. Word Embedding Layer
        # initialize word embedding with GloVe
        self.word_emb = nn.Embedding.from_pretrained(pretrained, freeze=True)

        # highway network
        assert self.args.hidden_size * 2 == (self.args.char_channel_size + self.args.word_dim)
        for i in range(2):
            setattr(self, 'highway_linear{}'.format(i),
                    nn.Sequential(Linear(args.hidden_size * 2, args.hidden_size * 2),
                                  nn.ReLU()))
            setattr(self, 'highway_gate{}'.format(i),
                    nn.Sequential(Linear(args.hidden_size * 2, args.hidden_size * 2),
                                  nn.Sigmoid()))

        # 3. Contextual Embedding Layer
        self.context_LSTM = LSTM(input_size=args.hidden_size * 2,
                                 hidden_size=args.hidden_size,
                                 bidirectional=True,
                                 batch_first=True,
                                 dropout=args.dropout)

        # 4. Attention Flow Layer
        self.att_weight_c = Linear(args.hidden_size * 2, 1)
        self.att_weight_q = Linear(args.hidden_size * 2, 1)
        self.att_weight_cq = Linear(args.hidden_size * 2, 1)

        # 5. Modeling Layer
        self.modeling_LSTM1 = LSTM(input_size=args.hidden_size * 8,
                                   hidden_size=args.hidden_size,
                                   bidirectional=True,
                                   batch_first=True,
                                   dropout=args.dropout)

        self.modeling_LSTM2 = LSTM(input_size=args.hidden_size * 2,
                                   hidden_size=args.hidden_size,
                                   bidirectional=True,
                                   batch_first=True,
                                   dropout=args.dropout)

        # 6. Output Layer
        self.p1_weight_g = Linear(args.hidden_size * 8, 1, dropout=args.dropout)
        self.p1_weight_m = Linear(args.hidden_size * 2, 1, dropout=args.dropout)
        self.p2_weight_g = Linear(args.hidden_size * 8, 1, dropout=args.dropout)
        self.p2_weight_m = Linear(args.hidden_size * 2, 1, dropout=args.dropout)

        self.output_LSTM = LSTM(input_size=args.hidden_size * 2,
                                hidden_size=args.hidden_size,
                                bidirectional=True,
                                batch_first=True,
                                dropout=args.dropout)

        self.dropout = nn.Dropout(p=args.dropout)
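
Several layers in Examples #3 through #7 pass a dropout argument to Linear, which torch.nn.Linear does not accept, so the snippets rely on a project-local wrapper that is not shown. A minimal sketch of such a wrapper, assuming it applies dropout to the input before the affine transform (the initialization choice is also an assumption):

import torch.nn as nn


class Linear(nn.Module):
    """nn.Linear preceded by optional input dropout (sketch of the wrapper
    the examples appear to use; the original implementation is not shown)."""

    def __init__(self, in_features, out_features, dropout=0.0):
        super().__init__()
        self.dropout = nn.Dropout(p=dropout) if dropout > 0 else None
        self.linear = nn.Linear(in_features, out_features)
        nn.init.kaiming_normal_(self.linear.weight)
        nn.init.zeros_(self.linear.bias)

    def forward(self, x):
        if self.dropout is not None:
            x = self.dropout(x)
        return self.linear(x)
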
Example #4
    def __init__(self, args, pretrained, trainable_weight_idx):
        super(RNet, self).__init__()
        self.args = args["arch"]["args"]

        # 1. Word Embedding Layer
        # self.word_emb = PartiallyTrainEmbedding(pretrained, trainable_weight_idx)
        self.word_emb = nn.Embedding.from_pretrained(pretrained, freeze=True)

        # 2. Question & Passage Embedding Layer
        self.context_rnn = GRU(input_size=self.args["word_dim"],
                               hidden_size=self.args["hidden_size"],
                               bidirectional=True,
                               batch_first=True,
                               dropout=self.args["dropout"])

        # 3. Question-Passage Matching layer
        self.match_rnn = MatchRNN(mode="GRU",
                                  hp_input_size=self.args["hidden_size"] * 2,
                                  hq_input_size=self.args["hidden_size"] * 2,
                                  hidden_size=self.args["hidden_size"],
                                  bidirectional=True,
                                  gated_attention=True,
                                  dropout_p=self.args["dropout"],
                                  enable_layer_norm=True)

        # 4. Passage self-Matching layer
        self.self_match_rnn = MatchRNN(
            mode="GRU",
            hp_input_size=self.args["hidden_size"] * 2,
            hq_input_size=self.args["hidden_size"] * 2,
            hidden_size=self.args["hidden_size"],
            bidirectional=True,
            gated_attention=True,
            dropout_p=self.args["dropout"],
            enable_layer_norm=True)

        # 5. Modeling layer
        self.modeling_rnn1 = GRU(input_size=self.args["hidden_size"] * 2,
                                 hidden_size=self.args["hidden_size"],
                                 bidirectional=True,
                                 batch_first=True,
                                 dropout=self.args["dropout"])

        # 6. Pointer Network layer
        self.p1_weight_g = Linear(self.args["hidden_size"] * 2,
                                  1,
                                  dropout=self.args["dropout"])
        self.p1_weight_m = Linear(self.args["hidden_size"] * 2,
                                  1,
                                  dropout=self.args["dropout"])
        self.p2_weight_g = Linear(self.args["hidden_size"] * 2,
                                  1,
                                  dropout=self.args["dropout"])
        self.p2_weight_m = Linear(self.args["hidden_size"] * 2,
                                  1,
                                  dropout=self.args["dropout"])

        self.output_rnn = GRU(input_size=self.args["hidden_size"] * 2,
                              hidden_size=self.args["hidden_size"],
                              bidirectional=True,
                              batch_first=True,
                              dropout=self.args["dropout"])

        self.dropout = nn.Dropout(p=self.args["dropout"])

        # para ranking (for task 2)
        self.score_weight_qp = nn.Bilinear(self.args["hidden_size"] * 2,
                                           self.args["hidden_size"] * 2, 1)
        # self-align layer for question
        self.align_weight_q = Linear(self.args["hidden_size"] * 2, 1)
        # self-align layer for paras
        self.align_weight_p = Linear(self.args["hidden_size"] * 2, 1)
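
The ranking head at the end of Example #4 pairs two self-align layers with a bilinear scorer. A minimal sketch of how such a head could be applied, assuming q_enc and p_enc are BiGRU outputs of shape (batch, seq_len, hidden_size * 2), "self-align" means attention pooling over time, and padding masks are omitted (the forward logic below is an assumption, not the original model code):

import torch
import torch.nn.functional as F


def relevance_score(model, q_enc, p_enc):
    """Pool question and passage encodings with the self-align weights,
    then score the pair with the bilinear layer (sketch)."""
    # attention weights over time: (batch, seq_len, 1) -> (batch, seq_len)
    q_att = F.softmax(model.align_weight_q(q_enc).squeeze(-1), dim=-1)
    p_att = F.softmax(model.align_weight_p(p_enc).squeeze(-1), dim=-1)
    # weighted sum over the sequence dimension: (batch, hidden_size * 2)
    q_vec = torch.bmm(q_att.unsqueeze(1), q_enc).squeeze(1)
    p_vec = torch.bmm(p_att.unsqueeze(1), p_enc).squeeze(1)
    # bilinear relevance score: (batch, 1)
    return model.score_weight_qp(q_vec, p_vec)
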
Example #5
    def __init__(self, args):
        super(BiDAF, self).__init__()
        self.args = args

        assert self.args.hidden_size * 2 == self.args.char_channel_size

        for i in range(2):
            setattr(
                self, f'highway_linear{i}',
                nn.Sequential(
                    Linear(args.hidden_size * 2, args.hidden_size * 2),
                    nn.ReLU()))
            setattr(
                self, f'highway_gate{i}',
                nn.Sequential(
                    Linear(args.hidden_size * 2, args.hidden_size * 2),
                    nn.Sigmoid()))

        # 3. Contextual Embedding Layer
        self.context_LSTM = LSTM(input_size=args.hidden_size * 2,
                                 hidden_size=args.hidden_size,
                                 bidirectional=True,
                                 batch_first=True,
                                 dropout=args.dropout)

        # 4. Attention Flow Layer
        self.att_weight_c = Linear(args.hidden_size * 2, 1)
        self.att_weight_q = Linear(args.hidden_size * 2, 1)
        self.att_weight_cq = Linear(args.hidden_size * 2, 1)

        # 5. Modeling Layer
        self.modeling_LSTM1 = LSTM(input_size=args.hidden_size * 8,
                                   hidden_size=args.hidden_size,
                                   bidirectional=True,
                                   batch_first=True,
                                   dropout=args.dropout)

        self.modeling_LSTM2 = LSTM(input_size=args.hidden_size * 2,
                                   hidden_size=args.hidden_size,
                                   bidirectional=True,
                                   batch_first=True,
                                   dropout=args.dropout)

        # 6. Output Layer
        self.p1_weight_g = Linear(args.hidden_size * 8,
                                  1,
                                  dropout=args.dropout)
        self.p1_weight_m = Linear(args.hidden_size * 2,
                                  1,
                                  dropout=args.dropout)
        self.p2_weight_g = Linear(args.hidden_size * 8,
                                  1,
                                  dropout=args.dropout)
        self.p2_weight_m = Linear(args.hidden_size * 2,
                                  1,
                                  dropout=args.dropout)

        self.output_LSTM = LSTM(input_size=args.hidden_size * 2,
                                hidden_size=args.hidden_size,
                                bidirectional=True,
                                batch_first=True,
                                dropout=args.dropout)

        # Prevent overfitting
        self.dropout = nn.Dropout(p=args.dropout)
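
The three att_weight_* layers form the trilinear similarity of BiDAF's attention flow. A minimal sketch of how they are commonly combined into the similarity matrix S, assuming c and q are contextual encodings of shape (batch, c_len, hidden_size * 2) and (batch, q_len, hidden_size * 2):

import torch


def att_flow_similarity(model, c, q):
    """S[b, i, j] = w_c . c_i + w_q . q_j + w_cq . (c_i * q_j)  (sketch)."""
    q_len = q.size(1)
    s_c = model.att_weight_c(c)                              # (batch, c_len, 1)
    s_q = model.att_weight_q(q).transpose(1, 2)              # (batch, 1, q_len)
    # elementwise-product term, built one question position at a time
    s_cq = torch.stack(
        [model.att_weight_cq(c * q[:, j, :].unsqueeze(1)).squeeze(-1)
         for j in range(q_len)], dim=-1)                     # (batch, c_len, q_len)
    return s_c + s_q + s_cq                                  # broadcasts to (batch, c_len, q_len)
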
Example #6
    def __init__(self, hps):
        super(BiDAF, self).__init__()
        self.hps = hps
        # 1. Character Embedding Layer
        self.char_emb = nn.Embedding(hps["char_vocab_size"],
                                     hps["char_dim"],
                                     padding_idx=1)
        nn.init.uniform_(self.char_emb.weight, -0.001, 0.001)
        hps["char_channel_size"] = hps["hidden_size"] * 2 - hps["word_dim"]
        assert hps["char_channel_size"] > 0

        self.char_conv = nn.Conv2d(
            1, hps["char_channel_size"],
            (hps["char_dim"], hps["char_channel_width"]))

        # 2. Word Embedding Layer
        # randomly initialized here (vocab_size=50000, dim=50); GloVe is not loaded in this variant
        self.word_emb = nn.Embedding(50000, 50)

        # highway network
        # assert self.hps["hidden_size"] * 2 == (self.hps["char_channel_size"] + self.hps["word_dim"])
        for i in range(2):
            setattr(
                self, f'highway_linear{i}',
                nn.Sequential(
                    Linear(hps["hidden_size"] * 2, hps["hidden_size"] * 2),
                    nn.ReLU()))
            setattr(
                self, f'highway_gate{i}',
                nn.Sequential(
                    Linear(hps["hidden_size"] * 2, hps["hidden_size"] * 2),
                    nn.Sigmoid()))

        # 3. Contextual Embedding Layer
        self.context_LSTM = LSTM(input_size=hps["hidden_size"] * 2,
                                 hidden_size=hps["hidden_size"],
                                 bidirectional=True,
                                 batch_first=True,
                                 dropout=hps["dropout"])

        # 4. Attention Flow Layer
        self.att_weight_c = Linear(hps["hidden_size"] * 2, 1)
        self.att_weight_q = Linear(hps["hidden_size"] * 2, 1)
        self.att_weight_cq = Linear(hps["hidden_size"] * 2, 1)

        # 5. Modeling Layer
        self.modeling_LSTM1 = LSTM(input_size=hps["hidden_size"] * 8,
                                   hidden_size=hps["hidden_size"],
                                   bidirectional=True,
                                   batch_first=True,
                                   dropout=hps["dropout"])

        self.modeling_LSTM2 = LSTM(input_size=hps["hidden_size"] * 2,
                                   hidden_size=hps["hidden_size"],
                                   bidirectional=True,
                                   batch_first=True,
                                   dropout=hps["dropout"])

        # 6. Output Layer
        self.p1_weight_g = Linear(hps["hidden_size"] * 8,
                                  1,
                                  dropout=hps["dropout"])
        self.p1_weight_m = Linear(hps["hidden_size"] * 2,
                                  1,
                                  dropout=hps["dropout"])
        self.p2_weight_g = Linear(hps["hidden_size"] * 8,
                                  1,
                                  dropout=hps["dropout"])
        self.p2_weight_m = Linear(hps["hidden_size"] * 2,
                                  1,
                                  dropout=hps["dropout"])

        self.output_LSTM = LSTM(input_size=hps["hidden_size"] * 2,
                                hidden_size=hps["hidden_size"],
                                bidirectional=True,
                                batch_first=True,
                                dropout=hps["dropout"])

        self.dropout = nn.Dropout(p=hps["dropout"])
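
In Example #6 the character path is sized so that its output concatenated with the 50-dim word embedding equals hidden_size * 2. The snippet stops before the forward pass, so here is a minimal sketch of the usual char-CNN step (convolution over character positions followed by max-pooling over time); the function name and exact reshaping are assumptions consistent with the layer shapes above:

import torch.nn.functional as F


def char_emb_layer(model, x, char_dim, char_channel_size):
    """x: (batch, seq_len, word_len) char ids -> (batch, seq_len, char_channel_size)."""
    batch_size, seq_len, word_len = x.size()
    x = model.char_emb(x)                          # (batch, seq_len, word_len, char_dim)
    x = x.transpose(2, 3).contiguous()             # (batch, seq_len, char_dim, word_len)
    x = x.view(-1, 1, char_dim, word_len)          # one single-channel "image" per word
    x = F.relu(model.char_conv(x)).squeeze(2)      # (batch * seq_len, channels, conv_len)
    x = F.max_pool1d(x, x.size(2)).squeeze(-1)     # max over time -> (batch * seq_len, channels)
    return x.view(batch_size, seq_len, char_channel_size)
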
Example #7
    def __init__(self,
                 char_vocab_size,
                 word_vocab_size,
                 pretrained,
                 word_dim=100,
                 char_dim=8,
                 char_channel_width=5,
                 char_channel_size=100,
                 dropout_rate=0.2,
                 hidden_size=100):

        super(BiDAF, self).__init__()

        self.word_dim = word_dim
        self.char_dim = char_dim
        self.char_channel_width = char_channel_width
        self.hidden_size = hidden_size
        self.dropout_rate = dropout_rate
        self.char_vocab_size = char_vocab_size
        self.char_channel_size = char_channel_size
        self.word_vocab_size = word_vocab_size

        # 1. Character Embedding Layer
        self.char_emb = nn.Embedding(self.char_vocab_size,
                                     self.char_dim,
                                     padding_idx=1)
        nn.init.uniform_(self.char_emb.weight, -0.001, 0.001)

        self.char_conv = nn.Conv2d(1, self.char_channel_size,
                                   (self.char_dim, self.char_channel_width))

        # 2. Word Embedding Layer
        # initialize word embedding with GloVe
        # Freeze layer to prevent gradient update
        self.word_emb = nn.Embedding.from_pretrained(pretrained, freeze=True)

        # highway network
        assert (self.hidden_size * 2) == (self.char_channel_size +
                                          self.word_dim)
        # Create 2 hidden layers
        for i in range(2):
            setattr(
                self, 'highway_linear' + str(i),
                nn.Sequential(
                    Linear(self.hidden_size * 2, self.hidden_size * 2),
                    nn.ReLU()))
            setattr(
                self, 'highway_gate' + str(i),
                nn.Sequential(
                    Linear(self.hidden_size * 2, self.hidden_size * 2),
                    nn.Sigmoid()))

        # 3. Contextual Embedding Layer
        self.context_LSTM = LSTM(input_size=self.hidden_size * 2,
                                 hidden_size=self.hidden_size,
                                 bidirectional=True,
                                 batch_first=True,
                                 dropout=self.dropout_rate)

        # 4. Attention Flow Layer
        self.att_weight_c = Linear(self.hidden_size * 2, 1)
        self.att_weight_q = Linear(self.hidden_size * 2, 1)
        self.att_weight_cq = Linear(self.hidden_size * 2, 1)

        # 5. Modeling Layer
        self.modeling_LSTM1 = LSTM(input_size=self.hidden_size * 8,
                                   hidden_size=self.hidden_size,
                                   bidirectional=True,
                                   batch_first=True,
                                   dropout=self.dropout_rate)

        self.modeling_LSTM2 = LSTM(input_size=self.hidden_size * 2,
                                   hidden_size=self.hidden_size,
                                   bidirectional=True,
                                   batch_first=True,
                                   dropout=self.dropout_rate)

        # 6. Output Layer
        # No softmax applied here reason: https://stackoverflow.com/questions/57516027/does-pytorch-apply-softmax-automatically-in-nn-linear
        self.p1_weight_g = Linear(self.hidden_size * 8,
                                  1,
                                  dropout=self.dropout_rate)
        self.p1_weight_m = Linear(self.hidden_size * 2,
                                  1,
                                  dropout=self.dropout_rate)
        self.p2_weight_g = Linear(self.hidden_size * 8,
                                  1,
                                  dropout=self.dropout_rate)
        self.p2_weight_m = Linear(self.hidden_size * 2,
                                  1,
                                  dropout=self.dropout_rate)

        self.output_LSTM = LSTM(input_size=self.hidden_size * 2,
                                hidden_size=self.hidden_size,
                                bidirectional=True,
                                batch_first=True,
                                dropout=self.dropout_rate)

        self.dropout = nn.Dropout(p=self.dropout_rate)
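
The output layer of Example #7 turns the attention-flow output g and the modeling output m into start and end logits, with the end pointer conditioned on a re-encoding of m. A minimal sketch of that step, assuming g has shape (batch, c_len, hidden_size * 8), m has shape (batch, c_len, hidden_size * 2), the LSTM wrapper follows torch.nn.LSTM's (output, state) return convention, and masking of padded positions is omitted:

def output_layer(model, g, m):
    """Return start and end logits over context positions (sketch)."""
    # start logits: combine attention-flow and modeling representations
    p1 = (model.p1_weight_g(g) + model.p1_weight_m(m)).squeeze(-1)   # (batch, c_len)
    # re-encode the modeling output before predicting the end position
    m2, _ = model.output_LSTM(m)                                     # (batch, c_len, hidden*2)
    p2 = (model.p2_weight_g(g) + model.p2_weight_m(m2)).squeeze(-1)  # (batch, c_len)
    return p1, p2
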