class Encoder(nn.Module): """ Encoder that just runs awd Inputs: input, seq_len - **input** of shape: Outputs: output - **output** is a tuple: (all_hid, last_hidden_states, emb) - NOT batch first """ def __init__(self, args): super().__init__() self.bi_awd = BiAWDEmbedding(ntoken=21, ninp=320, nhid=1280, nlayers=3, tie_weights=True) self.bi_awd.load_pretrained() def forward(self, inp, seq_lengths): with torch.no_grad(): (outputs, outputs_rev), (hidden, hidden_rev), emb = self.bi_awd( input=inp, seq_lengths=seq_lengths) return (outputs, outputs_rev), (hidden, hidden_rev), emb
class Encoder(nn.Module): """ Encoder with bi_awd concatenated to the LSTM input Parameters: -- bi_awd_layer: first, second or last -- project_size: size of projection layer from bi_awd to lstm Inputs: input, seq_len - **input** of shape Outputs: output - **output** of shape (batch_size, seq_len, hidden_size*2) """ def __init__(self, args, bi_awd_layer, project_size=None): super().__init__() self.args = args self.bi_awd_layer = bi_awd_layer self.project_size = project_size self.drop = nn.Dropout(args.hid_dropout) if project_size is not None and bi_awd_layer in ["first", "second"]: self.project = nn.Linear(2 * 1280, project_size, bias=False) elif project_size is not None and bi_awd_layer in ["last"]: self.project = nn.Linear(2 * 320, project_size, bias=False) if project_size is not None: self.lstm = nn.LSTM(project_size, args.n_hid, bidirectional=True, batch_first=True) elif bi_awd_layer in ["first", "second"]: self.lstm = nn.LSTM(2 * 1280, args.n_hid, bidirectional=True, batch_first=True) elif bi_awd_layer in ["last"]: self.lstm = nn.LSTM(2 * 320, args.n_hid, bidirectional=True, batch_first=True) init_weights(self) self.bi_awd = BiAWDEmbedding(ntoken=21, ninp=320, nhid=1280, nlayers=3, tie_weights=True) self.bi_awd.load_pretrained() def forward(self, inp, seq_lengths): with torch.no_grad(): (all_hid, all_hid_rev), _, _ = self.bi_awd( inp, seq_lengths) # all_hid, last_hidden_states, emb if self.bi_awd_layer == "first": bi_awd_hid = all_hid[0] bi_awd_hid_rev = all_hid_rev[0] bi_awd_hid = bi_awd_hid.permute(1, 0, 2) # (bs, seq_len, 1280) bi_awd_hid_rev = bi_awd_hid_rev.permute(1, 0, 2) # (bs, seq_len, 1280) elif self.bi_awd_layer == "second": bi_awd_hid = all_hid[1] bi_awd_hid_rev = all_hid_rev[1] bi_awd_hid = bi_awd_hid.permute(1, 0, 2) # (bs, seq_len, 1280) bi_awd_hid_rev = bi_awd_hid_rev.permute(1, 0, 2) # (bs, seq_len, 1280) elif self.bi_awd_layer == "last": bi_awd_hid = all_hid[2] bi_awd_hid_rev = all_hid_rev[2] bi_awd_hid = bi_awd_hid.permute(1, 0, 2) # (bs, seq_len, 320) bi_awd_hid_rev = bi_awd_hid_rev.permute(1, 0, 2) # (bs, seq_len, 320) bi_awd_hid = torch.cat((bi_awd_hid, bi_awd_hid_rev), dim=2) # (bs, seq_len, 640 or 2560) if self.project_size is not None: bi_awd_hid = self.project( bi_awd_hid) # (bs, seq_len, project_size) del bi_awd_hid_rev ### End BiAWDEmbedding bi_awd_hid = self.drop( bi_awd_hid) #( batch_size, seq_len, project_size or 2*1280) pack = nn.utils.rnn.pack_padded_sequence(bi_awd_hid, seq_lengths, batch_first=True) packed_output, _ = self.lstm(pack) #h = (2, batch_size, hidden_size) output, _ = nn.utils.rnn.pad_packed_sequence( packed_output, batch_first=True) #(batch_size, seq_len, hidden_size*2) return output
class Encoder(BaseEncoder): """ Encoder with bi_awd concatenated to the LSTM output Parameters: -- bi_awd_layer: first, second or last -- architecture: before, after or both Inputs: input, seq_len - **input** of shape Outputs: output - **output** of shape (batch_size, seq_len, hidden_size*2 + 300) if arch is after/both else (batch_size, seq_len, hidden_size*2) """ def __init__(self, args, bi_awd_layer, architecture): super().__init__(args) self.architecture = architecture self.bi_awd_layer = bi_awd_layer if bi_awd_layer in ["first", "second"]: self.project = nn.Linear(2560, 300, bias=False) elif bi_awd_layer in ["last"]: self.project = nn.Linear(320 * 2, 300, bias=False) if self.architecture in ["before", "both"]: self.lstm = nn.LSTM(128 + 300, args.n_hid, bidirectional=True, batch_first=True) init_weights(self) self.bi_awd = BiAWDEmbedding(ntoken=21, ninp=320, nhid=1280, nlayers=3, tie_weights=True) self.bi_awd.load_pretrained() def forward(self, inp, seq_lengths): with torch.no_grad(): (all_hid, all_hid_rev), _, _ = self.bi_awd( inp, seq_lengths) # all_hid, last_hidden_states, emb if self.bi_awd_layer == "first": bi_awd_hid = all_hid[0] bi_awd_hid_rev = all_hid_rev[0] bi_awd_hid = bi_awd_hid.permute(1, 0, 2) # (bs, seq_len, 1280) bi_awd_hid_rev = bi_awd_hid_rev.permute(1, 0, 2) # (bs, seq_len, 1280) elif self.bi_awd_layer == "second": bi_awd_hid = all_hid[1] bi_awd_hid_rev = all_hid_rev[1] bi_awd_hid = bi_awd_hid.permute(1, 0, 2) # (bs, seq_len, 1280) bi_awd_hid_rev = bi_awd_hid_rev.permute(1, 0, 2) # (bs, seq_len, 1280) elif self.bi_awd_layer == "last": bi_awd_hid = all_hid[2] bi_awd_hid_rev = all_hid_rev[2] bi_awd_hid = bi_awd_hid.permute(1, 0, 2) # (bs, seq_len, 320) bi_awd_hid_rev = bi_awd_hid_rev.permute(1, 0, 2) # (bs, seq_len, 320) bi_awd_hid = torch.cat((bi_awd_hid, bi_awd_hid_rev), dim=2) # (bs, seq_len, something big) bi_awd_hid = self.project(bi_awd_hid) # (bs, seq_len, 300) del bi_awd_hid_rev ### End BiAWDEmbedding inp = self.embed(inp) # (batch_size, seq_len, emb_size) inp = self.in_drop1d(inp) # feature dropout inp = self.in_drop2d( inp) # (batch_size, seq_len, emb_size) - 2d dropout inp = inp.permute(0, 2, 1) # (batch_size, emb_size, seq_len) conv_cat = torch.cat( [self.relu(conv(inp)) for conv in self.convs], dim=1) # (batch_size, emb_size*len(convs), seq_len) inp = self.relu( self.cnn_final(conv_cat)) #(batch_size, out_channels=128, seq_len) inp = inp.permute(0, 2, 1) #(batch_size, seq_len, out_channels=128) if self.architecture in ["before", "both"]: inp = torch.cat((inp, bi_awd_hid), dim=2) inp = self.drop(inp) #( batch_size, seq_len, lstm_input_size) pack = nn.utils.rnn.pack_padded_sequence(inp, seq_lengths, batch_first=True) packed_output, _ = self.lstm(pack) #h = (2, batch_size, hidden_size) output, _ = nn.utils.rnn.pad_packed_sequence( packed_output, batch_first=True) #(batch_size, seq_len, hidden_size*2) if self.architecture in ["after", "both"]: output = torch.cat( (output, bi_awd_hid), dim=2) # (batch_size, seq_len, hidden_size*2+300) return output
class Encoder(BaseEncoder): """ Encoder with bi_awd concatenated to the LSTM output Parameters: -- bi_awd_layer: last or second -- architecture: before, after or both Inputs: input, seq_len - **input** of shape Outputs: output - **output** of shape (batch_size, seq_len, hidden_size*2 + 300) if arch is after/both else (batch_size, seq_len, hidden_size*2) """ def __init__(self, args, direction): super().__init__(args) self.direction = direction self.bi_awd = BiAWDEmbedding(ntoken=21, ninp=320, nhid=1280, nlayers=3, tie_weights=True) self.bi_awd.load_pretrained() def forward(self, inp, seq_lengths): with torch.no_grad(): (all_hid, all_hid_rev), _, _ = self.bi_awd( inp, seq_lengths) # all_hid, last_hidden_states, emb if self.direction == "forward": bi_awd_hid = all_hid[2].permute(1, 0, 2) # (bs, seq_len, 320) elif self.direction == "backward": bi_awd_hid = all_hid_rev[2].permute(1, 0, 2) # (bs, seq_len, 320) ### End BiAWDEmbedding inp = self.embed(inp) # (batch_size, seq_len, emb_size) inp = self.in_drop1d(inp) # feature dropout inp = self.in_drop2d( inp) # (batch_size, seq_len, emb_size) - 2d dropout inp = inp.permute(0, 2, 1) # (batch_size, emb_size, seq_len) conv_cat = torch.cat( [self.relu(conv(inp)) for conv in self.convs], dim=1) # (batch_size, emb_size*len(convs), seq_len) inp = self.relu( self.cnn_final(conv_cat)) #(batch_size, out_channels=128, seq_len) inp = inp.permute(0, 2, 1) #(batch_size, seq_len, out_channels=128) inp = self.drop(inp) #( batch_size, seq_len, 128) pack = nn.utils.rnn.pack_padded_sequence(inp, seq_lengths, batch_first=True) packed_output, _ = self.lstm(pack) #h = (2, batch_size, hidden_size) output, _ = nn.utils.rnn.pad_packed_sequence( packed_output, batch_first=True) #(batch_size, seq_len, hidden_size*2) output = torch.cat((output, bi_awd_hid), dim=2) # (batch_size, seq_len, hidden_size*2+320) return output
class Encoder(nn.Module):
    """
    Encoder over precomputed per-residue features (`inp`), with bi_awd run
    on the raw sequence (`raw`), projected to 300 dims and concatenated to
    the LSTM input, its output, or both (controlled by `architecture`)

    Parameters:
        -- bi_awd_layer: second or last
        -- architecture: before, after or both
    Inputs: input, raw, seq_len
    Outputs: output
        - **output** of shape (batch_size, seq_len, hidden_size*2 + 300) if
          arch is after/both else (batch_size, seq_len, hidden_size*2)
    """

    def __init__(self, args, bi_awd_layer, architecture):
        super().__init__()
        self.args = args
        self.bi_awd_layer = bi_awd_layer
        self.architecture = architecture

        self.densel1 = nn.Linear(self.args.n_features2, self.args.n_hid2)
        self.densel2 = nn.Linear(self.args.n_hid2, self.args.n_hid2)
        self.bi_rnn = nn.LSTM(input_size=self.args.n_hid2 + self.args.n_features2,
                              hidden_size=self.args.n_hid2,
                              num_layers=3,
                              bidirectional=True,
                              batch_first=True)
        self.drop = nn.Dropout(p=0.5)
        self.relu = nn.ReLU()

        if bi_awd_layer in ["second"]:
            self.project = nn.Linear(2560, 300, bias=False)
        elif bi_awd_layer in ["last"]:
            self.project = nn.Linear(320 * 2, 300, bias=False)

        if self.architecture in ["before", "both"]:
            self.bi_rnn = nn.LSTM(input_size=self.args.n_hid2 + self.args.n_features2 + 300,
                                  hidden_size=self.args.n_hid2,
                                  num_layers=3,
                                  bidirectional=True,
                                  batch_first=True)

        init_weights(self)
        self.init_weights()

        self.bi_awd = BiAWDEmbedding(ntoken=21, ninp=320, nhid=1280,
                                     nlayers=3, tie_weights=True)
        self.bi_awd.load_pretrained()

    def init_weights(self):
        self.densel1.bias.data.zero_()
        torch.nn.init.xavier_uniform_(tensor=self.densel1.weight.data, gain=1.0)
        self.densel2.bias.data.zero_()
        torch.nn.init.xavier_uniform_(tensor=self.densel2.weight.data, gain=1.0)

    def forward(self, inp, raw, seq_lengths):
        # Something like this. Look into it when needed.
        with torch.no_grad():
            (all_hid, all_hid_rev), _, _ = self.bi_awd(
                raw, seq_lengths)  # all_hid, last_hidden_states, emb

        if self.bi_awd_layer == "last":
            bi_awd_hid = all_hid[2].permute(1, 0, 2)          # (bs, seq_len, 320)
            bi_awd_hid_rev = all_hid_rev[2].permute(1, 0, 2)  # (bs, seq_len, 320)
        elif self.bi_awd_layer == "second":
            bi_awd_hid = all_hid[1].permute(1, 0, 2)          # (bs, seq_len, 1280)
            bi_awd_hid_rev = all_hid_rev[1].permute(1, 0, 2)  # (bs, seq_len, 1280)

        bi_awd_hid = torch.cat((bi_awd_hid, bi_awd_hid_rev),
                               dim=2)  # (bs, seq_len, 640 or 2560)
        bi_awd_hid = self.project(bi_awd_hid)  # (bs, seq_len, 300)
        del bi_awd_hid_rev
        ### End BiAWDEmbedding

        x = self.relu(self.densel2(self.relu(self.densel1(inp))))
        inp = torch.cat((inp, x), dim=2)  # (bs, seq_len, n_features2 + n_hid2)

        if self.architecture in ["before", "both"]:
            inp = torch.cat((inp, bi_awd_hid), dim=2)

        pack = nn.utils.rnn.pack_padded_sequence(inp, seq_lengths,
                                                 batch_first=True)
        packed_output, _ = self.bi_rnn(pack)
        output, _ = nn.utils.rnn.pad_packed_sequence(packed_output,
                                                     batch_first=True)

        if self.architecture in ["after", "both"]:
            output = torch.cat((output, bi_awd_hid),
                               dim=2)  # (batch_size, seq_len, hidden_size*2+300)
        return output
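
# --- Usage sketch (not part of the source): this variant takes precomputed
# per-residue features (`inp`, width n_features2) plus the raw token indices
# (`raw`) that feed the language model. `_Args` is hypothetical; 42 features
# is an arbitrary example width.
def _demo_profile_encoder(encoder_cls=Encoder):
    class _Args:
        n_features2 = 42
        n_hid2 = 256

    feats = torch.randn(2, 30, 42)          # (batch_size, seq_len, n_features2)
    tokens = torch.randint(0, 21, (2, 30))  # raw sequence for bi_awd
    lengths = torch.tensor([30, 25])
    enc = encoder_cls(_Args(), bi_awd_layer="last", architecture="both")
    out = enc(feats, tokens, lengths)
    print(out.shape)  # (2, 30, n_hid2*2 + 300)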