def __init__(self, data, opt):
    """Word-level sequence model: WordRep -> single-layer BiLSTM -> tag projection.

    Args:
        data: dataset/config object (embedding dims, feature config, label alphabet).
        opt:  hyper-parameter object (gpu id, dropout, char/word hidden dims).
    """
    super(WordSequence, self).__init__()
    self.gpu = opt.gpu
    self.droplstm = nn.Dropout(opt.dropout)
    self.wordrep = WordRep(data, opt)

    # LSTM input width = word embedding + char hidden state
    # + every configured handcrafted-feature embedding.
    self.input_size = data.word_emb_dim + opt.char_hidden_dim
    if data.feat_config is not None:
        for feat_dim in data.feature_emb_dims:
            self.input_size += feat_dim

    # Bidirectional: each direction gets half of hidden_dim so the
    # concatenated output is exactly hidden_dim wide.
    lstm_hidden = opt.hidden_dim // 2
    self.lstm = nn.LSTM(self.input_size, lstm_hidden, num_layers=1,
                        batch_first=True, bidirectional=True)

    # Project hidden states to tag space. The "+ 2" adds two extra labels
    # beyond the alphabet — presumably CRF start/stop tags; confirm against
    # the decoder that consumes these scores.
    self.hidden2tag = nn.Linear(opt.hidden_dim, data.label_alphabet.size() + 2)

    if opt.gpu >= 0 and torch.cuda.is_available():
        self.droplstm = self.droplstm.cuda(self.gpu)
        self.hidden2tag = self.hidden2tag.cuda(self.gpu)
        self.lstm = self.lstm.cuda(self.gpu)
def __init__(self, data):
    """Word sequence feature extractor: WordRep followed by a GRU, LSTM, or
    CNN encoder, then a linear projection to tag space.

    Args:
        data: config object carrying HP_* hyper-parameters, embedding dims,
              feature counts, and the label alphabet size.
    """
    super(WordSequence, self).__init__()
    print("build word sequence feature extractor: %s..."%(data.word_feature_extractor))
    self.gpu = data.HP_gpu
    self.use_char = data.use_char
    # self.batch_size = data.HP_batch_size
    # self.hidden_dim = data.HP_hidden_dim
    self.droplstm = nn.Dropout(data.HP_dropout)
    self.bilstm_flag = data.HP_bilstm
    self.lstm_layer = data.HP_lstm_layer
    self.wordrep = WordRep(data)

    # Encoder input width: word embedding, plus char hidden state (twice
    # when char_seq_feature == "ALL"), plus each feature embedding.
    self.input_size = data.word_emb_dim
    if self.use_char:
        self.input_size += data.HP_char_hidden_dim
        if data.char_seq_feature == "ALL":
            self.input_size += data.HP_char_hidden_dim
    for idx in range(data.feature_num):
        self.input_size += data.feature_emb_dims[idx]

    # The LSTM takes word embeddings as inputs, and outputs hidden states
    # with dimensionality hidden_dim (split across directions when bidirectional).
    if self.bilstm_flag:
        lstm_hidden = data.HP_hidden_dim // 2
    else:
        lstm_hidden = data.HP_hidden_dim

    self.word_feature_extractor = data.word_feature_extractor
    if self.word_feature_extractor == "GRU":
        self.lstm = nn.GRU(self.input_size, lstm_hidden,
                           num_layers=self.lstm_layer, batch_first=True,
                           bidirectional=self.bilstm_flag)
    elif self.word_feature_extractor == "LSTM":
        self.lstm = nn.LSTM(self.input_size, lstm_hidden,
                            num_layers=self.lstm_layer, batch_first=True,
                            bidirectional=self.bilstm_flag)
    elif self.word_feature_extractor == "CNN":
        # cnn_hidden = data.HP_hidden_dim
        self.word2cnn = nn.Linear(self.input_size, data.HP_hidden_dim)
        self.cnn_layer = data.HP_cnn_layer
        # FIX: Python 2 print statement -> print() function (rest of the
        # file already uses the function form).
        print("CNN layer: ", self.cnn_layer)
        self.cnn_list = nn.ModuleList()
        self.cnn_drop_list = nn.ModuleList()
        self.cnn_batchnorm_list = nn.ModuleList()
        kernel = 3
        # FIX: integer division — "/" yields a float under Python 3 and
        # nn.Conv1d requires an int padding.
        pad_size = (kernel - 1) // 2
        for idx in range(self.cnn_layer):
            self.cnn_list.append(nn.Conv1d(data.HP_hidden_dim,
                                           data.HP_hidden_dim,
                                           kernel_size=kernel,
                                           padding=pad_size))
            self.cnn_drop_list.append(nn.Dropout(data.HP_dropout))
            self.cnn_batchnorm_list.append(nn.BatchNorm1d(data.HP_hidden_dim))

    # The linear layer that maps from hidden state space to tag space.
    self.hidden2tag = nn.Linear(data.HP_hidden_dim, data.label_alphabet_size)

    if self.gpu:
        self.droplstm = self.droplstm.cuda()
        self.hidden2tag = self.hidden2tag.cuda()
        if self.word_feature_extractor == "CNN":
            self.word2cnn = self.word2cnn.cuda()
            for idx in range(self.cnn_layer):
                self.cnn_list[idx] = self.cnn_list[idx].cuda()
                self.cnn_drop_list[idx] = self.cnn_drop_list[idx].cuda()
                self.cnn_batchnorm_list[idx] = self.cnn_batchnorm_list[idx].cuda()
        else:
            self.lstm = self.lstm.cuda()
def __init__(self, data, use_position, use_cap, use_postag, use_char):
    """Fixed single-layer BiLSTM word sequence extractor with optional
    position / capitalization / POS-tag / character features.

    Args:
        data:         config object with HP_* hyper-parameters and feature dims.
        use_position: include 2x position-feature embeddings in the input.
        use_cap:      include the '[Cap]' feature embedding.
        use_postag:   include the '[POS]' feature embedding.
        use_char:     include the character hidden representation.
    """
    super(WordSequence, self).__init__()
    self.gpu = data.HP_gpu
    self.use_char = use_char
    # self.batch_size = data.HP_batch_size
    # self.hidden_dim = data.HP_hidden_dim
    self.droplstm = nn.Dropout(data.HP_dropout)
    self.bilstm_flag = True   # this variant is always bidirectional
    self.lstm_layer = 1       # and always single-layer
    self.wordrep = WordRep(data, use_position, use_cap, use_postag, use_char)
    self.tune_wordemb = data.tune_wordemb
    self.use_position = use_position

    # Accumulate the width of the concatenated word representation.
    input_size = data.word_emb_dim
    if self.use_char:
        input_size += data.HP_char_hidden_dim
    if use_cap:
        input_size += data.feature_emb_dims[data.feature_name2id['[Cap]']]
    if use_postag:
        input_size += data.feature_emb_dims[data.feature_name2id['[POS]']]
    if self.use_position:
        # Two position features (one per entity), hence the factor of 2.
        input_size += 2 * data.re_feature_emb_dims[
            data.re_feature_name2id['[POSITION]']]
    self.input_size = input_size

    # Bidirectional: split hidden_dim evenly between the two directions.
    lstm_hidden = data.HP_hidden_dim // 2 if self.bilstm_flag else data.HP_hidden_dim
    self.lstm = nn.LSTM(self.input_size, lstm_hidden,
                        num_layers=self.lstm_layer, batch_first=True,
                        bidirectional=self.bilstm_flag)

    if torch.cuda.is_available():
        self.droplstm = self.droplstm.cuda(self.gpu)
        self.lstm = self.lstm.cuda(self.gpu)
def __init__(self, data, use_position, use_cap, use_postag, use_char):
    """Configurable word sequence feature extractor (GRU / LSTM / CNN) with
    optional position / capitalization / POS-tag / character features.

    Args:
        data:         config object with HP_* hyper-parameters and feature dims.
        use_position: include 2x position-feature embeddings in the input.
        use_cap:      include the '[Cap]' feature embedding.
        use_postag:   include the '[POS]' feature embedding.
        use_char:     include the character hidden representation.
    """
    super(WordSequence, self).__init__()
    print("build word sequence feature extractor: %s..." % (data.word_feature_extractor))
    self.gpu = data.HP_gpu
    self.use_char = use_char
    # self.batch_size = data.HP_batch_size
    # self.hidden_dim = data.HP_hidden_dim
    self.droplstm = nn.Dropout(data.HP_dropout)
    self.bilstm_flag = data.HP_bilstm
    self.lstm_layer = data.HP_lstm_layer
    self.wordrep = WordRep(data, use_position, use_cap, use_postag, use_char)
    self.tune_wordemb = data.tune_wordemb

    # Encoder input width: word embedding, plus char hidden state (twice
    # when char_feature_extractor == "ALL"), plus each enabled feature.
    self.input_size = data.word_emb_dim
    if self.use_char:
        self.input_size += data.HP_char_hidden_dim
        if data.char_feature_extractor == "ALL":
            self.input_size += data.HP_char_hidden_dim
    if use_cap:
        self.input_size += data.feature_emb_dims[
            data.feature_name2id['[Cap]']]
    if use_postag:
        self.input_size += data.feature_emb_dims[
            data.feature_name2id['[POS]']]
    self.use_position = use_position
    if self.use_position:
        # Two position features (one per entity), hence the factor of 2.
        self.input_size += 2 * data.re_feature_emb_dims[
            data.re_feature_name2id['[POSITION]']]

    # The LSTM takes word embeddings as inputs, and outputs hidden states
    # with dimensionality hidden_dim (split across directions when bidirectional).
    if self.bilstm_flag:
        lstm_hidden = data.HP_hidden_dim // 2
    else:
        lstm_hidden = data.HP_hidden_dim

    self.word_feature_extractor = data.word_feature_extractor
    if self.word_feature_extractor == "GRU":
        self.lstm = nn.GRU(self.input_size, lstm_hidden,
                           num_layers=self.lstm_layer, batch_first=True,
                           bidirectional=self.bilstm_flag)
    elif self.word_feature_extractor == "LSTM":
        self.lstm = nn.LSTM(self.input_size, lstm_hidden,
                            num_layers=self.lstm_layer, batch_first=True,
                            bidirectional=self.bilstm_flag)
    elif self.word_feature_extractor == "CNN":
        # cnn_hidden = data.HP_hidden_dim
        self.word2cnn = nn.Linear(self.input_size, data.HP_hidden_dim)
        self.cnn_layer = data.HP_cnn_layer
        # FIX: Python 2 print statement -> print() function (rest of the
        # file already uses the function form).
        print("CNN layer: ", self.cnn_layer)
        self.cnn_list = nn.ModuleList()
        self.cnn_drop_list = nn.ModuleList()
        self.cnn_batchnorm_list = nn.ModuleList()
        kernel = 3
        # FIX: integer division — "/" yields a float under Python 3 and
        # nn.Conv1d requires an int padding.
        pad_size = (kernel - 1) // 2
        for idx in range(self.cnn_layer):
            self.cnn_list.append(
                nn.Conv1d(data.HP_hidden_dim, data.HP_hidden_dim,
                          kernel_size=kernel, padding=pad_size))
            self.cnn_drop_list.append(nn.Dropout(data.HP_dropout))
            self.cnn_batchnorm_list.append(
                nn.BatchNorm1d(data.HP_hidden_dim))

    if torch.cuda.is_available():
        self.droplstm = self.droplstm.cuda(self.gpu)
        if self.word_feature_extractor == "CNN":
            self.word2cnn = self.word2cnn.cuda(self.gpu)
            for idx in range(self.cnn_layer):
                self.cnn_list[idx] = self.cnn_list[idx].cuda(self.gpu)
                self.cnn_drop_list[idx] = self.cnn_drop_list[idx].cuda(
                    self.gpu)
                self.cnn_batchnorm_list[idx] = self.cnn_batchnorm_list[
                    idx].cuda(self.gpu)
        else:
            self.lstm = self.lstm.cuda(self.gpu)
    self.frozen = False
def __init__(self, data, circul_time, deepth):
    """Word sequence extractor whose LSTM variant builds a `deepth` x
    `circul_time` grid of (Bi)LSTMs; their outputs are gathered per layer
    by 'concat' or 'add' (circul_gather_output_mode).

    Args:
        data:        config object (hyper-parameters, embedding/feature dims).
        circul_time: number of parallel LSTMs per depth level.
        deepth:      number of stacked LSTM levels.
    """
    super(WordSequence_circulationBiLSTM, self).__init__()
    print("Build word sequence feature extractor: %s..." % data.word_feature_extractor)
    self.circul_time = circul_time
    self.deepth = deepth
    self.hidden_dim = data.hidden_dim
    self.gather_output_mode = data.circul_gather_output_mode
    # self.gather_output_mode = 'add'
    self.gpu = data.gpu
    self.use_char = data.use_char
    self.use_trans = data.use_trans
    # self.batch_size = data.batch_size
    self.droplstm = nn.Dropout(data.dropout)
    self.bilstm_flag = data.bilstm
    self.lstm_layer = data.lstm_layer
    self.wordrep = WordRep(data)

    # Encoder input width: word embedding, plus char hidden state (twice
    # when char_seq_feature == "ALL"), plus feature embeddings, plus the
    # translation hidden state when enabled.
    self.input_size = data.word_emb_dim
    if self.use_char:
        self.input_size += data.char_hidden_dim
        if data.char_seq_feature == "ALL":
            self.input_size += data.char_hidden_dim
    for idx in range(data.feature_num):
        self.input_size += data.feature_emb_dims[idx]
    if self.use_trans:
        self.input_size += data.trans_hidden_dim

    # The LSTM takes word embeddings as inputs, and outputs hidden states
    # with dimensionality hidden_dim (split across directions when bidirectional).
    if self.bilstm_flag:
        lstm_hidden = data.hidden_dim // 2
    else:
        lstm_hidden = data.hidden_dim

    self.word_feature_extractor = data.word_feature_extractor
    if self.word_feature_extractor == "GRU":
        self.lstm = nn.GRU(self.input_size, lstm_hidden,
                           num_layers=self.lstm_layer, batch_first=True,
                           bidirectional=self.bilstm_flag)
    elif self.word_feature_extractor == "LSTM":
        if self.gather_output_mode == 'concat':
            # In concat mode each of the circul_time LSTMs contributes a
            # slice of the hidden width.
            # FIX: "/=" yields a float under Python 3; hidden size must be int.
            lstm_hidden //= circul_time
        elif self.gather_output_mode == 'add':
            pass
        else:
            print('no such mode for gather output: {}'.format(
                self.gather_output_mode))
        # NOTE(review): a plain Python list does not register these LSTMs as
        # submodules, so their parameters are invisible to .parameters() /
        # state_dict(); presumably handled by the caller given the manual
        # .cuda() loop below — consider nn.ModuleList. TODO confirm.
        self.lstm_stack = []
        for i in range(deepth):
            lstm_layer = []
            for j in range(circul_time):
                if i == 0:
                    # First level reads the raw word representation.
                    lstm_layer.append(
                        nn.LSTM(self.input_size, lstm_hidden,
                                num_layers=self.lstm_layer,
                                batch_first=True,
                                bidirectional=self.bilstm_flag))
                else:
                    # Deeper levels read the gathered hidden_dim-wide output.
                    lstm_layer.append(
                        nn.LSTM(self.hidden_dim, lstm_hidden,
                                num_layers=self.lstm_layer,
                                batch_first=True,
                                bidirectional=self.bilstm_flag))
            self.lstm_stack.append(lstm_layer)
    elif self.word_feature_extractor == "CNN":
        # cnn_hidden = data.hidden_dim
        self.word2cnn = nn.Linear(self.input_size, data.hidden_dim)
        self.cnn_layer = data.cnn_layer
        # FIX: Python 2 print statement -> print() function (rest of the
        # file already uses the function form).
        print("CNN layer: ", self.cnn_layer)
        self.cnn_list = nn.ModuleList()
        self.cnn_drop_list = nn.ModuleList()
        self.cnn_batchnorm_list = nn.ModuleList()
        kernel = 3
        # FIX: integer division — "/" yields a float under Python 3 and
        # nn.Conv1d requires an int padding.
        pad_size = (kernel - 1) // 2
        for idx in range(self.cnn_layer):
            self.cnn_list.append(
                nn.Conv1d(data.hidden_dim, data.hidden_dim,
                          kernel_size=kernel, padding=pad_size))
            self.cnn_drop_list.append(nn.Dropout(data.dropout))
            self.cnn_batchnorm_list.append(nn.BatchNorm1d(data.hidden_dim))

    # The linear layer that maps from hidden state space to tag space.
    self.hidden2tag = nn.Linear(data.hidden_dim, data.label_alphabet_size)

    if self.gpu:
        self.droplstm = self.droplstm.cuda()
        self.hidden2tag = self.hidden2tag.cuda()
        if self.word_feature_extractor == "CNN":
            self.word2cnn = self.word2cnn.cuda()
            for idx in range(self.cnn_layer):
                self.cnn_list[idx] = self.cnn_list[idx].cuda()
                self.cnn_drop_list[idx] = self.cnn_drop_list[idx].cuda()
                self.cnn_batchnorm_list[idx] = self.cnn_batchnorm_list[
                    idx].cuda()
        else:
            for i in range(deepth):
                for j in range(circul_time):
                    self.lstm_stack[i][j] = self.lstm_stack[i][j].cuda()