def __init__(self, data, type=1):
    """Build the char-word based NER model.

    Args:
        data: global data/config object carrying alphabets, embedding
            dims, pretrained embeddings and hyper-parameters.
        type: gaz-merging strategy forwarded to Gaz_Embed
            (1 avg, 2 short-first, 3 long-first; see Gaz_Embed).
    """
    print("Build char-word based NER Task...")
    super(CW_NER, self).__init__()
    self.gpu = data.HP_gpu
    self.type = type

    # Gazetteer (lexicon) embedding sub-module; it moves itself to GPU.
    self.gaz_embed = Gaz_Embed(data, type)
    self.word_embedding = nn.Embedding(data.word_alphabet.size(),
                                       data.word_emb_dim)
    # LSTM input is the concatenation of word and gaz embeddings.
    self.lstm = Gaz_BiLSTM(data, data.word_emb_dim + data.gaz_emb_dim,
                           data.HP_hidden_dim)
    self.crf = CRF(data.label_alphabet_size, self.gpu)
    # +2 for the CRF's START/STOP transition states.
    self.hidden2tag = nn.Linear(data.HP_hidden_dim * 2,
                                data.label_alphabet_size + 2)

    if data.pretrain_word_embedding is not None:
        self.word_embedding.weight.data.copy_(
            torch.from_numpy(data.pretrain_word_embedding))
    else:
        # random_embedding returns a numpy array, so it must be wrapped
        # in torch.from_numpy before copy_ (matches Gaz_Embed's handling).
        # Size with word_alphabet.size() for consistency with the
        # nn.Embedding construction above.
        self.word_embedding.weight.data.copy_(
            torch.from_numpy(
                random_embedding(data.word_alphabet.size(),
                                 data.word_emb_dim)))

    if self.gpu:
        self.word_embedding = self.word_embedding.cuda()
        self.hidden2tag = self.hidden2tag.cuda()
def __init__(self, data):
    """Build the character-based BiLSTM-CRF model.

    Args:
        data: global data/config object carrying the char alphabet,
            embedding dims, pretrained embeddings and hyper-parameters.
    """
    super(Char_NER, self).__init__()
    print("Build Character-based BiLSTM-CRF....")
    self.gpu = data.HP_gpu
    self.embedding_dim = data.char_emb_dim
    self.hidden_dim = data.HP_hidden_dim

    self.char_embedding = nn.Embedding(data.char_alphabet.size(),
                                       self.embedding_dim)
    self.lstm = Char_BiLSTM(data)
    # +2 for the CRF's START/STOP transition states.
    self.hidden2tag = nn.Linear(self.hidden_dim * 2,
                                data.label_alphabet_size + 2)
    self.crf = CRF(data.label_alphabet_size, self.gpu)

    if data.pretrain_char_embedding is not None:
        self.char_embedding.weight.data.copy_(
            torch.from_numpy(data.pretrain_char_embedding))
    else:
        # random_embedding returns a numpy array, so wrap it in
        # torch.from_numpy before copy_ (as Gaz_Embed already does).
        self.char_embedding.weight.data.copy_(
            torch.from_numpy(
                random_embedding(data.char_alphabet.size(),
                                 self.embedding_dim)))

    if self.gpu:
        self.char_embedding = self.char_embedding.cuda()
        self.hidden2tag = self.hidden2tag.cuda()
def __init__(self, data, type=1):
    """Build the gazetteer embedding module.

    Args:
        data: all the data information
        type: the type of strategy, 1 for avg, 2 for short first,
            3 for long first (4 adds CNNs, 5 adds self-attention)
    """
    print('build gaz embedding...')
    super(Gaz_Embed, self).__init__()

    self.gpu = data.HP_gpu
    self.data = data
    self.type = type
    self.gaz_dim = data.gaz_emb_dim

    self.gaz_embedding = nn.Embedding(data.gaz_alphabet.size(),
                                      data.gaz_emb_dim)
    self.dropout = nn.Dropout(p=0.5)

    # Initialize weights from pretrained vectors when available,
    # otherwise from a fresh random embedding table.
    pretrained = data.pretrain_gaz_embedding
    if pretrained is None:
        init_weights = torch.from_numpy(
            random_embedding(data.gaz_alphabet.size(), data.gaz_emb_dim))
    else:
        init_weights = torch.from_numpy(pretrained)
    self.gaz_embedding.weight.data.copy_(init_weights)

    # Conv filter spec: [kernel width, number of filters].
    self.filters = [[1, 20], [2, 30]]

    if self.type == 4:
        # Strategy 4 uses convolutions: 20 width-1 and 30 width-2
        # 1-d conv filters, built from self.filters.
        self.build_cnn(self.filters)

    if self.type == 5:
        # Strategy 5 uses self-attention over gaz embeddings.
        self.build_attention()

    if self.gpu:
        self.gaz_embedding = self.gaz_embedding.cuda()