def getloss_batch(self, have_action_batch, batch_buffer, batch_stack, batch_action, batch_output,
                  batch_valid_actions, batch_real_actions=None):
    """Score the valid actions of every active sentence and pick the best one.

    Args:
        have_action_batch: batch indices of the sentences that still need an
            action at this step. Results are returned in this order.
        batch_buffer: per-sentence buffer entry; element ``[0]`` is used as the
            feature.
        batch_stack: per-sentence stack entry; element ``[0][0]`` is used.
        batch_action: per-sentence action feature. In ``'train'`` mode the
            entry itself is concatenated, in ``'predict'`` mode element
            ``[0][0]`` of the entry is used.
        batch_output: per-sentence output entry; element ``[0][0]`` is used.
        batch_valid_actions: indexable by batch index; each item is the list
            of action ids allowed for that sentence.
        batch_real_actions: gold action per batch index (each item must
            support ``.item()``). Only read in ``'train'`` mode.

    Returns:
        ``(predict_actions, losses)``. ``predict_actions`` holds the best
        valid action id for each entry of ``have_action_batch``. ``losses``
        holds the log-probability of the gold action for each of those
        entries in ``'train'`` mode and is ``None`` in ``'predict'`` mode.

    Raises:
        ValueError: if ``self.mode`` is neither ``'train'`` nor ``'predict'``.
        KeyError: in ``'train'`` mode, if a gold action is not among the valid
            actions of its sentence.
    """
    if self.mode == 'train':
        features = [
            torch.cat([batch_buffer[b][0], batch_stack[b][0][0],
                       batch_output[b][0][0], batch_action[b]], 1)
            for b in have_action_batch
        ]
    elif self.mode == 'predict':
        features = [
            torch.cat([batch_buffer[b][0], batch_stack[b][0][0],
                       batch_output[b][0][0], batch_action[b][0][0]], 1)
            for b in have_action_batch
        ]
    else:
        raise ValueError("unsupported mode: %r" % (self.mode,))

    # One row per active sentence, in the order of have_action_batch.
    lstms_output = torch.cat(features, 0)
    hidden_output = torch.tanh(self.lstms_output_2_softmax(self.dropout(lstms_output)))
    logits = self.output_2_act(hidden_output)

    predict_actions = []
    losses = []
    for row, batch_idx in enumerate(have_action_batch):
        valid_actions = batch_valid_actions[batch_idx]
        valid_index = utils.variable(torch.LongTensor(valid_actions), self.gpu_triger)
        # Normalise over the valid actions only. The slice is 1-D, so the
        # softmax dimension is 0; it is spelled out because the implicit-dim
        # form of log_softmax is deprecated.
        log_probs = torch.nn.functional.log_softmax(logits[row][valid_index], dim=0)
        best = torch.max(log_probs.cpu(), 0)[1].item()
        predict_actions.append(valid_actions[best])
        if self.mode == 'train':
            valid_action_tbl = {a: i for i, a in enumerate(valid_actions)}
            gold = batch_real_actions[batch_idx].item()
            losses.append(log_probs[valid_action_tbl[gold]])

    if self.mode == 'predict':
        losses = None
    return predict_actions, losses
def generate_ner(ner_model, fileout, dataset_loader, action2idx, word2idx, if_cuda):
    """Run the model over a dataset and write each sentence with its entities.

    For every sentence the predicted action sequence is replayed: an action
    whose name starts with 'S' opens an entity (when none is open), one
    starting with 'O' closes the open entity without emitting it, and one
    starting with 'R' emits the open entity, labelled with the text after the
    first '-' in the action name. Any other combination only advances the word
    counter.

    Args:
        ner_model: model exposing ``eval()`` and ``forward(batch)``; the second
            element of the value returned by ``forward`` is read as one action
            id sequence per sentence.
        fileout: object with a ``write`` method that receives the report.
        dataset_loader: iterable of iterables of feature batches.
        action2idx: mapping from action name to action id.
        word2idx: mapping from word to word id.
        if_cuda: flag forwarded to ``utils.variable``.
    """
    id_to_action = {idx: name for name, idx in action2idx.items()}
    id_to_word = {idx: token for token, idx in word2idx.items()}
    ner_model.eval()

    # Original author's note: feature is e.g. torch.Size([4, 17]).
    for batch in itertools.chain.from_iterable(dataset_loader):
        batch_var = utils.variable(batch, if_cuda)
        _, predicted = ner_model.forward(batch_var)

        sentences = [
            [id_to_word[word_id] for word_id in row]
            for row in batch_var.squeeze(0).data.tolist()
        ]

        for sent_no, sent_actions in enumerate(predicted):
            found = []
            span_start = -1   # action step at which the open entity started
            first_word = -1   # word position at which the open entity started
            word_pos = 0
            for step, action_id in enumerate(sent_actions):
                name = id_to_action[action_id]
                entity_open = span_start >= 0
                if name.startswith('S') and not entity_open:
                    span_start = step
                    first_word = word_pos
                    word_pos += 1
                elif name.startswith('O') and entity_open:
                    span_start = -1
                    word_pos += 1
                elif name.startswith('R') and entity_open:
                    found.append((
                        " ".join(sentences[sent_no][first_word:word_pos]),
                        [span_start, step - 1],
                        name.split('-')[1],
                    ))
                    span_start = -1
                else:
                    word_pos += 1

            fileout.write("%s\nEntities: " % (" ".join(sentences[sent_no])))
            for text, _span, label in found:
                fileout.write("%s-%s " % (text, label))
            fileout.write("\n\n")
def forward(self, sentence, actions=None, hidden=None):
    """Parse one sentence with the transition system and score its actions.

    With ``actions`` given the gold actions drive the transitions ('train');
    otherwise the highest-scoring valid action is followed ('predict').

    Args:
        sentence: word-id tensor; dimension 0 is squeezed away before the ids
            are read (presumably shape ``(1, seq_len)`` -- confirm with the
            caller). Id 0 gets ``self.unk_char_embeds`` as its character
            feature; id 1 is skipped (presumably padding -- TODO confirm).
        actions: optional gold action-id tensor, squeezed the same way.
        hidden: initial state handed to ``self.char_bi_lstm``. It is replaced
            by the state returned for each word, so it carries over from one
            word to the next.

    Returns:
        ``(loss, predict_actions)``. ``loss`` is the negated sum of the gold
        actions' log-probabilities, or the int ``-1`` when nothing was scored
        (always the case in predict mode). ``predict_actions`` is a list
        holding a single list with the chosen action id of every step that
        had at least one valid action.

    Raises:
        RuntimeError: in predict mode, if no action is valid while the buffer
            or the stack is still non-empty.
    """
    mode = 'predict' if actions is None else 'train'

    self.set_seq_size(sentence)
    word_embeds = self.dropout_e(self.word_embeds(sentence)).squeeze(0)
    if mode == 'train':
        action_embeds = self.dropout_e(self.action_embeds(actions)).squeeze(0)
        relation_embeds = self.dropout_e(self.relation_embeds(actions)).squeeze(0)
        actions = actions.squeeze(0)
    sentence = sentence.squeeze(0)

    def new_stack(lstm):
        # Every stack shares the same initial state, dropout and empty embedding.
        return StackRNN(lstm, self.lstm_initial, self.dropout, self._rnn_get_output, self.empty_emb)

    buffer = new_stack(self.buffer_lstm)
    stack = new_stack(self.stack_lstm)
    action = new_stack(self.action_lstm)
    output = new_stack(self.output_lstm)
    ent_f = new_stack(self.entity_forward_lstm)
    ent_b = new_stack(self.entity_backward_lstm)

    # ---- token representations -------------------------------------------
    sentence_array = sentence.data.tolist()
    token_embedding = []
    for word_idx, word in enumerate(sentence_array):
        word_vec = word_embeds[word_idx].unsqueeze(0)
        if self.use_spelling:
            if word == 0:
                tok_rep = torch.cat([word_vec, self.unk_char_embeds], 1)
            elif word != 1:
                chars_in_word = [self.char2idx[char] for char in self.idx2word[word]]
                chars_tensor = utils.variable(torch.from_numpy(np.array(chars_in_word)), self.gpu_triger)
                chars_embeds = self.dropout_e(self.char_embeds(chars_tensor.unsqueeze(0)))
                if self.char_structure == 'lstm':
                    _, hidden = self.char_bi_lstm(chars_embeds.transpose(0, 1), hidden)
                    char_out = torch.chunk(hidden[0].squeeze(1), 2, 0)
                    tok_rep = torch.cat([word_vec, char_out[0], char_out[1]], 1)
                elif self.char_structure == 'cnn':
                    char = chars_embeds.unsqueeze(0).transpose(1, 2)
                    char, _ = self.conv1d(char).max(dim=2)
                    char = torch.tanh(char)
                    tok_rep = torch.cat([word_vec, char], 1)
        else:
            tok_rep = word_vec

        if word_idx == 0:
            token_embedding = tok_rep
        elif word != 1:
            token_embedding = torch.cat([token_embedding, tok_rep], 0)

    # Push the tokens last-to-first so the first word ends up on top of the
    # buffer. len() also covers the empty-sentence case, where
    # token_embedding is still a plain list.
    for pos in reversed(range(len(token_embedding))):
        tok_embed = token_embedding[pos].unsqueeze(0)
        # Look the word up with the Python int from sentence_array; a tensor
        # element is not a reliable key for idx2word.
        buffer.push(tok_embed, (tok_embed, self.idx2word[sentence_array[pos]]))

    # ---- transition loop ---------------------------------------------------
    action_count = 0
    pre_actions = []
    losses = []
    while len(buffer) > 0 or len(stack) > 0:
        valid_actions = self.get_possible_actions(stack, buffer)
        # .item() gives a plain int usable as a dict key (same convention as
        # the batched code path).
        gold_action = actions.data[action_count].item() if mode == 'train' else None
        action_predict = None

        if len(valid_actions) > 1:
            lstms_output = torch.cat([buffer.embedding(), stack.embedding(),
                                      output.embedding(), action.embedding()], 1)
            hidden_output = torch.tanh(self.lstms_output_2_softmax(self.dropout(lstms_output)))
            valid_index = torch.LongTensor(valid_actions)
            if self.gpu_triger is True:
                valid_index = valid_index.cuda()
            logits = self.output_2_act(hidden_output)[0][valid_index]
            log_probs = torch.nn.functional.log_softmax(logits, dim=0)
            action_predict = valid_actions[torch.max(log_probs.cpu(), 0)[1].item()]
            pre_actions.append(action_predict)
            if mode == 'train':
                valid_action_tbl = {a: i for i, a in enumerate(valid_actions)}
                losses.append(log_probs[valid_action_tbl[gold_action]])
        elif len(valid_actions) == 1:
            # Only one legal move: nothing to score, but it is still the
            # action taken, so record it to keep the returned sequence
            # aligned with the transitions actually performed.
            action_predict = valid_actions[0]
            pre_actions.append(action_predict)

        if mode == 'train':
            real_action = self.idx2action[gold_action]
            act_embedding = action_embeds[action_count].unsqueeze(0)
            rel_embedding = relation_embeds[action_count].unsqueeze(0)
        else:
            if action_predict is None:
                raise RuntimeError("no valid action available while the buffer or stack is non-empty")
            real_action = self.idx2action[action_predict]
            action_predict_tensor = utils.variable(torch.from_numpy(np.array([action_predict])), self.gpu_triger)
            action_embeds = self.dropout_e(self.action_embeds(action_predict_tensor))
            relation_embeds = self.dropout_e(self.relation_embeds(action_predict_tensor))
            act_embedding = action_embeds[0].unsqueeze(0)
            rel_embedding = relation_embeds[0].unsqueeze(0)

        action.push(act_embedding, (act_embedding, real_action))

        if real_action.startswith('S'):
            # Shift: move the top buffer token onto the stack.
            assert len(buffer) > 0
            tok_buffer_embedding, buffer_token = buffer.pop()
            stack.push(tok_buffer_embedding, (tok_buffer_embedding, buffer_token))
        elif real_action.startswith('O'):
            # Out: move the top buffer token straight to the output.
            assert len(buffer) > 0
            tok_buffer_embedding, buffer_token = buffer.pop()
            output.push(tok_buffer_embedding, (tok_buffer_embedding, buffer_token))
        elif real_action.startswith('R'):
            # Reduce: pop the whole stack, encode it in both directions and
            # push the labelled chunk to the output.
            assert len(stack) > 0
            entity = []
            while len(stack) > 0:
                tok_stack_embedding, stack_token = stack.pop()
                entity.append((tok_stack_embedding, stack_token))

            for (f_emb, f_tok), (b_emb, b_tok) in zip(entity, reversed(entity)):
                ent_f.push(f_emb, (f_emb, f_tok))
                ent_b.push(b_emb, (b_emb, b_tok))
            if len(entity) > 1:
                # Tokens are listed in pop order, each followed by a space.
                ent = ''.join(tok + ' ' for _, tok in entity)
            else:
                ent = entity[0][1]
            entity_input = self.dropout(torch.cat([ent_f.embedding(), ent_b.embedding()], 1))
            ent_f.clear()
            ent_b.clear()
            output_input = self.entity_2_output(torch.cat([entity_input, rel_embedding], 1))
            output.push(output_input, (entity_input, ent))

        action_count += 1

    if losses:
        # stack (not cat): the collected log-probabilities may be 0-dim
        # tensors, which torch.cat refuses to concatenate.
        loss = -torch.sum(torch.stack(losses))
    else:
        loss = -1
    return loss, [pre_actions]
def forward(self, sentences, actions=None, hidden=None):
    """Run the batched transition system over a padded batch of sentences.

    Sets ``self.mode`` to ``"train"`` when ``actions`` is given (gold actions
    drive the transitions) and to ``"predict"`` otherwise (the best valid
    action is followed).

    Args:
        sentences: word-id tensor, ``[batch_size, max_len]`` per the original
            author's note. Id 1 gets ``self.pad_char_embeds`` and is not
            counted as a word; id 0 gets ``self.unk_char_embeds``.
        actions: optional gold action ids, indexed ``[sentence][step]``.
        hidden: initial state handed to ``self.char_bi_lstm``. It is replaced
            by the state returned for each word, so it carries over from one
            word to the next.

    Returns:
        ``(loss, predict_actions)``. ``loss`` is the negated sum of all
        gold-action log-probabilities in train mode and the int ``0`` when
        nothing was scored (always in predict mode). ``predict_actions`` holds
        one list of chosen action ids per sentence.
    """
    self.mode = "train" if actions is not None else "predict"
    training = self.mode == 'train'

    self.set_batch_seq_size(sentences)  # sentences [batch_size, max_len]
    word_embeds = self.dropout_e(self.word_embeds(sentences))  # [batch_size, max_len, embedding_size]
    if training:
        action_embeds = self.dropout_e(self.action_embeds(actions))
        relation_embeds = self.dropout_e(self.relation_embeds(actions))
        action_output, _ = self.ac_lstm(action_embeds.transpose(0, 1))
        action_output = action_output.transpose(0, 1)

    lstm_initial = (utils.xavier_init(self.gpu_triger, 1, self.hidden_dim),
                    utils.xavier_init(self.gpu_triger, 1, self.hidden_dim))

    # ---- token representations (each sentence is stored reversed) ----------
    sentence_array = sentences.data.cpu().numpy()
    sents_len = []            # words (non-padding ids) left in each buffer
    sentence_embeddings = []
    for sent_idx in range(len(sentence_array)):
        count_words = 0
        tok_reps = []
        for word_idx in reversed(range(len(sentence_array[sent_idx]))):
            word = sentence_array[sent_idx][word_idx]
            word_vec = word_embeds[sent_idx][word_idx].unsqueeze(0)
            if self.use_spelling:
                if word == 1:
                    tok_rep = torch.cat([word_vec, self.pad_char_embeds], 1)
                elif word == 0:
                    count_words += 1
                    tok_rep = torch.cat([word_vec, self.unk_char_embeds], 1)
                else:
                    count_words += 1
                    chars_in_word = [self.char2idx[char] for char in self.idx2word[word]]
                    chars_tensor = utils.variable(torch.from_numpy(np.array(chars_in_word)), self.gpu_triger)
                    chars_embeds = self.dropout_e(self.char_embeds(chars_tensor))
                    if self.char_structure == 'lstm':
                        _, hidden = self.char_bi_lstm(chars_embeds.unsqueeze(1), hidden)
                        char_out = torch.chunk(hidden[0].squeeze(1), 2, 0)
                        tok_rep = torch.cat([word_vec, char_out[0], char_out[1]], 1)
                    elif self.char_structure == 'cnn':
                        # [batch_size, embedding_size, sentence_len] -->
                        # [batch_size, output_dim, sentence_len + padding_num*2 - kernel_num + 1]
                        char, _ = self.conv1d(chars_embeds.unsqueeze(0).transpose(1, 2)).max(dim=2)
                        char = torch.tanh(char)
                        tok_rep = torch.cat([word_vec, char], 1)
            else:
                if word != 1:
                    count_words += 1
                tok_rep = word_vec
            tok_reps.append(tok_rep)
        sents_len.append(count_words)
        # One concatenation per sentence instead of one per token.
        sentence_embeddings.append(torch.cat(tok_reps, 0))
    token_embedds = torch.stack(sentence_embeddings, 0)

    tokens = token_embedds.transpose(0, 1)
    tok_output, _ = self.lstm(tokens)  # [max_len, batch_size, hidden_dim]
    tok_output = tok_output.transpose(0, 1)

    # ---- per-sentence parser state ------------------------------------------
    buffer = [[] for _ in range(self.batch_size)]
    losses = [[] for _ in range(self.batch_size)]
    predict_actions = [[] for _ in range(self.batch_size)]
    output = [[[lstm_initial, "<pad>"]] for _ in range(self.batch_size)]
    if not training:
        action = [[[lstm_initial, "<pad>"]] for _ in range(self.batch_size)]
    stack = [[[lstm_initial, "<pad>"]] for _ in range(self.batch_size)]

    max_len = tok_output.size(1)
    for idx in range(tok_output.size(0)):
        for word_idx in range(max_len):
            # Embeddings were built in reverse word order, hence the mirrored
            # index when looking up the surface word.
            buffer[idx].append([
                tok_output[idx][word_idx].unsqueeze(0),
                token_embedds[idx][word_idx].unsqueeze(0),
                self.idx2word[sentence_array[idx][max_len - 1 - word_idx]],
            ])

    # ---- transition loop ------------------------------------------------------
    for act_idx in range(self.seq_length):
        batch_buffer = [b[-1] for b in buffer]
        if training:
            if act_idx == 0:
                batch_action = [lstm_initial[0] for _ in range(self.batch_size)]
            else:
                batch_action = [a[act_idx - 1].unsqueeze(0) for a in action_output]
            batch_relation = [r[act_idx].unsqueeze(0) for r in relation_embeds]
        else:
            batch_action = [a[-1] for a in action]
        batch_output = [o[-1] for o in output]
        batch_stack = [s[-1] for s in stack]

        # A sentence is still active while it has words left in its buffer or
        # something other than the "<pad>" sentinel on top of its stack.
        active = {i for i, remaining in enumerate(sents_len) if remaining > 0}
        active.update(i for i, top in enumerate(batch_stack) if top[1] != '<pad>')
        # Sorted on purpose: getloss_batch returns its results in this order
        # and they are matched back to batch indices below. Iterating a set
        # directly is not guaranteed to be ascending.
        have_action_batch = sorted(active)
        if not have_action_batch:
            continue

        batch_valid_actions = self.get_possible_actions_batch(batch_stack, sents_len, have_action_batch)
        if training:
            gold_actions = [ac[act_idx] for ac in actions.data]
            batch_pred, batch_loss = self.getloss_batch(have_action_batch, batch_buffer, batch_stack,
                                                        batch_action, batch_output, batch_valid_actions,
                                                        gold_actions)
            batch_real_action = [self.idx2action[ac.item()] for ac in gold_actions]
        else:
            batch_pred, batch_loss = self.getloss_batch(have_action_batch, batch_buffer, batch_stack,
                                                        batch_action, batch_output, batch_valid_actions)
            pred_action_tensor = utils.variable(torch.from_numpy(np.array(batch_pred)), self.gpu_triger)
            predict_actions_embed = self.dropout_e(self.action_embeds(pred_action_tensor))
            # Previous action-LSTM states, row-aligned with batch_pred.
            prev_h = torch.cat([action[b][-1][0][0] for b in have_action_batch])
            prev_c = torch.cat([action[b][-1][0][1] for b in have_action_batch])
            ac_lstm_h, ac_lstm_c = self.action_lstm(predict_actions_embed, (prev_h, prev_c))

        # Map each active batch index to its row in the getloss_batch results.
        row_of = {b: row for row, b in enumerate(have_action_batch)}
        for batch_idx in range(self.batch_size):
            row = row_of.get(batch_idx)
            if row is not None:
                predict_actions[batch_idx].append(batch_pred[row])
                if training:
                    losses[batch_idx].append(batch_loss[row])
                else:
                    action[batch_idx].append([
                        (ac_lstm_h[row].unsqueeze(0), ac_lstm_c[row].unsqueeze(0)),
                        self.idx2action[batch_pred[row]],
                    ])
            elif not training:
                # Finished sentences keep receiving the sentinel entry.
                action[batch_idx].append([lstm_initial, "<pad>"])

        if not training:
            batch_real_action = [ac[-1][1] for ac in action]
            relation_embeds = self.dropout_e(self.relation_embeds(
                utils.variable(torch.from_numpy(np.array([self.action2idx[a] for a in batch_real_action])),
                               self.gpu_triger)))
            batch_relation = [relation_embed.unsqueeze(0) for relation_embed in relation_embeds]

        batch_shift_idx = [i for i, name in enumerate(batch_real_action) if name.startswith('S')]
        batch_out_idx = [i for i, name in enumerate(batch_real_action) if name.startswith('O')]
        batch_reduce_idx = [i for i, name in enumerate(batch_real_action) if name.startswith('R')]

        if batch_shift_idx:
            buffer, stack = self.batch_shift_out('S', buffer, stack, batch_shift_idx)
            for i in batch_shift_idx:
                sents_len[i] -= 1
        if batch_out_idx:
            buffer, output = self.batch_shift_out('O', buffer, output, batch_out_idx)
            for i in batch_out_idx:
                sents_len[i] -= 1
        if batch_reduce_idx:
            stack, output = self.batch_reduce(stack, output, batch_relation, batch_reduce_idx)

    loss = 0
    if training:
        for sent_losses in losses:
            # Skip sentences that never scored an action. stack (not cat)
            # because the collected log-probabilities may be 0-dim tensors.
            if sent_losses:
                loss = loss - torch.sum(torch.stack(sent_losses))
    return loss, predict_actions