def process(self, text):
    """Tag `text` with the model and extract entity mentions.

    Tags are BIO-style: a tag containing 'B' opens an entity span; following
    tags extend it until an 'O' tag or the next 'B' tag.

    Args:
        text: sequence of characters (e.g. a Chinese string) to tag.

    Returns:
        list of (entity_text, start_index_as_str, entity_type) tuples, where
        entity_type is the part of the opening tag after the last '_'.
    """
    with torch.no_grad():
        # Map each character to its BERT vocab id, falling back to [UNK].
        token_ids = [bert_vocab.get(c, bert_vocab.get('[UNK]')) for c in text]
        seq_len = torch.tensor([len(token_ids)], dtype=torch.long, device=device)
        batch = torch.tensor([token_ids], dtype=torch.long, device=device)
        # Model returns per-position tag indices for the single batch item.
        pred_ids = self.model(batch, seq_len)[0]
        pred_tags = [tag_list[t] for t in pred_ids]

    result = []
    # Every position whose tag contains 'B' starts a new entity span.
    for start in (i for i, tag in enumerate(pred_tags) if 'B' in tag):
        entity = text[start]
        entity_tag = pred_tags[start]
        j = start + 1
        # Extend until an 'O' tag, the next 'B' tag, or end of sequence.
        while j < len(pred_tags) and pred_tags[j] != 'O' and 'B' not in pred_tags[j]:
            entity += text[j]
            j += 1
        result.append((entity, str(start), entity_tag.split('_')[-1]))
    return result
def process(self, text):
    """Tag `text` with the BERT model, Viterbi-decode, and return mentions.

    Returns a list of (mention_text, start_index_as_str, entity_type) tuples.
    """
    ids = [bert_vocab.get(ch, bert_vocab.get('[UNK]')) for ch in text]
    mask = [1] * len(ids)
    ids_t = torch.tensor([ids], dtype=torch.long, device=device)
    mask_t = torch.tensor([mask], dtype=torch.long, device=device)
    seg_t = torch.zeros(*ids_t.size(), dtype=torch.long, device=device)

    with torch.no_grad():
        logits = self.model(ids_t, mask_t, seg_t)
        probs = torch.softmax(logits, dim=-1)[0, :].detach().cpu().numpy()

    # One dict per position: tag-id string ('0'..'8') -> probability.
    nodes = [{str(tag_id): p for tag_id, p in zip(range(9), step)} for step in probs]
    tags = viterbi(nodes)

    result = []
    # Mentions are encoded as an opening odd tag-id followed by its inside id.
    for m in re.finditer('(12+)|(34+)|(56+)|(78+)', tags):
        span = ''.join(text[m.start():m.end()])
        entity_type = tag_list[int(m.group()[0])].split('_')[-1]
        result.append((span, str(m.start()), entity_type))

    # Hard-coded special case kept from the original implementation.
    if '体检' in text:
        result.append(('体检', str(text.index('体检')), 'diagnosis'))
    return result
def __iter__(self):
    """Yield shuffled minibatches [X, S, X_MASK, X_SEG] of padded LongTensors."""
    order = list(range(len(self.data)))
    np.random.shuffle(order)
    batch_x, batch_s, batch_mask = [], [], []
    for idx in order:
        text, label = self.data[idx]
        # Character ids ([UNK] fallback), gold tag ids, and an all-ones mask.
        batch_x.append([bert_vocab.get(ch, bert_vocab.get('[UNK]')) for ch in text])
        batch_s.append([tag_dictionary[tag] for tag in label])
        batch_mask.append([1] * len(text))
        # Flush when the batch is full, or on the final (possibly short) batch.
        if len(batch_x) == self.batch_size or idx == order[-1]:
            xs = torch.tensor(seq_padding(batch_x), dtype=torch.long)
            ss = torch.tensor(seq_padding(batch_s), dtype=torch.long)
            masks = torch.tensor(seq_padding(batch_mask), dtype=torch.long)
            segs = torch.zeros(*xs.size(), dtype=torch.long)
            yield [xs, ss, masks, segs]
            batch_x, batch_s, batch_mask = [], [], []
def extract_items(text_in):
    """Decode entity mentions from `text_in` with the subject model.

    Returns a list of (mention_text, start_index_as_str, entity_type) tuples.
    """
    ids = [bert_vocab.get(ch, bert_vocab.get('[UNK]')) for ch in text_in]
    mask = [1] * len(ids)
    ids_t = torch.tensor([ids], dtype=torch.long, device=device)
    mask_t = torch.tensor([mask], dtype=torch.long, device=device)
    seg_t = torch.zeros(*ids_t.size(), dtype=torch.long, device=device)

    with torch.no_grad():
        # NOTE(review): argument order here is (ids, seg, mask), unlike the
        # (ids, mask, seg) order used by the other process() in this file —
        # confirm against subject_model's signature.
        probs = subject_model(ids_t, seg_t, mask_t)
        probs = torch.softmax(probs, dim=-1)
        probs = probs[0, :].detach().cpu().numpy()

    # One dict per position: tag-id string ('0'..'8') -> probability.
    nodes = [{str(i): p for i, p in zip(range(9), step)} for step in probs]
    tags = viterbi(nodes)

    result = []
    for m in re.finditer('(12+)|(34+)|(56+)|(78+)', tags):
        span = ''.join(text_in[m.start():m.end()])
        entity_type = tag_list[int(m.group()[0])].split('_')[-1]
        result.append((span, str(m.start()), entity_type))
    return result
# NOTE(review): collapsed top-level script fragment, TRUNCATED mid-statement —
# the body of the final `while` loop lies outside this chunk. Line structure
# reconstructed; code tokens left untouched. Do not "complete" the tail here.
#
# Phase 1: collect gold (entity, start_index, entity_type) triples from the
# BIO-style `mention` tag sequence into T.
T = []
B_idx = [i for i, l in enumerate(mention) if 'B' in l]
for i in B_idx:
    e = text[i]
    e_n = mention[i]
    j = i + 1
    # Extend the span until an 'O' tag or the next 'B' tag.
    while j < len(mention):
        if mention[j] == 'O' or 'B' in mention[j]:
            break
        e += text[j]
        j += 1
    T.append((e, str(i), e_n.split('_')[-1]))
    # T.append((e, str(i)))
# Phase 2: run `model` on the BERT-vocab ids of `text`, then begin the same
# span extraction over the predicted tags into R (continues past this chunk).
with torch.no_grad():
    _X = [bert_vocab.get(c, bert_vocab.get('[UNK]')) for c in text]
    max_len = len(_X)  # NOTE(review): appears unused in the visible portion
    _X_Len = torch.tensor([len(_X)], dtype=torch.long, device=device)
    _X = torch.tensor([_X], dtype=torch.long, device=device)
    pred_tags = model(_X, _X_Len)[0]
    pred_tags = [tag_list[_] for _ in pred_tags]
R = []
pred_B_idx = [i for i, l in enumerate(pred_tags) if 'B' in l]
for i in pred_B_idx:
    e = text[i]
    e_n = pred_tags[i]
    j = i + 1
    while j < len(pred_tags):
# NOTE(review): collapsed top-level script fragment; the success path after the
# try/except presumably continues outside this chunk. Line structure
# reconstructed; code tokens left untouched.
#
# Phase 1: collect gold (entity, start_index) pairs from the BIO-style
# `mention` tag sequence into T (entity-type variant is commented out here).
T = []
B_idx = [i for i, l in enumerate(mention) if 'B' in l]
for i in B_idx:
    e = text[i]
    e_n = mention[i]
    j = i + 1
    # Extend the span until an 'O' tag or the next 'B' tag.
    while j < len(mention):
        if mention[j] == 'O' or 'B' in mention[j]:
            break
        e += text[j]
        j += 1
    # T.append((e, str(i), e_n.split('_')[-1]))
    T.append((e, str(i)))
# Phase 2: BERT-style forward pass — ids / all-ones mask / zero segment ids.
x_ids = [bert_vocab.get(c, bert_vocab.get('[UNK]')) for c in text]
x_mask = [1] * len(x_ids)
x_ids = torch.tensor([x_ids], dtype=torch.long, device=device)
x_mask = torch.tensor([x_mask], dtype=torch.long, device=device)
x_seg = torch.zeros(*x_ids.size(), dtype=torch.long, device=device)
with torch.no_grad():
    try:
        k = model(x_ids, x_mask, x_seg)
        k = torch.softmax(k, dim=-1)
        kk = k[0, :].detach().cpu().numpy()
    except Exception:
        # NOTE(review): if model(...) itself raises, `k` is unbound here and
        # this print raises NameError — confirm intended diagnostics.
        print(f'text: {text}, k:{k}')