def spo_to_seq(self, text: str, spo_list: List[Dict[str, str]], s_fst: bool = True) -> Dict[int, List[int]]:
    """Group triplets by head position into flat ``[relation, tail, ...]`` lists.

    Each triplet contributes one ``relation_id, tail_pos`` pair to the list
    stored under its head position. Positions are indices into the
    tokenized ``text`` and point at the LAST token of the entity
    (``start + len(entity_tokens) - 1``).

    Args:
        text: Raw sentence; it is tokenized with ``self.hyper.tokenizer``.
        spo_list: Triplets with the keys ``"subject"``, ``"predicate"`` and
            ``"object"``.
        s_fst: When true (default) the subject position is the dictionary
            key and the object position is stored in the value list. When
            false the two roles are swapped.

    Returns:
        Mapping ``head_pos -> [relation_id, tail_pos, relation_id, ...]``.

    Raises:
        KeyError: If a predicate is missing from ``self.relation_vocab`` or
            a triplet lacks one of the expected keys.
    """
    tokens = self.hyper.tokenizer(text)
    seq: Dict[int, List[int]] = {}

    for triplet in spo_list:
        object_tokens = self.hyper.tokenizer(triplet["object"])
        subject_tokens = self.hyper.tokenizer(triplet["subject"])

        # NOTE(review): `find` is defined elsewhere in the file; it is
        # presumably a sub-sequence search returning the start index.
        # Its behaviour for a missing entity is not visible here -- confirm.
        object_pos = find(tokens, object_tokens) + len(object_tokens) - 1
        subject_pos = find(tokens, subject_tokens) + len(subject_tokens) - 1
        # The former `text.find(object)` recomputation was removed: it
        # replaced the token index above with a character offset and passed
        # the tokenizer output to `str.find`, which only accepts strings.
        relation_pos = self.relation_vocab[triplet["predicate"]]

        if not s_fst:
            # Object-first ordering: key by the object, store the subject.
            object_pos, subject_pos = subject_pos, object_pos

        seq.setdefault(subject_pos, []).extend([relation_pos, object_pos])

    return seq
def spo_to_selection(
        self, text: str, spo_list: List[Dict[str, str]]) -> List[Dict[str, int]]:
    """Convert triplets into ``{"subject", "predicate", "object"}`` index dicts.

    The subject and object values are indices into the tokenized ``text``
    pointing at the last token of the entity; the predicate value is the
    id looked up in ``self.relation_vocab``.
    """
    tokenize = self.hyper.tokenizer
    sentence = tokenize(text)

    def encode(triplet: Dict[str, str]) -> Dict[str, int]:
        # Tokenize and locate the object before the subject, then resolve
        # the relation id.
        tail = tokenize(triplet["object"])
        head = tokenize(triplet["subject"])
        tail_end = find(sentence, tail) + len(tail) - 1
        head_end = find(sentence, head) + len(head) - 1
        relation_id = self.relation_vocab[triplet["predicate"]]
        return {
            "subject": head_end,
            "predicate": relation_id,
            "object": tail_end,
        }

    return [encode(triplet) for triplet in spo_list]
def spo_to_seq(self, text: str, spo_list: List[Dict[str, str]]) -> List[int]:
    """Flatten triplets into ``[relation_id, subject_pos, object_pos, ...]``.

    Entity positions are indices into the tokenized ``text`` and point at
    the last token of the entity. Triplets are emitted in the order they
    appear in ``spo_list``; no terminating relation id is appended.

    Args:
        text: Raw sentence; it is tokenized with ``self.hyper.tokenizer``.
        spo_list: Triplets with the keys ``"subject"``, ``"predicate"`` and
            ``"object"``.

    Returns:
        A flat list holding three integers per triplet. (The previous
        ``Dict[int, List[int]]`` annotation did not match the returned
        value.)

    Raises:
        KeyError: If a predicate is missing from ``self.relation_vocab`` or
            a triplet lacks one of the expected keys.
    """
    tokens = self.hyper.tokenizer(text)
    result: List[int] = []

    for triplet in spo_list:
        object_tokens = self.hyper.tokenizer(triplet["object"])
        subject_tokens = self.hyper.tokenizer(triplet["subject"])

        # NOTE(review): `find` lives elsewhere in the file; presumably it
        # returns the start index of the sub-sequence -- confirm.
        object_pos = find(tokens, object_tokens) + len(object_tokens) - 1
        subject_pos = find(tokens, subject_tokens) + len(subject_tokens) - 1
        relation_pos = self.relation_vocab[triplet["predicate"]]

        result.extend([relation_pos, subject_pos, object_pos])

    return result
def to_ent(outp):
    """Build start/end indicator vectors for the entity names in ``outp``.

    Reads ``tokens`` and ``self`` from the enclosing scope. Returns two
    lists of length ``len(tokens)``: the first has a 1 at the position
    returned by ``find`` for each name, the second has a 1 at that position
    plus ``len(name_tokens) - 1``.
    """
    ent1 = [0] * len(tokens)
    ent2 = [0] * len(tokens)
    for name in outp:
        # Tokenize once and reuse for both the lookup and the length.
        name_tokens = self.hyper.tokenizer(name)
        # NOTE(review): `find` is defined elsewhere; presumably it returns
        # the start index of `name_tokens` inside `tokens` -- confirm.
        start = find(tokens, name_tokens)
        ent1[start] = 1
        ent2[start + len(name_tokens) - 1] = 1
    return ent1, ent2
def to_in_key(inp, name):
    """Encode one input field as a relation id or a ``(first, last)`` pair.

    Reads ``tokens`` and ``self`` from the enclosing scope.

    * Falsy ``inp`` -> ``(0, 0)``.
    * ``name == "predicate"`` -> ``self.relation_vocab[inp]`` (a single
      value, not a pair).
    * Otherwise -> the position returned by ``find`` for the tokenized
      ``inp`` and that position plus ``len(inp_tokens) - 1``.
    """
    if not inp:
        return 0, 0
    if name == "predicate":
        return self.relation_vocab[inp]

    # Tokenize once and reuse for both the lookup and the length.
    inp_tokens = self.hyper.tokenizer(inp)
    # NOTE(review): `find` is defined elsewhere; presumably it returns the
    # start index of `inp_tokens` inside `tokens` -- confirm.
    first = find(tokens, inp_tokens)
    last = first + len(inp_tokens) - 1
    return first, last
def spo_to_bio(self, text: str, entities: List[str]) -> List[str]:
    """Tag the tokenized ``text`` with B/I/O labels for the given entities.

    Every token starts as ``"O"``. For each entity, the token at the
    position returned by ``find`` becomes ``"B"`` and the remaining tokens
    of the entity become ``"I"``. Later entities overwrite the labels of
    earlier ones where they overlap.

    Args:
        text: Raw sentence; it is tokenized with ``self.hyper.tokenizer``.
        entities: Entity surface strings, tokenized the same way.

    Returns:
        One label per token of ``text``.
    """
    tokens = self.hyper.tokenizer(text)
    bio = ["O"] * len(tokens)

    for entity in entities:
        # Tokenize once and reuse for both the lookup and the length.
        entity_tokens = self.hyper.tokenizer(entity)
        # NOTE(review): `find` is defined elsewhere; presumably it returns
        # the start index of `entity_tokens` inside `tokens` -- confirm.
        begin = find(tokens, entity_tokens)
        end = begin + len(entity_tokens) - 1
        # `end` is an inclusive index, so it must be strictly below the
        # token count (the previous `<=` let an out-of-range index through).
        assert end < len(tokens)

        bio[begin] = "B"
        for i in range(begin + 1, end + 1):
            bio[i] = "I"

    return bio