def rule0_func(self, sent_data):
    """Check whether the entity pair is ordered (person, place).

    Args:
        sent_data: 7-item sequence ``(sent_id, sent_tokens, sent_text,
            noun_chunks, named_entities, ent1, ent2)``.

    Returns:
        True when the label of the entity resolved from ``ent1`` is in
        ``self.persons_ents`` and the label of the entity resolved from
        ``ent2`` is in ``self.places_ents``; False otherwise.
    """
    # Only the last two fields are needed here; unpacking all seven still
    # enforces the expected arity of ``sent_data``.
    _, _, _, _, _, first_text, second_text = sent_data
    first = ent_text_to_ent(first_text, sent_data)
    second = ent_text_to_ent(second_text, sent_data)
    return (
        first.label_ in self.persons_ents
        and second.label_ in self.places_ents
    )
def remove_non_relevant_samples(data):
    """Keep only the samples whose entity pair is ordered (person, place).

    Args:
        data: iterable of ``(sent_data, y)`` pairs, where ``sent_data`` is a
            7-item sequence ``(sent_id, sent_tokens, sent_text, noun_chunks,
            named_entities, ent1, ent2)``.

    Returns:
        A new list of ``(sent_data, y)`` tuples, in input order, for which
        the label of the entity resolved from ``ent1`` is in the
        module-level ``person_ents`` and the label of the entity resolved
        from ``ent2`` is in the module-level ``places_ents``.
    """

    def _is_person_place(sample):
        # Both entities are resolved before either label is inspected.
        _, _, _, _, _, first_text, second_text = sample
        first = ent_text_to_ent(first_text, sample)
        second = ent_text_to_ent(second_text, sample)
        return first.label_ in person_ents and second.label_ in places_ents

    return [
        (sent_data, y)
        for sent_data, y in data
        if _is_person_place(sent_data)
    ]
def rule1_func(self, sent_data):
    """Check for a (place, person) pair joined by a ``'s`` token.

    Args:
        sent_data: 7-item sequence ``(sent_id, sent_tokens, sent_text,
            noun_chunks, named_entities, ent1, ent2)``.

    Returns:
        True only when all of the following hold, checked in this order:
        the label of the entity resolved from ``ent1`` is in
        ``self.places_ents``; the label of the entity resolved from
        ``ent2`` is in ``self.persons_ents``; the string ``"'s"`` is among
        the values returned by ``words_between_ents(sent_data)``; and
        ``ents_between(...)`` compares equal to 0. False otherwise.
    """
    _, _, _, _, _, first_text, second_text = sent_data
    first = ent_text_to_ent(first_text, sent_data)
    second = ent_text_to_ent(second_text, sent_data)

    if first.label_ not in self.places_ents:
        return False
    if second.label_ not in self.persons_ents:
        return False
    if "'s" not in words_between_ents(sent_data):
        return False
    # NOTE(review): ents_between presumably counts the entities lying
    # between the two spans -- confirm against its definition.
    return bool(ents_between(first, second, sent_data) == 0)
def words_between_ents(sent_data):
    """Collect the token texts lying between the two entities.

    Args:
        sent_data: 7-item sequence ``(sent_id, sent_tokens, sent_text,
            noun_chunks, named_entities, ent1, ent2)``.

    Returns:
        A list with the ``.text`` of each token of ``sent_tokens`` at
        indices ``first.end`` up to (not including) ``second.start``, where
        ``first``/``second`` are the entities resolved from ``ent1``/``ent2``.
        The list is empty when the first entity starts after the second.
    """
    _, sent_tokens, _, _, _, first_text, second_text = sent_data
    token_texts = [tok.text for tok in sent_tokens]
    first = ent_text_to_ent(first_text, sent_data)
    second = ent_text_to_ent(second_text, sent_data)

    # Reversed order: there is nothing "between" in the forward direction.
    if first.start > second.start:
        return []

    # Indexed per position (rather than sliced) so that an out-of-range
    # index still raises instead of being silently clipped.
    return [token_texts[i] for i in range(first.end, second.start)]