def __init__(self, lang):
    """Build the language-specific extraction components.

    Args:
        lang: Language selector. ``LANG.DE`` selects the ``de_lang()``
            configuration of every component; any other value selects
            ``en_lang()``.
    """
    german = lang == LANG.DE

    shortest_path = ShortestPathRE()
    self.spre = shortest_path.de_lang() if german else shortest_path.en_lang()

    pattern_based = PatternBasedRE()
    self.pbre = pattern_based.de_lang() if german else pattern_based.en_lang()

    # SpacyEntityExtractor is the active entity extractor; the
    # FlairEntityExtractor alternative is currently disabled.
    entity_extractor = SpacyEntityExtractor()
    self.entity_extractor = (
        entity_extractor.de_lang() if german else entity_extractor.en_lang()
    )
class RelationExtractor:
    """Sentence-level relation extractor that combines two strategies.

    Holds a shortest-path extractor (``spre``), a pattern-based extractor
    (``pbre``) and an entity extractor, all configured for one language.
    """

    def __init__(self, lang):
        """Build the language-specific extraction components.

        Args:
            lang: Language selector. ``LANG.DE`` selects the ``de_lang()``
                configuration of every component; any other value selects
                ``en_lang()``.
        """
        german = lang == LANG.DE

        shortest_path = ShortestPathRE()
        self.spre = shortest_path.de_lang() if german else shortest_path.en_lang()

        pattern_based = PatternBasedRE()
        self.pbre = pattern_based.de_lang() if german else pattern_based.en_lang()

        # SpacyEntityExtractor is the active entity extractor; the
        # FlairEntityExtractor alternative is currently disabled.
        entity_extractor = SpacyEntityExtractor()
        self.entity_extractor = (
            entity_extractor.de_lang() if german else entity_extractor.en_lang()
        )

    def extract_relations(self, text, plot_graph=False, validate=False, out_val_file=None):
        """Extract relations from ``text``, one sentence at a time.

        Each sentence produced by ``sent_tokenize`` is passed to the entity
        extractor. When it reports at least one ``per_entities`` item the
        shortest-path extractor is used, otherwise the pattern-based one.

        Args:
            text: Input text; it is split into sentences with ``sent_tokenize``.
            plot_graph: Forwarded to ``spre.extract_sp_relation``.
            validate: When true, a line ``"<relations>; <sentence>"`` is
                appended to ``out_val_file`` for every sentence.
            out_val_file: Path opened in append mode (UTF-8) when ``validate``
                is true.

        Returns:
            The relations extracted from the last sentence processed, or an
            empty list when ``text`` yields no sentences.
        """
        # NOTE(review): every sentence overwrites the previous result, so only
        # the last sentence's relations are returned -- confirm this is intended.
        extracted_relations = []

        for sentence in sent_tokenize(text):
            extraction = self.entity_extractor.extract_entities(sentence)
            entities, per_entities = extraction
            logger.debug(f'Extracted entities: {entities}')

            if len(per_entities) == 0:
                # USR-REL: pattern based relation extraction
                extracted_relations = self.pbre.extract_rel(sentence)
            else:
                # PER-PER or USR-PER: shortest path relation extraction
                extracted_relations = self.spre.extract_sp_relation(
                    entities, per_entities, sentence, plot_graph
                )

            if not validate:
                continue

            with open(out_val_file, 'a', encoding='utf-8') as val_file:
                val_file.write(f'{extracted_relations}; {sentence}\n')

        return extracted_relations
def test_extract_rel_8():
    """An utterance mentioning 'my uncle' yields ``('uncle-of', 'USER')``."""
    utterance = u'''sadly i am on call with my uncle in the hospital so i never let mine die'''
    # Instantiate before calling en_lang(): every other use in this file calls
    # it on an instance, and calling it on the class would fail if it is an
    # instance method.
    pbre = PatternBasedRE().en_lang()
    result = pbre.extract_rel(utterance, plot_tree=False)
    assert result == [('uncle-of', 'USER')]
def test_extract_rel_6():
    """An utterance mentioning 'my wife and kids' yields ``('wife-of', 'USER')``."""
    extractor = PatternBasedRE().en_lang()
    expected = [('wife-of', 'USER')]

    relations = extractor.extract_rel(
        'i miss my wife and kids so much',
        plot_tree=False,
    )

    assert relations == expected
def test_extract_rel_7():
    """An utterance mentioning 'my dad' yields ``('father-of', 'USER')``."""
    utterance = 'no , my dad taught me good music and good work ethics.'
    # Instantiate before calling en_lang(): every other use in this file calls
    # it on an instance, and calling it on the class would fail if it is an
    # instance method.
    pbre = PatternBasedRE().en_lang()
    result = pbre.extract_rel(utterance, plot_tree=False)
    assert result == [('father-of', 'USER')]
def test_extract_rel_5():
    """A German utterance mentioning 'einen Bruder' yields ``('brother-of', 'USER')``."""
    extractor = PatternBasedRE().de_lang()
    expected = [('brother-of', 'USER')]

    relations = extractor.extract_rel('Ich habe einen Bruder', plot_tree=False)

    assert relations == expected
def test_extract_rel_1():
    """An utterance mentioning an 'older brother' yields ``('brother-of', 'USER')``."""
    extractor = PatternBasedRE().en_lang()
    expected = [('brother-of', 'USER')]

    relations = extractor.extract_rel(
        'I have older brother who lives in Berlin',
        plot_tree=False,
    )

    assert relations == expected
def test_extract_rel_4():
    """An utterance mentioning 'My little sister' yields ``('sister-of', 'USER')``."""
    extractor = PatternBasedRE().en_lang()
    expected = [('sister-of', 'USER')]

    # NOTE(review): plot_tree=True here, while the _1 and _5-_8 tests pass
    # False -- confirm plotting is wanted in this test.
    relations = extractor.extract_rel(
        'My little sister Lisa is moving to London',
        plot_tree=True,
    )

    assert relations == expected
def test_extract_rel_3():
    """An utterance mentioning 'one brother' yields ``('brother-of', 'USER')``."""
    extractor = PatternBasedRE().en_lang()
    expected = [('brother-of', 'USER')]

    # NOTE(review): plot_tree=True here, while the _1 and _5-_8 tests pass
    # False -- confirm plotting is wanted in this test.
    relations = extractor.extract_rel('I have one brother', plot_tree=True)

    assert relations == expected
def test_extract_rel_2():
    """An utterance mentioning 'two sisters' yields ``('sister-of', 'USER')``."""
    extractor = PatternBasedRE().en_lang()
    expected = [('sister-of', 'USER')]

    # NOTE(review): plot_tree=True here, while the _1 and _5-_8 tests pass
    # False -- confirm plotting is wanted in this test.
    relations = extractor.extract_rel('I have two sisters', plot_tree=True)

    assert relations == expected