def get_drt():
    """Return the DRT triplets for the text carried by a POST request.

    The request body is parsed as JSON and its ``'text'`` entry is wrapped
    in a ``Discourse``; a ``DRTTripletsWriter`` is applied to it and the
    result is passed through ``jsonify``. Any non-POST request gets an
    empty list back.
    """
    if request.method == 'POST':
        payload = json.loads(request.data)
        parsed_discourse = Discourse(payload['text'])
        triplets_writer = DRTTripletsWriter()
        return jsonify(parsed_discourse.apply(triplets_writer))
    return []
def test_multi_sentence_anaphora_feminine_names(self):
    # Two sentences: the second one refers to Jane only through "She".
    # The extractor is expected to attach the role to the Jane_0 entity.
    parsed = Discourse("Jane is happy. She is a carpenter")
    found = Extractor(parsed, _knowledge).extract()
    self.assertEqual(found, [('Jane_0', 'HAS_ROLE', 'carpenter')])
def test_pronoun_coreference(self):
    # Single sentence where "he" appears after the name John; the
    # extracted ownership triplet is expected to be attached to John_0.
    parsed = Discourse('John drove home where he has a cat.')
    found = Extractor(parsed, _knowledge).extract()
    self.assertEqual(found, [('John_0', 'OWN', 'cat')])
def _substitute_text_in_match_statement_with_graph(text, substitution_triggers):
    """Replace the quoted sentence of each MATCH statement with a DRS string.

    Quoted spans that follow ``MATCH`` are looked up in *text*, first with
    double quotes and, only if none is found, with single quotes. Each
    captured sentence is turned into a DRS, cleaned with a
    ``DrsNERCleaner`` built from *substitution_triggers*, and the quoted
    sentence in *text* is replaced by ``str(drs)``.

    :param text: the text containing MATCH statements.
    :param substitution_triggers: forwarded to ``DrsNERCleaner``.
    :return: *text* with every matched quoted sentence substituted.
    """
    drs_cleaner = DrsNERCleaner(substitution_triggers)

    # Remember which delimiter produced the matches. Previously the
    # replacement always searched for the double-quoted form, so sentences
    # found through the single-quote fallback were never substituted.
    quote = '"'
    sentences = re.findall(r'MATCH.*"(.*)"', text)
    if not sentences:
        quote = "'"
        sentences = re.findall(r"MATCH.*'(.*)'", text)

    for sentence in sentences:
        try:
            drs = Discourse(sentence).connected_components[0]
        except IndexError:
            # No connected component available: fall back to building the
            # DRS directly from the sentence.
            _logger.warning('Cannot use Discourse on %s', sentence[:200])
            drs = Drs.create_from_natural_language(sentence)
        drs = drs.apply(drs_cleaner)
        text = text.replace(quote + sentence + quote, str(drs))
    return text
def test_coreference_is_joined_in_graph(self):
    # The discourse graph is searched for pairs of nodes joined by a
    # REFERS_TO edge; the test requires more than one such match.
    parsed = Discourse(
        "John is ginger. He is a carpenter. test. Jane is blond. She is a carpenter. "
    )
    refers_to_pattern = Drs.create_from_predicates_string(
        "{}(a), {}(b), {'type': 'REFERS_TO'}(a,b)")
    matches = parsed._discourse.apply(DrsMatcher(refers_to_pattern, metric))
    self.assertTrue(len(matches) > 1)
def test_multiple_people_same_rule(self):
    # Two people are introduced by name and then referred to by pronoun;
    # the same rule is expected to yield one triplet for each of them.
    text = "John is ginger. He is a carpenter. test. Jane is blond. She is a carpenter. "
    discourse = Discourse(text)
    extractor = Extractor(discourse, _knowledge)
    triplets = extractor.extract()
    expected_triplets = [
        ('John_0', 'HAS_ROLE', 'carpenter'),
        ('Jane_1', 'HAS_ROLE', 'carpenter'),
    ]
    # assertTrue(a, b) uses b as the failure message and only checks that
    # a is truthy; assertEqual performs the intended comparison.
    self.assertEqual(triplets, expected_triplets)
def get_triplets():
    """Return the extracted triplets for the text carried by a POST request.

    The request body is parsed as JSON and its ``'text'`` entry is wrapped
    in a ``Discourse``. An ``Extractor`` built with the module-level
    ``knowledge`` extracts the triplets, which are converted with
    ``transform_triplets_into_api_edges_and_nodes`` and passed through
    ``jsonify``. Any non-POST request gets an empty list back.
    """
    if request.method == 'POST':
        payload = json.loads(request.data)
        parsed_discourse = Discourse(payload['text'])
        found = Extractor(parsed_discourse, knowledge).extract()
        return jsonify(transform_triplets_into_api_edges_and_nodes(found))
    return []
def test_multiple_matcing(self):
    # Runs the extractor over a text that mentions two full names, each
    # followed by a pronoun sentence.
    parsed = Discourse(
        "John Smith is blond. He is a carpenter. There is no reason to panic. Sarah Doe is ginger. She is a carpenter."
    )
    found = Extractor(parsed, _knowledge).extract()
    wanted = [
        ('John_Smith_0', 'HAS_BLOND_ROLE', 'carpenter'),
        ('Sarah_Doe_1', 'HAS_GINGER_ROLE', 'carpenter'),
        ('John_0', 'HAS_ROLE', 'carpenter'),
        ('Jane_1', 'HAS_ROLE', 'carpenter'),
    ]
    # NOTE(review): assertTrue takes its second argument as the failure
    # message, so `wanted` is never compared with `found`; this only
    # checks that `found` is truthy. Confirm the intended expectation
    # before switching to assertEqual.
    self.assertTrue(found, wanted)
def test_asimov_wiki(self):
    # Runs the extractor and a graph match over the text stored in
    # ../data/wiki_asimov.txt (relative to _path).
    # Read the fixture inside a context manager so the file handle is
    # closed instead of being left to the garbage collector.
    with open(os.path.join(_path, '../data/wiki_asimov.txt')) as wiki_file:
        text = wiki_file.read()
    discourse = Discourse(text)
    extractor = Extractor(discourse, _knowledge)
    triplets = extractor.extract()
    expected_triplets = [
        ('Isaac_Asimov_0|Asimov_0', 'JOB_TITLE', 'writer'),
        ('Isaac_Asimov_0|Asimov_0', 'OWNS', 'works'),
        ('Isaac_Asimov_0|Asimov_0', 'OWNS', 'books'),
    ]
    # NOTE(review): assertTrue takes its second argument as the failure
    # message, so expected_triplets is never compared with triplets; this
    # only checks that triplets is truthy. Confirm the intended
    # expectation before switching to assertEqual.
    self.assertTrue(triplets, expected_triplets)

    # The discourse graph must contain more than one node linked by an
    # 'at' edge to a node whose word is Boston_University.
    expected_drs = Drs.create_from_predicates_string(
        "{}(a), {'word': 'Boston_University'}(b), {'type': 'at'}(a,b)")
    lst = discourse._discourse.apply(DrsMatcher(expected_drs, metric))
    is_match = len(lst) > 1
    self.assertTrue(is_match)
import os

from pynsett.discourse import Discourse
from pynsett.extractor import Extractor
from pynsett.knowledge import Knowledge

# Example script: load the rules from ../rules/test.rules (relative to this
# file), run the extractor over one sentence and print every triplet found.

_path = os.path.dirname(__file__)

text = "Jane was born on 10 August 1979."

knowledge = Knowledge()
# Read the rules inside a context manager so the file handle is closed
# as soon as its content has been loaded.
with open(os.path.join(_path, '../rules/test.rules')) as rules_file:
    knowledge.add_rules(rules_file.read())

discourse = Discourse(text)
extractor = Extractor(discourse, knowledge)
triplets = extractor.extract()
for triplet in triplets:
    print(triplet)