Пример #1
0
 def test_stanford_tagger_is_called_if_found(self):
     """Run StanfordTaggerRunner on a short document and check its tags.

     After the runner is applied, the tagging step must be reported as
     done and ``doc.postags`` must equal the flat list of expected tags.
     """
     expected_tags = [
         'DT', 'NN', '.',
         'CC', 'DT', 'JJ', '.',
         'RB', '.',
     ]
     document = SentencedIEDocFactory(
         text='Some sentence. And some other. Indeed!')

     runner = StanfordTaggerRunner()
     runner(document)

     tagging_done = document.was_preprocess_step_done(PreProcessSteps.tagging)
     self.assertTrue(tagging_done)
     self.assertEqual(document.postags, expected_tags)
Пример #2
0
 def test_stanford_tagger_is_called_if_found(self):
     """Run StanfordTaggerRunner on a short document and check its tags.

     After the runner is applied, the tagging step must be reported as
     done and its stored result must equal the flat list of expected tags.
     """
     expected_tags = [
         'DT', 'NN', '.',
         'CC', 'DT', 'JJ', '.',
         'RB', '.',
     ]
     document = SentencedIEDocFactory(
         text='Some sentence. And some other. Indeed!')

     runner = StanfordTaggerRunner()
     runner(document)

     step = PreProcessSteps.tagging
     self.assertTrue(document.was_preprocess_done(step))
     stored_tags = document.get_preprocess_result(PreProcessSteps.tagging)
     self.assertEqual(stored_tags, expected_tags)
Пример #3
0
 def test_tagger_runner_overriding_when_selected(self):
     """A TaggerRunner built with ``override=True`` must re-tag the document.

     The runner is applied twice; between the two runs its postagger is
     swapped from one that labels every token 'A' to one that labels every
     token 'B'. The stored tagging result must then consist only of 'B'.
     """
     def tag_all_as(label):
         # Build a stub postagger pairing every token of every sentence
         # with the same fixed label.
         def postagger(sents):
             return [[(x, label) for x in sent] for sent in sents]
         return postagger

     doc = SentencedIEDocFactory(text='Some sentence. And some other. Indeed!')
     tag = TaggerRunner(tag_all_as('A'), override=True)
     tag(doc)
     tag.postagger = tag_all_as('B')  # XXX: accessing implementation
     tag(doc)
     postags = doc.get_preprocess_result(PreProcessSteps.tagging)
     # all() is vacuously true for an empty iterable, so make sure some tags
     # were actually stored before checking their values.
     self.assertTrue(postags)
     self.assertTrue(all(x == 'B' for x in postags))
Пример #4
0
    def test_tagger_runner_is_calling_postagger(self):
        """TaggerRunner must store the tags produced by the given postagger.

        A stub postagger hands out canned tags, one list per sentence. After
        the run the tagging step must be flagged as done and ``doc.postags``
        must equal those tags flattened into a single list.
        """
        tags_by_sentence = [
            ['DT', 'NN', '.'],
            ['CC', 'DT', 'JJ', '.'],
            ['RB', '.'],
        ]
        pending_tags = iter(tags_by_sentence)

        def postagger(sents):
            # Lazily pair each sentence with the next canned list of tags.
            return (zip(tokens, next(pending_tags)) for tokens in sents)

        document = SentencedIEDocFactory(
            text='Some sentence. And some other. Indeed!')
        runner = TaggerRunner(postagger)
        runner(document)

        self.assertTrue(
            document.was_preprocess_step_done(PreProcessSteps.tagging))
        flat_tags = [tag for tags in tags_by_sentence for tag in tags]
        self.assertEqual(document.postags, flat_tags)
Пример #5
0
    def test_tagger_runner_is_calling_postagger(self):
        """TaggerRunner must store the tags produced by the given postagger.

        A stub postagger hands out canned tags, one list per sentence. After
        the run the tagging step must be flagged as done and its stored
        result must equal those tags flattened into a single list.
        """
        tags_by_sentence = [
            ['DT', 'NN', '.'],
            ['CC', 'DT', 'JJ', '.'],
            ['RB', '.'],
        ]
        pending_tags = iter(tags_by_sentence)

        def postagger(sents):
            # Lazily pair each sentence with the next canned list of tags.
            return (zip(tokens, next(pending_tags)) for tokens in sents)

        document = SentencedIEDocFactory(
            text='Some sentence. And some other. Indeed!')
        runner = TaggerRunner(postagger)
        runner(document)

        self.assertTrue(document.was_preprocess_done(PreProcessSteps.tagging))
        stored_tags = document.get_preprocess_result(PreProcessSteps.tagging)
        flat_tags = [tag for tags in tags_by_sentence for tag in tags]
        self.assertEqual(stored_tags, flat_tags)
Пример #6
0
 def test_stanford_ner_is_called_if_found(self):
     """Run StanfordNERRunner and check the two entities it should report.

     The NER step must be flagged as done, and its result must hold exactly
     two items whose ``offset``, ``offset_end`` and ``entity.kind`` match
     the expected triples below, in order.
     """
     expected = [(0, 2, 'person'), (6, 9, 'organization')]
     document = SentencedIEDocFactory(
         text='Rami Eid is studying . At Stony Brook University in NY')

     recognizer = StanfordNERRunner()
     recognizer(document)

     self.assertTrue(document.was_preprocess_done(PreProcessSteps.ner))
     found = document.get_preprocess_result(PreProcessSteps.ner)
     self.assertEqual(len(found), len(expected))
     for position, (start, end, kind) in enumerate(expected):
         occurrence = found[position]
         self.assertEqual(occurrence.offset, start)
         self.assertEqual(occurrence.offset_end, end)
         self.assertEqual(occurrence.entity.kind, kind)
Пример #7
0
 def test_tagger_runner_not_overriding_by_default(self):
     """A TaggerRunner built without ``override`` must keep existing tags.

     The runner is applied twice; between the two runs its postagger is
     swapped from one that labels every token 'A' to one that labels every
     token 'B'. ``doc.postags`` must still consist only of 'A' afterwards.
     """
     def tag_all_as(label):
         # Build a stub postagger pairing every token of every sentence
         # with the same fixed label.
         def postagger(sents):
             return [[(x, label) for x in sent] for sent in sents]
         return postagger

     doc = SentencedIEDocFactory(
         text='Some sentence. And some other. Indeed!')
     tag = TaggerRunner(tag_all_as('A'))
     tag(doc)
     tag.postagger = tag_all_as('B')  # XXX: accessing implementation
     tag(doc)
     postags = doc.postags
     # all() is vacuously true for an empty iterable, so make sure some tags
     # were actually stored before checking their values.
     self.assertTrue(postags)
     self.assertTrue(all(x == 'A' for x in postags))
Пример #8
0
    def test(self):
        """Run a LiteralNERRunner for 'disease' and check the entities found.

        The runner is built from the 'disease' label and the path of
        ``self.tmp_file1``. The NER step must be flagged as done and its
        result must match the expected (offset, offset_end, kind) triples.
        """
        # (the tokenizer splits she's in two parts)
        expected_triples = [(6, 7, 'disease'), (8, 10, 'disease')]

        document = SentencedIEDocFactory(
            text="Chase notes she's negative for HIV and Hepatitis C")
        literal_paths = [self.tmp_file1.name]
        runner = LiteralNERRunner(['disease'], literal_paths)
        runner(document)

        self.assertTrue(document.was_preprocess_done(PreProcessSteps.ner))
        found = document.get_preprocess_result(PreProcessSteps.ner)
        self.assertEqual(len(found), len(expected_triples))

        for occurrence, triple in zip(found, expected_triples):
            start, end, kind = triple
            self.assertEqual(occurrence.offset, start)
            self.assertEqual(occurrence.offset_end, end)
            self.assertEqual(occurrence.entity.kind, kind)
Пример #9
0
 def test_ner_runner_finds_consecutive_entities(self):
     """Check NER output where the second entity starts where the first ends.

     Delegates to ``self.check_ner`` with the document and the expected
     triples (presumably offset, offset_end, kind; check_ner is defined
     elsewhere, confirm there).
     """
     sentence = 'The student Rami Eid Stony Brook University in NY'
     expected_entities = [(2, 4, 'person'), (4, 7, 'organization')]
     document = SentencedIEDocFactory(text=sentence)
     self.check_ner(document, expected_entities)
Пример #10
0
 def test_ner_runner_is_calling_ner(self):
     """Check NER output for a text containing a person and an organization.

     Delegates to ``self.check_ner`` with the document and the expected
     triples (presumably offset, offset_end, kind; check_ner is defined
     elsewhere, confirm there).
     """
     sentence = 'Rami Eid is studying . At Stony Brook University in NY'
     expected_entities = [(0, 2, 'person'), (6, 9, 'organization')]
     document = SentencedIEDocFactory(text=sentence)
     self.check_ner(document, expected_entities)