def test_stanford_tagger_is_called_if_found(self):
    # Run StanfordTaggerRunner on the sample text and compare the document's
    # POS tags against a single flat list of expected tags.
    text = 'Some sentence. And some other. Indeed!'
    document = SentencedIEDocFactory(text=text)
    runner = StanfordTaggerRunner()
    runner(document)

    # The runner must have flagged the tagging step as done on the document.
    tagging_done = document.was_preprocess_step_done(PreProcessSteps.tagging)
    self.assertTrue(tagging_done)
    self.assertEqual(
        document.postags,
        ['DT', 'NN', '.', 'CC', 'DT', 'JJ', '.', 'RB', '.'],
    )
def test_stanford_tagger_is_called_if_found(self):
    # Run StanfordTaggerRunner on the sample text, then read the stored
    # tagging result back from the document and compare it with a flat list
    # of expected tags.
    text = 'Some sentence. And some other. Indeed!'
    document = SentencedIEDocFactory(text=text)
    runner = StanfordTaggerRunner()
    runner(document)

    step = PreProcessSteps.tagging
    self.assertTrue(document.was_preprocess_done(step))
    self.assertEqual(
        document.get_preprocess_result(step),
        ['DT', 'NN', '.', 'CC', 'DT', 'JJ', '.', 'RB', '.'],
    )
def test_tagger_runner_overriding_when_selected(self):
    # A TaggerRunner built with override=True is run twice on the same
    # document, with its tagger swapped in between; the stored tags must be
    # the ones produced by the second tagger ('B').
    doc = SentencedIEDocFactory(text='Some sentence. And some other. Indeed!')

    def postagger1(sents):
        # Tags every token of every sentence as 'A'.
        return [[(x, 'A') for x in sent] for sent in sents]

    def postagger2(sents):
        # Tags every token of every sentence as 'B'.
        return [[(x, 'B') for x in sent] for sent in sents]

    tag = TaggerRunner(postagger1, override=True)
    tag(doc)
    tag.postagger = postagger2  # XXX: accessing implementation
    tag(doc)

    postags = doc.get_preprocess_result(PreProcessSteps.tagging)
    # all() is True for an empty sequence, so without this guard the test
    # would pass even if no tags had been stored at all.
    self.assertTrue(postags, 'expected at least one POS tag')
    self.assertTrue(all(x == 'B' for x in postags))
def test_tagger_runner_is_calling_postagger(self):
    # Feed TaggerRunner a fake tagger that hands out pre-baked tags, one
    # list per sentence, and check the document ends up with all of them
    # flattened in order.
    document = SentencedIEDocFactory(
        text='Some sentence. And some other. Indeed!')
    tags_by_sentence = [
        ['DT', 'NN', '.'],
        ['CC', 'DT', 'JJ', '.'],
        ['RB', '.'],
    ]
    remaining_tags = iter(tags_by_sentence)

    def postagger(sents):
        # Lazily pairs each sentence's tokens with the next pre-baked list.
        return (zip(sent, next(remaining_tags)) for sent in sents)

    runner = TaggerRunner(postagger)
    runner(document)

    self.assertTrue(
        document.was_preprocess_step_done(PreProcessSteps.tagging))
    flat_expected = [
        tag for sentence_tags in tags_by_sentence for tag in sentence_tags
    ]
    self.assertEqual(document.postags, flat_expected)
def test_tagger_runner_is_calling_postagger(self):
    # Feed TaggerRunner a fake tagger that hands out pre-baked tags, one
    # list per sentence, and check the stored tagging result is all of them
    # flattened in order.
    document = SentencedIEDocFactory(
        text='Some sentence. And some other. Indeed!')
    tags_by_sentence = [
        ['DT', 'NN', '.'],
        ['CC', 'DT', 'JJ', '.'],
        ['RB', '.'],
    ]
    remaining_tags = iter(tags_by_sentence)

    def postagger(sents):
        # Lazily pairs each sentence's tokens with the next pre-baked list.
        return (zip(sent, next(remaining_tags)) for sent in sents)

    runner = TaggerRunner(postagger)
    runner(document)

    step = PreProcessSteps.tagging
    self.assertTrue(document.was_preprocess_done(step))
    flat_expected = [
        tag for sentence_tags in tags_by_sentence for tag in sentence_tags
    ]
    self.assertEqual(document.get_preprocess_result(step), flat_expected)
def test_stanford_ner_is_called_if_found(self):
    # Run StanfordNERRunner on a two-sentence text and check that exactly
    # two entity occurrences are stored, with the expected offsets and kinds.
    text = 'Rami Eid is studying . At Stony Brook University in NY'
    document = SentencedIEDocFactory(text=text)
    StanfordNERRunner()(document)

    step = PreProcessSteps.ner
    self.assertTrue(document.was_preprocess_done(step))
    found = document.get_preprocess_result(step)
    self.assertEqual(len(found), 2)

    person, organization = found[0], found[1]
    # First occurrence spans offsets 0..2 and is a person.
    self.assertEqual(person.offset, 0)
    self.assertEqual(person.offset_end, 2)
    self.assertEqual(person.entity.kind, 'person')
    # Second occurrence spans offsets 6..9 and is an organization.
    self.assertEqual(organization.offset, 6)
    self.assertEqual(organization.offset_end, 9)
    self.assertEqual(organization.entity.kind, 'organization')
def test_tagger_runner_not_overriding_by_default(self):
    # A TaggerRunner built without the override flag is run twice on the
    # same document, with its tagger swapped in between; the document must
    # still carry the tags from the first tagger ('A').
    doc = SentencedIEDocFactory(
        text='Some sentence. And some other. Indeed!')

    def postagger1(sents):
        # Tags every token of every sentence as 'A'.
        return [[(x, 'A') for x in sent] for sent in sents]

    def postagger2(sents):
        # Tags every token of every sentence as 'B'.
        return [[(x, 'B') for x in sent] for sent in sents]

    tag = TaggerRunner(postagger1)
    tag(doc)
    tag.postagger = postagger2  # XXX: accessing implementation
    tag(doc)

    postags = doc.postags
    # all() is True for an empty sequence, so without this guard the test
    # would pass even if no tags had been stored at all.
    self.assertTrue(postags, 'expected at least one POS tag')
    self.assertTrue(all(x == 'A' for x in postags))
def test(self):
    # Run a LiteralNERRunner for the 'disease' kind over a sentence and
    # check the stored entity occurrences against expected
    # (offset, offset_end, kind) triples.
    # NOTE(review): self.tmp_file1 is created outside this view; presumably
    # it holds the disease literals -- confirm against the fixture.
    text = "Chase notes she's negative for HIV and Hepatitis C"
    document = SentencedIEDocFactory(text=text)
    runner = LiteralNERRunner(['disease'], [self.tmp_file1.name])
    runner(document)

    # (the tokenizer splits she's in two parts)
    expected_triples = [(6, 7, 'disease'), (8, 10, 'disease')]

    step = PreProcessSteps.ner
    self.assertTrue(document.was_preprocess_done(step))
    found = document.get_preprocess_result(step)
    self.assertEqual(len(found), len(expected_triples))
    for occurrence, expected in zip(found, expected_triples):
        start, end, kind = expected
        self.assertEqual(occurrence.offset, start)
        self.assertEqual(occurrence.offset_end, end)
        self.assertEqual(occurrence.entity.kind, kind)
def test_ner_runner_finds_consecutive_entities(self):
    # The expected spans (2, 4) and (4, 7) are adjacent: the second entity
    # starts exactly where the first one ends.
    # NOTE(review): check_ner is defined outside this view; presumably it
    # runs the NER step and compares (offset, offset_end, kind) -- confirm.
    text = 'The student Rami Eid Stony Brook University in NY'
    expected = [(2, 4, 'person'), (4, 7, 'organization')]
    document = SentencedIEDocFactory(text=text)
    self.check_ner(document, expected)
def test_ner_runner_is_calling_ner(self):
    # Hand a two-sentence document and the expected entity triples to
    # self.check_ner.
    # NOTE(review): check_ner is defined outside this view; presumably it
    # runs the NER step and compares (offset, offset_end, kind) -- confirm.
    text = 'Rami Eid is studying . At Stony Brook University in NY'
    expected = [(0, 2, 'person'), (6, 9, 'organization')]
    document = SentencedIEDocFactory(text=text)
    self.check_ner(document, expected)