Пример #1
0
 def test_glossary_extractor(self) -> None:
     t = Tokenizer()
     g = ['New York', 'Shanghai', 'Los Angeles', 'Beijing']
     ge = GlossaryExtractor(g, 'test_glossary', t, 2, False)
     text = 'i live in los angeles. my hometown is Beijing'
     tokens = t.tokenize(text)
     test_result = [i.value for i in ge.extract(tokens)]
     expected = ["Beijing", "Los Angeles"]
     self.assertEqual(test_result, expected)
Пример #2
0
    def test_glossary_extractor(self) -> None:
        t = Tokenizer()
        text = 'i live in los angeles. my hometown is Beijing. I love New York City.'
        tokens = t.tokenize(text)

        ge = GlossaryExtractor(self.glossary_1, 'test_glossary', t, 3, False)
        results = [i.value for i in ge.extract(tokens)]
        expected = ['Beijing', 'los angeles', 'New York']

        self.assertEqual(results, expected)
Пример #3
0
    def test_case_sensitive(self) -> None:
        t = Tokenizer()
        text = 'i live in los angeles. my hometown is Beijing. I love New York City.'
        tokens = t.tokenize(text)

        g = ['Beijing', 'Los Angeles', 'New York', 'Shanghai']
        ge = GlossaryExtractor(g, 'test_glossary', t, 2, True)

        results = [i.value for i in ge.extract(tokens)]
        expected = ['Beijing', 'New York']

        self.assertEqual(results, expected)
Пример #4
0
    def setUp(self):
        self.text = 'Napoléon Bonaparte was a French statesman and military leader who rose to prominence during the ' \
                    'French Revolution and led several successful campaigns during the French Revolutionary Wars. ' \
                    'As Napoleon, he was Emperor of the French from 1804 until 1814, and again briefly in 1815 during ' \
                    'the Hundred Days. Napoleon dominated European and global affairs for more than a decade while ' \
                    'leading France against a series of coalitions in the Napoleonic Wars. He won most of these wars ' \
                    'and the vast majority of his battles, building a large empire that ruled over continental Europe ' \
                    'before its final collapse in 1815. He is considered one of the greatest commanders in history, ' \
                    'and his wars and campaigns are studied at military schools worldwide. Napoleon\'s political and ' \
                    'cultural legacy has endured as one of the most celebrated and controversial leaders in human history.'
        extractor = SpacyNerExtractor(extractor_name='spacy_ner_extractor')
        self.results = extractor.extract(self.text)

        glossary_1 = ['Beijing', 'Los Angeles', 'New York', 'Shanghai']
        t = Tokenizer()
        text = 'i live in los angeles. my hometown is Beijing. I love New York City.'
        tokens = t.tokenize(text)
        ge = GlossaryExtractor(glossary_1, 'test_glossary', t, 3, False)
        self.results2 = ge.extract(tokens)