def test_extract_entities(self):
    """Check entity extraction on an English paragraph.

    Runs ``Parser().extract_entities`` with a custom grammar and verifies
    that the first result reports the language as ``'english'`` and that
    its entity collection contains ``'intergovernmental organization'``.
    """
    text = (
        'The United Nations (UN) is an intergovernmental organization '
        'to promote international co-operation. A replacement for the '
        'ineffective League of Nations, the organization was established '
        'on 24 October 1945 after World War II in order to prevent '
        'another such conflict. At its founding, the UN had 51 member '
        'states; there are now 193. The headquarters of the United '
        'Nations is in Manhattan, New York City, and experiences '
        'extraterritoriality. Further main offices are situated in '
        'Geneva, Nairobi, and Vienna. The organization is financed by '
        'assessed and voluntary contributions from its member states. '
        'Its objectives include maintaining international peace and '
        'security, promoting human rights, fostering social and economic '
        'development, protecting the environment, and providing '
        'humanitarian aid in cases of famine, natural disaster, and armed '
        'conflict.'
    )
    # Known problem: "World War II" is not concatenated into one entity.
    # We may also want to take NN and NNS combined with CC and IN.
    # NOTE(review): the pattern looks like an NLTK-style chunk grammar --
    # confirm against Parser.extract_entities.
    grammar = 'NE : {<NNP|NNPS|NN>*?<NNP|NNPS|JJ|NNS|NN>+}'

    result = Parser().extract_entities(text, grammar)

    self.assertEqual(result[0][1], 'english')
    # assertIn reports both operands on failure, unlike assertTrue(x in y).
    self.assertIn('intergovernmental organization', result[0][0])
def parser(self):
    """Return a new ``Parser`` built with its default arguments."""
    instance = Parser()
    return instance
def test_tokenize_japanese(self):
    """Two Japanese sentences tokenize into two lists of word tokens."""
    expected_sentences = [
        ['これ', 'は', '日本語', 'です'],
        ['これ', 'は', '私', 'の', '最高', 'の', '例', 'です'],
    ]
    # The second argument is presumably the language code -- confirm
    # against Parser.tokenize.
    tokenized = Parser().tokenize('これは日本語です。これは私の最高の例です。', 'ja')
    # Only the first element of the returned value is compared.
    self.assertEqual(tokenized[0], expected_sentences)
def test_tokenize_text_is_none(self):
    """Tokenizing ``None`` yields an empty list as the first element."""
    outcome = Parser().tokenize(None)
    first_element = outcome[0]
    self.assertEqual(first_element, [])
def test_tokenize_english(self):
    """A single English sentence tokenizes into one list of tokens."""
    expected_sentences = [['This', 'is', 'my', 'best', 'example', '.']]
    tokenized = Parser().tokenize('This is my best example.')
    # Only the first element of the returned value is compared.
    self.assertEqual(tokenized[0], expected_sentences)
def test_language_traditional_chinese(self):
    """Traditional-script Chinese input is expected to be reported as 'zh-cn'."""
    # NOTE(review): the sample is written in traditional characters, yet the
    # expected code is 'zh-cn' -- confirm this is the intended behaviour.
    detected = Parser().detect_language('這是我最好的例子')
    self.assertEqual(detected, 'zh-cn')
def test_language_korean(self):
    """Korean input should be detected as 'ko'."""
    detected = Parser().detect_language('이것이 나의 가장 좋은 본보기이다')
    self.assertEqual(detected, 'ko')
def test_language_italian(self):
    """Italian input should be detected as 'it'."""
    detected = Parser().detect_language('Questo è il mio miglior esempio')
    self.assertEqual(detected, 'it')
def test_language_simplified_chinese(self):
    """Simplified-script Chinese input should be detected as 'zh-cn'."""
    detected = Parser().detect_language('这是我最好的例子')
    self.assertEqual(detected, 'zh-cn')
def test_language_spanish(self):
    """Spanish input should be detected as 'es'."""
    detected = Parser().detect_language('Este es mi mejor ejemplo')
    self.assertEqual(detected, 'es')
def test_language_german(self):
    """German input should be detected as 'de'."""
    detected = Parser().detect_language('Das ist mein bestes Beispiel')
    self.assertEqual(detected, 'de')
def test_language_japanese(self):
    """Japanese input should be detected as 'ja'."""
    detected = Parser().detect_language('これは日本語です')
    self.assertEqual(detected, 'ja')
def test_language_french(self):
    """French input should be detected as 'fr'."""
    detected = Parser().detect_language('Ceci est mon meilleur exemple')
    self.assertEqual(detected, 'fr')
def test_language_english(self):
    """English input should be detected as 'en'."""
    detected = Parser().detect_language('This is my best example')
    self.assertEqual(detected, 'en')
def test_language_text_is_none(self):
    """Detecting the language of ``None`` should return ``None``."""
    detected = Parser().detect_language(None)
    # assertIsNone checks identity and gives a clearer failure message
    # than assertEqual(x, None).
    self.assertIsNone(detected)
def parser(self):
    """Return a new ``Parser`` constructed with the argument ``'en'``."""
    # 'en' is presumably a language code -- confirm against Parser.__init__.
    instance = Parser('en')
    return instance