def test_swadesh_coptic(self):
    """Check the 'cop' Swadesh list: its first entry, entry 125, and its length."""
    coptic = Swadesh('cop')

    # The first entry is compared against a single string.
    self.assertEqual('ⲁⲛⲟⲕ', coptic.words()[0])

    # The entry at index 125 is compared against a two-element list.
    self.assertEqual(['ⲡⲱⲱⲛⲉ', 'ⲕⲧⲟ'], coptic.words()[125])

    # The whole list is expected to contain 207 entries.
    self.assertEqual(len(coptic.words()), 207)
def test_swadesh_sanskrit(self):
    """The Sanskrit ('sa') Swadesh list should start with 'अहम्'."""
    word_list = Swadesh('sa').words()
    self.assertEqual('अहम्', word_list[0])
import nltk
from nltk.tag import tnt
from nltk.corpus import indian
from cltk.corpus.swadesh import Swadesh
from cltk.stop.classical_hindi.stops import STOPS_LIST


def hindi_model():
    """Train a TnT tagger on the tagged sentences of ``hindi.pos`` and return it."""
    tagged_sentences = indian.tagged_sents('hindi.pos')
    tagger = tnt.TnT()
    tagger.train(tagged_sentences)
    return tagger


# Show the first five entries of the classical Hindi stop list.
print(STOPS_LIST[:5])

# Show the first ten words of the 'hi' Swadesh list.
swadesh = Swadesh('hi')
print(swadesh.words()[:10])

# Tokenize a sample sentence and show at most its first ten tokens.
hindi_text = 'सब छात्रों के लिए हिंदी व्याकरण से जुड़ी बहुत महत्वपूर्ण पुस्तक की तैयारी कर रहे है.'
hindi_text_tokenize = nltk.word_tokenize(hindi_text)
print(hindi_text_tokenize[:10])

# Train the tagger, then tag every token of the sample sentence.
model = hindi_model()
print(model.tag(hindi_text_tokenize))
def test_swadesh_arabic(self):
    """The Arabic ('ar') Swadesh list should start with the expected word."""
    expected = "أنا"
    actual = Swadesh('ar').words()[0]
    self.assertEqual(expected, actual)
def test_swadesh_hindi(self):
    """The Hindi ('hi') Swadesh list should start with 'मैं'."""
    hindi_words = Swadesh('hi').words()
    self.assertEqual('मैं', hindi_words[0])
def test_swadesh_old_portuguese(self):
    """The Old Portuguese ('pt_old') Swadesh list should start with 'eu'."""
    word_list = Swadesh('pt_old').words()
    self.assertEqual('eu', word_list[0])
def test_swadesh_latin(self):
    """The Latin ('la') Swadesh list should start with 'ego'."""
    latin_words = Swadesh('la').words()
    self.assertEqual('ego', latin_words[0])
def test_swadesh_latin(self):
    """Verify that the first word of the "la" Swadesh list is "ego"."""
    expected = "ego"
    actual = Swadesh("la").words()[0]
    self.assertEqual(expected, actual)
def test_swadesh_greek(self):
    """Verify that the first word of the "gr" Swadesh list is "ἐγώ"."""
    expected = "ἐγώ"
    actual = Swadesh("gr").words()[0]
    self.assertEqual(expected, actual)
def test_swadesh_old_norse(self):
    """Verify that the first word of the "old_norse" Swadesh list is "ek"."""
    expected = "ek"
    actual = Swadesh("old_norse").words()[0]
    self.assertEqual(expected, actual)
def test_swadesh_old_english(self):
    """Verify the first entry of the "eng_old" Swadesh list."""
    # The expected entry is one string holding three comma-separated forms.
    expected = "ic, iċċ, ih"
    actual = Swadesh("eng_old").words()[0]
    self.assertEqual(expected, actual)
def test_swadesh_hindi(self):
    """Verify that the first word of the "hi" Swadesh list is "मैं"."""
    expected = "मैं"
    actual = Swadesh("hi").words()[0]
    self.assertEqual(expected, actual)
def test_swadesh_sanskrit(self):
    """Verify that the first word of the "sa" Swadesh list is "अहम्"."""
    expected = "अहम्"
    actual = Swadesh("sa").words()[0]
    self.assertEqual(expected, actual)
def test_swadesh_old_portuguese(self):
    """Verify that the first word of the "pt_old" Swadesh list is "eu"."""
    expected = "eu"
    actual = Swadesh("pt_old").words()[0]
    self.assertEqual(expected, actual)
def test_swadesh_old_norse(self):
    """The Old Norse ('old_norse') Swadesh list should start with 'ek'."""
    norse_words = Swadesh('old_norse').words()
    self.assertEqual('ek', norse_words[0])
def test_swadesh_tocharianB(self):
    """Verify that the first word of the "txb" Swadesh list is "ñäś"."""
    expected = "ñäś"
    actual = Swadesh("txb").words()[0]
    self.assertEqual(expected, actual)
def test_swadesh_greek(self):
    """The Greek ('gr') Swadesh list should start with 'ἐγώ'."""
    greek_words = Swadesh('gr').words()
    self.assertEqual('ἐγώ', greek_words[0])
def test_swadesh_hebrew(self):
    """Check the first entry of the Hebrew ('hbo') Swadesh list."""
    hebrew_words = Swadesh('hbo').words()
    # The expected literal ends with a space; it is compared as-is, so the
    # trailing whitespace is part of the value being checked.
    self.assertEqual('\'anî, \'ānokî ', hebrew_words[0])
def test_swadesh_tocharianB(self):
    """The Tocharian B ('txb') Swadesh list should start with 'ñäś'."""
    tocharian_words = Swadesh('txb').words()
    self.assertEqual('ñäś', tocharian_words[0])
def test_swadesh_old_english(self):
    """Check the first entry of the Old English ('eng_old') Swadesh list."""
    old_english_words = Swadesh('eng_old').words()
    # The expected entry is a single string listing three comma-separated forms.
    self.assertEqual('ic, iċċ, ih', old_english_words[0])