def test_translate_iteration_mark(self):
    """Katakana iteration marks (ヽ / ヾ) are expanded, alone and via kanji_to_romaji."""
    expansion_cases = [
        ("カヽキヽクヽケヽコヽ", "カカキキククケケココ"),
        ("カヾキヾクヾケヾコヾ", "カガキギクグケゲコゴ"),
    ]
    for marked, expanded in expansion_cases:
        self.assertEqual(translate_kana_iteration_mark(marked), expanded)

    romaji_cases = [
        ("カヾールッチ", "kagaarutchi"),
        ("コヽーミッチヾ", "kokoomitchiji"),
    ]
    for marked, romaji in romaji_cases:
        self.assertEqual(kanji_to_romaji(marked), romaji)
def test_translate_iteration_mark(self):
    """Hiragana iteration marks (ゝ / ゞ) are expanded, alone and via kanji_to_romaji."""
    expansion_cases = [
        (u"かゝきゝくゝけゝこゝ", u"かかききくくけけここ"),
        (u"かゞきゞくゞけゞこゞ", u"かがきぎくぐけげこご"),
    ]
    for marked, expanded in expansion_cases:
        self.assertEqual(translate_kana_iteration_mark(marked), expanded)

    romaji_cases = [
        (u"かゞーるっち", u"kagaarutchi"),
        (u"こゝーみっちゞ", u"kokoomitchiji"),
    ]
    for marked, romaji in romaji_cases:
        self.assertEqual(kanji_to_romaji(marked), romaji)
def test_u_and_small_vowel(self):
    """Katakana followed by small vowels romanises as expected, in words and in isolation."""
    # Whole words that contain small-vowel combinations.
    kana_expected_dict = {
        "ハロウィーン": "harowiin",
        "ソファ": "sofa",
        "ウィンドウズ": "windouzu",
        "チェック": "chekku",
        "ディスニ": "disuni",
        "ドゥラハン": "durahan",
        "パーティー": "paatii",
        "タトゥー": "tatuu",
        "クァルテット": "kwarutetto",
    }
    for word, romaji in kana_expected_dict.items():
        self.assertEqual(kanji_to_romaji(word), romaji)

    # The individual two-character combinations on their own.
    kana_expected_dict_s = {
        "ウィ": "wi",
        "ウェ": "we",
        "ウォ": "wo",
        "ヴァ": "va",
        "ヴィ": "vi",
        "ヴェ": "ve",
        "ヴォ": "vo",
        "ファ": "fa",
        "フィ": "fi",
        "フェ": "fe",
        "フォ": "fo",
        "ティ": "ti",
        "ディ": "di",
        "トゥ": "tu",
        "ドゥ": "du",
        "クァ": "kwa",
        "クィ": "kwi",
        "クェ": "kwe",
        "クォ": "kwo",
        "キェ": "kye",
        "グァ": "gwa",
        "グィ": "gwi",
        "グェ": "gwe",
        "グォ": "gwo",
        "ギェ": "gye",
        "スィ": "si",
        "ズィ": "zi",
        "シェ": "she",
        "ジェ": "je",
        "チェ": "che",
        "ツァ": "tsa",
        "ツィ": "tsi",
        "ツェ": "tse",
        "ツォ": "tso",
        "ホゥ": "hu",
        "イィ": "yi",
        "イェ": "ye",
    }
    for combo, romaji in kana_expected_dict_s.items():
        self.assertEqual(kanji_to_romaji(combo), romaji)
def main():
    """
    Build a kanji -> reading dictionary from the JMdict XML file.

    Every <entry> under the root of JM_DICT_FILE is visited. The reading and
    frequency score come from get_most_common_reading(); the word type comes
    from the first <pos> of the entry's first <sense>, passed through
    strip_pos().

    Entries whose stripped word type is "unclassified", or whose first sense
    carries an "archaism" <misc> tag, are skipped.

    Several entries can share the same <keb> text. An already-stored kanji is
    only overwritten when the new entry's frequency score is positive and
    strictly greater than the stored one.
    NOTE(review): the score presumably reflects ke_pri tags such as
    "ichi1"/"ichi2" -- confirm against get_most_common_reading().

    :return: dict mapping keb text to
             {"romaji": ..., "w_type": ..., "freq": ...}
    :raises StopIteration: if an entry has no <sense>, or its first <sense>
                           has no <pos>
    """
    auto_jm_dict = {}
    root = xml.etree.ElementTree.parse(JM_DICT_FILE).getroot()

    for entry in root.findall("entry"):
        most_common_reading, freq_counter = get_most_common_reading(entry)

        # Builtin next() instead of the Python-2-only iterator method .next(),
        # so this runs on both Python 2.6+ and Python 3.
        first_sense = next(entry.iterfind("sense"))
        raw_first_pos = next(first_sense.iterfind("pos")).text
        misc = [m.text for m in first_sense.iterfind("misc")]

        stripped_first_pos = strip_pos(raw_first_pos)
        if stripped_first_pos in ("suru verb", "kuru verb"):
            # Put a space before the last two characters of the reading.
            most_common_reading = (
                most_common_reading[:-2] + " " + most_common_reading[-2:]
            )

        if stripped_first_pos == "unclassified" or "archaism" in misc:
            continue

        for k_ele in entry.iterfind("k_ele"):
            for keb in k_ele.iterfind("keb"):
                kanji = keb.text
                existing = auto_jm_dict.get(kanji)
                if existing is not None and not (
                        freq_counter > 0 and freq_counter > existing["freq"]):
                    # Stored entry is at least as frequent; keep it.
                    continue
                try:
                    auto_jm_dict[kanji] = {
                        "romaji": kanji_to_romaji(most_common_reading),
                        "w_type": stripped_first_pos,
                        "freq": freq_counter
                    }
                except IndexError:
                    # Conversion failed: drop any entry stored for this kanji
                    # and report the offending key.
                    auto_jm_dict.pop(kanji, None)
                    print(kanji)

    return auto_jm_dict
def test_soukon(self):
    """Katakana words containing small tsu (ッ) romanise with a doubled consonant."""
    kana_expected_dict = {
        u"チョット": "chotto",
        u"マッテ": "matte",
        u"ハッピョウケッカ": "happyoukekka",
    }
    for kana, romaji in kana_expected_dict.items():
        self.assertEqual(kanji_to_romaji(kana), romaji)
def test_match_starting_at_full(self):
    """反る romanises differently inside のけ反る than it does on its own."""
    test_and_expected = {
        u"のけ反る": "nokezoru",
        u"反る": "kaeru",
    }
    for phrase, romaji in test_and_expected.items():
        self.assertEqual(kanji_to_romaji(phrase), romaji)
def test_plain_imperative(self):
    """conjugate_godan_plain_imperative yields the expected (kanji, romaji) pair per verb."""
    godan_expected = {
        u"会う": (u"会え", "ae"),
        u"待つ": (u"待て", "mate"),
        u"撮る": (u"撮れ", "tore"),
        u"読む": (u"読め", "yome"),
        u"遊ぶ": (u"遊べ", "asobe"),
        u"死ぬ": (u"死ね", "shine"),
        u"書く": (u"書け", "kake"),
        u"行く": (u"行け", "ike"),
        u"泳ぐ": (u"泳げ", "oyoge"),
        u"話す": (u"話せ", "hanase"),
    }
    for verb, expected in godan_expected.items():
        # Register the verb's romaji and the romaji of its final kana first;
        # presumably the conjugator reads this state -- keep the call order.
        verb_romaji = kanji_to_romaji(verb)
        ending_romaji = kanji_to_romaji(verb[-1])
        set_global_godan(verb_romaji, ending_romaji)
        conj_kanji, conj_romaji = conjugate_godan_plain_imperative(verb)
        self.assertEqual(expected, (conj_kanji, conj_romaji))
def test_plain_volitional(self):
    """conjugate_godan_plain_volitional yields the expected (kanji, romaji) pair per verb."""
    godan_expected = {
        u"会う": (u"会おう", "aou"),
        u"待つ": (u"待とう", "matou"),
        u"撮る": (u"撮ろう", "torou"),
        u"読む": (u"読もう", "yomou"),
        u"遊ぶ": (u"遊ぼう", "asobou"),
        u"死ぬ": (u"死のう", "shinou"),
        u"書く": (u"書こう", "kakou"),
        u"行く": (u"行こう", "ikou"),
        u"泳ぐ": (u"泳ごう", "oyogou"),
        u"話す": (u"話そう", "hanasou"),
    }
    for verb, expected in godan_expected.items():
        # Register the verb's romaji and the romaji of its final kana first;
        # presumably the conjugator reads this state -- keep the call order.
        verb_romaji = kanji_to_romaji(verb)
        ending_romaji = kanji_to_romaji(verb[-1])
        set_global_godan(verb_romaji, ending_romaji)
        conj_kanji, conj_romaji = conjugate_godan_plain_volitional(verb)
        self.assertEqual(expected, (conj_kanji, conj_romaji))
def test_plain_te_form(self):
    """conjugate_godan_plain_te_form yields the expected (kanji, romaji) pair per verb."""
    godan_expected = {
        u"会う": (u"会って", "atte"),
        u"待つ": (u"待って", "matte"),
        u"撮る": (u"撮って", "totte"),
        u"読む": (u"読んで", "yonde"),
        u"遊ぶ": (u"遊んで", "asonde"),
        u"死ぬ": (u"死んで", "shinde"),
        u"書く": (u"書いて", "kaite"),
        u"行く": (u"行って", "itte"),
        u"泳ぐ": (u"泳いで", "oyoide"),
        u"話す": (u"話して", "hanashite"),
    }
    for verb, expected in godan_expected.items():
        # Register the verb's romaji and the romaji of its final kana first;
        # presumably the conjugator reads this state -- keep the call order.
        verb_romaji = kanji_to_romaji(verb)
        ending_romaji = kanji_to_romaji(verb[-1])
        set_global_godan(verb_romaji, ending_romaji)
        conj_kanji, conj_romaji = conjugate_godan_plain_te_form(verb)
        self.assertEqual(expected, (conj_kanji, conj_romaji))
def test_mo_particle(self):
    """も is split out as the separate token "mo" in the romanised output."""
    test_and_expected = {
        # も comes after the kanji word 押す and before hiragana の.
        u"背中を押すもの": u"senaka wo osu mo no",
        # も is the last character and the preceding word is a noun.
        u"私も": u"watashi mo",
    }
    for phrase, romaji in test_and_expected.items():
        self.assertEqual(kanji_to_romaji(phrase), romaji)
def test_polite_present_negative(self):
    """conjugate_godan_polite_present_negative yields the expected (kanji, romaji) pair per verb."""
    godan_expected = {
        "会う": ("会いません", "aimasen"),
        "待つ": ("待ちません", "machimasen"),
        "撮る": ("撮りません", "torimasen"),
        "読む": ("読みません", "yomimasen"),
        "遊ぶ": ("遊びません", "asobimasen"),
        "死ぬ": ("死にません", "shinimasen"),
        "書く": ("書きません", "kakimasen"),
        "行く": ("行きません", "ikimasen"),
        "泳ぐ": ("泳ぎません", "oyogimasen"),
        "話す": ("話しません", "hanashimasen"),
    }
    for verb, expected in godan_expected.items():
        # Register the verb's romaji and the romaji of its final kana first;
        # presumably the conjugator reads this state -- keep the call order.
        verb_romaji = kanji_to_romaji(verb)
        ending_romaji = kanji_to_romaji(verb[-1])
        set_global_godan(verb_romaji, ending_romaji)
        conj_kanji, conj_romaji = conjugate_godan_polite_present_negative(verb)
        self.assertEqual(expected, (conj_kanji, conj_romaji))
def test_polite_imperative(self):
    """conjugate_godan_polite_imperative yields the expected (kanji, romaji) pair per verb."""
    godan_expected = {
        "会う": ("会いなさい", "ainasai"),
        "待つ": ("待ちなさい", "machinasai"),
        "撮る": ("撮りなさい", "torinasai"),
        "読む": ("読みなさい", "yominasai"),
        "遊ぶ": ("遊びなさい", "asobinasai"),
        "死ぬ": ("死になさい", "shininasai"),
        "書く": ("書きなさい", "kakinasai"),
        "行く": ("行きなさい", "ikinasai"),
        "泳ぐ": ("泳ぎなさい", "oyoginasai"),
        "話す": ("話しなさい", "hanashinasai"),
    }
    for verb, expected in godan_expected.items():
        # Register the verb's romaji and the romaji of its final kana first;
        # presumably the conjugator reads this state -- keep the call order.
        verb_romaji = kanji_to_romaji(verb)
        ending_romaji = kanji_to_romaji(verb[-1])
        set_global_godan(verb_romaji, ending_romaji)
        conj_kanji, conj_romaji = conjugate_godan_polite_imperative(verb)
        self.assertEqual(expected, (conj_kanji, conj_romaji))
def test_plain_te_form_negative(self):
    """conjugate_godan_plain_te_form_negative yields the expected (kanji, romaji) pair per verb."""
    godan_expected = {
        "会う": ("会わないで", "awanaide"),
        "待つ": ("待たないで", "matanaide"),
        "撮る": ("撮らないで", "toranaide"),
        "読む": ("読まないで", "yomanaide"),
        "遊ぶ": ("遊ばないで", "asobanaide"),
        "死ぬ": ("死なないで", "shinanaide"),
        "書く": ("書かないで", "kakanaide"),
        "行く": ("行かないで", "ikanaide"),
        "泳ぐ": ("泳がないで", "oyoganaide"),
        "話す": ("話さないで", "hanasanaide"),
    }
    for verb, expected in godan_expected.items():
        # Register the verb's romaji and the romaji of its final kana first;
        # presumably the conjugator reads this state -- keep the call order.
        verb_romaji = kanji_to_romaji(verb)
        ending_romaji = kanji_to_romaji(verb[-1])
        set_global_godan(verb_romaji, ending_romaji)
        conj_kanji, conj_romaji = conjugate_godan_plain_te_form_negative(verb)
        self.assertEqual(expected, (conj_kanji, conj_romaji))
def test_polite_imperative_negative(self):
    """conjugate_godan_polite_imperative_negative yields the expected (kanji, romaji) pair per verb."""
    godan_expected = {
        u"会う": (u"会いなさるな", "ainasaruna"),
        u"待つ": (u"待ちなさるな", "machinasaruna"),
        u"撮る": (u"撮りなさるな", "torinasaruna"),
        u"読む": (u"読みなさるな", "yominasaruna"),
        u"遊ぶ": (u"遊びなさるな", "asobinasaruna"),
        u"死ぬ": (u"死になさるな", "shininasaruna"),
        u"書く": (u"書きなさるな", "kakinasaruna"),
        u"行く": (u"行きなさるな", "ikinasaruna"),
        u"泳ぐ": (u"泳ぎなさるな", "oyoginasaruna"),
        u"話す": (u"話しなさるな", "hanashinasaruna"),
    }
    for verb, expected in godan_expected.items():
        # Register the verb's romaji and the romaji of its final kana first;
        # presumably the conjugator reads this state -- keep the call order.
        verb_romaji = kanji_to_romaji(verb)
        ending_romaji = kanji_to_romaji(verb[-1])
        set_global_godan(verb_romaji, ending_romaji)
        conj_kanji, conj_romaji = conjugate_godan_polite_imperative_negative(verb)
        self.assertEqual(expected, (conj_kanji, conj_romaji))
def test_plain_negative(self):
    """conjugate_godan_plain_negative yields the expected (kanji, romaji) pair per verb."""
    godan_expected = {
        u"会う": (u"会わない", "awanai"),
        u"待つ": (u"待たない", "matanai"),
        u"撮る": (u"撮らない", "toranai"),
        u"読む": (u"読まない", "yomanai"),
        u"遊ぶ": (u"遊ばない", "asobanai"),
        u"死ぬ": (u"死なない", "shinanai"),
        u"書く": (u"書かない", "kakanai"),
        u"行く": (u"行かない", "ikanai"),
        u"泳ぐ": (u"泳がない", "oyoganai"),
        u"話す": (u"話さない", "hanasanai"),
    }
    for verb, expected in godan_expected.items():
        # Register the verb's romaji and the romaji of its final kana first;
        # presumably the conjugator reads this state -- keep the call order.
        verb_romaji = kanji_to_romaji(verb)
        ending_romaji = kanji_to_romaji(verb[-1])
        set_global_godan(verb_romaji, ending_romaji)
        conj_kanji, conj_romaji = conjugate_godan_plain_negative(verb)
        self.assertEqual(expected, (conj_kanji, conj_romaji))
def test_soukon(self):
    """Hiragana words containing small tsu (っ) romanise with a doubled consonant."""
    kana_expected_dict = {
        "ちょっと": "chotto",
        "まって": "matte",
        "はっぴょうけっか": "happyoukekka",
    }
    for kana, romaji in kana_expected_dict.items():
        self.assertEqual(kanji_to_romaji(kana), romaji)
def test_polite_past(self):
    """conjugate_godan_polite_past yields the expected (kanji, romaji) pair per verb."""
    godan_expected = {
        u"会う": (u"会いました", "aimashita"),
        u"待つ": (u"待ちました", "machimashita"),
        u"撮る": (u"撮りました", "torimashita"),
        u"読む": (u"読みました", "yomimashita"),
        u"遊ぶ": (u"遊びました", "asobimashita"),
        u"死ぬ": (u"死にました", "shinimashita"),
        u"書く": (u"書きました", "kakimashita"),
        u"行く": (u"行きました", "ikimashita"),
        u"泳ぐ": (u"泳ぎました", "oyogimashita"),
        u"話す": (u"話しました", "hanashimashita"),
    }
    for verb, expected in godan_expected.items():
        # Register the verb's romaji and the romaji of its final kana first;
        # presumably the conjugator reads this state -- keep the call order.
        verb_romaji = kanji_to_romaji(verb)
        ending_romaji = kanji_to_romaji(verb[-1])
        set_global_godan(verb_romaji, ending_romaji)
        conj_kanji, conj_romaji = conjugate_godan_polite_past(verb)
        self.assertEqual(expected, (conj_kanji, conj_romaji))
def test_long_vowel_with_soukon(self):
    """Katakana with a small ュ followed by the long-vowel mark ー romanises with a doubled vowel."""
    kana_expected_dict = {
        u"リュー": "ryuu",
        u"ニュース": "nyuusu",
        u"デビュー": "debyuu",
        u"チュー": "chuu",
    }
    for kana, romaji in kana_expected_dict.items():
        self.assertEqual(kanji_to_romaji(kana), romaji)
def test_plain_past(self):
    """conjugate_godan_plain_past yields the expected (kanji, romaji) pair per verb."""
    godan_expected = {
        u"会う": (u"会った", "atta"),
        u"待つ": (u"待った", "matta"),
        u"撮る": (u"撮った", "totta"),
        u"読む": (u"読んだ", "yonda"),
        u"遊ぶ": (u"遊んだ", "asonda"),
        u"死ぬ": (u"死んだ", "shinda"),
        u"書く": (u"書いた", "kaita"),
        u"行く": (u"行った", "itta"),
        u"泳ぐ": (u"泳いだ", "oyoida"),
        u"話す": (u"話した", "hanashita"),
    }
    for verb, expected in godan_expected.items():
        # Register the verb's romaji and the romaji of its final kana first;
        # presumably the conjugator reads this state -- keep the call order.
        verb_romaji = kanji_to_romaji(verb)
        ending_romaji = kanji_to_romaji(verb[-1])
        set_global_godan(verb_romaji, ending_romaji)
        conj_kanji, conj_romaji = conjugate_godan_plain_past(verb)
        self.assertEqual(expected, (conj_kanji, conj_romaji))
def test_soukon_ch(self):
    """Hiragana small tsu (っ) before a "ch" sound romanises as "tch"."""
    kana_expected_dict = {
        "ぼっちゃん": "botchan",
        "こっち": "kotchi",
        "かっちょん": "katchon",
        "まっちゃ": "matcha",
        "みっち": "mitchi",
    }
    for kana, romaji in kana_expected_dict.items():
        self.assertEqual(kanji_to_romaji(kana), romaji)
def test_soukon_ch(self):
    """Katakana small tsu (ッ) before a "ch" sound romanises as "tch"."""
    kana_expected_dict = {
        u"ボッチャン": "botchan",
        u"コッチ": "kotchi",
        u"カッチョン": "katchon",
        u"マッチャ": "matcha",
        u"ミッチ": "mitchi",
    }
    for kana, romaji in kana_expected_dict.items():
        self.assertEqual(kanji_to_romaji(kana), romaji)
def test_polite_past_negative(self):
    """conjugate_ichidan_polite_past_negative yields the expected (kanji, romaji) pair per verb."""
    ichidan_expected = {
        u"寝る": (u"寝ませんでした", "nemasen deshita"),
        u"出来る": (u"出来ませんでした", "dekimasen deshita"),
        u"見つける": (u"見つけませんでした", "mitsukemasen deshita"),
    }
    for verb, expected in ichidan_expected.items():
        # Register the stems (verb minus its last character, romaji minus its
        # last two letters) before calling the argument-less conjugator.
        kanji_stem = verb[:-1]
        romaji_stem = kanji_to_romaji(verb)[:-2]
        set_global_ichidan(kanji_stem, romaji_stem)
        conj_kanji, conj_romaji = conjugate_ichidan_polite_past_negative()
        self.assertEqual(expected, (conj_kanji, conj_romaji))
def test_plain_past_negative(self):
    """conjugate_ichidan_plain_past_negative yields the expected (kanji, romaji) pair per verb."""
    ichidan_expected = {
        u"寝る": (u"寝なかった", "nenakatta"),
        u"出来る": (u"出来なかった", "dekinakatta"),
        u"見つける": (u"見つけなかった", "mitsukenakatta"),
    }
    for verb, expected in ichidan_expected.items():
        # Register the stems (verb minus its last character, romaji minus its
        # last two letters) before calling the argument-less conjugator.
        kanji_stem = verb[:-1]
        romaji_stem = kanji_to_romaji(verb)[:-2]
        set_global_ichidan(kanji_stem, romaji_stem)
        conj_kanji, conj_romaji = conjugate_ichidan_plain_past_negative()
        self.assertEqual(expected, (conj_kanji, conj_romaji))
def test_polite_imperative_negative(self):
    """conjugate_ichidan_polite_imperative_negative yields the expected (kanji, romaji) pair per verb."""
    ichidan_expected = {
        u"寝る": (u"寝なさるな", "nenasaruna"),
        u"出来る": (u"出来なさるな", "dekinasaruna"),
        u"見つける": (u"見つけなさるな", "mitsukenasaruna"),
    }
    for verb, expected in ichidan_expected.items():
        # Register the stems (verb minus its last character, romaji minus its
        # last two letters) before calling the argument-less conjugator.
        kanji_stem = verb[:-1]
        romaji_stem = kanji_to_romaji(verb)[:-2]
        set_global_ichidan(kanji_stem, romaji_stem)
        conj_kanji, conj_romaji = conjugate_ichidan_polite_imperative_negative()
        self.assertEqual(expected, (conj_kanji, conj_romaji))
def test_plain_imperative(self):
    """conjugate_ichidan_plain_imperative yields the expected (kanji, romaji) pair per verb."""
    ichidan_expected = {
        u"寝る": (u"寝ろ", "nero"),
        u"出来る": (u"出来ろ", "dekiro"),
        u"見つける": (u"見つけろ", "mitsukero"),
    }
    for verb, expected in ichidan_expected.items():
        # Register the stems (verb minus its last character, romaji minus its
        # last two letters) before calling the argument-less conjugator.
        kanji_stem = verb[:-1]
        romaji_stem = kanji_to_romaji(verb)[:-2]
        set_global_ichidan(kanji_stem, romaji_stem)
        conj_kanji, conj_romaji = conjugate_ichidan_plain_imperative()
        self.assertEqual(expected, (conj_kanji, conj_romaji))
def test_polite_volitional(self):
    """conjugate_ichidan_polite_volitional yields the expected (kanji, romaji) pair per verb."""
    ichidan_expected = {
        u"寝る": (u"寝ましょう", "nemashou"),
        u"出来る": (u"出来ましょう", "dekimashou"),
        u"見つける": (u"見つけましょう", "mitsukemashou"),
    }
    for verb, expected in ichidan_expected.items():
        # Register the stems (verb minus its last character, romaji minus its
        # last two letters) before calling the argument-less conjugator.
        kanji_stem = verb[:-1]
        romaji_stem = kanji_to_romaji(verb)[:-2]
        set_global_ichidan(kanji_stem, romaji_stem)
        conj_kanji, conj_romaji = conjugate_ichidan_polite_volitional()
        self.assertEqual(expected, (conj_kanji, conj_romaji))
def test_plain_te_form(self):
    """conjugate_ichidan_plain_te_form yields the expected (kanji, romaji) pair per verb."""
    ichidan_expected = {
        u"寝る": (u"寝て", "nete"),
        u"出来る": (u"出来て", "dekite"),
        u"見つける": (u"見つけて", "mitsukete"),
    }
    for verb, expected in ichidan_expected.items():
        # Register the stems (verb minus its last character, romaji minus its
        # last two letters) before calling the argument-less conjugator.
        kanji_stem = verb[:-1]
        romaji_stem = kanji_to_romaji(verb)[:-2]
        set_global_ichidan(kanji_stem, romaji_stem)
        conj_kanji, conj_romaji = conjugate_ichidan_plain_te_form()
        self.assertEqual(expected, (conj_kanji, conj_romaji))
def test_plain_volitional(self):
    """conjugate_ichidan_plain_volitional yields the expected (kanji, romaji) pair per verb."""
    ichidan_expected = {
        "寝る": ("寝よう", "neyou"),
        "出来る": ("出来よう", "dekiyou"),
        "見つける": ("見つけよう", "mitsukeyou"),
    }
    for verb, expected in ichidan_expected.items():
        # Register the stems (verb minus its last character, romaji minus its
        # last two letters) before calling the argument-less conjugator.
        kanji_stem = verb[:-1]
        romaji_stem = kanji_to_romaji(verb)[:-2]
        set_global_ichidan(kanji_stem, romaji_stem)
        conj_kanji, conj_romaji = conjugate_ichidan_plain_volitional()
        self.assertEqual(expected, (conj_kanji, conj_romaji))
def test_plain_negative(self):
    """conjugate_ichidan_plain_negative yields the expected (kanji, romaji) pair per verb."""
    ichidan_expected = {
        "寝る": ("寝ない", "nenai"),
        "出来る": ("出来ない", "dekinai"),
        "見つける": ("見つけない", "mitsukenai"),
    }
    for verb, expected in ichidan_expected.items():
        # Register the stems (verb minus its last character, romaji minus its
        # last two letters) before calling the argument-less conjugator.
        kanji_stem = verb[:-1]
        romaji_stem = kanji_to_romaji(verb)[:-2]
        set_global_ichidan(kanji_stem, romaji_stem)
        conj_kanji, conj_romaji = conjugate_ichidan_plain_negative()
        self.assertEqual(expected, (conj_kanji, conj_romaji))
def test_polite_past(self):
    """conjugate_ichidan_polite_past yields the expected (kanji, romaji) pair per verb."""
    ichidan_expected = {
        "寝る": ("寝ました", "nemashita"),
        "出来る": ("出来ました", "dekimashita"),
        "見つける": ("見つけました", "mitsukemashita"),
    }
    for verb, expected in ichidan_expected.items():
        # Register the stems (verb minus its last character, romaji minus its
        # last two letters) before calling the argument-less conjugator.
        kanji_stem = verb[:-1]
        romaji_stem = kanji_to_romaji(verb)[:-2]
        set_global_ichidan(kanji_stem, romaji_stem)
        conj_kanji, conj_romaji = conjugate_ichidan_polite_past()
        self.assertEqual(expected, (conj_kanji, conj_romaji))