def test_translate_iteration_mark(self):
    """Katakana iteration marks are expanded, directly and through romaji.

    The first pair uses the plain mark (previous kana repeated as-is), the
    second the voiced mark (previous kana repeated in its voiced form).
    """
    expansions = (
        ("カヽキヽクヽケヽコヽ", "カカキキククケケココ"),
        ("カヾキヾクヾケヾコヾ", "カガキギクグケゲコゴ"),
    )
    for marked, expanded in expansions:
        self.assertEqual(translate_kana_iteration_mark(marked), expanded)

    # The marks must also be resolved when going straight to romaji.
    romanized = (
        ("カヾールッチ", "kagaarutchi"),
        ("コヽーミッチヾ", "kokoomitchiji"),
    )
    for kana, romaji in romanized:
        self.assertEqual(kanji_to_romaji(kana), romaji)
示例#2
0
    def test_translate_iteration_mark(self):
        """Hiragana iteration marks are expanded, directly and through romaji.

        The first pair uses the plain mark (previous kana repeated as-is),
        the second the voiced mark (previous kana repeated in voiced form).
        """
        expansions = (
            (u"かゝきゝくゝけゝこゝ", u"かかききくくけけここ"),
            (u"かゞきゞくゞけゞこゞ", u"かがきぎくぐけげこご"),
        )
        for marked, expanded in expansions:
            self.assertEqual(translate_kana_iteration_mark(marked), expanded)

        # The marks must also be resolved when going straight to romaji.
        romanized = (
            (u"かゞーるっち", u"kagaarutchi"),
            (u"こゝーみっちゞ", u"kokoomitchiji"),
        )
        for kana, romaji in romanized:
            self.assertEqual(kanji_to_romaji(kana), romaji)
    def test_u_and_small_vowel(self):
        """Katakana followed by a small vowel romanizes as one syllable.

        Checked first inside whole loanwords, then for each kana + small
        vowel combination on its own.
        """
        loanwords = {
            "ハロウィーン": "harowiin",
            "ソファ": "sofa",
            "ウィンドウズ": "windouzu",
            "チェック": "chekku",
            "ディスニ": "disuni",
            "ドゥラハン": "durahan",
            "パーティー": "paatii",
            "タトゥー": "tatuu",
            "クァルテット": "kwarutetto"
        }
        for word, romaji in loanwords.items():
            self.assertEqual(kanji_to_romaji(word), romaji)

        syllables = {
            "ウィ": "wi",
            "ウェ": "we",
            "ウォ": "wo",
            "ヴァ": "va",
            "ヴィ": "vi",
            "ヴェ": "ve",
            "ヴォ": "vo",
            "ファ": "fa",
            "フィ": "fi",
            "フェ": "fe",
            "フォ": "fo",
            "ティ": "ti",
            "ディ": "di",
            "トゥ": "tu",
            "ドゥ": "du",
            "クァ": "kwa",
            "クィ": "kwi",
            "クェ": "kwe",
            "クォ": "kwo",
            "キェ": "kye",
            "グァ": "gwa",
            "グィ": "gwi",
            "グェ": "gwe",
            "グォ": "gwo",
            "ギェ": "gye",
            "スィ": "si",
            "ズィ": "zi",
            "シェ": "she",
            "ジェ": "je",
            "チェ": "che",
            "ツァ": "tsa",
            "ツィ": "tsi",
            "ツェ": "tse",
            "ツォ": "tso",
            "ホゥ": "hu",
            "イィ": "yi",
            "イェ": "ye"
        }
        for syllable, romaji in syllables.items():
            self.assertEqual(kanji_to_romaji(syllable), romaji)
def main():
    """Build a kanji -> romaji lookup table from the JMdict XML file.

    Every ``entry`` element of ``JM_DICT_FILE`` is visited. The reading and
    its frequency counter come from ``get_most_common_reading``; the word
    type is the first ``pos`` of the entry's first ``sense``, passed through
    ``strip_pos``.

    Entries whose word type is "unclassified", or whose first sense carries
    an "archaism" ``misc`` tag, are skipped. Several entries may share the
    same kanji spelling (``keb``); a spelling that is already stored is only
    overwritten when the new entry's frequency counter is positive and
    strictly greater than the stored one.

    Returns:
        dict: maps each ``keb`` text to a dict with the keys ``"romaji"``,
        ``"w_type"`` and ``"freq"``.

    Raises:
        StopIteration: if an entry has no ``sense`` element, or its first
            sense has no ``pos`` element.
    """
    auto_jm_dict = {}
    root = xml.etree.ElementTree.parse(JM_DICT_FILE).getroot()

    for entry in root.findall("entry"):
        most_common_reading, freq_counter = get_most_common_reading(entry)

        # Use the built-in next(): the iterator's .next() method exists only
        # on Python 2 and raises AttributeError on Python 3.
        first_sense = next(entry.iterfind("sense"))
        raw_first_pos = next(first_sense.iterfind("pos")).text
        misc = [m.text for m in first_sense.iterfind("misc")]
        stripped_first_pos = strip_pos(raw_first_pos)

        if stripped_first_pos in ("suru verb", "kuru verb"):
            # Put a space before the last two characters of the reading
            # (presumably the trailing suru/kuru -- confirm against the
            # output of get_most_common_reading).
            most_common_reading = (most_common_reading[:-2] + " " +
                                   most_common_reading[-2:])

        if stripped_first_pos == "unclassified" or "archaism" in misc:
            continue

        for k_ele in entry.iterfind("k_ele"):
            for keb in k_ele.iterfind("keb"):
                kanji = keb.text

                if kanji in auto_jm_dict:
                    stored_freq = auto_jm_dict[kanji]["freq"]
                    if not (freq_counter > 0 and freq_counter > stored_freq):
                        # Keep the entry that is already stored.
                        continue

                try:
                    auto_jm_dict[kanji] = {
                        "romaji": kanji_to_romaji(most_common_reading),
                        "w_type": stripped_first_pos,
                        "freq": freq_counter
                    }
                except IndexError:
                    # The conversion failed before anything was assigned, so
                    # only a previously stored entry can be present here; it
                    # is dropped and its spelling reported.
                    if kanji in auto_jm_dict:
                        del auto_jm_dict[kanji]
                        print(kanji)

    return auto_jm_dict
 def test_soukon(self):
     """Katakana words with a small tsu romanize with a doubled consonant."""
     sokuon_words = {
         u"チョット": "chotto",
         u"マッテ": "matte",
         u"ハッピョウケッカ": "happyoukekka",
     }
     for kana, romaji in sokuon_words.items():
         self.assertEqual(kanji_to_romaji(kana), romaji)
示例#6
0
    def test_match_starting_at_full(self):
        """The same kanji verb reads differently alone and inside a longer word."""
        readings = {
            u"のけ反る": "nokezoru",
            u"反る": "kaeru",
        }

        for word, romaji in readings.items():
            self.assertEqual(kanji_to_romaji(word), romaji)
示例#7
0
    def test_plain_imperative(self):
        """Godan verbs conjugate to the plain imperative (kanji and romaji)."""
        godan_expected = {
            u"会う": (u"会え", "ae"),
            u"待つ": (u"待て", "mate"),
            u"撮る": (u"撮れ", "tore"),
            u"読む": (u"読め", "yome"),
            u"遊ぶ": (u"遊べ", "asobe"),
            u"死ぬ": (u"死ね", "shine"),
            u"書く": (u"書け", "kake"),
            u"行く": (u"行け", "ike"),
            u"泳ぐ": (u"泳げ", "oyoge"),
            u"話す": (u"話せ", "hanase")
        }

        for verb, expected in godan_expected.items():
            # set_global_godan gets the romaji of the whole verb and of its
            # final character before the conjugation is requested.
            verb_romaji = kanji_to_romaji(verb)
            ending_romaji = kanji_to_romaji(verb[-1])
            set_global_godan(verb_romaji, ending_romaji)
            conj_kanji, conj_romaji = conjugate_godan_plain_imperative(verb)
            self.assertEqual(expected, (conj_kanji, conj_romaji))
示例#8
0
    def test_plain_volitional(self):
        """Godan verbs conjugate to the plain volitional (kanji and romaji)."""
        godan_expected = {
            u"会う": (u"会おう", "aou"),
            u"待つ": (u"待とう", "matou"),
            u"撮る": (u"撮ろう", "torou"),
            u"読む": (u"読もう", "yomou"),
            u"遊ぶ": (u"遊ぼう", "asobou"),
            u"死ぬ": (u"死のう", "shinou"),
            u"書く": (u"書こう", "kakou"),
            u"行く": (u"行こう", "ikou"),
            u"泳ぐ": (u"泳ごう", "oyogou"),
            u"話す": (u"話そう", "hanasou")
        }

        for verb, expected in godan_expected.items():
            # set_global_godan gets the romaji of the whole verb and of its
            # final character before the conjugation is requested.
            verb_romaji = kanji_to_romaji(verb)
            ending_romaji = kanji_to_romaji(verb[-1])
            set_global_godan(verb_romaji, ending_romaji)
            conj_kanji, conj_romaji = conjugate_godan_plain_volitional(verb)
            self.assertEqual(expected, (conj_kanji, conj_romaji))
示例#9
0
    def test_plain_te_form(self):
        """Godan verbs conjugate to the plain te-form (kanji and romaji)."""
        godan_expected = {
            u"会う": (u"会って", "atte"),
            u"待つ": (u"待って", "matte"),
            u"撮る": (u"撮って", "totte"),
            u"読む": (u"読んで", "yonde"),
            u"遊ぶ": (u"遊んで", "asonde"),
            u"死ぬ": (u"死んで", "shinde"),
            u"書く": (u"書いて", "kaite"),
            u"行く": (u"行って", "itte"),
            u"泳ぐ": (u"泳いで", "oyoide"),
            u"話す": (u"話して", "hanashite")
        }

        for verb, expected in godan_expected.items():
            # set_global_godan gets the romaji of the whole verb and of its
            # final character before the conjugation is requested.
            verb_romaji = kanji_to_romaji(verb)
            ending_romaji = kanji_to_romaji(verb[-1])
            set_global_godan(verb_romaji, ending_romaji)
            conj_kanji, conj_romaji = conjugate_godan_plain_te_form(verb)
            self.assertEqual(expected, (conj_kanji, conj_romaji))
示例#10
0
    def test_mo_particle(self):
        """The particle "mo" is split off as its own word in the romaji."""
        particle_cases = {
            # "mo" sits where the script changes from the kanji verb to
            # hiragana, so it is separated from what follows.
            u"背中を押すもの":
            u"senaka wo osu mo no",
            # "mo" is the final character and follows a noun.
            u"私も": u"watashi mo"
        }

        for phrase, romaji in particle_cases.items():
            self.assertEqual(kanji_to_romaji(phrase), romaji)
    def test_polite_present_negative(self):
        """Godan verbs conjugate to the polite present negative (-masen)."""
        godan_expected = {
            "会う": ("会いません", "aimasen"),
            "待つ": ("待ちません", "machimasen"),
            "撮る": ("撮りません", "torimasen"),
            "読む": ("読みません", "yomimasen"),
            "遊ぶ": ("遊びません", "asobimasen"),
            "死ぬ": ("死にません", "shinimasen"),
            "書く": ("書きません", "kakimasen"),
            "行く": ("行きません", "ikimasen"),
            "泳ぐ": ("泳ぎません", "oyogimasen"),
            "話す": ("話しません", "hanashimasen")
        }

        for verb, expected in godan_expected.items():
            # set_global_godan gets the romaji of the whole verb and of its
            # final character before the conjugation is requested.
            verb_romaji = kanji_to_romaji(verb)
            ending_romaji = kanji_to_romaji(verb[-1])
            set_global_godan(verb_romaji, ending_romaji)
            conj_kanji, conj_romaji = conjugate_godan_polite_present_negative(verb)
            self.assertEqual(expected, (conj_kanji, conj_romaji))
    def test_polite_imperative(self):
        """Godan verbs conjugate to the polite imperative (-nasai)."""
        godan_expected = {
            "会う": ("会いなさい", "ainasai"),
            "待つ": ("待ちなさい", "machinasai"),
            "撮る": ("撮りなさい", "torinasai"),
            "読む": ("読みなさい", "yominasai"),
            "遊ぶ": ("遊びなさい", "asobinasai"),
            "死ぬ": ("死になさい", "shininasai"),
            "書く": ("書きなさい", "kakinasai"),
            "行く": ("行きなさい", "ikinasai"),
            "泳ぐ": ("泳ぎなさい", "oyoginasai"),
            "話す": ("話しなさい", "hanashinasai")
        }

        for verb, expected in godan_expected.items():
            # set_global_godan gets the romaji of the whole verb and of its
            # final character before the conjugation is requested.
            verb_romaji = kanji_to_romaji(verb)
            ending_romaji = kanji_to_romaji(verb[-1])
            set_global_godan(verb_romaji, ending_romaji)
            conj_kanji, conj_romaji = conjugate_godan_polite_imperative(verb)
            self.assertEqual(expected, (conj_kanji, conj_romaji))
    def test_plain_te_form_negative(self):
        """Godan verbs conjugate to the plain negative te-form (-naide)."""
        godan_expected = {
            "会う": ("会わないで", "awanaide"),
            "待つ": ("待たないで", "matanaide"),
            "撮る": ("撮らないで", "toranaide"),
            "読む": ("読まないで", "yomanaide"),
            "遊ぶ": ("遊ばないで", "asobanaide"),
            "死ぬ": ("死なないで", "shinanaide"),
            "書く": ("書かないで", "kakanaide"),
            "行く": ("行かないで", "ikanaide"),
            "泳ぐ": ("泳がないで", "oyoganaide"),
            "話す": ("話さないで", "hanasanaide")
        }

        for verb, expected in godan_expected.items():
            # set_global_godan gets the romaji of the whole verb and of its
            # final character before the conjugation is requested.
            verb_romaji = kanji_to_romaji(verb)
            ending_romaji = kanji_to_romaji(verb[-1])
            set_global_godan(verb_romaji, ending_romaji)
            conj_kanji, conj_romaji = conjugate_godan_plain_te_form_negative(verb)
            self.assertEqual(expected, (conj_kanji, conj_romaji))
示例#14
0
    def test_polite_imperative_negative(self):
        """Godan verbs conjugate to the polite negative imperative (-nasaruna)."""
        godan_expected = {
            u"会う": (u"会いなさるな", "ainasaruna"),
            u"待つ": (u"待ちなさるな", "machinasaruna"),
            u"撮る": (u"撮りなさるな", "torinasaruna"),
            u"読む": (u"読みなさるな", "yominasaruna"),
            u"遊ぶ": (u"遊びなさるな", "asobinasaruna"),
            u"死ぬ": (u"死になさるな", "shininasaruna"),
            u"書く": (u"書きなさるな", "kakinasaruna"),
            u"行く": (u"行きなさるな", "ikinasaruna"),
            u"泳ぐ": (u"泳ぎなさるな", "oyoginasaruna"),
            u"話す": (u"話しなさるな", "hanashinasaruna")
        }

        for verb, expected in godan_expected.items():
            # set_global_godan gets the romaji of the whole verb and of its
            # final character before the conjugation is requested.
            verb_romaji = kanji_to_romaji(verb)
            ending_romaji = kanji_to_romaji(verb[-1])
            set_global_godan(verb_romaji, ending_romaji)
            conj_kanji, conj_romaji = conjugate_godan_polite_imperative_negative(verb)
            self.assertEqual(expected, (conj_kanji, conj_romaji))
示例#15
0
    def test_plain_negative(self):
        """Godan verbs conjugate to the plain negative (-nai)."""
        godan_expected = {
            u"会う": (u"会わない", "awanai"),
            u"待つ": (u"待たない", "matanai"),
            u"撮る": (u"撮らない", "toranai"),
            u"読む": (u"読まない", "yomanai"),
            u"遊ぶ": (u"遊ばない", "asobanai"),
            u"死ぬ": (u"死なない", "shinanai"),
            u"書く": (u"書かない", "kakanai"),
            u"行く": (u"行かない", "ikanai"),
            u"泳ぐ": (u"泳がない", "oyoganai"),
            u"話す": (u"話さない", "hanasanai")
        }

        for verb, expected in godan_expected.items():
            # set_global_godan gets the romaji of the whole verb and of its
            # final character before the conjugation is requested.
            verb_romaji = kanji_to_romaji(verb)
            ending_romaji = kanji_to_romaji(verb[-1])
            set_global_godan(verb_romaji, ending_romaji)
            conj_kanji, conj_romaji = conjugate_godan_plain_negative(verb)
            self.assertEqual(expected, (conj_kanji, conj_romaji))
示例#16
0
    def test_soukon(self):
        """Hiragana words with a small tsu romanize with a doubled consonant."""
        sokuon_words = {
            "ちょっと": "chotto",
            "まって": "matte",
            "はっぴょうけっか": "happyoukekka",
        }

        for kana, romaji in sokuon_words.items():
            self.assertEqual(kanji_to_romaji(kana), romaji)
示例#17
0
    def test_polite_past(self):
        """Godan verbs conjugate to the polite past (-mashita)."""
        godan_expected = {
            u"会う": (u"会いました", "aimashita"),
            u"待つ": (u"待ちました", "machimashita"),
            u"撮る": (u"撮りました", "torimashita"),
            u"読む": (u"読みました", "yomimashita"),
            u"遊ぶ": (u"遊びました", "asobimashita"),
            u"死ぬ": (u"死にました", "shinimashita"),
            u"書く": (u"書きました", "kakimashita"),
            u"行く": (u"行きました", "ikimashita"),
            u"泳ぐ": (u"泳ぎました", "oyogimashita"),
            u"話す": (u"話しました", "hanashimashita")
        }

        for verb, expected in godan_expected.items():
            # set_global_godan gets the romaji of the whole verb and of its
            # final character before the conjugation is requested.
            verb_romaji = kanji_to_romaji(verb)
            ending_romaji = kanji_to_romaji(verb[-1])
            set_global_godan(verb_romaji, ending_romaji)
            conj_kanji, conj_romaji = conjugate_godan_polite_past(verb)
            self.assertEqual(expected, (conj_kanji, conj_romaji))
 def test_long_vowel_with_soukon(self):
     """A long-vowel mark after a small-yu syllable doubles the vowel.

     NOTE(review): the name mentions soukon, but none of the inputs below
     contains a small tsu.
     """
     long_vowel_words = {
         u"リュー": "ryuu",
         u"ニュース": "nyuusu",
         u"デビュー": "debyuu",
         u"チュー": "chuu"
     }
     for kana, romaji in long_vowel_words.items():
         self.assertEqual(kanji_to_romaji(kana), romaji)
示例#19
0
    def test_plain_past(self):
        """Godan verbs conjugate to the plain past (-ta / -da)."""
        godan_expected = {
            u"会う": (u"会った", "atta"),
            u"待つ": (u"待った", "matta"),
            u"撮る": (u"撮った", "totta"),
            u"読む": (u"読んだ", "yonda"),
            u"遊ぶ": (u"遊んだ", "asonda"),
            u"死ぬ": (u"死んだ", "shinda"),
            u"書く": (u"書いた", "kaita"),
            u"行く": (u"行った", "itta"),
            u"泳ぐ": (u"泳いだ", "oyoida"),
            u"話す": (u"話した", "hanashita")
        }

        for verb, expected in godan_expected.items():
            # set_global_godan gets the romaji of the whole verb and of its
            # final character before the conjugation is requested.
            verb_romaji = kanji_to_romaji(verb)
            ending_romaji = kanji_to_romaji(verb[-1])
            set_global_godan(verb_romaji, ending_romaji)
            conj_kanji, conj_romaji = conjugate_godan_plain_past(verb)
            self.assertEqual(expected, (conj_kanji, conj_romaji))
示例#20
0
 def test_soukon_ch(self):
     """A hiragana small tsu before a "ch" sound romanizes as "tch"."""
     tch_words = {
         "ぼっちゃん": "botchan",
         "こっち": "kotchi",
         "かっちょん": "katchon",
         "まっちゃ": "matcha",
         "みっち": "mitchi"
     }
     for kana, romaji in tch_words.items():
         self.assertEqual(kanji_to_romaji(kana), romaji)
 def test_soukon_ch(self):
     """A katakana small tsu before a "ch" sound romanizes as "tch"."""
     tch_words = {
         u"ボッチャン": "botchan",
         u"コッチ": "kotchi",
         u"カッチョン": "katchon",
         u"マッチャ": "matcha",
         u"ミッチ": "mitchi"
     }
     for kana, romaji in tch_words.items():
         self.assertEqual(kanji_to_romaji(kana), romaji)
    def test_polite_past_negative(self):
        """Ichidan verbs conjugate to the polite past negative (-masen deshita)."""
        ichidan_expected = {
            u"寝る": (u"寝ませんでした", "nemasen deshita"),
            u"出来る": (u"出来ませんでした", "dekimasen deshita"),
            u"見つける": (u"見つけませんでした", "mitsukemasen deshita")
        }

        for verb, expected in ichidan_expected.items():
            # The conjugator takes no arguments; set_global_ichidan is handed
            # the verb minus its last character and its romaji minus the
            # last two letters.
            kanji_stem = verb[:-1]
            romaji_stem = kanji_to_romaji(verb)[:-2]
            set_global_ichidan(kanji_stem, romaji_stem)
            conj_kanji, conj_romaji = conjugate_ichidan_polite_past_negative()
            self.assertEqual(expected, (conj_kanji, conj_romaji))
    def test_plain_past_negative(self):
        """Ichidan verbs conjugate to the plain past negative (-nakatta)."""
        ichidan_expected = {
            u"寝る": (u"寝なかった", "nenakatta"),
            u"出来る": (u"出来なかった", "dekinakatta"),
            u"見つける": (u"見つけなかった", "mitsukenakatta")
        }

        for verb, expected in ichidan_expected.items():
            # The conjugator takes no arguments; set_global_ichidan is handed
            # the verb minus its last character and its romaji minus the
            # last two letters.
            kanji_stem = verb[:-1]
            romaji_stem = kanji_to_romaji(verb)[:-2]
            set_global_ichidan(kanji_stem, romaji_stem)
            conj_kanji, conj_romaji = conjugate_ichidan_plain_past_negative()
            self.assertEqual(expected, (conj_kanji, conj_romaji))
    def test_polite_imperative_negative(self):
        """Ichidan verbs conjugate to the polite negative imperative (-nasaruna)."""
        ichidan_expected = {
            u"寝る": (u"寝なさるな", "nenasaruna"),
            u"出来る": (u"出来なさるな", "dekinasaruna"),
            u"見つける": (u"見つけなさるな", "mitsukenasaruna")
        }

        for verb, expected in ichidan_expected.items():
            # The conjugator takes no arguments; set_global_ichidan is handed
            # the verb minus its last character and its romaji minus the
            # last two letters.
            kanji_stem = verb[:-1]
            romaji_stem = kanji_to_romaji(verb)[:-2]
            set_global_ichidan(kanji_stem, romaji_stem)
            conj_kanji, conj_romaji = conjugate_ichidan_polite_imperative_negative()
            self.assertEqual(expected, (conj_kanji, conj_romaji))
    def test_plain_imperative(self):
        """Ichidan verbs conjugate to the plain imperative (-ro)."""
        ichidan_expected = {
            u"寝る": (u"寝ろ", "nero"),
            u"出来る": (u"出来ろ", "dekiro"),
            u"見つける": (u"見つけろ", "mitsukero")
        }

        for verb, expected in ichidan_expected.items():
            # The conjugator takes no arguments; set_global_ichidan is handed
            # the verb minus its last character and its romaji minus the
            # last two letters.
            kanji_stem = verb[:-1]
            romaji_stem = kanji_to_romaji(verb)[:-2]
            set_global_ichidan(kanji_stem, romaji_stem)
            conj_kanji, conj_romaji = conjugate_ichidan_plain_imperative()
            self.assertEqual(expected, (conj_kanji, conj_romaji))
    def test_polite_volitional(self):
        """Ichidan verbs conjugate to the polite volitional (-mashou)."""
        ichidan_expected = {
            u"寝る": (u"寝ましょう", "nemashou"),
            u"出来る": (u"出来ましょう", "dekimashou"),
            u"見つける": (u"見つけましょう", "mitsukemashou")
        }

        for verb, expected in ichidan_expected.items():
            # The conjugator takes no arguments; set_global_ichidan is handed
            # the verb minus its last character and its romaji minus the
            # last two letters.
            kanji_stem = verb[:-1]
            romaji_stem = kanji_to_romaji(verb)[:-2]
            set_global_ichidan(kanji_stem, romaji_stem)
            conj_kanji, conj_romaji = conjugate_ichidan_polite_volitional()
            self.assertEqual(expected, (conj_kanji, conj_romaji))
    def test_plain_te_form(self):
        """Ichidan verbs conjugate to the plain te-form (-te)."""
        ichidan_expected = {
            u"寝る": (u"寝て", "nete"),
            u"出来る": (u"出来て", "dekite"),
            u"見つける": (u"見つけて", "mitsukete")
        }

        for verb, expected in ichidan_expected.items():
            # The conjugator takes no arguments; set_global_ichidan is handed
            # the verb minus its last character and its romaji minus the
            # last two letters.
            kanji_stem = verb[:-1]
            romaji_stem = kanji_to_romaji(verb)[:-2]
            set_global_ichidan(kanji_stem, romaji_stem)
            conj_kanji, conj_romaji = conjugate_ichidan_plain_te_form()
            self.assertEqual(expected, (conj_kanji, conj_romaji))
示例#28
0
    def test_plain_volitional(self):
        """Ichidan verbs conjugate to the plain volitional (-you)."""
        ichidan_expected = {
            "寝る": ("寝よう", "neyou"),
            "出来る": ("出来よう", "dekiyou"),
            "見つける": ("見つけよう", "mitsukeyou")
        }

        for verb, expected in ichidan_expected.items():
            # The conjugator takes no arguments; set_global_ichidan is handed
            # the verb minus its last character and its romaji minus the
            # last two letters.
            kanji_stem = verb[:-1]
            romaji_stem = kanji_to_romaji(verb)[:-2]
            set_global_ichidan(kanji_stem, romaji_stem)
            conj_kanji, conj_romaji = conjugate_ichidan_plain_volitional()
            self.assertEqual(expected, (conj_kanji, conj_romaji))
示例#29
0
    def test_plain_negative(self):
        """Ichidan verbs conjugate to the plain negative (-nai)."""
        ichidan_expected = {
            "寝る": ("寝ない", "nenai"),
            "出来る": ("出来ない", "dekinai"),
            "見つける": ("見つけない", "mitsukenai")
        }

        for verb, expected in ichidan_expected.items():
            # The conjugator takes no arguments; set_global_ichidan is handed
            # the verb minus its last character and its romaji minus the
            # last two letters.
            kanji_stem = verb[:-1]
            romaji_stem = kanji_to_romaji(verb)[:-2]
            set_global_ichidan(kanji_stem, romaji_stem)
            conj_kanji, conj_romaji = conjugate_ichidan_plain_negative()
            self.assertEqual(expected, (conj_kanji, conj_romaji))
示例#30
0
    def test_polite_past(self):
        """Ichidan verbs conjugate to the polite past (-mashita)."""
        ichidan_expected = {
            "寝る": ("寝ました", "nemashita"),
            "出来る": ("出来ました", "dekimashita"),
            "見つける": ("見つけました", "mitsukemashita")
        }

        for verb, expected in ichidan_expected.items():
            # The conjugator takes no arguments; set_global_ichidan is handed
            # the verb minus its last character and its romaji minus the
            # last two letters.
            kanji_stem = verb[:-1]
            romaji_stem = kanji_to_romaji(verb)[:-2]
            set_global_ichidan(kanji_stem, romaji_stem)
            conj_kanji, conj_romaji = conjugate_ichidan_polite_past()
            self.assertEqual(expected, (conj_kanji, conj_romaji))