def en_to_kana(msg_src):
    """Replace alphabetic words in ``msg_src`` with their alkana reading.

    The text is split into runs of ASCII letters and the text between them.
    Every piece is looked up with ``alkana.get_kana``; pieces that have a
    reading are replaced by it, all other pieces are kept unchanged.

    Args:
        msg_src: Text that may contain English words.

    Returns:
        The text with every piece known to alkana replaced by its reading.
    """
    # The capturing group makes re.split keep the matched words in the
    # result. The class is plain [a-zA-Z]: inside [...] parentheses are
    # literal characters, so the former "[(a-z)(A-Z)]" glued "(" and ")"
    # onto the words and e.g. "(hello)" was never found in the dictionary.
    pieces = re.split(r'([a-zA-Z]+)', msg_src)

    converted = []
    for piece in pieces:
        # Look each piece up once instead of twice.
        kana = alkana.get_kana(piece)
        converted.append(piece if kana is None else kana)
    return ''.join(converted)
def countMora(m_token):
    """Count the morae of one tokenizer result.

    Args:
        m_token: Indexable token record. ``m_token[0]`` is the text that is
            looked up in alkana and counted when no reading is available
            (presumably the surface form), ``m_token[1]`` is compared with
            the part-of-speech labels '記号' / '補助記号', and
            ``m_token[-1]`` is the reading, where '*' means "no reading".

    Returns:
        The number of morae as an int; 0 for tokens whose ``m_token[1]`` is
        '記号' or '補助記号'.
    """
    # Symbols never contribute, whichever reading is available, so decide
    # that first and skip the dictionary lookup entirely.
    if m_token[1] in ('記号', '補助記号'):
        return 0

    yomi = ''
    if m_token[-1] == '*':
        # No reading from the tokenizer: try the English -> kana dictionary.
        kana = alkana.get_kana(m_token[0])
        if kana:
            yomi = kana
    else:
        yomi = m_token[-1]

    if yomi == '':
        # No reading at all: count the characters of m_token[0], ignoring
        # small ゃゅょ which merge with the preceding kana.
        # NOTE(review): small vowels (ぁぃぅぇぉ) are still counted on this
        # fallback path - confirm whether they should be dropped here too.
        return len(re.sub(r'[ゃゅょ]', '', m_token[0]))

    # Small katakana merge with the preceding kana into a single mora.
    # Besides ャュョ this also covers the small vowels and ヮ, so that
    # readings such as ファ / ティ / ウェ count as one mora, not two.
    return len(re.sub(r'[ャュョァィゥェォヮ]', '', yomi))
def replace_eng_to_kana(cls, msg: str) -> str:
    '''
    Replace English words in ``msg`` with their kana reading.

    Every word found by ``cls.re_eng`` is looked up in alkana (lower-cased)
    and, when a reading exists, replaced by it. Words without a reading and
    all text between words are kept unchanged.

    Example: "wood" is replaced by the reading alkana returns for it.
    '''
    pieces = []
    cursor = 0  # everything before this index of msg is already emitted
    for word in cls.re_eng.findall(msg):
        # Locate this occurrence at or after the previous one. A plain
        # str.replace(word, read, 1) would rewrite the FIRST occurrence of
        # the substring, which may sit inside an earlier word that had no
        # reading (e.g. "xthe the" became "xザ the").
        start = msg.find(word, cursor)
        if start < 0:
            continue
        # alkana is searched with the lower-cased word.
        read = alkana.get_kana(word.lower())
        pieces.append(msg[cursor:start])
        pieces.append(word if read is None else read)
        cursor = start + len(word)
    pieces.append(msg[cursor:])
    return ''.join(pieces)
def test_add_external_data(self):
    # Before the external data is loaded the word has no reading.
    self.assertIsNone(alkana.get_kana('mmmmm'))

    # The sample CSV lives in the same directory as this test file.
    here = os.path.dirname(__file__)
    csv_path = os.path.join(here, 'sample_external_data.csv')
    alkana.add_external_data(csv_path)

    # After loading, the word resolves to the reading from the CSV.
    reading = alkana.get_kana('mmmmm')
    self.assertEqual('テスト', reading)
def test_get_kana_upper_case(self):
    # A word containing an upper-case letter still resolves to its reading.
    reading = alkana.get_kana("World")
    self.assertEqual('ワールド', reading)
def test_get_kana(self):
    # A lower-case dictionary word resolves to its reading.
    reading = alkana.get_kana('hello')
    self.assertEqual('ハロー', reading)
def test_get_kana_none_case(self):
    # A string that is not in the dictionary yields None.
    reading = alkana.get_kana("abcdefg")
    self.assertIsNone(reading)
def _reversed_vowels(phonemes):
    """Return the entries of ``phonemes`` that are in ``vowel``, last first."""
    return [p for p in reversed(phonemes) if p in vowel]


def _count_rhyme_points(criteria_vowel_r, candidate_vowel_r):
    """Count how many leading entries of the two sequences are equal."""
    points = 0
    # zip stops at the shorter sequence, so a candidate with fewer vowels
    # than the criteria can no longer raise IndexError.
    for expected, actual in zip(criteria_vowel_r, candidate_vowel_r):
        if expected != actual:
            break
        points += 1
    return points


def search_synonym(word, criteria_vowel_r):
    """Find the WordNet synonym of ``word`` that rhymes best with the criteria.

    The rhyme score of a candidate is the number of leading entries of its
    reversed vowel sequence that equal ``criteria_vowel_r``. ``word`` itself
    is the initial best candidate; a synonym replaces it only when its score
    is strictly higher.

    Relies on the module-level names ``conn``, ``vowel``, ``g2p``,
    ``mecab_get_yomi`` and ``isalpha`` defined outside this block.

    Args:
        word: The word to look up.
        criteria_vowel_r: Vowel sequence to rhyme with, in the same reversed
            order as the sequences built here.

    Returns:
        A dict ``{'original': word, 'synonym': {'word': ..., 'rhyme_pt': ...}}``.
        When ``word`` is not found in the ``word`` table, ``'synonym'``
        describes ``word`` itself.
    """
    word_phoneme = g2p(mecab_get_yomi(word))
    synonym_best = {
        'word': word,
        'rhyme_pt': _count_rhyme_points(
            criteria_vowel_r, _reversed_vowels(word_phoneme)
        ),
    }

    # Check whether the queried word exists in WordNet.
    # Values are bound as parameters instead of being formatted into the SQL
    # text, so lemmas containing quotes cannot break or alter the query.
    # NOTE(review): "?" placeholders assume conn is a sqlite3 connection.
    word_id = None
    for row in conn.execute("select wordid from word where lemma=?", (word,)):
        word_id = row[0]

    # Not a WordNet word: the word itself is the only candidate.
    if word_id is None:
        return {'original': word, 'synonym': synonym_best}

    # Collect the concepts (synsets) that contain the word.
    synsets = [
        row[0]
        for row in conn.execute(
            "select synset from sense where wordid=?", (word_id,)
        )
    ]

    # Score every other word that belongs to one of those concepts.
    for synset in synsets:
        sense_rows = conn.execute(
            "select wordid from sense where (synset=? and wordid!=?)",
            (synset, word_id),
        )
        for sense_row in sense_rows:
            lemma_rows = conn.execute(
                "select lemma from word where wordid=?", (sense_row[0],)
            )
            for lemma_row in lemma_rows:
                synonym = lemma_row[0]
                # Lemmas containing an underscore are skipped.
                if '_' in synonym:
                    continue
                # Alphabetic lemmas are replaced by their kana reading;
                # those without a reading are dropped.
                if isalpha(synonym):
                    synonym = alkana.get_kana(synonym)
                if synonym is None:
                    continue

                synonym_phoneme = g2p(mecab_get_yomi(synonym))
                if '<unk>' in synonym_phoneme:
                    continue

                rhyme_pt = _count_rhyme_points(
                    criteria_vowel_r, _reversed_vowels(synonym_phoneme)
                )
                if rhyme_pt > synonym_best['rhyme_pt']:
                    synonym_best = {'word': synonym, 'rhyme_pt': rhyme_pt}

    return {'original': word, 'synonym': synonym_best}