Exemplos de split_syllable_char em Python, exemplos de hangul_utils.split_syllable_char em Python

Exemplo n.º 1

0

Exibir arquivo

def get_middle_name(conn, n1, n2, saju, gender, n2_rsc_type, mode):
    """Collect every full name formed by n1 + n2 + an eligible third hanja.

    Each row of ``naming_hanja`` that is flagged as a naming hanja (and whose
    reading is not in the excluded list inside the query) is tried as ``n3``.
    A row is kept only if it passes every filter below, in order.

    Args:
        conn: Database connection; a cursor is opened on it and it is also
            handed to the stroke / positive-negative checks.
        n1, n2: Rows for the first two characters, indexed with the
            module-level HANJA / READING constants.
        saju: Mapping read for keys ``'c1'`` and ``'c2'``.
        gender: Passed through to ``check_last_name_gender``.
        n2_rsc_type: ``rsc_type`` already used by ``n2``; a candidate with the
            same value is rejected.
        mode: Passed through to ``check_total_stroke``.

    Returns:
        A dict mapping the concatenated hanja to the concatenated reading,
        or ``None`` when no candidate survives.
    """
    cursor = conn.cursor()
    sql = """
    SELECT hanja,reading,strokes,add_strokes,rsc_type,pronunciations
    FROM naming_hanja
    WHERE is_naming_hanja=1 AND reading
    NOT IN ('만', '병', '백', '장', '춘', '최', '충', '창', '치', '참', '천',
    '택', '탁', '태', '외', '사', '매', '읍', '소', '종', '순', '요', '자',
    '경', '옥', '해', '부', '효', '존', '난', '류', '홍', '처', '필')
    """
    candidates = {}

    for n3 in cursor.execute(sql):
        # A hanja may not be repeated within the name (e.g. 김주김, 김소소).
        if n1[HANJA] == n3[HANJA] or n2[HANJA] == n3[HANJA]:
            continue
        # Neither may a reading be repeated (e.g. 김주김, 김소소).
        if n1[READING] == n3[READING] or n2[READING] == n3[READING]:
            continue

        given_reading = '%s%s' % (n2[READING], n3[READING])
        if check_name(given_reading) is False:
            continue
        if check_last_name_gender(n3[READING], given_reading, gender) is False:
            continue

        # The candidate's rsc_type must match one of the two saju values ...
        rsc_type = n3[RSC_TYPE]
        if saju['c1'] != rsc_type and saju['c2'] != rsc_type:
            continue
        # ... and must differ from the rsc_type already used by n2.
        if n2_rsc_type == rsc_type:
            continue

        if check_positive_negative(conn, n1, n2, n3) is False:
            continue
        # The stroke totals are validated as well.
        if check_total_stroke(conn, n1, n2, n3, mode) is False:
            continue

        # Pronunciation checks work on the decomposed jamo of each reading.
        jamo1 = split_syllable_char(n1[READING])
        jamo2 = split_syllable_char(n2[READING])
        jamo3 = split_syllable_char(n3[READING])
        if balum_oheng(jamo1, jamo2, jamo3) is False:
            continue
        if check_all_name_hard_pronounce(jamo1, jamo2, jamo3) is False:
            continue

        full_hanja = '%s%s%s' % (n1[HANJA], n2[HANJA], n3[HANJA])
        full_reading = '%s%s%s' % (n1[READING], n2[READING], n3[READING])
        candidates[full_hanja] = full_reading

    return candidates if candidates else None

Exemplo n.º 2

0

Exibir arquivo

def check_two_words_hard_pronounce(n1, n2):
    """Return False when two adjacent syllables form a hard-to-pronounce pair.

    Both syllables are decomposed with ``split_syllable_char``. Only pairs in
    which each decomposition has exactly three components are examined; any
    other pair is accepted.

    Args:
        n1: First syllable.
        n2: Second syllable.

    Returns:
        False if the components at positions 1 and 2 of both syllables match
        one of the listed combinations, True otherwise.
    """
    first = split_syllable_char(n1)
    second = split_syllable_char(n2)

    if len(first) != 3 or len(second) != 3:
        return True

    # (first[1], first[2], second[1], second[2]) combinations to reject.
    hard_combinations = (
        ('ㅕ', 'ㅇ', 'ㅡ', 'ㅇ'),  # e.g. 경흥원
        ('ㅏ', 'ㅇ', 'ㅏ', 'ㅇ'),  # e.g. 강항준
        ('ㅏ', 'ㅇ', 'ㅑ', 'ㅇ'),  # e.g. 강향준
    )
    observed = (first[1], first[2], second[1], second[2])
    return observed not in hard_combinations

Exemplo n.º 3

0

Exibir arquivo

Arquivo: __init__.py Projeto: peternara/KoreanOCR-tf-flask

def get_label(index_data):
    """Build the one-hot label matrix for a sequence of target characters.

    Four one-hot groups are produced per entry and concatenated column-wise:
    ``ko_chset_cho``, ``ko_chset_jung``, ``ko_chset_jong`` and ``en_chset``.
    Each group has one extra trailing column that marks an 'invalid' target
    for that group.

    Args:
        index_data: Sequence of mappings; each is read for its ``'target'``.

    Returns:
        A 2-D numpy array with one row per entry of ``index_data``.
    """
    n_rows = len(index_data)
    # One additional column per group for the 'invalid' label.
    cho = np.zeros([n_rows, len(ko_chset_cho) + 1])
    jung = np.zeros([n_rows, len(ko_chset_jung) + 1])
    jong = np.zeros([n_rows, len(ko_chset_jong) + 1])
    en = np.zeros([n_rows, len(en_chset) + 1])

    for row, entry in enumerate(index_data):
        char = entry['target']

        # Hangul part: a single character accepted by check_syllable.
        if len(char) == 1 and check_syllable(char):
            jamo = split_syllable_char(char)
            cho[row][ko_chset_cho.index(jamo[0])] = 1
            jung[row][ko_chset_jung.index(jamo[1])] = 1
            if len(jamo) >= 3:
                jong[row][ko_chset_jong.index(jamo[2])] = 1
            else:
                # Fewer than three components: column 0 is used.
                jong[row][0] = 1
        else:
            cho[row][len(ko_chset_cho)] = 1
            jung[row][len(ko_chset_jung)] = 1
            jong[row][len(ko_chset_jong)] = 1

        # English part: index in en_chset, or the trailing 'invalid' column.
        if char in en_chset:
            en[row][en_chset.index(char)] = 1
        else:
            en[row][len(en_chset)] = 1

    # Join the four groups side by side.
    combined = np.concatenate((cho, jung, jong, en), axis=1)
    print("label loaded")
    return combined

Exemplo n.º 4

0

Exibir arquivo

Arquivo: 1.first_naming.py Projeto: imtaehyun/EdenNaming

def set_five_type(conn):
    """Compute and store the five-element type of every naming hanja.

    Reads ``hanja``, ``is_naming_hanja`` and ``reading`` from ``naming_hanja``.
    For each row flagged as usable, the first component of the decomposed
    reading is passed to ``get_five_type``; unless that returns -1, the result
    is printed and handed to ``update_hangul_strokes``.

    Args:
        conn: Database connection used for both the query and the update.
    """
    cursor = conn.cursor()
    rows = cursor.execute('SELECT hanja,is_naming_hanja,reading FROM naming_hanja')
    for hanja, is_naming_hanja, reading in rows:
        # Skip hanja that cannot be used in a Korean name.
        if is_naming_hanja == 0:
            continue
        # Only the first component (the initial consonant) of the reading is used.
        initial = split_syllable_char(reading)[0]
        five_type = get_five_type(initial)  # the 'five elements' of yin-yang
        if five_type == -1:
            continue
        print(five_type, hanja)
        update_hangul_strokes(five_type, hanja, conn)

Exemplo n.º 5

0

Exibir arquivo

Arquivo: semantic.py Projeto: peternara/KoreanOCR-tf-flask

def eval_tail(c, cand):
    """Score the tail of a candidate; the tail measures excessive splitting.

    The score is ``cand.tail + 1`` minus a bonus:

    * 10 when the candidate value is ``'.'`` or ``','`` (these symbols should
      always be split off);
    * 2 when ``c.value`` is an apostrophe;
    * 2 when ``cand.prob`` exceeds 0.99 and ``c.value`` is a syllable whose
      second component is 'ㅏ'.

    The last two bonuses apply only if ``c`` has a ``prob`` attribute and both
    ``c.value`` and ``cand.value`` are non-empty.

    NOTE(review): the guard tests ``hasattr(c, "prob")`` but the value read
    afterwards is ``cand.prob`` -- confirm that this is intended.
    """
    bonus = 10 if cand.value in ('.', ',') else 0

    comparable = (
        hasattr(c, "prob")
        and c.value is not None
        and len(c.value) > 0
        and len(cand.value) > 0
    )
    if not comparable:
        return cand.tail + 1 - bonus

    if c.value == "'":
        bonus += 2
    # Evaluation order matters: the helpers run only for confident candidates.
    if cand.prob > 0.99 and check_syllable(c.value) and split_syllable_char(c.value)[1] == 'ㅏ':
        bonus += 2

    return cand.tail + 1 - bonus

Exemplo n.º 6

0

Exibir arquivo

def get_hangul_list():
    """Return the syllables listed in the file ``unicode_hangul`` that are not blocked.

    The file is read line by line and split on whitespace. Tokens rejected by
    ``check_syllable`` are skipped. A remaining syllable is dropped when its
    first component maps to 1 in ``BLOCK_LIST1``. ``BLOCK_LIST3`` (keyed by the
    third component) is consulted only when the ``BLOCK_LIST1`` lookup fails.

    NOTE(review): because of that fallback structure, a syllable whose first
    component is present in ``BLOCK_LIST1`` with a value other than 1 is never
    checked against ``BLOCK_LIST3`` -- confirm that this is intended.

    Returns:
        List of accepted syllables, in file order.
    """
    hanguls = []
    # NOTE(review): the file is opened with the platform default encoding.
    with open('unicode_hangul') as f:
        for line in f:
            for h in line.split():
                if check_syllable(h) is False:
                    continue
                split_h = split_syllable_char(h)
                # Only missing keys / missing positions are expected here, so
                # catch LookupError (KeyError, IndexError) instead of using a
                # bare ``except`` that would also hide KeyboardInterrupt,
                # SystemExit and genuine programming errors.
                try:
                    if BLOCK_LIST1[split_h[0]] == 1:
                        continue
                except LookupError:
                    try:
                        if BLOCK_LIST3[split_h[2]] == 1:
                            continue
                    except LookupError:
                        # No third component, or it is not listed: keep it.
                        pass
                hanguls.append(h)
    return hanguls

Exemplo n.º 7

0

Exibir arquivo

Arquivo: generate.py Projeto: dj-shin/ctf2018-writeups

def generate(flag):
    """Encode *flag* as a sequence of key codes and write it to ``keystroke.txt``.

    Every character of ``flag`` is decomposed with ``split_syllable_char`` and
    each component is translated to a key string through the tables below
    (initial, medial and, when present, final). A space or newline for which
    the decomposition raises ``ValueError`` is kept verbatim. All key strings
    are then joined, every distinct character is given an index, and the
    indices are written as two-digit hex values separated by ``-``.

    The index assignment comes from iterating a ``set``, so the mapping is
    arbitrary and is not guaranteed to be the same from one run to the next.

    Args:
        flag: Text to encode.

    Raises:
        AssertionError: If a character other than a space or newline cannot be
            decomposed.
        KeyError: If a decomposed component is missing from the tables.
    """
    cho_sung = {
        'ㄱ': 'k',
        'ㄴ': 'u',
        'ㄷ': 'i',
        'ㄹ': 'm',
        'ㅁ': 'y',
        'ㅂ': 'o',
        'ㅅ': 'n',
        'ㅇ': 'j',
        'ㅈ': 'l',
        'ㅊ': 'hl',
        'ㅋ': 'hk',
        'ㅌ': 'hi',
        'ㅍ': 'ho',
        'ㅎ': 'h',
        'ㅆ': 'nn',
        'ㄸ': 'ii',
        'ㄲ': 'kk',
        'ㅉ': 'll',
    }
    jung_sung = {
        'ㅏ': 'f',
        'ㅑ': '.g',
        'ㅓ': 'r',
        'ㅕ': 't',
        'ㅗ': 'v',
        'ㅛ': '.v',
        'ㅜ': 'b',
        'ㅠ': '.b',
        'ㅡ': 'g',
        'ㅣ': 'd',
        'ㅐ': 'fd',
        'ㅔ': 'c',
        'ㅟ': 'bd',
        'ㅘ': 'vf',
        'ㅚ': 'vd',
        'ㅢ': 'gd',
        'ㅝ': 'vr',
    }
    jong_sung = {
        'ㄱ': 'x',
        'ㄴ': 's',
        'ㄷ': ';z',
        'ㄹ': 'e',
        'ㅁ': 'z',
        'ㅂ': 'w',
        'ㅅ': 'q',
        'ㅇ': 'a',
        'ㅈ': ';e',
        'ㅊ': ';q',
        'ㅋ': ';x',
        'ㅌ': 'sz',
        'ㅍ': ';w',
        'ㅎ': ';s',
        'ㅆ': ';',
        'ㄶ': 'sa',
        'ㅄ': 'wq',
    }
    key_list = []
    for c in flag:
        # Only the decomposition can raise ValueError; the table lookups
        # below raise KeyError, which is deliberately left to propagate.
        try:
            jamo = split_syllable_char(c)
        except ValueError:
            if c in (' ', '\n'):
                key_list.append(c)
                continue
            # Raise explicitly instead of ``assert False``: assert statements
            # are removed under ``python -O``, which would silently drop the
            # invalid character from the output.
            raise AssertionError('"{}" is invalid'.format(c))

        if len(jamo) == 2:
            key_list.extend([cho_sung[jamo[0]], jung_sung[jamo[1]]])
        else:
            key_list.extend(
                [cho_sung[jamo[0]], jung_sung[jamo[1]], jong_sung[jamo[2]]])

    keystrokes = ''.join(key_list)
    # Assign each distinct key character an (arbitrary) index.
    sound_map = {k: i for i, k in enumerate(set(keystrokes))}
    msg = '-'.join('{:02x}'.format(sound_map[ch]) for ch in keystrokes)
    with open('keystroke.txt', 'wt') as f:
        f.write(msg)

Exemplo n.º 8

0

Exibir arquivo

def generate(flag):
    """Encode *flag* as a sequence of key codes and write it to ``keystroke.txt``.

    Every character of ``flag`` is decomposed with ``split_syllable_char`` and
    each component is translated to a key string through the tables below
    (initial, medial and, when present, final). A space or newline for which
    the decomposition raises ``ValueError`` is kept verbatim. All key strings
    are then joined, every distinct character is given an index, and the
    indices are written as two-digit hex values separated by ``-``.

    The index assignment comes from iterating a ``set``, so the mapping is
    arbitrary and is not guaranteed to be the same from one run to the next.

    Args:
        flag: Text to encode.

    Raises:
        AssertionError: If a character other than a space or newline cannot be
            decomposed.
        KeyError: If a decomposed component is missing from the tables.
    """
    cho_sung = {
        'ㄱ': 'k',
        'ㄴ': 'u',
        'ㄷ': 'i',
        'ㄹ': 'm',
        'ㅁ': 'y',
        'ㅂ': 'o',
        'ㅅ': 'n',
        'ㅇ': 'j',
        'ㅈ': 'l',
        'ㅊ': 'hl',
        'ㅋ': 'hk',
        'ㅌ': 'hi',
        'ㅍ': 'ho',
        'ㅎ': 'h',
        'ㅆ': 'nn',
        'ㄸ': 'ii',
        'ㄲ': 'kk',
        'ㅉ': 'll',
    }
    jung_sung = {
        'ㅏ': 'f',
        'ㅑ': '.g',
        'ㅓ': 'r',
        'ㅕ': 't',
        'ㅗ': 'v',
        'ㅛ': '.v',
        'ㅜ': 'b',
        'ㅠ': '.b',
        'ㅡ': 'g',
        'ㅣ': 'd',
        'ㅐ': 'fd',
        'ㅔ': 'c',
        'ㅟ': 'bd',
        'ㅘ': 'vf',
        'ㅚ': 'vd',
        'ㅢ': 'gd',
        'ㅝ': 'vr',
    }
    jong_sung = {
        'ㄱ': 'x',
        'ㄴ': 's',
        'ㄷ': ';z',
        'ㄹ': 'e',
        'ㅁ': 'z',
        'ㅂ': 'w',
        'ㅅ': 'q',
        'ㅇ': 'a',
        'ㅈ': ';e',
        'ㅊ': ';q',
        'ㅋ': ';x',
        'ㅌ': 'sz',
        'ㅍ': ';w',
        'ㅎ': ';s',
        'ㅆ': ';',
        'ㄶ': 'sa',
        'ㅄ': 'wq',
    }
    key_list = []
    for c in flag:
        # Only the decomposition can raise ValueError; the table lookups
        # below raise KeyError, which is deliberately left to propagate.
        try:
            jamo = split_syllable_char(c)
        except ValueError:
            if c in (' ', '\n'):
                key_list.append(c)
                continue
            # Raise explicitly instead of ``assert False``: assert statements
            # are removed under ``python -O``, which would silently drop the
            # invalid character from the output.
            raise AssertionError('"{}" is invalid'.format(c))

        if len(jamo) == 2:
            key_list.extend([cho_sung[jamo[0]], jung_sung[jamo[1]]])
        else:
            key_list.extend(
                [cho_sung[jamo[0]], jung_sung[jamo[1]], jong_sung[jamo[2]]])

    keystrokes = ''.join(key_list)
    # Assign each distinct key character an (arbitrary) index.
    sound_map = {k: i for i, k in enumerate(set(keystrokes))}
    msg = '-'.join('{:02x}'.format(sound_map[ch]) for ch in keystrokes)
    with open('keystroke.txt', 'wt') as f:
        f.write(msg)

Exemplo n.º 9

0

Exibir arquivo

Arquivo: utils.py Projeto: snowvil/Kiwi

 def split(c):
     """Decompose *c* with ``hangul_utils.split_syllable_char`` and return the result unchanged."""
     # Imported lazily so hangul_utils is only required when split() is called.
     from hangul_utils import split_syllable_char
     # A variant that dropped a leading 'ㅇ' was disabled; the full result is returned.
     return split_syllable_char(c)