Exemplo n.º 1
0
def get_middle_name(conn, n1, n2, saju, gender, n2_rsc_type, mode):
    """Build full-name candidates by trying each naming hanja as the third character.

    Every row selected from ``naming_hanja`` is treated as a candidate ``n3``
    and run through the filters below, in order; a candidate that fails any
    filter is dropped.

    Args:
        conn: Database connection; used to open a cursor and also passed on
            to ``check_positive_negative`` and ``check_total_stroke``.
        n1: Record of the first character, indexed by ``HANJA`` / ``READING``.
        n2: Record of the second character, indexed the same way.
        saju: Mapping whose ``'c1'`` and ``'c2'`` entries are compared with
            the candidate's ``RSC_TYPE`` value.
        gender: Forwarded to ``check_last_name_gender``.
        n2_rsc_type: ``RSC_TYPE`` value that a candidate must not share.
        mode: Forwarded to ``check_total_stroke``.

    Returns:
        A dict mapping the concatenated hanja of ``n1``/``n2``/``n3`` to their
        concatenated readings, or ``None`` when no candidate survives.
    """
    cursor = conn.cursor()
    query = """
    SELECT hanja,reading,strokes,add_strokes,rsc_type,pronunciations
    FROM naming_hanja
    WHERE is_naming_hanja=1 AND reading
    NOT IN ('만', '병', '백', '장', '춘', '최', '충', '창', '치', '참', '천',
    '택', '탁', '태', '외', '사', '매', '읍', '소', '종', '순', '요', '자',
    '경', '옥', '해', '부', '효', '존', '난', '류', '홍', '처', '필')
    """
    candidates = {}
    for n3 in cursor.execute(query):
        # Reject a candidate that repeats the hanja or the reading of an
        # earlier character (e.g. 김주김, 김소소).
        repeats_hanja = n1[HANJA] == n3[HANJA] or n2[HANJA] == n3[HANJA]
        if (repeats_hanja
                or n1[READING] == n3[READING]
                or n2[READING] == n3[READING]):
            continue

        given_reading = '%s%s' % (n2[READING], n3[READING])
        if check_name(given_reading) is False:
            continue
        if check_last_name_gender(n3[READING], given_reading, gender) is False:
            continue

        # The candidate's resource type has to match one of the saju entries...
        if saju['c1'] != n3[RSC_TYPE] and saju['c2'] != n3[RSC_TYPE]:
            continue
        # ...but must differ from the resource five-element type already
        # used by the middle name.
        if n2_rsc_type == n3[RSC_TYPE]:
            continue

        # Positive/negative check first, then the stroke-total check (which
        # also covers positive/negative hanja strokes).
        if (check_positive_negative(conn, n1, n2, n3) is False
                or check_total_stroke(conn, n1, n2, n3, mode) is False):
            continue

        # Pronunciation checks work on the decomposed readings.
        jamo = [split_syllable_char(part[READING]) for part in (n1, n2, n3)]
        if balum_oheng(*jamo) is False:
            continue
        if check_all_name_hard_pronounce(*jamo) is False:
            continue

        hanja_key = '%s%s%s' % (n1[HANJA], n2[HANJA], n3[HANJA])
        candidates[hanja_key] = '%s%s%s' % (
            n1[READING], n2[READING], n3[READING])

    return candidates or None
Exemplo n.º 2
0
def check_two_words_hard_pronounce(n1, n2):
    """Tell whether two adjacent syllables avoid the known hard-to-say pairs.

    Both arguments are decomposed with ``split_syllable_char``. The check only
    applies when each decomposition has exactly three components; in that
    case the second and third components of both syllables are compared with
    a fixed list of combinations.

    Returns:
        ``False`` when the pair matches one of the listed combinations,
        ``True`` otherwise.
    """
    first = split_syllable_char(n1)
    second = split_syllable_char(n2)

    # Syllables that do not split into three components are never rejected.
    if len(first) != 3 or len(second) != 3:
        return True

    # Each entry: (components 1 and 2 of the first syllable,
    #              components 1 and 2 of the second syllable).
    hard_pairs = (
        (('ㅕ', 'ㅇ'), ('ㅡ', 'ㅇ')),  # e.g. 경흥원
        (('ㅏ', 'ㅇ'), ('ㅏ', 'ㅇ')),  # e.g. 강항준
        (('ㅏ', 'ㅇ'), ('ㅑ', 'ㅇ')),  # e.g. 강향준
    )
    current = ((first[1], first[2]), (second[1], second[2]))
    return current not in hard_pairs
Exemplo n.º 3
0
def get_label(index_data):
    """Build concatenated one-hot label rows for the target characters.

    For every entry of ``index_data`` the ``'target'`` character is encoded
    into four one-hot groups: Korean initial (cho), medial (jung), final
    (jong) and English. Each group has one extra trailing column that is set
    when the target does not belong to that group.

    Args:
        index_data: Sequence of mappings, each with a ``'target'`` entry.

    Returns:
        A 2-D array with one row per entry: the cho, jung, jong and English
        groups joined along axis 1.
    """
    rows = len(index_data)
    # One additional column per group serves as the 'invalid' label.
    label_ko_cho = np.zeros([rows, len(ko_chset_cho) + 1])
    label_ko_jung = np.zeros([rows, len(ko_chset_jung) + 1])
    label_ko_jong = np.zeros([rows, len(ko_chset_jong) + 1])
    label_en = np.zeros([rows, len(en_chset) + 1])

    for i, member in enumerate(index_data):
        target = member['target']

        # Korean part: only a single character accepted by check_syllable
        # is decomposed.
        if len(target) == 1 and check_syllable(target):
            jamo = split_syllable_char(target)
            label_ko_cho[i][ko_chset_cho.index(jamo[0])] = 1
            label_ko_jung[i][ko_chset_jung.index(jamo[1])] = 1
            # A decomposition with fewer than three parts is recorded in
            # column 0 of the jong group.
            jong_col = 0 if len(jamo) < 3 else ko_chset_jong.index(jamo[2])
            label_ko_jong[i][jong_col] = 1
        else:
            # The last column of each Korean group is the 'invalid' slot.
            label_ko_cho[i][-1] = 1
            label_ko_jung[i][-1] = 1
            label_ko_jong[i][-1] = 1

        # English part: known characters get their own column, everything
        # else goes to the trailing 'invalid' column.
        en_col = en_chset.index(target) if target in en_chset else len(en_chset)
        label_en[i][en_col] = 1

    groups = (label_ko_cho, label_ko_jung, label_ko_jong, label_en)
    label = np.concatenate(groups, axis=1)
    print("label loaded")
    return label
Exemplo n.º 4
0
def set_five_type(conn):
    """Derive the five-element type of each naming hanja and store it.

    Reads every row of ``naming_hanja``; rows whose ``is_naming_hanja`` is 0
    are skipped. For the rest, the first component of the decomposed reading
    (the initial consonant) is passed to ``get_five_type``. A result of -1 is
    skipped; any other result is printed and handed to
    ``update_hangul_strokes`` together with the hanja and the connection.

    Args:
        conn: Database connection used for the query and forwarded to
            ``update_hangul_strokes``.
    """
    cursor = conn.cursor()
    rows = cursor.execute(
        'SELECT hanja,is_naming_hanja,reading FROM naming_hanja')
    for hanja, is_naming_hanja, reading in rows:
        if is_naming_hanja == 0:
            # Not usable in a Korean name.
            continue
        initial = split_syllable_char(reading)[0]
        f_type = get_five_type(initial)  # the 'five elements' of yin-yang
        if f_type == -1:
            continue
        print(f_type, hanja)
        update_hangul_strokes(f_type, hanja, conn)
Exemplo n.º 5
0
def eval_tail(c, cand):
    """Score the tail of a candidate; the tail measures excessive splitting.

    The base score is ``cand.tail + 1``. Deductions are then subtracted:

    * 10 when the candidate's value is ``.`` or ``,`` (these symbols should
      be split off);
    * 2 when ``c`` is an apostrophe;
    * 2 when ``cand.prob`` exceeds 0.99 and ``c`` is a syllable whose second
      decomposed component is 'ㅏ'.

    The last two deductions are only considered when ``c`` has a ``prob``
    attribute, ``c.value`` is a non-empty value and ``cand.value`` is
    non-empty.

    Returns:
        ``cand.tail + 1`` minus the accumulated deductions.
    """
    deduction = 0

    # The '.' and ',' symbols get a strong push towards being split.
    if cand.value in ('.', ','):
        deduction += 10

    # NOTE(review): the guard tests for ``c.prob`` while the body reads
    # ``cand.prob`` - confirm this is intended.
    comparable = (
        hasattr(c, "prob")
        and c.value is not None
        and len(c.value) > 0
        and len(cand.value) > 0
    )
    if comparable:
        if c.value == '\'':
            deduction += 2
        if (cand.prob > 0.99
                and check_syllable(c.value)
                and split_syllable_char(c.value)[1] == 'ㅏ'):
            deduction += 2

    return cand.tail + 1 - deduction
Exemplo n.º 6
0
def get_hangul_list():
    """Read ``unicode_hangul`` and return the syllables that are not blocked.

    The file is read line by line and each line is split on whitespace.
    Tokens for which ``check_syllable`` returns ``False`` are ignored. The
    remaining tokens are decomposed and filtered:

    * if the first component is a key of ``BLOCK_LIST1``, the token is
      dropped when the stored value equals 1 and kept otherwise;
    * only when that lookup fails is the third component looked up in
      ``BLOCK_LIST3``; the token is dropped when that value equals 1.

    A failed lookup (missing key, or no third component) means "not blocked".

    Returns:
        List of the accepted tokens, in file order.
    """
    hanguls = []
    with open('unicode_hangul') as f:
        for line in f:
            for h in line.split():
                if check_syllable(h) is False:
                    continue
                split_h = split_syllable_char(h)
                # Only lookup failures are treated as "not listed"; any
                # other error (or KeyboardInterrupt) now propagates instead
                # of being silently swallowed.
                try:
                    blocked = BLOCK_LIST1[split_h[0]] == 1
                except LookupError:
                    try:
                        blocked = BLOCK_LIST3[split_h[2]] == 1
                    except LookupError:
                        # No third component, or it is not listed.
                        blocked = False
                if not blocked:
                    hanguls.append(h)
    return hanguls
Exemplo n.º 7
0
def generate(flag):
    """Encode ``flag`` as a sequence of key indices and write it to a file.

    Each character of ``flag`` is decomposed with ``split_syllable_char`` and
    every component is translated to a key string through the tables below
    (presumably a keyboard layout - confirm against the caller's intent).
    Spaces and newlines are kept as-is. Every distinct character of the
    resulting key sequence is given an index, and the sequence is written to
    ``keystroke.txt`` as hyphen-separated two-digit hex indices.

    Args:
        flag: Iterable of single characters to encode.

    Raises:
        AssertionError: If a character is rejected by ``split_syllable_char``
            with ``ValueError`` and is neither a space nor a newline.
        KeyError: If a decomposed component is missing from its table.
    """
    cho_sung = {
        'ㄱ': 'k',
        'ㄴ': 'u',
        'ㄷ': 'i',
        'ㄹ': 'm',
        'ㅁ': 'y',
        'ㅂ': 'o',
        'ㅅ': 'n',
        'ㅇ': 'j',
        'ㅈ': 'l',
        'ㅊ': 'hl',
        'ㅋ': 'hk',
        'ㅌ': 'hi',
        'ㅍ': 'ho',
        'ㅎ': 'h',
        'ㅆ': 'nn',
        'ㄸ': 'ii',
        'ㄲ': 'kk',
        'ㅉ': 'll',
    }
    jung_sung = {
        'ㅏ': 'f',
        'ㅑ': '.g',
        'ㅓ': 'r',
        'ㅕ': 't',
        'ㅗ': 'v',
        'ㅛ': '.v',
        'ㅜ': 'b',
        'ㅠ': '.b',
        'ㅡ': 'g',
        'ㅣ': 'd',
        'ㅐ': 'fd',
        'ㅔ': 'c',
        'ㅟ': 'bd',
        'ㅘ': 'vf',
        'ㅚ': 'vd',
        'ㅢ': 'gd',
        'ㅝ': 'vr',
    }
    jong_sung = {
        'ㄱ': 'x',
        'ㄴ': 's',
        'ㄷ': ';z',
        'ㄹ': 'e',
        'ㅁ': 'z',
        'ㅂ': 'w',
        'ㅅ': 'q',
        'ㅇ': 'a',
        'ㅈ': ';e',
        'ㅊ': ';q',
        'ㅋ': ';x',
        'ㅌ': 'sz',
        'ㅍ': ';w',
        'ㅎ': ';s',
        'ㅆ': ';',
        'ㄶ': 'sa',
        'ㅄ': 'wq',
    }
    key_list = []
    for c in flag:
        # Only the decomposition can raise ValueError, so only it is guarded.
        try:
            jamo = split_syllable_char(c)
        except ValueError:
            if c == ' ' or c == '\n':
                key_list.append(c)
                continue
            # Raised explicitly (not via ``assert``) so the check also holds
            # when Python runs with -O; the exception type is unchanged.
            raise AssertionError('"{}" is invalid'.format(c))

        if len(jamo) == 2:
            key_list += [cho_sung[jamo[0]], jung_sung[jamo[1]]]
        else:
            key_list += [
                cho_sung[jamo[0]],
                jung_sung[jamo[1]],
                jong_sung[jamo[2]],
            ]

    keystrokes = ''.join(key_list)
    # Indices follow set iteration order, which is arbitrary for strings.
    sound_map = {k: i for i, k in enumerate(set(keystrokes))}
    msg = '-'.join(['{:02x}'.format(sound_map[c]) for c in keystrokes])
    with open('keystroke.txt', 'wt') as f:
        f.write(msg)
Exemplo n.º 8
0
def generate(flag):
    """Encode ``flag`` as a sequence of key indices and write it to a file.

    Each character of ``flag`` is decomposed with ``split_syllable_char`` and
    every component is translated to a key string through the tables below
    (presumably a keyboard layout - confirm against the caller's intent).
    Spaces and newlines are kept as-is. Every distinct character of the
    resulting key sequence is given an index, and the sequence is written to
    ``keystroke.txt`` as hyphen-separated two-digit hex indices.

    Args:
        flag: Iterable of single characters to encode.

    Raises:
        AssertionError: If a character is rejected by ``split_syllable_char``
            with ``ValueError`` and is neither a space nor a newline.
        KeyError: If a decomposed component is missing from its table.
    """
    cho_sung = {
        'ㄱ': 'k',
        'ㄴ': 'u',
        'ㄷ': 'i',
        'ㄹ': 'm',
        'ㅁ': 'y',
        'ㅂ': 'o',
        'ㅅ': 'n',
        'ㅇ': 'j',
        'ㅈ': 'l',
        'ㅊ': 'hl',
        'ㅋ': 'hk',
        'ㅌ': 'hi',
        'ㅍ': 'ho',
        'ㅎ': 'h',
        'ㅆ': 'nn',
        'ㄸ': 'ii',
        'ㄲ': 'kk',
        'ㅉ': 'll',
    }
    jung_sung = {
        'ㅏ': 'f',
        'ㅑ': '.g',
        'ㅓ': 'r',
        'ㅕ': 't',
        'ㅗ': 'v',
        'ㅛ': '.v',
        'ㅜ': 'b',
        'ㅠ': '.b',
        'ㅡ': 'g',
        'ㅣ': 'd',
        'ㅐ': 'fd',
        'ㅔ': 'c',
        'ㅟ': 'bd',
        'ㅘ': 'vf',
        'ㅚ': 'vd',
        'ㅢ': 'gd',
        'ㅝ': 'vr',
    }
    jong_sung = {
        'ㄱ': 'x',
        'ㄴ': 's',
        'ㄷ': ';z',
        'ㄹ': 'e',
        'ㅁ': 'z',
        'ㅂ': 'w',
        'ㅅ': 'q',
        'ㅇ': 'a',
        'ㅈ': ';e',
        'ㅊ': ';q',
        'ㅋ': ';x',
        'ㅌ': 'sz',
        'ㅍ': ';w',
        'ㅎ': ';s',
        'ㅆ': ';',
        'ㄶ': 'sa',
        'ㅄ': 'wq',
    }
    key_list = []
    for c in flag:
        # Only the decomposition can raise ValueError, so only it is guarded.
        try:
            jamo = split_syllable_char(c)
        except ValueError:
            if c == ' ' or c == '\n':
                key_list.append(c)
                continue
            # Raised explicitly (not via ``assert``) so the check also holds
            # when Python runs with -O; the exception type is unchanged.
            raise AssertionError('"{}" is invalid'.format(c))

        if len(jamo) == 2:
            key_list += [cho_sung[jamo[0]], jung_sung[jamo[1]]]
        else:
            key_list += [
                cho_sung[jamo[0]],
                jung_sung[jamo[1]],
                jong_sung[jamo[2]],
            ]

    keystrokes = ''.join(key_list)
    # Indices follow set iteration order, which is arbitrary for strings.
    sound_map = {k: i for i, k in enumerate(set(keystrokes))}
    msg = '-'.join(['{:02x}'.format(sound_map[c]) for c in keystrokes])
    with open('keystroke.txt', 'wt') as f:
        f.write(msg)
Exemplo n.º 9
0
 def split(c):
     """Decompose ``c`` by delegating to ``hangul_utils.split_syllable_char``.

     The result is returned as-is; a leading 'ㅇ' is not stripped.
     """
     from hangul_utils import split_syllable_char
     return split_syllable_char(c)