def get_trans_array(ori_phone_file, tgt_phone_file):
    """Build the index array that reorders phones into the Kaldi phone order.

    The phone list read from ``ori_phone_file`` gets ``'spn'`` appended.
    Phones read from ``tgt_phone_file`` are kept unless they start with
    ``<`` or ``#``.

    Returns:
        An ``np.array`` that lists, for each kept target phone in order, the
        position of every equal entry in the original list. A target phone
        with no equal entry contributes nothing to the array.
    """
    import utils

    source_phones = utils.read_phone_txt(ori_phone_file)
    source_phones.append('spn')

    # str.startswith accepts a tuple, so both prefixes are checked in one call.
    kept_targets = [
        phone for phone in utils.read_phone_txt(tgt_phone_file, 0)
        if not phone.startswith(('<', '#'))
    ]

    positions = [
        pos
        for phone in kept_targets
        for pos, candidate in enumerate(source_phones)
        if phone == candidate
    ]
    return np.array(positions)
Пример #2
0
import utils
import sys
import argparse

# Command line: the input phone table, the output pdf table, and the kind of
# table to produce ('mono' by default).
parser = argparse.ArgumentParser()
parser.add_argument("phone_table_path", type=str)
parser.add_argument("pdf_table_path", type=str)
parser.add_argument(
    "--typ", type=str, default='mono', choices=['mono', 'biphone'])

args = parser.parse_args()

phones = utils.read_phone_txt(args.phone_table_path, 0)

# Keep only the entries that do not begin with '<' or '#'.
valid_phone = [x for x in phones if not x.startswith(('<', '#'))]
if args.typ == 'mono':
    utils.write_phone_file(valid_phone, args.pdf_table_path, True)
else:
    L = []
    for i in range(len(valid_phone)):
        for j in range(len(valid_phone) + 1):
            if j == 0:
                L.append('start' + '_' + valid_phone[i])
            else:
Пример #3
0
import sys

import utils as utl

# Command line: <phone map file> <phone list file to write>
phone_map_txt, phone_list_txt = sys.argv[1], sys.argv[2]

phone_list = utl.read_phone_txt(phone_map_txt)
# 'spn' stands for OOV words and is required by Kaldi.
phone_list.append('spn')

utl.write_phone_file(phone_list, phone_list_txt)
import utils
import sys
# Command line: <phone table file> <pdf table file to write>
phone_table_path, pdf_table_path = sys.argv[1], sys.argv[2]

phones = utils.read_phone_txt(phone_table_path, 0)

# Entries beginning with '<' or '#' are left out of the written table.
L = [x for x in phones if not x.startswith(('<', '#'))]

utils.write_phone_file(L, pdf_table_path, True)
sys.path.append('scripts/')
import utils

# Command line: the phone list file and an optional self-loop probability.
parser = argparse.ArgumentParser()
parser.add_argument("phone_list_txt", type=str)
parser.add_argument(
    "--self_loop_prob",
    type=float,
    default=0.5,
    help=("Probabilty of staying in the same state. (1-self_loop_prob) is "
          "the probability of transition to other state"),
)

args = parser.parse_args()
phone_list = utils.read_phone_txt(args.phone_list_txt, 0)

# Indices into phone_list:
#   same_phone  - 'sil', 'spn', and every "x_y" entry with x == y
#   trans_phone - every "x_y" entry with x != y
same_phone = []
trans_phone = []
for idx, phone in enumerate(phone_list):
    if phone in ('sil', 'spn'):
        same_phone.append(idx)
    if '_' not in phone:
        continue
    x, y = phone.split('_')
    bucket = same_phone if x == y else trans_phone
    bucket.append(idx)

print("<Topology>")
print("<TopologyEntry>")
import sys
sys.path.append('scripts/')
import utils
import math

#48 phones
# Command line: <monophone list> <full phone list> <self-loop probability>
monophones_txt, all_phones_txt, self_loop_prob = (
    sys.argv[1], sys.argv[2], sys.argv[3])

# Negative log of the probability of leaving the current state.
trans_prob = -math.log(1 - float(self_loop_prob))
phones = utils.read_phone_txt(monophones_txt)
cd_phones = utils.read_phone_txt(all_phones_txt, 0)

print("0 1 0 0")
for idx, phone in enumerate(cd_phones):
    # Only "x_y" entries produce a line.
    if '_' not in phone:
        continue
    x, y = phone.split('_')
    iy = phones.index(y)
    line = "1 {0} {1} {1}".format(iy + 2, idx)
    if x != y:
        # Entries with x != y carry the transition cost as a fifth field.
        line += " {:.5f}".format(trans_prob)
    print(line)

for idx, phone in enumerate(cd_phones):
    if '_' in phone:
        x, y = phone.split('_')
        iy = phones.index(y)
        ix = phones.index(x)
        if x == y:
            print("{} {} {} {}".format(ix + 2, iy + 2, idx, idx))
        else:
Пример #7
0
import sys, os
sys.path.append('scripts/')
import utils

# Command line: the monophone list file (which does not include 'spn') and
# the directory that receives the generated dictionary files.
monophone_list = sys.argv[1]
dict_dir = sys.argv[2]
phone_list = utils.read_phone_txt(monophone_list)

# extra_questions.txt is created with no content.
with open(os.path.join(dict_dir, 'extra_questions.txt'), 'w') as f:
    f.write('')

with open(os.path.join(dict_dir, 'optional_silence.txt'), 'w') as f:
    f.write('sil\n')

with open(os.path.join(dict_dir, 'silence_phones.txt'), 'w') as f:
    f.writelines(['sil\n', 'spn\n'])

# One "<phone1>_<phone2>" line for every ordered pair of monophones.
with open(os.path.join(dict_dir, 'nonsilence_phones.txt'), 'w') as f:
    for phone1 in phone_list:
        f.writelines(phone1 + '_' + phone2 + '\n' for phone2 in phone_list)

# For each phone1: one "<phone2> <phone1>_<phone2> <phone2>_<phone2>" line per
# phone2, then a "<phone1> <phone1>_<phone1>" line; "<UNK> spn" closes the file.
with open(os.path.join(dict_dir, 'lexicon.txt'), 'w') as f:
    for phone1 in phone_list:
        for phone2 in phone_list:
            f.write("{0} {1}_{0} {0}_{0}\n".format(phone2, phone1))
        f.write("{0} {0}_{0}\n".format(phone1))
    f.write("<UNK> spn\n")