Example #1
def parse_pattern_load_count(input_mlf, input_dictionary):
    # Count n-grams in the MLF with SRILM's ngram-count and return a dict
    # mapping each sufficiently frequent phrase (as a tuple) to its count.
    temp_count = 'temp_count.txt'
    temp_line = 'temp_line.txt'
    N = 100
    HTK().readMLF(input_mlf,
                  ['sil', 'sp', '<s>', '</s>']).writeMLF(temp_line, ['lm'])
    os.system('ngram-count -text {} -write {} -order {} -vocab {}'.format(
        temp_line, temp_count, str(N), input_dictionary))
    S = open(temp_count).readlines()

    # total_count = 0
    # for raw_line in S:
    #     line = raw_line.rstrip('\n').split()
    #     total_count += int(line[-1])
    # f_threshold = float(total_count) / len(S)

    f_threshold = 2
    count_dict = dict({})
    for raw_line in S:
        line = raw_line.rstrip('\n').split()
        if int(line[-1]) < f_threshold: continue
        if '<s>' in line or '</s>' in line: continue
        phrase = tuple(line[:-1])
        count_dict[phrase] = int(line[-1])
    return count_dict
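
A possible way to call this, assuming SRILM's ngram-count is on the PATH and the HTK helper class comes from the surrounding pyHTK module; the file names below are placeholders, not taken from the original code:

# Hypothetical usage sketch (placeholder file names).
counts = parse_pattern_load_count('train_words.mlf', 'lexicon.dct')
for phrase in sorted(counts, key=counts.get, reverse=True)[:10]:
    print('{} {}'.format(' '.join(phrase), counts[phrase]))
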
Example #2
def HVite_English(script, output_mlf):
    # Messy; don't use yet.
    ASRroot = EXEroot + 'English_ASR/'
    am = ASRroot + 'hmmdefs_mono'
    #am      = ASRroot + 'hmmdefs'
    tied = ASRroot + 'phone_list.txt'
    #tied    = ASRroot + 'tiedlist_phone.txt'
    config = ASRroot + 'config.cfg'
    lex = 'temp_asr_lex.txt'
    wdnet = 'temp_asr_mlf.txt'
    grammar = 'temp_asr_gmr.txt'

    tied_list = [x.strip('\n') for x in open(tied).readlines()]
    L = open(lex, 'w')
    for t in tied_list:
        L.write(t + ' ' + t + '\n')
    L.close()
    #pyHTK.HTK().readDCT(lex).writeDCT(grammar,['grammar','sil','sp'])
    HTK().readDCT(lex).writeDCT(grammar, ['full'])
    os.system('HParse {} {}'.format(grammar, wdnet))
    os.system("HVite -D -H {} -S {} -C {} -w {} -l '*' -i {} -p 0.0 -s 0.0 {} {}".format( \
        am,
        script, config,
        wdnet, output_mlf,
        lex, tied
    ))
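
A hedged usage sketch; it assumes the HTK tools HParse and HVite are installed, and that EXEroot (defined elsewhere in the module) points at the directory containing English_ASR/. The original marks this function as not ready, so this is only illustrative, with placeholder file names:

# Hypothetical call (placeholder file names); the function writes its
# temporary lexicon, grammar and word network into the working directory.
HVite_English('eval_features.scp', 'phone_recognition.mlf')
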
Example #3
def lab2wav(label, wav_dir, corpus):
    '''
    input:  word-aligned transcription label (MLF)
    output: folder that receives one WAV file per word
    option: folder holding the source corpus WAV files
    '''
    SYS().cldir(wav_dir)
    A = HTK().readMLF(label)
    waveData = dict({})
    params = []
    # print 'ready'
    for wavefile in A.wav2word:
        W = wave.open(corpus + wavefile + '.wav')
        scale = float(W.getnframes()) / A.wav2word[wavefile][-1][1]
        params = W.getparams()
        for word in A.wav2word[wavefile]:
            framechunk = W.readframes(int(scale * (word[2] - word[1])))
            if word[0] in waveData:
                try:
                    waveData[word[0]] += framechunk
                except MemoryError:
                    # Skip words whose accumulated audio no longer fits in memory.
                    print('{} out of memory'.format(word[0]))
            else:
                waveData[word[0]] = framechunk
    for word in waveData:
        if "<" in word:
            continue
            # this should only happen for words with illegal characters
        S = wave.open(wav_dir + word + '.wav', 'w')
        S.setparams(params)
        S.writeframes(waveData[word])
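
A possible call, assuming pyHTK provides the HTK and SYS helpers and that the corpus directory holds one WAV per utterance named in the MLF; all paths are placeholders:

# Hypothetical usage (placeholder paths). Note the trailing slashes:
# the function concatenates directory and file name without os.path.join.
lab2wav('word_aligned.mlf', 'word_wavs/', 'corpus_wavs/')
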
Example #4
def parse_pattern(input_dictionary, input_mlf, output_dictionary, N=2):
    def avg_dict(count_dict):
        tot_con, avg_con = 0, 0
        for phrase in count_dict:
            tot_con += count_dict[phrase]
            avg_con += 1
        print('{} {} {}'.format(tot_con, avg_con, 1.0 * tot_con / avg_con))
        return 1.0 * tot_con / avg_con

    def add_phrase(phrase):
        if phrase in p2w: return
        word = ''
        for phone in phrase:
            word += str(phone)
        w2p[word] = phrase
        p2w[phrase] = word
        wdl.append(word)

    def entropy(v):
        s = sum(v)
        v = [float(p) / s for p in v]
        H = 0
        for p in v:
            H -= p * math.log(p, 2)
        return H

    count_dict = parse_pattern_load_count(input_mlf, input_dictionary, N)
    cond_dict_tail = ({})
    cond_dict_head = ({})
    for phrase in count_dict:
        # Group each phrase's count under its shared prefix (tail context)
        # and its shared suffix (head context).
        cond_dict_tail.setdefault(phrase[:-1], []).append(count_dict[phrase])
        cond_dict_head.setdefault(phrase[1:], []).append(count_dict[phrase])

    entropy_head = ({})
    entropy_tail = ({})
    for phrase in count_dict:
        if phrase not in cond_dict_tail or phrase not in cond_dict_head: continue
        entropy_tail[phrase] = entropy(cond_dict_tail[phrase])
        entropy_head[phrase] = entropy(cond_dict_head[phrase])

    thr_count = avg_dict(count_dict) * 5
    thr_head = avg_dict(entropy_head)
    thr_tail = avg_dict(entropy_tail)
    w2p = dict({})
    p2w = dict({})
    wdl = []
    for phrase in count_dict:
        if phrase in entropy_tail and phrase in entropy_head:
            if (entropy_head[phrase] > thr_head and entropy_tail[phrase] > thr_tail
                    and count_dict[phrase] > thr_count):
                add_phrase(tuple(phrase))
    A = HTK()
    A.word2pone = w2p
    A.pone2word = p2w
    A.word_list = wdl
    A.writeDCT(output_dictionary, ['sil', 'sp'])
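
A hedged usage sketch for this variant; note that it forwards N to parse_pattern_load_count, so it expects a three-argument version of that helper rather than the two-argument one shown in Example #1. File names are placeholders:

# Hypothetical call (placeholder file names); N is the n-gram order.
parse_pattern('phones.dct', 'train_phones.mlf', 'patterns.dct', N=3)
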
Example #5
def flatten_pattern(input_dictionary, input_mlf, output_dictionary, output_mlf):
    split_token = 'p'

    H = HTK()
    H.readMLF(input_mlf, ['sil', 'sp', '<s>', '</s>'])
    H.readDCT(input_dictionary, ['sil', 'sp', '<s>', '</s>'])

    M = open(output_mlf, 'w')

    M.write('#!MLF!#\n')
    # print H.wavList
    for wav in H.wav_list:
        M.write('"*/{}.rec"\n'.format(wav))
        for word in H.wav2word[wav]:
            # print word
            beg = int(word[1])
            end = int(word[2])
            pro = int(word[3])
            p_list = word[0].split(split_token)[1:]
            p_count = len(p_list)

            delta = 1.0 * (end - beg) / p_count
            p_beg = [int(beg + delta * i) for i in range(p_count)]
            p_end = [int(beg + delta * (i + 1)) for i in range(p_count)]
            p_pro = []
            for i in range(p_count):
                p_pro.append(pro / p_count)
            for i in range(p_count):
                if p_list[i] == '': continue
                M.write(str(p_beg[i]) + ' ')
                M.write(str(p_end[i]) + ' ')
                M.write(split_token + p_list[i] + ' ')
                M.write(str(p_pro[i]))
                M.write('\n')
        M.write('.\n')
    M.close()

    F = HTK()
    F.word_list = []
    F.word2pone = dict({})
    F.pone2word = dict({})
    for pone in H.pone2word:
        for p in pone:
            if p in F.word_list: continue
            F.word_list.append(p)
            F.pone2word[(p,)] = p
            F.word2pone[p] = (p,)
    F.writeDCT(output_dictionary, ['sil', 'sp'])
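
A possible invocation, with placeholder file names; the function expands pattern "words" such as pApB back into individual p-prefixed phones with evenly divided time stamps:

# Hypothetical call (placeholder file names).
flatten_pattern('patterns.dct', 'pattern_aligned.mlf',
                'flat_phones.dct', 'flat_phones.mlf')
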
Example #6
def parse_pattern(input_dictionary, input_mlf, output_dictionary):
    def avg_dict(count_dict):
        tot_con, avg_con = 0, 0
        for phrase in count_dict:
            tot_con += count_dict[phrase]
            avg_con += 1
        print('{} {} {}'.format(tot_con, avg_con, 1.0 * tot_con / avg_con))
        return 1.0 * tot_con / avg_con

    def add_phrase(phrase):
        if phrase in p2w: return
        word = ''
        for phone in phrase:
            word += str(phone)
        w2p[word] = phrase
        p2w[phrase] = word
        wdl.append(word)

    def entropy(v):
        s = sum(v)
        v = [float(p) / s for p in v]
        H = 0
        for p in v:
            H -= p * math.log(p, 2)
        return H

    count_dict = parse_pattern_load_count(input_mlf, input_dictionary)
    cond_dict_tail = ({})
    cond_dict_head = ({})
    for phrase in count_dict:
        # Group each phrase's count under its shared prefix (tail context)
        # and its shared suffix (head context).
        cond_dict_tail.setdefault(phrase[:-1], []).append(count_dict[phrase])
        cond_dict_head.setdefault(phrase[1:], []).append(count_dict[phrase])

    entropy_head = ({})
    entropy_tail = ({})
    for phrase in count_dict:
        if phrase not in cond_dict_tail or phrase not in cond_dict_head:
            continue
        entropy_tail[phrase] = entropy(cond_dict_tail[phrase])
        entropy_head[phrase] = entropy(cond_dict_head[phrase])

    thr_count = avg_dict(count_dict) * 5
    thr_head = avg_dict(entropy_head)
    thr_tail = avg_dict(entropy_tail)
    w2p = dict({})
    p2w = dict({})
    wdl = []
    for phrase in count_dict:
        if phrase in entropy_tail and phrase in entropy_head:
            if (entropy_head[phrase] > thr_head and entropy_tail[phrase] > thr_tail
                    and count_dict[phrase] > thr_count):
                add_phrase(tuple(phrase))
    A = HTK()
    A.word2pone = w2p
    A.pone2word = p2w
    A.word_list = wdl
    A.writeDCT(output_dictionary, ['sil', 'sp'])
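
To make the selection criterion concrete, here is a small self-contained sketch (not part of the original code) of the entropy helper on two context-count lists: a phrase whose neighbouring contexts are spread evenly scores high, while one dominated by a single context scores low, so only high-entropy, high-count phrases survive the thresholds above:

import math

def entropy(v):
    s = float(sum(v))
    return -sum(c / s * math.log(c / s, 2) for c in v)

print(entropy([10, 10, 10, 10]))  # evenly spread contexts -> 2.0 bits
print(entropy([37, 1, 1, 1]))     # one dominant context   -> about 0.5 bits
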
Example #7
def flatten_pattern(input_dictionary, input_mlf, output_dictionary,
                    output_mlf):
    split_token = 'p'

    H = HTK()
    H.readMLF(input_mlf, ['sil', 'sp', '<s>', '</s>'])
    H.readDCT(input_dictionary, ['sil', 'sp', '<s>', '</s>'])

    M = open(output_mlf, 'w')

    M.write('#!MLF!#\n')
    for wav in H.wav_list:
        M.write('"*/{}.rec"\n'.format(wav))
        for word in H.wav2word[wav]:
            #print word
            beg = int(word[1])
            end = int(word[2])
            pro = int(word[3])
            p_list = word[0].split(split_token)[1:]
            p_count = len(p_list)

            delta = 1.0 * (end - beg) / p_count
            p_beg = [int(beg + delta * i) for i in range(p_count)]
            p_end = [int(beg + delta * (i + 1)) for i in range(p_count)]
            p_pro = []
            for i in range(p_count):
                p_pro.append(pro / p_count)
            for i in range(p_count):
                if p_list[i] == '': continue
                M.write(str(p_beg[i]) + ' ')
                M.write(str(p_end[i]) + ' ')
                M.write(split_token + p_list[i] + ' ')
                M.write(str(p_pro[i]))
                M.write('\n')
        M.write('.\n')
    M.close()

    F = HTK()
    F.word_list = []
    F.word2pone = dict({})
    F.pone2word = dict({})
    for pone in H.pone2word:
        #print pone
        for p in pone:
            if p in F.word_list: continue
            F.word_list.append(p)
            F.pone2word[(p,)] = p
            F.word2pone[p] = (p,)
    F.writeDCT(output_dictionary, ['sil', 'sp'])
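
A small self-contained illustration (not part of the original code) of the time-splitting rule used above: a pattern label such as pApBpC is split on the 'p' prefix and its time span is divided evenly among the resulting phones:

split_token = 'p'
word, beg, end = 'pApBpC', 0, 3000000   # pattern label and HTK start/end times
p_list = word.split(split_token)[1:]    # ['A', 'B', 'C']
delta = 1.0 * (end - beg) / len(p_list)
for i, p in enumerate(p_list):
    print('{} {} {}{}'.format(int(beg + delta * i),
                              int(beg + delta * (i + 1)),
                              split_token, p))
# -> 0 1000000 pA / 1000000 2000000 pB / 2000000 3000000 pC
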