def gen_syllable_tag(syllable_label_path, tone, start_set, end_set, tag):
    # Collect a tag for every syllable whose tone matches `tone`
    # ('01234' selects all tones). Tag format:
    # <set>_<tag>_<set><file_no>_<syllable_index>,
    # e.g. 'a_tscsd_manual_a01_3' when tag is 'tscsd_manual'.
    lf0_tags = []

    for char_set in Utility.char_range(start_set, end_set):
        path = '{}/{}'.format(syllable_label_path, char_set)

        count = Utility.count_valid_file(path)

        for i in range(1, count + 1):

            filepath = '{}/tscsd_stust_{}{}.stresslab'.format(
                path, char_set, Utility.fill_zero(i, 2))
            syllable_count = 0
            for line in Utility.read_file_line_by_line(filepath):
                syllable_count += 1

                if tone == '01234' or line[0] == tone:
                    lf0_tags.append('{}_{}_{}{}_{}'.format(
                        char_set, tag, char_set, Utility.fill_zero(i, 2),
                        syllable_count))

    return lf0_tags

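# A minimal usage sketch (the path below is hypothetical), assuming the
# Utility helpers used above are importable:
#
#   tags = gen_syllable_tag('/path/to/stress_labels', tone='0',
#                           start_set='a', end_set='z', tag='tscsd_manual')
#   print len(tags)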
def run_gen_mono(utt_set):

    set_path = '{}/{}/'.format(utterance_path, utt_set)

    set_syllable_base_path = '{}/{}/'.format(syllable_base, utt_set)

    out_set_path = '{}/{}/'.format(output_path, utt_set)
    Utility.make_directory(out_set_path)

    for i in xrange(1, 51):
        utt_file = Utility.yaml_load('{}/tscsd{}{}.utt.yaml'.format(
            set_path, utt_set, Utility.fill_zero(i, 2)))
        # print utt_file

        out_file = '{}/tscsd{}{}.lab'.format(out_set_path, utt_set,
                                             Utility.fill_zero(i, 2))

        stress_list = []
        recursion(utt_file, stress_list)

        syllable_time_label = Utility.read_file_line_by_line(
            '{}/tscsd{}{}.lab'.format(set_syllable_base_path, utt_set,
                                      Utility.fill_zero(i, 2)))
        # print stress_list, len(stress_list)
        # print len(syllable_time_label)
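        # Flag utterances whose time labels and stress list differ in length.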
        if len(syllable_time_label) != len(stress_list):
            print utt_set, i
            # print 'Error'
            # sys.exit()

        out = []
        for idx, line in enumerate(syllable_time_label):
            # print line, stress_list[idx]
            o = '{}::{}'.format(
                Utility.trim(line).replace('-', '_').replace('+', '_'),
                stress_list[idx])
            # print o
            out.append(o)

        Utility.write_to_file_line_by_line(out_file, out)

        # sys.exit()

    pass
Example #3
def run_command(feature_type, missing_data, data_object_base_path_name,
                base_out_path, input_dims, tone_list, dur_position):
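    # Train a Bayesian GPLVM per tone under three feature configurations:
    # static only, +delta, and +delta-delta.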

    deltas = [[False, False], [True, False], [True, True]]

    output_name_paths = []

    for i, d in enumerate(deltas):
        outp = '{}/input_dims_{}/{}_delta-{}_delta-delta-{}/'.format(
            base_out_path, input_dims, Utility.fill_zero(i + 1, 2), d[0], d[1])
        output_name_paths.append(outp)

    print 'Missing Data : {}'.format(missing_data)

    for idx, output_name in enumerate(output_name_paths):

        delta_bool = deltas[idx][0]
        delta2_bool = deltas[idx][1]

        if missing_data:
            method_name = 'BayesianGPLVMMiniBatch_Missing'
        else:
            method_name = 'BGP_LVM'

        for tone in tone_list:

            print 'Delta : {}, Delta-Delta : {}'.format(
                delta_bool, delta2_bool)

            data_object_path = '{}{}.pickle'.format(data_object_base_path_name,
                                                    tone)

            print 'data path ', data_object_path

            syllable_management = Utility.load_obj(data_object_path)

            output_path = '{}/{}_Tone_{}/'.format(output_name, method_name,
                                                  tone)

            Utility.make_directory(output_path)

            print output_path

            Latent_variable_model_Training.execute_Bayesian_GPLVM_training(
                syllable_management,
                feature_type,
                input_dims,
                output_path,
                dur_position=dur_position,
                subtract_typical_contour=False,
                exp=False,
                delta_bool=delta_bool,
                delta2_bool=delta2_bool,
                missing_data=missing_data)

    pass
Example #4
def run_cal_distortion(basename, tmp_path, predictive, alpha, beta):

    for num in range(1, 51):

        name = '{}{}'.format(basename, Utility.fill_zero(num, 2))
        predicted_mean_path = '{}/{}/mean.npy'.format(predictive, name)

        mean = np.load(predicted_mean_path)[:, 0]

        vuv = np.load('{}/{}.npy'.format(vuv_path, name))
        vuv = vuv.reshape(len(vuv))

        # Mark unvoiced frames in the predicted mean with the -1e10 undefined
        # value before writing the .lf0 file.
        mean[np.where(vuv == -1.00000000e+10)] = -1.00000000e+10

        Utility.write_to_file_line_by_line('{}/{}.lf0'.format(tmp_path, name),
                                           mean)

    rmse, l = Distortion.lf0_distortion_syn_is_readable(org_path, tmp_path)

    print 'Alpha {}, Beta {}, LF0 RMSE: {:f} in {} frames'.format(
        alpha, beta, rmse, l)

    pass
    for b in np.arange(start, end, increment):

        beta = b

        print 'Beta : ', b

        outbase = '{}/num_dct_cov_{}/'.format(outname, num_coeff)
        outpath = '{}/Beta_{}/lf0/'.format(outbase, beta)
        figure_path = '{}/Beta_{}/fig/'.format(outbase, beta)

        Utility.make_directory(outpath)
        Utility.make_directory(figure_path)

        for num in range(1, 51):

            name = '{}{}'.format(basename, Utility.fill_zero(num, 2))

            print name

            outfile = '{}/{}.npy'.format(outpath, name)
            # Utility.make_directory(outfilepath)

            base_path = '{}/{}/'.format(frame_predicted_lf0_path, name)
            label_path = '{}/{}.lab'.format(syl_duration_path, name)

            var_path = '{}/inv_dimension_cov.npy'.format(
                frame_predicted_lf0_path)

            syllable_base_path = '{}/{}/'.format(syllable_predicted_dct_path,
                                                 name)
def run_command(feature_type, missing_data, data_object_base_path_name,
                base_out_path, input_dims, tone_list, dur_position,
                num_sampling):
    
    deltas = [
        [False, False],
        [True, False],
        [True, True]
    ]

    output_name_paths = []

    for i, d in enumerate(deltas):
        outp = '{}/input_dims_{}/{}_delta-{}_delta-delta-{}/'.format(
            base_out_path, input_dims, Utility.fill_zero(i + 1, 2), d[0], d[1])
        output_name_paths.append(outp)

    print 'Missing Data : {}'.format(missing_data)
    print 'Inducing points : 10 percent'

    for idx, output_name in enumerate(output_name_paths):

        delta_bool = deltas[idx][0]
        delta2_bool = deltas[idx][1]

        if missing_data:
            method_name = 'BayesianGPLVMMiniBatch_Missing'
        else:
            method_name = 'BGP_LVM'

        for tone in tone_list:

            print 'Delta : {}, Delta-Delta : {}'.format(
                delta_bool, delta2_bool)

            data_object_path = '{}{}.pickle'.format(data_object_base_path_name,
                                                    tone)

            print 'data path ', data_object_path

            syllable_management = Utility.load_obj(data_object_path)

            if len(syllable_management.syllables_list) == 0:
                print 'No syllable in this object database : {}'.format(tone)
                print '-----------------------------------------------------------------'
                continue

            output_path = '{}/{}_Tone_{}/'.format(output_name, method_name,
                                                  tone)

            Utility.make_directory(output_path)

            print output_path

            Latent_variable_model_Training.execute_Bayesian_GPLVM_training(
                syllable_management, 
                feature_type, 
                input_dims, 
                output_path,
                num_sampling=num_sampling,
                dur_position=dur_position,
                delta_bool=delta_bool,
                delta2_bool=delta2_bool,
                missing_data=missing_data,
                num_inducing=int(len(syllable_management.syllables_list) * 0.1))

    pass
        set_stress_path = '{}/{} lab/'.format(stress_path, ch)

        set_utt_base_path = '{}/{}/'.format(utt_base_path, ch)

        set_syllable_full_path = '{}/{}/'.format(syllable_full_path, ch)

        set_out_path = '{}/{}/'.format(out_path, ch)

        Utility.make_directory(set_out_path)

        if (Utility.is_dir_exists(set_stress_path)
                and Utility.is_dir_exists(set_utt_base_path)):
            print ch

            for i in xrange(1, 51):

                name = 'tscsd{}{}'.format(ch, Utility.fill_zero(i, 2))

                yaml_filename = '{}/{}.utt.yaml'.format(set_utt_base_path, name)
                if not Utility.is_file_exist(yaml_filename):
                    continue

                full_file = '{}/{}.lab'.format(set_syllable_full_path, name)

                count = [0]
                yaml = Utility.yaml_load(yaml_filename)
                add_stress(yaml, count, name)

                if len(Utility.read_file_line_by_line(full_file)) != count[0]:
                    print 'Not equal'
                    print name, len(Utility.read_file_line_by_line(full_file)), count[0]
Example #8
    out_path = '/work/w21/decha/Interspeech_2017/plot/single-multi-450/'
    Utility.make_directory(out_path)
    paths = [
        # ['/work/w21/decha/Interspeech_2017/real_result/single_250_lf0/', '/work/w21/decha/Interspeech_2017/real_result/multi_250_lf0/'],
        [
            '/work/w21/decha/Interspeech_2017/real_result/single_450_lf0/',
            '/work/w21/decha/Interspeech_2017/real_result/multi_450_lf0/'
        ]
        # ['/work/w21/decha/Interspeech_2017/real_result/single_250_lf0/', '/work/w21/decha/Interspeech_2017/real_result/single_450_lf0/']
    ]

    for path in paths:

        for i in range(1, 51):

            base = 'tscsdj{}'.format(Utility.fill_zero(i, 2))

            lf0_single = np.load('{}/{}.npy'.format(path[0], base))
            lf0_multi = np.load('{}/{}.npy'.format(path[1], base))

            print base
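            # LF0 RMSE between the two predicted contours, then each
            # against the original lf0.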
            single_vs_multi_rmse = lf0_distortion_syn_is_gpr_format(
                lf0_single, lf0_multi)

            original_lf0 = Utility.read_lf0_into_ascii('{}/{}.lf0'.format(
                original, base))

            single = lf0_distortion_syn_is_gpr_format(lf0_single, original_lf0)
            multi = lf0_distortion_syn_is_gpr_format(lf0_multi, original_lf0)

            print single_vs_multi_rmse, single, multi, 'Improve : ', (single -
Example #9
    UNDEF_VALUE = -1.00000000e+10

    dur_path = '/work/w2/decha/Data/GPR_speccom_data/Generated_Parameter/950_GPR/dur/param_mean/'

    mono_path = '/work/w2/decha/Data/GPR_speccom_data/mono/tsc/sd/j/'

    mono_to_syl_path = '/work/w2/decha/Data/GPR_speccom_data/phones_in_syllable_duration_object/j/'

    mono_outpath = '/work/w2/decha/Data/GPR_speccom_data/Generated_Parameter/950_GPR/mono/j/'
    Utility.make_directory(mono_outpath)

    syl_outpath = '/work/w2/decha/Data/GPR_speccom_data/Generated_Parameter/950_GPR/syllable/j/'
    Utility.make_directory(syl_outpath)

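    # Combine predicted durations with the mono labels to write mono- and
    # syllable-level label files for each utterance.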
    for i in range(1, 51):
        path = '{}/tscsdj{}.npy'.format(dur_path, Utility.fill_zero(i, 2))
        mono = '{}/tscsdj{}.lab'.format(mono_path, Utility.fill_zero(i, 2))

        mono_to_syl = '{}/tscsdj{}.dur'.format(mono_to_syl_path,
                                               Utility.fill_zero(i, 2))

        mono_outfile = '{}/tscsdj{}.lab'.format(mono_outpath,
                                                Utility.fill_zero(i, 2))

        syl_outfile = '{}/tscsdj{}.lab'.format(syl_outpath,
                                               Utility.fill_zero(i, 2))

        gen_mono(path, mono, mono_to_syl, mono_outfile, syl_outfile)

        # sys.exit()
Example #10
        outpath = '/work/w2/decha/Data/GPR_speccom_data/00_syllable_level_data/dct_separated_tone_unstress/{}/{}/{}-coeff/tsc/sd/'.format(
            incl_zero, tone, num_coeff)
        Utility.make_directory(outpath)

        print outpath

        label_path = '/work/w2/decha/Data/GPR_speccom_data/00_syllable_level_data/mono/tsc/sd/'
        for s in Utility.char_range('a', 'z'):
            set_label_path = '{}/{}/'.format(label_path, s)

            set_dct_path = '{}/{}/'.format(outpath, s)
            Utility.make_directory(set_dct_path)

            for x in range(1, 51):

                name = 'tscsd{}{}'.format(s, Utility.fill_zero(x, 2))

                file_path = '{}/{}.lab'.format(set_label_path, name)

                if not Utility.is_file_exist(file_path): continue

                dur_list, names = PoGUtility.gen_dur_and_name_list(
                    file_path, name)

                if len(dur_list) != len(names):
                    print name

                # print names

                dct_list = []
Example #11
if __name__ == '__main__':

    label_path = '/work/w2/decha/Data/GPR_speccom_data/00_syllable_level_data/syllable_time/'

    start = sys.argv[1]
    end = sys.argv[2]

    all_dur = 0

    for i in Utility.char_range(start, end):
        set_path = '{}/{}/'.format(label_path, i)

        for n in range(1, 51):
            filepath = '{}/tscsd{}{}.lab'.format(set_path, i,
                                                 Utility.fill_zero(n, 2))

            for line in Utility.read_file_line_by_line(filepath):
                l = Utility.trim(line)
                spl = l.split(' ')
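                # Skip silence and pause segments.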
                if spl[2] in ['sil-sil+sil-x', 'pau-pau+pau-x']:
                    print spl[2]
                    continue
                else:
                    all_dur = all_dur + (int(spl[1]) - int(spl[0]))

    print all_dur
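    # Durations are in 100 ns units: /1e7 gives seconds, /60 gives minutes.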
    print float(all_dur) / 10000000.0 / 60.0

    pass
    np.save(outpath, np.array(out))


if __name__ == '__main__':

    full_path = '/work/w2/decha/Data/GPR_speccom_data/full_with_stress/tsc/sd/'

    out_main_path = '/work/w2/decha/Data/GPR_speccom_data/00_syllable_level_data/stress_list/'

    for sett in Utility.char_range('a', 'z'):
        sett_path = '{}/{}/'.format(full_path, sett)

        sett_out = '{}/{}/'.format(out_main_path, sett)

        Utility.make_directory(sett_out)

        for num in range(1, 51):
            filepath = '{}/tscsd{}{}.lab'.format(sett_path, sett,
                                                 Utility.fill_zero(num, 2))

            if not Utility.is_file_exist(filepath): continue

            outfile = '{}/tscsd{}{}.npy'.format(sett_out, sett,
                                                Utility.fill_zero(num, 2))

            gen_stress(filepath, outfile)

            # sys.exit()

    pass
import numpy as np
import matplotlib.pyplot as plt

import re

if __name__ == '__main__':

    outpath = '/work/w2/decha/Data/GPR_speccom_data/01_phone_level_data/stress_list/j/'

    Utility.make_directory(outpath)

    for num in range(1, 51):

        name = 'tscsdj{}'.format(Utility.fill_zero(num, 2))

        filename = '/work/w2/decha/Data/GPR_speccom_data/full_time_with_stress/tsc/sd/j/{}.lab'.format(
            name)

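        # The regex captures start/end times, the current phone, its position
        # field, the tone, and the stress flag from each full-context label.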
        pattern = re.compile(
            r"""(?P<start>.+)\s(?P<end>.+)\s.+\-(?P<curphone>.+)\+.+/A:.+\-(?P<phone_position>.+)_.+\+.+/B:.+\-(?P<tone>.+)\+.+/C:.+/I:.+\-(?P<stress>.+)\+.+""",
            re.VERBOSE)
        lines = Utility.read_file_line_by_line(filename)

        out = []

        for line in lines:
            # print line
            match = re.match(pattern, line)
            if match:
import numpy

import array

if __name__ == '__main__':

    mono_path = '/work/w2/decha/Data/GPR_speccom_data/mono/tsc/sd/'
    mono_with_tab_path = '/work/w2/decha/Data/GPR_speccom_data/mono_with_tab/tsc/sd/'

    for sett in Utility.char_range('a', 'z'):

        Utility.make_directory('{}/{}/'.format(mono_with_tab_path, sett))

        for i in range(1, 51):

            base = 'tscsd{}{}'.format(sett, Utility.fill_zero(i, 2))

            mono = '{}/{}/{}.lab'.format(mono_path, sett, base)
            mono_with_tab = '{}/{}/{}.lab'.format(mono_with_tab_path, sett,
                                                  base)

            out = []

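            # Rewrite each mono label line with tabs instead of spaces.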
            for line in Utility.read_file_line_by_line(mono):
                l = Utility.trim(line)
                l = l.replace(' ', '\t')
                out.append(l)

            Utility.write_to_file_line_by_line(mono_with_tab, out)

    pass