Exemplo n.º 1
0
def get_aligned_jointdata(orgdata, orgnpow, tardata, tarnpow, cvdata=None):
    """Get alignment between features

    Parameters
    ----------
    orgdata : array, shape (`T_org`, `dim`)
        Acoustic feature of source speaker
    orgnpow : array, shape (`T_org`)
        Normalized power of source speaker
    tardata : array, shape (`T_tar`, `dim`)
        Acoustic feature of target speaker
    tarnpow : array, shape (`T_tar`)
        Normalized power of target speaker
    cvdata : array, optional, shape (`T_org`, `dim`)
        Converted acoustic feature from source into target.
        When given, the time warping function is estimated between the
        converted and the target features instead of source and target.

    Returns
    -------
    jdata : array
        Joint feature matrix: the aligned source and target static-delta
        frames concatenated column-wise
    twf : array
        Time warping function; `twf[0]` indexes the source frames and
        `twf[1]` the target frames
    mcd : float
        Mel-cepstrum distortion between the aligned features that were used
        for the alignment (converted vs. target when `cvdata` is given,
        source vs. target otherwise)

    Raises
    ------
    ValueError
        If `cvdata` is given and its shape differs from `orgdata`

    """
    # validate first so a bad call fails before any feature extraction
    if cvdata is not None and orgdata.shape != cvdata.shape:
        raise ValueError(
            'Dimension mismatch between orgdata and cvdata: {} {}'.format(
                orgdata.shape, cvdata.shape))

    # static-delta features of the frames kept by the power-based selection
    org_extsddata = extfrm(static_delta(orgdata), orgnpow)
    tar_extsddata = extfrm(static_delta(tardata), tarnpow)

    if cvdata is None:
        ref_extsddata = org_extsddata
    else:
        # converted frames are selected with the source power, so they stay
        # frame-synchronous with org_extsddata and twf[0] is valid for both
        ref_extsddata = extfrm(static_delta(cvdata), orgnpow)

    # calculate twf and mel-cd
    twf = estimate_twf(ref_extsddata, tar_extsddata, distance='melcd')
    mcd = melcd(ref_extsddata[twf[0]], tar_extsddata[twf[1]])

    # the joint matrix always pairs the *source* features with the target
    jdata = np.c_[org_extsddata[twf[0]], tar_extsddata[twf[1]]]

    return jdata, twf, mcd
Exemplo n.º 2
0
def exts_post_FA(data, FA, delta=False):
    """Drop the frames that a forced-alignment file labels as silence.

    Parameters
    ----------
    data : sequence or array
        Per-frame features, indexed by frame number
    FA : str
        Path of a tab-separated alignment file. Column 0 holds
        ``"<start> <end>"`` (separated by one space) and column 1 the label;
        rows labelled ``sil`` are treated as silence.
    delta : bool, optional
        If true, return ``static_delta`` of the kept frames instead of the
        kept frames themselves. Default set to False

    Returns
    -------
    array
        The kept frames in ascending frame order (or their static-delta
        features when `delta` is true)

    """
    spans = []
    phones = []
    with open(FA, 'r') as f:
        for row in csv.reader(f, dialect='excel', delimiter='\t'):
            spans.append(row[0])
            phones.append(row[1])

    # Times are converted to frame indices by multiplying by 100
    # (presumably seconds at a 10 ms frame shift -- confirm).
    # NOTE(review): each silence span rejects frames start+1 .. end, so the
    # frame at `start` itself is kept; left as in the original.
    silence_frames = set()
    for phone, span in zip(phones, spans):
        if phone != 'sil':
            continue
        start_time, end_time = span.split(' ')
        silence_frames.update(range(int(float(start_time) * 100) + 1,
                                    int(float(end_time) * 100) + 1))

    # Walk the frame range in order instead of materialising a set
    # difference: set iteration order is unspecified, which could return the
    # frames out of temporal order.
    n_frames = int(float(spans[-1].split(' ')[1]) * 100)
    kept_frames = [i for i in range(n_frames) if i not in silence_frames]

    kept_data = np.array([data[i] for i in kept_frames])
    if delta:
        return static_delta(kept_data)
    return kept_data
Exemplo n.º 3
0
def extsddata(data, npow, power_threshold=-20):
    """Build static and delta features and keep the frames selected by power

    Parameters
    ----------
    data : array, shape (`T`, `dim`)
        Acoustic feature vector
    npow : array, shape (`T`)
        Normalized power vector. When its length differs from `data` it is
        cut to at most `len(data)` entries before use.
    power_threshold : float, optional
        Power threshold handed to `extfrm`
        Default set to -20

    Returns
    -------
    extsddata : array
        Static and delta feature vectors of the frames retained by `extfrm`

    """
    n_frames = len(data)
    if len(npow) != n_frames:
        # slicing can only shorten: a too-short npow is passed on unchanged
        npow = npow[:n_frames]

    sddata = static_delta(data)
    return extfrm(sddata, npow, power_threshold=power_threshold)
Exemplo n.º 4
0
def generate_align_indexes(pair_path: Tuple[Path, Path]):
    """Compute and save the alignment indexes for one (source, target) pair.

    The source mel-cepstrum is converted with the module-level `mcepgmm`
    (its 0th coefficient is kept as is) and aligned against the target
    mel-cepstrum. The result is written to ``<output>/<source stem>.npy``.
    Nothing is done when that file exists and overwriting is not enabled.

    Parameters
    ----------
    pair_path : tuple of Path
        Paths of the source and the target wave files, in that order
    """

    def _load_mcep(path, sconf, feat, pad_second, threshold_db):
        # load -> pad -> low-cut filter -> analyze -> mel-cepstrum,
        # optionally restricted to the frames above `threshold_db`
        wave = Wave.load(path=path, sampling_rate=sconf.wav_fs)
        wave = wave.pad(pre_second=pad_second, post_second=pad_second)
        filtered = low_cut_filter(wave.wave, wave.sampling_rate, cutoff=70)

        feat.analyze(filtered)
        mcep = feat.mcep(dim=sconf.mcep_dim, alpha=sconf.mcep_alpha)
        if threshold_db is None:
            return mcep

        effective = wave.get_effective_frame(
            threshold_db=threshold_db,
            fft_length=sconf.wav_fftl,
            frame_period=sconf.wav_shiftms,
        )
        return mcep[effective]

    src_path, tgt_path = pair_path
    if src_path.stem != tgt_path.stem:
        print('warning: the file names are different', src_path, tgt_path)

    out = Path(arguments.output, src_path.stem + '.npy')
    if not arguments.enable_overwrite and out.exists():
        return

    # original: analyze, then convert everything but the 0th coefficient
    src_mcep = _load_mcep(src_path, sconf1, feat1,
                          arguments.pad_second1, arguments.threshold_db1)
    converted_wopow = mcepgmm.convert(static_delta(src_mcep[:, 1:]),
                                      cvtype=pconf.GMM_mcep_cvtype)
    mcep1 = numpy.c_[src_mcep[:, 0], converted_wopow]

    # target
    mcep2 = _load_mcep(tgt_path, sconf2, feat2,
                       arguments.pad_second2, arguments.threshold_db2)

    # align
    align_indexes = AlignIndexes.extract(AcousticFeature(mc=mcep1),
                                         AcousticFeature(mc=mcep2),
                                         dtype=arguments.dtype)
    align_indexes.save(path=out,
                       validate=True,
                       ignores=arguments.ignore_feature)
Exemplo n.º 5
0
    def test_BlockDiagonalGMM(self):
        """Train a block-diagonal GMM and check that both conversion types
        return data with the shape of the static input."""
        jnt = np.random.rand(1000, 20)
        gmm_tr = GMMTrainer(n_mix=32, n_iter=10, covtype='block_diag')
        gmm_tr.train(jnt)

        data = np.random.rand(200, 5)
        sddata = static_delta(data)
        # NOTE(review): the convertor is built with covtype='full' although
        # the trainer uses 'block_diag' -- confirm this is intended.
        gmm_cv = GMMConvertor(n_mix=32, covtype='full', gmmmode=None)
        gmm_cv.open_from_param(gmm_tr.param)

        # check every conversion type; previously the 'mlpg' result was
        # overwritten before anything was asserted about it
        for cvtype in ('mlpg', 'mmse'):
            odata = gmm_cv.convert(sddata, cvtype=cvtype)
            assert data.shape == odata.shape
Exemplo n.º 6
0
def feature_conversion(pconf, org_mceps, gmm, gmmmode=None):
    """Conversion of mel-cepstrums

    Parameters
    ----------
    pconf : object
        Pair configuration; `GMM_mcep_n_mix`, `GMM_mcep_covtype` and
        `GMM_mcep_cvtype` are read from it
    org_mceps : list, shape (`num_mceps`)
        List of mel-cepstrums
    gmm : object
        Trained model whose parameters are available as `gmm.param`
    gmmmode : str, optional
        Flag to parameter generation technique
        `None` : Normal VC
        `diff` : Differential VC
        Default set to `None`

    Returns
    -------
    cv_mceps : list, shape (`num_mceps`)
        List of converted mel-cepstrums. The 0th coefficient of every
        mel-cepstrum is copied from the input unchanged.

    Raises
    ------
    ValueError
        If `gmmmode` is neither `None` nor `diff`

    """
    # fail before building the convertor or converting anything; this also
    # rejects a bad mode when `org_mceps` is empty
    if gmmmode not in (None, 'diff'):
        raise ValueError('gmmmode must be `None` or `diff`.')

    cvgmm = GMMConvertor(
        n_mix=pconf.GMM_mcep_n_mix,
        covtype=pconf.GMM_mcep_covtype,
        gmmmode=gmmmode,
    )
    cvgmm.open_from_param(gmm.param)

    sd = 1  # start dimension to convert
    cv_mceps = []
    for mcep in org_mceps:
        cvmcep = cvgmm.convert(static_delta(mcep[:, sd:]),
                               cvtype=pconf.GMM_mcep_cvtype)
        # put the untouched 0th coefficient back in front
        cvmcep = np.c_[mcep[:, 0], cvmcep]
        if gmmmode == 'diff':
            # the converted part is added on top of the original coefficients
            cvmcep[:, sd:] += mcep[:, sd:]
        cv_mceps.append(cvmcep)

    return cv_mceps
Exemplo n.º 7
0
def feature_conversion(pconf, org_mceps, gmm, gmmmode=None):
    """Conversion of mel-cepstrums

    Parameters
    ----------
    pconf : object
        Pair configuration; `GMM_mcep_n_mix`, `GMM_mcep_covtype` and
        `GMM_mcep_cvtype` are read from it
    org_mceps : list, shape (`num_mceps`)
        List of mel-cepstrums
    gmm : object
        Trained model whose parameters are available as `gmm.param`
    gmmmode : str, optional
        Flag to parameter generation technique
        `None` : Normal VC
        `diff` : Differential VC
        Default set to `None`

    Returns
    -------
    cv_mceps : list, shape (`num_mceps`)
        List of converted mel-cepstrums. The 0th coefficient of every
        mel-cepstrum is copied from the input unchanged.

    Raises
    ------
    ValueError
        If `gmmmode` is neither `None` nor `diff`

    """
    # validate up front: previously a bad mode was only detected after the
    # first conversion had already run, and never for an empty list
    if gmmmode not in (None, 'diff'):
        raise ValueError('gmmmode must be `None` or `diff`.')

    cvgmm = GMMConvertor(n_mix=pconf.GMM_mcep_n_mix,
                         covtype=pconf.GMM_mcep_covtype,
                         gmmmode=gmmmode,
                         )
    cvgmm.open_from_param(gmm.param)

    sd = 1  # start dimension to convert
    is_diff = gmmmode == 'diff'
    cv_mceps = []
    for mcep in org_mceps:
        cvmcep_wopow = cvgmm.convert(static_delta(mcep[:, sd:]),
                                     cvtype=pconf.GMM_mcep_cvtype)
        # re-attach the unconverted 0th coefficient
        cvmcep = np.c_[mcep[:, 0], cvmcep_wopow]
        if is_diff:
            # the converted part is added on top of the original coefficients
            cvmcep[:, sd:] += mcep[:, sd:]
        cv_mceps.append(cvmcep)

    return cv_mceps
Exemplo n.º 8
0
def extsddata(data, npow, power_threshold=-20):
    """Build static and delta features and keep the frames selected by power

    Parameters
    ----------
    data : array, shape (`T`, `dim`)
        Acoustic feature vector
    npow : array, shape (`T`)
        Normalized power vector
    power_threshold : float, optional
        Power threshold handed to `extfrm`
        Default set to -20

    Returns
    -------
    extsddata : array
        Static and delta feature vectors of the frames retained by `extfrm`

    """
    sddata = static_delta(data)
    return extfrm(sddata, npow, power_threshold=power_threshold)
Exemplo n.º 9
0
def main(*argv):
    """Convert the evaluation utterances of a speaker pair and write wav files.

    Command-line arguments are taken from `argv` when given, otherwise from
    `sys.argv[1:]`. For every entry of the evaluation list the waveform is
    analyzed, F0 and mel-cepstrum are converted, a GV postfilter is applied
    and the result is synthesized and written below ``<pair_dir>/test``:
    ``<name>_VC.wav`` when `gmmmode` is `None`, ``<name>_DIFFVC.wav`` when it
    is ``diff``.

    Raises
    ------
    ValueError
        If `gmmmode` is neither `None` nor ``diff``; this script has no
        synthesis path for any other mode.
    """
    argv = argv if argv else sys.argv[1:]
    # Options for python
    description = 'estimate joint feature of source and target speakers'
    parser = argparse.ArgumentParser(description=description)
    parser.add_argument('-gmmmode',
                        '--gmmmode',
                        type=str,
                        default=None,
                        help='mode of the GMM [None, diff, or intra]')
    parser.add_argument('org', type=str, help='Original speaker')
    parser.add_argument('tar', type=str, help='Target speaker')
    parser.add_argument('org_yml',
                        type=str,
                        help='Yml file of the original speaker')
    parser.add_argument('pair_yml',
                        type=str,
                        help='Yml file of the speaker pair')
    parser.add_argument('eval_list_file',
                        type=str,
                        help='List file for evaluation')
    parser.add_argument('wav_dir',
                        type=str,
                        help='Directory path of source spekaer')
    parser.add_argument('pair_dir',
                        type=str,
                        help='Directory path of pair directory')
    args = parser.parse_args(argv)

    # Only the two modes below are synthesized; any other value used to fail
    # later with an unbound `wav`, so reject it before loading anything.
    if args.gmmmode not in (None, 'diff'):
        raise ValueError(
            "gmmmode must be None or 'diff' for conversion: {!r}".format(
                args.gmmmode))

    # read parameters from speaker yml
    sconf = SpeakerYML(args.org_yml)
    pconf = PairYML(args.pair_yml)

    # read GMM for mcep
    mcepgmmpath = os.path.join(args.pair_dir, 'model/GMM_mcep.pkl')
    mcepgmm = GMMConvertor(
        n_mix=pconf.GMM_mcep_n_mix,
        covtype=pconf.GMM_mcep_covtype,
        gmmmode=args.gmmmode,
    )
    # NOTE: joblib.load unpickles the file -- only load models from a
    # trusted pair directory.
    param = joblib.load(mcepgmmpath)
    mcepgmm.open_from_param(param)
    print("GMM for mcep conversion mode: {}".format(args.gmmmode))

    # read F0 statistics
    stats_dir = os.path.join(args.pair_dir, 'stats')
    orgstatspath = os.path.join(stats_dir, args.org + '.h5')
    orgstats_h5 = HDF5(orgstatspath, mode='r')
    orgf0stats = orgstats_h5.read(ext='f0stats')
    orgstats_h5.close()

    # read F0 and GV statistics for target
    tarstatspath = os.path.join(stats_dir, args.tar + '.h5')
    tarstats_h5 = HDF5(tarstatspath, mode='r')
    tarf0stats = tarstats_h5.read(ext='f0stats')
    targvstats = tarstats_h5.read(ext='gv')
    tarstats_h5.close()

    # read GV statistics for converted mcep
    cvgvstatspath = os.path.join(args.pair_dir, 'model', 'cvgv.h5')
    cvgvstats_h5 = HDF5(cvgvstatspath, mode='r')
    cvgvstats = cvgvstats_h5.read(ext='cvgv')
    diffcvgvstats = cvgvstats_h5.read(ext='diffcvgv')
    cvgvstats_h5.close()

    mcepgv = GV()
    f0stats = F0statistics()

    # construct FeatureExtractor class
    feat = FeatureExtractor(analyzer=sconf.analyzer,
                            fs=sconf.wav_fs,
                            fftl=sconf.wav_fftl,
                            shiftms=sconf.wav_shiftms,
                            minf0=sconf.f0_minf0,
                            maxf0=sconf.f0_maxf0)

    # construct Synthesizer class
    synthesizer = Synthesizer(fs=sconf.wav_fs,
                              fftl=sconf.wav_fftl,
                              shiftms=sconf.wav_shiftms)

    # test directory
    test_dir = os.path.join(args.pair_dir, 'test')
    os.makedirs(os.path.join(test_dir, args.org), exist_ok=True)

    # conversion in each evaluation file
    with open(args.eval_list_file, 'r') as fp:
        for line in fp:
            f = line.rstrip()
            if not f:
                # tolerate blank lines (e.g. a trailing newline) in the list
                continue

            # open wav file
            wavf = os.path.join(args.wav_dir, f + '.wav')
            fs, x = wavfile.read(wavf)
            # np.float was only an alias of the builtin float and has been
            # removed from NumPy; float64 is the same dtype
            x = x.astype(np.float64)
            x = low_cut_filter(x, fs, cutoff=70)
            assert fs == sconf.wav_fs

            # analyze F0, mcep, and ap
            f0, _, ap = feat.analyze(x)
            mcep = feat.mcep(dim=sconf.mcep_dim, alpha=sconf.mcep_alpha)
            mcep_0th = mcep[:, 0]

            # convert F0
            cvf0 = f0stats.convert(f0, orgf0stats, tarf0stats)

            # convert mcep (the 0th coefficient is carried over unconverted)
            cvmcep_wopow = mcepgmm.convert(static_delta(mcep[:, 1:]),
                                           cvtype=pconf.GMM_mcep_cvtype)
            cvmcep = np.c_[mcep_0th, cvmcep_wopow]

            if args.gmmmode is None:
                # synthesis VC w/ GV
                cvmcep_wGV = mcepgv.postfilter(cvmcep,
                                               targvstats,
                                               cvgvstats=cvgvstats,
                                               alpha=pconf.GV_morph_coeff,
                                               startdim=1)
                wav = synthesizer.synthesis(
                    cvf0,
                    cvmcep_wGV,
                    ap,
                    rmcep=mcep,
                    alpha=sconf.mcep_alpha,
                )
                wavpath = os.path.join(test_dir, f + '_VC.wav')
            else:
                # synthesis DIFFVC w/ GV: the postfilter is applied to
                # original + differential, then the original is removed again
                cvmcep[:, 0] = 0.0
                cvmcep_wGV = mcepgv.postfilter(mcep + cvmcep,
                                               targvstats,
                                               cvgvstats=diffcvgvstats,
                                               alpha=pconf.GV_morph_coeff,
                                               startdim=1) - mcep
                wav = synthesizer.synthesis_diff(
                    x,
                    cvmcep_wGV,
                    rmcep=mcep,
                    alpha=sconf.mcep_alpha,
                )
                wavpath = os.path.join(test_dir, f + '_DIFFVC.wav')

            # write waveform
            # NOTE(review): this creates a directory named after the
            # utterance while the wav itself is written next to it; kept as
            # in the original -- confirm it is intended.
            os.makedirs(os.path.join(test_dir, f), exist_ok=True)

            # clip to the int16 range before the cast to avoid wrap-around
            wav = np.clip(wav, -32768, 32767)
            wavfile.write(wavpath, fs, wav.astype(np.int16))
            print(wavpath)
Exemplo n.º 10
0
def main(*argv):
    """Iteratively estimate joint features of a source/target speaker pair.

    Command-line arguments are taken from `argv` when given, otherwise from
    `sys.argv[1:]`. The first iteration aligns source and target features
    directly. Every further iteration (up to `pconf.jnt_n_iter`) trains a
    GMM on the current joint features, converts the source features with it
    and re-aligns using the converted features.

    The final joint features, the last trained GMM and the time warping
    functions are written below ``<pair_dir>/jnt``, ``<pair_dir>/GMM`` and
    ``<pair_dir>/twf``. When only one iteration is configured no GMM has
    been trained, so none is saved.
    """
    argv = argv if argv else sys.argv[1:]
    # Options for python
    description = 'estimate joint feature of source and target speakers'
    parser = argparse.ArgumentParser(description=description)
    parser.add_argument('pair_yml',
                        type=str,
                        help='Yml file of the speaker pair')
    parser.add_argument('org_list_file',
                        type=str,
                        help='List file of original speaker')
    parser.add_argument('tar_list_file',
                        type=str,
                        help='List file of target speaker')
    parser.add_argument('pair_dir',
                        type=str,
                        help='Directory path of h5 files')
    args = parser.parse_args(argv)

    # read pair-dependent yml file
    pconf = PairYML(args.pair_yml)

    # read source and target features from HDF file
    h5_dir = os.path.join(args.pair_dir, 'h5')
    org_mceps = read_feats(args.org_list_file, h5_dir, ext='mcep')
    org_npows = read_feats(args.org_list_file, h5_dir, ext='npow')
    tar_mceps = read_feats(args.tar_list_file, h5_dir, ext='mcep')
    tar_npows = read_feats(args.tar_list_file, h5_dir, ext='npow')
    assert len(org_mceps) == len(tar_mceps)
    assert len(org_npows) == len(tar_npows)
    assert len(org_mceps) == len(org_npows)

    itnum = 1
    sd = 1  # start dimension for alignment of mcep
    print('{}-th joint feature extraction starts.'.format(itnum))

    # first iteration: align source and target directly
    jnt, twfs = _align_all_files(org_mceps, org_npows,
                                 tar_mceps, tar_npows, sd)
    itnum += 1

    # second through final iteration
    trgmm = None  # stays None when only one iteration is configured
    while itnum < pconf.jnt_n_iter + 1:
        print('{}-th joint feature extraction starts.'.format(itnum))
        # train GMM
        trgmm = GMMTrainer(n_mix=pconf.GMM_mcep_n_mix,
                           n_iter=pconf.GMM_mcep_n_iter,
                           covtype=pconf.GMM_mcep_covtype)
        trgmm.train(jnt)

        cvgmm = GMMConvertor(n_mix=pconf.GMM_mcep_n_mix,
                             covtype=pconf.GMM_mcep_covtype)
        cvgmm.open_from_param(trgmm.param)
        jnt, twfs = _align_all_files(org_mceps, org_npows,
                                     tar_mceps, tar_npows, sd,
                                     cvgmm=cvgmm,
                                     cvtype=pconf.GMM_mcep_cvtype)
        itnum += 1

    # NOTE: `itnum` is now one past the last executed iteration; the file
    # names below keep using it exactly as before.

    # save joint feature vector
    jnt_dir = os.path.join(args.pair_dir, 'jnt')
    os.makedirs(jnt_dir, exist_ok=True)
    jntpath = os.path.join(jnt_dir, 'it' + str(itnum) + '_jnt.h5')
    jnth5 = HDF5(jntpath, mode='w')
    jnth5.save(jnt, ext='jnt')
    jnth5.close()

    # save GMM (only if one was trained)
    if trgmm is not None:
        gmm_dir = os.path.join(args.pair_dir, 'GMM')
        os.makedirs(gmm_dir, exist_ok=True)
        gmmpath = os.path.join(gmm_dir, 'it' + str(itnum) + '_gmm.pkl')
        joblib.dump(trgmm.param, gmmpath)

    # save twf
    twf_dir = os.path.join(args.pair_dir, 'twf')
    os.makedirs(twf_dir, exist_ok=True)
    with open(args.org_list_file, 'r') as fp:
        for line, twf in zip(fp, twfs):
            f = os.path.basename(line.rstrip())
            twfpath = os.path.join(twf_dir,
                                   'it' + str(itnum) + '_' + f + '.h5')
            twfh5 = HDF5(twfpath, mode='w')
            twfh5.save(twf, ext='twf')
            twfh5.close()


def _align_all_files(org_mceps, org_npows, tar_mceps, tar_npows, sd,
                     cvgmm=None, cvtype=None):
    """Align every file pair; return (stacked joint features, list of twfs).

    When `cvgmm` is given, the source features are converted with it first
    and the converted features drive the alignment.
    """
    jdata_per_file, twfs = [], []
    files = zip(org_mceps, org_npows, tar_mceps, tar_npows)
    for i, (org_mcep, org_npow, tar_mcep, tar_npow) in enumerate(files):
        org_feat = org_mcep[:, sd:]
        cvdata = None
        if cvgmm is not None:
            cvdata = cvgmm.convert(static_delta(org_feat), cvtype=cvtype)
        jdata, twf, mcd = get_aligned_jointdata(org_feat, org_npow,
                                                tar_mcep[:, sd:], tar_npow,
                                                cvdata=cvdata)
        print('distortion [dB] for {}-th file: {}'.format(i + 1, mcd))
        jdata_per_file.append(jdata)
        twfs.append(twf)

    # stack once at the end instead of re-copying the whole matrix per file
    return np.concatenate(jdata_per_file, axis=0), twfs
Exemplo n.º 11
0
def align_feature_vectors(odata, onpows, tdata, tnpows, pconf,
                          opow=-100, tpow=-100, itnum=3, sd=0,
                          given_twfs=None, otflag=None):
    """Get alignment to create joint feature vector

    Parameters
    ----------
    odata : list, (`num_files`)
        List of original feature vectors
    onpows : list , (`num_files`)
        List of original npows
    tdata : list, (`num_files`)
        List of target feature vectors
    tnpows : list , (`num_files`)
        List of target npows
    pconf : object
        Pair configuration; `GMM_mcep_n_mix`, `GMM_mcep_n_iter`,
        `GMM_mcep_covtype` and `GMM_mcep_cvtype` are read from it
    opow : float, optional,
        Power threshold of original
        Default set to -100
    tpow : float, optional,
        Power threshold of target
        Default set to -100
    itnum : int , optional,
        The number of iteration (must be 1 or greater)
        Default set to 3
    sd : int , optional,
        Start dimension of feature vector to be used for alignment
        Default set to 0
    given_twfs : list, optional, (`num_files`)
        Time warping functions handed to `get_alignment` during the 1st
        iteration, one per file
        Default set to None
    otflag : str, optional
        Alignment into the length of specification
        'org' : alignment into original length
        'tar' : alignment into target length
        Default set to None

    Returns
    -------
    jfvs : list,
        List of joint feature vectors of the final iteration
    twfs : list,
        List of time warping functions of the final iteration

    Raises
    ------
    ValueError
        If `itnum` is smaller than 1
    """
    if itnum < 1:
        # without an iteration there is nothing to return
        raise ValueError('itnum must be 1 or greater: {}'.format(itnum))

    num_files = len(odata)
    cvgmm, cvdata = None, None
    for it in range(1, itnum + 1):
        print('{}-th joint feature extraction starts.'.format(it))
        twfs, jfvs = [], []
        for i in range(num_files):
            # a given alignment is only honoured in the first iteration
            if it == 1 and given_twfs is not None:
                gtwf = given_twfs[i]
            else:
                gtwf = None
            if it > 1:
                # from the 2nd iteration on, align with the features
                # converted by the GMM of the previous iteration
                cvdata = cvgmm.convert(static_delta(odata[i][:, sd:]),
                                       cvtype=pconf.GMM_mcep_cvtype)
            jdata, twf, mcd = get_alignment(odata[i],
                                            onpows[i],
                                            tdata[i],
                                            tnpows[i],
                                            opow=opow,
                                            tpow=tpow,
                                            sd=sd,
                                            cvdata=cvdata,
                                            given_twf=gtwf,
                                            otflag=otflag)
            twfs.append(twf)
            jfvs.append(jdata)
            print('distortion [dB] for {}-th file: {}'.format(i + 1, mcd))
        jnt_data = transform_jnt(jfvs)

        if it != itnum:
            # train GMM, if not final iteration
            datagmm = GMMTrainer(n_mix=pconf.GMM_mcep_n_mix,
                                 n_iter=pconf.GMM_mcep_n_iter,
                                 covtype=pconf.GMM_mcep_covtype)
            datagmm.train(jnt_data)
            cvgmm = GMMConvertor(n_mix=pconf.GMM_mcep_n_mix,
                                 covtype=pconf.GMM_mcep_covtype)
            cvgmm.open_from_param(datagmm.param)
    return jfvs, twfs
def align_ppg_feature_vectors(odata,
                              onpows,
                              tdata,
                              tnpows,
                              pconf,
                              s_list_file,
                              tar_list_file,
                              opow=-100,
                              tpow=-100,
                              itnum=3,
                              sd=0,
                              given_twfs=None,
                              otflag=None):
    """Create joint feature vectors with `get_ppg_alignment` and iterate
    the time warping functions

    The joint feature vectors are produced once, in the 1st iteration, by
    `get_ppg_alignment`. Every iteration additionally runs `get_alignment`
    to obtain the time warping functions; from the 2nd iteration on the
    source features converted by a GMM trained on the joint features are
    passed to it.

    NOTE(review): the joint features are not updated after the 1st
    iteration, so every GMM is trained on the same data -- confirm this is
    intended.

    Parameters
    ----------
    odata, onpows : list, (`num_files`)
        Original feature vectors and npows
    tdata, tnpows : list, (`num_files`)
        Target feature vectors and npows
    pconf : object
        Pair configuration; `GMM_mcep_n_mix`, `GMM_mcep_n_iter`,
        `GMM_mcep_covtype` and `GMM_mcep_cvtype` are read from it
    s_list_file, tar_list_file : str
        Text files read with `np.loadtxt`; entry `i` of each is handed to
        `get_ppg_alignment` for file `i`
    opow, tpow : float, optional
        Power thresholds of original and target, default -100
    itnum : int, optional
        The number of iteration (must be 1 or greater), default 3
    sd : int, optional
        Start dimension of feature vector to be used for alignment
    given_twfs : list, optional
        Time warping functions used during the 1st iteration, one per file
    otflag : str, optional
        Passed through to the alignment functions

    Returns
    -------
    jfvs : list,
        List of joint feature vectors from the 1st iteration
    twfs : list,
        List of time warping functions of the final iteration

    Raises
    ------
    ValueError
        If `itnum` is smaller than 1
    """
    if itnum < 1:
        # without an iteration `twfs` would never be produced
        raise ValueError('itnum must be 1 or greater: {}'.format(itnum))

    # ndmin=1 keeps a one-line list file indexable (loadtxt would otherwise
    # return a 0-d array)
    s_list_file = np.loadtxt(s_list_file, dtype='str', ndmin=1)
    tar_list_file = np.loadtxt(tar_list_file, dtype='str', ndmin=1)
    num_files = len(odata)
    cvgmm, cvdata = None, None
    jfvs = []
    for it in range(1, itnum + 1):
        print('{}-th joint feature extraction starts.'.format(it))
        twfs = []
        for i in range(num_files):
            # a given alignment is only honoured in the first iteration
            if it == 1 and given_twfs is not None:
                gtwf = given_twfs[i]
            else:
                gtwf = None
            if it > 1:
                cvdata = cvgmm.convert(static_delta(odata[i][:, sd:]),
                                       cvtype=pconf.GMM_mcep_cvtype)
            else:
                jdata = get_ppg_alignment(odata[i],
                                          onpows[i],
                                          tdata[i],
                                          tnpows[i],
                                          s_list_file[i],
                                          tar_list_file[i],
                                          opow=opow,
                                          tpow=tpow,
                                          sd=sd,
                                          cvdata=cvdata,
                                          given_twf=gtwf,
                                          otflag=otflag)
                print(s_list_file[i])
                jfvs.append(jdata)
            _, twf, _ = get_alignment(odata[i],
                                      onpows[i],
                                      tdata[i],
                                      tnpows[i],
                                      opow=opow,
                                      tpow=tpow,
                                      sd=sd,
                                      cvdata=cvdata,
                                      given_twf=gtwf,
                                      otflag=otflag)
            twfs.append(twf)

        if it == 1:
            # build the training matrix once all files are collected rather
            # than rebuilding it after every single file
            jnt_data = transform_jnt(jfvs)

        if it != itnum:
            # train GMM, if not final iteration
            print("training GMM")
            datagmm = GMMTrainer(n_mix=pconf.GMM_mcep_n_mix,
                                 n_iter=pconf.GMM_mcep_n_iter,
                                 covtype=pconf.GMM_mcep_covtype)
            datagmm.train(jnt_data)
            cvgmm = GMMConvertor(n_mix=pconf.GMM_mcep_n_mix,
                                 covtype=pconf.GMM_mcep_covtype)
            cvgmm.open_from_param(datagmm.param)
    return jfvs, twfs
Exemplo n.º 13
0
def align_feature_vectors(odata, onpows, tdata, tnpows, pconf,
                          opow=-100, tpow=-100, itnum=3, sd=0,
                          given_twfs=None, otflag=None):
    """Get alignment to create joint feature vector

    Parameters
    ----------
    odata : list, (`num_files`)
        List of original feature vectors
    onpows : list , (`num_files`)
        List of original npows
    tdata : list, (`num_files`)
        List of target feature vectors
    tnpows : list , (`num_files`)
        List of target npows
    pconf : object
        Pair configuration; `GMM_mcep_n_mix`, `GMM_mcep_n_iter`,
        `GMM_mcep_covtype` and `GMM_mcep_cvtype` are read from it
    opow : float, optional,
        Power threshold of original
        Default set to -100
    tpow : float, optional,
        Power threshold of target
        Default set to -100
    itnum : int , optional,
        The number of iteration (must be 1 or greater)
        Default set to 3
    sd : int , optional,
        Start dimension of feature vector to be used for alignment
        Default set to 0
    given_twfs : list, optional, (`num_files`)
        Time warping functions handed to `get_alignment` during the 1st
        iteration, one per file
        Default set to None
    otflag : str, optional
        Alignment into the length of specification
        'org' : alignment into original length
        'tar' : alignment into target length
        Default set to None

    Returns
    -------
    jfvs : list,
        List of joint feature vectors of the final iteration
    twfs : list,
        List of time warping functions of the final iteration

    Raises
    ------
    ValueError
        If `itnum` is smaller than 1

    """
    if itnum < 1:
        # without an iteration there is nothing to return
        raise ValueError('itnum must be 1 or greater: {}'.format(itnum))

    num_files = len(odata)
    cvgmm, cvdata = None, None
    for it in range(1, itnum + 1):
        print('{}-th joint feature extraction starts.'.format(it))
        # alignment
        twfs, jfvs = [], []
        for i in range(num_files):
            # a given alignment is only honoured in the first iteration
            use_given = it == 1 and given_twfs is not None
            gtwf = given_twfs[i] if use_given else None
            if it > 1:
                # from the 2nd iteration on, align with the features
                # converted by the GMM of the previous iteration
                cvdata = cvgmm.convert(static_delta(odata[i][:, sd:]),
                                       cvtype=pconf.GMM_mcep_cvtype)
            jdata, twf, mcd = get_alignment(odata[i],
                                            onpows[i],
                                            tdata[i],
                                            tnpows[i],
                                            opow=opow,
                                            tpow=tpow,
                                            sd=sd,
                                            cvdata=cvdata,
                                            given_twf=gtwf,
                                            otflag=otflag)
            twfs.append(twf)
            jfvs.append(jdata)
            print('distortion [dB] for {}-th file: {}'.format(i + 1, mcd))
        jnt_data = transform_jnt(jfvs)

        if it != itnum:
            # train GMM, if not final iteration
            datagmm = GMMTrainer(n_mix=pconf.GMM_mcep_n_mix,
                                 n_iter=pconf.GMM_mcep_n_iter,
                                 covtype=pconf.GMM_mcep_covtype)
            datagmm.train(jnt_data)
            cvgmm = GMMConvertor(n_mix=pconf.GMM_mcep_n_mix,
                                 covtype=pconf.GMM_mcep_covtype)
            cvgmm.open_from_param(datagmm.param)
    return jfvs, twfs