def generateData(inFile, outFile, vuvInFile, vuvOutFile, featDim, resolution, thre=0.5):
    """Write an up-sampled frame-index track and a randomly thinned V/UV track.

    Each input frame is expanded to ``resolution`` output samples.  The
    index track holds the frame number for every output sample; the V/UV
    track holds the frame's V/UV value, except that samples whose uniform
    random draw falls below ``thre`` are forced to 0.

    Args:
        inFile: raw matrix file read with ``featDim`` columns; only its
            number of rows is used.
        outFile: destination of the frame-index track.
        vuvInFile: raw matrix file with one V/UV value per frame.
        vuvOutFile: destination of the thinned V/UV track.
        featDim: number of columns in ``inFile``.
        resolution: number of output samples per input frame.
        thre: random draws below this value zero the corresponding sample.

    Returns:
        None.  If either input file is missing, its path is printed and
        nothing is written.
    """
    for path in (inFile, vuvInFile):
        if not os.path.isfile(path):
            print(path)
            return

    labData = py_rw.read_raw_mat(inFile, featDim)
    vuvData = py_rw.read_raw_mat(vuvInFile, 1)

    # Both streams are cut to the shorter one; only the frame count of the
    # label matrix matters from here on, so it does not need to be sliced.
    frameNum = min(labData.shape[0], vuvData.shape[0])
    vuvData = vuvData[0:frameNum]

    maxTime = frameNum * resolution
    labIdxBuf = np.zeros([int(maxTime)])
    vuvBinBuf = np.zeros([int(maxTime)])
    for idx in range(frameNum):
        st = idx * resolution
        et = (idx + 1) * resolution
        labIdxBuf[st:et] = idx
        vuvBinBuf[st:et] = vuvData[idx]

        # One random draw per output sample; the slice is a view, so the
        # masked assignment edits vuvBinBuf in place.
        randU = np.random.rand(et - st)
        segment = vuvBinBuf[st:et]
        segment[randU < thre] = 0

    py_rw.write_raw_mat(labIdxBuf, outFile)
    py_rw.write_raw_mat(vuvBinBuf, vuvOutFile)
Exemplo n.º 2
0
def generate(inFile1, inFile2, outfile):
    """Write a 0/1 flag per index: 1 where the indexed value is positive.

    ``inFile1`` is read as a one-column matrix and cast to int32 indices;
    ``inFile2`` is read as a one-column matrix of values.  For every index
    the output is 1 when the value at that index is greater than 0, else 0.
    """
    frameIdx = np.asarray(py_rw.read_raw_mat(inFile1, 1), dtype=np.int32)
    values = py_rw.read_raw_mat(inFile2, 1)

    # The conversion runs on a copy and its results are not used below.
    _quantised, _unused = discreteF0.f0Conversion(values.copy(), F0Max, F0Min,
                                                  F0Inter, 'c2d', F0Conti)

    positive = values[frameIdx] > 0
    flags = np.zeros(frameIdx.shape)
    flags[positive] = 1
    py_rw.write_raw_mat(flags, outfile)
Exemplo n.º 3
0
def tempWarpper(fileIn, outFile, outWav):
    """Convert a 16-bit wav file to a float32 raw matrix scaled by 2**15.

    Args:
        fileIn: path of the wav file to read.
        outFile: path of the raw float matrix to write.
        outWav: accepted for interface compatibility; not used here.

    Returns:
        None.  For any sample type other than int16 a message is printed
        and nothing is written.
    """
    sr, wavdata = scipy.io.wavfile.read(fileIn)
    if wavdata.dtype != np.int16:
        print("Unsupported data type")
        return
    transData = np.array(wavdata, dtype=np.float32) / np.power(2.0, 16 - 1)
    py_rw.write_raw_mat(transData, outFile)
Exemplo n.º 4
0
def tempWarpper(fileIn, outFile, outWav):
    """Dump an int16 wav file as float32 samples divided by 2**15.

    ``outWav`` is part of the signature but is not used.  Files whose
    sample type is not int16 only trigger a printed message.
    """
    _rate, samples = scipy.io.wavfile.read(fileIn)
    if samples.dtype == np.int16:
        fullScale = np.power(2.0, 16 - 1)
        scaled = np.array(samples, dtype=np.float32) / fullScale
        py_rw.write_raw_mat(scaled, outFile)
        return
    print("Unsupported data type")
Exemplo n.º 5
0
def f0ip2f0(f0_ip_file, vuv_file, output_file, vu_threshold=0.5, unvoiced_value=-1.0e+10):
    """Mark frames of an F0 track as unvoiced according to a V/UV track.

    Frames whose V/UV value is below ``vu_threshold`` are overwritten with
    ``unvoiced_value``; the result is written to ``output_file``.

    Raises:
        AssertionError: the two tracks have different lengths.
    """
    f0_track = py_rw.read_raw_lf0(f0_ip_file, 1)
    vuv_track = py_rw.read_raw_mat(vuv_file, 1)

    same_length = f0_track.shape[0] == vuv_track.shape[0]
    assert same_length, "Error: unequal length %s %s" % (f0_ip_file, vuv_file)

    unvoiced = vuv_track < vu_threshold
    f0_track[unvoiced] = unvoiced_value
    py_rw.write_raw_mat(f0_track, output_file)
def generateLabIndex(labfile, outfile, resolution=80, res=50000):
    """Write a track that repeats each frame index ``resolution`` times.

    The number of frames is the last end time returned by
    ``htslab.read_full_lab(labfile, res)``.
    """
    _starts, endTimes, _labels = htslab.read_full_lab(labfile, res)
    lastFrame = endTimes[-1]

    indexTrack = np.zeros([int(lastFrame * resolution)])
    for frame in range(int(lastFrame)):
        indexTrack[frame * resolution:(frame + 1) * resolution] = frame
    py_rw.write_raw_mat(indexTrack, outfile)
def generateLabIndex(labfile, outfile, resolution=80, res=50000):
    """Expand frame indices of a label file to a finer time grid.

    Every frame index, from 0 up to the last end time read from
    ``labfile``, fills ``resolution`` consecutive output samples.
    """
    labInfo = htslab.read_full_lab(labfile, res)
    sTime, eTime, labEntry = labInfo
    frameNum = int(eTime[-1])

    outBuf = np.zeros([int(eTime[-1] * resolution)])
    begin = 0
    for frame in range(frameNum):
        finish = begin + resolution
        outBuf[begin:finish] = frame
        begin = finish
    py_rw.write_raw_mat(outBuf, outfile)
Exemplo n.º 8
0
def CreateTimeMatrix(DataFile):
    """Build and save the per-frame boundary-flag vector of one utterance.

    The phoneme, syllable and word label files of ``DataFile`` are read
    from the module-level ``PhoAlign``, ``SylAlign`` and ``WorAlign``
    directories.  Every frame carries ``bitInfo['fra']``.  The first frame
    of each label entry additionally carries ``bitInfo['pho']``, plus
    ``'syl'`` when the syllable label changes, ``'wor'`` when the word label
    changes, and ``'phr'`` for the first entry or when the previous or
    current word label equals ``phraseSym``.  The vector is written to
    ``DataDir/<DataFile>.bin``.

    Returns:
        The number of frames, or 0 if the word and syllable label files
        differ in entry count or final end time (nothing is written then).
    """
    phofile = PhoAlign + os.path.sep + DataFile + '.lab'
    sylfile = SylAlign + os.path.sep + DataFile + '.lab'
    worfile = WorAlign + os.path.sep + DataFile + '.lab'

    phodata = htslab.read_full_lab(phofile)
    syldata = htslab.read_full_lab(sylfile)
    wordata = htslab.read_full_lab(worfile)

    if len(wordata[1]) != len(syldata[1]) or wordata[1][-1] != syldata[1][-1]:
        print("\t Unequal Length %s" % (DataFile))
        return 0

    # The builtin int replaces the np.int alias, which NumPy has removed.
    DataTime = int(syldata[1][-1])

    # default, update at every frame level
    dataMat = np.bitwise_or(np.zeros([DataTime], dtype=np.int8),
                            bitInfo['fra'])

    preSyl = ''
    preWor = ''

    for idx1 in range(len(syldata[0])):
        frameStart = syldata[0][idx1]
        frameEnd = syldata[1][idx1]

        # update the phoneme state
        dataMat[frameStart] = np.bitwise_or(dataMat[frameStart],
                                            bitInfo['pho'])

        syllabel = syldata[2][idx1]
        worlabel = wordata[2][idx1]

        if syllabel != preSyl:
            dataMat[frameStart] = np.bitwise_or(dataMat[frameStart],
                                                bitInfo['syl'])
        if worlabel != preWor:
            dataMat[frameStart] = np.bitwise_or(dataMat[frameStart],
                                                bitInfo['wor'])
        if len(preWor) == 0 or preWor == phraseSym or worlabel == phraseSym:
            dataMat[frameStart] = np.bitwise_or(dataMat[frameStart],
                                                bitInfo['phr'])
        preSyl = syllabel
        preWor = worlabel
        if CheckBinary:
            # Debug dump: one line per frame of this entry.
            pholabel = phodata[2][idx1]
            for t in range(int(frameStart), int(frameEnd)):
                print("%d, %s [%s %s %s]" %
                      (t, np.binary_repr(dataMat[t], len(bitInfo)),
                       pholabel[0:6], syllabel[0:6], worlabel[0:6]))

    py_rw.write_raw_mat(dataMat, DataDir + os.path.sep + DataFile + '.bin',
                        'u1')
    return DataTime
Exemplo n.º 9
0
def generateLabIndex(labfile, outfile, labDim, resolution):
    """Write a track that repeats each row index ``resolution`` times.

    The number of rows comes from reading ``labfile`` as a raw matrix with
    ``labDim`` columns.
    """
    frameNum = py_rw.read_raw_mat(labfile, labDim).shape[0]

    indexTrack = np.zeros([int(frameNum * resolution)])
    for frame in range(int(frameNum)):
        indexTrack[frame * resolution:(frame + 1) * resolution] = frame
    py_rw.write_raw_mat(indexTrack, outfile)
Exemplo n.º 10
0
def generateLabIndex(labfile, outfile, labDim, resolution):
    """Expand the row indices of a label matrix to a finer time grid.

    Row ``i`` of the matrix read from ``labfile`` fills output samples
    ``i * resolution`` up to ``(i + 1) * resolution`` with the value ``i``.
    """
    labMat = py_rw.read_raw_mat(labfile, labDim)
    rowNum = int(labMat.shape[0])

    outBuf = np.zeros([int(labMat.shape[0] * resolution)])
    begin = 0
    for row in range(rowNum):
        finish = begin + resolution
        outBuf[begin:finish] = row
        begin = finish
    py_rw.write_raw_mat(outBuf, outfile)
Exemplo n.º 11
0
def split(fileName, inDir, uvDir, uvT, f0Ext, vuExt):
    """Overwrite an F0 file in place with unvoiced frames set to -1.0e+10.

    Frames whose V/UV value is below ``uvT`` are marked.  Progress is
    printed as ``<fileName> : done`` or ``<fileName> : not found``.

    Raises:
        AssertionError: the F0 and V/UV tracks have different lengths.
    """
    f0Path = os.path.sep.join([inDir, fileName + f0Ext])
    vuPath = os.path.sep.join([uvDir, fileName + vuExt])
    print(fileName, end=' ')

    if not (os.path.isfile(f0Path) and os.path.isfile(vuPath)):
        print(': not found')
        return

    f0Track = py_rw.read_raw_mat(f0Path, 1)
    vuTrack = py_rw.read_raw_mat(vuPath, 1)
    assert f0Track.shape[0] == vuTrack.shape[0], ': lf0 uv unequal length'
    f0Track[vuTrack < uvT] = -1.0e+10
    # The result replaces the input F0 file.
    py_rw.write_raw_mat(f0Track, f0Path)
    print(': done')
0
def split(fileName, inDir, uvDir, uvT, f0Ext, vuExt):
    """Mark unvoiced frames of an F0 file in place.

    Frames of ``inDir/<fileName><f0Ext>`` whose V/UV value (read from
    ``uvDir/<fileName><vuExt>``) is below ``uvT`` are set to -1.0e+10 and
    the F0 file is overwritten.  Progress is printed on one line as
    ``<fileName> : done`` or ``<fileName> : not found``.

    Raises:
        AssertionError: the F0 and V/UV tracks have different lengths.
    """
    conlf0Name = inDir + os.path.sep + fileName + f0Ext
    vuName = uvDir + os.path.sep + fileName + vuExt
    # print() with end=' ' keeps the status on the same line as the name.
    print(fileName, end=' ')
    if os.path.isfile(conlf0Name) and os.path.isfile(vuName):
        conlf0 = py_rw.read_raw_mat(conlf0Name, 1)
        vu = py_rw.read_raw_mat(vuName, 1)
        assert conlf0.shape[0] == vu.shape[0], ': lf0 uv unequal length'
        conlf0[vu < uvT] = -1.0e+10
        py_rw.write_raw_mat(conlf0, conlf0Name)
        print(': done')
    else:
        print(': not found')
Exemplo n.º 13
0
def f0convert(f0File, qF0Output, vuvOutputFile, f0Zero, f0Max, f0Min, f0Inter, f0Conti, f0Type):
    """Quantise an F0 track and write the class track plus a V/UV track.

    Args:
        f0File: input F0 file.
        qF0Output: destination of the quantised track.
        vuvOutputFile: destination of the V/UV track returned by
            ``discreteF0.f0Conversion``.
        f0Zero: accepted for interface compatibility; not used.
        f0Max, f0Min, f0Inter, f0Conti: forwarded to
            ``discreteF0.f0Conversion``.
        f0Type: 0 reads a raw matrix and maps positive values through
            ``1127 * log(1 + f / 700)``; 1 reads through
            ``py_rw.read_raw_lf0`` without further mapping.

    Raises:
        ValueError: ``f0Type`` is neither 0 nor 1.
    """
    if f0Type == 0:
        data = py_rw.read_raw_mat(f0File, 1)
        positive = data > 0
        data[positive] = 1127.0 * np.log(data[positive] / 700.0 + 1)
    elif f0Type == 1:
        data = py_rw.read_raw_lf0(f0File, 1)
    else:
        # Without this branch the code below failed on an unbound `data`.
        raise ValueError("Unsupported f0Type %s (expected 0 or 1)" % str(f0Type))

    dataClas, vuv = discreteF0.f0Conversion(data.copy(), f0Max, f0Min, f0Inter, 'c2d', f0Conti)
    # Frames flagged as unvoiced carry class 0.
    dataClas[vuv < 1] = 0.0
    py_rw.write_raw_mat(dataClas, qF0Output)
    py_rw.write_raw_mat(vuv, vuvOutputFile)
def tempWarpper(fileIn, outFile, outWav, quanLevel):
    """Convert a wav file with ``wavTool.wavformConvert`` and save the result.

    Args:
        fileIn: path of the wav file to read (int16 or int32 samples).
        outFile: destination of the converted raw matrix.
        outWav: if not None, the converted data is passed back through
            ``wavTool.wavformDeconvert`` and saved as a wav file here.
        quanLevel: forwarded to the conversion routines.

    Returns:
        None.  For other sample types a message is printed and nothing is
        written.
    """
    # Read the path passed by the caller; the previous code referenced an
    # undefined name `inFile`.
    sr, wavdata = scipy.io.wavfile.read(fileIn)
    if wavdata.dtype == np.int16:
        bitDepth = 16
    elif wavdata.dtype == np.int32:
        bitDepth = 32
    else:
        print("Unsupported data type")
        # Stop here: there is no converted data to write.
        return

    transData = wavTool.wavformConvert(wavdata, bitDepth, True, quanLevel)
    py_rw.write_raw_mat(transData, outFile)

    if outWav is not None:
        recoData = wavTool.wavformDeconvert(transData, quanLevel)
        wavTool.waveSaveFromFloat(recoData, outWav, sr=sr)
Exemplo n.º 15
0
def tempWarpper(fileIn, outFile, outWav, quanLevel):
    """Convert one wav file and optionally save a reconstructed wav.

    The samples of ``fileIn`` (int16 or int32) are converted by
    ``wavTool.wavformConvert`` and written to ``outFile``.  When ``outWav``
    is not None, the converted data is passed through
    ``wavTool.wavformDeconvert`` and saved to ``outWav`` at the input
    sampling rate.  Any other sample type prints a message and returns
    without writing.
    """
    # `fileIn` is the parameter; the former `inFile` was not defined here.
    sr, wavdata = scipy.io.wavfile.read(fileIn)

    if wavdata.dtype == np.int16:
        transData = wavTool.wavformConvert(wavdata, 16, True, quanLevel)
    elif wavdata.dtype == np.int32:
        transData = wavTool.wavformConvert(wavdata, 32, True, quanLevel)
    else:
        print("Unsupported data type")
        # Return instead of failing on an unbound `transData` below.
        return
    py_rw.write_raw_mat(transData, outFile)

    if outWav is not None:
        recoData = wavTool.wavformDeconvert(transData, quanLevel)
        wavTool.waveSaveFromFloat(recoData, outWav, sr=sr)
Exemplo n.º 16
0
def createMdnConfig(mdnConfigFile, MDNType, MDNTargetDim, ARDynamic=None, tieVariance=0):
    """Create the mdn.config for MDN CURRENNT

    One record of five numbers is written per entry of ``MDNType``:
    output start, output end, target start, target end and the unit type
    (negative types are stored as -1).  The vector starts with the number
    of entries.

    Returns:
        The sum of the output widths of all entries.
    """
    unitNum = len(MDNType)
    if ARDynamic is None:
        # default, no AR dynamic
        ARDynamic = np.ones([unitNum]) * -1.0

    # First pass: lay the units side by side on the output axis.
    MDNNNOutDim = []
    offset = 0
    for unit, unitType in enumerate(MDNType):
        tarWidth = MDNTargetDim[unit][1] - MDNTargetDim[unit][0]
        width = distParaNum(tarWidth, unitType, tieVariance, ARDynamic[unit])
        MDNNNOutDim.append([offset, offset + width])
        offset = width + offset

    # Second pass: validate each unit and fill its record.
    mdnconfigdata = np.zeros([1 + unitNum * 5], dtype=np.float32)
    mdnconfigdata[0] = unitNum

    totalOut = 0
    for unit, unitType in enumerate(MDNType):
        tarDim = MDNTargetDim[unit]
        outDim = MDNNNOutDim[unit]
        expected = distParaNum(tarDim[1] - tarDim[0], unitType, tieVariance,
                               ARDynamic[unit])
        actual = outDim[1] - outDim[0]

        if unitType > 0:
            assert expected == actual, "Error in MDN mixture configuraiton"
        elif unitType < 0:
            assert tarDim[1] - tarDim[0] == 1, "Softmax to 1 dimension targert"
            unitType = -1  # change it back to -1
        totalOut = totalOut + actual

        first = unit * 5 + 1
        mdnconfigdata[first:first + 5] = [
            outDim[0], outDim[1], tarDim[0], tarDim[1], unitType
        ]

    py_rw.write_raw_mat(mdnconfigdata, mdnConfigFile)
    return totalOut
Exemplo n.º 17
0
def data_normalize(dataFile, dim, mask=None, ran=False):
    """Mean/variance-normalise a raw matrix file, optionally shuffling rows.

    Writes ``<dataFile>.norm`` (normalised data) and ``<dataFile>.mv``
    (the mean vector followed by the std vector).

    Args:
        dataFile: raw matrix file with ``dim`` columns.
        dim: number of columns.
        mask: optional array; columns where ``mask > 0`` get their std set
            to 0 and are therefore left unnormalised.
        ran: when true, rows are permuted before normalisation.  The
            permutation is read from ``random_index`` next to ``dataFile``
            when that file exists and matches the row count; otherwise a
            new one is generated and saved there.
    """
    data = drh.read_raw_mat(dataFile, dim)
    if ran:
        ranfile = os.path.dirname(dataFile) + os.path.sep + 'random_index'
        randidx = None
        if os.path.isfile(ranfile):
            print("Found random index %s" % (ranfile))
            randidx = np.asarray(drh.read_raw_mat(ranfile, 1), dtype=np.int32)
            if randidx.shape[0] != data.shape[0]:
                print("But it unmatches the data. New random_index will be generated")
                randidx = None
        if randidx is None:
            randidx = np.arange(data.shape[0])
            random.shuffle(randidx)
            drh.write_raw_mat(randidx, ranfile)
        data = data[randidx, :]

    meanData = data.mean(axis=0)
    stdData = data.std(axis=0)
    if mask is not None:
        stdData[mask > 0] = 0.0  # set to zero

    # Only columns with a non-negligible std are normalised.
    idx = stdData > 0.000001
    data[:, idx] = (data[:, idx] - meanData[idx]) / stdData[idx]

    drh.write_raw_mat(data, dataFile + '.norm')
    drh.write_raw_mat(np.concatenate((meanData, stdData)), dataFile + '.mv')
Exemplo n.º 18
0
def CreateTimeMatrix(DataFile):
    """Build and save the per-frame boundary-flag vector of one utterance.

    Label files are read from the module-level ``PhoAlign``, ``SylAlign``
    and ``WorAlign`` directories.  All frames carry ``bitInfo['fra']``; the
    first frame of each label entry also carries ``'pho'``, ``'syl'`` on a
    syllable-label change, ``'wor'`` on a word-label change and ``'phr'``
    for the first entry or when the previous or current word label equals
    ``phraseSym``.  The vector is written to ``DataDir/<DataFile>.bin``.

    Returns:
        The number of frames, or 0 if the word and syllable label files
        disagree in entry count or final end time.
    """
    phofile = PhoAlign + os.path.sep + DataFile + '.lab'
    sylfile = SylAlign + os.path.sep + DataFile + '.lab'
    worfile = WorAlign + os.path.sep + DataFile + '.lab'

    phodata = htslab.read_full_lab(phofile)
    syldata = htslab.read_full_lab(sylfile)
    wordata = htslab.read_full_lab(worfile)

    if len(wordata[1]) != len(syldata[1]) or wordata[1][-1] != syldata[1][-1]:
        print("\t Unequal Length %s" % (DataFile))
        return 0

    # Builtin int replaces the removed np.int alias.
    DataTime = int(syldata[1][-1])

    # default, update at every frame level
    dataMat = np.bitwise_or(np.zeros([DataTime], dtype=np.int8), bitInfo['fra'])

    preSyl = ''
    preWor = ''

    # range replaces the Python 2-only xrange.
    for idx1 in range(len(syldata[0])):
        frameStart = syldata[0][idx1]
        frameEnd = syldata[1][idx1]

        # update the phoneme state
        dataMat[frameStart] = np.bitwise_or(dataMat[frameStart], bitInfo['pho'])

        syllabel = syldata[2][idx1]
        worlabel = wordata[2][idx1]

        if syllabel != preSyl:
            dataMat[frameStart] = np.bitwise_or(dataMat[frameStart], bitInfo['syl'])
        if worlabel != preWor:
            dataMat[frameStart] = np.bitwise_or(dataMat[frameStart], bitInfo['wor'])
        if len(preWor) == 0 or preWor == phraseSym or worlabel == phraseSym:
            dataMat[frameStart] = np.bitwise_or(dataMat[frameStart], bitInfo['phr'])
        preSyl = syllabel
        preWor = worlabel
        if CheckBinary:
            # Debug dump: one line per frame of this entry.
            pholabel = phodata[2][idx1]
            for t in range(int(frameStart), int(frameEnd)):
                print("%d, %s [%s %s %s]" % (t, np.binary_repr(dataMat[t], len(bitInfo)),
                                             pholabel[0:6], syllabel[0:6], worlabel[0:6]))

    py_rw.write_raw_mat(dataMat, DataDir + os.path.sep + DataFile + '.bin', 'u1')
    return DataTime
Exemplo n.º 19
0
def meanStdNormMask(fileScps,
                    fileDims,
                    fileNormMask,
                    meanStdOutPath,
                    f0Dim=-1):
    """Compute merged mean/std statistics and neutralise masked dimensions.

    ``stats.getMeanStd_merge`` writes the raw statistics to
    ``meanStdOutPath + '.unmasked'``.  That vector (all means followed by
    all stds) is then edited per feature stream and written to
    ``meanStdOutPath``.  For a masked dimension the mean becomes 0 and the
    std becomes 1.

    Each entry of ``fileNormMask`` is one of:
        ``[]``                  keep the statistics of the stream;
        ``[0]`` / ``['not_norm']``  mask the whole stream;
        ``[start, end]``        mask that range, which must lie inside the
                                stream's own dimension range.
    Any other form prints a warning and leaves the stream unchanged.

    Raises:
        AssertionError: mismatched argument lengths, unexpected size of the
            statistics vector, or a mask range outside its stream.
    """
    assert len(fileDims) == len(fileNormMask), \
        "Unequal length feature dim & norm mask"

    # calculate the mean/std
    unmaskedPath = meanStdOutPath + '.unmasked'
    stats.getMeanStd_merge(fileScps, fileDims, unmaskedPath, f0Dim)

    meanStdData = py_rw.read_raw_mat(unmaskedPath, 1)

    totalDim = sum(fileDims)
    assert meanStdData.shape[0] == totalDim * 2, \
        "%s dimension not %d" % (unmaskedPath, totalDim * 2)

    featDims = []
    startDim = 0
    for dim in fileDims:
        featDims.append([startDim, startDim + dim])
        startDim = startDim + dim

    for dimRange, normMask in zip(featDims, fileNormMask):
        if len(normMask) == 0:
            pass
        elif len(normMask) == 1 and (normMask[0] == 0
                                     or normMask[0] == 'not_norm'):
            meanStdData[dimRange[0]:dimRange[1]] = 0.0
            meanStdData[dimRange[0] + totalDim:dimRange[1] + totalDim] = 1.0
        elif len(normMask) == 2:
            # The messages need a %s placeholder; without it, building the
            # message raised TypeError instead of reporting the bad range.
            assert dimRange[0] <= normMask[0], \
                'normMask range error %s' % (str(normMask))
            assert dimRange[1] >= normMask[1], \
                'normMask range error %s' % (str(normMask))
            meanStdData[normMask[0]:normMask[1]] = 0.0
            meanStdData[normMask[0] + totalDim:normMask[1] + totalDim] = 1.0
        else:
            print("Wrong format of NormMask %s" % (str(normMask)))
        print('normmask %s' % (str(normMask)))

    py_rw.write_raw_mat(meanStdData, meanStdOutPath)
Exemplo n.º 20
0
def createMdnConfig(mdnConfigFile, MDNType, MDNTargetDim, ARDynamic=None, tieVariance=0):
    """Create the mdn.config for MDN CURRENNT

    For every entry of ``MDNType`` a record of five numbers is stored:
    output start, output end, target start, target end and the unit type
    (negative types are stored as -1).  The vector begins with the number
    of entries and is written to ``mdnConfigFile``.

    Args:
        mdnConfigFile: destination file.
        MDNType: sequence of unit types, one per output unit.
        MDNTargetDim: sequence of ``[start, end]`` target ranges.
        ARDynamic: per-unit value forwarded to ``distParaNum``; defaults to
            -1.0 for every unit.
        tieVariance: forwarded to ``distParaNum``.

    Returns:
        The total output width, i.e. the sum of the widths of all records.

    Raises:
        AssertionError: a mixture unit's width disagrees with
            ``distParaNum``, or a negative-type unit targets more than one
            dimension.
    """
    if ARDynamic is None:
        # default, no AR dynamic
        ARDynamic = np.ones([len(MDNType)]) * -1.0

    # First pass: place the units one after another on the output axis.
    bias = 0
    MDNNNOutDim = []
    for idx, mdnConfig in enumerate(MDNType):
        temp = distParaNum(MDNTargetDim[idx][1] - MDNTargetDim[idx][0], mdnConfig,
                           tieVariance, ARDynamic[idx])
        MDNNNOutDim.append([bias, bias + temp])
        bias = temp + bias

    # check and generating the MDN configuration
    mdnconfigdata = np.zeros([1 + len(MDNType) * 5], dtype=np.float32)
    mdnconfigdata[0] = len(MDNType)

    tmp = 0
    for idx, mdnConfig in enumerate(MDNType):
        mdntarDim = MDNTargetDim[idx]
        mdnoutDim = MDNNNOutDim[idx]
        tmp1 = distParaNum(mdntarDim[1] - mdntarDim[0], mdnConfig, tieVariance, ARDynamic[idx])
        tmp2 = (mdnoutDim[1] - mdnoutDim[0])

        if mdnConfig > 0:
            assert tmp1 == tmp2, "Error in MDN mixture configuraiton"
        elif mdnConfig < 0:
            assert mdntarDim[1] - mdntarDim[0] == 1, "Softmax to 1 dimension targert"
            mdnConfig = -1  # change it back to -1

        # Every unit contributes its full output width.  Counting a
        # negative-type unit as 1 made the returned total disagree with the
        # output ranges stored in the records.
        tmp = tmp + tmp2
        mdnconfigdata[(idx * 5 + 1):((idx + 1) * 5 + 1)] = [mdnoutDim[0], mdnoutDim[1],
                                                            mdntarDim[0], mdntarDim[1],
                                                            mdnConfig]

    py_rw.write_raw_mat(mdnconfigdata, mdnConfigFile)
    return tmp
Exemplo n.º 21
0
def meanStdNormMask(fileScps, fileDims, fileNormMask, meanStdOutPath, f0Dim=-1):
    """Compute merged mean/std statistics and neutralise masked dimensions.

    The raw statistics are produced by ``stats.getMeanStd_merge`` in
    ``meanStdOutPath + '.unmasked'`` as one vector: all means followed by
    all stds.  Masked dimensions get mean 0 and std 1, and the edited
    vector is written to ``meanStdOutPath``.

    Each entry of ``fileNormMask`` is ``[]`` (keep), ``[0]`` or
    ``['not_norm']`` (mask the whole stream), or ``[start, end]`` (mask
    that range, which must lie inside the stream's range).  Other forms
    print a warning.

    Raises:
        AssertionError: mismatched argument lengths, unexpected size of the
            statistics vector, or a mask range outside its stream.
    """
    assert len(fileDims) == len(fileNormMask), \
        "Unequal length feature dim & norm mask"

    # calculate the mean/std
    unmaskedPath = meanStdOutPath + '.unmasked'
    stats.getMeanStd_merge(fileScps, fileDims, unmaskedPath, f0Dim)
    meanStdData = py_rw.read_raw_mat(unmaskedPath, 1)

    totalDim = sum(fileDims)
    assert meanStdData.shape[0] == totalDim * 2, \
        "%s dimension not %d" % (unmaskedPath, totalDim * 2)

    startDim = 0
    for dim, normMask in zip(fileDims, fileNormMask):
        endDim = startDim + dim
        if len(normMask) == 0:
            pass
        elif len(normMask) == 1 and (normMask[0] == 0 or normMask[0] == 'not_norm'):
            meanStdData[startDim:endDim] = 0.0
            meanStdData[startDim + totalDim:endDim + totalDim] = 1.0
        elif len(normMask) == 2:
            # A %s placeholder is required; without it a failed check
            # raised TypeError while building the message.
            assert startDim <= normMask[0], 'normMask range error %s' % (str(normMask))
            assert endDim >= normMask[1], 'normMask range error %s' % (str(normMask))
            meanStdData[normMask[0]:normMask[1]] = 0.0
            meanStdData[normMask[0] + totalDim:normMask[1] + totalDim] = 1.0
        else:
            print("Wrong format of NormMask %s" % (str(normMask)))
        print('normmask %s' % (str(normMask)))
        startDim = endDim

    py_rw.write_raw_mat(meanStdData, meanStdOutPath)
Exemplo n.º 22
0
#!/usr/bin/python

from speechTools import wavTool
from scipy.io import wavfile
from ioTools import readwrite as py_rw
import os
import sys
import numpy as np

# Command line: <dirPath> <quantiBitNum> <samplingRate>
# Converts every file listed in <dirPath>/gen.scp from HTK format to wav.
dirPath = sys.argv[1]
quantiBitNum = int(sys.argv[2])
samplingRate = int(sys.argv[3])

fileList = py_rw.read_txt_list(dirPath + '/gen.scp')
for fileName in fileList:
    fileName = fileName.rstrip('\n')

    # Remove only a trailing '.htk' suffix.  str.rstrip('.htk') strips any
    # run of the characters '.', 'h', 't', 'k' and so also eats the end of
    # base names such as 'speech'.
    baseName = os.path.basename(fileName)
    if baseName.endswith('.htk'):
        baseName = baseName[:-len('.htk')]
    namePrefix = dirPath + os.path.sep + baseName
    nameHtk = namePrefix + '.htk'
    nameRaw = namePrefix + '.raw'
    nameWav = namePrefix + '.wav'
    print("%s %s" % (nameRaw, nameWav))
    data = py_rw.read_htk(nameHtk, 'f4', 'b')

    if quantiBitNum > 0:
        # Quantised output: dump raw data, then convert it to wav.
        quantiLevel = np.power(2, quantiBitNum) - 1
        py_rw.write_raw_mat(data, nameRaw)
        wavTool.raw2wav(nameRaw, nameWav, quantiLevel, samplingRate=samplingRate)
    else:
        wavfile.write(nameWav, samplingRate, data)
Exemplo n.º 23
0
from ioTools import readwrite
import kaldi_io
import numpy as np

# Command line: <data_dir> <out_dir>
args = sys.argv
data_dir, out_dir = args[1], args[2]

dataname = basename(data_dir)
xvector_file = "exp/0007_voxceleb_v2_1a/exp/xvector_nnet_1a/am_nsf/xvectors_" + dataname + "/xvector.scp"
xvec_out_dir = join(out_dir, "xvector")
pitch_out_dir = join(out_dir, "f0")

# Write pitch features: column 1 of every matrix listed in pitch.scp goes to
# <out_dir>/f0/<key>.f0
pitch_file = join(data_dir, 'pitch.scp')
pitch2shape = {}
for utt_id, feats in kaldi_io.read_mat_scp(pitch_file):
    #pitch2shape[utt_id] = feats.shape
    readwrite.write_raw_mat(feats[:, 1], join(pitch_out_dir, utt_id + '.f0'))
# The string below is disabled code kept for reference; it is never executed.
'''
# Write xvector features
with open(xvector_file) as f:
    for key, mat in kaldi_io.read_vec_flt_scp(f):
        #print key, mat.shape
        plen = pitch2shape[key][0]
        mat = mat[np.newaxis]
        xvec = np.repeat(mat, plen, axis=0)
        readwrite.write_raw_mat(xvec, join(xvec_out_dir, key+'.xvector'))
'''
Exemplo n.º 24
0
    # std threshold
    # if std < threshold, set std = 1.0 (std is then not used for normalization)
    stdThresh = 0.0000001

    # If mean and std are calculated over multiple types of features,
    # please specify fileDirs, fileExts, dataDims for each type of feature.
    # The output contains a mean/std vector concatenated from all types of features

    # One [2, dataDim] array per feature type: row 0 and row 1 are updated by
    # temprapper (defined elsewhere); row 1 is turned into a std below.
    dataMvBuffer = []

    for idx, fileDir in enumerate(fileDirs):
        fileExt = fileExts[idx]
        dataDim = dataDims[idx]

        dataMv = np.zeros([2, dataDim], dtype=np.float64)
        dataCounter = 0
        # Accumulate statistics over every file named in fileList.
        with open(fileList, 'r') as filePtr:
            for fileName in filePtr:
                fileName = fileName.rstrip('\n')
                print(fileName)
                fileName = fileDir + '/' + fileName + fileExt
                dataMv, dataCounter = temprapper(fileName, dataDim, dataMv,
                                                 dataCounter)

        # NOTE(review): presumably row 1 holds a sum of squared deviations and
        # dataCounter the number of frames; dataCounter == 1 divides by zero.
        dataMv[1] = np.sqrt(dataMv[1] / (dataCounter - 1))
        dataMv = np.asarray(dataMv, dtype=np.float32)
        dataMvBuffer.append(dataMv)
    # Join the feature types along the dimension axis.
    dataMvBuffer = np.concatenate(dataMvBuffer, axis=1)
    # Replace near-zero stds by 1.0.
    dataMvBuffer[1, np.where(dataMvBuffer[1, :] < stdThresh)] = 1.0
    py_rw.write_raw_mat(dataMvBuffer, dataOut)
Exemplo n.º 25
0
def lf02f0_file(data_file, out_file, thres=10.0):
    """Convert the voiced frames of an F0 file with ``lf02f0`` and zero the rest.

    Frames whose value (as returned by ``py_rw.read_raw_lf0``) is above
    ``thres`` are passed through ``lf02f0``; all frames at or below
    ``thres`` are set to 0.0.  The result is written to ``out_file``.
    """
    data = py_rw.read_raw_lf0(data_file, 1)
    # Both masks are taken from the values as read, so the unvoiced test is
    # not applied to frames that have already been converted.
    voiced = data > thres
    unvoiced = data <= thres
    data[voiced] = lf02f0(data[voiced])
    data[unvoiced] = 0.0
    py_rw.write_raw_mat(data, out_file)
        # Running total of frames over all processed files.
        frameSum = frameSum + data.shape[0]

        # Frames with a value above 10 are used for the min/max statistics below.
        F0Idx         = data>10
        dataClas, vuv = discreteF0.f0Conversion(data, F0Max, F0Min, F0Inter, 'c2d', F0Conti)
        # Frames flagged as unvoiced by the conversion get class 0.
        dataClas[vuv<1] = 0.0
        #dataClas = np.zeros([data.shape[0]])
        #if F0Conti:
        #    # Continuous F0
        #    pass
        #    dataClas[F0Idx] = np.round((data[F0Idx] - F0Min)/(F0Max - F0Min) * (F0Inter - 1))
        #else:
        #    # Discontinuous F0, leave one dimension for unvoiced
        #    dataClas[F0Idx] = np.round((data[F0Idx] - F0Min)/(F0Max - F0Min) * (F0Inter - 2)) + 1
        
        # Range of the input values and of the assigned classes for this file.
        tmpmax = np.max(data[F0Idx])
        tmpmin = np.min(data[F0Idx])
        tmpmax2 = np.max(dataClas[F0Idx])
        tmpmin2 = np.min(dataClas[F0Idx])
        
        print tmpmax, tmpmin, tmpmax2, tmpmin2
        # Update the extremes over all files.
        frameMax = np.max([frameMax, tmpmax])
        frameMin = np.min([frameMin, tmpmin])
        
        # The class track is saved next to the input name with a '_class' suffix.
        filePath = fileOut + os.path.sep + fileName + fileExt + '_class'
        py_rw.write_raw_mat(dataClas, filePath)

# Summary over all files.
print "\nmax F0 event, min F0 event, #frame"
print frameMax, frameMin, frameSum
        
Exemplo n.º 27
0
        # Positional command-line arguments: data directory, list of file
        # names, file extension, feature dimension, output directory.
        fileDir  = sys.argv[2]
        fileList = sys.argv[3]
        fileExt  = sys.argv[4]
        fileDim  = int(sys.argv[5])
        fileOut  = sys.argv[6]

        # First pass: count the listed files to size the result matrix.
        cnt = 0
        with open(fileList, 'r') as filePtr:
            for idx, fileName in enumerate(filePtr):
                cnt = cnt + 1

        # One row per file, one column per feature dimension.
        gvData = np.zeros([cnt, fileDim])
        
        # Second pass: fill one row per file with the output of gv()
        # (defined elsewhere).
        cnt = 0
        with open(fileList, 'r') as filePtr:
            for idx, fileName in enumerate(filePtr):
                fileName = fileName.rstrip('\n')
                data = py_rw.read_raw_mat(fileDir + os.path.sep + fileName + fileExt, fileDim)
                # For one-dimensional F0 streams only positive values are kept.
                if (fileExt == '.lf0' or fileExt =='.f0') and fileDim == 1:
                    data = data[data>0]
                gvData[cnt, :] = gv(data)
                cnt = cnt + 1
                #print fileName
        py_rw.write_raw_mat(gvData, fileOut + os.path.sep + 'gv.data.bin')
        # Report the per-dimension median over all files.
        print fileOut, '\t', np.median(gvData, axis=0)
                
                
        
    

Exemplo n.º 28
0
    # Layer sizes of the network, input layer first.
    netTopo = [382, 512, 256, 259]
    
    # Layer type of every layer in netTopo:
    # [0]: input layer
    # [1]: feedforward layer
    # [2]: blstm layer
    layerType = [0, 1, 1, 1]
    
    # Now, specify the connection between the 1st and 2nd, 2nd and 3rd, 3rd and 4th layer
    # [input_start_dimension, input_end_dimension, output_start_dimension, output_end_dimension]
    #
    # To connect the 1st-256th components of the 2nd layer to the 1st-128th elements of the 3rd layer
    #     use [0, 256, 0, 128]
    # Then the 257th-512th components of the 2nd layer to the 129th-256th of the 3rd layer
    #     use [256, 512, 128, 256] 
    # NOTE: Python-style half-open ranges, [0, 5] => [0, 1, 2, 3, 4] 
    netconfig= [[],  # void for the 1st-2nd layer
                [[0, 256, 0, 128], [256, 512, 128, 256]], # 2nd-3rd layer
                []]  # void for the 3rd-4th layer

    # Build the mask with the helpers getNetStruct / genWeightMast (defined
    # elsewhere) and write it to ./weightMask.
    netTopo = np.array(netTopo)
    netTopoNoInput = netTopo[1:]
    layerNM, TotalWNum = getNetStruct(netTopo, netTopoNoInput, layerType)
    weight = genWeightMast(TotalWNum, netconfig, layerNM, netTopoNoInput, 
                           netTopo, layerType, 1)
    py_rw.write_raw_mat(weight, './weightMask')
    
    #os.system("cat ./" + __file__ + " > ./log")
if __name__ == "__main__":

    if sys.argv[1] == 'gv':
        # Arguments: data directory, file list, extension, dimension,
        # output directory.
        fileDir, fileList, fileExt = sys.argv[2], sys.argv[3], sys.argv[4]
        fileDim = int(sys.argv[5])
        fileOut = sys.argv[6]

        # Count the listed files so the result matrix can be allocated.
        with open(fileList, 'r') as filePtr:
            cnt = sum(1 for _line in filePtr)

        gvData = np.zeros([cnt, fileDim])

        # One-dimensional F0 streams keep only their positive values.
        positiveOnly = fileDim == 1 and fileExt in ('.lf0', '.f0')
        with open(fileList, 'r') as filePtr:
            for row, fileName in enumerate(filePtr):
                fileName = fileName.rstrip('\n')
                dataPath = fileDir + os.path.sep + fileName + fileExt
                data = py_rw.read_raw_mat(dataPath, fileDim)
                if positiveOnly:
                    data = data[data > 0]
                gvData[row, :] = gv(data)

        py_rw.write_raw_mat(gvData, fileOut + os.path.sep + 'gv.data.bin')
        print(fileOut, '\t', np.median(gvData, axis=0))
Exemplo n.º 30
0
def f02lf0_file(data_file, out_file, thres=10.0, unvoiced_value=-1.0e+10):
    """Convert the voiced frames of an F0 file with ``f02lf0`` and mark the rest.

    Frames whose value (as returned by ``py_rw.read_raw_lf0``) is above
    ``thres`` are passed through ``f02lf0``; all frames at or below
    ``thres`` are set to ``unvoiced_value``.  The result is written to
    ``out_file``.
    """
    data = py_rw.read_raw_lf0(data_file, 1)
    # Both masks come from the values as read.  Testing `data <= thres`
    # after the conversion would also hit every converted frame whose new
    # value is at or below thres and overwrite it with unvoiced_value.
    voiced = data > thres
    unvoiced = data <= thres
    data[voiced] = f02lf0(data[voiced])
    data[unvoiced] = unvoiced_value
    py_rw.write_raw_mat(data, out_file)
Exemplo n.º 31
0
from ioTools import readwrite as py_rw
import os
import sys
import numpy as np

# Command line: <dirPath> <quantiBitNum> <samplingRate>
# Every file listed in <dirPath>/gen.scp is read in HTK format and saved as wav.
dirPath = sys.argv[1]
quantiBitNum = int(sys.argv[2])
samplingRate = int(sys.argv[3])

fileList = py_rw.read_txt_list(dirPath + '/gen.scp')
for fileName in fileList:
    fileName = fileName.rstrip('\n')

    # str.rstrip('.htk') removes any trailing run of '.', 'h', 't', 'k'
    # characters, not the suffix, and therefore damages names that end in
    # one of those letters.  Cut the suffix explicitly instead.
    stem = os.path.basename(fileName)
    if stem.endswith('.htk'):
        stem = stem[:-len('.htk')]
    nameHtk = dirPath + os.path.sep + stem + '.htk'
    nameRaw = dirPath + os.path.sep + stem + '.raw'
    nameWav = dirPath + os.path.sep + stem + '.wav'
    print("%s %s" % (nameRaw, nameWav))
    data = py_rw.read_htk(nameHtk, 'f4', 'b')

    if quantiBitNum > 0:
        # Quantised output: dump raw data, then convert it to wav.
        quantiLevel = np.power(2, quantiBitNum) - 1
        py_rw.write_raw_mat(data, nameRaw)
        wavTool.raw2wav(nameRaw,
                        nameWav,
                        quantiLevel,
                        samplingRate=samplingRate)
    else:
        wavfile.write(nameWav, samplingRate, data)
    # Advance the running output offset (tail of a loop whose start is not shown).
    bias = temp+bias
print MDNNNOutDim

# check and generating the MDN configuration
# Layout: [number of units, then 5 values per unit].
mdnconfigdata = np.zeros([1+len(MDNType)*5], dtype = np.float32)
mdnconfigdata[0] = len(MDNType)

# tmp accumulates the total output width over all units.
tmp = 0
for idx, mdntype in enumerate(MDNType):
    mdntarDim = MDNTargetDim[idx]
    mdnoutDim = MDNNNOutDim[idx]
    # tmp1: width computed by kmixPara (defined elsewhere); tmp2: width of
    # the output range assigned to this unit.
    tmp1 = kmixPara(mdntarDim[1]-mdntarDim[0], mdntype, tieVariance)
    tmp2 = (mdnoutDim[1]-mdnoutDim[0])
        
    if mdntype > 0:
        # Positive type: the two widths must agree.
        assert tmp1 == tmp2, "Error in MDN mixture configuraiton"
        tmp = tmp + tmp2
        
    elif mdntype < 0:
        # Negative type: the target must be exactly one dimension wide.
        assert mdntarDim[1]-mdntarDim[0]==1, "Softmax to 1 dimension targert"
        tmp = tmp + (mdnoutDim[1]-mdnoutDim[0])
        mdntype = -1 # change it back to -1
    else:
        tmp = tmp + tmp2
    # Record: output start/end, target start/end, unit type.
    mdnconfigdata[(idx*5+1):((idx+1)*5+1)] = [mdnoutDim[0],mdnoutDim[1],
                                              mdntarDim[0],mdntarDim[1],
                                              mdntype]

print "Dimension of output of NN should be %d" % (tmp)
py_rw.write_raw_mat(mdnconfigdata, mdnconfig)
import sys
from os.path import join, basename

from ioTools import readwrite
from kaldiio import ReadHelper

# Positional arguments: <PPG scp file> <output directory>.
args = sys.argv
ppg_file, out_dir = args[1], args[2]

# Every matrix is written to <out_dir>/ppg/<key>.ppg.
ppg_out_dir = join(out_dir, "ppg")

print("Writing PPG feats.....")
with ReadHelper('scp:' + ppg_file) as reader:
    for utt_id, feats in reader:
        out_path = join(ppg_out_dir, utt_id + '.ppg')
        readwrite.write_raw_mat(feats, out_path)
print("Finished writing PPG feats.")

Exemplo n.º 34
0
        # Load the JSON statistics of the source. The file sits under
        # statsdir + dataname and is named after the part of `key` that
        # precedes the first '-' and, within that, the first '_'.
        source_stats = {}
        with open(statsdir + dataname + "/" +
                  key.split("-")[0].split("_")[0]) as f:
            source_stats = json.load(f)

        # Only one target speaker is used here, but the averaging below is
        # written for a list of speakers.
        selected_target_speaker_list = [target_spk]

        # Accumulate "mu_s" and "var_s" over the selected target speakers;
        # each speaker's statistics are a JSON file under
        # statsdir + dataset_of_target named after the speaker.
        pseudo_speaker_f0_stats = {"mu_s": 0, "var_s": 0, "std_s": 0}
        for selected_target_speaker in selected_target_speaker_list:
            target_speaker_stats = {}
            with open(statsdir + dataset_of_target + "/" +
                      selected_target_speaker) as f:
                target_speaker_stats = json.load(f)
                mu = target_speaker_stats["mu_s"]
                var = target_speaker_stats["var_s"]
                pseudo_speaker_f0_stats["mu_s"] += mu
                pseudo_speaker_f0_stats["var_s"] += var
        # Turn the sums into averages; the standard deviation is the square
        # root of the averaged variance.
        pseudo_speaker_f0_stats["var_s"] /= len(selected_target_speaker_list)
        pseudo_speaker_f0_stats["mu_s"] /= len(selected_target_speaker_list)
        pseudo_speaker_f0_stats["std_s"] = math.sqrt(
            pseudo_speaker_f0_stats["var_s"])

        # Source statistics plus the target mean/std under "mu_t"/"std_t";
        # these two keys override same-named entries from source_stats.
        transfomation = {
            **source_stats, "mu_t": pseudo_speaker_f0_stats["mu_s"],
            "std_t": pseudo_speaker_f0_stats["std_s"]
        }

        # A copy of f0 is passed, presumably so the transformation cannot
        # modify the original array in place -- TODO confirm.
        f0t = log_linear_transformation(f0.copy(), transfomation)

        # Store the transformed track as <pitch_out_dir>/<key>.f0.
        readwrite.write_raw_mat(f0t, join(pitch_out_dir, key + '.f0'))
# Create the output directory when it is missing (os.mkdir: one level only).
if not os.path.isdir(dataOut):
    os.mkdir(dataOut)

for dataFile in dataList:
    # Only files whose name matches dataPattern are processed.
    if not re.search(dataPattern, dataFile):
        continue

    data = io.netcdf_file(dataDir + os.path.sep + dataFile)
    uttNum = data.dimensions['numSeqs']

    # Cumulative offsets with a leading 0: utterance i occupies rows
    # seqLengths[i]:seqLengths[i + 1] of the data matrix.
    seqLengths = np.concatenate(
        (np.array([0]), data.variables['seqLengths'][:].copy())).cumsum()
    seqTags = data.variables['seqTags'][:]

    # inOutData == 1 selects the 'inputs' variable, anything else the
    # 'targetPatterns' variable.
    varName = 'inputs' if inOutData == 1 else 'targetPatterns'
    dataAll = data.variables[varName][:]

    for i in range(uttNum):
        # The utterance name is the concatenation of the elements of its tag.
        outName = dataOut + os.path.sep + ''.join(seqTags[i])

        # One output file per suffix in `name`, each holding the column
        # range dim[j][0]:dim[j][1] of this utterance.
        for j, suf in enumerate(name):
            outFile = outName + suf
            rowStart, rowEnd = seqLengths[i], seqLengths[i + 1]
            colStart, colEnd = dim[j][0], dim[j][1]
            tmpdata = dataAll[rowStart:rowEnd, colStart:colEnd].copy()
            if dataMV is not None:
                # Scale by varVec and shift by meanVec; presumably this
                # undoes a normalisation -- confirm where they are loaded.
                tmpdata = (tmpdata * varVec[colStart:colEnd]
                           + meanVec[colStart:colEnd])
            py_rw.write_raw_mat(tmpdata, outFile)
        print("%s Utt %d" % (dataFile, i))

    # Drop the references obtained from the file before closing it.
    del dataAll, seqTags, seqLengths, uttNum
    data.close()
Exemplo n.º 36
0
# Create the output directory when it is missing (os.mkdir: one level only).
if not os.path.isdir(dataOut):
    os.mkdir(dataOut)


for dataFile in dataList:
    # Only files whose name matches dataPattern are processed.
    if not re.search(dataPattern, dataFile):
        continue

    data = io.netcdf_file(dataDir+os.path.sep+dataFile)
    uttNum = data.dimensions['numSeqs']

    # Cumulative offsets with a leading 0: utterance i occupies rows
    # seqLengths[i]:seqLengths[i+1] of the data matrix.
    seqLengths = data.variables['seqLengths'][:].copy()
    seqLengths = np.concatenate((np.array([0]), seqLengths)).cumsum()
    seqTags = data.variables['seqTags'][:]

    # inOutData == 1 selects the 'inputs' variable, anything else the
    # 'targetPatterns' variable.
    if inOutData == 1:
        dataAll = data.variables['inputs'][:]
    else:
        dataAll = data.variables['targetPatterns'][:]

    # range() and print() are used instead of xrange / the print statement
    # so the script runs unchanged on both Python 2 and Python 3.
    for i in range(uttNum):
        # The utterance name is the concatenation of the elements of its tag.
        outName = dataOut+os.path.sep+''.join(seqTags[i])

        # One output file per suffix in `name`, each holding the column
        # range dim[j][0]:dim[j][1] of this utterance.
        for j, suf in enumerate(name):
            outFile = outName + suf
            colStart, colEnd = dim[j][0], dim[j][1]
            tmpdata = dataAll[seqLengths[i]:seqLengths[i+1],
                              colStart:colEnd].copy()
            if dataMV is not None:
                # Scale by varVec and shift by meanVec; presumably this
                # undoes a normalisation -- confirm where they are loaded.
                tmpdata = tmpdata*varVec[colStart:colEnd]+meanVec[colStart:colEnd]
            py_rw.write_raw_mat(tmpdata, outFile)
        print("%s Utt %d" % (dataFile, i))

    # Drop the references obtained from the file before closing it.
    del dataAll, seqTags, seqLengths, uttNum
    data.close()

yaap_pitch_dir = join(data_dir, 'yaapt_pitch')
xvec_out_dir = join(out_dir, "xvector")
pitch_out_dir = join(out_dir, "f0")

# Write pitch features.
# For every utterance listed in pitch.scp, the f0 track read from
# <yaap_pitch_dir>/<key>.f0 is written to <pitch_out_dir>/<key>.f0 with
# exactly as many frames as the Kaldi pitch matrix has rows: shorter tracks
# are zero-padded at the end, longer ones are truncated.
pitch_file = join(data_dir, 'pitch.scp')
pitch2shape = {}
with ReadHelper('scp:'+pitch_file) as reader:
    for key, mat in reader:
        # Row count of the Kaldi matrix; remembered per utterance in
        # pitch2shape for later use.
        num_frames = mat.shape[0]
        pitch2shape[key] = num_frames

        yaapt_f0 = readwrite.read_raw_mat(join(yaap_pitch_dir, key+'.f0'), 1)

        # Size the output from mat.shape[0] directly: deriving it from a
        # squeezed column collapses a one-row matrix to a 0-d array, which
        # cannot be sliced.
        f0 = np.zeros(num_frames)

        # Copy only the common prefix. Assigning the whole YAAPT track
        # raises a broadcast error when it is longer than num_frames.
        num_copy = min(num_frames, yaapt_f0.shape[0])
        f0[:num_copy] = yaapt_f0[:num_copy]

        readwrite.write_raw_mat(f0, join(pitch_out_dir, key+'.f0'))


# Write xvector features.
# Each entry read from xvector_file is repeated along a new leading axis,
# once per frame recorded for the same key in pitch2shape, and written to
# <xvec_out_dir>/<key>.xvector.
with ReadHelper('scp:'+xvector_file) as reader:
    for utt_id, xvector in reader:
        frame_count = pitch2shape[utt_id]
        # xvector[np.newaxis] adds the leading axis that np.repeat expands.
        tiled = np.repeat(xvector[np.newaxis], frame_count, axis=0)
        out_path = join(xvec_out_dir, utt_id+'.xvector')
        readwrite.write_raw_mat(tiled, out_path)


Exemplo n.º 38
0
    #
    # skippara, feedforward can be specified using
    # [row_start, row_end, col_start, col_end] for each block, one list of
    # blocks per layer. Example of the format (blstm):
    #     [[], [[0, 5, 0, 4], [5, 10, 4, 8]], []]
    # NOTE(review): an empty list presumably means "no block restriction"
    # for that layer -- confirm in genWeightMast.

    # Layer sizes, input layer first.
    netTopo = [382, 768, 768, 768, 768, 768, 768, 259]

    # One type code per layer; the first layer is overridden to type 0.
    # NOTE(review): assumes dupVec(x, n) returns a list of n copies of x.
    layerType = dupVec(1, len(netTopo))
    layerType[0] = 0

    # Default block layout for every weight matrix, then per-layer overrides:
    # the first two matrices get no blocks and the last one maps the three
    # row ranges onto the column ranges 0-180, 180-184 and 184-259.
    netconfig = dupVec([[0, 382, 0, 382], [382, 382 + 256, 382, 382 + 256],
                        [382 + 256, 768, 382 + 256, 768]],
                       len(netTopo) - 1)
    netconfig[0] = []
    netconfig[1] = []
    netconfig[len(netTopo) - 2] = [[0, 382, 0, 180],
                                   [382, 382 + 256, 180, 184],
                                   [382 + 256, 768, 184, 259]]

    netTopo = np.array(netTopo)
    netTopoNoInput = netTopo[1:]
    layerNM, TotalWNum = getNetStruct(netTopo, netTopoNoInput, layerType)
    weight = genWeightMast(TotalWNum, netconfig, layerNM, netTopoNoInput,
                           netTopo, layerType, 1)
    py_rw.write_raw_mat(weight, './weightMask')

    # Keep a copy of this script in ./log, followed by the parameter counts.
    # NOTE(review): the shell command is built by string concatenation and
    # assumes __file__ is a plain path relative to the current directory.
    os.system("cat ./" + __file__ + " > ./log")
    # The context manager closes the log even if a write fails.
    with open('./log', 'a') as fileptr:
        fileptr.write('Number of parameter: %d \n' % (TotalWNum))
        fileptr.write('Number of non-zero parameter: %d \n' % (np.sum(weight)))
    print(np.sum(weight))
Exemplo n.º 39
0
import sys
from os.path import join, basename

from ioTools import readwrite
import kaldi_io

# Positional arguments: <mel-spectrogram scp file> <output directory>.
args = sys.argv
mspec_file = args[1]
out_dir = args[2]

# Every matrix is written to <out_dir>/mel/<key>.mel.
mspec_out_dir = join(out_dir, "mel")

# print() with a single argument behaves the same on Python 2 and 3; the
# former print statements were a SyntaxError on Python 3.
print("Writing MEL feats.....")
# Write mspec features
for key, mat in kaldi_io.read_mat_scp(mspec_file):
    readwrite.write_raw_mat(mat, join(mspec_out_dir, key + '.mel'))
print("Finished writing MEL feats.")