Пример #1
0
def RMSECalcore(file1, file2, dim):
    """Compute frame-wise squared errors between two raw feature files.

    Both files are loaded with funcs.read_raw_mat(path, dim). A warning is
    printed when the relative length difference exceeds 20%, and the longer
    sequence is truncated to the length of the shorter one.

    Args:
        file1, file2: paths of the two feature files to compare.
        dim: feature dimension; dim == 1 is treated as F0.

    Returns:
        (rmse, frameNum, corr)
        dim == 1:
            rmse is an [N, 3] array (N = truncated length). Column 0 is the
            squared difference of the F0Transform-ed values on frames where
            both inputs are > 0, column 1 flags frames where exactly one
            input is > 0, column 2 flags frames where both are > 0.
            corr is the result of scipy.stats.spearmanr on those frames.
            frameNum is the length of the F0Transform output (the jointly
            voiced frames), NOT N, because data1 is rebound before return.
            When no frame is voiced in both files, rmse is all NaN,
            corr is [nan, 0] and frameNum is N.
        otherwise:
            rmse is (data1 - data2) ** 2, frameNum is N and corr is -10.
    """
    data1 = funcs.read_raw_mat(file1, dim)
    data2 = funcs.read_raw_mat(file2, dim)

    # check the data length
    len1, len2 = data1.shape[0], data2.shape[0]
    if np.abs(len1 - len2) * 2.0 / (len1 + len2) > 0.2:
        # single parenthesised argument: valid on both Python 2 and Python 3
        print("Warning: length mis-match: %s %d, %s %d" % (file1, len1,
                                                           file2, len2))

    # trim the longer sequence so that both cover the same frames
    frameNum = min(len1, len2)
    if len1 > frameNum:
        data1 = data1[0:frameNum] if dim == 1 else data1[0:frameNum, :]
    elif len2 > frameNum:
        data2 = data2[0:frameNum] if dim == 1 else data2[0:frameNum, :]

    if dim != 1:
        # generic features: plain squared error, no correlation
        diff = data1 - data2
        return diff * diff, data1.shape[0], -10

    # F0: compare only frames that are voiced (> 0) in both files
    diff = np.zeros([data1.shape[0], 3])
    voiced1 = data1 > 0
    voiced2 = data2 > 0
    bothVoiced = voiced1 * voiced2
    uvMismatch = np.bitwise_xor(voiced1, voiced2)

    if sum(bothVoiced) > 0:
        data1 = F0Transform(data1[bothVoiced])
        data2 = F0Transform(data2[bothVoiced])
        diff[bothVoiced, 0] = data1 - data2
        diff[uvMismatch, 1] = 1
        diff[bothVoiced, 2] = 1
        rmse = diff * diff
        corr = scipy.stats.spearmanr(data1, data2)
    else:
        corr = [np.nan, 0]
        rmse = diff * np.nan

    return rmse, data1.shape[0], corr
Пример #2
0
def RMSECalcore(file1, file2, dim):
    """Compute frame-wise squared errors between two raw feature files.

    Both files are read with funcs.read_raw_mat(path, dim). If their lengths
    differ by more than 20% (relative to the mean length) a warning is
    printed; the longer one is then cut to the length of the shorter one.

    Returns a tuple (rmse, frameNum, corr):
      * dim == 1 (F0): rmse is [N, 3]; column 0 = squared difference of the
        F0Transform-ed values where both inputs are > 0, column 1 = 1 where
        exactly one input is > 0, column 2 = 1 where both are > 0.
        corr = scipy.stats.spearmanr(...) on the jointly voiced frames and
        frameNum = length of the F0Transform output. Without any jointly
        voiced frame: rmse is NaN, corr = [nan, 0], frameNum = N.
      * other dims: rmse = (data1 - data2) ** 2, frameNum = N, corr = -10.
    """
    data1 = funcs.read_raw_mat(file1, dim)
    data2 = funcs.read_raw_mat(file2, dim)

    # warn when the two sequences differ too much in length
    len1, len2 = data1.shape[0], data2.shape[0]
    relativeGap = np.abs(len1 - len2) * 2.0 / (len1 + len2)
    if relativeGap > 0.2:
        print("Warning: length mis-match: %s %d, %s %d" %
              (file1, len1, file2, len2))

    # cut the longer sequence down to the common length
    common = min(len1, len2)
    if len1 > common:
        data1 = data1[0:common] if dim == 1 else data1[0:common, :]
    elif len2 > common:
        data2 = data2[0:common] if dim == 1 else data2[0:common, :]

    if dim != 1:
        delta = data1 - data2
        return delta * delta, data1.shape[0], -10

    # F0 branch
    errBuf = np.zeros([data1.shape[0], 3])
    isVoiced1 = data1 > 0
    isVoiced2 = data2 > 0

    # frames voiced in both / frames whose voicing decision differs
    voicedBoth = (isVoiced1 * isVoiced2)
    voicingDiffers = np.bitwise_xor(isVoiced1, isVoiced2)

    if sum(voicedBoth) > 0:
        data1 = F0Transform(data1[voicedBoth])
        data2 = F0Transform(data2[voicedBoth])
        errBuf[voicedBoth, 0] = data1 - data2
        errBuf[voicingDiffers, 1] = 1
        errBuf[voicedBoth, 2] = 1
        rmse = errBuf * errBuf
        corr = scipy.stats.spearmanr(data1, data2)
    else:
        corr = [np.nan, 0]
        rmse = errBuf * np.nan

    return rmse, data1.shape[0], corr
Пример #3
0
def showRMSE(dim, rmseFile):
    """Print the summary found in the last row of an RMSE result file.

    Args:
        dim: feature dimension. For dim == 1 (F0) the file is read with 3
            columns and the last row is shown as RMSE (column 0),
            correlation (column 2) and V/U error (column 1). Otherwise the
            file is read with dim + 1 columns and the last element of the
            last row is shown as RMSE.
        rmseFile: path handed to funcs.read_raw_mat.

    The text is written without a trailing newline so the caller can keep
    printing on the same line.
    """
    import sys

    if dim == 1:
        # F0
        data = funcs.read_raw_mat(rmseFile, 3)
        summary = "RMSE: %f\tCor: %f\t VU:%f\t" % (data[-1, 0], data[-1, 2],
                                                   data[-1, 1])
    else:
        # MGC
        data = funcs.read_raw_mat(rmseFile, dim + 1)
        summary = "RMSE: %f\t" % (data[-1, -1])

    # Replaces the Python-2-only `print summary,`. Both strings end in a tab,
    # so the py2 statement set no soft space and emitted exactly these bytes.
    sys.stdout.write(summary)
Пример #4
0
def showRMSE(dim, rmseFile):
    """Print the summary found in the last row of an RMSE result file.

    Args:
        dim: feature dimension. dim == 1 means F0: the file is read with 3
            columns and the last row is reported as RMSE (column 0),
            correlation (column 2) and V/U error (column 1). Any other value
            reads dim + 1 columns and reports the last element of the last
            row as RMSE.
        rmseFile: path handed to funcs.read_raw_mat.

    No newline is emitted, so following output continues on the same line.
    """
    import sys

    if dim == 1:
        # F0
        lastRow = funcs.read_raw_mat(rmseFile, 3)[-1]
        text = "RMSE: %f\tCor: %f\t VU:%f\t" % (
            lastRow[0],
            lastRow[2],
            lastRow[1])
    else:
        # MGC
        lastRow = funcs.read_raw_mat(rmseFile, dim+1)[-1]
        text = "RMSE: %f\t" % (lastRow[-1])

    # Python 2's `print text,` is a SyntaxError on Python 3. Because the text
    # ends in a tab, that statement wrote exactly `text` (no soft space, no
    # newline); sys.stdout.write reproduces this on both interpreters.
    sys.stdout.write(text)
Пример #5
0
def RMSECalcore(file1, file2, dim):
    """Compute frame-wise squared errors between two raw feature files.

    Both files are loaded with funcs.read_raw_mat(path, dim) and the longer
    one is truncated to the length of the shorter one.

    Args:
        file1, file2: paths of the two feature files to compare.
        dim: feature dimension; dim == 1 is treated as F0.

    Returns:
        (pow2, frameNum, corr)
        dim == 1:
            pow2 is an [N, 3] array (N = truncated length). Column 0 is the
            squared difference of the F0Transform-ed values on frames where
            both inputs are > 0, column 1 flags frames where exactly one
            input is > 0, column 2 flags frames where both are > 0.
            corr is the result of scipy.stats.pearsonr on those frames and
            frameNum is the length of the F0Transform output (data1 is
            rebound before the return). If no frame is voiced in both files,
            pow2 is all NaN, corr is [nan, 0] and frameNum is N.
        otherwise:
            pow2 is (data1 - data2) ** 2, frameNum is N and corr is -10.
    """
    data1 = funcs.read_raw_mat(file1, dim)
    data2 = funcs.read_raw_mat(file2, dim)

    # trim the longer sequence so that both cover the same frames
    frameNum = min(data1.shape[0], data2.shape[0])
    if data1.shape[0] > frameNum:
        data1 = data1[0:frameNum] if dim == 1 else data1[0:frameNum, :]
    elif data2.shape[0] > frameNum:
        data2 = data2[0:frameNum] if dim == 1 else data2[0:frameNum, :]

    if dim != 1:
        diff = data1 - data2
        return diff * diff, data1.shape[0], -10

    # This is F0
    diff = np.zeros([data1.shape[0], 3])
    temp1 = data1 > 0
    temp2 = data2 > 0
    indp = temp1 * temp2                 # voiced in both files
    # u/v decision differs. `temp1 - temp2` on boolean arrays raises
    # TypeError on NumPy >= 1.13; XOR is the operation it used to perform.
    indn = np.bitwise_xor(temp1, temp2)

    if sum(indp) > 0:
        data1 = F0Transform(data1[indp])
        data2 = F0Transform(data2[indp])
        diff[indp, 0] = data1 - data2
        diff[indn, 1] = 1
        diff[indp, 2] = 1
        pow2 = diff * diff
        corr = scipy.stats.pearsonr(data1, data2)
    else:
        corr = [np.nan, 0]
        pow2 = diff * np.nan

    return pow2, data1.shape[0], corr
Пример #6
0
def RMSECalcore(file1, file2, dim):
    """Compute frame-wise squared errors between two raw feature files.

    The two files are read through funcs.read_raw_mat(path, dim); whichever
    is longer is cut to the length of the other before the comparison.

    Args:
        file1, file2: paths of the feature files.
        dim: feature dimension; dim == 1 selects the F0 handling.

    Returns:
        (pow2, frameNum, corr)
        F0 (dim == 1): pow2 has shape [N, 3] with
            column 0 - squared difference of the F0Transform-ed values on
                       frames where both inputs are > 0,
            column 1 - 1 on frames where exactly one input is > 0,
            column 2 - 1 on frames where both inputs are > 0;
            corr is scipy.stats.pearsonr over the jointly voiced frames and
            frameNum is the length of the F0Transform output. Without any
            jointly voiced frame pow2 is NaN, corr is [nan, 0] and frameNum
            is the truncated length N.
        other dims: pow2 = (data1 - data2) ** 2, frameNum = N, corr = -10.
    """
    data1 = funcs.read_raw_mat(file1, dim)
    data2 = funcs.read_raw_mat(file2, dim)

    # equalise the lengths by trimming the longer input
    common = min(data1.shape[0], data2.shape[0])
    if data1.shape[0] > common:
        data1 = data1[0:common] if dim == 1 else data1[0:common, :]
    elif data2.shape[0] > common:
        data2 = data2[0:common] if dim == 1 else data2[0:common, :]

    if dim == 1:
        # This is F0
        diff = np.zeros([data1.shape[0], 3])
        voiced1 = data1 > 0
        voiced2 = data2 > 0
        indp = voiced1 * voiced2             # all voiced
        # u/v different: boolean `-` is rejected by NumPy >= 1.13 (TypeError),
        # so use the XOR that the subtraction historically meant.
        indn = np.bitwise_xor(voiced1, voiced2)

        if sum(indp) > 0:
            data1 = F0Transform(data1[indp])
            data2 = F0Transform(data2[indp])
            diff[indp, 0] = data1 - data2
            diff[indn, 1] = 1
            diff[indp, 2] = 1
            pow2 = diff * diff
            corr = scipy.stats.pearsonr(data1, data2)
        else:
            corr = [np.nan, 0]
            pow2 = diff * np.nan
    else:
        diff = data1 - data2
        pow2 = diff * diff
        corr = -10

    return pow2, data1.shape[0], corr
Пример #7
0
def f0Conversion(dataOut, outname):
    """Write generated data, converting discrete F0 to continuous F0 first.

    If outname ends in '.qf0' or '.lf0':
      1. dataOut is saved unchanged as <base>.qf0,
      2. it is converted with f0funcs.f0Conversion(..., 'd2c', ...) using
         the settings in cfg.f0Info,
      3. when cfg.f0Info says the F0 is interpolated, an existing <base>.vuv
         is loaded and frames with vuv < 0.5 are set to 0.0 (a message is
         printed if that file is missing),
      4. the vuv flags go to <base>.vuv and the converted data to <base>.lf0.
    All of these files are placed in the directory of outname.

    For any other extension dataOut is written to outname as is.

    Args:
        dataOut: data to be written (quantized F0 for .qf0/.lf0 names).
        outname: target path; its extension selects the behaviour.
    """
    fileDir = os.path.dirname(outname)
    fileBase, fileExt = os.path.splitext(os.path.basename(outname))

    if fileExt not in ('.qf0', '.lf0'):
        # for other data
        defaultOutput(dataOut, outname)
        return

    # for F0
    f0Max, f0Min, f0Levels, f0Interpolated = cfg.f0Info

    # os.path.join instead of fileDir + os.path.sep + fileBase: with a bare
    # file name fileDir is '' and the old form produced '/<base>.ext',
    # i.e. a path in the filesystem root.
    pathPrefix = os.path.join(fileDir, fileBase)

    defaultOutput(dataOut, pathPrefix + '.qf0')

    dataOut, vuv = f0funcs.f0Conversion(dataOut, f0Max, f0Min, f0Levels,
                                        'd2c', f0Interpolated)
    if f0Interpolated:
        vuvFile = pathPrefix + '.vuv'
        if os.path.isfile(vuvFile):
            vuv = funcs.read_raw_mat(vuvFile, 1)
            dataOut[vuv < 0.5] = 0.0
        else:
            # single parenthesised argument: valid on Python 2 and Python 3
            print("Can't find %s for interpolated F0" % (vuvFile))

    defaultOutput(vuv, pathPrefix + '.vuv')
    defaultOutput(dataOut, pathPrefix + '.lf0')
Пример #8
0
def f0Conversion(dataOut, outname):
    """Write generated data, converting discrete F0 to continuous F0 first.

    For an outname ending in '.qf0' or '.lf0' the function
      * stores the incoming dataOut as <base>.qf0,
      * converts it with f0funcs.f0Conversion(..., 'd2c', ...) using the
        values unpacked from cfg.f0Info,
      * if the F0 is flagged as interpolated, loads an existing <base>.vuv
        and zeroes every frame whose vuv value is below 0.5 (prints a
        message when the file is absent),
      * stores the vuv flags as <base>.vuv and the converted data as
        <base>.lf0.
    Every file is created in the directory of outname. With any other
    extension dataOut is simply written to outname.

    Args:
        dataOut: data to be written (quantized F0 for .qf0/.lf0 names).
        outname: target path; its extension selects the behaviour.
    """
    fileDir = os.path.dirname(outname)
    fileName = os.path.basename(outname)
    fileBase, fileExt = os.path.splitext(fileName)

    if fileExt == '.qf0' or fileExt == '.lf0':
        # for F0
        f0Max, f0Min, f0Levels, f0Interpolated = cfg.f0Info

        # Build the paths with os.path.join: concatenating
        # fileDir + os.path.sep yields '/<base>.ext' (filesystem root) when
        # outname carries no directory component.
        stem = os.path.join(fileDir, fileBase)

        defaultOutput(dataOut, stem + '.qf0')

        dataOut, vuv = f0funcs.f0Conversion(dataOut, f0Max, f0Min, f0Levels,
                                            'd2c', f0Interpolated)
        if f0Interpolated:
            vuvFile = stem + '.vuv'
            if os.path.isfile(vuvFile):
                vuv = funcs.read_raw_mat(vuvFile, 1)
                dataOut[vuv < 0.5] = 0.0
            else:
                print("Can't find %s for interpolated F0" % (vuvFile))

        defaultOutput(vuv, stem + '.vuv')
        defaultOutput(dataOut, stem + '.lf0')

    else:
        # for other data
        defaultOutput(dataOut, outname)
def generateLabIndex(labfile, outfile, featDim):
    """Write the frame indices 0 .. N-1 of a label file to outfile.

    labfile is read with py_rw.read_raw_mat(labfile, featDim) only to learn
    its number of rows N; np.arange(N) is then stored with
    py_rw.write_raw_mat. When labfile does not exist a message is printed
    and nothing is written.
    """
    if not os.path.isfile(labfile):
        print("Not found %s" % (labfile))
        return

    frameNum = py_rw.read_raw_mat(labfile, featDim).shape[0]
    py_rw.write_raw_mat(np.arange(frameNum), outfile)
Пример #10
0
def getMeanStd(fileScp, fileDim, stdFloor=0.00001, f0Feature=0):
    """Calculate the mean and std over all frames of a list of files.

    Each file contributes its own mean/variance, which are merged into the
    running statistics with the parallel (pairwise) update from
    https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance

    Args:
        fileScp: text file with one data file path per line.
        fileDim: feature dimension passed to py_rw.read_raw_mat.
        stdFloor: std values below this threshold are replaced by 1.0.
        f0Feature: when true and fileDim == 1, only values > 0 are used
            (unvoiced frames are dropped).

    Returns:
        (meanBuf, stdBuf) as float32 arrays. With no usable data the result
        is a zero mean and a std of 1.0 (after flooring).
    """
    meanBuf = np.zeros([fileDim], dtype=np.float64)
    stdBuf = np.zeros([fileDim], dtype=np.float64)   # holds the variance
    timeStep = 0

    # count the entries for the progress display; `with` closes the handle
    with open(fileScp, 'r') as filePtr:
        fileNum = sum(1 for _ in filePtr)

    with open(fileScp, 'r') as filePtr:
        for idx, fileName in enumerate(filePtr):
            fileName = fileName.rstrip('\n')
            data = py_rw.read_raw_mat(fileName, fileDim)

            sys.stdout.write('\r')
            sys.stdout.write("%d/%d" % (idx, fileNum))

            if f0Feature and fileDim == 1:
                # if this is F0 feature, remove unvoiced region
                data = data[np.where(data > 0)]

            dataCount = data.shape[0]
            if dataCount == 0:
                # Nothing to merge. Going on would divide by zero and push
                # NaN into the running statistics.
                continue

            if len(data.shape) == 1:
                meanNew = data.mean()
                stdNew = data.var()
            else:
                meanNew = data.mean(axis=0)
                stdNew = data.var(axis=0)

            totalCount = timeStep + dataCount
            deltaMean = meanNew - meanBuf
            meanBuf = meanBuf + deltaMean * (float(dataCount) / totalCount)

            if timeStep == 0:
                # first usable file: its variance is the running variance
                if len(data.shape) == 1:
                    stdBuf[0] = stdNew
                else:
                    stdBuf = stdNew
            else:
                # weighted variances plus the between-group term
                # deltaMean^2 * n_a * n_b / (n_a + n_b)^2
                stdBuf = (stdBuf * (float(timeStep) / totalCount) +
                          stdNew * (float(dataCount) / totalCount) +
                          deltaMean * deltaMean / (float(dataCount) / timeStep +
                                                   float(timeStep) / dataCount + 2.0))

            timeStep = totalCount

    sys.stdout.write('\n')
    stdBuf = np.sqrt(stdBuf)

    # floor tiny std values so that later division stays well-behaved
    floorIdx = stdBuf < stdFloor
    stdBuf[floorIdx] = 1.0

    meanBuf = np.asarray(meanBuf, dtype=np.float32)
    stdBuf = np.asarray(stdBuf, dtype=np.float32)

    return meanBuf, stdBuf
Пример #11
0
def compennom(ncFile, mvFile, ncTarget=None, mask=None, reverse=0, flagKeepOri=1,
              addMV=1, flushT=400000, waitT=5, stdT=0.000001):
    """Add the mean/std back to some dimensions.

    Selects the element-wise operation and hands it to ncFileManipulate.

    Args:
        ncFile, mvFile, ncTarget, flagKeepOri, addMV, flushT, waitT, stdT:
            forwarded unchanged to ncFileManipulate.
        mask: optional path of a mask file (read with
            py_rw.read_raw_mat(mask, 1)); 1 = compensate the dimension,
            0 = do not compensate it. Without a mask the operation is
            x + y / z.
        reverse: when true, the mask is inverted (1 - mask).

    NOTE(review): x, y, z are supplied by ncFileManipulate - presumably
    data, mean and std; confirm against that function.
    """
    # parenthesised single argument: valid on both Python 2 and Python 3
    print("compensente %s" % (ncFile))

    if mask is None:
        maskData = None

        def operation(x, y, z):
            return x+y/z
    else:
        maskData = py_rw.read_raw_mat(mask, 1)
        if reverse:
            maskData = 1-maskData

        def operation(x, y, z, m):
            return x+y/z*m

    ncFileManipulate(ncFile, mvFile, operation, ncTarget, flagKeepOri,
                     addMV, flushT, waitT, stdT, maskData)
Пример #12
0
def compennom(ncFile, mvFile, ncTarget=None, mask=None, reverse=0, flagKeepOri=1,
              addMV=1, flushT=400000, waitT=5, stdT=0.000001):
    """Add the mean/std back to some dimensions.

    Picks the element-wise operation and delegates to ncFileManipulate.

    mask: optional path read with py_rw.read_raw_mat(mask, 1)
          1: compensate the dimension
          0: do not compensate it
          (inverted as 1 - mask when `reverse` is true)
    Without a mask the operation is x + y / z; with a mask it is
    x + y / z * m. All remaining arguments are passed through unchanged.
    """
    print("compensente %s" % (ncFile))

    if mask is None:
        maskData = None

        def operation(x, y, z):
            return x+y/z
    else:
        maskData = py_rw.read_raw_mat(mask, 1)
        if reverse:
            maskData = 1-maskData

        def operation(x, y, z, m):
            return x+y/z*m

    ncFileManipulate(ncFile, mvFile, operation, ncTarget, flagKeepOri,
                     addMV, flushT, waitT, stdT, maskData)
Пример #13
0
def normnom(ncFile, mvFile, ncTarget=None, mask=None, reverse=0, flagKeepOri=1,
            addMV=1, flushT=400000, waitT=5, stdT=0.000001):
    """Normalize without shifting the mean of certain dimensions.

    Chooses the element-wise operation and calls ncFileManipulate.

    mask: optional path read with py_rw.read_raw_mat(mask, 1)
          1: normalize the dimension without shifting its mean
          0: normalize it (mean is subtracted)
          (inverted as 1 - mask when `reverse` is true)
    Without a mask the operation is x / z; with a mask it is
    (x - y * (1 - m)) / z. The other arguments are forwarded unchanged.
    """
    print("normlizing without mean shift %s" % (ncFile))

    if mask is None:
        maskData = None

        def operation(x, y, z):
            return (x)/z
    else:
        maskData = py_rw.read_raw_mat(mask, 1)
        if reverse:
            maskData = 1-maskData

        def operation(x, y, z, m):
            return (x-y*(1-m))/z

    ncFileManipulate(ncFile, mvFile, operation, ncTarget, flagKeepOri,
                     addMV, flushT, waitT, stdT, maskData)
Пример #14
0
def norm(ncFile, mvFile, ncTarget=None, mask=None, flagKeepOri=1,
         addMV=1, flushT=400000, waitT=5, stdT=0.000001, reverse=0):
    """Normalize the data.

    Selects the element-wise operation and hands it to ncFileManipulate.

    Args:
        ncFile, mvFile, ncTarget, flagKeepOri, addMV, flushT, waitT, stdT:
            forwarded unchanged to ncFileManipulate.
        mask: optional path of a mask file (read with
            py_rw.read_raw_mat(mask, 1)); 1 = normalize the dimension,
            0 = leave it as is. With a mask the operation is
            (x - y * m) / (z ** m), otherwise (x - y) / z.
        reverse: when true, the mask is inverted (1 - mask).

    NOTE(review): x, y, z are supplied by ncFileManipulate - presumably
    data, mean and std; confirm against that function.
    """
    # parenthesised single argument: valid on both Python 2 and Python 3
    print("norm %s " % (ncFile))

    if mask is None:
        maskData = None

        def operation(x, y, z):
            return (x-y)/z
    else:
        maskData = py_rw.read_raw_mat(mask, 1)
        if reverse:
            maskData = 1-maskData

        def operation(x, y, z, m):
            return (x-y*m)/(z**m)

    ncFileManipulate(ncFile, mvFile, operation, ncTarget, flagKeepOri,
                     addMV, flushT, waitT, stdT, maskData)
Пример #15
0
def norm(ncFile, mvFile, ncTarget=None, mask=None, flagKeepOri=1,
         addMV=1, flushT=400000, waitT=5, stdT=0.000001, reverse=0):
    """Normalize the data.

    Picks the element-wise operation and delegates to ncFileManipulate.

    mask: optional path read with py_rw.read_raw_mat(mask, 1)
          1: normalize the dimension
          0: do not normalize it
          (inverted as 1 - mask when `reverse` is true)
    Without a mask the operation is (x - y) / z; with a mask it is
    (x - y * m) / (z ** m). All other arguments are passed through.
    """
    print("norm %s " % (ncFile))

    if mask is None:
        maskData = None

        def operation(x, y, z):
            return (x-y)/z
    else:
        maskData = py_rw.read_raw_mat(mask, 1)
        if reverse:
            maskData = 1-maskData

        def operation(x, y, z, m):
            return (x-y*m)/(z**m)

    ncFileManipulate(ncFile, mvFile, operation, ncTarget, flagKeepOri,
                     addMV, flushT, waitT, stdT, maskData)
Пример #16
0
def normnom(ncFile, mvFile, ncTarget=None, mask=None, reverse=0, flagKeepOri=1,
            addMV=1, flushT=400000, waitT=5, stdT=0.000001):
    """Normalize without shifting the mean of certain dimensions.

    Selects the element-wise operation and hands it to ncFileManipulate.

    Args:
        ncFile, mvFile, ncTarget, flagKeepOri, addMV, flushT, waitT, stdT:
            forwarded unchanged to ncFileManipulate.
        mask: optional path of a mask file (read with
            py_rw.read_raw_mat(mask, 1)); 1 = normalize the dimension
            without shifting its mean, 0 = normalize it. With a mask the
            operation is (x - y * (1 - m)) / z, otherwise x / z.
        reverse: when true, the mask is inverted (1 - mask).

    NOTE(review): x, y, z are supplied by ncFileManipulate - presumably
    data, mean and std; confirm against that function.
    """
    # parenthesised single argument: valid on both Python 2 and Python 3
    print("normlizing without mean shift %s" % (ncFile))

    if mask is None:
        maskData = None

        def operation(x, y, z):
            return (x)/z
    else:
        maskData = py_rw.read_raw_mat(mask, 1)
        if reverse:
            maskData = 1-maskData

        def operation(x, y, z, m):
            return (x-y*(1-m))/z

    ncFileManipulate(ncFile, mvFile, operation, ncTarget, flagKeepOri,
                     addMV, flushT, waitT, stdT, maskData)
Пример #17
0
def meanStd(ncScp, mvFile, normMethod=None):
    """
    Calculate the per-dimension mean and standard deviation of the
    'inputs' and 'targetPatterns' variables over all .nc files listed in
    ncScp (one path per line) and store them in the NetCDF file mvFile.

    The statistics are accumulated frame by frame with Welford's online
    algorithm. NOTE(review): the final division uses (timeStep - 1), i.e.
    the sample (n-1) estimate, and therefore fails for a single frame.

    normMethod: optional path of a raw file read with
                py_rw.read_raw_mat(normMethod, 1, 'i4', 'l'); it must hold
                one entry per input dimension followed by one per output
                dimension. For entries < 0 the stored "mean" is replaced by
                a range-adjusted minimum and the stored "std" by the
                adjusted (max - min), using the global g_MinMaxRange.
                When given, the unmodified mean/std and the max/min are
                additionally saved in '*_ori' variables.

    Note: written for Python 2 (print statement, xrange). `io` is presumably
    scipy.io - confirm against the file's imports.
    """
    # total number of frames accumulated so far over all files
    timeStep = 0
    with open(ncScp, 'r') as filePtr:
        for idx, ncFile in enumerate(filePtr):
            ncFile = ncFile.rstrip('\n')
            data = io.netcdf_file(ncFile)
            print "Processing %s" % (ncFile)
            if idx==0:
                # for the first file, get the dimension of data
                # create the buffer
                inputSize = data.dimensions['inputPattSize']
                outSize   = data.dimensions['targetPattSize']
                meanInBuf = np.zeros([inputSize], dtype=np.float64)
                stdInBuf  = np.zeros([inputSize], dtype=np.float64)
                meanOutBuf = np.zeros([outSize], dtype=np.float64)
                stdOutBuf  = np.zeros([outSize], dtype=np.float64)
                
                # if max min normalization is used, get the max and min value
                # (column 0 keeps the maximum, column 1 the minimum)
                if normMethod is not None:
                    maxminInBuf      = np.zeros([inputSize, 2], dtype=np.float64)
                    maxminInBuf[:,0] = data.variables['inputs'][:].max(axis = 0)
                    maxminInBuf[:,1] = data.variables['inputs'][:].min(axis = 0)
                    print "Input max %f\tmin %f" % (maxminInBuf[:,0].max(), maxminInBuf[:,1].min())
                    maxminOutBuf      = np.zeros([outSize, 2], dtype=np.float64)
                    maxminOutBuf[:,0] = data.variables['targetPatterns'][:].max(axis = 0)
                    maxminOutBuf[:,1] = data.variables['targetPatterns'][:].min(axis = 0)
                    print "Output max %f\tmin %f" % (maxminOutBuf[:,0].max(), 
                                                     maxminOutBuf[:,1].min())
                #
            else:
                # for the remaining data files: widen the running max/min
                if normMethod is not None:
                    tmp = data.variables['inputs'][:].max(axis = 0)
                    maxminInBuf[:,0] = np.maximum(tmp, maxminInBuf[:,0])
                    tmp = data.variables['inputs'][:].min(axis = 0)
                    maxminInBuf[:,1] = np.minimum(tmp, maxminInBuf[:,1])
                    print "Input max %f\tmin %f" % (maxminInBuf[:,0].max(), maxminInBuf[:,1].min())
                    tmp = data.variables['targetPatterns'][:].max(axis = 0)
                    maxminOutBuf[:,0] = np.maximum(tmp, maxminOutBuf[:,0])
                    tmp = data.variables['targetPatterns'][:].min(axis = 0)
                    maxminOutBuf[:,1] = np.minimum(tmp, maxminOutBuf[:,1])
                    print "Output max %f\tmin %f" % (maxminOutBuf[:,0].max(), 
                                                     maxminOutBuf[:,1].min())
            
            numTimes = data.dimensions['numTimesteps']
            print "Processing %s of %s frames" % (ncFile, numTimes)
            print "Input max %f\tmin %f"  % (data.variables['inputs'][:].max(),
                                             data.variables['inputs'][:].min())
            print "Output max %f\tmin %f" % (data.variables['targetPatterns'][:].max(),
                                             data.variables['targetPatterns'][:].min())
            
            # Welford update, one frame at a time: the mean is updated first,
            # then the sum of squared deviations is advanced with
            # (x - old mean) * (x - new mean). The std buffers hold that sum
            # until the division after the loop.
            for t in xrange(numTimes):
                tmpIn = (data.variables['inputs'][t, :]-meanInBuf)
                meanInBuf = meanInBuf + tmpIn*1.0/(timeStep+t+1)
                tmpOut = (data.variables['targetPatterns'][t, :]-meanOutBuf)
                meanOutBuf = meanOutBuf + tmpOut*1.0/(timeStep+t+1)
                stdInBuf = stdInBuf + tmpIn*(data.variables['inputs'][t, :]-meanInBuf)
                stdOutBuf = stdOutBuf + tmpOut*(data.variables['targetPatterns'][t, :]-meanOutBuf)
            timeStep += numTimes
            data.close()
    # turn the accumulated squared deviations into standard deviations
    stdOutBuf = np.sqrt(stdOutBuf/(timeStep-1))
    stdInBuf  = np.sqrt(stdInBuf/(timeStep-1))
    

    # create MV and save
    f = io.netcdf.netcdf_file(mvFile, 'w')
    f.createDimension('inputPattSize', inputSize)
    f.createDimension('targetPattSize', outSize)
    f.createVariable('inputMeans', 'f', ('inputPattSize',))
    f.createVariable('inputStdevs', 'f', ('inputPattSize', ))
    f.createVariable('outputMeans', 'f', ('targetPattSize',))
    f.createVariable('outputStdevs', 'f', ('targetPattSize', ))
    
    
    if normMethod is not None:
        # one flag per input dimension followed by one per output dimension
        normIdx = py_rw.read_raw_mat(normMethod, 1, 'i4', 'l')
        assert normIdx.shape[0] == (inputSize + outSize), errorMes([normMethod], 2) 
        inNormIdx  = normIdx[0:inputSize]
        outNormIdx = normIdx[inputSize:(inputSize+outSize)]
        
        # keep the untouched mean/std under the '*_ori' names
        f.createVariable('inputMeans_ori',   'f', ('inputPattSize', ))
        f.createVariable('inputStdevs_ori',  'f', ('inputPattSize', ))
        f.createVariable('outputMeans_ori',  'f', ('targetPattSize',))
        f.createVariable('outputStdevs_ori', 'f', ('targetPattSize',))
        meanInBuf_ori, stdInBuf_ori        = meanInBuf.copy(), stdInBuf.copy()
        meanOutBuf_ori, stdOutBuf_ori      = meanOutBuf.copy(), stdOutBuf.copy()
        f.variables['inputMeans_ori'][:]   = np.asarray(meanInBuf_ori, np.float32)
        f.variables['inputStdevs_ori'][:]  = np.asarray(stdInBuf_ori, np.float32)
        f.variables['outputMeans_ori'][:]  = np.asarray(meanOutBuf_ori, np.float32)
        f.variables['outputStdevs_ori'][:] = np.asarray(stdOutBuf_ori, np.float32)
        
        # also store the observed max/min of every dimension
        f.createVariable('inputMax_ori',   'f', ('inputPattSize', ))
        f.createVariable('inputMin_ori',  'f', ('inputPattSize', ))
        f.createVariable('outputMax_ori',  'f', ('targetPattSize',))
        f.createVariable('outputMin_ori', 'f', ('targetPattSize',))
        maxInBuf,  minInBuf  = maxminInBuf[:,0].copy(),  maxminInBuf[:,1].copy()
        maxOutBuf, minOutBuf = maxminOutBuf[:,0].copy(), maxminOutBuf[:,1].copy()
        f.variables['inputMax_ori'][:]   = np.asarray(maxminInBuf[:,0],  np.float32)
        f.variables['inputMin_ori'][:]   = np.asarray(maxminInBuf[:,1],  np.float32)
        f.variables['outputMax_ori'][:]  = np.asarray(maxminOutBuf[:,0], np.float32)
        f.variables['outputMin_ori'][:]  = np.asarray(maxminOutBuf[:,1], np.float32)
        
        #if min(inNormIdx) < 0:
        #    negIdx = np.unique(inNormIdx[inNormIdx<0]) # the negative method
        #    for idx in negIdx:
        #        dataIdx   = np.where(inNormIdx == idx)
        #        assert len(dataIdx)>0, 'Impossible error in normMethod'
        #        tempInBuf = stdInBuf.copy()
        #        tempInBuf[np.where(inNormIdx != idx)] = 0
        #        inNormIdx[dataIdx] = np.argmax(tempInBuf)
                
        #if min(outNormIdx) < 0:
        #    negIdx = np.unique(outNormIdx[outNormIdx<0]) # the negative method
        #    for idx in negIdx:
        #        dataIdx   = np.where(outNormIdx == idx)
        #        assert len(dataIdx)>0, 'Impossible error in normMethod'
        #        tempOutBuf = stdOutBuf.copy()
        #        tempOutBuf[np.where(outNormIdx != idx)] = 0
        #        outNormIdx[dataIdx] = np.argmax(tempOutBuf)    
        
        #maxIn, minIn   = max(inNormIdx), min(inNormIdx)
        #maxOut, minOut = max(outNormIdx), min(outNormIdx)
        #assert (maxIn>=0  and maxIn<inputSize),  'inNormIdx out of bound. Please check normMethod'
        #assert (maxOut>=0 and maxOut<inputSize), 'outNormIdx out of bound. Please check normMethod'
        #assert (minIn>=0  and minIn<inputSize),  'inNormIdx out of bound. Please check normMethod'
        #assert (minOut>=0 and minOut<inputSize), 'outNormIdx out of bound. Please check normMethod'

        # Dimensions flagged with a negative value: store an adjusted minimum
        # in the "mean" slot and the adjusted (max - min) in the "std" slot,
        # so that (x - mean) / std maps the observed range into
        # [g_MinMaxRange, 1 - g_MinMaxRange].
        if min(inNormIdx) < 0:
            tmpMin   = ((1-g_MinMaxRange) * minInBuf - g_MinMaxRange * maxInBuf)/(1-2*g_MinMaxRange)
            tmpMax   = ((1-g_MinMaxRange) * maxInBuf - g_MinMaxRange * minInBuf)/(1-2*g_MinMaxRange)
            
            maxminIndex  = inNormIdx < 0
            meanInBuf[maxminIndex] = tmpMin[maxminIndex]
            stdInBuf[maxminIndex]  = tmpMax[maxminIndex]-tmpMin[maxminIndex]
        if min(outNormIdx) < 0:
            tmpMin   = ((1-g_MinMaxRange)*minOutBuf-g_MinMaxRange*maxOutBuf)/(1-2*g_MinMaxRange)
            tmpMax   = ((1-g_MinMaxRange)*maxOutBuf-g_MinMaxRange*minOutBuf)/(1-2*g_MinMaxRange)

            maxminIndex = outNormIdx < 0
            meanOutBuf[maxminIndex] = tmpMin[maxminIndex]
            stdOutBuf[maxminIndex]  = tmpMax[maxminIndex]-tmpMin[maxminIndex]
        print "Combing maxmin done"

    # final (possibly max-min adjusted) statistics, stored as float32
    f.variables['inputMeans'][:] = np.asarray(meanInBuf, np.float32)
    f.variables['inputStdevs'][:] = np.asarray(stdInBuf, np.float32)
    f.variables['outputMeans'][:] = np.asarray(meanOutBuf, np.float32)
    f.variables['outputStdevs'][:] = np.asarray(stdOutBuf, np.float32)

    f.flush()
    f.close()
    print "*** please check max/min above\n"
    print "*** writing done %s\n" % (mvFile) 
Пример #18
0
import pickle

#import funcs

# Locate the raw-matrix I/O module, trying the known locations in order.
try:
    from binaryTools import readwriteC2 as funcs
except ImportError:
    try:
        from binaryTools import readwriteC2_220 as funcs
    except ImportError:
        try:
            from ioTools import readwrite as funcs
        except ImportError:
            # `except`, not `finally`: a `finally` clause also runs when the
            # import succeeds, so the script used to abort even though
            # ioTools.readwrite was available.
            print("Please add path of pyTools to PYTHONPATH")
            raise Exception(
                "Can't not import binaryTools/readwriteC2 or funcs")

if __name__ == "__main__":
    # Demo: write a weight vector with funcs.write_raw_mat and read it back.

    outDim = 256  # dimension of the output
    outFile = './mseWeight'  # where to write the output vector?

    # write the vector
    data = np.ones([outDim], dtype=np.float32)  # prepare the weight vector
    data[100:180] = 0.5  # I'd like the 100th-179th dimension with weight 0.5
    funcs.write_raw_mat(data, outFile)

    # test: read the vector back and show it
    data = funcs.read_raw_mat(outFile, outDim)
    # parenthesised single argument: valid on both Python 2 and Python 3
    print(data)
Пример #19
0
def bmat2nc_sub2(fileScp, outputfile, shiftInput, shiftOutput, maskFile=None, flushT=300, waitT=30):
    """ Package the data into .nc file
    one row, one frame of data 
    maskFile: to discard certain dimension of data. Text file, each line specify 
             the start and end column of the single input of output data
              e.g. 0 180   # read the 0-180th column of data1
                   0 3     # read the 0-3th column of data2
                   10 12   # read the 10-12th column of data3
    flushT: after reading this number of utterances, nc block will be flushed to the disk
    waitT:  the number of seconds to wait for the flush process (
            to avoid read and write the disk at the same time)
    """

    numSeqs, timeSteps, maxSeqLength, inputPattSize, outputPattSize, \
        inputDim, outputDim, inputDimSE, outDimSE, \
        allTxtLength, maxTxtLength, txtPatSize  = pre_process(fileScp, maskFile)
    print "Data format input: %s, output: %s" % (str(inputPattSize), str(outputPattSize))
    print "Creating nc file %s" % (outputfile)
    if txtPatSize > 0 and maxTxtLength > 0:
        print "Using txt data, maxlength and dimension %d %d" % (maxTxtLength, txtPatSize)
    
    
    # create the dimension
    if os.path.exists(outputfile):
        print "*** %s exists. It will be overwritten" % (outputfile)
    f = io.netcdf.netcdf_file(outputfile, mode = 'w',version=2)
    f.createDimension('numSeqs', numSeqs)
    f.createDimension('numTimesteps', timeSteps)
    f.createDimension('inputPattSize', inputPattSize)
    f.createDimension('targetPattSize', outputPattSize)
    f.createDimension('maxSeqTagLength', maxSeqLength+1)
    
    if txtPatSize>0 and maxTxtLength > 0:
        f.createDimension('txtLength',   allTxtLength)
        f.createDimension('txtPattSize', txtPatSize)
    

    tagsVar  = f.createVariable('seqTags', 'S1', ('numSeqs','maxSeqTagLength'))
    seqLVar  = f.createVariable('seqLengths', 'i', ('numSeqs',))
    inputVar = f.createVariable('inputs', 'f', ('numTimesteps', 'inputPattSize'))
    outVar   = f.createVariable('targetPatterns', 'f', ('numTimesteps', 'targetPattSize'))

    if txtPatSize>0 and maxTxtLength > 0:
        txtVar   = f.createVariable('txtData',    'i', ('txtLength', 'txtPattSize'))
        txtLVar  = f.createVariable('txtLengths', 'i', ('numSeqs',))
    
        
    
    #seqLVar  = np.zeros([numSeqs])
    seqLVar[:] = 0
    tagsVar[:] = ''
    timeStart = 0
    count = 0

    txtStart  = 0

    with open(fileScp, 'r') as filePtr:
        for idx1, line in enumerate(filePtr):
            temp = line.split()
            print "Reading %s" % (temp[0])
            seqFrame      = int(temp[3])
            seqLVar[idx1] = seqFrame #int(temp[3])
            
            tagsVar[idx1,0:len(temp[0])] = list(temp[0]) #charSeq
            inputFileNum = int(temp[1])
            outFileNum   = int(temp[2])
            slotBias = 4
            
            if txtPatSize>0 and maxTxtLength > 0:
                txtLength = int(temp[slotBias + 2*(inputFileNum + outFileNum)])
                txtDim    = int(temp[slotBias + 2*(inputFileNum + outFileNum)+1])
                txtFile   = temp[slotBias + 2*(inputFileNum + outFileNum) + 2]
                data      = py_rw.read_raw_mat(txtFile, txtDim)
                if txtDim == 1:
                    txtVar[txtStart:(txtStart+txtLength),0] = data.copy()
                else:
                    txtVar[txtStart:(txtStart+txtLength),:] = data.copy()
                txtStart = txtStart + txtLength
                txtLVar[idx1] = txtLength

            for idx2 in xrange(inputFileNum):
                [sDim, eDim] = inputDim[idx2,0:2]
                dim = int(temp[slotBias+(idx2)*2])
                datafile = temp[slotBias+(idx2)*2+1]

                #data_raw = readwrite.FromFile(datafile)
                #m,n = data_raw.size/dim, dim
                #assert m*n==data_raw.size, "dimension mismatch %s %s" % (line, datafile)
                #data = data_raw.reshape((m,n))
                
                # store the data
                tS,tE,dS,dE = timeStart, (timeStart+seqFrame), inputDimSE[idx2][0], \
                              inputDimSE[idx2][1] 
                if datafile == g_VOIDFILE:
                    data = np.zeros([seqFrame, dim])
                else:
                    data = py_rw.read_raw_mat(datafile, dim)
                assert (data.shape[0]-seqFrame)<seqFrame*0.1, \
                    errorMes([datafile], 3) + "This data has less number of frames" % (datafile)
                if dim==1 and data.ndim==1:
                    data = data[0:seqFrame]
                    inputVar[tS:tE,dS] = data[0:seqFrame].copy()
                    
                else:
                    data = data[0:seqFrame,sDim:eDim]
                    
                    inputVar[tS:tE,dS:dE] = data[0:seqFrame, \
                                                 inputDimSE[idx2][2]:inputDimSE[idx2][3]].copy()

            slotBias = 4+inputFileNum*2
            for idx2 in xrange(outFileNum):
                [sDim, eDim] = outputDim[idx2,0:2]
                dim = int(temp[slotBias+(idx2)*2])
                datafile = temp[slotBias+(idx2)*2+1]
                
                #data_raw = readwrite.FromFile(datafile)
                #m,n = data_raw.size/dim, dim
                #assert m*n==data_raw.size, "dimension mismatch %s %s" % (line, datafile)
                #data = data_raw.reshape((m,n))
                
                # read and store the output data
                tS,tE,dS,dE = timeStart, (timeStart+seqFrame), outDimSE[idx2][0], \
                              outDimSE[idx2][1] 

                if datafile == g_VOIDFILE:
                    data = np.zeros([seqFrame, dim])
                else:
                    data = py_rw.read_raw_mat(datafile, dim)

                assert (data.shape[0]-seqFrame)<seqFrame*0.1, \
                    errorMes([datafile], 3) + "This data has less number of frames" % (datafile)
                if dim==1 and data.ndim==1:
                    data = data[0:seqFrame]
                    outVar[tS:tE,dS] =data[0:seqFrame].copy()
                    
                else:

                    data = data[0:seqFrame,sDim:eDim]
                    if shiftOutput != 0:
                        outVar[tS:tE,dS:dE] = np.roll(data, shiftOutput, axis=0)[0:seqFrame, \
                                                       outDimSE[idx2][2]:outDimSE[idx2][3]].copy()
                    else:
                        outVar[tS:tE,dS:dE] = data[0:seqFrame, \
                                                       outDimSE[idx2][2]:outDimSE[idx2][3]].copy()
                    

            #print idx1
            del data
            if count > flushT:
                count = 0
                _write(f) #.flush()
                print "Have read %d. Let's wait netCDF for %d(s)" % (idx1, waitT)
                #raw_input("Enter")
                for x in xrange(waitT):
                    print "*",
                    sys.stdout.flush()
                    time.sleep(1)
            count += 1
            timeStart += seqFrame
    print "Reading and writing done " 
    f.close()
Пример #20
0
def bmat2nc_sub1(fileScp, outputfile, maskFile=None, flushT=300, waitT=30):
    """ Package the data into .nc file
    one row, one frame of data 

    fileScp:  text file, one utterance per line, whitespace separated:
                name numInputFiles numOutputFiles numFrames
              followed by one (dim, path) pair per input file and then
              one (dim, path) pair per output file
    outputfile: path of the netCDF file to create (overwritten if it exists)
    maskFile: to discard certain dimension of data. Text file, each line specify 
             the start and end column of the single input of output data
              e.g. 0 180   # read the 0-180th column of data1
                   0 3     # read the 0-3th column of data2
                   10 12   # read the 10-12th column of data3
    flushT: after reading this number of utterances, nc block will be flushed to the disk
    waitT:  the number of seconds to wait for the flush process (
            to avoid read and write the disk at the same time)

    Raises AssertionError when a data file holds clearly more frames than
    the frame number listed in fileScp (tolerance: 30% of the listed number
    for input files, 10% for output files).
    """
    numSeqs, timeSteps, maxSeqLength, inputPattSize, outputPattSize, \
        inputDim, outputDim, inputDimSE, outDimSE, \
        allTxtLength, maxTxtLength, txtPatSize  = pre_process(fileScp, maskFile)
    print("Creating nc file %s" % (outputfile))
    print("Input dimension:  %s\n output dimension: %s" % (str(inputPattSize), str(outputPattSize)))

    def _pack(target, dimSE, fields, numFiles, tS, tE, tolerance):
        # Copy the numFiles (dim, path) streams listed in fields into rows
        # tS:tE of the netCDF variable target.
        seqFrame = tE - tS
        for idx2 in range(numFiles):
            # columns 0:2 -> start, end dimension in package data
            # columns 2:4 -> start, end dimension in raw data
            dS, dE, sDim, eDim = dimSE[idx2, 0:4]
            dim      = int(fields[idx2*2])      # raw data dim
            datafile = fields[idx2*2+1]         # path to raw data

            if datafile == g_VOIDFILE:
                # place holder file: the stream is filled with zeros
                data = np.zeros([seqFrame, dim])
            else:
                data = py_rw.read_raw_mat(datafile, dim)

            if (data.shape[0]-seqFrame) >= seqFrame*tolerance:
                # explicit raise: the check survives "python -O", and the
                # message no longer applies % to a string without placeholder
                raise AssertionError(errorMes([datafile], 3) +
                                     "This data has less number of frames")

            if dim==1 and data.ndim==1:
                target[tS:tE,dS]    = data[0:seqFrame]
            else:
                target[tS:tE,dS:dE] = data[0:seqFrame, sDim:eDim]

    # create the dimension
    if os.path.exists(outputfile):
        print("*** %s exists. It will be overwritten" % (outputfile))
    f = io.netcdf_file(outputfile, mode = 'w',version=2)
    try:
        f.createDimension('numSeqs', numSeqs)
        f.createDimension('numTimesteps', timeSteps)
        f.createDimension('inputPattSize', inputPattSize)
        f.createDimension('targetPattSize', outputPattSize)
        f.createDimension('maxSeqTagLength', maxSeqLength+1)

        tagsVar   = f.createVariable('seqTags', 'S1', ('numSeqs','maxSeqTagLength'))
        seqLVar   = f.createVariable('seqLengths', 'i', ('numSeqs',))
        inputVar  = f.createVariable('inputs', 'f', ('numTimesteps', 'inputPattSize'))
        outVar    = f.createVariable('targetPatterns', 'f', ('numTimesteps', 'targetPattSize'))

        seqLVar[:] = 0
        tagsVar[:] = ''
        timeStart  = 0
        count      = 0

        with open(fileScp, 'r') as filePtr:
            for idx1, line in enumerate(filePtr):

                temp          = line.split()
                print("Reading %s" % (temp[0]))
                seqFrame      = int(temp[3])
                seqLVar[idx1] = seqFrame

                tagsVar[idx1,0:len(temp[0])] = list(temp[0])
                inputFileNum  = int(temp[1])
                outFileNum    = int(temp[2])
                tS, tE        = timeStart, (timeStart+seqFrame)

                # the (dim, path) pairs start at slot 4: inputs first, then outputs
                inStart  = 4
                outStart = inStart + inputFileNum*2
                _pack(inputVar, inputDimSE, temp[inStart:outStart], inputFileNum, tS, tE, 0.3)
                _pack(outVar,   outDimSE,   temp[outStart:],        outFileNum,   tS, tE, 0.1)

                if count > flushT:
                    count = 0
                    _write(f)
                    print("Have read %d. Let's wait netCDF for %d(s)" % (idx1, waitT))
                    for _ in range(waitT):
                        sys.stdout.write("* ")
                        sys.stdout.flush()
                        time.sleep(1)
                count += 1
                timeStart += seqFrame
        print("Read and write done\n")
    finally:
        # always release the netCDF handle, even if a data file is broken
        f.close()
Пример #21
0
def SplitData(fileScp,      fileDir2,   fileDir, 
              outputName,   outDim,     outputDelta, 
              flagUseDelta, datamv,     normMask,
              outputMethod,
              stdT=0.000001):
    """ Split the generated HTK into acoustic features

    fileScp:      iterable of file names; only names ending with .htk are used
    fileDir2:     directory where the .htk files are looked up
    fileDir:      directory for the split features and for gen.scp (the list
                  of the .htk files which were found)
    outputName:   name (file extension) of each output feature
    outDim:       dimension of each output feature inside the .htk data
    outputDelta:  for each feature, the number of equally sized parts
                  (static, delta, ...) its columns are made of
    flagUseDelta: if 0 or '0', only the first part (static) is written for
                  the features with outputDelta > 1
    datamv:       path to the mean/std file used to de-normalize the data,
                  either netCDF (outputMeans, outputStdevs) or a raw vector
                  [mean, std]. Nothing is de-normalized if it is empty or
                  not a file
    normMask:     None, or a vector with one value per output dimension;
                  the mean is multiplied by it and the std is raised to its
                  power (a 0 entry turns the de-normalization off)
    outputMethod: function(data, path) which writes one feature matrix
    stdT:         std values below this threshold are replaced by 1.0
    """
    totalDim = sum(outDim)

    if len(datamv) > 0 and os.path.isfile(datamv):
        print("External Mean Variance file will be used to de-normalize the data")
        try:
            mvFile = io.netcdf_file(datamv)
            try:
                m  = mvFile.variables['outputMeans'][:].copy()
                v  = mvFile.variables['outputStdevs'][:].copy()
            finally:
                mvFile.close()
            assert m.shape[0]==totalDim, "Incompatible dimension"
        except TypeError:
            # not a netCDF file: fall back to the raw [mean, std] vector.
            # datamv is still the path here because it is no longer rebound.
            mvRaw     = funcs.read_raw_mat(datamv, 1)
            assert mvRaw.shape[0] == totalDim*2, 'Dim of datamv is invalid'
            m         = mvRaw[0:totalDim]
            v         = mvRaw[totalDim:totalDim*2]
        v[v<stdT] = 1.0
        if normMask is not None:
            assert normMask.shape[0] == m.shape[0], 'normMask dimension invalid'
            m = m * normMask
            v = v ** normMask
    else:
        m = np.zeros([totalDim])
        v = np.ones([totalDim])

    staticOnly = (flagUseDelta=='0' or flagUseDelta==0)

    with open(fileDir+os.path.sep+'gen.scp', 'w') as filePtr:
        for fileName in fileScp:
            fileBaseName, fileExt = os.path.splitext(fileName)
            if fileExt != '.htk':
                continue

            htkPath = fileDir2 + os.path.sep + fileName
            if not os.path.isfile(htkPath):
                print("Cannot find file %s" % (fileName))
                continue

            filePtr.write(htkPath+'\n')

            # the output of CURRENNT is big-endian
            data = funcs.read_htk(htkPath, end='b')
            assert data.shape[1]==totalDim, \
                "Dimension of "+ fileName +" is not "+ str(totalDim)
            data = data*v+m

            # extract the data from the htk output of CURRENNT 
            for index, outname in enumerate(outputName):
                sIndex = sum(outDim[:index])
                eIndex = sum(outDim[:index+1])

                if staticOnly and outputDelta[index]>1:
                    # if mlpgFlag is off, only generate the static components
                    # assume *.htk has [static, delta, delta-delta]
                    # (floor division keeps the index an integer)
                    eIndex = sIndex + (eIndex-sIndex)//outputDelta[index]
                    # remove the '_delta' extension if it has
                    # (rstrip('_delta') would strip any of these characters)
                    if outname.endswith('_delta'):
                        outname = outname[:-len('_delta')]

                dataOut = data[:, sIndex:eIndex]
                outname = fileDir + os.path.sep + fileBaseName + os.path.extsep + outname
                outputMethod(dataOut, outname)
            print("Writing acoustic data: "+ fileBaseName)
Пример #22
0
def RMSECalcore(file1, file2, dim):
    """ Calculate the RMSE and Corr
    file1: path to the input feature file 1
    file2: path to the input feature file 2
    dim: dimension of the feature (only dim == 1, i.e. F0, is supported)

    If the two files differ in length, the longer trajectory is shifted
    frame by frame against the shorter one and the shift with the highest
    Spearman correlation is kept.

    return RMSE_error, valid_frame_number, Corr
      RMSE_error: array [valid_frame_number, 3] for the selected shift
                  column 0: squared difference of the transformed F0 on
                            the frames where both files are voiced
                  column 1: 1 where only one of the files is voiced
                  column 2: 1 where both files are voiced
      valid_frame_number: length of the shorter file
      Corr: result of scipy.stats.spearmanr for the selected shift
    RMSE_error and Corr are empty lists when dim != 1 or when no shift
    gives a valid (non-NaN) correlation.
    """
    # load the data
    data1 = funcs.read_raw_mat(file1, dim)
    data2 = funcs.read_raw_mat(file2, dim)

    # the shorter one is fixed, the longer one is shifted
    if data1.shape[0] > data2.shape[0]:
        fixed_data, shift_data = data2, data1
    else:
        fixed_data, shift_data = data1, data2
    valid_length = fixed_data.shape[0]
    # the number of frames that can be shifted
    shift_max = shift_data.shape[0] - valid_length

    min_rmse_buf = []
    max_corr_buf = []

    if dim != 1:
        # nothing can be measured; return the empty result instead of
        # failing on variables that were never assigned
        print('Only for F0 data')
        return min_rmse_buf, valid_length, max_corr_buf

    # start below any possible correlation so that Corr == -1.0 is selectable
    max_corr = -np.inf
    # voiced flags of the fixed trajectory do not depend on the shift
    fixed_voiced = fixed_data > 0

    # do RMSE calculation by shifting [0, ..., shift_max] frames
    # find the best value
    for shift_t in range(shift_max + 1):
        shift_data_temp = shift_data[shift_t:shift_t + valid_length]
        shift_voiced = shift_data_temp > 0

        # frames where both are voiced / where the U/V decision differs
        indp = np.logical_and(shift_voiced, fixed_voiced)
        indn = np.logical_xor(shift_voiced, fixed_voiced)

        if np.sum(indp) == 0:
            # no common voiced frame: Corr would be NaN, which never wins
            # the comparison below, so this shift cannot be selected
            print("%s %s" % (file1, file2))
            continue

        # boolean indexing copies, the loaded data is not modified
        shift_f0 = F0Transform(shift_data_temp[indp])
        fixed_f0 = F0Transform(fixed_data[indp])

        diff = np.zeros([shift_data_temp.shape[0], 3])
        diff[indp, 0] = shift_f0 - fixed_f0
        diff[indn, 1] = 1
        diff[indp, 2] = 1
        corr = scipy.stats.spearmanr(shift_f0, fixed_f0)

        # select the shift point by max Corr
        if corr[0] > max_corr:
            max_corr = corr[0]
            max_corr_buf = corr
            min_rmse_buf = diff * diff

    return min_rmse_buf, valid_length, max_corr_buf
Пример #23
0
def RMSECalcore(file1, file2, dim):
    """ Calculate the RMSE and Corr of two F0 files
    file1: path to the input feature file 1
    file2: path to the input feature file 2
    dim: dimension of the feature (only dim == 1, i.e. F0, is supported)

    If the two files differ in length, the longer trajectory is shifted
    frame by frame against the shorter one, and the shift with the largest
    ratio of frames voiced in both files is kept (the first one on ties).

    return RMSE_error, valid_frame_number, Corr
      RMSE_error: array [valid_frame_number, 3] for the selected shift
                  column 0: squared difference of the transformed F0 on
                            the frames where both files are voiced
                  column 1: 1 where only one of the files is voiced
                  column 2: 1 where both files are voiced
      valid_frame_number: length of the shorter file
      Corr: result of scipy.stats.spearmanr for the selected shift
    RMSE_error and Corr are empty lists when dim != 1 or when no shift
    has a frame voiced in both files.
    """
    data1 = funcs.read_raw_mat(file1, dim)
    data2 = funcs.read_raw_mat(file2, dim)

    # the shorter one is fixed, the longer one is shifted
    if data1.shape[0]>data2.shape[0]:
        fixed_data, shift_data = data2, data1
    else:
        fixed_data, shift_data = data1, data2
    valid_length = fixed_data.shape[0]
    shift_max    = shift_data.shape[0] - valid_length

    min_rmse_buf = []
    max_corr_buf = []

    if dim != 1:
        # return the empty result instead of failing on unassigned variables
        print('Only for F0 data')
        return min_rmse_buf, valid_length, max_corr_buf

    max_v_cover  = 0
    # voiced flags of the fixed trajectory do not depend on the shift
    fixed_voiced = fixed_data > 0

    # shifts 0 ... shift_max are all valid alignments: the last window
    # shift_data[shift_max:shift_max+valid_length] ends at the last frame
    for shift_t in range(shift_max + 1):
        shift_data_temp = shift_data[shift_t:shift_t + valid_length]
        shift_voiced    = shift_data_temp > 0

        indp = np.logical_and(shift_voiced, fixed_voiced)
        indn = np.logical_xor(shift_voiced, fixed_voiced)
        voiceFrame = np.sum(indp)

        # ratio of the frames voiced in both files
        v_cover = voiceFrame * 1.0 / valid_length
        if not v_cover > max_v_cover:
            # includes voiceFrame == 0, which can never beat the initial 0
            continue

        # boolean indexing copies, the loaded data is not modified
        shift_f0 = F0Transform(shift_data_temp[indp])
        fixed_f0 = F0Transform(fixed_data[indp])

        diff = np.zeros([shift_data_temp.shape[0], 3])
        diff[indp,0] = shift_f0 - fixed_f0
        diff[indn,1] = 1
        diff[indp,2] = 1

        max_v_cover  = v_cover
        min_rmse_buf = diff*diff
        max_corr_buf = scipy.stats.spearmanr(shift_f0, fixed_f0)

    return min_rmse_buf, valid_length, max_corr_buf
Пример #24
0
def meanStd(ncScp, mvFile, normMethod=None):
    """
    calculate the mean and standard deviation over all .nc in ncScp
    and save them into the netCDF file mvFile

    The statistics are accumulated frame by frame with Welford's online
    algorithm; the deviation is normalized by (number of frames - 1).

    ncScp:      text file, one path to a .nc file per line (blank lines
                are ignored)
    mvFile:     path of the netCDF file to write. It gets the variables
                inputMeans, inputStdevs, outputMeans, outputStdevs
    normMethod: None, or path to a raw vector read by py_rw.read_raw_mat
                with format 'i4', 'l' (presumably int32, little-endian -
                TODO confirm), one value per input dimension followed by
                one value per output dimension.
                For every dimension with a negative value, the stored mean
                and deviation are replaced by an offset and a range derived
                from the min and max of the data and g_MinMaxRange. The
                original statistics are kept in the *_ori variables.

    Raises ValueError if ncScp lists no file.
    """
    useMaxMin = normMethod is not None
    timeStep  = 0
    inputSize = None
    outSize   = None

    with open(ncScp, 'r') as filePtr:
        for ncFile in filePtr:
            ncFile = ncFile.strip()
            if not ncFile:
                continue
            data = io.netcdf_file(ncFile)
            try:
                print("Processing %s" % (ncFile))
                if inputSize is None:
                    # for the first file, get the dimension of data
                    # create the buffer
                    inputSize  = data.dimensions['inputPattSize']
                    outSize    = data.dimensions['targetPattSize']
                    meanInBuf  = np.zeros([inputSize], dtype=np.float64)
                    stdInBuf   = np.zeros([inputSize], dtype=np.float64)
                    meanOutBuf = np.zeros([outSize], dtype=np.float64)
                    stdOutBuf  = np.zeros([outSize], dtype=np.float64)
                    if useMaxMin:
                        # column 0: running max, column 1: running min.
                        # -inf / +inf are neutral for maximum / minimum, so
                        # the first file needs no special case
                        maxminInBuf       = np.empty([inputSize, 2], dtype=np.float64)
                        maxminInBuf[:,0]  = -np.inf
                        maxminInBuf[:,1]  = np.inf
                        maxminOutBuf      = np.empty([outSize, 2], dtype=np.float64)
                        maxminOutBuf[:,0] = -np.inf
                        maxminOutBuf[:,1] = np.inf

                # if max min normalization is used, update the max and min value
                if useMaxMin:
                    tmp = data.variables['inputs'][:].max(axis = 0)
                    maxminInBuf[:,0] = np.maximum(tmp, maxminInBuf[:,0])
                    tmp = data.variables['inputs'][:].min(axis = 0)
                    maxminInBuf[:,1] = np.minimum(tmp, maxminInBuf[:,1])
                    print("Input max %f\tmin %f" % (maxminInBuf[:,0].max(), maxminInBuf[:,1].min()))
                    tmp = data.variables['targetPatterns'][:].max(axis = 0)
                    maxminOutBuf[:,0] = np.maximum(tmp, maxminOutBuf[:,0])
                    tmp = data.variables['targetPatterns'][:].min(axis = 0)
                    maxminOutBuf[:,1] = np.minimum(tmp, maxminOutBuf[:,1])
                    print("Output max %f\tmin %f" % (maxminOutBuf[:,0].max(), 
                                                     maxminOutBuf[:,1].min()))

                numTimes = data.dimensions['numTimesteps']
                print("Processing %s of %s frames" % (ncFile, numTimes))
                print("Input max %f\tmin %f"  % (data.variables['inputs'][:].max(),
                                                 data.variables['inputs'][:].min()))
                print("Output max %f\tmin %f" % (data.variables['targetPatterns'][:].max(),
                                                 data.variables['targetPatterns'][:].min()))

                for t in range(numTimes):
                    # np.array copies the frame, so nothing keeps pointing
                    # into the file buffer when the file is closed
                    frameIn  = np.array(data.variables['inputs'][t, :], dtype=np.float64)
                    frameOut = np.array(data.variables['targetPatterns'][t, :],
                                        dtype=np.float64)
                    # Welford update: mean first, then the sum of squares
                    # with the old and the new mean
                    tmpIn      = frameIn - meanInBuf
                    meanInBuf  = meanInBuf + tmpIn*1.0/(timeStep+t+1)
                    stdInBuf   = stdInBuf + tmpIn*(frameIn-meanInBuf)
                    tmpOut     = frameOut - meanOutBuf
                    meanOutBuf = meanOutBuf + tmpOut*1.0/(timeStep+t+1)
                    stdOutBuf  = stdOutBuf + tmpOut*(frameOut-meanOutBuf)
                timeStep += numTimes
            finally:
                data.close()

    if inputSize is None:
        raise ValueError("No .nc file is listed in %s" % (ncScp))

    stdOutBuf = np.sqrt(stdOutBuf/(timeStep-1))
    stdInBuf  = np.sqrt(stdInBuf/(timeStep-1))

    # create MV and save
    f = io.netcdf_file(mvFile, 'w')
    try:
        f.createDimension('inputPattSize', inputSize)
        f.createDimension('targetPattSize', outSize)
        f.createVariable('inputMeans', 'f', ('inputPattSize',))
        f.createVariable('inputStdevs', 'f', ('inputPattSize', ))
        f.createVariable('outputMeans', 'f', ('targetPattSize',))
        f.createVariable('outputStdevs', 'f', ('targetPattSize', ))

        if useMaxMin:
            normIdx = py_rw.read_raw_mat(normMethod, 1, 'i4', 'l')
            assert normIdx.shape[0] == (inputSize + outSize), errorMes([normMethod], 2) 
            inNormIdx  = normIdx[0:inputSize]
            outNormIdx = normIdx[inputSize:(inputSize+outSize)]

            # keep the plain mean / std before they are partly replaced below
            f.createVariable('inputMeans_ori',   'f', ('inputPattSize', ))
            f.createVariable('inputStdevs_ori',  'f', ('inputPattSize', ))
            f.createVariable('outputMeans_ori',  'f', ('targetPattSize',))
            f.createVariable('outputStdevs_ori', 'f', ('targetPattSize',))
            f.variables['inputMeans_ori'][:]   = np.asarray(meanInBuf, np.float32)
            f.variables['inputStdevs_ori'][:]  = np.asarray(stdInBuf, np.float32)
            f.variables['outputMeans_ori'][:]  = np.asarray(meanOutBuf, np.float32)
            f.variables['outputStdevs_ori'][:] = np.asarray(stdOutBuf, np.float32)

            f.createVariable('inputMax_ori',   'f', ('inputPattSize', ))
            f.createVariable('inputMin_ori',  'f', ('inputPattSize', ))
            f.createVariable('outputMax_ori',  'f', ('targetPattSize',))
            f.createVariable('outputMin_ori', 'f', ('targetPattSize',))
            maxInBuf,  minInBuf  = maxminInBuf[:,0].copy(),  maxminInBuf[:,1].copy()
            maxOutBuf, minOutBuf = maxminOutBuf[:,0].copy(), maxminOutBuf[:,1].copy()
            f.variables['inputMax_ori'][:]   = np.asarray(maxInBuf,  np.float32)
            f.variables['inputMin_ori'][:]   = np.asarray(minInBuf,  np.float32)
            f.variables['outputMax_ori'][:]  = np.asarray(maxOutBuf, np.float32)
            f.variables['outputMin_ori'][:]  = np.asarray(minOutBuf, np.float32)

            # dimensions marked with a negative value: the mean becomes the
            # offset tmpMin and the std becomes the range tmpMax-tmpMin
            maxminIndex = inNormIdx < 0
            if np.any(maxminIndex):
                tmpMin   = ((1-g_MinMaxRange) * minInBuf - g_MinMaxRange * maxInBuf)/(1-2*g_MinMaxRange)
                tmpMax   = ((1-g_MinMaxRange) * maxInBuf - g_MinMaxRange * minInBuf)/(1-2*g_MinMaxRange)
                meanInBuf[maxminIndex] = tmpMin[maxminIndex]
                stdInBuf[maxminIndex]  = tmpMax[maxminIndex]-tmpMin[maxminIndex]

            maxminIndex = outNormIdx < 0
            if np.any(maxminIndex):
                tmpMin   = ((1-g_MinMaxRange)*minOutBuf-g_MinMaxRange*maxOutBuf)/(1-2*g_MinMaxRange)
                tmpMax   = ((1-g_MinMaxRange)*maxOutBuf-g_MinMaxRange*minOutBuf)/(1-2*g_MinMaxRange)
                meanOutBuf[maxminIndex] = tmpMin[maxminIndex]
                stdOutBuf[maxminIndex]  = tmpMax[maxminIndex]-tmpMin[maxminIndex]
            print("Combing maxmin done")

        f.variables['inputMeans'][:] = np.asarray(meanInBuf, np.float32)
        f.variables['inputStdevs'][:] = np.asarray(stdInBuf, np.float32)
        f.variables['outputMeans'][:] = np.asarray(meanOutBuf, np.float32)
        f.variables['outputStdevs'][:] = np.asarray(stdOutBuf, np.float32)

        f.flush()
    finally:
        f.close()
    print("*** please check max/min above\n")
    print("*** writing done %s\n" % (mvFile))
Пример #25
0
def SplitData(fileScp,      fileDir2,   fileDir, 
              outputName,   outDim,     outputDelta, 
              datamv,     normMask,
              outputMethod,
              stdT=0.000001):
    """ Split the generated HTK into acoustic features

    fileScp:      iterable of file names; only names ending with .htk are used
    fileDir2:     directory where the .htk files are looked up
    fileDir:      directory for the split features and for gen.scp (the list
                  of the .htk files which were found)
    outputName:   name (file extension) of each output feature; anything
                  from '_delta' on is removed to get the base name
    outDim:       dimension of each output feature inside the .htk data
    outputDelta:  for each feature, the number of equally sized parts
                  (static, delta, ...) its columns are made of. If it is
                  larger than 1, the first part is written as <name> and
                  all the columns are written as <name>_delta
    datamv:       path to the mean/std file used to de-normalize the data,
                  either netCDF (outputMeans, outputStdevs) or a raw vector
                  [mean, std]. Nothing is de-normalized if it is empty or
                  not a file
    normMask:     None, or a vector with one value per output dimension;
                  the mean is multiplied by it and the std is raised to its
                  power (a 0 entry turns the de-normalization off)
    outputMethod: function(data, path) which writes one feature matrix
    stdT:         std values below this threshold are replaced by 1.0
    """
    totalDim = sum(outDim)

    if len(datamv) > 0 and os.path.isfile(datamv):
        print("External Mean Variance file will be used to de-normalize the data")
        try:
            mvFile = io.netcdf_file(datamv)
            try:
                m  = mvFile.variables['outputMeans'][:].copy()
                v  = mvFile.variables['outputStdevs'][:].copy()
            finally:
                mvFile.close()
            assert m.shape[0]==totalDim, "Incompatible dimension"
        except TypeError:
            # not a netCDF file: fall back to the raw [mean, std] vector.
            # datamv is still the path here because it is no longer rebound.
            mvRaw     = funcs.read_raw_mat(datamv, 1)
            assert mvRaw.shape[0] == totalDim*2, 'Dim of datamv is invalid'
            m         = mvRaw[0:totalDim]
            v         = mvRaw[totalDim:totalDim*2]
        v[v<stdT] = 1.0
        if normMask is not None:
            assert normMask.shape[0] == m.shape[0], 'normMask dimension invalid'
            m = m * normMask
            v = v ** normMask
    else:
        m = np.zeros([totalDim])
        v = np.ones([totalDim])

    with open(fileDir+os.path.sep+'gen.scp', 'w') as filePtr:
        for fileName in fileScp:
            fileBaseName, fileExt = os.path.splitext(fileName)
            if fileExt != '.htk':
                continue

            htkPath = fileDir2 + os.path.sep + fileName
            if not os.path.isfile(htkPath):
                print("Cannot find file %s" % (fileName))
                continue

            filePtr.write(htkPath+'\n')

            # the output of CURRENNT is big-endian
            data = funcs.read_htk(htkPath, end='b')
            assert data.shape[1]==totalDim, \
                "Dimension of "+ fileName +" != "+ str(totalDim)
            data = data*v+m

            # extract the data from the htk output of CURRENNT 
            for index, outname in enumerate(outputName):
                sIndex  = sum(outDim[:index])
                eIndex  = sum(outDim[:index+1])
                outname = outname.split('_delta')[0]
                outpath = fileDir + os.path.sep + fileBaseName + os.path.extsep + outname

                if outputDelta[index]>1:
                    # output static component
                    # (floor division: a float can not be used as slice index)
                    staticIndex = sIndex + (eIndex-sIndex)//outputDelta[index]
                    outputMethod(data[:, sIndex:staticIndex], outpath)

                    # output all (static and delta components)
                    outputMethod(data[:, sIndex:eIndex], outpath + '_delta')
                else:
                    # feature don't have delta component
                    outputMethod(data[:, sIndex:eIndex], outpath)

            print("Writing acoustic data: "+ fileBaseName)
Пример #26
0
def bmat2nc_sub1(fileScp, outputfile, maskFile=None, flushT=300, waitT=30):
    """ Package the data into .nc file
    one row, one frame of data 

    fileScp:  text file, one utterance per line, whitespace separated:
                name numInputFiles numOutputFiles numFrames
              followed by one (dim, path) pair per input file and then
              one (dim, path) pair per output file
    outputfile: path of the netCDF file to create (overwritten if it exists)
    maskFile: to discard certain dimension of data. Text file, each line specify 
             the start and end column of the single input of output data
              e.g. 0 180   # read the 0-180th column of data1
                   0 3     # read the 0-3th column of data2
                   10 12   # read the 10-12th column of data3
    flushT: after reading this number of utterances, nc block will be flushed to the disk
    waitT:  the number of seconds to wait for the flush process (
            to avoid read and write the disk at the same time)

    Raises AssertionError when an utterance is listed with less than one
    frame, or when a data file holds over 30% more frames than listed.
    """
    numSeqs, timeSteps, maxSeqLength, inputPattSize, outputPattSize, \
        inputDim, outputDim, inputDimSE, outDimSE, \
        allTxtLength, maxTxtLength, txtPatSize  = pre_process(fileScp, maskFile)
    print("Creating nc file %s" % (outputfile))
    print("Input dimension:  %s\n output dimension: %s" % (str(inputPattSize), str(outputPattSize)))

    def _pack(target, dimSE, fields, numFiles, tag, tS, tE):
        # Copy the numFiles (dim, path) streams listed in fields into rows
        # tS:tE of the netCDF variable target; tag names the utterance.
        seqFrame = tE - tS
        for idx2 in range(numFiles):
            # columns 0:2 -> start, end dimension in package data
            # columns 2:4 -> start, end dimension in raw data
            dS, dE, sDim, eDim = dimSE[idx2, 0:4]
            dim      = int(fields[idx2*2])      # raw data dim
            datafile = fields[idx2*2+1]         # path to raw data

            if datafile == g_VOIDFILE:
                # place holder file: the stream is filled with zeros
                data = np.zeros([seqFrame, dim])
            else:
                data = py_rw.read_raw_mat(datafile, dim)

            if (data.shape[0]-seqFrame) > seqFrame * 0.3:
                print("Error: please check the data named by %s" % (tag))
                # explicit raise: the check survives "python -O", and the
                # message no longer applies % to a string without placeholder
                raise AssertionError(errorMes([datafile], 3) + "Error in preparing data")

            if dim==1 and data.ndim==1:
                target[tS:tE,dS]    = data[0:seqFrame]
            else:
                target[tS:tE,dS:dE] = data[0:seqFrame, sDim:eDim]

    # create the dimension
    if os.path.exists(outputfile):
        print("*** %s exists. It will be overwritten" % (outputfile))
    f = io.netcdf_file(outputfile, mode = 'w',version=2)
    try:
        f.createDimension('numSeqs', numSeqs)
        f.createDimension('numTimesteps', timeSteps)
        f.createDimension('inputPattSize', inputPattSize)
        f.createDimension('targetPattSize', outputPattSize)
        f.createDimension('maxSeqTagLength', maxSeqLength+1)

        tagsVar   = f.createVariable('seqTags', 'S1', ('numSeqs','maxSeqTagLength'))
        seqLVar   = f.createVariable('seqLengths', 'i', ('numSeqs',))
        inputVar  = f.createVariable('inputs', 'f', ('numTimesteps', 'inputPattSize'))
        outVar    = f.createVariable('targetPatterns', 'f', ('numTimesteps', 'targetPattSize'))

        seqLVar[:] = 0
        tagsVar[:] = ''
        timeStart  = 0
        count      = 0

        with open(fileScp, 'r') as filePtr:
            for idx1, line in enumerate(filePtr):

                temp          = line.split()
                print("Reading %s" % (temp[0]))
                seqFrame      = int(temp[3])

                if seqFrame < 1:
                    print("Error: data is empty. Please check %s in %s" % (temp[0], fileScp))
                    # no data file has been parsed yet at this point, so the
                    # script file itself is reported
                    raise AssertionError(errorMes([fileScp], 3) + "Error in preparing data")

                seqLVar[idx1] = seqFrame

                tagsVar[idx1,0:len(temp[0])] = list(temp[0])
                inputFileNum  = int(temp[1])
                outFileNum    = int(temp[2])
                tS, tE        = timeStart, (timeStart+seqFrame)

                # the (dim, path) pairs start at slot 4: inputs first, then outputs
                inStart  = 4
                outStart = inStart + inputFileNum*2
                _pack(inputVar, inputDimSE, temp[inStart:outStart], inputFileNum, temp[0], tS, tE)
                _pack(outVar,   outDimSE,   temp[outStart:],        outFileNum,   temp[0], tS, tE)

                if count > flushT:
                    count = 0
                    _write(f)
                    print("Have read %d. Let's wait netCDF for %d(s)" % (idx1, waitT))
                    for _ in range(waitT):
                        print("*", end=' ')
                        sys.stdout.flush()
                        time.sleep(1)
                count += 1
                timeStart += seqFrame
        print("Read and write done\n")
    finally:
        # always release the netCDF handle, even if a data file is broken
        f.close()
Пример #27
0
def bmat2nc_sub2(fileScp, outputfile, shiftInput, shiftOutput, maskFile=None, flushT=300, waitT=30):
    """ Package the data listed in fileScp into a netCDF (.nc) file.
    One row of the stored matrices is one frame of data.

    fileScp:  text list, one utterance per line. Fields read by this function:
                name numInputFiles numOutputFiles numFrames
                [dim file] * numInputFiles
                [dim file] * numOutputFiles
                [txtLength txtDim txtFile]   (only when txt data is used)
    outputfile:  path of the .nc file to create (overwritten if it exists)
    shiftInput:  kept in the signature for callers; not used by this function
    shiftOutput: when non-zero, multi-column output data is rolled by this
                 number of frames along the time axis (np.roll) before storing
    maskFile: to discard certain dimension of data. Text file, each line specify
             the start and end column of the single input of output data
              e.g. 0 180   # read the 0-180th column of data1
                   0 3     # read the 0-3th column of data2
                   10 12   # read the 10-12th column of data3
    flushT: after reading this number of utterances, nc block will be flushed to the disk
    waitT:  the number of seconds to wait for the flush process (
            to avoid read and write the disk at the same time)

    A data file that has at least 10% more frames than numFrames fails an
    assertion. The netCDF file is closed even when an error interrupts the
    packaging.
    """

    numSeqs, timeSteps, maxSeqLength, inputPattSize, outputPattSize, \
        inputDim, outputDim, inputDimSE, outDimSE, \
        allTxtLength, maxTxtLength, txtPatSize  = pre_process(fileScp, maskFile)
    print("Data format input: %s, output: %s" % (str(inputPattSize), str(outputPattSize)))
    print("Creating nc file %s" % (outputfile))
    useTxt = txtPatSize > 0 and maxTxtLength > 0
    if useTxt:
        print("Using txt data, maxlength and dimension %d %d" % (maxTxtLength, txtPatSize))

    # create the dimension
    if os.path.exists(outputfile):
        print("*** %s exists. It will be overwritten" % (outputfile))
    f = io.netcdf.netcdf_file(outputfile, mode = 'w',version=2)
    try:
        f.createDimension('numSeqs', numSeqs)
        f.createDimension('numTimesteps', timeSteps)
        f.createDimension('inputPattSize', inputPattSize)
        f.createDimension('targetPattSize', outputPattSize)
        f.createDimension('maxSeqTagLength', maxSeqLength+1)

        if useTxt:
            f.createDimension('txtLength',   allTxtLength)
            f.createDimension('txtPattSize', txtPatSize)

        tagsVar  = f.createVariable('seqTags', 'S1', ('numSeqs','maxSeqTagLength'))
        seqLVar  = f.createVariable('seqLengths', 'i', ('numSeqs',))
        inputVar = f.createVariable('inputs', 'f', ('numTimesteps', 'inputPattSize'))
        outVar   = f.createVariable('targetPatterns', 'f', ('numTimesteps', 'targetPattSize'))

        if useTxt:
            txtVar   = f.createVariable('txtData',    'i', ('txtLength', 'txtPattSize'))
            txtLVar  = f.createVariable('txtLengths', 'i', ('numSeqs',))

        seqLVar[:] = 0
        tagsVar[:] = ''
        timeStart = 0     # first row of the current utterance in inputs/targets
        count = 0         # utterances handled since the last flush
        txtStart  = 0     # first row of the current utterance in txtData

        with open(fileScp, 'r') as filePtr:
            for idx1, line in enumerate(filePtr):
                temp = line.split()
                print("Reading %s" % (temp[0]))
                seqFrame      = int(temp[3])
                seqLVar[idx1] = seqFrame

                tagsVar[idx1,0:len(temp[0])] = list(temp[0])
                inputFileNum = int(temp[1])
                outFileNum   = int(temp[2])
                slotBias = 4

                if useTxt:
                    # the txt fields follow all [dim file] pairs
                    txtSlot   = slotBias + 2*(inputFileNum + outFileNum)
                    txtLength = int(temp[txtSlot])
                    txtDim    = int(temp[txtSlot + 1])
                    txtFile   = temp[txtSlot + 2]
                    data      = py_rw.read_raw_mat(txtFile, txtDim)
                    if txtDim == 1:
                        txtVar[txtStart:(txtStart+txtLength),0] = data.copy()
                    else:
                        txtVar[txtStart:(txtStart+txtLength),:] = data.copy()
                    txtStart = txtStart + txtLength
                    txtLVar[idx1] = txtLength

                # ---- input data ----
                for idx2 in range(inputFileNum):
                    [sDim, eDim] = inputDim[idx2,0:2]
                    dim = int(temp[slotBias+(idx2)*2])
                    datafile = temp[slotBias+(idx2)*2+1]

                    # rows / columns of the destination block
                    tS,tE,dS,dE = timeStart, (timeStart+seqFrame), inputDimSE[idx2][0], \
                                  inputDimSE[idx2][1]
                    if datafile == g_VOIDFILE:
                        data = np.zeros([seqFrame, dim])
                    else:
                        data = py_rw.read_raw_mat(datafile, dim)
                    # The message must carry conversion specifiers; otherwise
                    # a failing check raises TypeError instead of AssertionError.
                    assert (data.shape[0]-seqFrame)<seqFrame*0.1, \
                        errorMes([datafile], 3) + \
                        "Frame number mismatch: file has %d frames, list says %d (%s)" \
                        % (data.shape[0], seqFrame, datafile)
                    if dim==1 and data.ndim==1:
                        data = data[0:seqFrame]
                        inputVar[tS:tE,dS] = data[0:seqFrame].copy()

                    else:
                        data = data[0:seqFrame,sDim:eDim]

                        inputVar[tS:tE,dS:dE] = data[0:seqFrame, \
                                                     inputDimSE[idx2][2]:inputDimSE[idx2][3]].copy()

                # ---- output data ----
                slotBias = 4+inputFileNum*2
                for idx2 in range(outFileNum):
                    [sDim, eDim] = outputDim[idx2,0:2]
                    dim = int(temp[slotBias+(idx2)*2])
                    datafile = temp[slotBias+(idx2)*2+1]

                    # rows / columns of the destination block
                    tS,tE,dS,dE = timeStart, (timeStart+seqFrame), outDimSE[idx2][0], \
                                  outDimSE[idx2][1]

                    if datafile == g_VOIDFILE:
                        data = np.zeros([seqFrame, dim])
                    else:
                        data = py_rw.read_raw_mat(datafile, dim)

                    assert (data.shape[0]-seqFrame)<seqFrame*0.1, \
                        errorMes([datafile], 3) + \
                        "Frame number mismatch: file has %d frames, list says %d (%s)" \
                        % (data.shape[0], seqFrame, datafile)
                    if dim==1 and data.ndim==1:
                        data = data[0:seqFrame]
                        outVar[tS:tE,dS] =data[0:seqFrame].copy()

                    else:
                        data = data[0:seqFrame,sDim:eDim]
                        if shiftOutput != 0:
                            outVar[tS:tE,dS:dE] = np.roll(data, shiftOutput, axis=0)[0:seqFrame, \
                                                           outDimSE[idx2][2]:outDimSE[idx2][3]].copy()
                        else:
                            outVar[tS:tE,dS:dE] = data[0:seqFrame, \
                                                           outDimSE[idx2][2]:outDimSE[idx2][3]].copy()

                del data
                if count > flushT:
                    count = 0
                    _write(f)
                    print("Have read %d. Let's wait netCDF for %d(s)" % (idx1, waitT))
                    for x in range(waitT):
                        print("*", end=' ')
                        sys.stdout.flush()
                        time.sleep(1)
                count += 1
                timeStart += seqFrame
        print("Reading and writing done ")
    finally:
        # release the file handle even if packaging was interrupted
        f.close()
Пример #28
0
def getMeanStd(fileScp, fileDim, stdFloor=0.00001, f0Feature=0):
    """ Accumulate the mean and standard deviation over a list of data files.

    fileScp:   text file, one data file path per line
    fileDim:   dimension handed to py_rw.read_raw_mat for every file
    stdFloor:  any std below this value is replaced by 1.0
    f0Feature: if true and fileDim == 1, only values > 0 are kept
               (files with nothing left are skipped silently)

    Files are merged one at a time with the pairwise ("parallel") update
    of mean and variance, see
    https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance

    Return: (mean, std), both np.float32 arrays
    """
    meanBuf = np.zeros([fileDim], dtype=np.float64)
    stdBuf = np.zeros([fileDim], dtype=np.float64)
    framesSeen = 0

    # first pass: number of entries, only used by the progress display
    with open(fileScp, 'r') as scpFile:
        totalFiles = sum(1 for _ in scpFile)

    with open(fileScp, 'r') as scpFile:
        for fileIdx, rawLine in enumerate(scpFile):
            fileName = rawLine.rstrip('\n')
            data = py_rw.read_raw_mat(fileName, fileDim)

            sys.stdout.write('\r')
            sys.stdout.write("%d/%d" % (fileIdx, totalFiles))

            if f0Feature and fileDim == 1:
                # F0 track: drop the non-positive entries
                data = data[np.where(data > 0)]
                if data.shape[0] < 1:
                    continue
            if data.shape[0] < 1:
                print('%s no data\n' % (fileName))
                continue

            isVector = len(data.shape) == 1
            nFrames = data.shape[0]
            total = framesSeen + nFrames

            try:
                # statistics of this file alone (variance, despite the
                # buffer name; the square root is taken at the very end)
                if isVector:
                    meanNew, varNew = data.mean(), data.var()
                else:
                    meanNew, varNew = data.mean(axis=0), data.var(axis=0)

                deltaMean = meanNew - meanBuf
                meanBuf = meanBuf + deltaMean * (float(nFrames) / total)

                if framesSeen != 0:
                    # weighted old variance + weighted new variance
                    # + correction for the shift between the two means
                    oldPart = stdBuf * (float(framesSeen) / total)
                    newPart = varNew * (float(nFrames) / total)
                    crossPart = deltaMean * deltaMean / \
                        (float(nFrames) / framesSeen +
                         float(framesSeen) / nFrames + 2.0)
                    stdBuf = oldPart + newPart + crossPart
                elif isVector:
                    stdBuf[0] = varNew
                else:
                    stdBuf = varNew

                framesSeen += data.shape[0]
            except RuntimeWarning:
                print("\t%s has ill data. Please consider remove it" %
                      (fileName))

    sys.stdout.write('\n')

    # variance -> standard deviation, then floor
    stdBuf = np.sqrt(stdBuf)
    stdBuf[stdBuf < stdFloor] = 1.0

    return (np.asarray(meanBuf, dtype=np.float32),
            np.asarray(stdBuf, dtype=np.float32))
def RMSECalcore(file1, file2, dim):
    """ Compare two F0 files, searching for the best time alignment.

    The shorter sequence is kept fixed and the longer one is slid over it;
    every window of the longer sequence that has the length of the shorter
    one is tried. The window with the highest Spearman correlation (computed
    over frames where both sequences are > 0) is reported.

    file1, file2: paths handed to funcs.read_raw_mat
    dim:          must be 1 (F0 data)

    Return: (pow2, valid_length, corr)
      pow2:  array [valid_length, 3] for the selected window
               column 0: squared difference of the F0Transform-ed values
                         on frames where both sequences are > 0
               column 1: 1 where exactly one of the sequences is > 0
               column 2: 1 where both sequences are > 0
             (all NaN when the window has no frame where both are > 0)
      valid_length: length of the shorter sequence
      corr:  result of scipy.stats.spearmanr for the selected window,
             or [nan, 0] when there is no frame where both are > 0

    Raises ValueError when dim != 1.
    """
    if dim != 1:
        # The non-F0 case was never implemented; fail with a clear error
        # instead of running into an unbound local further down.
        raise ValueError('Only for F0 data')

    data1 = funcs.read_raw_mat(file1, dim)
    data2 = funcs.read_raw_mat(file2, dim)

    # the shorter sequence stays fixed, the longer one is shifted
    if data1.shape[0] > data2.shape[0]:
        fixed_data, shift_data = data2, data1
    else:
        fixed_data, shift_data = data1, data2
    valid_length = fixed_data.shape[0]
    shift_max = shift_data.shape[0] - valid_length

    fixed_voiced = fixed_data > 0

    max_corr = -1.0
    min_rmse_buf = None
    max_corr_buf = None

    # A length difference of D allows D + 1 windows (shift 0 .. D).
    for shift_t in range(shift_max + 1):
        shift_window = shift_data[shift_t:shift_t + valid_length]
        shift_voiced = shift_window > 0

        indp = shift_voiced & fixed_voiced      # both > 0
        indn = shift_voiced ^ fixed_voiced      # exactly one > 0

        diff = np.zeros([shift_window.shape[0], 3])
        if np.count_nonzero(indp) > 0:
            # boolean indexing yields fresh arrays, so the inputs stay intact
            shift_f0 = F0Transform(shift_window[indp])
            fixed_f0 = F0Transform(fixed_data[indp])
            diff[indp, 0] = shift_f0 - fixed_f0
            diff[indn, 1] = 1
            diff[indp, 2] = 1
            pow2 = diff * diff
            corr = scipy.stats.spearmanr(shift_f0, fixed_f0)
        else:
            corr = [np.nan, 0]
            pow2 = diff * np.nan

        # Select by maximum correlation. The first window is always taken
        # as a fallback so that a NaN (or -1.0) correlation everywhere
        # still yields well-formed results instead of empty lists.
        is_better = corr[0] > max_corr
        if max_corr_buf is None or is_better:
            if is_better:
                max_corr = corr[0]
            max_corr_buf = corr
            min_rmse_buf = pow2

    return min_rmse_buf, valid_length, max_corr_buf