def create_weights_lab(labpath, fids, outfilepath,
                       lineheadregexp=r'([^\^]+)\^([^-]+)-([^\+]+)\+([^=]+)=([^@]+)@(.+)',
                       silencesymbol='sil',
                       shift=0.005):
    """
    This function creates a one-column vector with one weight value per frame.
    The weight is derived from the silence symbol found at the head of each
    lab line.

    Some lab file formats use:
        r'([^\~]+)\~([^-]+)-([^\+]+)\+([^=]+)=([^:]+):(.+)'
    """
    makedirs(os.path.dirname(outfilepath))
    outfilepath, _ = data.getpathandshape(outfilepath)

    for fid in readids(fids):
        print_tty('\r    Processing feature file {}                '.format(fid))

        with open(labpath.replace('*', fid)) as f:
            lines = f.readlines()

        # The end time of the last line gives the total duration in frames
        lineels = re.findall(r'([0-9]+)\s+([0-9]+)\s+(.+)', lines[-1])[0]
        tend = float(lineels[1]) * 1e-7
        weight = np.ones(int(np.ceil(tend / shift)), dtype='float32')

        for line in lines:
            lineels = re.findall(r'([0-9]+)\s+([0-9]+)\s+(.+)', line)[0]
            tstart = float(lineels[0]) * 1e-7
            tend = float(lineels[1]) * 1e-7
            phones = re.findall(lineheadregexp, lineels[2])[0]
            if phones[2] == silencesymbol:
                # Zero the weight over the frames covered by the silence label
                weight[int(np.floor(tstart / shift)):int(np.ceil(tend / shift))] = 0.0

        weight.astype('float32').tofile(outfilepath.replace('*', fid))

    print_tty('\r                                                           \r')
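def _example_create_weights_lab():  # pragma: no cover
    # A minimal usage sketch for create_weights_lab (this helper and its paths
    # are hypothetical, not part of the original API). For each file ID it
    # writes a float32 vector with weight 0.0 on frames covered by a silence
    # label and 1.0 everywhere else; shift must match the frame shift of the
    # acoustic features (5ms by default):
    create_weights_lab('tests/labels/*.lab', 'tests/file_id_list.scp',
                       'tests/labels_weights/*.w',
                       silencesymbol='sil', shift=0.005)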
def test_data(self):
    import data

    fids = readids(cptest + 'file_id_list.scp')

    path, shape = data.getpathandshape('dummy.fwlspec')
    self.assertTrue(path == 'dummy.fwlspec')
    self.assertTrue(shape is None)
    path, shape = data.getpathandshape('dummy.fwlspec:(-1,129)')
    self.assertTrue(path == 'dummy.fwlspec')
    self.assertTrue(shape == (-1, 129))
    path, shape = data.getpathandshape('dummy.fwlspec:(-1,129)', (-1, 12))
    self.assertTrue(path == 'dummy.fwlspec')
    self.assertTrue(shape == (-1, 12))
    path, shape = data.getpathandshape('dummy.fwlspec', (-1, 12))
    self.assertTrue(path == 'dummy.fwlspec')
    self.assertTrue(shape == (-1, 12))

    dim = data.getlastdim('dummy.fwlspec')
    self.assertTrue(dim == 1)
    dim = data.getlastdim('dummy.fwlspec:(-1,129)')
    self.assertTrue(dim == 129)

    indir = cptest + 'binary_label_' + str(lab_size) + '_norm_minmaxm11/*.lab:(-1,' + str(lab_size) + ')'
    Xs = data.load(indir, fids, shape=None, frameshift=0.005, verbose=1, label='Xs: ')
    self.assertTrue(len(Xs) == 10)
    print(Xs[0].shape)
    self.assertTrue(Xs[0].shape == (667, lab_size))
    print(data.gettotallen(Xs))
    self.assertTrue(data.gettotallen(Xs) == 5694)

    outdir = cptest + 'wav_cmp_lf0_fwlspec65_fwnm17_bndnmnoscale/*.cmp:(-1,83)'
    Ys = data.load(outdir, fids, shape=None, frameshift=0.005, verbose=1, label='Ys: ')
    print('len(Ys)=' + str(len(Ys)))
    self.assertTrue(len(Ys) == 10)
    print('Ys[0].shape' + str(Ys[0].shape))
    self.assertTrue(Ys[0].shape == (666, 83))

    wdir = cptest + 'wav_fwlspec65_weights/*.w:(-1,1)'
    Ws = data.load(wdir, fids, shape=None, frameshift=0.005, verbose=1, label='Ws: ')
    self.assertTrue(len(Ws) == 10)

    Xs, Ys, Ws = data.croplen([Xs, Ys, Ws])
    [Xs, Ys], Ws = data.croplen_weight([Xs, Ys], Ws, thresh=0.5)

    Xs_w_stop = data.addstop(Xs)

    X_train, MX_train, Y_train, MY_train, W_train = data.load_inoutset(
        indir, outdir, wdir, fids, length=None, lengthmax=100,
        maskpadtype='randshift', inouttimesync=False)
    X_train, MX_train, Y_train, MY_train, W_train = data.load_inoutset(
        indir, outdir, wdir, fids, length=None, lengthmax=100,
        maskpadtype='randshift')
    X_train, MX_train, Y_train, MY_train, W_train = data.load_inoutset(
        indir, outdir, wdir, fids, length=None, lengthmax=100,
        maskpadtype='randshift', cropmode='begendbigger')
    X_train, MX_train, Y_train, MY_train, W_train = data.load_inoutset(
        indir, outdir, wdir, fids, length=None, lengthmax=100,
        maskpadtype='randshift', cropmode='all')

    worst_val = data.cost_0pred_rmse(Ys)
    print('worst_val={}'.format(worst_val))
    worst_val = data.cost_0pred_rmse(Ys[0])
    print('worst_val={}'.format(worst_val))

    def data_cost_model_mfn(Xs, Ys):
        return np.std(Ys)  # TODO Make this more useful

    X_vals = data.load(indir, fids)
    Y_vals = data.load(outdir, fids)
    X_vals, Y_vals = data.croplen([X_vals, Y_vals])
    cost = data.cost_model_mfn(data_cost_model_mfn, [X_vals, Y_vals])
    print(cost)

    class SmokyModel:
        def predict(self, Xs):
            return np.zeros([1, Xs.shape[1], 83])
    mod = SmokyModel()

    cost = data.cost_model_prediction_rmse(mod, [Xs], Ys)
    print(cost)

    std = data.prediction_mstd(mod, [Xs])
    print(std)

    rms = data.prediction_rms(mod, [Xs])
    print(rms)
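def _example_getpathandshape():  # pragma: no cover
    # A short sketch of the ':(rows,cols)' suffix convention exercised by the
    # test above (this helper function is illustrative only). A trailing shape
    # annotation on a file pattern tells the loader how to reshape the raw
    # float32 stream; -1 lets the number of frames vary per file:
    import data
    path, shape = data.getpathandshape('feats/*.fwlspec:(-1,129)')
    assert path == 'feats/*.fwlspec'
    assert shape == (-1, 129)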
def test_compose(self):
    import data
    import compose

    fids = readids(cptest + '/file_id_list.scp')

    wav_dir = 'wav'
    f0_path = cptest + wav_dir + '_lf0/*.lf0'
    spec_path = cptest + wav_dir + '_fwlspec' + str(spec_size) + '/*.fwlspec'
    nm_path = cptest + wav_dir + '_fwnm' + str(nm_size) + '/*.fwnm'

    compose.compose(
        [cptest + 'binary_label_' + str(lab_size) + '/*.lab:(-1,' + str(lab_size) + ')'],
        fids, 'tests/test_made__smoke_compose_compose_lab0/*.lab',
        id_valid_start=8, normfn=None, wins=[], dropzerovardims=False)
    compose.compose(
        [cptest + 'binary_label_' + str(lab_size) + '/*.lab:(-1,' + str(lab_size) + ')'],
        fids, 'tests/test_made__smoke_compose_compose_lab1/*.lab',
        id_valid_start=8, normfn=compose.normalise_minmax, wins=[],
        dropzerovardims=False)
    path2, shape2 = data.getpathandshape(
        'tests/test_made__smoke_compose_compose_lab1/*.lab:(mean.dat,' + str(lab_size) + ')')
    compose.compose(
        [cptest + 'binary_label_' + str(lab_size) + '/*.lab:(-1,' + str(lab_size) + ')'],
        fids, 'tests/test_made__smoke_compose_compose_lab2/*.lab',
        id_valid_start=8, normfn=compose.normalise_minmax, wins=[],
        dropzerovardims=True)

    compose.compose(
        [f0_path, spec_path + ':(-1,' + str(spec_size) + ')',
         nm_path + ':(-1,' + str(nm_size) + ')'],
        fids, 'tests/test_made__smoke_compose_compose2_cmp1/*.cmp',
        id_valid_start=8, normfn=compose.normalise_minmax, wins=[])
    compose.compose(
        [f0_path, spec_path + ':(-1,' + str(spec_size) + ')',
         nm_path + ':(-1,' + str(nm_size) + ')'],
        fids, 'tests/test_made__smoke_compose_compose2_cmp2/*.cmp',
        id_valid_start=8, normfn=compose.normalise_meanstd, wins=[])
    compose.compose(
        [f0_path, spec_path + ':(-1,' + str(spec_size) + ')',
         nm_path + ':(-1,' + str(nm_size) + ')'],
        fids, 'tests/test_made__smoke_compose_compose2_cmp4/*.cmp',
        id_valid_start=8, normfn=compose.normalise_meanstd_nmnoscale, wins=[])
    compose.compose(
        [f0_path, spec_path + ':(-1,' + str(spec_size) + ')',
         nm_path + ':(-1,' + str(nm_size) + ')'],
        fids, 'tests/test_made__smoke_compose_compose2_cmp_deltas/*.cmp',
        id_valid_start=8, normfn=compose.normalise_meanstd_nmnoscale,
        wins=[[-0.5, 0.0, 0.5], [1.0, -2.0, 1.0]])

    # WORLD vocoder features
    compose.compose(
        [cptest + wav_dir + '_world_lf0/*.lf0',
         cptest + wav_dir + '_world_fwlspec/*.fwlspec:(-1,' + str(spec_size) + ')',
         cptest + wav_dir + '_world_fwdbaper/*.fwdbaper:(-1,' + str(nm_size) + ')',
         cptest + wav_dir + '_world_vuv/*.vuv'],
        fids, 'tests/test_made__smoke_compose_compose2_cmp_WORLD/*.cmp',
        id_valid_start=8, normfn=compose.normalise_meanstd, wins=[])
    compose.compose(
        [cptest + wav_dir + '_world_lf0/*.lf0',
         cptest + wav_dir + '_world_fwlspec/*.fwlspec:(-1,' + str(spec_size) + ')',
         cptest + wav_dir + '_world_fwdbaper/*.fwdbaper:(-1,' + str(nm_size) + ')',
         cptest + wav_dir + '_world_vuv/*.vuv'],
        fids, 'tests/test_made__smoke_compose_compose2_cmp_WORLD_mlpg/*.cmp',
        id_valid_start=8, normfn=compose.normalise_meanstd,
        wins=[[-0.5, 0.0, 0.5], [1.0, -2.0, 1.0]])

    compose.create_weights_spec(
        spec_path + ':(-1,' + str(spec_size) + ')', fids,
        'tests/test_made__smoke_compose_compose2_w1/*.w',
        spec_type='fwlspec', thresh=-32)
def create_weights_spec(specfeaturepath, fids, outfilepath, thresh=-32,
                        dftlen=4096, spec_type='fwlspec'):
    """
    This function creates a one-column vector with one weight value per frame.
    This weight is computed as a silence coefficient. During training, silent
    segments will be dropped (i.e. dropped if weight<0.5) if present at the
    very beginning or very end of the sample (or optionally within a sentence
    if the silence is particularly long).

    thresh : [dB] Frames whose energy is below this threshold get weight=0.0,
             and weight=1.0 otherwise.
    """
    def mag2db(a):
        return 20.0 * np.log10(np.abs(a))

    outfilepath = re.sub(r':[^:]+$', "", outfilepath)  # ignore any shape suffix in the output path
    if not os.path.isdir(os.path.dirname(outfilepath)):
        os.mkdir(os.path.dirname(outfilepath))

    for nf, fid in enumerate(fids):
        print_tty('\r    Processing feature files {} for {}                '.format(nf, fid))

        infilepath, shape = data.getpathandshape(specfeaturepath)
        if shape is None:
            shape = (-1, 1)
        infilepath = infilepath.replace('*', fid)

        if spec_type == 'fwlspec':
            Yspec = np.fromfile(infilepath, dtype='float32')
            Yspec = Yspec.reshape(shape)
            ener = mag2db(np.exp(np.mean(Yspec, axis=1)))
        elif spec_type == 'mcep':
            Ymcep = np.fromfile(infilepath, dtype='float32')
            Ymcep = Ymcep.reshape(shape)
            ener = mag2db(np.exp(Ymcep[:, 0]))  # Only the first coefficient is needed
        elif spec_type == 'fwcep':
            Ymcep = np.fromfile(infilepath, dtype='float32')
            Ymcep = Ymcep.reshape(shape)
            ener = mag2db(np.exp(Ymcep[:, 0]))  # Only the first coefficient is needed
        else:
            raise ValueError('Unknown spec_type: {}'.format(spec_type))

        # Normalise by the strongest value.
        # This might not be very reliable if the estimated spectral envelope is very noisy.
        ener -= np.max(ener)

        weight = ener.copy()
        weight[ener >= thresh] = 1.0
        weight[ener < thresh] = 0.0

        weight.astype('float32').tofile(outfilepath.replace('*', fid))

        if 0:  # Debugging plot
            import matplotlib.pyplot as plt
            plt.plot(ener, 'k')
            plt.plot(np.log10(weight), 'b')
            plt.plot([0, len(ener)], thresh * np.array([1, 1]), 'k')
            from IPython.core.debugger import Pdb
            Pdb().set_trace()

    print_tty('\r                                                           \r')
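def _example_create_weights_spec():  # pragma: no cover
    # A minimal usage sketch (hypothetical paths and file IDs; this helper is
    # illustrative only). With thresh=-32, any frame whose mean log-spectral
    # energy is more than 32dB below the loudest frame of the file gets
    # weight 0.0, i.e. it is treated as silence:
    create_weights_spec('tests/wav_fwlspec65/*.fwlspec:(-1,65)',
                        ['file001', 'file002'],
                        'tests/wav_fwlspec65_weights/*.w',
                        spec_type='fwlspec', thresh=-32)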
def compose(featurepaths, fids, outfilepath, wins=None, id_valid_start=-1,
            normfn=None, shift=0.005, dropzerovardims=False,
            do_finalcheck=False, verbose=1):
    """
    For each file index in fids, compose a set of features (can be input or
    output data) into a single file and normalise it according to statistics
    and normfn.

    The outfilepath will be populated by the composed/normalised files and by
    statistics files that can be used for de-composition.

    Parameters
    ----------
    featurepaths :  paths of the features to concatenate for each file
    fids :          file IDs
    outfilepath :   output path of the resulting composition and normalisation
    wins :          list of numpy arrays
                    E.g. the values used in Merlin are
                    wins=[[-0.5, 0.0, 0.5], [1.0, -2.0, 1.0]]
    """
    print('Compose data (id_valid_start={})'.format(id_valid_start))

    if wins is None:
        wins = []

    outfilepath = re.sub(r':[^:]+$', "", outfilepath)  # ignore any shape suffix in the output path
    if not os.path.isdir(os.path.dirname(outfilepath)):
        os.mkdir(os.path.dirname(outfilepath))

    size = None
    mins = None
    maxs = None
    means = None
    nbframes = 0
    for nf, fid in enumerate(fids):
        print_tty('\r    Composing file {}/{} {}               '.format(1 + nf, len(fids), fid))

        features = []
        minlen = None
        for featurepath in featurepaths:
            infilepath, shape = data.getpathandshape(featurepath)
            if shape is None:
                shape = (-1, 1)
            infilepath = infilepath.replace('*', fid)
            feature = np.fromfile(infilepath, dtype='float32')
            feature = feature.reshape(shape)
            features.append(feature)
            if minlen is None:
                minlen = feature.shape[0]
            else:
                minlen = np.min((minlen, feature.shape[0]))

        # Crop all features to the same length
        for feati in range(len(features)):
            features[feati] = features[feati][:minlen, :]

        Y = np.hstack(features)

        if len(wins) > 0:
            YWs = [Y]  # Always add the static values first
            for win in wins:  # Then concatenate the windowed (delta) values
                YW = np.ones(Y.shape)
                win_p = (len(win) + 1) // 2
                for d in range(Y.shape[1]):
                    # Convolution is the fastest way to apply the window
                    YW[win_p - 1:-(win_p - 1), d] = -scipy.signal.convolve(Y[:, d], win)[win_p:-win_p]
                    # Replicate the border values to avoid edge artefacts
                    YW[:win_p - 1, d] = YW[win_p - 1, d]
                    YW[-(win_p - 1):, d] = YW[-(win_p - 1) - 1, d]
                YWs.append(YW)
            Y = np.hstack(YWs)

            # if 0:
            #     from merlin.mlpg_fast import MLParameterGenerationFast as MLParameterGeneration
            #     mlpg_algo = MLParameterGeneration()
            #     var = np.tile(np.ones(CMP.shape[1]), (CMP.shape[0], 1))  # Simplification!
            #     YGEN = mlpg_algo.generation(CMP, var, 1)
            #     plt.plot(Y, 'k')
            #     plt.plot(YGEN, 'b')
            #     from IPython.core.debugger import Pdb; Pdb().set_trace()

        size = Y.shape[1]

        # Accumulate statistics over the training files only
        if nf < id_valid_start:
            if mins is None:
                mins = Y.min(axis=0)
            else:
                mins = np.minimum(mins, Y.min(axis=0))
            if maxs is None:
                maxs = Y.max(axis=0)
            else:
                maxs = np.maximum(maxs, Y.max(axis=0))
            if means is None:
                means = Y.sum(axis=0).astype('float64')
            else:
                means += Y.sum(axis=0).astype('float64')
            nbframes += Y.shape[0]

        # print('\r    Write data file {}: {}                '.format(nf, fid))
        Y.astype('float32').tofile(outfilepath.replace('*', fid))
    print_tty('\r                                                           \r')

    means /= nbframes
    zerovaridx = np.where((maxs - mins) == 0.0)[0]  # Indices of dimensions having zero variance

    mins.astype('float32').tofile(os.path.dirname(outfilepath) + '/min.dat')
    if verbose > 1: print('    mins={}'.format(mins))  # pragma: no cover
    maxs.astype('float32').tofile(os.path.dirname(outfilepath) + '/max.dat')
    if verbose > 1: print('    maxs={}'.format(maxs))  # pragma: no cover
    means.astype('float32').tofile(os.path.dirname(outfilepath) + '/mean.dat')
    if verbose > 1: print('    means={}'.format(means))  # pragma: no cover

    # Now that we have the mean, we can compute the std
    stds = None
    for nf, fid in enumerate(fids):
        Y = np.fromfile(outfilepath.replace('*', fid), dtype='float32')
        Y = Y.reshape((-1, size))
        if nf < id_valid_start:
            if stds is None:
                stds = ((Y - means)**2).sum(axis=0).astype('float64')
            else:
                stds += ((Y - means)**2).sum(axis=0).astype('float64')
    stds /= nbframes - 1  # unbiased variance estimator
    stds = np.sqrt(stds)
    stds.astype('float32').tofile(os.path.dirname(outfilepath) + '/std.dat')
    if verbose > 1: print('    stds={}'.format(stds))

    keepidx = np.arange(len(means))
    if dropzerovardims:
        keepidx = np.setdiff1d(np.arange(len(means)), zerovaridx)
        size = len(keepidx)
        keepidx.astype('int32').tofile(os.path.dirname(outfilepath) + '/keepidx.dat')
        print('Dropped dimensions with zero variance. {} dims remain'.format(size))

    print('{} files'.format(len(fids)))
    print('{} frames ({}s assuming {}s time shift)'.format(
        nbframes, datetime.timedelta(seconds=nbframes * shift), shift))
    strsize = ''
    for fpath in featurepaths:
        dummy, shape = data.getpathandshape(fpath)
        if shape is None:
            strsize += '1+'
        else:
            strsize += str(shape[1]) + '+'
    strsize = strsize[:-1]
    if dropzerovardims:
        strsize += '-' + str(len(zerovaridx))
    print('nb dimensions={} (features: ({})x{})'.format(size, strsize, 1 + len(wins)))
    print('{} dimensions with zero variance ({}){}'.format(
        len(zerovaridx), zerovaridx,
        ', which have been dropped' if dropzerovardims else ', which have been kept'))
    if normfn is not None:
        print('normalisation done using: {}'.format(normfn.__name__))
    else:
        print('no normalisation called')
    print('output path: {}'.format(outfilepath))

    # Maybe this shouldn't be called within compose, it should come afterwards. No, see #30
    if normfn is not None:
        normfn(outfilepath, fids, featurepaths=featurepaths, keepidx=keepidx,
               verbose=verbose)

    if do_finalcheck:
        print('Check data final statistics')
        verif_means = None
        verif_stds = None
        verif_mins = None
        verif_maxs = None
        verif_nbframes = 0
        for nf, fid in enumerate(fids):
            if nf >= id_valid_start:
                continue
            fpath = outfilepath.replace('*', fid)
            Y = np.fromfile(fpath, dtype='float32')
            Y = Y.reshape((-1, size))
            if verif_means is None:
                verif_means = Y.sum(axis=0).astype('float64')
            else:
                verif_means += Y.sum(axis=0).astype('float64')
            if verif_mins is None:
                verif_mins = Y.min(axis=0)
            else:
                verif_mins = np.minimum(verif_mins, Y.min(axis=0))
            if verif_maxs is None:
                verif_maxs = Y.max(axis=0)
            else:
                verif_maxs = np.maximum(verif_maxs, Y.max(axis=0))
            verif_nbframes += Y.shape[0]
        verif_means /= verif_nbframes
        for nf, fid in enumerate(fids):
            if nf >= id_valid_start:
                continue
            fpath = outfilepath.replace('*', fid)
            Y = np.fromfile(fpath, dtype='float32')
            Y = Y.reshape((-1, size))
            if verif_stds is None:
                verif_stds = ((Y - verif_means)**2).sum(axis=0).astype('float64')
            else:
                verif_stds += ((Y - verif_means)**2).sum(axis=0).astype('float64')
        verif_stds /= verif_nbframes - 1
        if verbose > 0:  # pragma: no cover
            print('verif_min={}'.format(verif_mins))
            print('verif_max={}'.format(verif_maxs))
            print('verif_means={}'.format(verif_means))
            print('verif_stds={}'.format(verif_stds))
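def _example_compose():  # pragma: no cover
    # A minimal usage sketch (hypothetical paths and sizes; normalise_meanstd
    # is assumed to be the mean/std normalisation function defined in this
    # module, as used by the tests above). Three feature streams are
    # concatenated per frame into 1+65+17=83 dimensions; the delta and
    # delta-delta windows triple that to 249; min/max/mean/std statistics are
    # computed on the first 8 (training) files only and written next to the
    # composed files for later de-normalisation:
    fids = readids('tests/file_id_list.scp')
    compose(['tests/wav_lf0/*.lf0',
             'tests/wav_fwlspec65/*.fwlspec:(-1,65)',
             'tests/wav_fwnm17/*.fwnm:(-1,17)'],
            fids, 'tests/wav_cmp/*.cmp', id_valid_start=8,
            normfn=normalise_meanstd,
            wins=[[-0.5, 0.0, 0.5], [1.0, -2.0, 1.0]])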