def test_spc_and_npow(self):
    """Check that ``spc`` and ``npow`` agree on their first dimension.

    Reads ``data/test16000.wav`` below ``dirpath``, runs the WORLD analyzer
    with ``shiftms=5`` and asserts that ``spc.shape[0]`` equals
    ``npow.shape[0]``.
    """
    wav_path = dirpath + '/data/test16000.wav'
    fs, x = wavfile.read(wav_path)
    extractor = FeatureExtractor(analyzer='world', fs=fs, shiftms=5)

    # Only the second element of the analysis result is checked here;
    # npow() is called after analyze(), as in every other call site.
    _f0, spc, _ap = extractor.analyze(x)
    npow = extractor.npow()

    spc_len = spc.shape[0]
    npow_len = npow.shape[0]
    assert spc_len == npow_len
def _analyze_world_features(self, x, f0_only=False):
    """Run WORLD analysis on ``x`` and store the results in ``self.feats``.

    The keys ``f0``, ``spc``, ``ap``, ``uv``, ``cf0``, ``lf0`` and ``lcf0``
    are always written.  When ``f0_only`` is true the method returns right
    after them.  Otherwise ``mcep``, ``npow``, ``cap``, ``ccap`` and
    ``cap_uv`` are written as well, but only if ``fftl`` is not 256 and
    ``fs`` is above 16000.

    NOTE(review): the source this was reconstructed from had lost its
    indentation, so the extent of the ``fftl``/``fs`` guard is an assumption
    (everything after it is treated as guarded) -- confirm against the
    original file.

    Parameters
    ----------
    x : waveform passed to ``FeatureExtractor.analyze``
    f0_only : bool, optional
        Stop after the F0-related features.
    """
    conf = self.conf
    feats = self.feats
    extractor = FeatureExtractor(
        analyzer="world",
        fs=conf["fs"],
        fftl=conf["fftl"],
        shiftms=conf["shiftms"],
        minf0=self.sconf["minf0"],
        maxf0=self.sconf["maxf0"],
    )

    # analyze world based features
    feats["f0"], feats["spc"], feats["ap"] = extractor.analyze(x)
    feats["uv"], feats["cf0"] = convert_continuos_f0(feats["f0"])
    # EPS keeps the logarithm finite where f0 is zero.
    feats["lf0"] = np.log(feats["f0"] + EPS)
    feats["lcf0"] = np.log(feats["cf0"])
    if f0_only:
        return

    # NOTE: 256 fft_size sometimes causes errors
    if conf["fftl"] == 256 or conf["fs"] <= 16000:
        return

    feats["mcep"] = extractor.mcep(dim=conf["mcep_dim"],
                                   alpha=conf["mcep_alpha"])
    feats["npow"] = extractor.npow()
    feats["cap"] = extractor.codeap()

    # ``cap`` aliases feats["cap"], so the zeroing below edits it in place.
    cap = feats["cap"]
    feats["ccap"] = np.zeros(cap.shape)
    feats["cap_uv"] = np.zeros(cap.shape)
    n_bands = feats["ccap"].shape[-1]
    for d in range(n_bands):
        column = cap[:, d]
        # Entries equal to the column maximum are set to 0.0 before the
        # column goes through the same uv/continuous split as F0.
        at_peak = column == max(column)
        cap[at_peak, d] = 0.0
        band_uv, band_cont = convert_continuos_f0(cap[:, d])
        feats["cap_uv"][:, d] = band_uv
        feats["ccap"][:, d] = band_cont
def main(*argv):
    """Create F0 and npow histograms for one speaker.

    Reads the utterance names from ``list_file``, extracts F0 and npow from
    ``<wav_dir>/<name>.wav`` with the WORLD analyzer and writes
    ``<speaker>_f0histogram.png`` and ``<speaker>_npowhistogram.png`` into
    ``figure_dir``.

    Parameters
    ----------
    *argv : str
        Command-line arguments (speaker, list_file, wav_dir, figure_dir).
        When empty, ``sys.argv[1:]`` is used.
    """
    argv = argv if argv else sys.argv[1:]
    dcp = 'Create histogram for speaker-dependent configure'
    parser = argparse.ArgumentParser(description=dcp)
    parser.add_argument('speaker', type=str,
                        help='Input speaker label')
    parser.add_argument('list_file', type=str,
                        help='List file of the input speaker')
    parser.add_argument('wav_dir', type=str,
                        help='Directory of wav file')
    parser.add_argument('figure_dir', type=str,
                        help='Directory for figure output')
    args = parser.parse_args(argv)

    # open list file
    with open(args.list_file, 'r') as fp:
        files = fp.readlines()

    f0s = []
    npows = []
    for f in files:
        f = f.rstrip()
        if not f:
            # A blank line would otherwise resolve to "<wav_dir>/.wav".
            continue

        # open waveform
        wavf = os.path.join(args.wav_dir, f + '.wav')
        fs, x = wavfile.read(wavf)
        # np.float was removed in NumPy 1.24; np.float64 is the same dtype.
        x = np.array(x, dtype=np.float64)
        print("Extract: " + wavf)

        # construct FeatureExtractor class
        feat = FeatureExtractor(analyzer='world', fs=fs)

        # f0 and npow extraction
        f0, _, _ = feat.analyze(x)
        npow = feat.npow()

        f0s.append(f0)
        npows.append(npow)

    f0s = np.hstack(f0s).flatten()
    npows = np.hstack(npows).flatten()

    # create a histogram to visualize F0 range of the speaker
    f0histogrampath = os.path.join(args.figure_dir,
                                   args.speaker + '_f0histogram.png')
    create_histogram(f0s, f0histogrampath, range_min=40, range_max=700,
                     step=50, xlabel='Fundamental frequency [Hz]')

    # create a histogram to visualize npow range of the speaker
    npowhistogrampath = os.path.join(args.figure_dir,
                                     args.speaker + '_npowhistogram.png')
    create_histogram(npows, npowhistogrampath, range_min=-70, range_max=20,
                     step=10, xlabel="Frame power [dB]")
def extract_f0_and_npow(wavf):
    """
    F0 and npow extraction

    The waveform is read, converted to float64, passed through
    ``low_cut_filter`` with a cutoff of 70 and then analyzed with the WORLD
    analyzer.

    Parameters
    ----------
    wavf : str,
        File path of waveform file

    Returns
    -------
    dict :
        Dictionary consisting of F0 and npow arrays
        (keys ``"f0"`` and ``"npow"``)

    """
    # open waveform
    print("Extract: " + wavf)
    fs, x = wavfile.read(wavf)
    # np.float was removed in NumPy 1.24; np.float64 is the same dtype.
    x = np.array(x, dtype=np.float64)
    x = low_cut_filter(x, fs, cutoff=70)

    # construct FeatureExtractor class
    feat = FeatureExtractor(analyzer="world", fs=fs)

    # f0 and npow extraction
    f0, _, _ = feat.analyze(x)
    npow = feat.npow()

    return {"f0": f0, "npow": npow}
def _analyze_world_features(self, x):
    """Run WORLD analysis on ``x`` and store the results in ``self.feats``.

    Writes, in this order, the keys ``f0``, ``spc``, ``ap``, ``mcep``,
    ``npow``, ``cap``, ``ccap``, ``cap_uv``, ``uv``, ``cf0``, ``lf0`` and
    ``lcf0``.

    Parameters
    ----------
    x : waveform passed to ``FeatureExtractor.analyze``
    """
    conf = self.conf
    feats = self.feats
    extractor = FeatureExtractor(
        analyzer="world",
        fs=conf["fs"],
        fftl=conf["fftl"],
        shiftms=conf["shiftms"],
        minf0=self.sconf["minf0"],
        maxf0=self.sconf["maxf0"],
    )

    # analyze world based features
    feats["f0"], feats["spc"], feats["ap"] = extractor.analyze(x)
    feats["mcep"] = extractor.mcep(dim=conf["mcep_dim"],
                                   alpha=conf["mcep_alpha"])
    feats["npow"] = extractor.npow()
    feats["cap"] = extractor.codeap()

    # ``cap`` aliases feats["cap"], so the zeroing below edits it in place.
    cap = feats["cap"]
    feats["ccap"] = np.zeros(cap.shape)
    feats["cap_uv"] = np.zeros(cap.shape)
    n_bands = feats["ccap"].shape[-1]
    for d in range(n_bands):
        column = cap[:, d]
        # Entries equal to the column maximum are set to 0.0 before the
        # column goes through the same uv/continuous split as F0.
        at_peak = column == max(column)
        cap[at_peak, d] = 0.0
        band_uv, band_cont = convert_continuos_f0(cap[:, d])
        feats["cap_uv"][:, d] = band_uv
        feats["ccap"][:, d] = band_cont

    feats["uv"], feats["cf0"] = convert_continuos_f0(feats["f0"])
    # EPS keeps the logarithm finite where f0 is zero.
    feats["lf0"] = np.log(feats["f0"] + EPS)
    feats["lcf0"] = np.log(feats["cf0"])
def main(*argv):
    """Create F0 and npow histograms for one speaker.

    Reads the utterance names from ``list_file``, extracts F0 and npow from
    ``<wav_dir>/<name>.wav`` with the WORLD analyzer and writes
    ``<speaker>_f0histogram.png`` and ``<speaker>_npowhistogram.png`` into
    ``figure_dir``.

    Parameters
    ----------
    *argv : str
        Command-line arguments (speaker, list_file, wav_dir, figure_dir).
        When empty, ``sys.argv[1:]`` is used.
    """
    argv = argv if argv else sys.argv[1:]
    dcp = 'Create histogram for speaker-dependent configure'
    parser = argparse.ArgumentParser(description=dcp)
    parser.add_argument('speaker', type=str,
                        help='Input speaker label')
    parser.add_argument('list_file', type=str,
                        help='List file of the input speaker')
    parser.add_argument('wav_dir', type=str,
                        help='Directory of wav file')
    parser.add_argument('figure_dir', type=str,
                        help='Directory for figure output')
    args = parser.parse_args(argv)

    # open list file
    with open(args.list_file, 'r') as fp:
        files = fp.readlines()

    f0s = []
    npows = []
    for f in files:
        f = f.rstrip()
        if not f:
            # A blank line would otherwise resolve to "<wav_dir>/.wav".
            continue

        # open waveform
        wavf = os.path.join(args.wav_dir, f + '.wav')
        fs, x = wavfile.read(wavf)
        # np.float was removed in NumPy 1.24; np.float64 is the same dtype.
        x = np.array(x, dtype=np.float64)
        print("Extract: " + wavf)

        # construct FeatureExtractor class
        feat = FeatureExtractor(analyzer='world', fs=fs)

        # f0 and npow extraction
        f0, _, _ = feat.analyze(x)
        npow = feat.npow()

        f0s.append(f0)
        npows.append(npow)

    f0s = np.hstack(f0s).flatten()
    npows = np.hstack(npows).flatten()

    # create a histogram to visualize F0 range of the speaker
    f0histogrampath = os.path.join(
        args.figure_dir, args.speaker + '_f0histogram.png')
    create_histogram(f0s, f0histogrampath, range_min=40, range_max=700,
                     step=50, xlabel='Fundamental frequency [Hz]')

    # create a histogram to visualize npow range of the speaker
    npowhistogrampath = os.path.join(
        args.figure_dir, args.speaker + '_npowhistogram.png')
    create_histogram(npows, npowhistogrampath, range_min=-70, range_max=20,
                     step=10, xlabel="Frame power [dB]")
def main(*argv):
    """Extract acoustic features for a speaker and write anasyn waveforms.

    For every name in ``list_file`` whose ``<pair_dir>/h5/<name>.h5`` does
    not exist yet (or always, with ``--overwrite``), the waveform
    ``<wav_dir>/<name>.wav`` is analyzed, F0/mcep/npow/codeap are saved to
    the h5 file, and an analysis-synthesis waveform is written to
    ``<pair_dir>/anasyn/<name>.wav``.

    Parameters
    ----------
    *argv : str
        Command-line arguments (``[--overwrite] speaker ymlf list_file
        wav_dir pair_dir``).  When empty, ``sys.argv[1:]`` is used.

    Raises
    ------
    AssertionError
        If a waveform's sampling rate differs from ``wav_fs`` in the
        speaker yml.
    """
    # parse_args() expects the arguments without the program name; passing
    # sys.argv unchanged would make the script path the 'speaker' argument.
    argv = argv if argv else sys.argv[1:]

    # Options for python
    dcp = 'Extract aoucstic features for the speaker'
    parser = argparse.ArgumentParser(description=dcp)
    parser.add_argument('--overwrite', default=False, action='store_true',
                        help='Overwrite h5 file')
    parser.add_argument('speaker', type=str,
                        help='Input speaker label')
    parser.add_argument('ymlf', type=str,
                        help='Yml file of the input speaker')
    parser.add_argument('list_file', type=str,
                        help='List file of the input speaker')
    parser.add_argument('wav_dir', type=str,
                        help='Wav file directory of the speaker')
    parser.add_argument('pair_dir', type=str,
                        help='Directory of the speaker pair')
    args = parser.parse_args(argv)

    # read parameters from speaker yml
    sconf = SpeakerYML(args.ymlf)
    h5_dir = os.path.join(args.pair_dir, 'h5')
    anasyn_dir = os.path.join(args.pair_dir, 'anasyn')
    if not os.path.exists(os.path.join(h5_dir, args.speaker)):
        os.makedirs(os.path.join(h5_dir, args.speaker))
    if not os.path.exists(os.path.join(anasyn_dir, args.speaker)):
        os.makedirs(os.path.join(anasyn_dir, args.speaker))

    # construct FeatureExtractor class
    feat = FeatureExtractor(analyzer=sconf.analyzer,
                            fs=sconf.wav_fs,
                            fftl=sconf.wav_fftl,
                            shiftms=sconf.wav_shiftms,
                            minf0=sconf.f0_minf0,
                            maxf0=sconf.f0_maxf0)

    # construct Synthesizer class
    synthesizer = Synthesizer(fs=sconf.wav_fs,
                              fftl=sconf.wav_fftl,
                              shiftms=sconf.wav_shiftms)

    # open list file
    with open(args.list_file, 'r') as fp:
        for line in fp:
            f = line.rstrip()
            if not f:
                # A blank line would otherwise resolve to "<wav_dir>/.wav".
                continue
            h5f = os.path.join(h5_dir, f + '.h5')

            if (not os.path.exists(h5f)) or args.overwrite:
                wavf = os.path.join(args.wav_dir, f + '.wav')
                fs, x = wavfile.read(wavf)
                # np.float was removed in NumPy 1.24; np.float64 is the
                # same dtype.
                x = np.array(x, dtype=np.float64)
                x = low_cut_filter(x, fs, cutoff=70)
                assert fs == sconf.wav_fs

                print("Extract acoustic features: " + wavf)

                # analyze F0, spc, and ap
                f0, spc, ap = feat.analyze(x)
                mcep = feat.mcep(dim=sconf.mcep_dim, alpha=sconf.mcep_alpha)
                npow = feat.npow()
                codeap = feat.codeap()

                # save features into a hdf5 file; close the handle even if
                # one of the save calls raises
                h5 = HDF5(h5f, mode='w')
                try:
                    h5.save(f0, ext='f0')
                    # h5.save(spc, ext='spc')
                    # h5.save(ap, ext='ap')
                    h5.save(mcep, ext='mcep')
                    h5.save(npow, ext='npow')
                    h5.save(codeap, ext='codeap')
                finally:
                    h5.close()

                # analysis/synthesis using F0, mcep, and ap
                wav = synthesizer.synthesis(f0,
                                            mcep,
                                            ap,
                                            alpha=sconf.mcep_alpha,
                                            )
                # clip to the int16 range before the dtype conversion
                wav = np.clip(wav, -32768, 32767)
                anasynf = os.path.join(anasyn_dir, f + '.wav')
                wavfile.write(anasynf, fs, np.array(wav, dtype=np.int16))
            else:
                print("Acoustic features already exist: " + h5f)
def world_feature_extract(queue, wav_list, config):
    """WORLD feature extraction

    Each wav file is analyzed and the concatenation of uv, low-pass filtered
    continuous F0, mcep and codeap is written to an hdf5 file; f0, ap, spc
    and npow are written as well when the matching ``save_*`` flag is set.
    After the last file, ``'Finish'`` is put on ``queue``.

    Args:
        queue (multiprocessing.Queue): the queue to store the file name of utterance
        wav_list (list): list of the wav files
        config (dict): feature extraction config

    """
    # define feature extractor
    extractor = FeatureExtractor(
        analyzer="world",
        fs=config['sampling_rate'],
        shiftms=config['shiftms'],
        minf0=config['minf0'],
        maxf0=config['maxf0'],
        fftl=config['fft_size'])

    # extraction
    for position, wav_name in enumerate(wav_list, start=1):
        logging.info("now processing %s (%d/%d)" % (wav_name, position, len(wav_list)))

        # load wavfile and apply low cut filter
        fs, samples = wavfile.read(wav_name)
        samples = np.array(samples, dtype=np.float32)
        if config['highpass_cutoff'] != 0:
            samples = low_cut_filter(samples, fs, cutoff=config['highpass_cutoff'])

        # check sampling frequency
        if fs != config['sampling_rate']:
            logging.error("sampling frequency of %s is not matched." % wav_name)
            sys.exit(1)

        # extract features
        f0, spc, ap = extractor.analyze(samples)
        codeap = extractor.codeap()
        mcep = extractor.mcep(dim=config['mcep_dim'], alpha=config['mcep_alpha'])
        npow = extractor.npow()
        uv, cont_f0 = convert_continuos_f0(f0)

        # the F0 contour has one value per shift, so its sampling rate is
        # the reciprocal of the shift expressed in seconds
        lpf_fs = int(1.0 / (config['shiftms'] * 0.001))
        smoothed_f0 = low_pass_filter(cont_f0, lpf_fs, cutoff=20)
        # retry with a doubling cutoff until no value fails the >= 0 check
        retry_cutoff = 70
        while not np.all(smoothed_f0 >= [0]):
            smoothed_f0 = low_pass_filter(cont_f0, lpf_fs, cutoff=retry_cutoff)
            retry_cutoff *= 2

        # concatenate
        smoothed_f0 = np.expand_dims(smoothed_f0, axis=-1)
        uv = np.expand_dims(uv, axis=-1)
        feats = np.concatenate([uv, smoothed_f0, mcep, codeap], axis=1)

        # save feature
        feat_name = path_replace(wav_name, config['indir'], config['outdir'],
                                 extname=config['feature_format'])
        write_hdf5(feat_name, "/%s" % (config["feat_type"]), feats)
        optional_outputs = (
            ('save_f0', "/f0", f0),
            ('save_ap', "/ap", ap),
            ('save_spc', "/spc", spc),
            ('save_npow', "/npow", npow),
        )
        for flag, dataset, values in optional_outputs:
            if config[flag]:
                write_hdf5(feat_name, dataset, values)

    queue.put('Finish')
def main(*argv):
    """Extract acoustic features for a speaker and write anasyn waveforms.

    For every name in ``list_file`` whose ``<pair_dir>/h5/<name>.h5`` does
    not exist yet (or always, with ``--overwrite``), the waveform
    ``<wav_dir>/<name>.wav`` is analyzed, F0/mcep/npow/codeap are saved to
    the h5 file, and an analysis-synthesis waveform is written to
    ``<pair_dir>/anasyn/<name>.wav``.

    Parameters
    ----------
    *argv : str
        Command-line arguments (``[--overwrite] speaker ymlf list_file
        wav_dir pair_dir``).  When empty, ``sys.argv[1:]`` is used.

    Raises
    ------
    AssertionError
        If a waveform's sampling rate differs from ``wav_fs`` in the
        speaker yml.
    """
    # parse_args() expects the arguments without the program name; passing
    # sys.argv unchanged would make the script path the 'speaker' argument.
    argv = argv if argv else sys.argv[1:]

    # Options for python
    dcp = 'Extract aoucstic features for the speaker'
    parser = argparse.ArgumentParser(description=dcp)
    parser.add_argument('--overwrite', default=False, action='store_true',
                        help='Overwrite h5 file')
    parser.add_argument('speaker', type=str,
                        help='Input speaker label')
    parser.add_argument('ymlf', type=str,
                        help='Yml file of the input speaker')
    parser.add_argument('list_file', type=str,
                        help='List file of the input speaker')
    parser.add_argument('wav_dir', type=str,
                        help='Wav file directory of the speaker')
    parser.add_argument('pair_dir', type=str,
                        help='Directory of the speaker pair')
    args = parser.parse_args(argv)

    # read parameters from speaker yml
    sconf = SpeakerYML(args.ymlf)
    h5_dir = os.path.join(args.pair_dir, 'h5')
    anasyn_dir = os.path.join(args.pair_dir, 'anasyn')
    if not os.path.exists(os.path.join(h5_dir, args.speaker)):
        os.makedirs(os.path.join(h5_dir, args.speaker))
    if not os.path.exists(os.path.join(anasyn_dir, args.speaker)):
        os.makedirs(os.path.join(anasyn_dir, args.speaker))

    # construct FeatureExtractor class
    feat = FeatureExtractor(analyzer=sconf.analyzer,
                            fs=sconf.wav_fs,
                            fftl=sconf.wav_fftl,
                            shiftms=sconf.wav_shiftms,
                            minf0=sconf.f0_minf0,
                            maxf0=sconf.f0_maxf0)

    # construct Synthesizer class
    synthesizer = Synthesizer(fs=sconf.wav_fs,
                              fftl=sconf.wav_fftl,
                              shiftms=sconf.wav_shiftms)

    # open list file
    with open(args.list_file, 'r') as fp:
        for line in fp:
            f = line.rstrip()
            if not f:
                # A blank line would otherwise resolve to "<wav_dir>/.wav".
                continue
            h5f = os.path.join(h5_dir, f + '.h5')

            if (not os.path.exists(h5f)) or args.overwrite:
                wavf = os.path.join(args.wav_dir, f + '.wav')
                fs, x = wavfile.read(wavf)
                # np.float was removed in NumPy 1.24; np.float64 is the
                # same dtype.
                x = np.array(x, dtype=np.float64)
                x = low_cut_filter(x, fs, cutoff=70)
                assert fs == sconf.wav_fs

                print("Extract acoustic features: " + wavf)

                # analyze F0, spc, and ap
                f0, spc, ap = feat.analyze(x)
                mcep = feat.mcep(dim=sconf.mcep_dim, alpha=sconf.mcep_alpha)
                npow = feat.npow()
                codeap = feat.codeap()

                # save features into a hdf5 file; close the handle even if
                # one of the save calls raises
                h5 = HDF5(h5f, mode='w')
                try:
                    h5.save(f0, ext='f0')
                    # h5.save(spc, ext='spc')
                    # h5.save(ap, ext='ap')
                    h5.save(mcep, ext='mcep')
                    h5.save(npow, ext='npow')
                    h5.save(codeap, ext='codeap')
                finally:
                    h5.close()

                # analysis/synthesis using F0, mcep, and ap
                wav = synthesizer.synthesis(f0,
                                            mcep,
                                            ap,
                                            alpha=sconf.mcep_alpha,
                                            )
                # clip to the int16 range before the dtype conversion
                wav = np.clip(wav, -32768, 32767)
                anasynf = os.path.join(anasyn_dir, f + '.wav')
                wavfile.write(anasynf, fs, np.array(wav, dtype=np.int16))
            else:
                print("Acoustic features already exist: " + h5f)
def main():
    """Extract acoustic features for the 'converted' or 'target' file set.

    Reads the names listed in ``./list/<set>.list``; for each name without
    an existing ``./<set>/h5f/<name>.h5`` it analyzes
    ``./<set>/wav/<name>.wav``, saves F0/mcep/npow/codeap to the h5 file and
    writes an analysis-synthesis waveform to ``./<set>/anasyn/<name>.wav``.

    Relies on a module-level ``args`` object (defined outside this block)
    providing ``file`` (``'con'`` or ``'tar'``), ``minf0`` and ``maxf0``.

    Raises
    ------
    ValueError
        If ``args.file`` is neither ``'con'`` nor ``'tar'``.
    """
    # "file_set" rather than "file" to avoid shadowing a builtin name
    if args.file == 'con':
        file_set = 'converted'
    elif args.file == 'tar':
        file_set = 'target'
    else:
        raise ValueError("The file is incorrect")

    feat = FeatureExtractor(analyzer='world',
                            fs=22050,
                            fftl=1024,
                            shiftms=5,
                            minf0=args.minf0,
                            maxf0=args.maxf0)

    # construct Synthesizer class
    synthesizer = Synthesizer(fs=22050,
                              fftl=1024,
                              shiftms=5)

    # open list file
    with open('./list/' + file_set + '.list', 'r') as fp:
        for line in fp:
            f = line.rstrip()
            if not f:
                # A blank line would otherwise resolve to ".../wav/.wav".
                continue
            h5f = os.path.join('./' + file_set + '/h5f/', f + '.h5')

            if (not os.path.exists(h5f)):
                wavf = os.path.join('./' + file_set + '/wav/', f + '.wav')
                fs, x = wavfile.read(wavf)
                # np.float was removed in NumPy 1.24; np.float64 is the
                # same dtype.
                x = np.array(x, dtype=np.float64)
                x = low_cut_filter(x, fs, cutoff=70)

                print("Extract acoustic features: " + wavf)

                # analyze F0, spc, and ap
                f0, spc, ap = feat.analyze(x)
                mcep = feat.mcep(dim=34, alpha=0.544)
                npow = feat.npow()
                codeap = feat.codeap()

                # save features into a hdf5 file; close the handle even if
                # one of the save calls raises
                h5 = HDF5(h5f, mode='w')
                try:
                    h5.save(f0, ext='f0')
                    # h5.save(spc, ext='spc')
                    # h5.save(ap, ext='ap')
                    h5.save(mcep, ext='mcep')
                    h5.save(npow, ext='npow')
                    h5.save(codeap, ext='codeap')
                finally:
                    h5.close()

                # analysis/synthesis using F0, mcep, and ap
                wav = synthesizer.synthesis(f0,
                                            mcep,
                                            ap,
                                            alpha=0.544,
                                            )
                # clip to the int16 range before the dtype conversion
                wav = np.clip(wav, -32768, 32767)
                anasynf = os.path.join('./' + file_set + '/anasyn/', f + '.wav')
                wavfile.write(anasynf, fs, np.array(wav, dtype=np.int16))
            else:
                print("Acoustic features already exist: " + h5f)