def main():
    """Compute VTLN warps per speaker and save them to disk.

    Loads the utterances file given on the command line, trains a VTLN
    model grouped by speaker and writes one ``<speaker> <warp>`` line per
    speaker to an output file whose name is derived from the input name
    ('segments' -> 'warps' and '.utt' -> '.warp').
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        'utterances_file', type=pathlib.Path,
        help='path to the utterances')
    parser.add_argument(
        '-j', '--njobs', type=int, default=get_njobs(),
        help='number of parallel jobs, default to %(default)s')
    args = parser.parse_args()

    output_file = str(args.utterances_file).replace(
        'segments', 'warps').replace('.utt', '.warp')

    # guard against silently overwriting the input file when the expected
    # 'segments'/'.utt' substrings are absent from the input filename
    if output_file == str(args.utterances_file):
        raise ValueError(
            f'cannot build output filename from {args.utterances_file}')

    # parents=True so that missing intermediate directories are created too
    pathlib.Path(output_file).parent.mkdir(parents=True, exist_ok=True)

    utterances = Utterances.load(args.utterances_file)

    processor = VtlnProcessor()
    processor.set_logger('info')
    warps = processor.process(
        utterances, njobs=args.njobs, group_by='speaker')

    with open(output_file, 'w') as fout:
        for spk, warp in sorted(warps.items()):
            fout.write(f'{spk} {warp}\n')
def extract_features_warp(configuration, utterances, warp, log, njobs=1):
    """Speech features extraction pipeline when all features are warped by
    the same factor. Used in the
    :func:`~shennong.features.processor.vtln.VtlnProcessor.process` method of
    the :class:`~shennong.features.processor.vtln.VtlnProcessor`.

    """
    # validate the number of jobs and the pipeline configuration before
    # instantiating the pipeline processors
    njobs = get_njobs(njobs, log=log)
    config = _init_config(configuration, log=log)

    # check the OMP_NUM_THREADS variable for parallel computations
    _check_environment(njobs, log=log)

    manager = PipelineManager(config, utterances, log=log)

    # no joblib verbosity on debug level (level <= 10) because each step
    # is already detailed in inner loops
    verbosity = 0 if log.getEffectiveLevel() <= 10 else 8

    # one delayed extraction task per utterance, all sharing the manager
    # and the single warp factor
    tasks = (
        joblib.delayed(_extract_single_pass_warp)(
            utterance, manager, warp, log=log)
        for utterance in utterances)

    runner = _Parallel(
        f'features extraction with warp {warp}', log,
        n_jobs=njobs, verbose=verbosity, prefer='threads')
    return FeaturesCollection(runner(tasks))
def process_all(self, utterances, njobs=None, **kwargs):
    """Returns features processed from several input `utterances`

    This function processes the features in parallel jobs.

    Parameters
    ----------
    utterances: :class:`~shennong.utterances.Utterances`
        The utterances on which to process features on.
    njobs: int, optional
        The number of parallel jobs to run in background. Default to
        the number of CPU cores available on the machine.
    **kwargs: dict, optional
        Extra arguments to be forwarded to the `process` method. Keys
        must be the same as for `utterances`.

    Returns
    -------
    features: :class:`~shennong.features_collection.FeaturesCollection`
        The computed features on each input signal. The keys of
        output `features` are the keys of the input `utterances`.

    Raises
    ------
    ValueError
        If the `njobs` parameter is <= 0 or if an entry is missing in
        optional kwargs.

    """
    # checks the number of background jobs
    njobs = get_njobs(njobs, log=self.log)

    # every extra argument must be a dict with one entry per utterance name
    for name, value in kwargs.items():
        if not isinstance(value, dict):
            raise ValueError(f'argument "{name}" is not a dict')
        if value.keys() != utterances.by_name().keys():
            raise ValueError(
                f'utterances and "{name}" have different names')

    def _one(utt, **extra):
        # select this utterance's entry from each extra argument
        return utt.name, self.process(
            utt.load_audio(),
            **{key: val[utt.name] for key, val in extra.items()})

    # no joblib verbosity on debug level (<= 10), inner loops log details
    verbosity = 0 if self.log.getEffectiveLevel() <= 10 else 8

    results = joblib.Parallel(
        n_jobs=njobs, verbose=verbosity, prefer='threads')(
            joblib.delayed(_one)(utt, **kwargs) for utt in utterances)
    return FeaturesCollection(results)
def process_all(self, signals, njobs=None):
    """Returns features processed from several input `signals`

    This function processes the features in parallel jobs.

    Parameters
    ----------
    signals: dict of :class:`~shennong.audio.Audio`
        A dictionary of input audio signals to process features on,
        where the keys are item names and values are audio signals.
    njobs: int, optional
        The number of parallel jobs to run in background. Default to
        the number of CPU cores available on the machine.

    Returns
    -------
    features: :class:`~shennong.features.features.FeaturesCollection`
        The computed features on each input signal. The keys of
        output `features` are the keys of the input `signals`.

    Raises
    ------
    ValueError
        If the `njobs` parameter is <= 0

    """
    # checks the number of background jobs
    njobs = get_njobs(njobs, log=self._log)

    def _compute(name, signal):
        # keep the item name attached to its computed features
        return name, self.process(signal)

    pairs = joblib.Parallel(n_jobs=njobs, verbose=0, backend='threading')(
        joblib.delayed(_compute)(name, signal)
        for name, signal in signals.items())

    return FeaturesCollection(**dict(pairs))
def main():
    """Train VTLN, extract warps and apply warped MFCC on Buckeye corpus"""
    parser = argparse.ArgumentParser()
    parser.add_argument('buckeye_corpus', type=pathlib.Path,
                        help='path to the raw Buckeye Corpus')
    parser.add_argument('output_file', type=pathlib.Path,
                        help='where to save the computed MFCCs')
    parser.add_argument(
        '-j', '--njobs', type=int, default=get_njobs(),
        help='number of parallel jobs to use, default to %(default)s')
    parser.add_argument(
        '-d', '--duration', type=float, default=10 * 60,
        help=('speech duration per speaker for VTLN training, '
              'default to %(default)s'))
    parser.add_argument('--warp-step', type=float, default=0.01,
                        help='VTLN warp step, default to %(default)s')
    parser.add_argument('--warp-min', type=float, default=0.85,
                        help='VTLN min warp, default to %(default)s')
    parser.add_argument('--warp-max', type=float, default=1.25,
                        help='VTLN max warp, default to %(default)s')
    args = parser.parse_args()

    # check input parameters
    if args.output_file.exists():
        raise ValueError(f'{args.output_file} already exists')
    if not args.buckeye_corpus.is_dir():
        raise ValueError(f'{args.buckeye_corpus} is not a directory')

    # generates utterances from the Buckeye corpus
    utterances = prepare_buckeye(args.buckeye_corpus)

    # extract 10m of speech per speaker to train VTLN
    vtln_utterances = utterances.fit_to_duration(args.duration)

    # compute the VTLN warps coefficients
    print(f'training VTLN on {args.duration}s per speaker '
          f'({len(vtln_utterances)} utterances)')
    processor = VtlnProcessor(warp_step=args.warp_step,
                              min_warp=args.warp_min,
                              max_warp=args.warp_max)
    processor.set_logger('info')
    warps = processor.process(
        vtln_utterances, njobs=args.njobs, group_by='speaker')

    print('VTLN warps per speaker are:')
    for spk, warp in sorted(warps.items()):
        print(f'{spk}: {warp}')

    # convert warps from speaker to utterance in the whole corpus
    warps = {utt.name: warps[utt.speaker] for utt in utterances}

    print(f'computing warped MFCCs for {len(utterances)} utterances')
    features = MfccProcessor().process_all(
        utterances, vtln_warp=warps, njobs=args.njobs)

    print(f'writing MFCCs to {args.output_file}')
    features.save(args.output_file)
def extract_features(configuration, utterances_index,
                     njobs=1, log=get_logger()):
    """Speech features extraction pipeline

    Given a pipeline ``configuration`` and an ``utterances_index``
    defining a list of utterances on which to extract features, this
    function applies the whole pipeline and returns the extracted
    features as an instance of
    :class:`~shennong.features.features.FeaturesCollection`. It uses
    ``njobs`` parallel subprocesses.

    The utterances in the ``utterances_index`` can be defined in one of
    the following format (the format must be homogeneous across the
    index, i.e. only one format can be used):

    * 1-uple (or str): ``<wav-file>``
    * 2-uple: ``<utterance-id> <wav-file>``
    * 3-uple: ``<utterance-id> <wav-file> <speaker-id>``
    * 4-uple: ``<utterance-id> <wav-file> <tstart> <tstop>``
    * 5-uple: ``<utterance-id> <wav-file> <speaker-id> <tstart> <tstop>``

    Parameters
    ----------
    configuration : dict or str
        The pipeline configuration, can be a dictionary, a path to a
        YAML file or a string formatted in YAML. To get a configuration
        example, see :func:`get_default_config`
    utterances_index : sequence of tuples
        The list of utterances to extract the features on.
    njobs : int, optional
        The number of subprocesses to execute in parallel, use a single
        process by default.
    log : logging.Logger
        A logger to display messages during pipeline execution

    Returns
    -------
    features : :class:`~shennong.features.features.FeaturesCollection`
        The extracted speech features

    Raises
    ------
    ValueError
        If the ``configuration`` or the ``utterances_index`` are
        invalid, or if something goes wrong during features extraction.

    """
    # initialize the pipeline configuration, the list of wav files to
    # process, instantiate the pipeline processors and make all the
    # checks to ensure all is correct
    njobs = get_njobs(njobs, log=log)
    config = _init_config(configuration, log=log)
    utterances = _init_utterances(utterances_index, log=log)

    # check the OMP_NUM_THREADS variable for parallel computations
    _check_environment(njobs, log=log)

    # do all the computations
    return _extract_features(config, utterances, njobs=njobs, log=log)
def extract_features(configuration, utterances, warps=None, njobs=1,
                     log=get_logger('pipeline', 'warning')):
    """Speech features extraction pipeline

    Given a pipeline ``configuration`` and ``utterances`` defining a
    list of utterances on which to extract features, this function
    applies the whole pipeline and returns the extracted features as an
    instance of
    :class:`~shennong.features.features.FeaturesCollection`. It uses
    ``njobs`` parallel subprocesses.

    Parameters
    ----------
    configuration : dict or str
        The pipeline configuration, can be a dictionary, a path to a
        YAML file or a string formatted in YAML. To get a configuration
        example, see :func:`get_default_config`
    utterances : :class:`~shennong.utterances.Utterances`
        The list of utterances to extract the features on.
    warps : dict, optional
        A dictionary of precomputed VTLN warps coefficients to be
        applied on features. Must be a dict (str: float) of warps
        indexed either by utterances speaker or name. Both the
        ``warps`` argument and the config['vtln'] entry must not be
        defined together.
    njobs : int, optional
        The number of subprocesses to execute in parallel, use a single
        process by default.
    log : logging.Logger
        A logger to display messages during pipeline execution

    Returns
    -------
    features : :class:`~shennong.features.features.FeaturesCollection`
        The extracted speech features

    Raises
    ------
    ValueError
        If the ``configuration`` or the ``utterances`` are invalid, if
        both the ``warps`` argument and the 'vtln' entry in
        configuration are defined or if something goes wrong during
        features extraction.

    """
    # initialize the pipeline configuration, the list of wav files to
    # process, instantiate the pipeline processors and make all the
    # checks to ensure all is correct
    njobs = get_njobs(njobs, log=log)
    config = _init_config(configuration, log=log)
    log.info('detected format for utterances index is: %s',
             utterances.format(type=str))

    # make sure the warps are valid (not overloading 'vtln' in config and
    # either by speaker or by utterance. If defined per speaker, convert
    # them by utterance)
    if warps:
        warps = _init_warps(warps, config, utterances, log)

    # check the OMP_NUM_THREADS variable for parallel computations
    _check_environment(njobs, log=log)

    # do all the computations
    return _extract_features(config, utterances, warps, njobs=njobs, log=log)