Exemplo n.º 1
0
def test_logger(capsys, level):
    """Check that the logger only emits the messages allowed by ``level``

    One message is sent at each of the DEBUG, INFO, WARNING and ERROR
    levels. Nothing must be written on the standard output and, for
    the four standard logging levels, the standard error must contain
    exactly the messages whose level is at or above ``level``.

    """
    log = utils.get_logger(level=level)
    log.debug('DEBUG')
    log.info('INFO')
    log.warning('WARNING')
    log.error('ERROR')

    captured = capsys.readouterr()
    assert not captured.out

    # messages expected on stderr for each tested level. Levels are
    # looked up by value (not by identity, which only works for
    # integers because CPython caches the small ones)
    expected = {
        logging.ERROR: {'ERROR'},
        logging.WARNING: {'ERROR', 'WARNING'},
        logging.INFO: {'ERROR', 'WARNING', 'INFO'},
        logging.DEBUG: {'ERROR', 'WARNING', 'INFO', 'DEBUG'}}

    # any other level is only checked for the empty stdout above
    if level in expected:
        for message in ('ERROR', 'WARNING', 'INFO', 'DEBUG'):
            if message in expected[level]:
                assert message in captured.err
            else:
                assert message not in captured.err
Exemplo n.º 2
0
def test_njobs(capsys, njobs, audio):
    """Check how ``MfccProcessor.process_all`` handles ``njobs``

    A null ``njobs`` must raise a ValueError. When ``njobs`` exceeds
    the number of CPU cores, a message about the reduction must be
    logged on stderr. In all the valid cases the returned features
    have the same keys as the input signals.

    """
    get_logger().setLevel(0)
    signals = {'1': audio}
    p = MfccProcessor(sample_rate=audio.sample_rate)

    if njobs == 0:
        with pytest.raises(ValueError) as err:
            p.process_all(signals, njobs=njobs)
        # match on the exception itself (err.value), not on the
        # ExceptionInfo wrapper whose string form is a pytest detail
        assert 'must be strictly positive' in str(err.value)
        return

    features = p.process_all(signals, njobs=njobs)

    if njobs > multiprocessing.cpu_count():
        assert 'CPU cores but reducing to' in capsys.readouterr().err

    assert signals.keys() == features.keys()
Exemplo n.º 3
0
def test_process(capsys, audio, mfcc, weights):
    """Check the bottleneck features extracted from ``audio``

    Verifies the shape and timestamps of the features, the framing
    parameters and sample rate exposed by the processor, and the
    messages logged on stderr during processing.

    """
    get_logger(level='debug')

    processor = BottleneckProcessor(weights=weights)
    features = processor.process(audio)

    # features shape and timestamps
    assert features.shape == (140, 80)
    assert features.shape[1] == processor.ndims
    assert np.allclose(features.times, mfcc.times)

    # processor parameters
    assert processor.frame_length == 0.025
    assert processor.frame_shift == 0.01
    assert processor.sample_rate == 8000

    # the expected number of speech frames depends on whether the
    # audio has a sox binary attached
    nspeech = '118' if audio._sox_binary else '121'
    vad_message = (
        '{} frames of speech detected (on 140 total frames)'.format(nspeech))

    # check the log messages
    stderr = capsys.readouterr().err
    assert 'resampling audio from 16000Hz@16b to 8000Hz@16b' in stderr
    assert vad_message in stderr
Exemplo n.º 4
0
def test_concatenate_tolerance(capsys):
    """Check ``Features.concatenate`` with a mismatch in frames number

    Two features of 12 and 10 frames are concatenated: a tolerance of
    0 or 1 must raise a ValueError whereas a tolerance of 2 must
    succeed, give 10 frames and log a warning, whatever the order of
    the operands.

    """
    get_logger(level='info')
    f1 = Features(np.random.random((12, 2)), np.ones((12, )))
    f2 = Features(np.random.random((10, 2)), np.ones((10, )))

    # the messages are matched on the exception itself (err.value),
    # not on the pytest ExceptionInfo wrapper
    with pytest.raises(ValueError) as err:
        f1.concatenate(f2, tolerance=0)
    assert 'features have a different number of frames' in str(err.value)

    with pytest.raises(ValueError) as err:
        f1.concatenate(f2, tolerance=1)
    assert 'features differs number of frames, and greater than ' in str(
        err.value)

    f3 = f1.concatenate(f2, tolerance=2)
    assert f3.shape == (10, 4)
    assert 'WARNING' in capsys.readouterr().err

    f3 = f2.concatenate(f1, tolerance=2)
    assert f3.shape == (10, 4)
    assert 'WARNING' in capsys.readouterr().err
Exemplo n.º 5
0
def _extract_pass_one(utt_name, manager, log=get_logger()):
    """First extraction pass on the utterance ``utt_name``

    Loads the audio, extracts the main features, accumulates the CMVN
    statistics (if 'cmvn' is in the manager configuration), extracts
    the pitch (if 'pitch' is in the manager configuration) and fills
    the 'speaker' and 'audio' entries of the features properties.

    Returns the tuple ``(utt_name, features, pitch)`` where ``pitch``
    is None when no pitch extraction is configured.

    """
    # load audio signal of the utterance
    log.debug('%s: load audio', utt_name)
    audio = manager.get_audio(utt_name)

    # main features extraction
    log.debug('%s: extract %s', utt_name, manager.features)
    features = manager.get_features_processor(utt_name).process(audio)

    # cmvn accumulation
    if 'cmvn' in manager.config:
        log.debug('%s: accumulate cmvn', utt_name)

        # optionally weight CMVN by voice activity detection (null
        # weights on non-voiced frames)
        weights = None
        if manager.config['cmvn']['with_vad']:
            energy = manager.get_energy_processor(utt_name).process(audio)
            vad = manager.get_vad_processor(utt_name).process(energy)
            # reshape as 1d array
            weights = vad.data.reshape((vad.shape[0], ))

        manager.get_cmvn_processor(utt_name).accumulate(
            features, weights=weights)

    # pitch extraction (raw pitch followed by post-processing)
    pitch = None
    if 'pitch' in manager.config:
        log.debug('%s: extract pitch', utt_name)
        pitch_processor = manager.get_pitch_processor(utt_name)
        pitch_post_processor = manager.get_pitch_post_processor(utt_name)
        pitch = pitch_post_processor.process(pitch_processor.process(audio))

    # add info on speaker and audio input on the features properties
    utterance = manager.utterances[utt_name]
    speaker = utterance.speaker
    if speaker:
        features.properties['speaker'] = speaker

    metadata = manager._wavs_metadata[utterance.file]
    audio_properties = {
        'file': os.path.abspath(utterance.file),
        'sample_rate': metadata.sample_rate}

    if utterance.tstart is None:
        # the whole wav file is used
        audio_properties['duration'] = metadata.duration
    else:
        # a segment of the wav: its duration cannot exceed what
        # remains in the file after tstart
        audio_properties['tstart'] = utterance.tstart
        audio_properties['tstop'] = utterance.tstop
        audio_properties['duration'] = min(
            utterance.tstop - utterance.tstart,
            metadata.duration - utterance.tstart)
    features.properties['audio'] = audio_properties

    return utt_name, features, pitch
Exemplo n.º 6
0
def _check_environment(njobs, log=get_logger()):
    """Warn when parallel jobs may compete with implicit parallelism

    Does nothing when ``njobs`` is 1. Otherwise reads the
    OMP_NUM_THREADS environment variable and logs a warning if it is
    unset, not an integer, null or greater than 1.

    Parameters
    ----------
    njobs : int
        The number of parallel jobs requested.
    log : logging.Logger
        The logger where to send the warning.

    """
    if njobs == 1:
        return

    try:
        nthreads = int(os.environ['OMP_NUM_THREADS'])
    except (KeyError, ValueError):
        # the variable is not defined, or is defined to something
        # which is not an integer: the number of threads is unknown
        # and the warning must be displayed
        nthreads = None

    if not nthreads or nthreads > 1:
        log.warning(
            'working on %s threads but implicit parallelism is active, '
            'this may slow down the processing. Set the environment variable '
            'OMP_NUM_THREADS=1 to disable this warning', njobs)
Exemplo n.º 7
0
def command_extract(args):
    """Run the features extraction pipeline from parsed arguments

    Sets up the logger from the quiet/verbose options, checks the
    output file (must not exist and must have a supported extension)
    and the input files (configuration and utterances index must
    exist), runs the pipeline and saves the extracted features. Any
    failed check is logged as an error and aborts the command.

    Parameters
    ----------
    args : namespace
        The parsed arguments, with attributes ``quiet``, ``verbose``,
        ``output_file``, ``config``, ``utts_index`` and ``njobs``.

    """
    # setup the logger (level given by -q/-v arguments)
    if args.quiet:
        log = utils.null_logger()
    else:
        if args.verbose == 0:
            level = 'warning'
        elif args.verbose == 1:
            level = 'info'
        else:  # verbose >= 2
            level = 'debug'
        log = utils.get_logger(name='speech-features', level=level)
    # forward the initialized log to shennong
    utils._logger = log

    # make sure the output file is not already existing and have a
    # valid extension
    output_file = args.output_file
    if os.path.exists(output_file):
        log.error('output file already exist: %s', output_file)
        return
    output_ext = os.path.splitext(output_file)[1]
    if output_ext not in supported_extensions().keys():
        log.error(
            'output file has an unsupported extension "%s", must be in %s',
            output_ext, ", ".join(supported_extensions().keys()))
        return

    # make sure the input config and wavs_index exists. Report all
    # the missing files and abort instead of failing later on when
    # trying to read them
    missing = [
        filename for filename in (args.config, args.utts_index)
        if not os.path.exists(filename)]
    if missing:
        for filename in missing:
            log.error('input file not found: %s', filename)
        return

    # read the utterances file as a list of lists, ignore empty lines
    # in the file (the file is closed once read)
    with open(args.utts_index, 'r') as index:
        utterances = [
            utt.split(' ') for utt in
            (line.strip() for line in index)
            if utt]

    # run the pipeline
    features = pipeline.extract_features(
        args.config, utterances, njobs=args.njobs, log=log)

    # save the features
    log.info('saving the features to %s', output_file)
    features.save(output_file)
Exemplo n.º 8
0
def test_config_format(utterances_index, capsys, tmpdir, kind):
    """Check the configuration is accepted as a dict, a string or a file

    The default 'mfcc' configuration is given to
    ``pipeline._init_config`` in the format specified by ``kind``
    ('dict', 'str' or 'file'). For the 'str' kind, an invalid YAML
    string must also raise a ValueError.

    """
    config = pipeline.get_default_config('mfcc', to_yaml=kind != 'dict')

    if kind == 'file':
        tempfile = str(tmpdir.join('foo'))
        # make sure the file is flushed and closed before it is read
        with open(tempfile, 'w') as stream:
            stream.write(config)
        config = tempfile

    if kind == 'str':
        config2 = 'a:\nb\n'
        with pytest.raises(ValueError) as err:
            pipeline._init_config(config2)
        # match on the exception itself, not on the pytest wrapper
        assert 'error in configuration' in str(err.value)

    parsed = pipeline._init_config(config, log=utils.get_logger(level='info'))
    output = capsys.readouterr().err
    for word in ('mfcc', 'pitch', 'cmvn', 'delta'):
        assert word in output
        assert word in parsed
Exemplo n.º 9
0
    def __init__(self, config, utterances, log=get_logger()):
        """Initializes the manager from a configuration and utterances

        Stores the arguments, collects and checks the speakers, scans
        and checks the wav files, selects the features type to be
        extracted, retrieves the framing parameters and, if 'cmvn' is
        configured, instanciates the CMVN processors.

        """
        self._config = config
        self._utterances = utterances
        self.log = log

        # the set of speakers (None if no speaker information)
        speakers = {utt.speaker for utt in self.utterances.values()}
        self._speakers = None if speakers == {None} else speakers
        self._check_speakers()

        # store the metadata because we need to access the sample rate
        # for processors instanciation
        wav_files = {utt.file for utt in utterances.values()}
        self._wavs_metadata = {wav: Audio.scan(wav) for wav in wav_files}

        # make sure all the wavs are compatible with the pipeline
        log.info(f'scanning {len(self._utterances)} utterances...')
        self._check_wavs()

        # the features type to be extracted is the first configuration
        # entry being a valid features name
        self.features = [
            key for key in self.config.keys()
            if key in self._valid_features][0]

        # get some framing parameters constant for all processors
        # (retrieve them from a features processor instance built on
        # the first utterance)
        first_utterance = next(iter(self.utterances.keys()))
        processor = self.get_features_processor(first_utterance)
        self.frame_length = processor.frame_length
        self.frame_shift = processor.frame_shift

        # one CMVN processor per speaker if CMVN is done by speaker,
        # else one CMVN processor per utterance
        if 'cmvn' in self.config:
            by_speaker = self.config['cmvn']['by_speaker']
            keys = self.speakers if by_speaker else self.utterances
            self._cmvn_processors = {
                key: self.get_processor_class('cmvn')(processor.ndims)
                for key in keys}
Exemplo n.º 10
0
def main():
    """Extract the features of a corpus from the command line

    Parses the arguments, reads the utterances index of the corpus
    from ``<data_directory>/<corpus>.utts``, extracts the features
    with the given configuration and saves them in the
    ``<data_directory>/features`` directory.

    Raises
    ------
    ValueError
        If the data directory or the configuration file do not exist.

    """
    # parse input arguments
    parser = argparse.ArgumentParser()
    parser.add_argument('data_directory', help='input/output data directory')
    parser.add_argument('config_file', help='YAML configuration file')
    parser.add_argument(
        'corpus', choices=['english', 'xitsonga'], help='corpus to process')
    parser.add_argument(
        '-j', '--njobs', type=int, default=4, metavar='<int>',
        help='number of parallel jobs (default to %(default)s)')
    parser.add_argument(
        '-v', '--verbose', action='store_true', help='increase log level')
    args = parser.parse_args()

    # check and setup arguments
    data_directory = args.data_directory
    if not os.path.isdir(data_directory):
        raise ValueError(f'directory not found: {data_directory}')
    config = args.config_file
    if not os.path.isfile(config):
        raise ValueError(f'file not found: {config}')
    os.makedirs(os.path.join(data_directory, 'features'), exist_ok=True)
    log = get_logger(level='debug' if args.verbose else 'info')

    # load input utterances. Lines are stripped so as the last field
    # does not include the end of line, empty lines are ignored and
    # the file is closed once read.
    index_file = os.path.join(data_directory, f'{args.corpus}.utts')
    with open(index_file, 'r') as index:
        utterances = [
            utt.split(' ') for utt in
            (line.strip() for line in index)
            if utt]

    # extract the features
    features = pipeline.extract_features(
        config, utterances, njobs=args.njobs, log=log)

    # save them
    h5f_file = os.path.join(
        data_directory, 'features', f'{args.corpus}_{os.path.basename(config)}'
        .replace('.yaml', '.h5f'))
    features.save(h5f_file)
Exemplo n.º 11
0
def test_check_speakers(utterances_index, capsys):
    """Check the pipeline behavior with regard to speakers and CMVN

    With the default configuration, an index without speaker
    information must raise a ValueError. The log messages are then
    inspected when CMVN is disabled and when CMVN is done by
    utterance.

    """
    log = utils.get_logger(level='info')

    config = pipeline.get_default_config('mfcc')
    with pytest.raises(ValueError) as err:
        pipeline.extract_features(config, [(utterances_index[0][1], )],
                                  log=log)
    # match on the exception itself, not on the pytest wrapper
    assert 'no speaker information provided' in str(err.value)

    capsys.readouterr()  # clean the buffer
    config = pipeline.get_default_config('mfcc', with_cmvn=False)
    pipeline.extract_features(config, utterances_index, log=log)
    log_out = capsys.readouterr()
    assert 'cmvn' not in log_out.err
    assert '(CMVN disabled)' in log_out.err

    config = pipeline.get_default_config('mfcc', with_cmvn=True)
    config['cmvn']['by_speaker'] = False
    pipeline.extract_features(config, utterances_index, log=log)
    log_out = capsys.readouterr().err
    assert 'cmvn by utterance' in log_out
    assert '(CMVN by speaker disabled)' in log_out
Exemplo n.º 12
0
def _extract_pass_two(utt_name, manager, features, pitch,
                      tolerance=2, log=get_logger()):
    """Second extraction pass on the utterance ``utt_name``

    Applies CMVN and delta on the ``features`` when they are present
    in the manager configuration, then concatenates the ``pitch`` to
    the features if any.

    Returns the tuple ``(utt_name, features)``.

    """
    # apply cmvn
    if 'cmvn' in manager.config:
        log.debug('%s: apply cmvn', utt_name)
        cmvn = manager.get_cmvn_processor(utt_name)
        features = cmvn.process(features)

    # apply delta
    if 'delta' in manager.config:
        log.debug('%s: apply delta', utt_name)
        delta = manager.get_delta_processor(utt_name)
        features = delta.process(features)

    # nothing more to do when there is no pitch
    if not pitch:
        return utt_name, features

    # concatenate the pitch features to the main ones. because of
    # downsampling in pitch processing the resulting number of frames
    # can differ (the same tolerance is applied in Kaldi, see
    # the paste-feats binary)
    log.debug('%s: concatenate pitch', utt_name)
    features._log = log
    return utt_name, features.concatenate(pitch, tolerance=tolerance)
Exemplo n.º 13
0
def _extract_features(config, utterances, njobs=1, log=get_logger()):
    """Runs the extraction on all the ``utterances``

    Instanciates a manager from the ``config`` and ``utterances``
    then executes the extraction jobs, in two passes if 'cmvn' is in
    ``config`` and in a single pass otherwise. Returns the extracted
    features as a FeaturesCollection.

    """
    # the manager will instanciate the pipeline components
    manager = _Manager(config, utterances, log=log)

    # verbosity level for joblib (no joblib verbosity on debug level
    # (level <= 10) because each step is already detailed in inner
    # loops
    verbose = 0 if log.getEffectiveLevel() <= 10 else 8

    def run(title, jobs):
        # execute the delayed `jobs` with the parallel options shared
        # by all the passes
        executor = _Parallel(
            title, log, n_jobs=njobs, verbose=verbose, prefer='threads')
        return executor(jobs)

    # no cmvn: single pass
    if 'cmvn' not in config:
        return FeaturesCollection(**dict(run(
            'features extraction',
            (joblib.delayed(_extract_single_pass)(utterance, manager, log=log)
             for utterance in utterances))))

    # cmvn : two passes. 1st with features pitch and cmvn
    # accumulation, 2nd with cmvn application and delta
    pass_one = run(
        'features extraction, pass 1',
        (joblib.delayed(_extract_pass_one)(utterance, manager, log=log)
         for utterance in utterances))

    pass_two = run(
        'features extraction, pass 2',
        (joblib.delayed(_extract_pass_two)(
            utterance, manager, features, pitch, log=log)
         for utterance, features, pitch in pass_one))

    return FeaturesCollection(**dict(pass_two))
Exemplo n.º 14
0
def test_extract_features_full(ext, wav_file, wav_file_8k, wav_file_float32,
                               capsys, tmpdir):
    """Full pipeline test on a difficult case

    Extracts the features with parallel jobs on three utterances
    having different wav files, speakers and segments. Checks the log
    messages, the properties, shape and CMVN of the features, and
    that they are preserved by a save / load cycle.

    """
    names = ('u1', 'u2', 'u3')
    index = [
        ('u1', wav_file, 's1', 0, 1),
        ('u2', wav_file_float32, 's2', 1, 1.2),
        ('u3', wav_file_8k, 's1', 1, 3)]

    # disable VAD because it can alter the cmvn result (far from (0,
    # 1) when the signal includes non-voiced frames)
    config = pipeline.get_default_config('mfcc')
    config['cmvn']['with_vad'] = False

    feats = pipeline.extract_features(
        config, index, njobs=2, log=utils.get_logger())

    # ensure we have the expected log messages
    stderr = capsys.readouterr().err
    for message in (
            'INFO - get 3 utterances from 2 speakers in 3 wavs',
            'WARNING - several sample rates found in wav files'):
        assert message in stderr

    for name in names:
        assert name in feats
        assert feats[name].dtype == np.float32

    # check properties
    p1, p2, p3 = (feats[name].properties for name in names)
    assert p1['audio']['file'] == wav_file
    assert p1['audio']['duration'] == 1.0
    assert p2['audio']['file'] == wav_file_float32
    assert p2['audio']['duration'] == pytest.approx(0.2)
    assert p3['audio']['file'] == wav_file_8k
    assert p3['audio']['duration'] < 0.5  # ask 3s but get duration-tstart
    assert p1['mfcc'] == p2['mfcc']
    assert p1['mfcc']['sample_rate'] != p3['mfcc']['sample_rate']
    assert p1.keys() == {
        'audio', 'mfcc', 'cmvn', 'pitch', 'delta', 'speaker', 'pipeline'
    }
    assert p1.keys() == p2.keys() == p3.keys()
    assert p1['pipeline'] == p2['pipeline'] == p3['pipeline']

    # check shape. mfcc*delta + pitch = 13 * 3 + 3 = 42
    expected_shapes = {'u1': (98, 42), 'u2': (18, 42), 'u3': (40, 42)}
    for name in names:
        assert feats[name].shape == expected_shapes[name]

    # check cmvn on the first 13 columns of the features. u2 is the
    # only utterance of speaker s2
    static = {name: feats[name].data[:, :13] for name in names}
    assert static['u2'].mean() == pytest.approx(0.0, abs=1e-6)
    assert static['u2'].std() == pytest.approx(1.0, abs=1e-6)

    # u1 and u3 share the speaker s1: the normalization holds on the
    # pooled utterances, at least as well as on each one taken alone
    pooled = np.vstack((static['u1'], static['u3']))
    assert pooled.mean() == pytest.approx(0.0, abs=1e-6)
    assert pooled.std() == pytest.approx(1.0, abs=1e-6)
    for name in ('u1', 'u3'):
        assert np.abs(pooled.mean()) <= np.abs(static[name].mean())
        assert np.abs(pooled.std() - 1.0) <= np.abs(
            static[name].std() - 1.0)

    # save / load the features
    filename = str(tmpdir.join('feats' + ext))
    feats.save(filename)
    assert FeaturesCollection.load(filename) == feats
Exemplo n.º 15
0
def test_check_environment(capsys):
    """A warning must be logged when OMP_NUM_THREADS is not defined"""
    # make sure the variable is not defined in the environment
    os.environ.pop('OMP_NUM_THREADS', None)

    pipeline._check_environment(2, log=utils.get_logger())

    stderr = capsys.readouterr().err
    assert 'working on 2 threads but implicit parallelism is active' in stderr
Exemplo n.º 16
0
def _init_config(config, log=get_logger()):
    """Loads, checks and completes the pipeline configuration

    Parameters
    ----------
    config : dict or str
        The configuration as a dictionary, a path to an existing file
        or a string formatted in YAML. When given as a dictionary it
        is completed in place.
    log : logging.Logger
        The logger where to send messages.

    Returns
    -------
    config : dict
        The configuration, with default values set for the entries
        'by_speaker' and 'with_vad' of 'cmvn' and 'postprocessing' of
        'pitch' when they are missing.

    Raises
    ------
    ValueError
        If the YAML cannot be parsed, if the configuration has
        unknown keys, or if it does not define one and only one
        features extraction processor.

    """
    # os.path.isfile raises a TypeError when the config is not
    # path-like (e.g. a dictionary)
    try:
        is_file = os.path.isfile(config)
    except TypeError:
        is_file = False

    if is_file:
        log.debug('loading configuration from %s', config)
        with open(config, 'r') as stream:
            config = stream.read()

    if isinstance(config, str):
        # the config is a string, try to load it as a YAML
        try:
            config = yaml.load(config, Loader=yaml.FullLoader)
        except yaml.YAMLError as err:
            raise ValueError(
                'error in configuration: {}'.format(err)) from err

    # ensure all the keys in config are known
    unknown_keys = [
        k for k in config.keys()
        if k not in _Manager._valid_processors]
    if unknown_keys:
        raise ValueError(
            'invalid keys in configuration: {}'.format(
                ', '.join(unknown_keys)))

    # ensure one and only one features processor is defined in the
    # configuration
    features = [k for k in config.keys() if k in valid_features()]
    if not features:
        raise ValueError(
            'the configuration does not define any features extraction, '
            'only post-processing (must have one and only one entry of {})'
            .format(', '.join(valid_features())))
    if len(features) > 1:
        raise ValueError(
            'more than one features extraction processors are defined, '
            '(must have one and only one entry of {}): {}'
            .format(', '.join(valid_features()), ', '.join(features)))

    if 'cmvn' in config:
        # force by_speaker to False if not existing
        if 'by_speaker' not in config['cmvn']:
            log.warning(
                'by_speaker option not specified for cmvn, '
                'assuming it is false and doing cmvn by utterance')
            config['cmvn']['by_speaker'] = False
        # force with_vad to True if not existing
        if 'with_vad' not in config['cmvn']:
            config['cmvn']['with_vad'] = True

    # if pitch, make sure we have a 'postprocessing' entry
    if 'pitch' in config and 'postprocessing' not in config['pitch']:
        config['pitch']['postprocessing'] = {}

    # log message describing the pipeline configuration
    msg = []
    if 'pitch' in config:
        msg.append('pitch')
    if 'delta' in config:
        msg.append('delta')
    if 'cmvn' in config:
        by = 'speaker' if config['cmvn']['by_speaker'] else 'utterance'
        vad = ' with vad' if config['cmvn']['with_vad'] else ''
        msg.append('cmvn by {}{}'.format(by, vad))
    log.info(
        'pipeline configured for %s features extraction%s',
        features[0], ' with {}'.format(', '.join(msg)) if msg else '')

    return config
Exemplo n.º 17
0
def _extract_single_pass(utt_name, manager, log=get_logger()):
    """Chains the two extraction passes on the utterance ``utt_name``

    Returns the result of the second pass, called on the features and
    pitch computed by the first one.

    """
    _name, first_features, first_pitch = _extract_pass_one(
        utt_name, manager, log=log)

    return _extract_pass_two(
        utt_name, manager, first_features, first_pitch, log=log)
Exemplo n.º 18
0
def _init_utterances(utts_index, log=get_logger()):
    """Returns a dict {utt_id: (wav_file, speaker_id, tstart, tstop)}

    Raises on any error, log a warning on strange but non-critical
    issues.

    Parameters
    ----------
    utts_index : sequence of tuples or str
        The utterances, all in the same format among the five valid
        ones (from 1 to 5 elements per utterance). A str entry is
        considered as a 1-element tuple.
    log : logging.Logger
        The logger where to send messages.

    Raises
    ------
    ValueError
        If the index is empty, not homogeneous, in an unknown format,
        if it contains duplicated utterances identifiers or if some
        wav files are not found.

    """
    # guess the format of `utts_index` and ensure it is homogeneous
    utts = [(u,) if isinstance(u, str) else u for u in utts_index]
    if not utts:
        # explicit error, an empty index has no format to be guessed
        raise ValueError('the utterances index is empty')

    index_format = {len(u) for u in utts}
    if len(index_format) != 1:
        raise ValueError(
            'the wavs index is not homogeneous, entries have different '
            'lengths: {}'.format(', '.join(str(t) for t in index_format)))
    index_format = index_format.pop()

    # ensure the utterances index format is valid
    valid_formats = {
        1: '<wav-file>',
        2: '<utterance-id> <wav-file>',
        3: '<utterance-id> <wav-file> <speaker-id>',
        4: '<utterance-id> <wav-file> <tstart> <tstop>',
        5: '<utterance-id> <wav-file> <speaker-id> <tstart> <tstop>'}
    try:
        log.info(
            'detected format for utterances index is: %s',
            valid_formats[index_format])
    except KeyError:
        raise ValueError('unknown format for utterances index')

    # ensure 1st column has unique elements
    duplicates = [u for u, c in collections.Counter(
        u[0] for u in utts).items() if c > 1]
    if duplicates:
        # str() because the identifiers may not be strings
        raise ValueError(
            'duplicates found in utterances index: {}'.format(
                ', '.join(str(d) for d in duplicates)))

    # sort the utterances by wav_file (and then by utt_id), this
    # is a minor optimization to use the cache system of Audio.load(),
    # ie this avoids to reload several times the same wav when using
    # tstart/tstop segments.
    utts = sorted(utts, key=lambda u: u if index_format == 1 else (u[1], u[0]))

    # build the utterances collection as a dict
    # {utt_id: (wav_file, speaker_id, tstart, tstop)}
    utterances = {}
    for n, utt in enumerate(utts, start=1):
        if index_format == 1:
            utt_id = 'utt_{}'.format(str(n))
            wav_file = utt[0]
        else:
            utt_id = utt[0]
            wav_file = utt[1]

        utterances[utt_id] = _Utterance(
            file=wav_file,
            speaker=utt[2] if index_format in (3, 5) else None,
            tstart=(float(utt[2]) if index_format == 4
                    else float(utt[3]) if index_format == 5 else None),
            tstop=(float(utt[3]) if index_format == 4
                   else float(utt[4]) if index_format == 5 else None))

    # ensure all the wavs are here. Each wav is checked and reported
    # only once, even when shared by several utterances (the order is
    # preserved)
    wavs = dict.fromkeys(w.file for w in utterances.values())
    not_found = [w for w in wavs if not os.path.isfile(w)]
    if not_found:
        raise ValueError(
            'the following wav files are not found: {}'
            .format(', '.join(not_found)))

    return utterances
Exemplo n.º 19
0
def test_logger_bad_level():
    """Requesting a logger with an unknown level must raise a ValueError"""
    with pytest.raises(ValueError) as excinfo:
        utils.get_logger(level='bad')

    message = str(excinfo.value)
    assert 'invalid logging level' in message
Exemplo n.º 20
0
    'https://raw.githubusercontent.com/bootphon/ABXpy/'
    'zerospeech2015/resources/english.item')

# URL of the xitsonga.item file, hosted in the zerospeech2015
# resources of the ABXpy repository
XITSONGA_ITEM = (
    'https://raw.githubusercontent.com/bootphon/ABXpy/'
    'zerospeech2015/resources/xitsonga.item')

# URL of the english_files.txt file, hosted in the Zerospeech2015
# repository (presumably the list of files of the english corpus --
# TODO confirm)
ENGLISH_FILES_LIST = (
    'https://raw.githubusercontent.com/bootphon/'
    'Zerospeech2015/master/english_files.txt')

# URL of the xitsonga_files.txt file, hosted in the Zerospeech2015
# repository (presumably the list of files of the xitsonga corpus --
# TODO confirm)
XITSONGA_FILES_LIST = (
    'https://raw.githubusercontent.com/bootphon/'
    'Zerospeech2015/master/xitsonga_files.txt')

# module-level logger, requested at the 'info' level
log = get_logger(level='info')


def setup_data(data_directory, buckeye_directory, xitsonga_directory):
    """Setup a data directory with all input data required

    * creates the ``data_directory``
    * make a symlink to ``buckeye_directory`` and ``xitsonga_directory`` in it
    * download the ABX item files for buckeye and xitsonga
    * create the list of utterances both corpora
    * create the configuration files  for features extraction

    """
    # basic checks
    if not os.path.isdir(buckeye_directory):
        raise ValueError(f'directory does not exists: {buckeye_directory}')
Exemplo n.º 21
0
class FeaturesSerializer(metaclass=abc.ABCMeta):
    """Base class of a features file serializer

    This class must be specialized to handle a given file type, by
    implementing the :meth:`_save` and :meth:`_load` methods.

    Parameters
    ----------
    cls : class
        Must be :class:`shennong.features.FeaturesCollection`, this is
        a tweak to avoid circular imports
    filename : str
        The file to save/load features to/from

    """
    _log = get_logger(__name__)

    def __init__(self, cls, filename):
        self._features_collection = cls
        self._features = self._features_collection._value_type
        self._filename = filename

    @property
    def filename(self):
        """The file to save/load features to/from"""
        return self._filename

    # the abstract methods accept the optional keyword arguments
    # forwarded by save() and load()
    @abc.abstractmethod
    def _save(self, features, **kwargs):  # pragma: nocover
        """Writes the `features` to the file, must be specialized"""
        pass

    @abc.abstractmethod
    def _load(self, **kwargs):  # pragma: nocover
        """Reads features from the file, must be specialized"""
        pass

    def load(self, **kwargs):
        """Returns a collection of features from the `filename`

        Parameters
        ----------
        kwargs : optional
            Optional supplementary arguments, specific to each
            serializer. They are forwarded to :meth:`_load`.

        Returns
        -------
        features : :class:`~shennong.features.FeaturesCollection`
            The features stored in the file.

        Raises
        ------
        IOError
            If the input file does not exist or cannot be read.

        ValueError
            If the features cannot be loaded from the file or are not
            in a valid state.

        """
        if not os.path.isfile(self.filename):
            raise IOError('file not found: {}'.format(self.filename))
        if not os.access(self.filename, os.R_OK):
            raise IOError('file not readable: {}'.format(self.filename))

        features = self._load(**kwargs)

        if not features.is_valid():
            raise ValueError('features not valid in file: {}'.format(
                self.filename))

        return features

    def save(self, features, **kwargs):
        """Saves a collection of `features` to a file

        Parameters
        ----------
        features : :class:`~shennong.features.FeaturesCollection`
            The features to store in the file.
        kwargs : optional
            Optional supplementary arguments, specific to each
            serializer. They are forwarded to :meth:`_save`.

        Raises
        ------
        IOError
            If the output file already exists.

        ValueError
            If the features cannot be saved to the file, are not in a
            valid state or are not an instance of
            :class:`~shennong.features.FeaturesCollection`.

        """
        if os.path.isfile(self.filename):
            raise IOError('file already exists: {}'.format(self.filename))

        if not isinstance(features, self._features_collection):
            raise ValueError('features must be {} but are {}'.format(
                self._features_collection.__name__,
                features.__class__.__name__))

        if not features.is_valid():
            raise ValueError('features are not valid')

        self._save(features, **kwargs)
Exemplo n.º 22
0
def extract_features(configuration, utterances_index,
                     njobs=1, log=get_logger()):
    """Speech features extraction pipeline

    Applies the whole pipeline described by ``configuration`` on the
    utterances defined in ``utterances_index``, using ``njobs``
    parallel jobs, and returns the extracted features as an instance
    of :class:`~shennong.features.features.FeaturesCollection`.

    A single format must be used for all the utterances in the
    ``utterances_index``, among the following ones:

    * 1-uple (or str): ``<wav-file>``
    * 2-uple: ``<utterance-id> <wav-file>``
    * 3-uple: ``<utterance-id> <wav-file> <speaker-id>``
    * 4-uple: ``<utterance-id> <wav-file> <tstart> <tstop>``
    * 5-uple: ``<utterance-id> <wav-file> <speaker-id> <tstart> <tstop>``

    Parameters
    ----------
    configuration : dict or str
        The pipeline configuration, can be a dictionary, a path to a
        YAML file or a string formatted in YAML. To get a
        configuration example, see :func:`get_default_config`
    utterances_index : sequence of tuples
        The list of utterances to extract the features on.
    njobs : int, optional
        The number of jobs to execute in parallel, default to 1.
    log : logging.Logger
        A logger to display messages during pipeline execution

    Returns
    -------
    features : :class:`~shennong.features.features.FeaturesCollection`
       The extracted speech features

    Raises
    ------
    ValueError
        If the ``configuration`` or the ``utterances_index`` are
        invalid, or if something goes wrong during features
        extraction.

    """
    # check the arguments: number of jobs first, then the pipeline
    # configuration and finally the utterances to process
    checked_njobs = get_njobs(njobs, log=log)
    checked_config = _init_config(configuration, log=log)
    checked_utterances = _init_utterances(utterances_index, log=log)

    # check the OMP_NUM_THREADS variable for parallel computations
    _check_environment(checked_njobs, log=log)

    # do all the computations
    features = _extract_features(
        checked_config, checked_utterances, njobs=checked_njobs, log=log)
    return features