def test_extract_features(utterances_index, features):
    """Check the features extracted for the first utterance of an index.

    Three extractions are compared, all with CMVN disabled: without
    pitch, with pitch, and without pitch on a one-item index
    ``('u1', wav, 0, 1)`` built from the first utterance's second field.
    """
    def first_features(index, with_pitch):
        # extract the whole index, return the entry of its first item
        conf = pipeline.get_default_config(
            features, with_cmvn=False, with_pitch=with_pitch)
        extracted = pipeline.extract_features(conf, index)
        return extracted[index[0][0]]

    # reference: no pitch, original index
    reference = first_features(utterances_index, with_pitch=False)
    assert reference.is_valid()
    assert reference.shape[0] == 140
    assert reference.dtype == np.float32

    # enabling pitch keeps the first dimension and adds 3 columns
    pitched = first_features(utterances_index, with_pitch=True)
    assert pitched.is_valid()
    assert pitched.shape[0] == 140
    assert pitched.shape[1] == reference.shape[1] + 3

    # index item with two trailing values (0, 1): fewer rows, same
    # number of columns as the reference
    segment_index = [('u1', utterances_index[0][1], 0, 1)]
    segment = first_features(segment_index, with_pitch=False)
    assert segment.is_valid()
    assert segment.shape[0] == 98
    assert segment.shape[1] == reference.shape[1]
def test_config_good(features):
    """Default configurations agree across their dict and YAML forms.

    The commented YAML must contain '#', the uncommented one must not,
    and both must load back to the same content as the dict form.
    """
    as_dict = pipeline.get_default_config(features, to_yaml=False)
    plain_yaml = pipeline.get_default_config(
        features, to_yaml=True, yaml_commented=False)
    commented_yaml = pipeline.get_default_config(
        features, to_yaml=True, yaml_commented=True)

    # the requested features name is a top-level entry
    assert features in as_dict.keys()

    # comments are present only when requested
    assert '#' not in plain_yaml
    assert '#' in commented_yaml

    # both YAML flavors parse back to the dict form
    for text in (plain_yaml, commented_yaml):
        loaded = yaml.load(text, Loader=yaml.FullLoader)
        assert equal_dict(as_dict, loaded)
def command_config(args):
    """Write the default YAML configuration for ``args.features``.

    The configuration is generated with ``pipeline.get_default_config``.
    Comments, pitch, CMVN and delta are each enabled unless the matching
    ``args.no_comments``, ``args.no_pitch``, ``args.no_cmvn`` or
    ``args.no_delta`` flag is set.

    The result goes to the file ``args.output`` when that attribute is
    truthy, and to the standard output otherwise.
    """
    config = pipeline.get_default_config(
        args.features,
        to_yaml=True,
        yaml_commented=not args.no_comments,
        with_pitch=not args.no_pitch,
        with_cmvn=not args.no_cmvn,
        with_delta=not args.no_delta)

    if not args.output:
        sys.stdout.write(config)
        return

    # the context manager guarantees the file is flushed and closed,
    # even if the write fails (the handle was previously leaked)
    with open(args.output, 'w') as output:
        output.write(config)
def test_cmvn(utterances_index, by_speaker, with_vad):
    """Extraction with CMVN yields valid (140, 13)-shaped MFCC output.

    Pitch and delta are disabled; the CMVN ``by_speaker`` and
    ``with_vad`` options come from the test parameters.
    """
    conf = pipeline.get_default_config(
        'mfcc', with_cmvn=True, with_pitch=False, with_delta=False)
    cmvn_options = conf['cmvn']
    cmvn_options['by_speaker'] = by_speaker
    cmvn_options['with_vad'] = with_vad

    extracted = pipeline.extract_features(conf, utterances_index)
    first_name = utterances_index[0][0]
    first = extracted[first_name]

    assert first.is_valid()
    n_rows, n_cols = first.shape[0], first.shape[1]
    assert n_rows == 140
    assert n_cols == 13
def test_check_speakers(utterances_index, capsys):
    """Check speaker handling and the CMVN-related log messages.

    Covers three cases: a default configuration with a one-field index
    item (must raise), CMVN disabled, and CMVN enabled but not by
    speaker.
    """
    logger = utils.get_logger(level='info')

    # default configuration with an index item carrying a single field
    # is rejected
    bare_index = [(utterances_index[0][1], )]
    with pytest.raises(ValueError) as excinfo:
        pipeline.extract_features(
            pipeline.get_default_config('mfcc'), bare_index, log=logger)
    assert 'no speaker information provided' in str(excinfo)

    # discard anything captured so far
    capsys.readouterr()

    # CMVN disabled
    no_cmvn = pipeline.get_default_config('mfcc', with_cmvn=False)
    pipeline.extract_features(no_cmvn, utterances_index, log=logger)
    stderr = capsys.readouterr().err
    assert 'cmvn' not in stderr
    assert '(CMVN disabled)' in stderr

    # CMVN enabled, but not by speaker
    per_utterance = pipeline.get_default_config('mfcc', with_cmvn=True)
    per_utterance['cmvn']['by_speaker'] = False
    pipeline.extract_features(per_utterance, utterances_index, log=logger)
    stderr = capsys.readouterr().err
    assert 'cmvn by utterance' in stderr
    assert '(CMVN by speaker disabled)' in stderr
def generate_configurations(conf_directory):
    """Write three uncommented YAML configurations per valid features type.

    For each name returned by ``pipeline.valid_features()``, the
    following files are written in ``conf_directory`` (created if
    missing):

    - ``<features>_only.yaml``: no CMVN, no delta, no pitch,
    - ``<features>_nocmvn.yaml``: delta and pitch, no CMVN,
    - ``<features>_full.yaml``: CMVN, delta and pitch.

    Parameters
    ----------
    conf_directory : str
        Directory where the configuration files are written.

    """
    os.makedirs(conf_directory, exist_ok=True)

    # file suffix -> post-processing options of the configuration
    variants = (
        ('only', {
            'with_cmvn': False, 'with_delta': False, 'with_pitch': False}),
        ('nocmvn', {
            'with_cmvn': False, 'with_delta': True, 'with_pitch': True}),
        ('full', {
            'with_cmvn': True, 'with_delta': True, 'with_pitch': True}),
    )

    for features in pipeline.valid_features():
        for suffix, options in variants:
            conf = os.path.join(conf_directory, f'{features}_{suffix}.yaml')
            content = pipeline.get_default_config(
                features, to_yaml=True, yaml_commented=False, **options)

            # close (and flush) each file explicitly instead of relying
            # on garbage collection of an anonymous file object
            with open(conf, 'w') as stream:
                stream.write(content)
def test_config_format(utterances_index, capsys, tmpdir, kind):
    """Check that ``pipeline._init_config`` accepts each configuration kind.

    ``kind`` selects how the default 'mfcc' configuration is given:
    'dict' keeps the non-YAML form, any other value requests the YAML
    form, and 'file' additionally writes that YAML to a temporary file
    and passes its path. For 'str', an ill-formed YAML string is also
    checked to be rejected.
    """
    config = pipeline.get_default_config('mfcc', to_yaml=kind != 'dict')

    if kind == 'file':
        config_file = str(tmpdir.join('foo'))
        # close the file before it is read back, so the content is
        # guaranteed to be flushed to disk
        with open(config_file, 'w') as stream:
            stream.write(config)
        config = config_file

    if kind == 'str':
        # ill-formed YAML must be rejected
        config2 = 'a:\nb\n'
        with pytest.raises(ValueError) as err:
            pipeline._init_config(config2)
        assert 'error in configuration' in str(err)

    parsed = pipeline._init_config(config, log=utils.get_logger(level='info'))
    output = capsys.readouterr().err

    # each pipeline step is both logged and present in the parsed result
    for word in ('mfcc', 'pitch', 'cmvn', 'delta'):
        assert word in output
        assert word in parsed
def test_config_bad(utterances_index):
    """Check rejected configurations and the defaults of missing entries."""
    def default_mfcc():
        return pipeline.get_default_config('mfcc')

    def assert_extraction_fails(conf, message):
        # extraction must raise ValueError mentioning `message`
        with pytest.raises(ValueError) as excinfo:
            pipeline.extract_features(conf, utterances_index)
        assert message in str(excinfo)

    # unknown features name
    with pytest.raises(ValueError) as excinfo:
        pipeline.get_default_config('bad')
    assert 'invalid features "bad"' in str(excinfo)

    # no features extraction entry at all
    conf = default_mfcc()
    del conf['mfcc']
    assert_extraction_fails(
        conf, 'the configuration does not define any features')

    # two features extraction entries
    conf = default_mfcc()
    conf['plp'] = conf['mfcc']
    assert_extraction_fails(
        conf, 'more than one features extraction processor')

    # unknown top-level entry
    conf = default_mfcc()
    conf['invalid'] = conf['mfcc']
    assert_extraction_fails(conf, 'invalid keys in configuration')

    # a missing cmvn/with_vad is restored as a truthy value
    conf = default_mfcc()
    del conf['cmvn']['with_vad']
    initialized = pipeline._init_config(conf)
    assert 'cmvn' in initialized
    assert initialized['cmvn']['with_vad']

    # a missing cmvn/by_speaker is restored as a falsy value
    conf = default_mfcc()
    del conf['cmvn']['by_speaker']
    initialized = pipeline._init_config(conf)
    assert not initialized['cmvn']['by_speaker']

    # a missing pitch/postprocessing is restored as an empty dict
    conf = default_mfcc()
    del conf['pitch']['postprocessing']
    initialized = pipeline._init_config(conf)
    assert initialized['pitch']['postprocessing'] == {}
def test_extract_features_full(ext, wav_file, wav_file_8k, wav_file_float32,
                               capsys, tmpdir):
    """End-to-end extraction on a demanding index, then a save/load cycle.

    The index mixes parallel jobs, several sampling rates, two speakers
    and segments. Log messages, properties, shapes and CMVN statistics
    of the result are checked before the collection is saved to a file
    with extension ``ext`` and loaded back.
    """
    utt_ids = ('u1', 'u2', 'u3')
    index = [
        ('u1', wav_file, 's1', 0, 1),
        ('u2', wav_file_float32, 's2', 1, 1.2),
        ('u3', wav_file_8k, 's1', 1, 3)]

    config = pipeline.get_default_config('mfcc')
    # VAD is turned off: it can move the CMVN result away from (0, 1)
    # when the signal includes non-voiced frames
    config['cmvn']['with_vad'] = False

    feats = pipeline.extract_features(
        config, index, njobs=2, log=utils.get_logger())

    # expected log messages
    messages = capsys.readouterr().err
    assert 'INFO - get 3 utterances from 2 speakers in 3 wavs' in messages
    assert 'WARNING - several sample rates found in wav files' in messages

    # every utterance is there, as float32
    for utt in utt_ids:
        assert utt in feats
        assert feats[utt].dtype == np.float32

    # properties
    p1, p2, p3 = (feats[utt].properties for utt in utt_ids)

    assert p1['audio']['file'] == wav_file
    assert p1['audio']['duration'] == 1.0
    assert p2['audio']['file'] == wav_file_float32
    assert p2['audio']['duration'] == pytest.approx(0.2)
    assert p3['audio']['file'] == wav_file_8k
    # 3s were requested but only duration - tstart is available
    assert p3['audio']['duration'] < 0.5

    assert p1['mfcc'] == p2['mfcc']
    assert p1['mfcc']['sample_rate'] != p3['mfcc']['sample_rate']

    expected_keys = {
        'audio', 'mfcc', 'cmvn', 'pitch', 'delta', 'speaker', 'pipeline'}
    assert p1.keys() == expected_keys
    assert p1.keys() == p2.keys() == p3.keys()
    assert p1['pipeline'] == p2['pipeline'] == p3['pipeline']

    # shapes: mfcc * delta + pitch = 13 * 3 + 3 = 42 columns
    for utt, nrows in (('u1', 98), ('u2', 18), ('u3', 40)):
        assert feats[utt].shape == (nrows, 42)

    # CMVN, checked on the first 13 columns
    def mfcc_part(utt):
        return feats[utt].data[:, :13]

    def close_to(target):
        return pytest.approx(target, abs=1e-6)

    # u2 is the only utterance of speaker s2
    assert mfcc_part('u2').mean() == close_to(0.0)
    assert mfcc_part('u2').std() == close_to(1.0)

    # u1 and u3 share speaker s1: statistics hold on their stacked data
    stacked = np.vstack((mfcc_part('u1'), mfcc_part('u3')))
    assert stacked.mean() == close_to(0.0)
    assert stacked.std() == close_to(1.0)

    # ... and each one alone is no closer to (0, 1) than the stack
    for utt in ('u1', 'u3'):
        single = mfcc_part(utt)
        assert np.abs(stacked.mean()) <= np.abs(single.mean())
        assert np.abs(stacked.std() - 1.0) <= np.abs(single.std() - 1.0)

    # save / load round trip
    filename = str(tmpdir.join('feats' + ext))
    feats.save(filename)
    reloaded = FeaturesCollection.load(filename)
    assert reloaded == feats
def fun(utts):
    """Return ``utts`` as parsed by ``pipeline._init_utterances``.

    A ``pipeline._Manager`` is also instantiated from a CMVN-free
    default 'mfcc' configuration and the parsed utterances; the
    instance itself is discarded.
    """
    default_config = pipeline.get_default_config('mfcc', with_cmvn=False)
    config = pipeline._init_config(default_config)
    utterances = pipeline._init_utterances(utts)

    # NOTE(review): presumably built only so that errors raised by the
    # constructor surface to the caller -- confirm
    pipeline._Manager(config, utterances)
    return utterances