def test_save():
    """Round-trip check: extract MFCCs to disk, load them, and save under a new name."""
    mfcc_path = os.path.join('material', 'test.mfcc.cpickle')

    FeatureExtractor(store=True, overwrite=True).extract(
        audio_file=os.path.join('material', 'test.wav'),
        extractor_name='mfcc',
        extractor_params={'mfcc': {'n_mfcc': 10}},
        storage_paths={'mfcc': mfcc_path}
    )

    # Load the stored container and write it back out under a different filename.
    container = FeatureContainer().load(filename=mfcc_path)
    container.save(filename=os.path.join('material', 'saved.mfcc.cpickle'))
def test_with_statement():
    """Exercise FeatureNormalizer as a context manager and verify accumulated statistics."""
    mfcc_path = os.path.join('material', 'test.mfcc.cpickle')

    FeatureExtractor(store=True, overwrite=True).extract(
        audio_file=os.path.join('material', 'test.wav'),
        extractor_name='mfcc',
        extractor_params={'mfcc': {'n_mfcc': 10}},
        storage_paths={'mfcc': mfcc_path}
    )

    container = FeatureContainer().load(filename=mfcc_path)

    # Exiting the with-block finalizes the accumulated statistics.
    with FeatureNormalizer() as feature_normalizer:
        feature_normalizer.accumulate(container)

    # Frame count and per-dimension statistics must match numpy reference values.
    nose.tools.eq_(feature_normalizer['N'][0], 501)
    numpy.testing.assert_array_equal(
        feature_normalizer['mean'][0][0],
        numpy.mean(container.feat[0], axis=0)
    )
    numpy.testing.assert_array_equal(
        feature_normalizer['S1'][0],
        numpy.sum(container.feat[0], axis=0)
    )
    numpy.testing.assert_array_equal(
        feature_normalizer['S2'][0],
        numpy.sum(container.feat[0] ** 2, axis=0)
    )

    # NOTE(review): calling another test function here looks like a paste
    # artifact — confirm against the original suite before removing.
    test_accumulate_finalize()
def test_learn():
    """Train SceneClassifierGMM on two labelled files and inspect the resulting models."""
    mfcc_path = os.path.join('material', 'test.mfcc.cpickle')

    FeatureExtractor(store=True, overwrite=True).extract(
        audio_file=os.path.join('material', 'test.wav'),
        extractor_name='mfcc',
        extractor_params={'mfcc': {'n_mfcc': 10}},
        storage_paths={'mfcc': mfcc_path}
    )
    feature_container = FeatureContainer(filename=mfcc_path)

    # The same features serve both files; only the scene labels differ.
    data = {
        'file1.wav': feature_container,
        'file2.wav': feature_container,
    }
    annotations = {
        'file1.wav': MetaDataItem({'file': 'file1.wav', 'scene_label': 'scene1'}),
        'file2.wav': MetaDataItem({'file': 'file2.wav', 'scene_label': 'scene2'}),
    }

    sc = SceneClassifierGMM(
        method='gmm',
        class_labels=['scene1', 'scene2'],
        params={
            'n_components': 6,
            'covariance_type': 'diag',
            'tol': 0.001,
            'reg_covar': 0,
            'max_iter': 40,
            'n_init': 1,
            'init_params': 'kmeans',
            'random_state': 0,
        },
        filename=os.path.join('material', 'test.model.cpickle'),
        disable_progress_bar=True,
    )
    sc.learn(data=data, annotations=annotations)

    # One GMM per scene class.
    nose.tools.eq_(len(sc.model), 2)

    # Each model carries the configured six mixture components.
    nose.tools.eq_(sc.model['scene1'].means_.shape[0], 6)
def test_empty():
    """A freshly constructed FeatureContainer exposes None for every property."""
    for prop in ('shape', 'channels', 'frames', 'vector_length', 'feat', 'stat', 'meta'):
        # A new container is built per check, matching the original test's behavior.
        nose.tools.eq_(getattr(FeatureContainer(), prop), None)
def test_learn():
    """Train SceneClassifierMLP and verify one recorded loss per training epoch."""
    mfcc_path = os.path.join('material', 'test.mfcc.cpickle')

    FeatureExtractor(store=True, overwrite=True).extract(
        audio_file=os.path.join('material', 'test.wav'),
        extractor_name='mfcc',
        extractor_params={'mfcc': {'n_mfcc': 10}},
        storage_paths={'mfcc': mfcc_path}
    )
    feature_container = FeatureContainer(filename=mfcc_path)

    # The same features serve both files; only the scene labels differ.
    data = {
        'file1.wav': feature_container,
        'file2.wav': feature_container,
    }
    annotations = {
        'file1.wav': MetaDataItem({'file': 'file1.wav', 'scene_label': 'scene1'}),
        'file2.wav': MetaDataItem({'file': 'file2.wav', 'scene_label': 'scene2'}),
    }

    sc = SceneClassifierMLP(
        method='mlp',
        class_labels=['scene1', 'scene2'],
        params=learner_params,
        filename=os.path.join('material', 'test.model.cpickle'),
        disable_progress_bar=True,
    )
    sc.learn(data=data, annotations=annotations)

    # The learning history must contain exactly one loss value per epoch.
    nose.tools.eq_(len(sc['learning_history']['loss']),
                   learner_params['training']['epochs'])
def test_get_target_matrix_dict():
    """SceneClassifier._get_target_matrix_dict must one-hot encode scene labels per frame."""
    mfcc_path = os.path.join('material', 'test.mfcc.cpickle')

    FeatureExtractor(store=True, overwrite=True).extract(
        audio_file=os.path.join('material', 'test.wav'),
        extractor_name='mfcc',
        extractor_params={'mfcc': {'n_mfcc': 10}},
        storage_paths={'mfcc': mfcc_path}
    )
    feature_container = FeatureContainer(filename=mfcc_path)

    data = {
        'file1.wav': feature_container,
        'file2.wav': feature_container,
    }
    annotations = {
        'file1.wav': MetaDataItem({'file': 'file1.wav', 'scene_label': 'scene1'}),
        'file2.wav': MetaDataItem({'file': 'file2.wav', 'scene_label': 'scene2'}),
    }

    sc = SceneClassifier(
        class_labels=['scene1', 'scene2'],
        disable_progress_bar=True,
    )
    target_matrix = sc._get_target_matrix_dict(data=data, annotations=annotations)

    # One row per frame, one column per class.
    nose.tools.eq_(target_matrix['file1.wav'].shape, (501, 2))
    nose.tools.eq_(target_matrix['file2.wav'].shape, (501, 2))

    # file1 activates only column 0 (scene1); file2 activates only column 1 (scene2).
    nose.tools.eq_(numpy.any(target_matrix['file1.wav'][:, 0] == 1), True)
    nose.tools.eq_(numpy.any(target_matrix['file1.wav'][:, 1] == 1), False)
    nose.tools.eq_(numpy.any(target_matrix['file2.wav'][:, 0] == 1), False)
    nose.tools.eq_(numpy.any(target_matrix['file2.wav'][:, 1] == 1), True)
def process_feature_data(self, feature_filename):
    """Load, stack, normalize, and aggregate features for a single file.

    Parameters
    ----------
    feature_filename : str
        Path of the stored feature container to load.

    Returns
    -------
    Processed feature data ready for the model.
    """
    # NOTE(review): the loaded features are registered under the 'mel' key
    # regardless of extractor type — confirm this matches the stacker recipe.
    feature_list = {
        'mel': FeatureContainer().load(filename=feature_filename),
    }

    feature_data = self.model_container.feature_stacker.process(
        feature_data=feature_list
    )

    # Optional normalization step, only when a normalizer is configured.
    if self.model_container.feature_normalizer:
        feature_data = self.model_container.feature_normalizer.normalize(feature_data)

    # Optional temporal aggregation step, only when an aggregator is configured.
    if self.model_container.feature_aggregator:
        feature_data = self.model_container.feature_aggregator.process(feature_data)

    return feature_data
def test_predict():
    """Train EventDetectorMLP, predict frame probabilities, and run the event recognizer."""
    mfcc_path = os.path.join('material', 'test.mfcc.cpickle')

    FeatureExtractor(store=True, overwrite=True).extract(
        audio_file=os.path.join('material', 'test.wav'),
        extractor_name='mfcc',
        extractor_params={'mfcc': {'n_mfcc': 10}},
        storage_paths={'mfcc': mfcc_path}
    )
    feature_container = FeatureContainer(filename=mfcc_path)

    data = {
        'file1.wav': feature_container,
        'file2.wav': feature_container,
    }

    def event(file_name, onset, offset, label, identifier):
        # All test events share the same scene label.
        return {
            'file': file_name,
            'scene_label': 'scene1',
            'event_onset': onset,
            'event_offset': offset,
            'event_label': label,
            'identifier': identifier,
        }

    annotations = {
        'file1.wav': MetaDataContainer([
            event('file1.wav', 0.0, 1.0, 'event1', 'a'),
            event('file1.wav', 1.0, 2.0, 'event2', 'a'),
            event('file1.wav', 2.0, 3.0, 'event2', 'a'),
            event('file1.wav', 4.0, 5.0, 'event1', 'a'),
            event('file1.wav', 1.0, 2.0, 'event1', 'a'),
        ]),
        'file2.wav': MetaDataContainer([
            event('file2.wav', 0.0, 1.0, 'event2', 'b'),
            event('file2.wav', 1.0, 2.0, 'event1', 'b'),
            event('file2.wav', 2.0, 3.0, 'event2', 'b'),
            event('file2.wav', 3.0, 4.0, 'event2', 'b'),
        ]),
    }

    ed = EventDetectorMLP(
        # NOTE(review): 'gmm' as the method of an MLP detector looks like a
        # copy-paste artifact — confirm against the original suite.
        method='gmm',
        class_labels=['event1', 'event2'],
        params={
            'hop_length_seconds': 0.02,
            'parameters': learner_params,
        },
        filename=os.path.join('material', 'test.model.cpickle'),
        disable_progress_bar=True,
    )
    ed.learn(data=data, annotations=annotations)

    recognizer_params = {
        'frame_accumulation': {
            'enable': False,
        },
        'frame_binarization': {
            'enable': True,
            'type': 'global_threshold',
            'threshold': 0.5,
        },
        'event_activity_processing': {
            'enable': True,
            'type': 'median_filtering',
            'window_length_frames': 11,
        }
    }

    # Frame probabilities from the trained detector.
    frame_probabilities = ed.predict(feature_data=feature_container)

    # Event recognizer turns frame probabilities into an event list.
    result = EventRecognizer(
        hop_length_seconds=0.02,
        params=recognizer_params,
        class_labels=['event1', 'event2'],
    ).process(frame_probabilities=frame_probabilities)

    # At least one event is expected.
    nose.tools.eq_(len(result) > 0, True)
def test_normalizer():
    """FeatureStacker.normalizer() must slice normalizer statistics per recipe.

    Extracts a 10-dimensional MFCC feature file once, then verifies that a
    stacked normalizer built from three different recipes (index range,
    explicit index list, full stream) exposes the expected dimensionality in
    its 'mean' and 'std' statistics.

    The three original copy-pasted sections are folded into one
    parameterized helper; assertions and values are unchanged.
    """
    mfcc_path = os.path.join('material', 'test.mfcc.cpickle')

    FeatureExtractor(store=True, overwrite=True).extract(
        audio_file=os.path.join('material', 'test.wav'),
        extractor_name='mfcc',
        extractor_params={'mfcc': {'n_mfcc': 10}},
        storage_paths={'mfcc': mfcc_path}
    )

    def check_recipe(recipe, expected_dims):
        # Build a finalized per-stream normalizer, then stack it per recipe.
        recipe_parsed = ParameterContainer()._parse_recipe(recipe=recipe)
        feature_container = FeatureContainer().load(filename=mfcc_path)
        stream_normalizer = FeatureNormalizer().accumulate(
            feature_container=feature_container).finalize()
        stacked = FeatureStacker(recipe=recipe_parsed).normalizer(
            normalizer_list={'mfcc': stream_normalizer})

        # Frame count is preserved; mean/std are sliced to the recipe's dims.
        nose.tools.eq_(stacked['N'][0][0], 501)
        nose.tools.eq_(stacked['mean'][0].shape[0], 1)
        nose.tools.eq_(stacked['mean'][0].shape[1], expected_dims)
        nose.tools.eq_(stacked['std'][0].shape[0], 1)
        nose.tools.eq_(stacked['std'][0].shape[1], expected_dims)

    check_recipe('mfcc=0-5', 6)      # Test 1: index range selects 6 dims
    check_recipe('mfcc=1,2,3,4', 4)  # Test 2: explicit index list selects 4 dims
    check_recipe('mfcc', 10)         # Test 3: bare stream keeps all 10 dims
def test_predict():
    """Train SceneClassifierMLP, run prediction, and exercise the error paths."""
    mfcc_path = os.path.join('material', 'test.mfcc.cpickle')

    FeatureExtractor(store=True, overwrite=True).extract(
        audio_file=os.path.join('material', 'test.wav'),
        extractor_name='mfcc',
        extractor_params={'mfcc': {'n_mfcc': 10}},
        storage_paths={'mfcc': mfcc_path}
    )
    feature_container = FeatureContainer(filename=mfcc_path)

    data = {
        'file1.wav': feature_container,
        'file2.wav': feature_container,
    }
    annotations = {
        'file1.wav': MetaDataItem({'file': 'file1.wav', 'scene_label': 'scene1'}),
        'file2.wav': MetaDataItem({'file': 'file2.wav', 'scene_label': 'scene2'}),
    }

    sc = SceneClassifierMLP(
        method='mlp',
        class_labels=['scene1', 'scene2'],
        params=learner_params,
        filename=os.path.join('material', 'test.model.cpickle'),
        disable_progress_bar=True,
    )
    sc.learn(data=data, annotations=annotations)

    recognizer_params = {
        'frame_accumulation': {
            'enable': False,
        },
        'frame_binarization': {
            'enable': True,
            'type': 'frame_max',
        },
        'decision_making': {
            'enable': True,
            'type': 'majority_vote',
        }
    }
    result = sc.predict(
        feature_data=feature_container,
        recognizer_params=recognizer_params
    )

    # A non-empty classification result is expected.
    nose.tools.eq_(len(result) > 0, True)

    # Unknown binarization type must raise.
    recognizer_params['frame_binarization']['type'] = 'test'
    nose.tools.assert_raises(AssertionError, sc.predict, feature_container, recognizer_params)

    # Unknown decision-making type must raise.
    recognizer_params['frame_binarization']['type'] = 'frame_max'
    recognizer_params['decision_making']['type'] = 'test'
    nose.tools.assert_raises(AssertionError, sc.predict, feature_container, recognizer_params)
def test_predict():
    """Train EventDetectorGMM, predict events, and exercise the error path."""
    mfcc_path = os.path.join('material', 'test.mfcc.cpickle')

    FeatureExtractor(store=True, overwrite=True).extract(
        audio_file=os.path.join('material', 'test.wav'),
        extractor_name='mfcc',
        extractor_params={'mfcc': {'n_mfcc': 10}},
        storage_paths={'mfcc': mfcc_path}
    )
    feature_container = FeatureContainer(filename=mfcc_path)

    data = {
        'file1.wav': feature_container,
        'file2.wav': feature_container,
    }

    def event(file_name, onset, offset, label, identifier):
        # All test events share the same scene label.
        return {
            'file': file_name,
            'scene_label': 'scene1',
            'event_onset': onset,
            'event_offset': offset,
            'event_label': label,
            'identifier': identifier,
        }

    annotations = {
        'file1.wav': MetaDataContainer([
            event('file1.wav', 0.0, 1.0, 'event1', 'a'),
            event('file1.wav', 1.0, 2.0, 'event2', 'a'),
            event('file1.wav', 2.0, 3.0, 'event2', 'a'),
            event('file1.wav', 4.0, 5.0, 'event1', 'a'),
            event('file1.wav', 1.0, 2.0, 'event1', 'a'),
        ]),
        'file2.wav': MetaDataContainer([
            event('file2.wav', 0.0, 1.0, 'event2', 'b'),
            event('file2.wav', 1.0, 2.0, 'event1', 'b'),
            event('file2.wav', 2.0, 3.0, 'event2', 'b'),
            event('file2.wav', 3.0, 4.0, 'event2', 'b'),
        ]),
    }

    ed = EventDetectorGMM(
        method='gmm',
        class_labels=['event1', 'event2'],
        params={
            'hop_length_seconds': 0.02,
            'parameters': {
                'n_components': 6,
                'covariance_type': 'diag',
                'tol': 0.001,
                'reg_covar': 0,
                'max_iter': 40,
                'n_init': 1,
                'init_params': 'kmeans',
                'random_state': 0,
            }
        },
        filename=os.path.join('material', 'test.model.cpickle'),
        disable_progress_bar=True,
    )
    ed.learn(data=data, annotations=annotations)

    recognizer_params = {
        'frame_accumulation': {
            'enable': False,
            'type': 'sliding_sum',
            'window_length_frames': 2,
        },
        'frame_binarization': {
            'enable': True,
            'type': 'global_threshold',
            'threshold': 10,
        }
    }
    result = ed.predict(
        feature_data=feature_container,
        recognizer_params=recognizer_params
    )

    # More than five detected events are expected.
    nose.tools.eq_(len(result) > 5, True)

    # Unknown binarization type must raise.
    recognizer_params['frame_binarization']['type'] = 'test'
    nose.tools.assert_raises(AssertionError, ed.predict, feature_container, recognizer_params)
def test_load_wrong_type():
    """Load a file of the wrong type into a FeatureContainer.

    NOTE(review): there is no assertion here — presumably a raises-style
    decorator was intended or lost; confirm against the original suite.
    """
    bad_path = os.path.join('material', 'wrong.yaml')
    FeatureContainer().load(filename=bad_path)
def test_load_not_found():
    """Load a non-existent file into a FeatureContainer.

    NOTE(review): there is no assertion here — presumably a raises-style
    decorator was intended or lost; confirm against the original suite.
    """
    missing_path = os.path.join('material', 'wrong.cpickle')
    FeatureContainer().load(filename=missing_path)
def test_load():
    """FeatureContainer loading via constructor, load(), and load(filename_dict)."""
    mfcc_path = os.path.join('material', 'test.mfcc.cpickle')

    FeatureExtractor(store=True, overwrite=True).extract(
        audio_file=os.path.join('material', 'test.wav'),
        extractor_name='mfcc',
        extractor_params={'mfcc': {'n_mfcc': 10}},
        storage_paths={'mfcc': mfcc_path}
    )

    def check_container(container):
        # Meta content shared by both loading styles.
        nose.tools.assert_list_equal(sorted(list(container.keys())), ['feat', 'meta', 'stat'])
        nose.tools.eq_(posix_path(container.meta['audio_file']), 'material/test.wav')
        nose.tools.eq_(container.meta['parameters']['n_mels'], 40)
        nose.tools.eq_(container.meta['parameters']['n_mfcc'], 10)

        # Stat
        nose.tools.eq_(container.stat[0]['N'], 501)
        nose.tools.assert_list_equal(sorted(list(container.stat[0].keys())),
                                     ['N', 'S1', 'S2', 'mean', 'std'])

        # Feature matrix shape: 501 frames x 10 MFCC coefficients.
        nose.tools.eq_(container.feat[0].shape[0], 501)
        nose.tools.eq_(container.feat[0].shape[1], 10)
        nose.tools.eq_(container.shape[0], 501)
        nose.tools.eq_(container.shape[1], 10)

    # Test #1: construct directly from a filename.
    feature_container = FeatureContainer(filename=mfcc_path)
    check_container(feature_container)
    nose.tools.eq_(feature_container.channels, 1)
    nose.tools.eq_(feature_container.frames, 501)
    nose.tools.eq_(feature_container.vector_length, 10)

    # Test #2: load() on an empty container.
    check_container(FeatureContainer().load(filename=mfcc_path))

    # Test #3: load multiple feature files as a keyed repository.
    feature_repository = FeatureContainer().load(filename_dict={
        'mfcc1': mfcc_path,
        'mfcc2': mfcc_path,
    })
    nose.tools.assert_list_equal(sorted(list(feature_repository.keys())), ['mfcc1', 'mfcc2'])
def test_get_target_matrix_dict():
    """EventDetector._get_target_matrix_dict must produce per-frame activity matrices."""
    mfcc_path = os.path.join('material', 'test.mfcc.cpickle')

    FeatureExtractor(store=True, overwrite=True).extract(
        audio_file=os.path.join('material', 'test.wav'),
        extractor_name='mfcc',
        extractor_params={'mfcc': {'n_mfcc': 10}},
        storage_paths={'mfcc': mfcc_path}
    )
    feature_container = FeatureContainer(filename=mfcc_path)

    data = {
        'file1.wav': feature_container,
        'file2.wav': feature_container,
    }

    def event(file_name, onset, offset, label):
        # All test events share the same scene label.
        return {
            'file': file_name,
            'scene_label': 'scene1',
            'event_onset': onset,
            'event_offset': offset,
            'event_label': label,
        }

    annotations = {
        'file1.wav': MetaDataContainer([
            event('file1.wav', 0.0, 1.0, 'event1'),
            event('file1.wav', 1.0, 2.0, 'event2'),
        ]),
        'file2.wav': MetaDataContainer([
            event('file2.wav', 0.0, 1.0, 'event2'),
            event('file2.wav', 1.0, 2.0, 'event1'),
        ]),
    }

    ed = EventDetector(
        class_labels=['event1', 'event2'],
        disable_progress_bar=True,
        params={
            'hop_length_seconds': 0.02,
        }
    )
    target_matrix = ed._get_target_matrix_dict(data=data, annotations=annotations)

    # One row per frame, one column per event class.
    nose.tools.eq_(target_matrix['file1.wav'].shape, (501, 2))
    nose.tools.eq_(target_matrix['file2.wav'].shape, (501, 2))

    # Each one-second event covers 50 frames at a 0.02 s hop length.
    nose.tools.eq_(numpy.sum(target_matrix['file1.wav'][:, 0] == 1), 50)
    nose.tools.eq_(numpy.sum(target_matrix['file1.wav'][:, 1] == 1), 50)
    nose.tools.eq_(numpy.sum(target_matrix['file2.wav'][:, 0] == 1), 50)
    nose.tools.eq_(numpy.sum(target_matrix['file2.wav'][:, 1] == 1), 50)
def test_learn():
    """Train EventDetectorMLP and verify one recorded loss per training epoch."""
    mfcc_path = os.path.join('material', 'test.mfcc.cpickle')

    FeatureExtractor(store=True, overwrite=True).extract(
        audio_file=os.path.join('material', 'test.wav'),
        extractor_name='mfcc',
        extractor_params={'mfcc': {'n_mfcc': 10}},
        storage_paths={'mfcc': mfcc_path}
    )
    feature_container = FeatureContainer(filename=mfcc_path)

    data = {
        'file1.wav': feature_container,
        'file2.wav': feature_container,
    }

    def event(file_name, onset, offset, label, identifier):
        # All test events share the same scene label.
        return {
            'file': file_name,
            'scene_label': 'scene1',
            'event_onset': onset,
            'event_offset': offset,
            'event_label': label,
            'identifier': identifier,
        }

    annotations = {
        'file1.wav': MetaDataContainer([
            event('file1.wav', 0.0, 1.0, 'event1', 'a'),
            event('file1.wav', 1.0, 2.0, 'event2', 'a'),
            event('file1.wav', 2.0, 3.0, 'event2', 'a'),
            event('file1.wav', 4.0, 5.0, 'event1', 'a'),
            event('file1.wav', 1.0, 2.0, 'event1', 'a'),
        ]),
        'file2.wav': MetaDataContainer([
            event('file2.wav', 0.0, 1.0, 'event2', 'b'),
            event('file2.wav', 1.0, 2.0, 'event1', 'b'),
            event('file2.wav', 2.0, 3.0, 'event2', 'b'),
            event('file2.wav', 3.0, 4.0, 'event2', 'b'),
        ]),
    }

    ed = EventDetectorMLP(
        # NOTE(review): 'gmm' as the method of an MLP detector looks like a
        # copy-paste artifact — confirm against the original suite.
        method='gmm',
        class_labels=['event1', 'event2'],
        params={
            'hop_length_seconds': 0.02,
            'parameters': learner_params
        },
        filename=os.path.join('material', 'test.model.cpickle'),
        disable_progress_bar=True,
    )
    ed.learn(data=data, annotations=annotations)

    # The learning history must contain exactly one loss value per epoch.
    nose.tools.eq_(len(ed['learning_history']['loss']),
                   learner_params['training']['epochs'])
def test_learn():
    """Train EventDetectorGMM on annotated events and inspect the resulting models."""
    mfcc_path = os.path.join('material', 'test.mfcc.cpickle')

    FeatureExtractor(store=True, overwrite=True).extract(
        audio_file=os.path.join('material', 'test.wav'),
        extractor_name='mfcc',
        extractor_params={'mfcc': {'n_mfcc': 10}},
        storage_paths={'mfcc': mfcc_path}
    )
    feature_container = FeatureContainer(filename=mfcc_path)

    data = {
        'file1.wav': feature_container,
        'file2.wav': feature_container,
    }

    def event(file_name, onset, offset, label, identifier):
        # All test events share the same scene label.
        return {
            'file': file_name,
            'scene_label': 'scene1',
            'event_onset': onset,
            'event_offset': offset,
            'event_label': label,
            'identifier': identifier,
        }

    annotations = {
        'file1.wav': MetaDataContainer([
            event('file1.wav', 0.0, 1.0, 'event1', 'a'),
            event('file1.wav', 1.0, 2.0, 'event2', 'a'),
            event('file1.wav', 2.0, 3.0, 'event2', 'a'),
            event('file1.wav', 4.0, 5.0, 'event1', 'a'),
            event('file1.wav', 1.0, 2.0, 'event1', 'a'),
        ]),
        'file2.wav': MetaDataContainer([
            event('file2.wav', 0.0, 1.0, 'event2', 'b'),
            event('file2.wav', 1.0, 2.0, 'event1', 'b'),
            event('file2.wav', 2.0, 3.0, 'event2', 'b'),
            event('file2.wav', 3.0, 4.0, 'event2', 'b'),
        ]),
    }

    ed = EventDetectorGMM(
        method='gmm',
        class_labels=['event1', 'event2'],
        params={
            'hop_length_seconds': 0.02,
            'parameters': {
                'n_components': 6,
                'covariance_type': 'diag',
                'tol': 0.001,
                'reg_covar': 0,
                'max_iter': 40,
                'n_init': 1,
                'init_params': 'kmeans',
                'random_state': 0,
            }
        },
        filename=os.path.join('material', 'test.model.cpickle'),
        disable_progress_bar=True,
    )
    ed.learn(data=data, annotations=annotations)

    # One model per event class.
    nose.tools.eq_(len(ed.model), 2)

    # The positive model carries the configured six mixture components.
    nose.tools.eq_(ed.model['event1']['positive'].means_.shape[0], 6)
def test_predict():
    """Train SceneClassifierGMM, run prediction, and exercise the error paths."""
    mfcc_path = os.path.join('material', 'test.mfcc.cpickle')

    FeatureExtractor(store=True, overwrite=True).extract(
        audio_file=os.path.join('material', 'test.wav'),
        extractor_name='mfcc',
        extractor_params={'mfcc': {'n_mfcc': 10}},
        storage_paths={'mfcc': mfcc_path}
    )
    feature_container = FeatureContainer(filename=mfcc_path)

    data = {
        'file1.wav': feature_container,
        'file2.wav': feature_container,
    }
    annotations = {
        'file1.wav': MetaDataItem({'file': 'file1.wav', 'scene_label': 'scene1'}),
        'file2.wav': MetaDataItem({'file': 'file2.wav', 'scene_label': 'scene2'}),
    }

    sc = SceneClassifierGMM(
        method='gmm',
        class_labels=['scene1', 'scene2'],
        params={
            'n_components': 6,
            'covariance_type': 'diag',
            'tol': 0.001,
            'reg_covar': 0,
            'max_iter': 40,
            'n_init': 1,
            'init_params': 'kmeans',
            'random_state': 0,
        },
        filename=os.path.join('material', 'test.model.cpickle'),
        disable_progress_bar=True,
    )
    sc.learn(data=data, annotations=annotations)

    recognizer_params = {
        'frame_accumulation': {
            'enable': True,
            'type': 'sum',
        },
        'frame_binarization': {
            'enable': False,
        },
        'decision_making': {
            'enable': True,
            'type': 'maximum',
        }
    }
    result = sc.predict(
        feature_data=feature_container,
        recognizer_params=recognizer_params
    )

    # Both files used identical features; 'scene1' is the expected winner.
    nose.tools.eq_(result, 'scene1')

    # Unknown accumulation type must raise.
    recognizer_params['frame_accumulation']['type'] = 'test'
    nose.tools.assert_raises(AssertionError, sc.predict, feature_container, recognizer_params)

    # Unknown decision-making type must raise.
    recognizer_params['frame_accumulation']['type'] = 'sum'
    recognizer_params['decision_making']['type'] = 'test'
    nose.tools.assert_raises(AssertionError, sc.predict, feature_container, recognizer_params)
def test():
    """FeatureAggregator recipes must produce the expected output dimensionality.

    Extracts a 10-dimensional MFCC feature file once, then checks the
    aggregated feature-matrix width for each aggregation recipe. The five
    original copy-pasted sections are folded into one parameterized helper;
    assertions and values are unchanged.
    """
    mfcc_path = os.path.join('material', 'test.mfcc.cpickle')

    FeatureExtractor(store=True, overwrite=True).extract(
        audio_file=os.path.join('material', 'test.wav'),
        extractor_name='mfcc',
        extractor_params={'mfcc': {'n_mfcc': 10}},
        storage_paths={'mfcc': mfcc_path}
    )

    def check_recipe(recipe, expected_width):
        # Stack raw MFCCs, aggregate with the given recipe, and verify shape.
        feature_aggregator = FeatureAggregator(
            recipe=recipe,
            win_length_frames=10,
            hop_length_frames=1,
        )
        feature_stacker = FeatureStacker(recipe=[{'method': 'mfcc'}])
        feature_repository = FeatureContainer().load(filename_dict={'mfcc': mfcc_path})
        feature_matrix = feature_stacker.process(feature_data=feature_repository)
        feature_matrix = feature_aggregator.process(feature_data=feature_matrix)

        # Frame count is preserved; width depends on the aggregation recipe.
        nose.tools.eq_(feature_matrix.shape[0], 501)
        nose.tools.eq_(feature_matrix.shape[1], expected_width)

    check_recipe(['mean'], 10)                                 # Test #1
    check_recipe(['mean', 'std'], 2 * 10)                      # Test #2
    check_recipe(['mean', 'std', 'kurtosis', 'skew'], 4 * 10)  # Test #3
    check_recipe(['cov'], 10 * 10)                             # Test #4
    check_recipe(['flatten'], 10 * 10)                         # Test #5
def test_predict():
    """Train the deprecated-API GMM classifier and classify via SceneRecognizer."""
    mfcc_path = os.path.join('material', 'test.mfcc.cpickle')

    FeatureExtractor(store=True, overwrite=True).extract(
        audio_file=os.path.join('material', 'test.wav'),
        extractor_name='mfcc',
        extractor_params={'mfcc': {'n_mfcc': 10}},
        storage_paths={'mfcc': mfcc_path}
    )
    feature_container = FeatureContainer(filename=mfcc_path)

    data = {
        'file1.wav': feature_container,
        'file2.wav': feature_container,
    }
    annotations = {
        'file1.wav': MetaDataItem({'file': 'file1.wav', 'scene_label': 'scene1'}),
        'file2.wav': MetaDataItem({'file': 'file2.wav', 'scene_label': 'scene2'}),
    }

    sc = SceneClassifierGMMdeprecated(
        method='gmm_deprecated',
        class_labels=['scene1', 'scene2'],
        params={
            'n_components': 6,
            'covariance_type': 'diag',
            'random_state': 0,
            'tol': 0.001,
            'min_covar': 0.001,
            'n_iter': 40,
            'n_init': 1,
            'params': 'wmc',
            'init_params': 'wmc',
        },
        filename=os.path.join('material', 'test.model.cpickle'),
        disable_progress_bar=True,
    )
    sc.learn(data=data, annotations=annotations)

    recognizer_params = {
        'frame_accumulation': {
            'enable': True,
            'type': 'sum',
        },
        'frame_binarization': {
            'enable': False,
        },
        'decision_making': {
            'enable': True,
            'type': 'maximum',
        }
    }

    # Frame probabilities from the trained model.
    frame_probabilities = sc.predict(feature_data=feature_container)

    # Scene recognizer collapses frame probabilities into a single scene label.
    result = SceneRecognizer(
        params=recognizer_params,
        class_labels=['scene1', 'scene2'],
    ).process(frame_probabilities=frame_probabilities)

    nose.tools.eq_(result, 'scene1')