Example #1
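These snippets are excerpted from a nose-style test suite for the DCASE framework and are shown without their surrounding module. A plausible shared setup is sketched below; the module paths are assumptions based on the dcase_framework layout, not verified facts.

import os

import numpy
import numpy.testing
import nose.tools

# Assumed module paths (check against your dcase_framework version):
from dcase_framework.features import (FeatureExtractor, FeatureContainer,
                                      FeatureNormalizer, FeatureStacker,
                                      FeatureAggregator)
from dcase_framework.learners import (SceneClassifier, SceneClassifierGMM,
                                      SceneClassifierGMMdeprecated,
                                      SceneClassifierMLP, EventDetector,
                                      EventDetectorGMM, EventDetectorMLP)
from dcase_framework.recognizers import SceneRecognizer, EventRecognizer
from dcase_framework.metadata import MetaDataItem, MetaDataContainer
from dcase_framework.parameters import ParameterContainer
from dcase_framework.utils import posix_path
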
def test_save():
    FeatureExtractor(store=True, overwrite=True).extract(
        audio_file=os.path.join('material', 'test.wav'),
        extractor_name='mfcc',
        extractor_params={'mfcc': {
            'n_mfcc': 10
        }},
        storage_paths={'mfcc': os.path.join('material', 'test.mfcc.cpickle')})

    feature_container = FeatureContainer().load(
        filename=os.path.join('material', 'test.mfcc.cpickle'))
    feature_container.save(
        filename=os.path.join('material', 'saved.mfcc.cpickle'))

def test_with_statement():
    FeatureExtractor(store=True, overwrite=True).extract(
        audio_file=os.path.join('material', 'test.wav'),
        extractor_name='mfcc',
        extractor_params={
            'mfcc': {
                'n_mfcc': 10
            }
        },
        storage_paths={
            'mfcc': os.path.join('material', 'test.mfcc.cpickle')
        }
    )

    feature_container = FeatureContainer().load(filename=os.path.join('material', 'test.mfcc.cpickle'))
    with FeatureNormalizer() as feature_normalizer:
        feature_normalizer.accumulate(feature_container)

    nose.tools.eq_(feature_normalizer['N'][0], 501)

    numpy.testing.assert_array_equal(feature_normalizer['mean'][0][0],
                                     numpy.mean(feature_container.feat[0], axis=0))
    numpy.testing.assert_array_equal(feature_normalizer['S1'][0],
                                     numpy.sum(feature_container.feat[0], axis=0))
    numpy.testing.assert_array_equal(feature_normalizer['S2'][0],
                                     numpy.sum(feature_container.feat[0] ** 2, axis=0))

    # Re-runs the accumulate/finalize assertions; test_accumulate_finalize()
    # is defined elsewhere in the source test module (not shown here).
    test_accumulate_finalize()

def test_learn():
    FeatureExtractor(store=True, overwrite=True).extract(
        audio_file=os.path.join('material', 'test.wav'),
        extractor_name='mfcc',
        extractor_params={
            'mfcc': {
                'n_mfcc': 10
            }
        },
        storage_paths={
            'mfcc': os.path.join('material', 'test.mfcc.cpickle')
        }
    )

    feature_container = FeatureContainer(filename=os.path.join('material', 'test.mfcc.cpickle'))

    data = {
        'file1.wav': feature_container,
        'file2.wav': feature_container,
    }

    annotations = {
        'file1.wav': MetaDataItem(
            {
                'file': 'file1.wav',
                'scene_label': 'scene1',
            }
        ),
        'file2.wav': MetaDataItem(
            {
                'file': 'file2.wav',
                'scene_label': 'scene2',
            }
        ),
    }

    sc = SceneClassifierGMM(
        method='gmm',
        class_labels=['scene1', 'scene2'],
        params={
            'n_components': 6,
            'covariance_type': 'diag',
            'tol': 0.001,
            'reg_covar': 0,
            'max_iter': 40,
            'n_init': 1,
            'init_params': 'kmeans',
            'random_state': 0,
        },
        filename=os.path.join('material', 'test.model.cpickle'),
        disable_progress_bar=True,
    )

    sc.learn(data=data, annotations=annotations)

    # Test model count
    nose.tools.eq_(len(sc.model), 2)

    # Test model dimensions
    nose.tools.eq_(sc.model['scene1'].means_.shape[0], 6)

def test_empty():
    nose.tools.eq_(FeatureContainer().shape, None)
    nose.tools.eq_(FeatureContainer().channels, None)
    nose.tools.eq_(FeatureContainer().frames, None)
    nose.tools.eq_(FeatureContainer().vector_length, None)
    nose.tools.eq_(FeatureContainer().feat, None)
    nose.tools.eq_(FeatureContainer().stat, None)
    nose.tools.eq_(FeatureContainer().meta, None)
Example #5
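The MLP examples here and below reference a module-level learner_params dictionary that is not shown. Only learner_params['training']['epochs'] is asserted on, so the sketch below is a hypothetical minimal shape, not the actual dictionary from the source suite.

# Hypothetical minimal learner_params; the real dictionary also configures
# the network architecture for SceneClassifierMLP / EventDetectorMLP.
learner_params = {
    'seed': 0,             # assumed: fixed seed for reproducibility
    'training': {
        'epochs': 10,      # compared against len(learning_history['loss'])
        'batch_size': 16,  # assumed
    },
}
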
def test_learn():
    FeatureExtractor(store=True, overwrite=True).extract(
        audio_file=os.path.join('material', 'test.wav'),
        extractor_name='mfcc',
        extractor_params={
            'mfcc': {
                'n_mfcc': 10
            }
        },
        storage_paths={
            'mfcc': os.path.join('material', 'test.mfcc.cpickle')
        }
    )

    feature_container = FeatureContainer(filename=os.path.join('material', 'test.mfcc.cpickle'))

    data = {
        'file1.wav': feature_container,
        'file2.wav': feature_container,
    }

    annotations = {
        'file1.wav': MetaDataItem(
            {
                'file': 'file1.wav',
                'scene_label': 'scene1',
            }
        ),
        'file2.wav': MetaDataItem(
            {
                'file': 'file2.wav',
                'scene_label': 'scene2',
            }
        ),
    }

    sc = SceneClassifierMLP(
        method='mlp',
        class_labels=['scene1', 'scene2'],
        params=learner_params,
        filename=os.path.join('material', 'test.model.cpickle'),
        disable_progress_bar=True,
    )

    sc.learn(data=data, annotations=annotations)

    # Test epochs
    nose.tools.eq_(len(sc['learning_history']['loss']), learner_params['training']['epochs'])

def test_get_target_matrix_dict():
    FeatureExtractor(store=True, overwrite=True).extract(
        audio_file=os.path.join('material', 'test.wav'),
        extractor_name='mfcc',
        extractor_params={'mfcc': {
            'n_mfcc': 10
        }},
        storage_paths={'mfcc': os.path.join('material', 'test.mfcc.cpickle')})

    feature_container = FeatureContainer(
        filename=os.path.join('material', 'test.mfcc.cpickle'))

    data = {
        'file1.wav': feature_container,
        'file2.wav': feature_container,
    }

    annotations = {
        'file1.wav':
        MetaDataItem({
            'file': 'file1.wav',
            'scene_label': 'scene1',
        }),
        'file2.wav':
        MetaDataItem({
            'file': 'file2.wav',
            'scene_label': 'scene2',
        }),
    }

    sc = SceneClassifier(
        class_labels=['scene1', 'scene2'],
        disable_progress_bar=True,
    )
    target_matrix = sc._get_target_matrix_dict(data=data,
                                               annotations=annotations)

    # Test shape
    nose.tools.eq_(target_matrix['file1.wav'].shape, (501, 2))
    nose.tools.eq_(target_matrix['file2.wav'].shape, (501, 2))

    # Test content
    nose.tools.eq_(numpy.any(target_matrix['file1.wav'][:, 0] == 1), True)
    nose.tools.eq_(numpy.any(target_matrix['file1.wav'][:, 1] == 1), False)

    nose.tools.eq_(numpy.any(target_matrix['file2.wav'][:, 0] == 1), False)
    nose.tools.eq_(numpy.any(target_matrix['file2.wav'][:, 1] == 1), True)
Example #7
    # Class-method excerpt: self.model_container is assumed to hold a trained
    # model with feature_stacker, feature_normalizer and feature_aggregator.
    def process_feature_data(self, feature_filename):
        feature_list = {}
        feature_list['mel'] = FeatureContainer().load(
            filename=feature_filename)
        feature_data = self.model_container.feature_stacker.process(
            feature_data=feature_list)

        # Normalize features
        if self.model_container.feature_normalizer:
            feature_data = self.model_container.feature_normalizer.normalize(
                feature_data)

        # Aggregate features
        if self.model_container.feature_aggregator:
            feature_data = self.model_container.feature_aggregator.process(
                feature_data)
        return feature_data
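
For reference, the same stack → normalize → aggregate chain can be run outside the class. In this sketch, feature_stacker, feature_normalizer and feature_aggregator are placeholders for the corresponding model_container members and are assumed to be set up already.

# Standalone sketch of the chain above (names are placeholder assumptions):
feature_list = {'mel': FeatureContainer().load(filename=feature_filename)}
feature_data = feature_stacker.process(feature_data=feature_list)
feature_data = feature_normalizer.normalize(feature_data)
feature_data = feature_aggregator.process(feature_data)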

def test_predict():
    FeatureExtractor(store=True, overwrite=True).extract(
        audio_file=os.path.join('material', 'test.wav'),
        extractor_name='mfcc',
        extractor_params={'mfcc': {
            'n_mfcc': 10
        }},
        storage_paths={'mfcc': os.path.join('material', 'test.mfcc.cpickle')})

    feature_container = FeatureContainer(
        filename=os.path.join('material', 'test.mfcc.cpickle'))

    data = {
        'file1.wav': feature_container,
        'file2.wav': feature_container,
    }

    annotations = {
        'file1.wav':
        MetaDataContainer([{
            'file': 'file1.wav',
            'scene_label': 'scene1',
            'event_onset': 0.0,
            'event_offset': 1.0,
            'event_label': 'event1',
            'identifier': 'a',
        }, {
            'file': 'file1.wav',
            'scene_label': 'scene1',
            'event_onset': 1.0,
            'event_offset': 2.0,
            'event_label': 'event2',
            'identifier': 'a',
        }, {
            'file': 'file1.wav',
            'scene_label': 'scene1',
            'event_onset': 2.0,
            'event_offset': 3.0,
            'event_label': 'event2',
            'identifier': 'a',
        }, {
            'file': 'file1.wav',
            'scene_label': 'scene1',
            'event_onset': 4.0,
            'event_offset': 5.0,
            'event_label': 'event1',
            'identifier': 'a',
        }, {
            'file': 'file1.wav',
            'scene_label': 'scene1',
            'event_onset': 1.0,
            'event_offset': 2.0,
            'event_label': 'event1',
            'identifier': 'a',
        }]),
        'file2.wav':
        MetaDataContainer([{
            'file': 'file2.wav',
            'scene_label': 'scene1',
            'event_onset': 0.0,
            'event_offset': 1.0,
            'event_label': 'event2',
            'identifier': 'b',
        }, {
            'file': 'file2.wav',
            'scene_label': 'scene1',
            'event_onset': 1.0,
            'event_offset': 2.0,
            'event_label': 'event1',
            'identifier': 'b',
        }, {
            'file': 'file2.wav',
            'scene_label': 'scene1',
            'event_onset': 2.0,
            'event_offset': 3.0,
            'event_label': 'event2',
            'identifier': 'b',
        }, {
            'file': 'file2.wav',
            'scene_label': 'scene1',
            'event_onset': 3.0,
            'event_offset': 4.0,
            'event_label': 'event2',
            'identifier': 'b',
        }])
    }

    ed = EventDetectorMLP(
        method='mlp',
        class_labels=['event1', 'event2'],
        params={
            'hop_length_seconds': 0.02,
            'parameters': learner_params,
        },
        filename=os.path.join('material', 'test.model.cpickle'),
        disable_progress_bar=True,
    )

    ed.learn(data=data, annotations=annotations)

    recognizer_params = {
        'frame_accumulation': {
            'enable': False,
        },
        'frame_binarization': {
            'enable': True,
            'type': 'global_threshold',
            'threshold': 0.5,
        },
        'event_activity_processing': {
            'enable': True,
            'type': 'median_filtering',
            'window_length_frames': 11,
        }
    }
    # Frame probabilities
    frame_probabilities = ed.predict(feature_data=feature_container)
    # Event recognizer
    result = EventRecognizer(
        hop_length_seconds=0.02,
        params=recognizer_params,
        class_labels=['event1', 'event2'],
    ).process(frame_probabilities=frame_probabilities)

    # Test result
    nose.tools.eq_(len(result) > 0, True)
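
In this pipeline, ed.predict() yields per-frame class probabilities; EventRecognizer then binarizes them with the global threshold of 0.5 and smooths the binary activity with an 11-frame median filter (per recognizer_params above) before forming the event list that is asserted on.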

def test_normalizer():
    FeatureExtractor(store=True, overwrite=True).extract(
        audio_file=os.path.join('material', 'test.wav'),
        extractor_name='mfcc',
        extractor_params={'mfcc': {
            'n_mfcc': 10
        }},
        storage_paths={'mfcc': os.path.join('material', 'test.mfcc.cpickle')})

    # Test 1
    test_recipe = 'mfcc=0-5'
    test_recipe_parsed = ParameterContainer()._parse_recipe(recipe=test_recipe)
    feature_container = FeatureContainer().load(
        filename=os.path.join('material', 'test.mfcc.cpickle'))
    feature_normalizer = FeatureNormalizer().accumulate(
        feature_container=feature_container).finalize()

    feature_stacker = FeatureStacker(recipe=test_recipe_parsed)
    feature_normalizer = feature_stacker.normalizer(
        normalizer_list={'mfcc': feature_normalizer})

    nose.tools.eq_(feature_normalizer['N'][0][0], 501)
    nose.tools.eq_(feature_normalizer['mean'][0].shape[0], 1)
    nose.tools.eq_(feature_normalizer['mean'][0].shape[1], 6)

    nose.tools.eq_(feature_normalizer['std'][0].shape[0], 1)
    nose.tools.eq_(feature_normalizer['std'][0].shape[1], 6)

    # Test 2
    test_recipe = 'mfcc=1,2,3,4'
    test_recipe_parsed = ParameterContainer()._parse_recipe(recipe=test_recipe)
    feature_container = FeatureContainer().load(
        filename=os.path.join('material', 'test.mfcc.cpickle'))
    feature_normalizer = FeatureNormalizer().accumulate(
        feature_container=feature_container).finalize()

    feature_stacker = FeatureStacker(recipe=test_recipe_parsed)
    feature_normalizer = feature_stacker.normalizer(
        normalizer_list={'mfcc': feature_normalizer})

    nose.tools.eq_(feature_normalizer['N'][0][0], 501)
    nose.tools.eq_(feature_normalizer['mean'][0].shape[0], 1)
    nose.tools.eq_(feature_normalizer['mean'][0].shape[1], 4)

    nose.tools.eq_(feature_normalizer['std'][0].shape[0], 1)
    nose.tools.eq_(feature_normalizer['std'][0].shape[1], 4)

    # Test 3
    test_recipe = 'mfcc'
    test_recipe_parsed = ParameterContainer()._parse_recipe(recipe=test_recipe)
    feature_container = FeatureContainer().load(
        filename=os.path.join('material', 'test.mfcc.cpickle'))
    feature_normalizer = FeatureNormalizer().accumulate(
        feature_container=feature_container).finalize()

    feature_stacker = FeatureStacker(recipe=test_recipe_parsed)
    feature_normalizer = feature_stacker.normalizer(
        normalizer_list={'mfcc': feature_normalizer})

    nose.tools.eq_(feature_normalizer['N'][0][0], 501)
    nose.tools.eq_(feature_normalizer['mean'][0].shape[0], 1)
    nose.tools.eq_(feature_normalizer['mean'][0].shape[1], 10)

    nose.tools.eq_(feature_normalizer['std'][0].shape[0], 1)
    nose.tools.eq_(feature_normalizer['std'][0].shape[1], 10)
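
Taken together, the three cases exercise the recipe selection syntax: 'mfcc=0-5' keeps a contiguous range of six coefficients, 'mfcc=1,2,3,4' keeps four individually listed coefficients, and a bare 'mfcc' keeps the full 10-dimensional vector, as the normalizer shapes above confirm.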
Example #10
def test_predict():
    FeatureExtractor(store=True, overwrite=True).extract(
        audio_file=os.path.join('material', 'test.wav'),
        extractor_name='mfcc',
        extractor_params={
            'mfcc': {
                'n_mfcc': 10
            }
        },
        storage_paths={
            'mfcc': os.path.join('material', 'test.mfcc.cpickle')
        }
    )

    feature_container = FeatureContainer(filename=os.path.join('material', 'test.mfcc.cpickle'))

    data = {
        'file1.wav': feature_container,
        'file2.wav': feature_container,
    }

    annotations = {
        'file1.wav': MetaDataItem(
            {
                'file': 'file1.wav',
                'scene_label': 'scene1',
            }
        ),
        'file2.wav': MetaDataItem(
            {
                'file': 'file2.wav',
                'scene_label': 'scene2',
            }
        ),
    }

    sc = SceneClassifierMLP(
        method='mlp',
        class_labels=['scene1', 'scene2'],
        params=learner_params,
        filename=os.path.join('material', 'test.model.cpickle'),
        disable_progress_bar=True,
    )

    sc.learn(data=data, annotations=annotations)
    recognizer_params = {
        'frame_accumulation': {
            'enable': False,
        },
        'frame_binarization': {
            'enable': True,
            'type': 'frame_max',
        },
        'decision_making': {
            'enable': True,
            'type': 'majority_vote',
        }
    }
    result = sc.predict(
        feature_data=feature_container,
        recognizer_params=recognizer_params
    )

    # Test result
    nose.tools.eq_(len(result) > 0, True)

    # Test errors
    recognizer_params['frame_binarization']['type'] = 'test'
    nose.tools.assert_raises(AssertionError, sc.predict, feature_container, recognizer_params)

    recognizer_params['frame_binarization']['type'] = 'frame_max'
    recognizer_params['decision_making']['type'] = 'test'
    nose.tools.assert_raises(AssertionError, sc.predict, feature_container, recognizer_params)
Example #11
def test_predict():
    FeatureExtractor(store=True, overwrite=True).extract(
        audio_file=os.path.join('material', 'test.wav'),
        extractor_name='mfcc',
        extractor_params={
            'mfcc': {
                'n_mfcc': 10
            }
        },
        storage_paths={
            'mfcc': os.path.join('material', 'test.mfcc.cpickle')
        }
    )
    feature_container = FeatureContainer(filename=os.path.join('material', 'test.mfcc.cpickle'))

    data = {
        'file1.wav': feature_container,
        'file2.wav': feature_container,
    }

    annotations = {
        'file1.wav': MetaDataContainer([
                {
                    'file': 'file1.wav',
                    'scene_label': 'scene1',
                    'event_onset': 0.0,
                    'event_offset': 1.0,
                    'event_label': 'event1',
                    'identifier': 'a',
                },
                {
                    'file': 'file1.wav',
                    'scene_label': 'scene1',
                    'event_onset': 1.0,
                    'event_offset': 2.0,
                    'event_label': 'event2',
                    'identifier': 'a',
                },
                {
                    'file': 'file1.wav',
                    'scene_label': 'scene1',
                    'event_onset': 2.0,
                    'event_offset': 3.0,
                    'event_label': 'event2',
                    'identifier': 'a',
                },
                {
                    'file': 'file1.wav',
                    'scene_label': 'scene1',
                    'event_onset': 4.0,
                    'event_offset': 5.0,
                    'event_label': 'event1',
                    'identifier': 'a',
                },
                {
                    'file': 'file1.wav',
                    'scene_label': 'scene1',
                    'event_onset': 1.0,
                    'event_offset': 2.0,
                    'event_label': 'event1',
                    'identifier': 'a',
                }
            ]
        ),
        'file2.wav': MetaDataContainer([
                {
                    'file': 'file2.wav',
                    'scene_label': 'scene1',
                    'event_onset': 0.0,
                    'event_offset': 1.0,
                    'event_label': 'event2',
                    'identifier': 'b',
                },
                {
                    'file': 'file2.wav',
                    'scene_label': 'scene1',
                    'event_onset': 1.0,
                    'event_offset': 2.0,
                    'event_label': 'event1',
                    'identifier': 'b',
                },
                {
                    'file': 'file2.wav',
                    'scene_label': 'scene1',
                    'event_onset': 2.0,
                    'event_offset': 3.0,
                    'event_label': 'event2',
                    'identifier': 'b',
                },
                {
                    'file': 'file2.wav',
                    'scene_label': 'scene1',
                    'event_onset': 3.0,
                    'event_offset': 4.0,
                    'event_label': 'event2',
                    'identifier': 'b',
                }
            ]
        )
    }

    ed = EventDetectorGMM(
        method='gmm',
        class_labels=['event1', 'event2'],
        params={
            'hop_length_seconds': 0.02,
            'parameters': {
                'n_components': 6,
                'covariance_type': 'diag',
                'tol': 0.001,
                'reg_covar': 0,
                'max_iter': 40,
                'n_init': 1,
                'init_params': 'kmeans',
                'random_state': 0,
            }
        },
        filename=os.path.join('material', 'test.model.cpickle'),
        disable_progress_bar=True,
    )

    ed.learn(data=data, annotations=annotations)

    recognizer_params = {
        'frame_accumulation': {
            'enable': False,
            'type': 'sliding_sum',
            'window_length_frames': 2,
        },
        'frame_binarization': {
            'enable': True,
            'type': 'global_threshold',
            'threshold': 10,
        }
    }
    result = ed.predict(
        feature_data=feature_container,
        recognizer_params=recognizer_params
    )

    # Test result
    nose.tools.eq_(len(result) > 5, True)

    # Test errors
    recognizer_params['frame_binarization']['type'] = 'test'
    nose.tools.assert_raises(AssertionError, ed.predict, feature_container, recognizer_params)

def test_load_wrong_type():
    # Loads a file whose type does not match the container; presumably
    # expected to raise an error in the original test suite.
    FeatureContainer().load(filename=os.path.join('material', 'wrong.yaml'))


def test_load_not_found():
    # Loads a non-existent file; presumably expected to raise an error.
    FeatureContainer().load(filename=os.path.join('material', 'wrong.cpickle'))


def test_load():
    FeatureExtractor(store=True, overwrite=True).extract(
        audio_file=os.path.join('material', 'test.wav'),
        extractor_name='mfcc',
        extractor_params={
            'mfcc': {
                'n_mfcc': 10
            }
        },
        storage_paths={
            'mfcc': os.path.join('material', 'test.mfcc.cpickle')
        }
    )

    # Test #1
    feature_container = FeatureContainer(filename=os.path.join('material', 'test.mfcc.cpickle'))

    # Meta
    nose.tools.assert_list_equal(sorted(list(feature_container.keys())), ['feat', 'meta', 'stat'])

    nose.tools.eq_(feature_container.channels, 1)
    nose.tools.eq_(feature_container.frames, 501)
    nose.tools.eq_(feature_container.vector_length, 10)

    nose.tools.eq_(posix_path(feature_container.meta['audio_file']), 'material/test.wav')
    nose.tools.eq_(feature_container.meta['parameters']['n_mels'], 40)
    nose.tools.eq_(feature_container.meta['parameters']['n_mfcc'], 10)

    # Stat
    nose.tools.eq_(feature_container.stat[0]['N'], 501)
    nose.tools.assert_list_equal(sorted(list(feature_container.stat[0].keys())), ['N', 'S1', 'S2', 'mean', 'std'])

    # Feat
    # Shape
    nose.tools.eq_(feature_container.feat[0].shape[0], 501)
    nose.tools.eq_(feature_container.feat[0].shape[1], 10)

    nose.tools.eq_(feature_container.shape[0], 501)
    nose.tools.eq_(feature_container.shape[1], 10)

    # Test #2
    feature_container = FeatureContainer().load(filename=os.path.join('material', 'test.mfcc.cpickle'))

    # Meta
    nose.tools.assert_list_equal(sorted(list(feature_container.keys())), ['feat', 'meta', 'stat'])

    nose.tools.eq_(posix_path(feature_container.meta['audio_file']), 'material/test.wav')
    nose.tools.eq_(feature_container.meta['parameters']['n_mels'], 40)
    nose.tools.eq_(feature_container.meta['parameters']['n_mfcc'], 10)

    # Stat
    nose.tools.eq_(feature_container.stat[0]['N'], 501)
    nose.tools.assert_list_equal(sorted(list(feature_container.stat[0].keys())), ['N', 'S1', 'S2', 'mean', 'std'])

    # Feat
    # Shape
    nose.tools.eq_(feature_container.feat[0].shape[0], 501)
    nose.tools.eq_(feature_container.feat[0].shape[1], 10)

    nose.tools.eq_(feature_container.shape[0], 501)
    nose.tools.eq_(feature_container.shape[1], 10)

    # Test #3
    feature_repository = FeatureContainer().load(filename_dict={'mfcc1': os.path.join('material', 'test.mfcc.cpickle'),
                                                                'mfcc2': os.path.join('material', 'test.mfcc.cpickle')})

    nose.tools.assert_list_equal(sorted(list(feature_repository.keys())), ['mfcc1', 'mfcc2'])
Example #15
def test_get_target_matrix_dict():
    FeatureExtractor(store=True, overwrite=True).extract(
        audio_file=os.path.join('material', 'test.wav'),
        extractor_name='mfcc',
        extractor_params={'mfcc': {
            'n_mfcc': 10
        }},
        storage_paths={'mfcc': os.path.join('material', 'test.mfcc.cpickle')})

    feature_container = FeatureContainer(
        filename=os.path.join('material', 'test.mfcc.cpickle'))

    data = {
        'file1.wav': feature_container,
        'file2.wav': feature_container,
    }

    annotations = {
        'file1.wav':
        MetaDataContainer([{
            'file': 'file1.wav',
            'scene_label': 'scene1',
            'event_onset': 0.0,
            'event_offset': 1.0,
            'event_label': 'event1',
        }, {
            'file': 'file1.wav',
            'scene_label': 'scene1',
            'event_onset': 1.0,
            'event_offset': 2.0,
            'event_label': 'event2',
        }]),
        'file2.wav':
        MetaDataContainer([{
            'file': 'file2.wav',
            'scene_label': 'scene1',
            'event_onset': 0.0,
            'event_offset': 1.0,
            'event_label': 'event2',
        }, {
            'file': 'file2.wav',
            'scene_label': 'scene1',
            'event_onset': 1.0,
            'event_offset': 2.0,
            'event_label': 'event1',
        }]),
    }

    ed = EventDetector(class_labels=['event1', 'event2'],
                       disable_progress_bar=True,
                       params={
                           'hop_length_seconds': 0.02,
                       })
    target_matrix = ed._get_target_matrix_dict(data=data,
                                               annotations=annotations)

    # Test shape
    nose.tools.eq_(target_matrix['file1.wav'].shape, (501, 2))
    nose.tools.eq_(target_matrix['file2.wav'].shape, (501, 2))

    # Test content
    nose.tools.eq_(numpy.sum(target_matrix['file1.wav'][:, 0] == 1), 50)
    nose.tools.eq_(numpy.sum(target_matrix['file1.wav'][:, 1] == 1), 50)

    nose.tools.eq_(numpy.sum(target_matrix['file2.wav'][:, 0] == 1), 50)
    nose.tools.eq_(numpy.sum(target_matrix['file2.wav'][:, 1] == 1), 50)

def test_learn():
    FeatureExtractor(store=True, overwrite=True).extract(
        audio_file=os.path.join('material', 'test.wav'),
        extractor_name='mfcc',
        extractor_params={'mfcc': {
            'n_mfcc': 10
        }},
        storage_paths={'mfcc': os.path.join('material', 'test.mfcc.cpickle')})

    feature_container = FeatureContainer(
        filename=os.path.join('material', 'test.mfcc.cpickle'))

    data = {
        'file1.wav': feature_container,
        'file2.wav': feature_container,
    }

    annotations = {
        'file1.wav':
        MetaDataContainer([{
            'file': 'file1.wav',
            'scene_label': 'scene1',
            'event_onset': 0.0,
            'event_offset': 1.0,
            'event_label': 'event1',
            'identifier': 'a',
        }, {
            'file': 'file1.wav',
            'scene_label': 'scene1',
            'event_onset': 1.0,
            'event_offset': 2.0,
            'event_label': 'event2',
            'identifier': 'a',
        }, {
            'file': 'file1.wav',
            'scene_label': 'scene1',
            'event_onset': 2.0,
            'event_offset': 3.0,
            'event_label': 'event2',
            'identifier': 'a',
        }, {
            'file': 'file1.wav',
            'scene_label': 'scene1',
            'event_onset': 4.0,
            'event_offset': 5.0,
            'event_label': 'event1',
            'identifier': 'a',
        }, {
            'file': 'file1.wav',
            'scene_label': 'scene1',
            'event_onset': 1.0,
            'event_offset': 2.0,
            'event_label': 'event1',
            'identifier': 'a',
        }]),
        'file2.wav':
        MetaDataContainer([{
            'file': 'file2.wav',
            'scene_label': 'scene1',
            'event_onset': 0.0,
            'event_offset': 1.0,
            'event_label': 'event2',
            'identifier': 'b',
        }, {
            'file': 'file2.wav',
            'scene_label': 'scene1',
            'event_onset': 1.0,
            'event_offset': 2.0,
            'event_label': 'event1',
            'identifier': 'b',
        }, {
            'file': 'file2.wav',
            'scene_label': 'scene1',
            'event_onset': 2.0,
            'event_offset': 3.0,
            'event_label': 'event2',
            'identifier': 'b',
        }, {
            'file': 'file2.wav',
            'scene_label': 'scene1',
            'event_onset': 3.0,
            'event_offset': 4.0,
            'event_label': 'event2',
            'identifier': 'b',
        }])
    }

    ed = EventDetectorMLP(
        method='mlp',
        class_labels=['event1', 'event2'],
        params={
            'hop_length_seconds': 0.02,
            'parameters': learner_params
        },
        filename=os.path.join('material', 'test.model.cpickle'),
        disable_progress_bar=True,
    )

    ed.learn(data=data, annotations=annotations)

    # Test epochs
    nose.tools.eq_(len(ed['learning_history']['loss']),
                   learner_params['training']['epochs'])
Example #17
def test_learn():
    FeatureExtractor(store=True, overwrite=True).extract(
        audio_file=os.path.join('material', 'test.wav'),
        extractor_name='mfcc',
        extractor_params={
            'mfcc': {
                'n_mfcc': 10
            }
        },
        storage_paths={
            'mfcc': os.path.join('material', 'test.mfcc.cpickle')
        }
    )
    feature_container = FeatureContainer(filename=os.path.join('material', 'test.mfcc.cpickle'))

    data = {
        'file1.wav': feature_container,
        'file2.wav': feature_container,
    }

    annotations = {
        'file1.wav': MetaDataContainer([
                {
                    'file': 'file1.wav',
                    'scene_label': 'scene1',
                    'event_onset': 0.0,
                    'event_offset': 1.0,
                    'event_label': 'event1',
                    'identifier': 'a',
                },
                {
                    'file': 'file1.wav',
                    'scene_label': 'scene1',
                    'event_onset': 1.0,
                    'event_offset': 2.0,
                    'event_label': 'event2',
                    'identifier': 'a',
                },
                {
                    'file': 'file1.wav',
                    'scene_label': 'scene1',
                    'event_onset': 2.0,
                    'event_offset': 3.0,
                    'event_label': 'event2',
                    'identifier': 'a',
                },
                {
                    'file': 'file1.wav',
                    'scene_label': 'scene1',
                    'event_onset': 4.0,
                    'event_offset': 5.0,
                    'event_label': 'event1',
                    'identifier': 'a',
                },
                {
                    'file': 'file1.wav',
                    'scene_label': 'scene1',
                    'event_onset': 1.0,
                    'event_offset': 2.0,
                    'event_label': 'event1',
                    'identifier': 'a',
                }
            ]
        ),
        'file2.wav': MetaDataContainer([
                {
                    'file': 'file2.wav',
                    'scene_label': 'scene1',
                    'event_onset': 0.0,
                    'event_offset': 1.0,
                    'event_label': 'event2',
                    'identifier': 'b',
                },
                {
                    'file': 'file2.wav',
                    'scene_label': 'scene1',
                    'event_onset': 1.0,
                    'event_offset': 2.0,
                    'event_label': 'event1',
                    'identifier': 'b',
                },
                {
                    'file': 'file2.wav',
                    'scene_label': 'scene1',
                    'event_onset': 2.0,
                    'event_offset': 3.0,
                    'event_label': 'event2',
                    'identifier': 'b',
                },
                {
                    'file': 'file2.wav',
                    'scene_label': 'scene1',
                    'event_onset': 3.0,
                    'event_offset': 4.0,
                    'event_label': 'event2',
                    'identifier': 'b',
                }
            ]
        )
    }

    ed = EventDetectorGMM(
        method='gmm',
        class_labels=['event1', 'event2'],
        params={
            'hop_length_seconds': 0.02,
            'parameters': {
                'n_components': 6,
                'covariance_type': 'diag',
                'tol': 0.001,
                'reg_covar': 0,
                'max_iter': 40,
                'n_init': 1,
                'init_params': 'kmeans',
                'random_state': 0,
            }
        },
        filename=os.path.join('material', 'test.model.cpickle'),
        disable_progress_bar=True,
    )

    ed.learn(data=data, annotations=annotations)
    # Test model count
    nose.tools.eq_(len(ed.model), 2)

    # Test model dimensions
    nose.tools.eq_(ed.model['event1']['positive'].means_.shape[0], 6)

def test_predict():
    FeatureExtractor(store=True, overwrite=True).extract(
        audio_file=os.path.join('material', 'test.wav'),
        extractor_name='mfcc',
        extractor_params={
            'mfcc': {
                'n_mfcc': 10
            }
        },
        storage_paths={
            'mfcc': os.path.join('material', 'test.mfcc.cpickle')
        }
    )

    feature_container = FeatureContainer(filename=os.path.join('material', 'test.mfcc.cpickle'))

    data = {
        'file1.wav': feature_container,
        'file2.wav': feature_container,
    }

    annotations = {
        'file1.wav': MetaDataItem(
            {
                'file': 'file1.wav',
                'scene_label': 'scene1',
            }
        ),
        'file2.wav': MetaDataItem(
            {
                'file': 'file2.wav',
                'scene_label': 'scene2',
            }
        ),
    }

    sc = SceneClassifierGMM(
        method='gmm',
        class_labels=['scene1', 'scene2'],
        params={
            'n_components': 6,
            'covariance_type': 'diag',
            'tol': 0.001,
            'reg_covar': 0,
            'max_iter': 40,
            'n_init': 1,
            'init_params': 'kmeans',
            'random_state': 0,
        },
        filename=os.path.join('material', 'test.model.cpickle'),
        disable_progress_bar=True,
    )

    sc.learn(data=data, annotations=annotations)
    recognizer_params = {
        'frame_accumulation': {
            'enable': True,
            'type': 'sum',
        },
        'frame_binarization': {
            'enable': False,
        },
        'decision_making': {
            'enable': True,
            'type': 'maximum',
        }
    }
    result = sc.predict(
        feature_data=feature_container,
        recognizer_params=recognizer_params
    )

    # Test result
    nose.tools.eq_(result, 'scene1')

    # Test errors
    recognizer_params['frame_accumulation']['type'] = 'test'
    nose.tools.assert_raises(AssertionError, sc.predict, feature_container, recognizer_params)

    recognizer_params['frame_accumulation']['type'] = 'sum'
    recognizer_params['decision_making']['type'] = 'test'
    nose.tools.assert_raises(AssertionError, sc.predict, feature_container, recognizer_params)

def test():
    FeatureExtractor(store=True, overwrite=True).extract(
        audio_file=os.path.join('material', 'test.wav'),
        extractor_name='mfcc',
        extractor_params={'mfcc': {
            'n_mfcc': 10
        }},
        storage_paths={'mfcc': os.path.join('material', 'test.mfcc.cpickle')})

    # Test #1
    feature_aggregator = FeatureAggregator(
        recipe=['mean'],
        win_length_frames=10,
        hop_length_frames=1,
    )

    feature_stacker = FeatureStacker(recipe=[{'method': 'mfcc'}])
    feature_repository = FeatureContainer().load(
        filename_dict={'mfcc': os.path.join('material', 'test.mfcc.cpickle')})
    feature_matrix = feature_stacker.process(feature_data=feature_repository)
    feature_matrix = feature_aggregator.process(feature_data=feature_matrix)

    nose.tools.eq_(feature_matrix.shape[0], 501)
    nose.tools.eq_(feature_matrix.shape[1], 10)

    # Test #2
    feature_aggregator = FeatureAggregator(
        recipe=['mean', 'std'],
        win_length_frames=10,
        hop_length_frames=1,
    )

    feature_stacker = FeatureStacker(recipe=[{'method': 'mfcc'}])
    feature_repository = FeatureContainer().load(
        filename_dict={'mfcc': os.path.join('material', 'test.mfcc.cpickle')})
    feature_matrix = feature_stacker.process(feature_data=feature_repository)
    feature_matrix = feature_aggregator.process(feature_data=feature_matrix)

    nose.tools.eq_(feature_matrix.shape[0], 501)
    nose.tools.eq_(feature_matrix.shape[1], 2 * 10)

    # Test #3
    feature_aggregator = FeatureAggregator(
        recipe=['mean', 'std', 'kurtosis', 'skew'],
        win_length_frames=10,
        hop_length_frames=1,
    )

    feature_stacker = FeatureStacker(recipe=[{'method': 'mfcc'}])
    feature_repository = FeatureContainer().load(
        filename_dict={'mfcc': os.path.join('material', 'test.mfcc.cpickle')})
    feature_matrix = feature_stacker.process(feature_data=feature_repository)
    feature_matrix = feature_aggregator.process(feature_data=feature_matrix)

    nose.tools.eq_(feature_matrix.shape[0], 501)
    nose.tools.eq_(feature_matrix.shape[1], 4 * 10)

    # Test #4
    feature_aggregator = FeatureAggregator(
        recipe=['cov'],
        win_length_frames=10,
        hop_length_frames=1,
    )

    feature_stacker = FeatureStacker(recipe=[{'method': 'mfcc'}])
    feature_repository = FeatureContainer().load(
        filename_dict={'mfcc': os.path.join('material', 'test.mfcc.cpickle')})
    feature_matrix = feature_stacker.process(feature_data=feature_repository)
    feature_matrix = feature_aggregator.process(feature_data=feature_matrix)

    nose.tools.eq_(feature_matrix.shape[0], 501)
    nose.tools.eq_(feature_matrix.shape[1], 10 * 10)

    # Test #5
    feature_aggregator = FeatureAggregator(
        recipe=['flatten'],
        win_length_frames=10,
        hop_length_frames=1,
    )

    feature_stacker = FeatureStacker(recipe=[{'method': 'mfcc'}])
    feature_repository = FeatureContainer().load(
        filename_dict={'mfcc': os.path.join('material', 'test.mfcc.cpickle')})
    feature_matrix = feature_stacker.process(feature_data=feature_repository)
    feature_matrix = feature_aggregator.process(feature_data=feature_matrix)

    nose.tools.eq_(feature_matrix.shape[0], 501)
    nose.tools.eq_(feature_matrix.shape[1], 10 * 10)

def test_predict():
    FeatureExtractor(store=True, overwrite=True).extract(
        audio_file=os.path.join('material', 'test.wav'),
        extractor_name='mfcc',
        extractor_params={'mfcc': {
            'n_mfcc': 10
        }},
        storage_paths={'mfcc': os.path.join('material', 'test.mfcc.cpickle')})

    feature_container = FeatureContainer(
        filename=os.path.join('material', 'test.mfcc.cpickle'))

    data = {
        'file1.wav': feature_container,
        'file2.wav': feature_container,
    }

    annotations = {
        'file1.wav':
        MetaDataItem({
            'file': 'file1.wav',
            'scene_label': 'scene1',
        }),
        'file2.wav':
        MetaDataItem({
            'file': 'file2.wav',
            'scene_label': 'scene2',
        }),
    }

    sc = SceneClassifierGMMdeprecated(
        method='gmm_deprecated',
        class_labels=['scene1', 'scene2'],
        params={
            'n_components': 6,
            'covariance_type': 'diag',
            'random_state': 0,
            'tol': 0.001,
            'min_covar': 0.001,
            'n_iter': 40,
            'n_init': 1,
            'params': 'wmc',
            'init_params': 'wmc',
        },
        filename=os.path.join('material', 'test.model.cpickle'),
        disable_progress_bar=True,
    )

    sc.learn(data=data, annotations=annotations)
    recognizer_params = {
        'frame_accumulation': {
            'enable': True,
            'type': 'sum',
        },
        'frame_binarization': {
            'enable': False,
        },
        'decision_making': {
            'enable': True,
            'type': 'maximum',
        }
    }
    # Frame probabilities
    frame_probabilities = sc.predict(feature_data=feature_container)

    # Scene recognizer
    result = SceneRecognizer(
        params=recognizer_params,
        class_labels=['scene1', 'scene2'],
    ).process(frame_probabilities=frame_probabilities)

    # Test result
    nose.tools.eq_(result, 'scene1')