Exemplo n.º 1
0
    def test_split_by_label_length_issuer_separated_only_uses_given_label_list(self, split_mock):
        """Issuer-separated label-length split must only count the requested label list.

        Every utterance receives an additional label list under the idx
        ``'some-idx'``; the split is requested for ``label_list_idx='default'``
        and the per-speaker lengths handed to ``split_mock`` are pinned.

        NOTE(review): ``split_mock`` is presumably injected by a patch
        decorator that is outside this excerpt -- confirm against the full file.
        """
        corpus = resources.create_multi_label_corpus()
        splitter = splitting.Splitter(corpus, random_seed=INITIAL_SEED)

        # Attach a second label list that is not the one requested below.
        for utterance in corpus.utterances.values():
            extra_labels = annotations.LabelList.create_single(
                'another label', idx='some-idx'
            )
            utterance.set_label_list(extra_labels)

        split_mock.return_value = {
            'train': ['spk-1', 'spk-2'],
            'test': ['spk-3'],
        }
        proportions = {'train': 0.5, 'test': 0.5}

        # A copy is passed so the call assertion below compares against a
        # dict the splitter never had access to.
        res = splitter.split_by_label_length(
            dict(proportions),
            label_list_idx='default',
            separate_issuers=True,
        )

        expected_utterances = {
            'train': {'utt-1', 'utt-2', 'utt-3', 'utt-4', 'utt-5'},
            'test': {'utt-6', 'utt-7', 'utt-8'},
        }
        for subset_name, utt_ids in expected_utterances.items():
            assert res[subset_name].utterances.keys() == utt_ids

        expected_lengths = {
            'spk-1': {'length': 32},
            'spk-2': {'length': 33},
            'spk-3': {'length': 32},
        }
        split_mock.assert_called_with(
            expected_lengths,
            proportions,
            seed=mock.ANY,
        )
Exemplo n.º 2
0
    def test_encode_full_utterance(self):
        """Frame-hot encode ``utt-6`` and compare against the expected matrix.

        The encoder is created for the labels ``music``, ``speech`` and
        ``noise`` on the ``'default'`` label list, using
        ``FrameSettings(32000, 16000)`` and ``sr=16000``.
        """
        ds = resources.create_multi_label_corpus()
        frame_settings = units.FrameSettings(32000, 16000)
        enc = encoding.FrameHotEncoder(
            ['music', 'speech', 'noise'],
            'default',
            frame_settings=frame_settings,
            sr=16000,
        )

        actual = enc.encode_utterance(ds.utterances['utt-6'])

        # Row patterns of the expected 14 x 3 matrix.
        # NOTE(review): columns presumably follow the label order given to
        # the encoder -- confirm against FrameHotEncoder.
        first_only = [1, 0, 0]
        first_and_second = [1, 1, 0]
        second_only = [0, 1, 0]
        rows = (
            [first_only] * 4
            + [first_and_second]
            + [second_only] * 7
            + [first_only] * 2
        )
        expected = np.array(rows).astype(np.float32)

        assert np.array_equal(expected, actual)
Exemplo n.º 3
0
    def test_split_by_number_of_utterances_seed(self):
        """Two splitters built with the same seed must yield identical subsets.

        Each run uses a freshly created corpus and ``random_seed=15``; the
        utterance ids of the ``train`` and ``test`` subsets are compared.
        """
        proportions = {'train': 0.6, 'test': 0.2}

        def split_fresh_corpus():
            # A new corpus and splitter per run, so no state is shared.
            fresh_corpus = resources.create_multi_label_corpus()
            seeded = splitting.Splitter(fresh_corpus, random_seed=15)
            return seeded.split_by_number_of_utterances(dict(proportions))

        first = split_fresh_corpus()
        second = split_fresh_corpus()

        for subset_name in ('train', 'test'):
            first_ids = set(first[subset_name].utterances.keys())
            second_ids = set(second[subset_name].utterances.keys())
            assert first_ids == second_ids
Exemplo n.º 4
0
    def test_split_by_proportionally_distribute_labels_by_number_seed(self):
        """Same seed must give the same label-proportional split (``use_lengths=False``).

        Each run uses a freshly created corpus and ``random_seed=15``; the
        utterance ids of the ``train`` and ``test`` subsets are compared.
        """
        proportions = {'train': 0.6, 'test': 0.2}

        def split_fresh_corpus():
            # A new corpus and splitter per run, so no state is shared.
            fresh_corpus = resources.create_multi_label_corpus()
            seeded = splitting.Splitter(fresh_corpus, random_seed=15)
            return seeded.split_by_proportionally_distribute_labels(
                dict(proportions), use_lengths=False)

        first = split_fresh_corpus()
        second = split_fresh_corpus()

        for subset_name in ('train', 'test'):
            first_ids = set(first[subset_name].utterances.keys())
            second_ids = set(second[subset_name].utterances.keys())
            assert first_ids == second_ids
Exemplo n.º 5
0
    def test_encode_utterance(self):
        """Ordinal-encode ``utt-6`` frame by frame and compare with the expectation.

        The encoder is created for the labels ``music``, ``speech`` and
        ``noise`` with ``FrameSettings(32000, 16000)`` and ``sr=16000``; the
        expected result is one integer per frame.
        """
        ds = resources.create_multi_label_corpus()
        enc = label_encoding.FrameOrdinalEncoder(
            ['music', 'speech', 'noise'],
            frame_settings=units.FrameSettings(32000, 16000),
            sr=16000)

        actual = enc.encode(ds.utterances['utt-6'])
        # ``np.int`` was merely an alias of the builtin ``int`` and has been
        # removed from NumPy (deprecated in 1.20, gone in 1.24); the builtin
        # yields the same dtype and works on every NumPy version.
        expected = np.array([0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0,
                             0]).astype(int)

        assert np.array_equal(expected, actual)
Exemplo n.º 6
0
    def test_split_by_label_length_only_uses_given_label_list(self, split_mock):
        """Label-length split must only count the requested label list.

        Every utterance receives an additional label list under the idx
        ``'some-idx'``; the split is requested for ``label_list_idx='default'``
        and the per-utterance lengths handed to ``split_mock`` are pinned.

        NOTE(review): ``split_mock`` is presumably injected by a patch
        decorator that is outside this excerpt -- confirm against the full file.
        NOTE(review): the mocked return value lists ``utt-3`` in both ``train``
        and ``test``; the assertions mirror that, so it may be intentional.
        """
        corpus = resources.create_multi_label_corpus()
        splitter = splitting.Splitter(corpus, random_seed=INITIAL_SEED)

        # Attach a second label list that is not the one requested below.
        for utterance in corpus.utterances.values():
            extra_labels = annotations.LabelList.create_single(
                'another label', idx='some-idx'
            )
            utterance.set_label_list(extra_labels)

        split_mock.return_value = {
            'train': ['utt-1', 'utt-3'],
            'test': ['utt-3', 'utt-4'],
            'dev': ['utt-5', 'utt-6'],
        }
        proportions = {'train': 0.6, 'test': 0.2, 'dev': 0.2}

        # A copy is passed so the call assertion below compares against a
        # dict the splitter never had access to.
        res = splitter.split_by_label_length(
            dict(proportions),
            label_list_idx='default',
        )

        expected_utterances = {
            'train': {'utt-1', 'utt-3'},
            'test': {'utt-3', 'utt-4'},
            'dev': {'utt-5', 'utt-6'},
        }
        for subset_name, utt_ids in expected_utterances.items():
            assert res[subset_name].utterances.keys() == utt_ids

        expected_lengths = {
            'utt-1': {'length': 16},
            'utt-2': {'length': 16},
            'utt-3': {'length': 11},
            'utt-4': {'length': 16},
            'utt-5': {'length': 6},
            'utt-6': {'length': 16},
            'utt-7': {'length': 11},
            'utt-8': {'length': 5},
        }
        split_mock.assert_called_with(
            expected_lengths,
            proportions,
            seed=mock.ANY,
        )
Exemplo n.º 7
0
def corpus():
    """Return a freshly created multi-label sample corpus.

    NOTE(review): presumably registered as a pytest fixture by a decorator
    outside this excerpt -- confirm against the full file.
    """
    multi_label_corpus = resources.create_multi_label_corpus()
    return multi_label_corpus
Exemplo n.º 8
0
def splitter():
    """Return a ``Splitter`` wrapping a freshly created multi-label corpus.

    No random seed is passed, so the splitter uses its own default.
    """
    return splitting.Splitter(resources.create_multi_label_corpus())
Exemplo n.º 9
0
 def setUp(self):
     """Create a fresh multi-label corpus and a splitter over it for each test."""
     fresh_corpus = resources.create_multi_label_corpus()
     self.corpus = fresh_corpus
     self.splitter = splitting.Splitter(fresh_corpus)
Exemplo n.º 10
0
 def setUp(self):
     """Create a fresh multi-label corpus and store it on ``self.ds``."""
     fresh_corpus = resources.create_multi_label_corpus()
     self.ds = fresh_corpus
Exemplo n.º 11
0
def splitter():
    """Return a ``Splitter`` over a fresh multi-label corpus, seeded with ``INITIAL_SEED``."""
    return splitting.Splitter(
        resources.create_multi_label_corpus(),
        random_seed=INITIAL_SEED,
    )