def test_ranges_zero_to_end(self):
    """Check ``ranges()`` when one label runs from 0 to infinity.

    Label 'a' spans 0..inf and label 'b' spans 5.1..8.9. Three ranges
    are expected: 0..5.1 with 'a', 5.1..8.9 with both labels, and
    8.9..inf with 'a' again, after which the iterator is exhausted.
    """
    endless = Label('a', 0, float('inf'))
    bounded = Label('b', 5.1, 8.9)
    labels = [endless, bounded]

    ll = LabelList(labels=labels)
    range_iter = ll.ranges()

    # Before 'b' starts only the endless label is present.
    first = next(range_iter)
    assert first[0] == 0
    assert first[1] == 5.1
    assert endless in first[2]

    # While 'b' lasts both labels are present.
    second = next(range_iter)
    assert second[0] == 5.1
    assert second[1] == 8.9
    assert endless in second[2]
    assert bounded in second[2]

    # After 'b' ends the endless label continues up to infinity.
    third = next(range_iter)
    assert third[0] == 8.9
    assert third[1] == float('inf')
    assert endless in third[2]

    with pytest.raises(StopIteration):
        next(range_iter)
def test_split_without_cutting_points_raises_error(self):
    """``split`` raises ``ValueError`` for an empty list of cutting points."""
    labels = [Label('a', 0.0, 9.0), Label('c', 9.0, 12.0)]
    ll = LabelList(idx='test', labels=labels)
    no_cutting_points = []

    with pytest.raises(ValueError):
        ll.split(no_cutting_points)
def test_no_missing_projections_if_projection_complete(self):
    """No combination is reported missing when every one has a projection."""
    projections = {
        ('b',): 'foo',
        ('a', 'b'): 'foo',
        ('a', 'b', 'c'): 'foo',
        ('a', 'c'): 'foo',
        ('c',): 'bar',
    }
    overlapping_labels = [
        Label('b', 3.2, 4.5),
        Label('a', 4.0, 4.9),
        Label('c', 4.2, 5.1),
    ]
    label_list = LabelList(labels=overlapping_labels)

    missing = relabeling.find_missing_projections(label_list, projections)

    assert len(missing) == 0
def test_all_projections_missing_if_no_projections_defined(self):
    """With an empty projection mapping all five combinations are missing."""
    overlapping_labels = [
        Label('b', 3.2, 4.5),
        Label('a', 4.0, 4.9),
        Label('c', 4.2, 5.1),
    ]
    label_list = LabelList(labels=overlapping_labels)

    missing = relabeling.find_missing_projections(label_list, {})

    expected_combinations = [
        ('b',),
        ('a', 'b'),
        ('a', 'b', 'c'),
        ('a', 'c'),
        ('c',),
    ]

    assert len(missing) == 5
    for combination in expected_combinations:
        assert combination in missing
def test_tokenized(self):
    """``tokenized`` returns the space-separated tokens of all labels in order."""
    labels = [
        Label('a u t', 0.0, 4.0),
        Label('b x', 4.0, 8.0),
        Label('c', 9.0, 12.0),
    ]
    ll = LabelList(idx='test', labels=labels)

    tokens = ll.tokenized()

    assert tokens == ['a', 'u', 't', 'b', 'x', 'c']
def test_join_with_custom_delimiter(self):
    """``join`` places the given delimiter between the label values."""
    labels = [
        Label('a', 0.0, 4.0),
        Label('b', 4.0, 8.0),
        Label('c', 9.0, 12.0),
    ]
    ll = LabelList(idx='test', labels=labels)

    joined = ll.join(delimiter=' - ')

    assert joined == 'a - b - c'
def test_join(self):
    """``join`` without arguments yields the label values separated by a space."""
    labels = [
        Label('a', 0.0, 4.0),
        Label('b', 4.0, 8.0),
        Label('c', 9.0, 12.0),
    ]
    ll = LabelList(idx='test', labels=labels)

    joined = ll.join()

    assert joined == 'a b c'
def test_len(self):
    """``len`` of a label list equals the number of labels it was built with."""
    labels = [
        Label('some text'),
        Label('more text'),
        Label('text again'),
    ]
    ll = LabelList(labels=labels)

    assert len(ll) == 3
def test_relabel_maps_a_onto_b(self):
    """Relabeling with ``('a',) -> 'b'`` renames the label and keeps its times."""
    source = LabelList(labels=[Label('a', 3.2, 4.5)])
    projections = {('a',): 'b'}

    actual = relabeling.relabel(source, projections)

    expected = LabelList(labels=[Label('b', 3.2, 4.5)])
    assert actual == expected
def create_sample_dataset(temp_dir):
    """Build and return a small sample corpus located at ``temp_dir``.

    The corpus gets three tracks (two wav files, one flac file), three
    issuers (two speakers and one plain issuer), four utterances with a
    word-transcript label list each, and the subviews ``'train'``
    (utt-1, utt-2, utt-3) and ``'dev'`` (utt-4).
    """
    ds = audiomate.Corpus(str(temp_dir))

    # Resolve the audio resources first, then register them as tracks.
    wav_1_path = resources.sample_wav_file('wav_1.wav')
    wav_2_path = resources.sample_wav_file('wav_2.wav')
    flac_path = resources.get_resource_path(
        ['audio_formats', 'flac_1_16k_16b.flac'])

    file_1 = ds.new_file(wav_1_path, track_idx='wav_1')
    file_2 = ds.new_file(wav_2_path, track_idx='wav_2')
    file_3 = ds.new_file(flac_path, track_idx='wav_3')

    issuer_1 = Speaker('spk-1', gender=Gender.MALE)
    issuer_2 = Speaker('spk-2', gender=Gender.FEMALE)
    issuer_3 = Issuer('spk-3')
    ds.import_issuers([issuer_1, issuer_2, issuer_3])

    # 2.5951875 (presumably the duration of wav_1 in seconds -- TODO confirm)
    utt_1 = ds.new_utterance('utt-1', file_1.idx, issuer_idx=issuer_1.idx)
    utt_2 = ds.new_utterance('utt-2', file_2.idx,
                             issuer_idx=issuer_2.idx, start=0, end=1.5)
    utt_3 = ds.new_utterance('utt-3', file_2.idx,
                             issuer_idx=issuer_2.idx, start=1.5, end=2.5)
    # 5.0416875 (presumably the duration of the flac file in seconds -- TODO confirm)
    utt_4 = ds.new_utterance('utt-4', file_3.idx, issuer_idx=issuer_3.idx)

    # Attach one single-label word transcript to every utterance.
    transcripts = (
        (utt_1, 'who am i'),
        (utt_2, 'who are you'),
        (utt_3, 'who is he'),
        (utt_4, 'who are they'),
    )
    for utterance, text in transcripts:
        utterance.set_label_list(
            LabelList(audiomate.corpus.LL_WORD_TRANSCRIPT,
                      labels=[Label(text)]))

    train_filter = subview.MatchingUtteranceIdxFilter(
        utterance_idxs={'utt-1', 'utt-2', 'utt-3'})
    train_view = subview.Subview(ds, filter_criteria=[train_filter])

    dev_filter = subview.MatchingUtteranceIdxFilter(utterance_idxs={'utt-4'})
    dev_view = subview.Subview(ds, filter_criteria=[dev_filter])

    ds.import_subview('train', train_view)
    ds.import_subview('dev', dev_view)

    return ds
def test_tokenized_raises_error_if_overlap_is_higher_than_threshold_given_an_endless_label(self):
    """``tokenized`` raises ``ValueError`` when an endless label overlaps another.

    Label 'b x' starts at 4.5 and never ends, so it overlaps label 'c'
    (9.0..12.0).
    """
    labels = [
        Label('a u t', 0.0, 4.0),
        Label('b x', 4.5, float('inf')),
        Label('c', 9.0, 12.0),
    ]
    ll = LabelList(idx='test', labels=labels)

    with pytest.raises(ValueError):
        ll.tokenized()
def test_tokenized_raises_error_if_overlap_is_higher_than_threshold(self):
    """``tokenized`` raises ``ValueError`` for labels overlapping by 0.15.

    Label 'a u t' ends at 4.0 while label 'b x' already starts at 3.85.
    """
    labels = [
        Label('a u t', 0.0, 4.0),
        Label('b x', 3.85, 8.0),
        Label('c', 9.0, 12.0),
    ]
    ll = LabelList(idx='test', labels=labels)

    with pytest.raises(ValueError):
        ll.tokenized()
def test_join_raises_error_if_overlap_is_higher_than_threshold(self):
    """``join`` raises ``ValueError`` when labels overlap beyond the threshold.

    Labels 'a' and 'b' overlap by 0.2 (3.8..4.0) while the threshold
    passed to ``join`` is 0.1.
    """
    labels = [
        Label('a', 0.0, 4.0),
        Label('b', 3.8, 8.0),
        Label('c', 9.0, 12.0),
    ]
    ll = LabelList(idx='test', labels=labels)

    with pytest.raises(ValueError):
        ll.join(overlap_threshold=0.1)
def test_end(self):
    """``end`` is the largest end time among the (unsorted) labels."""
    unsorted_labels = [
        Label('c', 9.0, 12.0),
        Label('a', 0.2, 4.0),
        Label('b', 4.0, 8.0),
        Label('c', 12.05, 14.0),
        Label('b', 7.9, 9.3),
    ]
    ll = LabelList(idx='test', labels=unsorted_labels)

    assert ll.end == 14.0
def test_split_with_cutting_point_after_last_label(self):
    """A cutting point past the last label yields an empty second part."""
    labels = [Label('a', 0.0, 4.0), Label('c', 4.0, 8.9)]
    ll = LabelList(idx='test', labels=labels)

    parts = ll.split([10.5])

    assert len(parts) == 2
    # Both labels end before 10.5, so they all land in the first part.
    assert len(parts[0]) == 2
    assert len(parts[1]) == 0
def test_label_values(self):
    """``label_values`` lists each distinct value once: 'a', 'b', 'c'."""
    labels = [
        Label('a', 3.2, 4.5),
        Label('b', 5.1, 8.9),
        Label('c', 7.2, 10.5),
        Label('a', 10.5, 14),
        Label('c', 13, 14),
    ]
    ll = LabelList(labels=labels)

    values = ll.label_values()

    assert values == ['a', 'b', 'c']
def test_total_length(self):
    """``total_length`` sums the value lengths: 3 + 2 + 4 + 4 + 2 == 15."""
    labels = [
        Label('abc', 3.2, 4.5),
        Label('bg', 5.1, 8.9),
        Label('caaf', 7.2, 10.5),
        Label('yxva', 10.5, 14),
        Label('cy', 13, 14),
    ]
    ll = LabelList(labels=labels)

    assert ll.total_length == 15
def test_end_returns_inf(self):
    """``end`` is infinity as soon as one label has an infinite end."""
    unsorted_labels = [
        Label('c', 9.0, 12.0),
        Label('a', 0.2, 4.0),
        Label('b', 4.0, float('inf')),
        Label('c', 12.05, 14.0),
        Label('b', 7.9, 9.3),
    ]
    ll = LabelList(idx='test', labels=unsorted_labels)

    assert ll.end == float('inf')
def test_split_unsorted_label_list(self):
    """Splitting works when the labels are not given in chronological order.

    Cutting at 1.9, 6.2 and 10.5 yields four label lists; labels that
    cross a cutting point are divided at that point.
    """
    unsorted_labels = [
        Label('a', 0.0, 4.0),
        Label('c', 9.0, 12.0),
        Label('b', 4.0, 8.0),
    ]
    ll = LabelList(idx='test', labels=unsorted_labels)

    res = ll.split([1.9, 6.2, 10.5])

    expected = [
        LabelList(idx='test', labels=[Label('a', 0.0, 1.9)]),
        LabelList(idx='test', labels=[
            Label('a', 1.9, 4.0),
            Label('b', 4.0, 6.2),
        ]),
        LabelList(idx='test', labels=[
            Label('b', 6.2, 8.0),
            Label('c', 9.0, 10.5),
        ]),
        LabelList(idx='test', labels=[Label('c', 10.5, 12.0)]),
    ]
    assert res == expected
def test_split_with_overlap(self):
    """Splitting with ``overlap=2.0`` extends every part by 2.0 on each cut.

    With cutting points 1.9, 6.2 and 10.5 the expected parts cover
    labels up to 3.9, from 0 to 8.2, from 4.2 onward and from 8.5 onward.
    """
    labels = [
        Label('a', 0.0, 4.0),
        Label('b', 4.0, 8.0),
        Label('c', 9.0, 12.0),
    ]
    ll = LabelList(idx='test', labels=labels)

    res = ll.split([1.9, 6.2, 10.5], overlap=2.0)

    expected = [
        LabelList(idx='test', labels=[Label('a', 0.0, 3.9)]),
        LabelList(idx='test', labels=[
            Label('a', 0, 4.0),
            Label('b', 4.0, 8.0),
        ]),
        LabelList(idx='test', labels=[
            Label('b', 4.2, 8.0),
            Label('c', 9.0, 12.0),
        ]),
        LabelList(idx='test', labels=[Label('c', 9.0, 12.0)]),
    ]
    assert res == expected
def test_split_cutting_point_on_boundary_doesnot_split_label(self):
    """A cutting point exactly on a label boundary leaves both labels whole."""
    labels = [Label('a', 0.0, 9.0), Label('c', 9.0, 12.0)]
    ll = LabelList(idx='test', labels=labels)

    parts = ll.split([9.0])

    assert len(parts) == 2
    # One label per part: nothing was divided at 9.0.
    assert len(parts[0]) == 1
    assert len(parts[1]) == 1
def test_with_label_values(self):
    """``with_label_values`` builds a list holding one label per given value."""
    values = ['a', 'b', 'c']

    ll = LabelList.with_label_values(values)

    expected = LabelList(labels=[Label('a'), Label('b'), Label('c')])
    assert ll == expected
def test_split_single_label_that_doesnt_start_at_zero(self):
    """Split one label starting at 8.0 with ``shift_times=True``.

    The first part keeps the original times (8.0..11.2); the second
    part is shifted so that it starts at 0.0 and ends at about 0.8.
    """
    ll = LabelList(idx='test', labels=[Label('c', 8.0, 12.0)])

    parts = ll.split([11.2], shift_times=True)

    assert len(parts) == 2

    head = parts[0]
    assert len(head) == 1
    assert sorted(head)[0] == Label('c', 8.0, 11.2)

    tail = parts[1]
    assert len(tail) == 1
    # 12.0 - 11.2 is not exactly 0.8 in floating point, hence approx.
    assert sorted(tail)[0] == Label('c', 0.0, pytest.approx(0.8))
def test_is(self):
    """A second name bound to a label list refers to the identical object."""
    labels = [
        Label('a', 0.0, 4.0),
        Label('b', 4.0, 8.0),
        Label('b', 7.9, 9.3),
        Label('c', 9.0, 12.0),
        Label('c', 12.05, 14.0),
    ]
    ll_a = LabelList(idx='test', labels=labels)

    # Plain assignment: no copy is made.
    ll_b = ll_a

    assert ll_a is ll_b
def test_iter(self):
    """Iterating a label list yields its labels; sorted they order by value here."""
    ll = LabelList(labels=[
        Label('some text'),
        Label('more text'),
        Label('text again'),
    ])

    ordered = sorted(ll)

    expected_values = ('more text', 'some text', 'text again')
    for position, expected in enumerate(expected_values):
        assert ordered[position].value == expected
def test_with_label_values_sets_correct_idx(self):
    """``with_label_values`` passes the given ``idx`` on to the new list."""
    values = ['a', 'b', 'c']

    ll = LabelList.with_label_values(values, idx='letters')

    expected = LabelList(
        idx='letters',
        labels=[Label('a'), Label('b'), Label('c')],
    )
    assert ll == expected
def test_merge_overlaps_with_threshold(self):
    """Equal labels separated by a gap below the threshold are merged.

    The two 'c' labels are 0.05 apart, which is within the 0.1
    threshold, so a single label spanning 9.0..14.0 is expected.
    """
    close_labels = [
        Label('c', 9.0, 12.0),
        Label('c', 12.05, 14.0),
    ]
    ll = LabelList(idx='test', labels=close_labels)

    # The comparison below relies on ``ll`` itself being changed.
    ll.merge_overlaps(threshold=0.1)

    expected = LabelList(idx='test', labels=[Label('c', 9.0, 14.0)])
    assert ll == expected
def test_ranges_include_labels(self):
    """``ranges(include_labels=['a'])`` yields only the range of label 'a'."""
    label_a = Label('a', 3.2, 4.5)
    label_b = Label('b', 5.1, 8.9)
    labels = [label_a, label_b]

    ll = LabelList(labels=labels)
    range_iter = ll.ranges(include_labels=['a'])

    only_range = next(range_iter)
    assert only_range[0] == 3.2
    assert only_range[1] == 4.5
    assert label_a in only_range[2]

    # Label 'b' is filtered out, so nothing else is yielded.
    with pytest.raises(StopIteration):
        next(range_iter)
def test_all_tokens(self):
    """``all_tokens`` returns every distinct token across all label values."""
    labels = [
        Label('some text'),
        Label('more text'),
        Label('text again'),
    ]
    ll = LabelList(labels=labels)

    # Sorted before comparing, so the order of the result does not matter.
    tokens = sorted(ll.all_tokens())

    assert tokens == ['again', 'more', 'some', 'text']
def test_split_label_within_cutting_points_is_included(self):
    """A label lying fully between two cutting points is kept unchanged.

    Label 'b' (4.0..8.0) sits entirely between the cuts at 1.9 and
    10.5, so it appears untouched in the middle part.
    """
    labels = [
        Label('a', 0.0, 4.0),
        Label('b', 4.0, 8.0),
        Label('c', 9.0, 12.0),
    ]
    ll = LabelList(idx='test', labels=labels)

    parts = ll.split([1.9, 10.5])
    middle_part = parts[1]

    assert len(middle_part) == 3

    # Second label of the sorted middle part is the untouched 'b'.
    untouched = sorted(middle_part)[1]
    assert untouched.value == 'b'
    assert untouched.start == 4.0
    assert untouched.end == 8.0