def test_relabel_flattens_full_overlap_into_combined_label(self):
    """Label 'b' lies completely inside 'a'; the shared span must map to 'a_b'."""
    projections = {
        ('a', ): 'a',
        ('b', ): 'b',
        ('a', 'b'): 'a_b',
    }
    source = assets.LabelList(labels=[
        assets.Label('a', 3.2, 4.9),
        assets.Label('b', 3.9, 4.5),
    ])

    actual = relabeling.relabel(source, projections)

    # (start, end, value) of every segment expected in the result, in order.
    expected_segments = [
        (3.2, 3.9, 'a'),
        (3.9, 4.5, 'a_b'),
        (4.5, 4.9, 'a'),
    ]

    assert len(actual) == len(expected_segments)

    for position, (start, end, value) in enumerate(expected_segments):
        assert actual[position].start == start
        assert actual[position].end == end
        assert actual[position].value == value
def setUp(self):
    """Create two utterances, each carrying an 'alpha' and a 'bravo' label-list.

    The utterances differ only in the last label of their 'alpha' list
    ('music' for utt-1, 'noise' for utt-2).
    """
    self.utt1 = assets.Utterance('utt-1', 'file-1')

    alpha_one = assets.LabelList(idx='alpha', labels=[
        assets.Label('music', 0, 5),
        assets.Label('speech', 5, 12),
        assets.Label('music', 13, 15),
    ])
    self.utt1.set_label_list(alpha_one)

    bravo_one = assets.LabelList(idx='bravo', labels=[
        assets.Label('music', 0, 1),
        assets.Label('speech', 2, 6),
    ])
    self.utt1.set_label_list(bravo_one)

    self.utt2 = assets.Utterance('utt-2', 'file-2')

    alpha_two = assets.LabelList(idx='alpha', labels=[
        assets.Label('music', 0, 5),
        assets.Label('speech', 5, 12),
        assets.Label('noise', 13, 15),
    ])
    self.utt2.set_label_list(alpha_two)

    bravo_two = assets.LabelList(idx='bravo', labels=[
        assets.Label('music', 0, 1),
        assets.Label('speech', 2, 6),
    ])
    self.utt2.set_label_list(bravo_two)
def test_relabel_proceeds_despite_unmapped_labels_in_presence_of_wildcard_rule(
        self):
    """Combinations without an explicit rule must fall back to the '**' rule."""
    source = assets.LabelList(labels=[
        assets.Label('a', 3.2, 5.1),
        assets.Label('b', 4.2, 4.7),
        assets.Label('c', 4.3, 4.8),
    ])
    projections = {
        ('a', ): 'new_label_a',
        ('**', ): 'catch_all',
    }

    actual = relabeling.relabel(source, projections)

    # (start, end, value) of every segment expected in the result, in order.
    expected_segments = [
        (3.2, 4.2, 'new_label_a'),
        (4.2, 4.3, 'catch_all'),
        (4.3, 4.7, 'catch_all'),
        (4.7, 4.8, 'catch_all'),
        (4.8, 5.1, 'new_label_a'),
    ]

    assert len(actual) == len(expected_segments)

    for position, (start, end, value) in enumerate(expected_segments):
        assert actual[position].start == start
        assert actual[position].end == end
        assert actual[position].value == value
def test_read_samples(self):
    """Samples read via a label must equal the matching slice loaded with librosa.

    The utterance starts at 1.0 s in the file, so the offsets passed to
    librosa are the label starts shifted by that amount.
    """
    audio_file = assets.File('wav', resources.sample_wav_file('wav_1.wav'))
    speaker = assets.Issuer('toni')
    utterance = assets.Utterance('test', audio_file, issuer=speaker,
                                 start=1.0, end=2.30)

    first = assets.Label('a', 0.15, 0.448)
    second = assets.Label('a', 0.5, 0.73)
    utterance.set_label_list(assets.LabelList(labels=[first, second]))

    # (label, offset into the file, duration) for every label under test.
    cases = (
        (first, 1.15, 0.298),
        (second, 1.5, 0.23),
    )

    for label, offset, duration in cases:
        expected, __ = librosa.core.load(audio_file.path,
                                         sr=None,
                                         offset=offset,
                                         duration=duration)
        assert np.array_equal(label.read_samples(), expected)
def test_all_projections_missing_if_no_projections_defined(self):
    """With an empty projection table every occurring combination is reported."""
    source = assets.LabelList(labels=[
        assets.Label('b', 3.2, 4.5),
        assets.Label('a', 4.0, 4.9),
        assets.Label('c', 4.2, 5.1),
    ])

    missing = relabeling.find_missing_projections(source, {})

    expected_combinations = [
        ('b', ),
        ('a', 'b'),
        ('a', 'b', 'c'),
        ('a', 'c'),
        ('c', ),
    ]

    assert len(missing) == 5

    for combination in expected_combinations:
        assert combination in missing
def test_ranges_zero_to_end(self):
    """A label ending at -1 must be part of every yielded range."""
    ll = assets.LabelList(labels=[
        assets.Label('a', 0, -1),
        assets.Label('b', 5.1, 8.9),
    ])

    ranges = ll.ranges()

    # (start, end, positions in ll of labels that must be in the range)
    expected_ranges = [
        (0, 5.1, (0, )),
        (5.1, 8.9, (0, 1)),
        (8.9, -1, (0, )),
    ]

    for start, end, member_positions in expected_ranges:
        current = next(ranges)
        self.assertEqual(start, current[0])
        self.assertEqual(end, current[1])

        for position in member_positions:
            self.assertIn(ll[position], current[2])

    # Nothing may be yielded after the last expected range.
    with self.assertRaises(StopIteration):
        next(ranges)
def test_relabel_removes_overlapping_segment(self):
    """A combination projected onto '' must be dropped from the result."""
    projections = {
        ('a', ): 'a',
        ('a', 'b'): '',
        ('b', ): 'b',
    }
    source = assets.LabelList(labels=[
        assets.Label('a', 3.2, 5.1),
        assets.Label('b', 4.2, 4.7),
    ])

    actual = relabeling.relabel(source, projections)

    # Only the parts of 'a' on either side of the overlap are expected.
    expected_segments = [
        (3.2, 4.2, 'a'),
        (4.7, 5.1, 'a'),
    ]

    assert len(actual) == len(expected_segments)

    for position, (start, end, value) in enumerate(expected_segments):
        assert actual[position].start == start
        assert actual[position].end == end
        assert actual[position].value == value
def test_no_missing_projections_if_projection_complete(self):
    """Nothing is reported when every occurring combination has a rule."""
    source = assets.LabelList(labels=[
        assets.Label('b', 3.2, 4.5),
        assets.Label('a', 4.0, 4.9),
        assets.Label('c', 4.2, 5.1),
    ])
    complete_projections = {
        ('b', ): 'foo',
        ('a', 'b'): 'foo',
        ('a', 'b', 'c'): 'foo',
        ('a', 'c'): 'foo',
        ('c', ): 'bar',
    }

    missing = relabeling.find_missing_projections(source,
                                                  complete_projections)

    assert len(missing) == 0
def test_eq_ignores_label_list_relation(self):
    """Two labels compare equal although they belong to different label-lists."""
    first = assets.Label('some label A', 1.0, 2.0)
    second = assets.Label('some label a', 1.0, 2.0)

    first_list = assets.LabelList(idx='one', labels=[first])
    second_list = assets.LabelList(idx='another', labels=[second])

    # Make sure each label really is attached to its own list ...
    assert first.label_list == first_list
    assert second.label_list == second_list

    # ... and that this does not influence the comparison.
    assert first == second
def test_extend(self):
    """Extending a label-list adds all labels and links each one back to it."""
    ll = assets.LabelList()
    new_labels = [
        assets.Label('some text'),
        assets.Label('more text'),
        assets.Label('text again'),
    ]

    ll.extend(new_labels)

    assert len(ll) == 3

    for label in new_labels:
        assert label.label_list == ll
def test_label_total_durations(self):
    """Durations are summed per label value over all labels of the list."""
    ll = assets.LabelList(labels=[
        assets.Label('a', 3.2, 4.5),
        assets.Label('b', 5.1, 8.9),
        assets.Label('c', 7.2, 10.5),
        assets.Label('a', 10.5, 14),
        assets.Label('c', 13, 14),
    ])

    durations = ll.label_total_duration()

    expected_durations = (
        ('a', 4.8),
        ('b', 3.8),
        ('c', 4.3),
    )

    for value, seconds in expected_durations:
        assert durations[value] == pytest.approx(seconds)
def test_label_count(self):
    """Occurrences are counted per label value."""
    ll = assets.LabelList(labels=[
        assets.Label('a', 3.2, 4.5),
        assets.Label('b', 5.1, 8.9),
        assets.Label('c', 7.2, 10.5),
        assets.Label('a', 10.5, 14),
        assets.Label('c', 13, 14),
    ])

    counts = ll.label_count()

    expected_counts = (
        ('a', 2),
        ('b', 1),
        ('c', 2),
    )

    for value, occurrences in expected_counts:
        self.assertEqual(occurrences, counts[value])
def test_ranges_include_labels(self):
    """Only ranges of the label values named in include_labels are yielded."""
    ll = assets.LabelList(labels=[
        assets.Label('a', 3.2, 4.5),
        assets.Label('b', 5.1, 8.9),
    ])

    ranges = ll.ranges(include_labels=['a'])

    only_range = next(ranges)
    self.assertEqual(3.2, only_range[0])
    self.assertEqual(4.5, only_range[1])
    self.assertIn(ll[0], only_range[2])

    # The range of label 'b' must not show up.
    with self.assertRaises(StopIteration):
        next(ranges)
def corpus_with_more_labels():
    """Return the single-label corpus extended by a 'radio' label-list.

    Every utterance from utt-1 to utt-8 receives a 'radio' label-list with
    exactly one label: 'alpha' for the first two, 'beta' for the remaining six.
    """
    corpus = resources.create_single_label_corpus()

    radio_values = (
        ('utt-1', 'alpha'),
        ('utt-2', 'alpha'),
        ('utt-3', 'beta'),
        ('utt-4', 'beta'),
        ('utt-5', 'beta'),
        ('utt-6', 'beta'),
        ('utt-7', 'beta'),
        ('utt-8', 'beta'),
    )

    for utterance_idx, value in radio_values:
        radio_list = assets.LabelList(idx='radio',
                                      labels=[assets.Label(value)])
        corpus.utterances[utterance_idx].set_label_list(radio_list)

    return corpus
def test_merge_consecutive_labels_with_same_values():
    """Two 'a' labels separated by a gap below the threshold become one label."""
    ll = assets.LabelList(labels=[
        assets.Label('a', 0, 0.993),
        assets.Label('a', 1.001, 2.8),
        assets.Label('b', 2.8, 3.94),
    ])

    # The list is modified in place, the return value is not used.
    label_cleaning.merge_consecutive_labels_with_same_values(ll,
                                                             threshold=0.01)

    merged_a = assets.Label('a', 0, 2.8)
    untouched_b = assets.Label('b', 2.8, 3.94)

    assert len(ll) == 2
    assert ll[0] == merged_a
    assert ll[1] == untouched_b
def test_relabel_flattens_partial_overlap_into_combined_label(self):
    """Three partially overlapping labels are split into five flat segments."""
    projections = {
        ('a', ): 'a',
        ('b', ): 'b',
        ('c', ): 'c',
        ('a', 'b'): 'a_b',
        ('a', 'b', 'c'): 'a_b_c',
        ('b', 'c'): 'b_c',
    }
    source = assets.LabelList(labels=[
        assets.Label('a', 3.2, 4.5),
        assets.Label('b', 4.0, 4.9),
        assets.Label('c', 4.2, 5.1),
    ])

    actual = relabeling.relabel(source, projections)

    # (start, end, value) of every segment expected in the result, in order.
    expected_segments = [
        (3.2, 4.0, 'a'),
        (4.0, 4.2, 'a_b'),
        (4.2, 4.5, 'a_b_c'),
        (4.5, 4.9, 'b_c'),
        (4.9, 5.1, 'c'),
    ]

    assert len(actual) == len(expected_segments)

    for position, (start, end, value) in enumerate(expected_segments):
        assert actual[position].start == start
        assert actual[position].end == end
        assert actual[position].value == value
def test_relabel_throws_error_if_unmapped_labels_are_detected(self):
    """Relabeling with an incomplete projection table must raise and name the gaps."""
    source = assets.LabelList(labels=[
        assets.Label('a', 3.2, 5.1),
        assets.Label('b', 4.2, 4.7),
        assets.Label('c', 4.3, 4.8),
    ])
    incomplete_projections = {('a', ): 'foo'}

    missing = [('a', 'b'), ('a', 'b', 'c'), ('a', 'c')]
    expected_message = 'Unmapped combinations: {}'.format(missing)

    with pytest.raises(relabeling.UnmappedLabelsException) as ex:
        relabeling.relabel(source, incomplete_projections)

    # Checked after the with-block; a statement following the raising call
    # inside the block would never run.
    assert ex.value.message == expected_message
def test_label_creation(self):
    """A label keeps the given value/start/end and starts without meta data."""
    label = assets.Label('value', 6.2, 8.9)

    observed = (label.value, label.start, label.end)

    assert observed == ('value', 6.2, 8.9)
    assert len(label.meta) == 0
def setUp(self):
    """Attach an open-ended label (end=-1) to an open-ended utterance.

    The label is kept in ``self.test_label``; the utterance starts at 0.3 s
    of the sample file and also has end=-1.
    """
    audio_file = assets.File('wav', resources.sample_wav_file('wav_1.wav'))
    utterance = assets.Utterance('utt', audio_file, start=0.3, end=-1)

    self.test_label = assets.Label('a', start=0.5, end=-1)

    label_list = assets.LabelList()
    label_list.append(self.test_label)

    utterance.set_label_list(label_list)
def read_labels(path, corpus):
    """Load all label files found in ``path`` and attach them to ``corpus``.

    Every file matching ``<LABEL_FILE_PREFIX>_*.txt`` yields one label-list per
    utterance; the part of the file name between the prefix and ``.txt`` is
    used as the label-list idx. Each record holds utterance idx, start, end and
    label value (space separated, at most four columns). If the value matches
    ``META_PATTERN``, group 1 is taken as the value and group 2 is parsed as
    JSON meta data.

    Args:
        path: Directory that is searched for label files.
        corpus: Corpus whose ``utterances`` receive the label-lists.
    """
    name_prefix = '{}_'.format(LABEL_FILE_PREFIX)
    file_pattern = os.path.join(path, '{}_*.txt'.format(LABEL_FILE_PREFIX))

    for label_path in glob.glob(file_pattern):
        file_name = os.path.basename(label_path)

        # Strip the prefix and the extension to get the label-list idx.
        key_end = len(file_name) - len('.txt')
        key = file_name[len(name_prefix):key_end]

        labels_per_utterance = collections.defaultdict(list)

        records = textfile.read_separated_lines_generator(label_path,
                                                          separator=' ',
                                                          max_columns=4)

        for record in records:
            value = record[3]
            start = float(record[1])
            end = float(record[2])
            meta = None

            match = META_PATTERN.match(value)

            if match is not None:
                meta = json.loads(match.group(2))
                value = match.group(1)

            utterance_bucket = labels_per_utterance[record[0]]
            utterance_bucket.append(
                assets.Label(value, start, end, meta=meta))

        for utterance_idx, utterance_labels in labels_per_utterance.items():
            label_list = assets.LabelList(idx=key, labels=utterance_labels)
            corpus.utterances[utterance_idx].set_label_list(label_list)
def test_missing_projections_are_naturally_sorted(self):
    """Missing combinations are returned in sorted order, not in input order."""
    source = assets.LabelList(labels=[
        assets.Label('b', 1.0, 2.0),
        assets.Label('a', 1.5, 2.5),
    ])

    missing = relabeling.find_missing_projections(source, {})

    expected_order = [
        ('a', ),
        ('a', 'b'),
        ('b', ),
    ]

    assert len(missing) == 3

    for position, combination in enumerate(expected_order):
        assert missing[position] == combination
def test_no_missing_projections_if_covered_by_catch_all_rule(self):
    """A '**' rule covers every combination that has no explicit projection."""
    source = assets.LabelList(labels=[
        assets.Label('b', 3.2, 4.5),
        assets.Label('a', 4.0, 4.9),
        assets.Label('c', 4.2, 5.1),
    ])
    projections_with_wildcard = {
        ('b', ): 'new_label_b',
        ('**', ): 'new_label_all',
    }

    missing = relabeling.find_missing_projections(
        source, projections_with_wildcard)

    assert len(missing) == 0
def test_encode_utterance_takes_lower_index_first(self):
    """Encode an utterance with 'music' (0-3 s) followed by 'speech' (3-5 s).

    The encoder is created with the labels ['speech', 'music', 'noise'] and
    the expected output is [1, 1, 0, 0].
    NOTE(review): presumably the third frame covers both labels and the
    lower ordinal ('speech') wins, as the test name suggests - confirm
    against FrameOrdinalEncoder.
    """
    file = assets.File('file-idx', resources.sample_wav_file('wav_1.wav'))
    utt = assets.Utterance('utt-idx', file, start=0, end=5)
    ll = assets.LabelList(labels=[
        assets.Label('music', 0, 3),
        assets.Label('speech', 3, 5),
    ])
    utt.set_label_list(ll)

    enc = label_encoding.FrameOrdinalEncoder(
        ['speech', 'music', 'noise'],
        frame_settings=units.FrameSettings(32000, 16000),
        sr=16000)

    actual = enc.encode(utt)

    # ``np.int`` was only an alias for the builtin ``int``; it was deprecated
    # in NumPy 1.20 and removed in 1.24, so the builtin is used directly.
    expected = np.array([1, 1, 0, 0]).astype(int)

    assert np.array_equal(expected, actual)
def test_relabel_removes_unwanted_labels(self):
    """A label projected onto '' disappears, the other one is kept unchanged."""
    projections = {
        ('a', ): '',
        ('b', ): 'b',
    }
    source = assets.LabelList(labels=[
        assets.Label('a', 3.2, 4.4),
        assets.Label('b', 4.4, 5.1),
    ])

    actual = relabeling.relabel(source, projections)

    assert len(actual) == 1

    remaining = actual[0]
    assert remaining.start == 4.4
    assert remaining.end == 5.1
    assert remaining.value == 'b'
def test_label_creation_with_info(self):
    """Meta data passed on creation is available via the ``meta`` attribute."""
    label = assets.Label('value', 6.2, 8.9, meta={'something': 2})

    observed = (label.value, label.start, label.end)

    assert observed == ('value', 6.2, 8.9)
    assert len(label.meta) == 1
    assert label.meta['something'] == 2
def test_append(self):
    """Appending a label grows the list and links the label back to it."""
    target_list = assets.LabelList()
    new_label = assets.Label('some text')

    target_list.append(new_label)

    assert len(target_list) == 1
    assert new_label.label_list == target_list
def test_set_label_list(self):
    """Adding a fourth label-list keeps the existing three and links all to the utterance."""
    ll_4 = assets.LabelList(idx='delta', labels=[
        assets.Label('y', 0.0, 3.3),
        assets.Label('t', 3.8, 7.9),
    ])

    self.utt.set_label_list(ll_4)

    # idx under which each label-list has to be registered on the utterance.
    expected_lists = (
        ('alpha', self.ll_1),
        ('bravo', self.ll_2),
        ('charlie', self.ll_3),
        ('delta', ll_4),
    )

    assert len(self.utt.label_lists) == 4

    for idx, label_list in expected_lists:
        assert self.utt.label_lists[idx] == label_list

    for __, label_list in expected_lists:
        assert label_list.utterance == self.utt
def test_relabel_maps_a_onto_b(self):
    """A single label is renamed while its start and end stay the same."""
    source = assets.LabelList(labels=[assets.Label('a', 3.2, 4.5)])
    projections = {('a', ): 'b'}

    actual = relabeling.relabel(source, projections)

    assert len(actual) == 1

    renamed = actual[0]
    assert renamed.start == 3.2
    assert renamed.end == 4.5
    assert renamed.value == 'b'
def test_no_duplicate_missing_projections_reported(self):
    """A combination occurring twice in the list is reported only once."""
    source = assets.LabelList(labels=[
        assets.Label('b', 1.0, 2.0),
        assets.Label('a', 1.5, 2.5),
        assets.Label('b', 3.0, 4.0),
        assets.Label('a', 3.5, 4.5),
    ])

    missing = relabeling.find_missing_projections(source, {})

    expected_combinations = [
        ('b', ),
        ('a', 'b'),
        ('a', ),
    ]

    assert len(missing) == 3

    for combination in expected_combinations:
        assert combination in missing
def test_ranges_with_empty(self):
    """With yield_ranges_without_labels=True the gap between labels is yielded too."""
    ll = assets.LabelList(labels=[
        assets.Label('a', 3.2, 4.5),
        assets.Label('b', 5.1, 8.9),
        assets.Label('c', 7.2, 10.5),
        assets.Label('d', 10.5, 14),
    ])

    ranges = ll.ranges(yield_ranges_without_labels=True)

    # (start, end, positions in ll of labels that must be in the range);
    # an empty tuple marks a range that must not contain any label.
    expected_ranges = [
        (3.2, 4.5, (0, )),
        (4.5, 5.1, ()),
        (5.1, 7.2, (1, )),
        (7.2, 8.9, (1, 2)),
        (8.9, 10.5, (2, )),
        (10.5, 14, (3, )),
    ]

    for start, end, member_positions in expected_ranges:
        current = next(ranges)
        self.assertEqual(start, current[0])
        self.assertEqual(end, current[1])

        if not member_positions:
            self.assertEqual(0, len(current[2]))

        for position in member_positions:
            self.assertIn(ll[position], current[2])

    # Nothing may be yielded after the last expected range.
    with self.assertRaises(StopIteration):
        next(ranges)