def test_read_samples(self):
    """Check that ``Label.read_samples`` returns the samples of the label span.

    The reference samples are loaded directly from the wav file. The offsets
    used for the reference are the utterance start (1.0) plus the start of
    the respective label.
    """
    path = resources.sample_wav_file('wav_1.wav')
    track = tracks.FileTrack('wav', path)
    issuer = issuers.Issuer('toni')
    utt = tracks.Utterance('t', track, issuer=issuer, start=1.0, end=2.30)

    l1 = annotations.Label('a', 0.15, 0.448)
    l2 = annotations.Label('a', 0.5, 0.73)
    ll = annotations.LabelList(labels=[l1, l2])
    utt.set_label_list(ll)

    # First label: offset 1.0 + 0.15, duration 0.448 - 0.15.
    expected, __ = librosa.core.load(path, sr=None, offset=1.15,
                                     duration=0.298)
    assert np.array_equal(l1.read_samples(), expected)

    # Second label: offset 1.0 + 0.5, ending at 1.0 + 0.73.
    expected, __ = librosa.core.load(path, sr=None, offset=1.5,
                                     duration=1.73 - 1.5)
    assert np.array_equal(l2.read_samples(), expected)
def test_validate_returns_part_of_overlapping_label(self):
    """The overflow validator reports only part of a label crossing a boundary.

    ``utt-4`` gets a label running up to 13.0 and ``utt-6`` one starting at
    -2.0; the reported segments are expected to be ``(10.0, 13.0, 'b')`` and
    ``(-2.0, 0.0, 'a')`` respectively.
    """
    corpus = resources.create_single_label_corpus()

    labels_utt4 = [
        annotations.Label('a', start=0.0, end=9.0),
        annotations.Label('b', start=9.0, end=13.0),
    ]
    corpus.utterances['utt-4'].set_label_list(
        annotations.LabelList(idx='default', labels=labels_utt4))

    labels_utt6 = [
        annotations.Label('a', start=-2.0, end=5.9),
        annotations.Label('b', start=5.9, end=14.7),
    ]
    corpus.utterances['utt-6'].set_label_list(
        annotations.LabelList(idx='default', labels=labels_utt6))

    validator = validation.LabelOverflowValidator('default')
    result = validator.validate(corpus)

    assert not result.passed

    overflow = result.overflow_segments
    assert set(overflow.keys()) == {'utt-4', 'utt-6'}
    assert overflow['utt-4'] == [(10.0, 13.0, 'b')]
    assert overflow['utt-6'] == [(-2.0, 0.0, 'a')]
def test_validate(self):
    """The coverage validator reports the gaps left uncovered by labels.

    ``utt-4`` has a gap between its two labels, ``utt-6`` has labels that
    start at 1.33 and end at 14.7; the expected uncovered segments are
    asserted per utterance.
    """
    corpus = resources.create_single_label_corpus()

    labels_utt4 = [
        annotations.Label('a', start=0.0, end=1.44),
        annotations.Label('a', start=1.89, end=10.0),
    ]
    corpus.utterances['utt-4'].set_label_list(
        annotations.LabelList(idx='default', labels=labels_utt4))

    labels_utt6 = [
        annotations.Label('a', start=1.33, end=5.9),
        annotations.Label('a', start=5.9, end=14.7),
    ]
    corpus.utterances['utt-6'].set_label_list(
        annotations.LabelList(idx='default', labels=labels_utt6))

    validator = validation.LabelCoverageValidator('default')
    result = validator.validate(corpus)

    assert not result.passed

    uncovered = result.uncovered_segments
    assert set(uncovered.keys()) == {'utt-4', 'utt-6'}
    assert uncovered['utt-4'] == [(1.44, 1.89)]
    assert uncovered['utt-6'] == [(0.0, 1.33), (14.7, 15.0)]
def test_split(self):
    """Splitting an utterance at two cut points yields three sub-utterances.

    Every part is expected to have the matching start/end and to carry both
    the ``phones`` and the ``words`` label-list.
    """
    phones = annotations.LabelList('phones', labels=[
        annotations.Label('alpha', start=0.0, end=30.0)
    ])
    words = annotations.LabelList('words', labels=[
        annotations.Label('b', start=0.0, end=30.0)
    ])
    utt = tracks.Utterance('utt-1', 'track-x', start=0.0, end=40.0,
                           label_lists=[phones, words])

    parts = utt.split([14.0, 29.5])

    assert len(parts) == 3

    expected_bounds = [(0.0, 14.0), (14.0, 29.5), (29.5, 40.0)]
    for index, (start, end) in enumerate(expected_bounds):
        part = parts[index]
        assert part.start == start
        assert part.end == end
        assert 'phones' in part.label_lists.keys()
        assert 'words' in part.label_lists.keys()
def test_encode_label_ends_at_utterance_end(self):
    """Frame-hot encoding of an utterance whose last label has an infinite end.

    The encoder is set up with the classes ``music``, ``speech`` and
    ``noise`` (in that column order); the result is compared against ten
    hand-written frames.
    """
    wav_path = resources.sample_wav_file('med_len.wav')
    track = tracks.FileTrack('file1', wav_path)
    utt = tracks.Utterance('utt1', track, start=3, end=14)

    labels = [
        annotations.Label('speech', 0, 4),
        annotations.Label('music', 4, 9),
        annotations.Label('speech', 9, float('inf')),
    ]
    utt.set_label_list(annotations.LabelList(labels=labels))

    frame_settings = units.FrameSettings(32000, 16000)
    encoder = encoding.FrameHotEncoder(['music', 'speech', 'noise'],
                                       'default',
                                       frame_settings=frame_settings,
                                       sr=16000)

    actual = encoder.encode_utterance(utt)

    # Columns: music, speech, noise.
    speech_only = [0, 1, 0]
    music_only = [1, 0, 0]
    music_and_speech = [1, 1, 0]
    rows = (
        [speech_only] * 3
        + [music_and_speech]
        + [music_only] * 4
        + [music_and_speech]
        + [speech_only]
    )
    expected = np.array(rows).astype(np.float32)

    assert np.array_equal(expected, actual)
def test_ordering_both_ref_none(self):
    """Pairs without a reference label: the one with the earlier hyp start sorts first."""
    hyp_a = annotations.Label('a2', start=1.66, end=1.92)
    pair_a = alignment.LabelPair(None, hyp_a)

    hyp_b = annotations.Label('b2', start=1.65, end=1.92)
    pair_b = alignment.LabelPair(None, hyp_b)

    assert pair_b < pair_a
def test_read_label_list_de(self):
    """Read ``audacity_labels_de.txt`` (next to this module) and compare the labels."""
    here = os.path.dirname(__file__)
    label_path = os.path.join(here, 'audacity_labels_de.txt')

    loaded = audacity.read_label_list(label_path)

    expected_labels = [
        annotations.Label('music', 43352.824046, 43525.837661),
        annotations.Label('speech_male', 43512.446969, 43531.343483),
    ]
    assert loaded == annotations.LabelList(labels=expected_labels)
def test_equals(self):
    """Two segments with the same bounds and equal reference labels compare equal."""
    first_refs = [annotations.Label('a'), annotations.Label('a')]
    seg_a = alignment.Segment(0.0, 0.9, ref=first_refs)

    second_refs = [annotations.Label('a'), annotations.Label('a')]
    seg_b = alignment.Segment(0.0, 0.9, ref=second_refs)

    assert seg_a == seg_b
def test_compare_multi_labels_returns_smaller_start_time(self):
    """With equal reference labels, the segment starting earlier is the smaller one."""
    earlier_refs = [annotations.Label('a'), annotations.Label('a')]
    seg_a = alignment.Segment(0.0, 0.9, ref=earlier_refs)

    later_refs = [annotations.Label('a'), annotations.Label('a')]
    seg_b = alignment.Segment(0.2, 0.8, ref=later_refs)

    assert seg_a < seg_b
def test_read_label_list_with_empty_value(self):
    """Read ``audacity_labels_empty_value.txt``; one label is expected to have an empty value."""
    here = os.path.dirname(__file__)
    label_path = os.path.join(here, 'audacity_labels_empty_value.txt')

    loaded = audacity.read_label_list(label_path)

    expected_labels = [
        annotations.Label('music', 1, 4),
        annotations.Label('', 4, 7),
        annotations.Label('speech_male', 7, 9),
    ]
    assert loaded == annotations.LabelList(labels=expected_labels)
def test_ordering_only_end_differs(self):
    """Pairs whose reference labels differ only in end: the earlier end sorts first."""
    ref_a = annotations.Label('a1', start=1.55, end=1.88)
    hyp_a = annotations.Label('a2', start=1.66, end=1.92)
    pair_a = alignment.LabelPair(ref_a, hyp_a)

    ref_b = annotations.Label('b1', start=1.55, end=1.87)
    hyp_b = annotations.Label('b2', start=1.66, end=1.92)
    pair_b = alignment.LabelPair(ref_b, hyp_b)

    assert pair_b < pair_a
def test_align_insertion(self):
    """An empty reference aligned with one hypothesis label gives a ``(None, hyp)`` pair."""
    reference = []
    hypothesis = [annotations.Label('y', 7.6, 15.2)]

    aligner = alignment.FullMatchingAligner(0.1)
    pairs = aligner.align(reference, hypothesis)

    expected_pair = alignment.LabelPair(None, annotations.Label('y', 7.6, 15.2))
    assert pairs == [expected_pair]
def test_align_deletion(self):
    """One reference label aligned with an empty hypothesis gives a ``(ref, None)`` pair."""
    reference = [annotations.Label('a', 4.2, 8.5)]
    hypothesis = []

    aligner = alignment.FullMatchingAligner(0.1)
    pairs = aligner.align(reference, hypothesis)

    expected_pair = alignment.LabelPair(annotations.Label('a', 4.2, 8.5), None)
    assert pairs == [expected_pair]
def test_compare_multi_labels_with_same_times_returns_smaller_label_value(
        self):
    """With identical bounds, the segment with the smaller reference label values is smaller."""
    refs_a = [annotations.Label('a'), annotations.Label('a')]
    seg_a = alignment.Segment(0.0, 1.0, ref=refs_a)

    refs_b = [annotations.Label('a'), annotations.Label('b')]
    seg_b = alignment.Segment(0.0, 1.0, ref=refs_b)

    assert seg_a < seg_b
def test_eq_ignores_label_list_relation(self):
    """Labels are expected to compare equal even when owned by different label-lists.

    Note that the two values used here differ only in letter case.
    """
    first = annotations.Label('some label A', 1.0, 2.0)
    second = annotations.Label('some label a', 1.0, 2.0)

    owner_first = annotations.LabelList(idx='one', labels=[first])
    owner_second = annotations.LabelList(idx='another', labels=[second])

    # Each label points back to the list it was added to.
    assert first.label_list == owner_first
    assert second.label_list == owner_second

    assert first == second
def test_ordering(self):
    """The pair whose reference label starts earlier sorts first."""
    ref_a = annotations.Label('a1', start=1.55, end=1.88)
    hyp_a = annotations.Label('a2', start=1.66, end=1.92)
    pair_a = alignment.LabelPair(ref_a, hyp_a)

    ref_b = annotations.Label('b1', start=1.59, end=1.88)
    hyp_b = annotations.Label('b2', start=1.66, end=1.92)
    pair_b = alignment.LabelPair(ref_b, hyp_b)

    assert pair_a < pair_b
def test_label_set_for_value(self, sample_outcome):
    """``label_set_for_value('down')`` returns the expected labels, order ignored."""
    label_set = sample_outcome.label_set_for_value('down')

    # (start, end) of every expected 'down' label; duplicates are intended.
    spans = [
        (10.35, 11.12),
        (39.28, 40.0),
        (31.20, 33.4),
        (39.28, 40.0),
        (28.20, 33.4),
    ]
    expected = [
        annotations.Label('down', start=start, end=end)
        for start, end in spans
    ]

    assert sorted(expected) == sorted(label_set.labels)
def test_align_deletion(self):
    """A reference label missing in the hypothesis is paired with ``None``."""
    aligner = alignment.LevenshteinAligner()

    reference = ll_with_values(['a', 'b', 'c'])
    hypothesis = ll_with_values(['a', 'c'])
    pairs = aligner.align(reference, hypothesis)

    expected = [
        alignment.LabelPair(annotations.Label('a'), annotations.Label('a')),
        alignment.LabelPair(annotations.Label('b'), None),
        alignment.LabelPair(annotations.Label('c'), annotations.Label('c')),
    ]
    assert pairs == expected
def test_align_empty_ref_returns_all_none(self):
    """With an empty reference every hypothesis label is paired with ``None``."""
    aligner = alignment.LevenshteinAligner()

    reference = ll_with_values([])
    hypothesis = ll_with_values(['a', 'b', 'c'])
    pairs = aligner.align(reference, hypothesis)

    expected = [
        alignment.LabelPair(None, annotations.Label('a')),
        alignment.LabelPair(None, annotations.Label('b')),
        alignment.LabelPair(None, annotations.Label('c')),
    ]
    assert pairs == expected
def corpus_with_more_labels():
    """
    Corpus with an extra label-list.

    Starts from the single-label sample corpus and adds a ``radio``
    label-list with one label to ``utt-1`` ... ``utt-8``: value ``alpha``
    for the first two utterances, ``beta`` for the remaining six.
    """
    corpus = resources.create_single_label_corpus()

    radio_values = ['alpha'] * 2 + ['beta'] * 6

    for number, value in enumerate(radio_values, start=1):
        utterance = corpus.utterances['utt-{}'.format(number)]
        radio = annotations.LabelList(idx='radio',
                                      labels=[annotations.Label(value)])
        utterance.set_label_list(radio)

    return corpus
def test_align_empty_ref_returns_insertions(self):
    """With an empty reference the single hypothesis label is paired with ``None``."""
    reference = []
    hypothesis = [annotations.Label('greasy', 1.4, 1.9)]

    aligner = alignment.BipartiteMatchingAligner(
        substitution_penalty=2,
        non_overlap_penalty_weight=1
    )
    pairs = aligner.align(reference, hypothesis)

    expected_pair = alignment.LabelPair(
        None, annotations.Label('greasy', 1.4, 1.9))
    assert pairs == [expected_pair]
def ll_with_values(values):
    """Return a plain list holding one ``annotations.Label`` per entry of ``values``.

    The labels are created from the value only and keep the order of
    ``values``.
    """
    return [annotations.Label(value) for value in values]
def test_false_rejection_rate_with_no_occurences_returns_zero(self):
    """The false rejection rate is 0.0 when the first (empty) label-list has no labels."""
    empty_ll = annotations.LabelList(labels=[])
    single_ll = annotations.LabelList(
        labels=[annotations.Label('four', 2.5, 3.0)])

    kws_evaluator = evaluator.KWSEvaluator()
    result = kws_evaluator.evaluate(empty_ll, single_ll)

    assert result.false_rejection_rate() == 0.0
def test_label_creation(self):
    """A label built from value, start and end exposes them and has empty meta."""
    label = annotations.Label('value', 6.2, 8.9)

    assert label.value == 'value'
    assert label.start == 6.2
    assert label.end == 8.9
    assert len(label.meta) == 0
def read_labels(path, corpus):
    """Read the label files found in ``path`` and attach them to ``corpus``.

    Every file matching ``<LABEL_FILE_PREFIX>_*.txt`` in ``path`` is read.
    The part of the file name between ``<LABEL_FILE_PREFIX>_`` and ``.txt``
    becomes the idx of the label-lists created from that file.

    Each record is split on spaces into at most four columns, used as
    utterance idx, start, end and label value. An end of ``-1`` is replaced
    by infinity. If the value matches ``META_PATTERN``, group 1 is used as
    the label value and group 2 is parsed as JSON into the label's meta.

    Args:
        path: Folder containing the label files.
        corpus: Corpus whose ``utterances`` mapping receives the label-lists
            via ``set_label_list``.
    """
    name_prefix = '{}_'.format(LABEL_FILE_PREFIX)
    file_pattern = os.path.join(path, '{}_*.txt'.format(LABEL_FILE_PREFIX))

    for label_file in glob.glob(file_pattern):
        base_name = os.path.basename(label_file)
        # Strip the prefix and the '.txt' extension to get the label-list idx.
        key = base_name[len(name_prefix):len(base_name) - len('.txt')]

        labels_by_utterance = collections.defaultdict(list)
        records = textfile.read_separated_lines_generator(label_file,
                                                          separator=' ',
                                                          max_columns=4)

        for record in records:
            value = record[3]
            start = float(record[1])
            end = float(record[2])
            matched = META_PATTERN.match(value)

            # -1 marks an open end.
            if end == -1:
                end = float('inf')

            if matched is None:
                meta = None
            else:
                meta = json.loads(matched.group(2))
                value = matched.group(1)

            labels_by_utterance[record[0]].append(
                annotations.Label(value, start, end, meta=meta))

        for utterance_idx, utterance_labels in labels_by_utterance.items():
            label_list = annotations.LabelList(idx=key,
                                               labels=utterance_labels)
            corpus.utterances[utterance_idx].set_label_list(label_list)
def generate_labels(n):
    """Return ``n`` labels with the values ``label-0`` ... ``label-<n-1>``, in order."""
    return [
        annotations.Label('label-{}'.format(index))
        for index in range(n)
    ]
def test_align_empty_hypothesis(self):
    """Aligning one reference label with an empty hypothesis yields one segment.

    The segment is expected to span the reference label and to have an
    empty hypothesis list.
    """
    reference = annotations.LabelList(labels=[
        annotations.Label('b', 4, 8)
    ])
    hypothesis = annotations.LabelList(labels=[])

    aligner = alignment.InvariantSegmentAligner()
    segments = aligner.align(reference, hypothesis)

    assert len(segments) == 1

    only_segment = segments[0]
    assert only_segment.start == 4
    assert only_segment.end == 8
    assert only_segment.ref == [annotations.Label('b', 4, 8)]
    assert only_segment.hyp == []
def test_label_creation_with_info(self):
    """A label created with a ``meta`` dict exposes that entry via ``meta``."""
    extra = {'something': 2}
    label = annotations.Label('value', 6.2, 8.9, meta=extra)

    assert label.value == 'value'
    assert label.start == 6.2
    assert label.end == 8.9
    assert len(label.meta) == 1
    assert label.meta['something'] == 2
def test_merge_corpus_label_lists(self):
    """After merging, utterance ``utt-2_1`` carries the expected ``default`` label-list."""
    main_corpus = resources.create_dataset()
    merging_corpus = resources.create_multi_label_corpus()

    main_corpus.merge_corpus(merging_corpus)

    merged_lists = main_corpus.utterances['utt-2_1'].label_lists
    assert set(merged_lists.keys()) == {
        'default'
    }

    default_ll = main_corpus.utterances['utt-2_1'].label_lists['default']
    expected_labels = [
        annotations.Label('music', 0, 5),
        annotations.Label('speech', 5, 12),
        annotations.Label('music', 13, 15)
    ]
    assert default_ll == annotations.LabelList(labels=expected_labels)
def test_set_label_list(self):
    """Adding a fourth label-list keeps the existing three and links all to the utterance."""
    delta = annotations.LabelList(idx='delta', labels=[
        annotations.Label('y', 0.0, 3.3),
        annotations.Label('t', 3.8, 7.9)
    ])

    self.utt.set_label_list(delta)

    assert len(self.utt.label_lists) == 4

    expected_by_idx = [
        ('alpha', self.ll_1),
        ('bravo', self.ll_2),
        ('charlie', self.ll_3),
        ('delta', delta),
    ]

    # Every label-list is registered under its idx ...
    for idx, label_list in expected_by_idx:
        assert self.utt.label_lists[idx] == label_list

    # ... and points back to the utterance it belongs to.
    for __, label_list in expected_by_idx:
        assert label_list.utterance == self.utt