class TestSegmentAnnotationCmp(object):
    """Tests for annotation_cmp ordering of SegmentAnnotations."""

    sa1 = SegmentAnnotation('n1', [FragmentToken('n1', Interval(0, 0.5), None)])
    sa2 = SegmentAnnotation('n1', [FragmentToken('n1', Interval(0.5, 1.5), None)])
    sa3 = SegmentAnnotation('n1', [FragmentToken('n1', Interval(1.3, 1.4), None)])
    sa4 = SegmentAnnotation('n2', [FragmentToken('n2', Interval(0, 1), None)])

    def test_invalid_comparison(self):
        # annotations from different files cannot be ordered
        with pytest.raises(ValueError):
            annotation_cmp(self.sa1, self.sa4)

    def test_annotation_eq(self):
        # every annotation compares equal to itself
        for annot in (self.sa1, self.sa2, self.sa3):
            assert annotation_cmp(annot, annot) == 0

    def test_annotation_cmp(self):
        # non-overlapping annotations are ordered by time, symmetrically
        for earlier, later in ((self.sa1, self.sa2), (self.sa1, self.sa3)):
            assert annotation_cmp(earlier, later) == -1
            assert annotation_cmp(later, earlier) == 1
        # overlapping annotations compare equal
        assert annotation_cmp(self.sa2, self.sa3) == 0
        assert annotation_cmp(self.sa3, self.sa2) == 0
def test_not_enough_overlap(self):
    """Intervals that barely overlap are treated as disjoint but still ordered."""
    early = Interval(0, 1)
    late = Interval(0.98, 2)
    # presumably the tiny 0.02 overlap is below some threshold — the pair
    # does not count as overlapping in either direction
    assert not early.overlaps_with(late)
    assert not late.overlaps_with(early)
    # ordering still reflects temporal position
    assert interval_cmp(early, late) == -1
    assert interval_cmp(late, early) == 1
def test_badinterval(self):
    """Interval rejects reversed and negative-time constructions."""
    for start, end in ((1, 0), (-1, -0.5), (-2, -3)):
        with pytest.raises(ValueError):
            Interval(start, end)
def test_interval_errors(self):
    """Fragments extending past the database intervals are reported."""
    expected = ([FragmentToken('a', Interval(0.5, 1.5), 'm1')], [])
    assert check_intervals(self.d2, self.m1) == expected
    expected = ([FragmentToken('a', Interval(0.5, 2.5), 'm1')], [])
    assert check_intervals(self.d4, self.m1) == expected
def test_annotation_at_interval(self):
    """annotation_at_interval returns the tuple of marks overlapping the query."""
    cases = [
        (Interval(0.0, 0.5), ('a', 'r', 'm', 's', 'a')),
        (Interval(0.1, 0.4), ('r', 'm', 's')),
        (Interval(0.0, 0.05), ('a', )),
        # query entirely outside the annotation yields nothing
        (Interval(10, 11), ()),
    ]
    for query, expected in cases:
        assert self.sa.annotation_at_interval(query) == expected
def test_eq_wrong_ntokens(self):
    """Annotations covering the same span with different token counts differ."""
    single = SegmentAnnotation(
        'name1', [FragmentToken('', Interval(0, 2), None)])
    double = SegmentAnnotation('name1', [
        FragmentToken('', Interval(0, 1), None),
        FragmentToken('', Interval(1, 2), None),
    ])
    assert single != double
def test_truncate_interval(self):
    """Fragments sticking out of the database are clipped and re-annotated."""
    clipped = ClassDict({
        ClassID(0, 'm1'):
        (FragmentToken('a', Interval(0.5, 1.0), ('c', 'd')), )
    })
    # both a slightly- and a far-overreaching fragment clip to the same span
    assert truncate_intervals(self.d2, self.ca, self.m1) == (clipped, [], [])
    assert truncate_intervals(self.d4, self.ca, self.m1) == (clipped, [], [])
def test_tokens_at_interval(self):
    """tokens_at_interval returns the tokens overlapping the query interval."""
    sa = self.sa
    assert sa.tokens_at_interval(Interval(0.0, 0.5)) == tuple(self.tokenlist)
    assert sa.tokens_at_interval(Interval(0.1, 0.4)) == tuple(self.tokenlist[1:4])
    assert sa.tokens_at_interval(Interval(0.0, 0.05)) == (self.tokenlist[0], )
    # query beyond the annotation yields nothing
    assert sa.tokens_at_interval(Interval(10, 11)) == ()
    # an empty annotation yields nothing for any query
    assert SegmentAnnotation('', []).tokens_at_interval(Interval(0, 1)) == ()
def _write_fragment_tokens(outpath, fragment_lists):
    # Helper: write every fragment, sorted by (filename, start time), as one
    # 'name start end mark' line.
    with open(outpath, 'w') as fp:
        for fragment in sorted(chain.from_iterable(fragment_lists),
                               key=lambda x: (x.name, x.interval.start)):
            fp.write('{0} {1:.2f} {2:.2f} {3}\n'.format(
                fragment.name, fragment.interval.start,
                fragment.interval.end, fragment.mark))


def load(phndir, wrddir, outdir, prefix):
    """Load phone/word filesets, sanity-check them, and write concatenated
    `prefix`.phn, `prefix`.wrd and `prefix`.split files into `outdir`.

    Returns (phn_fragments, wrd_fragments).
    """
    fragments = load_filesets(phndir, wrddir)
    phn_fragments, wrd_fragments = zip(*fragments)
    print(len(phn_fragments), len(wrd_fragments))
    # report indices of empty phone filesets (debug aid)
    for i in range(len(phn_fragments)):
        if phn_fragments[i] == []:
            print(i)

    # NOTE(review): stripping 'SIL'/'sp' marks (as the disabled code below
    # did) reportedly breaks the script downstream; left off on purpose.
    #phn_fragments = [[f for f in fl if not f.mark in ['SIL', 'sp']]
    #                 for fl in phn_fragments]
    #wrd_fragments = [[f for f in fl if not f.mark in ['SIL', 'sp']]
    #                 for fl in wrd_fragments]

    intervals_from_phn = {
        fl[0].name: Interval(fl[0].interval.start, fl[-1].interval.end)
        for fl in phn_fragments
    }
    intervals_from_wrd = {
        fl[0].name: Interval(fl[0].interval.start, fl[-1].interval.end)
        for fl in wrd_fragments
    }
    # check that the total file intervals match up
    assert (len(intervals_from_phn) == len(intervals_from_wrd))

    # check that each word corresponds to a sequence of phones exactly;
    # tokens_exact raises if no exact match is found.
    phn_corpus = tokenlists_to_corpus(phn_fragments)
    wrd_corpus = tokenlists_to_corpus(wrd_fragments)
    # BUG FIX: this used to be a bare generator expression, which was never
    # iterated — the exact-match check silently never ran.
    for name, interval, mark in wrd_corpus.iter_fragments():
        phn_corpus.tokens_exact(name, interval)

    # write concatenated phn, wrd files
    _write_fragment_tokens(path.join(outdir, prefix + '.phn'), phn_fragments)
    _write_fragment_tokens(path.join(outdir, prefix + '.wrd'), wrd_fragments)

    with open(path.join(outdir, prefix + '.split'), 'w') as fp:
        for name, interval in sorted(intervals_from_phn.iteritems()):
            fp.write('{0} {1:.2f} {2:.2f}\n'.format(name, interval.start,
                                                    interval.end))
    return phn_fragments, wrd_fragments
def test_ca_intervals(self):
    """Segment annotations in the corpus expose the expected intervals."""
    expected = {
        'a': [Interval(0.0, 0.5), Interval(0.7, 1.3)],
        'b': [Interval(0.1, 0.6)],
    }
    predicted = {
        fname: [fa.interval for fa in self.ca.segment_annotations[fname]]
        for fname in self.ca.keys()
    }
    assert predicted == expected
def load(phndir, wrddir, outdir):
    """Load phone/word filesets, strip silence marks, sanity-check them, and
    write concatenated .phn, .wrd and .split files (named after CORPUS) into
    `outdir`.

    Returns (phn_fragments, wrd_fragments).
    """
    fragments = load_filesets(phndir, wrddir)
    phn_fragments, wrd_fragments = zip(*fragments)

    # remove silence/pause marks: "sil", "sp", "SIL"
    silences = ['sil', 'sp', 'SIL']
    phn_fragments = [[f for f in fl if f.mark not in silences]
                     for fl in phn_fragments]
    wrd_fragments = [[f for f in fl if f.mark not in silences]
                     for fl in wrd_fragments]

    intervals_from_phn = {
        fl[0].name: Interval(fl[0].interval.start, fl[-1].interval.end)
        for fl in phn_fragments
    }
    intervals_from_wrd = {
        fl[0].name: Interval(fl[0].interval.start, fl[-1].interval.end)
        for fl in wrd_fragments
    }
    # check that the total file intervals match up
    assert (intervals_from_phn == intervals_from_wrd)

    # check that each word corresponds to a sequence of phones exactly;
    # tokens_exact raises if no exact match is found.
    wrd_corpus = tokenlists_to_corpus(wrd_fragments)
    phn_corpus = tokenlists_to_corpus(phn_fragments)
    # BUG FIX: formerly a bare generator expression, which was never iterated,
    # so the exact-match check never actually executed.
    for name, interval, mark in wrd_corpus.iter_fragments():
        phn_corpus.tokens_exact(name, interval)

    def write_tokens(suffix, fragment_lists):
        # write 'name start end mark' lines sorted by (name, start)
        with open(path.join(outdir, '{}.{}'.format(CORPUS, suffix)),
                  'w') as fp:
            for fragment in sorted(chain.from_iterable(fragment_lists),
                                   key=lambda x: (x.name, x.interval.start)):
                fp.write(u'{0} {1:.4f} {2:.4f} {3}\n'.format(
                    fragment.name, fragment.interval.start,
                    fragment.interval.end, fragment.mark))

    # write concatenated phn, wrd files
    write_tokens('phn', phn_fragments)
    write_tokens('wrd', wrd_fragments)

    with open(path.join(outdir, '{}.split'.format(CORPUS)), 'w') as fp:
        for name, interval in sorted(intervals_from_phn.iteritems()):
            fp.write(u'{0} {1:.4f} {2:.4f}\n'.format(name, interval.start,
                                                     interval.end))
    return phn_fragments, wrd_fragments
def test_typeset():
    """typeset yields the set of distinct marks occurring in fragment pairs."""
    pairs = [(FragmentToken(None, Interval(0, 1), 'm{0}'.format(n1)),
              FragmentToken(None, Interval(0, 1), 'n{0}'.format(n2)))
             for n1, n2 in zip(xrange(10), xrange(10, 20))]
    expected = set('m{0}'.format(n) for n in xrange(10))
    expected.update('n{0}'.format(n) for n in xrange(10, 20))
    assert set(typeset(pairs)) == expected

    # no pairs, no types
    assert set(typeset([])) == set()

    # duplicate marks collapse into a single type
    pairs = [(FragmentToken(None, Interval(0, 1), 'm{0}'.format(n)),
              FragmentToken(None, Interval(0, 1), 'm{0}'.format(n)))
             for n in xrange(10)]
    assert set(typeset(pairs)) == set('m{0}'.format(n) for n in xrange(10))
class TestCheckTruncateIntervals(object):
    """Tests for truncate_intervals against a small IntervalDB and Corpus."""

    m1 = IntervalDB({'a': [(0.0, 1.0), (2.0, 3.0)]})
    d1 = ClassDict(
        {ClassID(0, 'm1'): (FragmentToken('a', Interval(0.0, 1.0), 'm1'), )})
    d2 = ClassDict(
        {ClassID(0, 'm1'): (FragmentToken('a', Interval(0.5, 1.5), 'm1'), )})
    d3 = ClassDict(
        {ClassID(0, 'm1'): (FragmentToken('b', Interval(0.0, 1.0), 'm1'), )})
    d4 = ClassDict(
        {ClassID(0, 'm1'): (FragmentToken('a', Interval(0.5, 2.5), 'm1'), )})
    sa = [
        SegmentAnnotation('a', [
            FragmentToken('a', Interval(0.0, 0.25), 'a'),
            FragmentToken('a', Interval(0.25, 0.5), 'b'),
            FragmentToken('a', Interval(0.5, 0.75), 'c'),
            FragmentToken('a', Interval(0.75, 1.0), 'd')
        ])
    ]
    ca = Corpus(sa)

    def test_good_interval(self):
        # a fully covered fragment comes back untouched
        result = truncate_intervals(self.d1, self.ca, self.m1)
        assert result == (self.d1, [], [])

    def test_truncate_interval(self):
        # fragments sticking out of the database are clipped and re-annotated
        clipped = ClassDict({
            ClassID(0, 'm1'):
            (FragmentToken('a', Interval(0.5, 1.0), ('c', 'd')), )
        })
        assert truncate_intervals(self.d2, self.ca,
                                  self.m1) == (clipped, [], [])
        assert truncate_intervals(self.d4, self.ca,
                                  self.m1) == (clipped, [], [])
def test_freqs():
    """freqs counts the occurrences of each mark across fragment pairs."""
    pairs = [(FragmentToken(None, Interval(0, 1), 'm{0}'.format(n1)),
              FragmentToken(None, Interval(0, 1), 'n{0}'.format(n2)))
             for n1, n2 in zip(xrange(10), xrange(10, 20))]
    expected = {}
    for n in xrange(10):
        expected['m{0}'.format(n)] = 1
    for n in xrange(10, 20):
        expected['n{0}'.format(n)] = 1
    assert freqs(pairs) == expected

    # no pairs, no counts
    assert freqs([]) == dict()

    # a mark repeated within a pair still counts once per pair list entry
    pairs = [(FragmentToken(None, Interval(0, 1), 'm{0}'.format(n)),
              FragmentToken(None, Interval(0, 1), 'm{0}'.format(n)))
             for n in xrange(10)]
    assert freqs(pairs) == {'m{0}'.format(n): 1 for n in xrange(10)}
def pairwise_substring_completion(fragment1, fragment2, corpus, minlength,
                                  maxlength):
    """Yield FragmentToken pairs for every parallel substring pair of the two
    fragments, with marks and spans taken from `corpus`.

    Substring pairs are produced by `psubstrings` with lengths between
    `minlength` and `maxlength`.
    """
    def mark_interval_seq(fragment):
        # (mark, interval) for every corpus token inside the fragment
        return [(t.mark, t.interval)
                for t in corpus.tokens(fragment.name, fragment.interval)]

    def to_token(name, seq):
        # collapse a (mark, interval) sequence into a single FragmentToken
        marks, intervals = zip(*seq)
        span = Interval(intervals[0].start, intervals[-1].end)
        return FragmentToken(name, span, marks)

    seq1 = mark_interval_seq(fragment1)
    seq2 = mark_interval_seq(fragment2)
    for sub1, sub2 in psubstrings(seq1, seq2, minlength, maxlength):
        yield (to_token(fragment1.name, sub1), to_token(fragment2.name, sub2))
def collapse(intervals):
    """
    Compute the union of a list of intervals.

    The union of intervals is defined as the set-theoretic union for
    intervals that overlap and concatenation for intervals that don't.

    Parameters
    ----------
    intervals : list of Intervals

    Returns
    -------
    list of Intervals
    """
    intervals = sorted(intervals, key=lambda x: x.start)
    nodes = [Node(i) for i in intervals]
    # Link every pair of intervals that overlap or are adjacent.  Since the
    # list is sorted by start, the inner scan can stop as soon as i2 begins
    # strictly after i1 ends (and is not right-adjacent to it).
    for i in xrange(len(intervals)):
        for j in xrange(i+1, len(intervals)):
            i1 = intervals[i]
            i2 = intervals[j]
            if not i2.is_right_adjacent_to(i1) and i2.start > i1.end:
                break
            if i1.overlap(i2) > 0 or i1.is_adjacent(i2):
                nodes[i].add_link(nodes[j])
    # Each connected component of linked intervals merges into a single
    # interval spanning from the earliest start to the latest end.
    r = []
    for c in connected(nodes):
        starts, ends = zip(*(node.value for node in c))
        r.append(Interval(min(starts), max(ends)))
    return sorted(r, key=lambda x: x.start)
def truncate_intervals(clsdict, corpus, mapping):
    """Clip each fragment in `clsdict` to the largest overlapping interval in
    `mapping` and re-annotate clipped fragments from `corpus`.

    Returns
    -------
    (ClassDict, filename_errors, interval_errors)
        filename_errors lists names missing from `mapping`; interval_errors
        lists fragments with no overlapping interval at all.
    """
    disc = {}
    interval_errors = []
    filename_errors = []
    for class_id in clsdict:
        fragments = []
        for fragment in clsdict[class_id]:
            qname = fragment.name
            qstart = fragment.interval.start
            qend = fragment.interval.end
            try:
                finterval = mapping.largest_overlap(qname, fragment.interval)
            except KeyError:
                # filename unknown to the mapping
                filename_errors.append(fragment.name)
                continue
            except ValueError:
                # no overlapping interval found
                interval_errors.append(fragment)
                # BUG FIX: without this `continue`, execution fell through to
                # the unpacking below with `finterval` unbound, raising
                # UnboundLocalError instead of recording the error.
                continue
            fstart, fend = finterval
            if qstart != fstart or qend != fend:
                # clip to the intersection and look up the new annotation
                newstart = max(qstart, fstart)
                newend = min(qend, fend)
                newinterval = Interval(newstart, newend)
                newmark = corpus.annotation(qname, newinterval)
                fragment = FragmentToken(qname, newinterval, newmark)
            fragments.append(fragment)
        disc[class_id] = tuple(fragments)
    return ClassDict(disc), filename_errors, interval_errors
def load_alignment(fname, strip_tags=True):
    """Loads a .ctm alignment file into FragmentTokens.

    Returns a sorted list of token lists, one per utterance name. Times are
    rounded to 2 decimals; for files with "phone" in their name, `strip_tags`
    drops any '_'-suffix from the mark.
    """
    fragment_lists = []
    fragments = []
    previous_name = ""
    # hoisted out of the loop: this test is invariant per file
    strip_phone_tags = "phone" in fname and strip_tags
    # FIX: use a context manager so the file handle is closed when done
    with open(fname) as lines:
        for line in lines:
            name, _, start, duration, mark = line.strip().split(' ')
            if name != previous_name:
                # utterance boundary: flush the tokens collected so far
                if fragments:
                    fragment_lists.append(fragments)
                fragments = []
                previous_name = name
            start = round(float(start), 2)
            stop = start + round(float(duration), 2)
            if strip_phone_tags:
                mark = mark.split('_')[0]
            fragments.append(FragmentToken(name, Interval(start, stop), mark))
    if fragments:
        fragment_lists.append(fragments)
    # Phone and word alignments aren't necessarily in the same order, so sort.
    fragment_lists.sort()
    return fragment_lists
def annotate_classes(clsdict, corpus, split=None):
    """Return a copy of `clsdict` with every token re-annotated from `corpus`,
    clipping tokens to the intervals in `split` when one is given.

    Returns
    -------
    (ClassDict, errors)
        errors lists the tokens that were not covered by `split`.
    """
    new = {}  # classID -> annotated token tuple
    errors = []
    check_split = not (split is None)
    for classID, tokenlist in clsdict.iteritems():
        newtokens = []
        for token in tokenlist:
            filename = token.name
            interval = token.interval
            if check_split and not split.is_covered(filename, interval):
                errors.append(token)
                try:
                    finterval = split.largest_overlap(filename, interval)
                    qstart, qend = interval
                    fstart, fend = finterval
                    # BUG FIX: the second comparison used to be
                    # `fstart != qend` (start against the query END), which is
                    # almost always true; compare end against end instead.
                    if fstart != qstart or fend != qend:
                        newstart = max(fstart, qstart)
                        newend = min(fend, qend)
                        interval = Interval(newstart, newend)
                except KeyError:
                    # filename unknown to the split
                    continue
                except ValueError:
                    # no overlapping interval
                    continue
            try:
                annot = tuple(corpus.annotation(filename, interval))
            except Exception:
                # was a bare `except:`; keep the best-effort skip but stop
                # swallowing SystemExit/KeyboardInterrupt
                continue
            newtokens.append(FragmentToken(filename, interval, annot))
        if len(newtokens) > 0:
            new[classID] = tuple(newtokens)
    return ClassDict(new), errors
def split_em(phn_fragments, outdir):
    """Write the xitsonga cross/within interval splits and the file list."""
    intervals = {
        fl[0].name: Interval(fl[0].interval.start, fl[-1].interval.end)
        for fl in phn_fragments
    }
    # 4 groups of 1000 randomly sampled files for the cross-speaker split
    names_cross = list(grouper(1000, random.sample(intervals.items(), 4000)))

    # group files by the speaker id embedded in the filename
    by_speaker = defaultdict(set)
    for fname, interval in intervals.iteritems():
        by_speaker[fname.split('_')[2]].add((fname, interval))
    # keep only speakers with enough material
    names_within = [list(v) for v in by_speaker.values() if len(v) > 200]

    def dump_intervals(basename, groups):
        # blank-line separated groups of 'name start end' lines
        with open(path.join(outdir, basename), 'w') as fp:
            fp.write('\n\n'.join('\n'.join(
                '{0} {1:.2f} {2:.2f}'.format(name, interval.start,
                                             interval.end)
                for name, interval in sorted(ns)) for ns in groups))

    dump_intervals('xitsonga.intervals.cross', names_cross)
    dump_intervals('xitsonga.intervals.within', names_within)

    fnames = list(set(fl[0].name for fl in phn_fragments))
    with open(path.join(outdir, 'xitsonga.files'), 'w') as fp:
        fp.write('\n'.join(sorted(fnames)))
def test_find(self):
    """find returns the stored intervals overlapping each query interval."""
    cases = [
        (self.q1, [Interval(0.0, 1.0)]),
        (self.q2, [Interval(0.0, 1.0)]),
        (self.q3, [Interval(0.0, 1.0)]),
        (self.q4, [Interval(0.0, 1.0), Interval(2.0, 3.0)]),
        (self.q5, []),
        (self.q6, []),
        (self.q7, [Interval(2.0, 3.0)]),
        (self.q8, [Interval(2.0, 3.0), Interval(4.0, 5.0)]),
        (self.q9, [Interval(2.0, 3.0), Interval(4.0, 5.0)]),
        (self.q10, []),
    ]
    for query, expected in cases:
        assert list(self.m.find('a', query)) == expected
def load_annot(fname):
    """Load an annotation file into a list of FragmentTokens.

    Each line is 'start stop mark'; times are rounded to 2 decimals and the
    tokens are named after the file's basename (without extension).
    """
    fs = []
    bname = path.splitext(path.basename(fname))[0]
    # FIX: use a context manager so the file handle is closed when done
    with open(fname) as fp:
        for line in fp:
            start, stop, mark = line.strip().split(' ')
            interval = Interval(round(float(start), 2),
                                round(float(stop), 2))
            fs.append(FragmentToken(bname, interval, mark))
    return fs
class TestFragmentType(object):
    """Tests for FragmentType construction with and without a mark."""

    tokens = [
        FragmentToken('a', Interval(0.0, 0.1), 'a'),
        FragmentToken('a', Interval(0.1, 0.2), 'r'),
        FragmentToken('a', Interval(0.2, 0.3), 'm'),
        FragmentToken('a', Interval(0.3, 0.4), 's'),
        FragmentToken('a', Interval(0.4, 0.5), 'a')
    ]

    def test_mark(self):
        # tokens and the given mark are stored verbatim
        ft = FragmentType(self.tokens, 'markymark')
        assert ft.tokens == self.tokens
        assert ft.mark == 'markymark'

    def test_no_mark(self):
        # a None mark is preserved as None
        ft = FragmentType(self.tokens, None)
        assert ft.tokens == self.tokens
        assert ft.mark is None
def __init__(self, name, tokens):
    """Initialize with a filename and a list of FragmentTokens.

    Tokens are kept sorted via token_cmp; `interval` spans from the first
    token's start to the last token's end, or is None when there are no
    tokens.

    Raises
    ------
    ValueError
        If consecutive tokens are not contiguous in time.
    """
    self.name = name
    self.tokens = SortedList(tokens, key=cmp_to_key(token_cmp))
    if len(self.tokens) == 0:
        # no tokens: no spanned interval
        self.interval = None
    else:
        self.interval = Interval(self.tokens[0].interval.start,
                                 self.tokens[-1].interval.end)
    # each token must end exactly where the next begins
    # (vacuously true for fewer than two tokens)
    if not all(t1.interval.end == t2.interval.start
               for t1, t2 in zip(self.tokens[:-1], self.tokens[1:])):
        raise ValueError('Non-contiguous tokens.')
def split_em(phn_fragments, outdir, prefix): intervals = { f[0].name: Interval(f[0].interval.start, f[-1].interval.end) for f in phn_fragments } #print len(intervals) #'70': [0.0,1.49] size = len(phn_fragments) print(size) names_cross = list( grouper(size / 10, random.sample(intervals.items(), size))) #1000 / 4000 print len(names_cross), len(names_cross[1]) intervals_per_speaker = defaultdict(set) #print(intervals) for fname, interval in intervals.iteritems(): #print intervals_per_speaker.values() #fname = #fname.split("_")[0]#'single' if prefix == "mboshi": intervals_per_speaker[fname.split("_")[0]].add((fname, interval)) else: intervals_per_speaker[fname].add((fname, interval)) names_within = [list(v) for v in intervals_per_speaker.values()] #if len(v) > 2] this makes intervals.within be empty if there are no speaker information #print (len(names_cross[-1])), len(names_cross[0]) names_cross[-1] = [ element for element in names_cross[-1] if element != None ] #names_cross = names_cross[:-1] with open(path.join(outdir, prefix + '.intervals.cross'), 'w') as fp: fp.write('\n\n'.join('\n'.join( '{0} {1:.2f} {2:.2f}'.format(name, interval.start, interval.end) for name, interval in sorted(ns)) for ns in names_cross)) #fp.write('\n') #print len(names_within), len(names_within[0]) #print len(names_within) #print sorted(names_within[0]) with open(path.join(outdir, prefix + '.intervals.within'), 'w') as fp: fp.write('\n\n'.join('\n'.join( '{0} {1:.2f} {2:.2f}'.format(name, interval.start, interval.end) for name, interval in sorted(ns)) for ns in names_within)) #fp.write('\n\n'.join('\n'.join(sorted(ns[0][0])) for ns in names_within)) fp.write('\n') fnames = list(set(f[0].name for f in phn_fragments)) print len(fnames), len(sorted(fnames)) with open(path.join(outdir, prefix + '.files'), 'w') as fp: fp.write('\n'.join(sorted(fnames))) fp.write('\n')
class TestTokenCmp(object):
    """Tests for token_cmp ordering of FragmentTokens."""

    f1 = FragmentToken('a', Interval(0.0, 0.5), None)
    f2 = FragmentToken('a', Interval(0.5, 1.5), None)
    f3 = FragmentToken('a', Interval(1.3, 1.4), None)
    f4 = FragmentToken('b', Interval(0, 1), None)

    def test_invalid_comparison(self):
        # tokens from different files cannot be ordered
        with pytest.raises(ValueError):
            token_cmp(self.f1, self.f4)

    def test_token_eq(self):
        # every token compares equal to itself
        for token in (self.f1, self.f2, self.f3):
            assert token_cmp(token, token) == 0

    def test_token_cmp(self):
        # non-overlapping tokens are ordered by time, symmetrically
        for earlier, later in ((self.f1, self.f2), (self.f1, self.f3)):
            assert token_cmp(earlier, later) == -1
            assert token_cmp(later, earlier) == 1
        # overlapping tokens compare equal
        assert token_cmp(self.f2, self.f3) == 0
        assert token_cmp(self.f3, self.f2) == 0
def test_restrict(self):
    """restrict keeps only covered tokens, optionally dropping singletons."""
    # db covering all three files: nothing is removed
    db1 = IntervalDB({
        'a': [Interval(0, 1)],
        'b': [Interval(0, 3)],
        'c': [Interval(0, 3)]
    })
    assert self.c1.restrict(db1) == self.c1
    assert self.c2.restrict(db1) == self.c2
    assert self.c2.restrict(db1, remove_singletons=True) == ClassDict({})
    assert self.c3.restrict(db1) == ClassDict({})
    assert self.c4.restrict(db1) == self.c4

    # without file 'b', tokens on 'b' are dropped
    db2 = IntervalDB({'a': [Interval(0, 1)], 'c': [Interval(0, 3)]})
    assert self.c1.restrict(db2) == self.c2
    assert self.c2.restrict(db2) == self.c2
    assert self.c2.restrict(db2, remove_singletons=True) == ClassDict({})
    assert self.c3.restrict(db2) == ClassDict({})
    expected = ClassDict({
        self.id0: (self.tokens[0], self.tokens[2]),
        self.id1: (self.tokens[4], ),
    })
    assert self.c4.restrict(db2) == expected
    assert self.c4.restrict(db2, remove_singletons=True) == ClassDict(
        {self.id0: (self.tokens[0], self.tokens[2])})
def load_match_file(match_fn, phn_corpus):
    """Parse a match file into a shuffled sample of at most 100000 Matches.

    Two-field lines name the pair of files for the matches that follow;
    six-field lines carry the two fragment intervals (in frames, 100 per
    second — TODO confirm frame rate) and the dtw score in field 5.
    """
    matches = []
    with open(match_fn) as match_file:
        for line in match_file:
            fields = line.strip().split()
            if len(fields) == 2:
                # header line: filenames for the following matches
                base1, base2 = fields
            elif len(fields) == 6:
                dtw = float(fields[4])
                # frame counts -> seconds
                start1, end1, start2, end2 = (float(f) / 100.0
                                              for f in fields[:4])
                interval1 = Interval(start1, end1)
                interval2 = Interval(start2, end2)
                token1 = FragmentToken(
                    base1, interval1, phn_corpus.annotation(base1, interval1))
                token2 = FragmentToken(
                    base2, interval2, phn_corpus.annotation(base2, interval2))
                matches.append(Match(token1, token2, dtw))
    random.shuffle(matches)
    return matches[:100000]
def extract_single(tokens1, tokens2, minlength, maxlength, same):
    """Extract gold alignments between two phone lists.

    Parameters
    ----------
    tokens1, tokens2 : list of FragmentTokens
    minlength : int
        Minimum number of symbols in a fragment
    maxlength : int
        Maximum number of symbols in a fragment
    same : boolean
        Whether `tokens1` and `tokens2` are identical.

    Returns
    -------
    l : list of (FragmentToken, FragmentToken)
        List of token pairs containing the cooccurring fragments
    """
    ids1, intervals1, phones1 = zip(*tokens1)
    ids2, intervals2, phones2 = zip(*tokens2)
    css = allcommonsubstrings(phones1, phones2,
                              minlength=minlength, maxlength=maxlength,
                              same=same)
    if css is None:
        return []

    def make_fragment(file_id, intervals, phones, piece):
        # piece is a slice; span runs from its first interval's start to its
        # last interval's end
        span = Interval(intervals[piece.start].start,
                        intervals[piece.stop - 1].end)
        return FragmentToken(file_id, span, phones[piece])

    # ids are all the same within each token list
    return [(make_fragment(ids1[0], intervals1, phones1, slice1),
             make_fragment(ids2[0], intervals2, phones2, slice2))
            for slice1, slice2 in css]
class TestCheckIntervals(object):
    """Tests for check_intervals error reporting."""

    m1 = IntervalDB({'a': [(0.0, 1.0), (2.0, 3.0)]})
    d1 = ClassDict(
        {ClassID(0, 'm1'): (FragmentToken('a', Interval(0.0, 1.0), 'm1'), )})
    d2 = ClassDict(
        {ClassID(0, 'm1'): (FragmentToken('a', Interval(0.5, 1.5), 'm1'), )})
    d3 = ClassDict(
        {ClassID(0, 'm1'): (FragmentToken('b', Interval(0.0, 1.0), 'm1'), )})
    d4 = ClassDict(
        {ClassID(0, 'm1'): (FragmentToken('a', Interval(0.5, 2.5), 'm1'), )})

    def test_good_interval(self):
        # fully covered: no interval errors, no filename errors
        assert check_intervals(self.d1, self.m1) == ([], [])

    def test_interval_errors(self):
        # fragments extending past the db land in the first error list
        bad = [FragmentToken('a', Interval(0.5, 1.5), 'm1')]
        assert check_intervals(self.d2, self.m1) == (bad, [])
        bad = [FragmentToken('a', Interval(0.5, 2.5), 'm1')]
        assert check_intervals(self.d4, self.m1) == (bad, [])

    def test_bad_filename(self):
        # unknown filenames land in the second error list
        assert check_intervals(self.d3, self.m1) == ([], ['b'])