def test_from_records(annotation):
    """Round-trip an annotation through ``itertracks`` and ``from_records``."""
    # The labelled tracks yielded by ``itertracks`` are fed straight back into
    # ``Annotation.from_records``; the result must compare equal to the
    # annotation they were taken from.
    rebuilt = Annotation.from_records(annotation.itertracks(yield_label=True))
    assert rebuilt == annotation
def read_segments_file(segments_path):
    """Read speech/non-speech annotations from Kaldi segments file.

    Every row of the file is parsed as four whitespace-separated fields::

        <utterance_uri> <recording_uri> <onset> <offset>

    Rows are grouped by ``recording_uri`` and each group is turned into one
    ``Annotation``.

    Parameters
    ----------
    segments_path : str or path-like
        Location of the segments file; passed as-is to ``pandas.read_csv``.

    Returns
    -------
    dict
        Maps each recording URI (as a string) to the ``Annotation`` built
        from that recording's rows. Every record passed to
        ``Annotation.from_records`` is
        ``(Segment(onset, offset), '_', 'speech')``.
    """
    columns = ['utterance_uri', 'recording_uri', 'onset', 'offset']
    segs_df = pd.read_csv(
        segments_path,
        header=None,
        # Split on any run of whitespace so that tabs or repeated spaces do
        # not shift fields into the wrong column.
        sep=r'\s+',
        names=columns,
        # Keep identifiers as text. Without this, all-numeric ids such as
        # "0001" are inferred as integers, which drops leading zeros and
        # makes the returned keys (and annotation URIs) non-strings.
        dtype={'utterance_uri': str, 'recording_uri': str})

    annotations = {}
    for recording_uri, segs in segs_df.groupby('recording_uri'):
        # Each row becomes one record; all records share the track name '_'
        # and the label 'speech'.
        records = [
            (Segment(seg.onset, seg.offset), '_', 'speech')
            for seg in segs.itertuples(index=False)
        ]
        annotations[recording_uri] = Annotation.from_records(
            records, uri=recording_uri)
    return annotations