def test_trim_to_supervisions_mixed_cuts():
    """Trimming a MixedCut with non-overlapping tracks yields plain, unmixed cuts."""
    first_track = Cut(
        'cut1', start=0, duration=30, channel=0,
        supervisions=[
            SupervisionSegment('sup1', 'rec1', start=1.5, duration=8.5),
            SupervisionSegment('sup2', 'rec1', start=10, duration=5),
            SupervisionSegment('sup3', 'rec1', start=20, duration=8),
        ],
    )
    second_track = Cut(
        'cut2', start=0, duration=30, channel=0,
        supervisions=[
            SupervisionSegment('sup4', 'rec1', start=0, duration=30),
        ],
    )
    cut_set = CutSet.from_cuts([first_track.append(second_track)])
    assert isinstance(cut_set[0], MixedCut)

    cuts = cut_set.trim_to_supervisions()

    assert len(cuts) == 4
    # After "trimming", the MixedCut "decayed" into simple, unmixed cuts, as they did not overlap
    assert all(isinstance(cut, Cut) for cut in cuts)
    assert all(len(cut.supervisions) == 1 for cut in cuts)
    assert all(cut.supervisions[0].start == 0 for cut in cuts)

    # Check that the cuts preserved their start/duration/supervisions after trimming
    expected = [
        (1.5, 8.5, 'sup1'),
        (10, 5, 'sup2'),
        (20, 8, 'sup3'),
        (0, 30, 'sup4'),
    ]
    for cut, (start, duration, sup_id) in zip(cuts, expected):
        assert cut.start == start
        assert cut.duration == duration
        assert cut.supervisions[0].id == sup_id
def test_mix_same_recording_channels():
    """Two single-channel cuts over the same recording merge into one two-track MixedCut."""
    recording = Recording(
        'rec',
        sampling_rate=8000,
        num_samples=30 * 8000,
        duration=30,
        sources=[
            AudioSource('file', channels=[0], source='irrelevant1.wav'),
            AudioSource('file', channels=[1], source='irrelevant2.wav'),
        ],
    )
    cut_set = CutSet.from_cuts([
        Cut('cut1', start=0, duration=30, channel=0, recording=recording),
        Cut('cut2', start=0, duration=30, channel=1, recording=recording),
    ])

    mixed = cut_set.mix_same_recording_channels()

    assert len(mixed) == 1
    mixed_cut = mixed[0]
    assert isinstance(mixed_cut, MixedCut)
    assert len(mixed_cut.tracks) == 2
    assert mixed_cut.tracks[0].cut == cut_set[0]
    assert mixed_cut.tracks[1].cut == cut_set[1]
def _read_audio(cut: Cut, suppress_errors: bool = False) -> Optional[torch.Tensor]:
    """
    Loads audio data from cut, or returns None if there was an error
    and ``suppress_errors`` was set to ``True``.
    """
    # The context manager swallows (and warns about) loading errors when enabled;
    # in that case the function falls through and implicitly returns None.
    with suppress_and_warn(AudioLoadingError, DurationMismatchError, enabled=suppress_errors):
        samples = cut.load_audio()
        return torch.from_numpy(samples[0])
def cut2(dummy_features):
    """Fixture: a 10-second cut starting at 180s with a single supervision."""
    supervision = SupervisionSegment(
        id='sup-3', recording_id='irrelevant', start=3.0, duration=2.5
    )
    return Cut(
        id='cut-2',
        start=180.0,
        duration=10.0,
        features=dummy_features,
        supervisions=[supervision],
    )
def _read_audio(
    cut: Cut,
    suppress_errors: bool = False,
    recording_field: Optional[str] = None,
) -> Optional[torch.Tensor]:
    """
    Loads audio data from cut, or returns None if there was an error
    and ``suppress_errors`` was set to ``True``.
    """
    # On a suppressed loading error the context manager exits early and the
    # function implicitly returns None.
    with suppress_audio_loading_errors(enabled=suppress_errors):
        if recording_field is not None:
            # Load audio from a custom attribute instead of the default recording.
            attr = getattr(cut, recording_field)
            assert isinstance(
                attr, Recording
            ), f"Expected 'getattr(cut, {recording_field})' to yield Recording, got {type(attr)}"
            audio = cut.load_custom(recording_field)
        else:
            audio = cut.load_audio()
        assert audio.shape[0] == 1, f"Expected single-channel audio in cut:\n{cut}"
        return torch.from_numpy(audio[0])
def dummy_cut(id: str = 'irrelevant', start: float = 0.0, duration: float = 1.0, supervisions=None):
    """Create a Cut with dummy features; ``supervisions`` defaults to an empty list."""
    if supervisions is None:
        supervisions = []
    return Cut(
        id=id,
        start=start,
        duration=duration,
        features=dummy_features(0),
        supervisions=supervisions,
    )
def dummy_cut(unique_id: int, start: float = 0.0, duration: float = 1.0, supervisions=None):
    """Create a Cut with a zero-padded id and dummy features keyed by ``unique_id``."""
    if supervisions is None:
        supervisions = []
    return Cut(
        id=f'dummy-cut-{unique_id:04d}',
        start=start,
        duration=duration,
        channel=0,
        features=dummy_features(unique_id),
        supervisions=supervisions,
    )
def test_trim_to_supervisions_mixed_cuts():
    """Trimming a MixedCut yields one MixedCut per supervision, each starting at 0."""
    long_cut = Cut(
        'cut1', start=0, duration=30, channel=0,
        supervisions=[
            SupervisionSegment('sup1', 'rec1', start=1.5, duration=8.5),
            SupervisionSegment('sup2', 'rec1', start=10, duration=5),
            SupervisionSegment('sup3', 'rec1', start=20, duration=8),
        ],
    )
    appended_cut = Cut(
        'cut2', start=0, duration=30, channel=0,
        supervisions=[
            SupervisionSegment('sup4', 'rec1', start=0, duration=30),
        ],
    )
    cut_set = CutSet.from_cuts([long_cut.append(appended_cut)])

    cuts = cut_set.trim_to_supervisions()

    assert len(cuts) == 4
    assert all(isinstance(cut, MixedCut) for cut in cuts)
    assert all(cut.start == 0 for cut in cuts)
    assert all(len(cut.supervisions) == 1 for cut in cuts)
    assert all(cut.supervisions[0].start == 0 for cut in cuts)

    expected = [(8.5, 'sup1'), (5, 'sup2'), (8, 'sup3'), (30, 'sup4')]
    for cut, (duration, sup_id) in zip(cuts, expected):
        assert cut.duration == duration
        assert cut.supervisions[0].id == sup_id
def test_trim_to_unsupervised_segments():
    """Gaps between supervisions become supervision-free cuts."""
    cut_set = CutSet.from_cuts([
        # Yields 3 unsupervised cuts - before first supervision,
        # between sup2 and sup3, and after sup3.
        Cut(
            'cut1', start=0, duration=30, channel=0,
            supervisions=[
                SupervisionSegment('sup1', 'rec1', start=1.5, duration=8.5),
                SupervisionSegment('sup2', 'rec1', start=10, duration=5),
                SupervisionSegment('sup3', 'rec1', start=20, duration=8),
            ],
        ),
        # Does not yield any "unsupervised" cut.
        Cut(
            'cut2', start=0, duration=30, channel=0,
            supervisions=[
                SupervisionSegment('sup4', 'rec1', start=0, duration=30),
            ],
        ),
    ])

    unsupervised_cuts = cut_set.trim_to_unsupervised_segments()

    assert len(unsupervised_cuts) == 3
    expected_spans = [(0, 1.5), (15, 5), (28, 2)]
    for cut, (start, duration) in zip(unsupervised_cuts, expected_spans):
        assert cut.start == start
        assert cut.duration == duration
        assert cut.supervisions == []
def cut1(dummy_features):
    """Fixture: a 10-second cut starting at 0 with two supervisions."""
    supervisions = [
        SupervisionSegment(id='sup-1', recording_id='irrelevant', start=0.5, duration=6.0),
        SupervisionSegment(id='sup-2', recording_id='irrelevant', start=7.0, duration=2.0),
    ]
    return Cut(
        id='cut-1',
        start=0.0,
        duration=10.0,
        features=dummy_features,
        supervisions=supervisions,
    )
def cut_with_relative_paths():
    """Fixture: a Cut whose features and recording reference relative storage paths."""
    features = Features(
        type='fbank',
        num_frames=1000,
        num_features=40,
        sampling_rate=8000,
        storage_type='lilcom_files',
        storage_path='storage_dir',
        storage_key='feats.llc',
        start=0,
        duration=10,
    )
    recording = Recording('rec', [AudioSource('file', [0], 'audio.wav')], 8000, 80000, 10.0)
    return Cut('cut', 0, 10, 0, features=features, recording=recording)
def overlapping_supervisions_cut():
    """Fixture: a 0.5s cut with four supervisions that overlap each other by 0.1s."""
    features = Features(
        recording_id='recording-1',
        channels=0,
        start=0,
        duration=0.5,
        type='fbank',
        num_frames=50,
        num_features=80,
        sampling_rate=16000,
        storage_type='lilcom',
        storage_path='test/fixtures/dummy_feats/storage/e66b6386-aee5-4a5a-8369-fdde1d2b97c7.llc',
    )
    # Starts are spelled out literally to keep the exact float values.
    supervisions = [
        SupervisionSegment(id='s1', recording_id='recording-1', start=0.0, duration=0.2),
        SupervisionSegment(id='s2', recording_id='recording-1', start=0.1, duration=0.2),
        SupervisionSegment(id='s3', recording_id='recording-1', start=0.2, duration=0.2),
        SupervisionSegment(id='s4', recording_id='recording-1', start=0.3, duration=0.2),
    ]
    return Cut(
        id='cut-1',
        start=0.0,
        duration=0.5,
        channel=0,
        features=features,
        supervisions=supervisions,
    )
def _read_features(cut: Cut) -> torch.Tensor:
    """Load the cut's feature matrix and wrap it in a torch tensor."""
    feats = cut.load_features()
    return torch.from_numpy(feats)
def _read_audio(cut: Cut) -> torch.Tensor:
    """Load the cut's audio and return its first channel as a torch tensor."""
    samples = cut.load_audio()
    return torch.from_numpy(samples[0])
def validate_cut(c: Cut, read_data: bool = False) -> None:
    """
    Assert the structural invariants of a cut.

    MixedCut: recursively validates every track's cut and checks track offsets
    are non-negative. MonoCut/PaddingCut: checks start/duration/sampling_rate
    and the presence of Features or a Recording; for regular cuts it further
    cross-checks features, recording, supervisions, and custom fields.

    :param c: the cut to validate (MixedCut, PaddingCut, or a regular cut).
    :param read_data: when True, also loads the features/audio (and custom
        array data) to verify their shapes against the cut's metadata.
    :raises AssertionError: on the first violated invariant.

    NOTE(review): several messages say "MonoCut" even on the MixedCut branch —
    looks like copy-paste wording; left as-is since they are runtime strings.
    """
    # Validate MixedCut
    if isinstance(c, MixedCut):
        assert (len(c.tracks) > 0), f"MonoCut {c.id}: Mixed cut must have at least one track."
        for idx, track in enumerate(c.tracks):
            # Each track wraps a cut of its own; validate it with the same flags.
            validate_cut(track.cut, read_data=read_data)
            assert (track.offset >= 0), f"MonoCut: {c.id}: track {idx} has a negative offset."
        return
    # Validate MonoCut and PaddingCut
    assert c.start >= 0, f"MonoCut {c.id}: start must be 0 or greater (got {c.start})"
    assert (
        c.duration > 0
    ), f"MonoCut {c.id}: duration must be greater than 0 (got {c.duration})"
    assert (
        c.sampling_rate > 0
    ), f"MonoCut {c.id}: sampling_rate must be greater than 0 (got {c.sampling_rate})"
    assert (
        c.has_features or c.has_recording
    ), f"MonoCut {c.id}: must have either Features or Recording attached."
    # The rest pertains only to regular Cuts
    if isinstance(c, PaddingCut):
        return
    # Conditions related to features
    if c.has_features:
        validate_features(c.features)
        assert c.channel == c.features.channels
        if read_data:
            # We are not passing "read_data" to "validate_features" to avoid loading feats twice;
            # we'll just validate the subset of the features relevant for the cut.
            feats = c.load_features()
            n_fr, n_ft = feats.shape
            assert (
                c.num_frames == n_fr
            ), f"MonoCut {c.id}: expected num_frames: {c.num_frames}, actual: {n_fr}"
            assert (
                c.num_features == n_ft
            ), f"MonoCut {c.id}: expected num_features: {c.num_features}, actual: {n_ft}"
    # Conditions related to recording
    if c.has_recording:
        validate_recording(c.recording)
        assert c.channel in c.recording.channel_ids
        if read_data:
            # We are not passing "read_data" to "validate_recording" to avoid loading audio twice;
            # we'll just validate the subset of the recording relevant for the cut.
            samples = c.load_audio()
            assert (
                c.num_samples == samples.shape[1]
            ), f"MonoCut {c.id}: expected {c.num_samples} samples, got {samples.shape[1]}"
    # Conditions related to supervisions
    for s in c.supervisions:
        validate_supervision(s)
        assert s.recording_id == c.recording_id, (
            f"MonoCut {c.id}: supervision {s.id} has a mismatched recording_id "
            f"(expected {c.recording_id}, supervision has {s.recording_id})")
        assert s.channel == c.channel, (
            f"MonoCut {c.id}: supervision {s.id} has a mismatched channel "
            f"(expected {c.channel}, supervision has {s.channel})")
    # Conditions related to custom fields
    if c.custom is not None:
        assert isinstance(
            c.custom, dict
        ), f"MonoCut {c.id}: custom field has to be set to a dict or None."
        for key, value in c.custom.items():
            if isinstance(value, Array):
                validate_array(value, read_data=read_data)
            elif isinstance(value, TemporalArray):
                validate_temporal_array(value, read_data=read_data)
                # A duration mismatch in a temporal array is only warned about,
                # not asserted — it may be a benign rounding difference.
                if not isclose(c.duration, value.duration):
                    logging.warning(
                        f"MonoCut {c.id}: possibly mismatched "
                        f"duration between cut ({c.duration}s) and temporal array "
                        f"in custom field '{key}' (num_frames={value.num_frames} * "
                        f"frame_shift={value.frame_shift} == duration={value.duration})."
                    )