def test_extend_by_cut_with_supervision(
    cut_start,
    cut_duration,
    extend_duration,
    extend_direction,
    supervision_start,
    supervision_duration,
    expected_start,
    expected_end,
):
    """
    Extend a cut that carries one supervision and check the supervision's bounds.

    A dummy cut is built with a single supervision segment, extended with
    ``extend_by``, and the first supervision of the extended cut is compared
    against ``expected_start`` and ``expected_end``.

    NOTE(review): the arguments are presumably supplied by a parametrize
    decorator that is not part of this block.
    """
    recording = dummy_recording(int(uuid4()), duration=1.0)
    segment = SupervisionSegment(
        id=int(uuid4()),
        recording_id=recording.id,
        start=supervision_start,
        duration=supervision_duration,
    )
    supervision_set = SupervisionSet.from_segments([segment])
    cut = dummy_cut(
        int(uuid4()),
        start=cut_start,
        duration=cut_duration,
        supervisions=supervision_set,
    )

    extended_cut = cut.extend_by(
        duration=extend_duration, direction=extend_direction
    )

    # Only the first (and only) supervision is inspected.
    extended_supervision = extended_cut.supervisions[0]
    assert isclose(extended_supervision.start, expected_start)
    assert isclose(extended_supervision.end, expected_end)
def random_cut_set(n_cuts=100) -> CutSet:
    """
    Build a ``CutSet`` of ``n_cuts`` randomly placed ``MonoCut`` objects.

    Each cut gets a fresh UUID as its id, a start drawn uniformly from [0, 5]
    and a duration drawn uniformly from [3, 10], both rounded to 8 decimal
    digits. Every cut owns its own ``Recording`` with no audio sources,
    a 16000 sampling rate, 1600000 samples and a duration of 100.0.

    :param n_cuts: how many cuts to generate.
    :return: the ``CutSet`` created by ``CutSet.from_cuts``.
    """

    def _make_cut():
        # The order of the draws (id, start, duration, recording id) is kept
        # so that seeded runs consume the random stream in the same sequence.
        cut_id = uuid4()
        start = round(random.uniform(0, 5), ndigits=8)
        duration = round(random.uniform(3, 10), ndigits=8)
        recording = Recording(
            id=uuid4(),
            sources=[],
            sampling_rate=16000,
            num_samples=1600000,
            duration=100.0,
        )
        return MonoCut(
            id=cut_id,
            start=start,
            duration=duration,
            channel=0,
            recording=recording,
        )

    # A generator is passed on purpose: cuts are created lazily as consumed.
    return CutSet.from_cuts(_make_cut() for _ in range(n_cuts))
def test_cut_set_extend_by():
    """
    Extend every cut of a two-element ``CutSet`` by 0.3 in both directions.

    With ``preserve_id=True`` the extended cuts are looked up under the
    original ids. The expected start of the second cut is 0.0 even though
    0.2 - 0.3 is negative, i.e. the test expects the start not to drop
    below zero.
    """
    first = dummy_cut(int(uuid4()), start=0.0, duration=0.5)
    second = dummy_cut(int(uuid4()), start=0.2, duration=0.4)

    extended = CutSet.from_cuts([first, second]).extend_by(
        duration=0.3, direction="both", preserve_id=True
    )

    # (cut id, expected start, expected end)
    expectations = (
        (first.id, 0.0, 0.8),
        (second.id, 0.0, 0.9),
    )
    for cut_id, expected_start, expected_end in expectations:
        assert isclose(extended[cut_id].start, expected_start)
        assert isclose(extended[cut_id].end, expected_end)
def store_feature_array(
    feats: np.ndarray,
    output_dir: Pathlike,
    compress: bool = True,
    lilcom_tick_power: int = -5,
) -> Path:
    """
    Write ``feats`` to a new, uniquely named file under ``output_dir / 'storage'``.

    The ``storage`` sub-directory is created when missing. The file name is
    a freshly generated UUID with the ``.llc`` suffix for compressed arrays
    and ``.npy`` otherwise.

    :param feats: a numpy ndarray containing features.
    :param output_dir: a path to the directory where the features will be stored.
    :param compress: a bool, whether the saved features should be compressed with
        ``lilcom``; when ``False`` the array is saved with ``np.save`` instead.
    :param lilcom_tick_power: precision of ``lilcom`` compression, forwarded as
        ``tick_power``; greater negative values (e.g. -8) might be appropriate
        for non-log space features.
    :return: a path to the file containing the stored array.
    """
    storage_dir = Path(output_dir) / 'storage'
    storage_dir.mkdir(parents=True, exist_ok=True)

    suffix = '.llc' if compress else '.npy'
    output_features_path = (storage_dir / str(uuid4())).with_suffix(suffix)

    if not compress:
        # Plain numpy format; pickling is explicitly disallowed.
        np.save(output_features_path, feats, allow_pickle=False)
        return output_features_path

    compressed = lilcom.compress(feats, tick_power=lilcom_tick_power)
    with output_features_path.open('wb') as sink:
        sink.write(compressed)
    return output_features_path
def with_cut(
    self,
    sampling_rate: int,
    num_samples: int,
    features: bool = True,
    supervision: bool = False,
    alignment: bool = False,
    frame_shift: Seconds = 0.01,
) -> MonoCut:
    """
    Create a ``MonoCut`` spanning a recording produced by ``self.with_recording``.

    The cut starts at 0 on channel 0 and lasts ``num_samples / sampling_rate``.

    :param sampling_rate: sampling rate forwarded to ``with_recording``.
    :param num_samples: number of samples forwarded to ``with_recording``.
    :param features: when true, the cut is replaced by the result of
        ``self._with_features(cut, frame_shift=frame_shift)``.
    :param supervision: when true, a single supervision covering the whole cut
        with the text ``'irrelevant'`` is appended.
    :param alignment: when true (and ``supervision`` is true), the supervision's
        alignment comes from ``self._with_alignment``; otherwise it is ``None``.
    :param frame_shift: frame shift handed to ``_with_features``.
    :return: the constructed cut.
    """
    duration = num_samples / sampling_rate
    cut_id = str(uuid4())
    recording = self.with_recording(
        sampling_rate=sampling_rate, num_samples=num_samples
    )
    cut = MonoCut(
        id=cut_id, start=0, duration=duration, channel=0, recording=recording
    )

    if features:
        cut = self._with_features(cut, frame_shift=frame_shift)

    if not supervision:
        return cut

    ali = self._with_alignment(cut, 'irrelevant') if alignment else None
    segment = SupervisionSegment(
        id=f'sup-{cut.id}',
        recording_id=cut.recording_id,
        start=0,
        duration=cut.duration,
        text='irrelevant',
        alignment=ali,
    )
    cut.supervisions.append(segment)
    return cut
def with_cut(
    self,
    sampling_rate: int,
    num_samples: int,
    features: bool = True,
    supervision: bool = False,
) -> Cut:
    """
    Create a ``Cut`` spanning a recording produced by ``self.with_recording``.

    The cut starts at 0 on channel 0 and lasts ``num_samples / sampling_rate``.

    :param sampling_rate: sampling rate forwarded to ``with_recording``.
    :param num_samples: number of samples forwarded to ``with_recording``.
    :param features: when true, the cut is replaced by the result of
        ``self._with_features(cut)``.
    :param supervision: when true, a single supervision covering the whole cut
        with the text ``'irrelevant'`` is appended.
    :return: the constructed cut.
    """
    duration = num_samples / sampling_rate
    cut_id = str(uuid4())
    recording = self.with_recording(
        sampling_rate=sampling_rate, num_samples=num_samples
    )
    cut = Cut(
        id=cut_id, start=0, duration=duration, channel=0, recording=recording
    )

    if features:
        cut = self._with_features(cut)

    if not supervision:
        return cut

    segment = SupervisionSegment(
        id=f'sup-{cut.id}',
        recording_id=cut.recording_id,
        start=0,
        duration=cut.duration,
        text='irrelevant',
    )
    cut.supervisions.append(segment)
    return cut
def with_cut(
    self,
    sampling_rate: int,
    num_samples: int,
    features: bool = True,
    supervision: bool = False,
    alignment: bool = False,
    custom_field: bool = False,
    frame_shift: Seconds = 0.01,
) -> MonoCut:
    """
    Create a ``MonoCut`` spanning a recording produced by ``self.with_recording``.

    The cut starts at 0 on channel 0 and lasts ``num_samples / sampling_rate``.

    :param sampling_rate: sampling rate forwarded to ``with_recording``.
    :param num_samples: number of samples forwarded to ``with_recording``.
    :param features: when true, the cut is replaced by the result of
        ``self._with_features(cut, frame_shift=frame_shift)``.
    :param supervision: when true, a single supervision covering the whole cut
        with the text ``"irrelevant"`` is appended.
    :param alignment: when true (and ``supervision`` is true), the supervision's
        alignment comes from ``self._with_alignment``; otherwise it is ``None``.
    :param custom_field: when true, ``self._with_custom_temporal_array`` is
        called on the cut; its return value is not used here.
    :param frame_shift: frame shift handed to ``_with_features`` and
        ``_with_custom_temporal_array``.
    :return: the constructed cut.
    """
    duration = num_samples / sampling_rate
    cut_id = str(uuid4())
    recording = self.with_recording(
        sampling_rate=sampling_rate, num_samples=num_samples
    )
    cut = MonoCut(
        id=cut_id, start=0, duration=duration, channel=0, recording=recording
    )

    if features:
        cut = self._with_features(cut, frame_shift=frame_shift)

    if supervision:
        ali = self._with_alignment(cut, "irrelevant") if alignment else None
        segment = SupervisionSegment(
            id=f"sup-{cut.id}",
            recording_id=cut.recording_id,
            start=0,
            duration=cut.duration,
            text="irrelevant",
            alignment=ali,
        )
        cut.supervisions.append(segment)

    if custom_field:
        # Called for its effect on ``cut``; the result is deliberately ignored.
        self._with_custom_temporal_array(cut=cut, frame_shift=frame_shift)

    return cut
def test_extend_by_cut_preserve_id(preserve_id):
    """
    Check that ``extend_by`` keeps the cut id only when ``preserve_id`` is set.

    NOTE(review): ``preserve_id`` is presumably supplied by a parametrize
    decorator that is not part of this block.
    """
    original = dummy_cut(int(uuid4()), start=0.0, duration=0.5)
    extended_cut = original.extend_by(
        duration=0.3, direction="right", preserve_id=preserve_id
    )

    if not preserve_id:
        assert extended_cut.id != original.id
        return
    assert extended_cut.id == original.id
def random_cut_set(n_cuts=100) -> CutSet:
    """
    Build a ``CutSet`` of ``n_cuts`` randomly placed ``MonoCut`` objects.

    Start and duration are drawn as whole numbers of samples (start from
    [0, 5 * sr], duration from [3 * sr, 10 * sr] with ``sr = 16000``) and then
    divided by ``sr``, so both are multiples of one sample period. Every cut
    owns its own ``Recording`` with no audio sources, a 16000 sampling rate,
    1600000 samples and a duration of 100.0.

    :param n_cuts: how many cuts to generate.
    :return: the ``CutSet`` created by ``CutSet.from_cuts``.
    """
    sr = 16000

    def _make_cut():
        # The order of the draws (id, start, duration, recording id) is kept
        # so that seeded runs consume the random stream in the same sequence.
        cut_id = uuid4()
        start = random.randint(0, 5 * sr) / sr
        duration = random.randint(3 * sr, 10 * sr) / sr
        recording = Recording(
            id=uuid4(),
            sources=[],
            sampling_rate=16000,
            num_samples=1600000,
            duration=100.0,
        )
        return MonoCut(
            id=cut_id,
            start=start,
            duration=duration,
            channel=0,
            recording=recording,
        )

    # A generator is passed on purpose: cuts are created lazily as consumed.
    return CutSet.from_cuts(_make_cut() for _ in range(n_cuts))
def test_extend_by_cut(
    cut_start,
    cut_duration,
    extend_duration,
    extend_direction,
    expected_start,
    expected_end,
):
    """
    Extend a dummy cut and compare its new start and end with the expected values.

    NOTE(review): the arguments are presumably supplied by a parametrize
    decorator that is not part of this block.
    """
    original = dummy_cut(int(uuid4()), start=cut_start, duration=cut_duration)

    extended_cut = original.extend_by(
        duration=extend_duration, direction=extend_direction
    )

    assert isclose(extended_cut.start, expected_start)
    assert isclose(extended_cut.end, expected_end)
def test_extend_by_cut_with_features(
    cut_start,
    cut_duration,
    feature_start,
    feature_duration,
    extend_duration,
    extend_direction,
    expected,
):
    """
    Extend a cut that has features attached and check what happens to them.

    When ``expected`` is truthy the extended cut must carry features equal to
    the original ones; otherwise its ``features`` attribute must be ``None``.

    NOTE(review): the arguments are presumably supplied by a parametrize
    decorator that is not part of this block.
    """
    features = dummy_features(
        int(uuid4()), start=feature_start, duration=feature_duration
    )
    # ``features`` is built first and passed by keyword, so the positional id
    # is drawn after it, matching the original order of the ``uuid4`` calls.
    cut = dummy_cut(
        features=features,
        start=cut_start,
        duration=cut_duration,
        unique_id=int(uuid4()),
    ) if False else dummy_cut(
        int(uuid4()),
        start=cut_start,
        duration=cut_duration,
        features=features,
    )

    extended_cut = cut.extend_by(
        duration=extend_duration, direction=extend_direction
    )

    if not expected:
        assert extended_cut.features is None
        return
    assert extended_cut.features == cut.features
def with_recording(self, sampling_rate: int, num_samples: int) -> Recording:
    """
    Write random samples to a temporary WAV file and describe it as a ``Recording``.

    :param sampling_rate: sampling rate used for the file and the manifest.
    :param num_samples: number of random samples to generate.
    :return: a ``Recording`` with a fresh UUID id and a single-channel file
        source pointing at the temporary file; its duration is
        ``num_samples / sampling_rate``.
    """
    wav_file = NamedTemporaryFile('wb', suffix='.wav')
    # The handle is stored on the instance; presumably this keeps the
    # temporary file from being removed while the recording is in use.
    self.files.append(wav_file)

    duration = num_samples / sampling_rate
    samples = np.random.rand(num_samples)
    soundfile.write(wav_file.name, samples, samplerate=sampling_rate)

    recording_id = str(uuid4())
    source = AudioSource(type='file', channels=[0], source=wav_file.name)
    return Recording(
        id=recording_id,
        sources=[source],
        sampling_rate=sampling_rate,
        num_samples=num_samples,
        duration=duration,
    )
def test_extend_by_cut_with_temporal_array(
    cut_start,
    cut_duration,
    array_start,
    extend_duration,
    extend_direction,
    expected,
):
    """
    Extend a cut that has a custom ``temporal_array`` field attached.

    When ``expected`` is truthy the extended cut must expose a temporal array
    equal to the original one; otherwise loading the custom field from the
    extended cut must raise ``ValueError``.

    NOTE(review): the arguments are presumably supplied by a parametrize
    decorator that is not part of this block.
    """
    cut_uid = int(uuid4())
    recording = dummy_recording(int(uuid4()), duration=1.5)
    cut = dummy_cut(
        cut_uid,
        start=cut_start,
        duration=cut_duration,
        features=None,
        recording=recording,
    )
    cut.temporal_array = dummy_temporal_array(start=array_start)

    extended_cut = cut.extend_by(
        duration=extend_duration, direction=extend_direction
    )

    if not expected:
        with pytest.raises(ValueError):
            extended_cut.load_custom("temporal_array")
        return
    assert extended_cut.temporal_array == cut.temporal_array
def store_feature_array(
    feats: np.ndarray,
    storage: FeaturesWriter,
) -> str:
    """
    Store ``feats`` through ``storage`` under a newly generated UUID.

    :param feats: a numpy ndarray containing features.
    :param storage: a ``FeaturesWriter`` object to use for array storage.
    :return: the storage key returned by ``storage.write``.
    """
    return storage.write(str(uuid4()), feats)
def with_cut(
    self, sampling_rate: int, num_samples: int, features: bool = True
) -> Cut:
    """
    Create a ``Cut`` spanning a recording produced by ``self.with_recording``.

    The cut starts at 0 on channel 0 and lasts ``num_samples / sampling_rate``.

    :param sampling_rate: sampling rate forwarded to ``with_recording``.
    :param num_samples: number of samples forwarded to ``with_recording``.
    :param features: when true, the result of ``self._with_features(cut)`` is
        returned instead of the bare cut.
    :return: the constructed cut.
    """
    duration = num_samples / sampling_rate
    cut_id = str(uuid4())
    recording = self.with_recording(
        sampling_rate=sampling_rate, num_samples=num_samples
    )
    bare_cut = Cut(
        id=cut_id, start=0, duration=duration, channel=0, recording=recording
    )
    return self._with_features(bare_cut) if features else bare_cut
def with_recording(
    self, sampling_rate: int, num_samples: int, use_zeros: bool = False
) -> Recording:
    """
    Write samples to a temporary WAV file and describe it as a ``Recording``.

    :param sampling_rate: sampling rate used for the file and the manifest.
    :param num_samples: number of samples to generate.
    :param use_zeros: when true the signal is all zeros, otherwise it is drawn
        with ``torch.rand``.
    :return: a ``Recording`` with a fresh UUID id and a single-channel file
        source pointing at the temporary file; its duration is
        ``num_samples / sampling_rate``.
    """
    import torchaudio  # torchaudio does not have issues on M1 macs unlike soundfile

    wav_file = NamedTemporaryFile("wb", suffix=".wav")
    # The handle is stored on the instance; presumably this keeps the
    # temporary file from being removed while the recording is in use.
    self.files.append(wav_file)

    duration = num_samples / sampling_rate
    shape = (1, num_samples)
    samples = torch.zeros(shape) if use_zeros else torch.rand(shape)
    torchaudio.save(wav_file.name, samples, sample_rate=sampling_rate)

    # Flush and sync the handle before the path is handed out.
    wav_file.flush()
    os.fsync(wav_file)

    recording_id = str(uuid4())
    source = AudioSource(type="file", channels=[0], source=wav_file.name)
    return Recording(
        id=recording_id,
        sources=[source],
        sampling_rate=sampling_rate,
        num_samples=num_samples,
        duration=duration,
    )
def random_cut_set(n_cuts=100) -> CutSet:
    """
    Build a ``CutSet`` of ``n_cuts`` randomly placed ``Cut`` objects.

    Each cut gets a fresh UUID as its id, a start drawn uniformly from [0, 5]
    and a duration drawn uniformly from [3, 10], both rounded to 8 decimal
    digits, on channel 0. No recording is attached.

    :param n_cuts: how many cuts to generate.
    :return: the ``CutSet`` created by ``CutSet.from_cuts``.
    """

    def _make_cut():
        # The order of the draws (id, start, duration) is kept so that seeded
        # runs consume the random stream in the same sequence.
        cut_id = uuid4()
        start = round(random.uniform(0, 5), ndigits=8)
        duration = round(random.uniform(3, 10), ndigits=8)
        return Cut(id=cut_id, start=start, duration=duration, channel=0)

    # A generator is passed on purpose: cuts are created lazily as consumed.
    return CutSet.from_cuts(_make_cut() for _ in range(n_cuts))