def recording_set():
    """Fixture: a RecordingSet holding one two-source Recording ('x').

    Channel 0 is backed by a file source and channel 1 by a shell-command
    source, so both audio-loading code paths get exercised.
    """
    # NOTE(review): paths say 'text/fixtures' while other fixtures in this
    # codebase use 'test/fixtures' — confirm this is intentional.
    sources = [
        AudioSource(type='file', channels=[0], source='text/fixtures/mono_c0.wav'),
        AudioSource(type='command', channels=[1], source='cat text/fixtures/mono_c1.wav'),
    ]
    recording = Recording(
        id='x',
        sources=sources,
        sampling_rate=8000,
        num_samples=4000,
        duration=0.5,
    )
    return RecordingSet.from_recordings([recording])
def recording_set():
    """Fixture: a RecordingSet with a single Recording ("x") that has two
    sources — one read from a file, one produced by a shell command."""
    # NOTE(review): paths say "text/fixtures" while other fixtures in this
    # codebase use "test/fixtures" — confirm this is intentional.
    file_src = AudioSource(
        type="file", channels=[0], source="text/fixtures/mono_c0.wav"
    )
    cmd_src = AudioSource(
        type="command", channels=[1], source="cat text/fixtures/mono_c1.wav"
    )
    rec = Recording(
        id="x",
        sources=[file_src, cmd_src],
        sampling_rate=8000,
        num_samples=4000,
        duration=0.5,
    )
    return RecordingSet.from_recordings([rec])
def dummy_recording(unique_id: int) -> Recording:
    """Build a placeholder 1-second, 16 kHz Recording whose "audio" comes
    from an ``echo`` shell command.

    :param unique_id: integer used to derive a unique, zero-padded ID.
    """
    command_source = AudioSource(
        type='command',
        channels=[0],
        source='echo "dummy waveform"',
    )
    return Recording(
        id=f'dummy-recording-{unique_id:04d}',
        sources=[command_source],
        sampling_rate=16000,
        num_samples=16000,
        duration=1.0,
    )
def with_recording(self, sampling_rate: int, num_samples: int) -> Recording:
    """Write random samples to a temporary WAV file and return a Recording
    pointing at it.

    The open NamedTemporaryFile handle is appended to ``self.files`` so the
    file stays on disk until the owner cleans up.
    """
    tmp = NamedTemporaryFile('wb', suffix='.wav')
    self.files.append(tmp)
    waveform = np.random.rand(num_samples)
    # soundfile opens the file by name itself, so no flush of ``tmp`` is needed.
    soundfile.write(tmp.name, waveform, samplerate=sampling_rate)
    return Recording(
        id=str(uuid4()),
        sources=[AudioSource(type='file', channels=[0], source=tmp.name)],
        sampling_rate=sampling_rate,
        num_samples=num_samples,
        duration=num_samples / sampling_rate,
    )
def make_recording(sampling_rate: int, num_samples: int) -> Recording:
    # The idea is that we're going to write to a temporary file with a sine wave recording
    # of specified duration and sampling rate, and clean up only after the test is executed.
    """Yield a Recording backed by a temporary sine-wave WAV file.

    Implemented as a generator (pytest-fixture style) so the temporary file
    stays on disk while the test runs and is deleted afterwards.

    :param sampling_rate: sample rate of the generated audio.
    :param num_samples: total number of samples to generate.
    """
    with NamedTemporaryFile('wb', suffix='.wav') as f:
        duration = num_samples / sampling_rate
        # A 1 Hz sine wave spanning ``duration`` seconds.
        samples: np.ndarray = np.sin(
            2 * np.pi * np.arange(0, num_samples) / sampling_rate
        )
        soundfile.write(f, samples, samplerate=sampling_rate)
        # Fix: soundfile wrote through the *buffered* file object, so flush it
        # before yielding — consumers re-open the file by name (``f.name``) and
        # would otherwise risk reading a truncated WAV.
        f.flush()
        yield Recording(
            id=f'recording-{sampling_rate}-{duration}',
            sources=[AudioSource(type='file', channels=[0], source=f.name)],
            sampling_rate=sampling_rate,
            num_samples=num_samples,
            duration=duration,
        )
def cut_set():
    """Fixture: a CutSet with one fully-populated MonoCut plus variants
    lacking supervisions / recording / features, padded versions (left,
    right, both), and a self-mix with an offset."""
    base = MonoCut(
        id="cut-1",
        start=0.0,
        duration=10.0,
        channel=0,
        features=Features(
            type="fbank",
            num_frames=100,
            num_features=40,
            frame_shift=0.01,
            sampling_rate=16000,
            start=0.0,
            duration=10.0,
            storage_type="lilcom",
            storage_path="irrelevant",
            storage_key="irrelevant",
        ),
        recording=Recording(
            id="rec-1",
            sampling_rate=16000,
            num_samples=160000,
            duration=10.0,
            sources=[AudioSource(type="file", channels=[0], source="irrelevant")],
        ),
        supervisions=[
            SupervisionSegment(
                id="sup-1", recording_id="irrelevant", start=0.5, duration=6.0
            ),
            SupervisionSegment(
                id="sup-2", recording_id="irrelevant", start=7.0, duration=2.0
            ),
        ],
    )
    variants = [
        base,
        fastcopy(base, id="cut-nosup", supervisions=[]),
        fastcopy(base, id="cut-norec", recording=None),
        fastcopy(base, id="cut-nofeat", features=None),
        base.pad(duration=30.0, direction="left"),
        base.pad(duration=30.0, direction="right"),
        base.pad(duration=30.0, direction="both"),
        base.mix(base, offset_other_by=5.0, snr=8),
    ]
    return CutSet.from_cuts(variants)
def cut_set():
    """Fixture: a CutSet built from one fully-populated Cut plus variants
    missing supervisions / recording / features, padded copies, and a
    self-mix at a 5 s offset."""
    base = Cut(
        id='cut-1',
        start=0.0,
        duration=10.0,
        channel=0,
        features=Features(
            type='fbank',
            num_frames=100,
            num_features=40,
            frame_shift=0.01,
            sampling_rate=16000,
            start=0.0,
            duration=10.0,
            storage_type='lilcom',
            storage_path='irrelevant',
            storage_key='irrelevant',
        ),
        recording=Recording(
            id='rec-1',
            sampling_rate=16000,
            num_samples=160000,
            duration=10.0,
            sources=[AudioSource(type='file', channels=[0], source='irrelevant')],
        ),
        supervisions=[
            SupervisionSegment(
                id='sup-1', recording_id='irrelevant', start=0.5, duration=6.0
            ),
            SupervisionSegment(
                id='sup-2', recording_id='irrelevant', start=7.0, duration=2.0
            ),
        ],
    )
    all_cuts = [
        base,
        fastcopy(base, id='cut-nosup', supervisions=[]),
        fastcopy(base, id='cut-norec', recording=None),
        fastcopy(base, id='cut-nofeat', features=None),
        base.pad(duration=30.0, direction='left'),
        base.pad(duration=30.0, direction='right'),
        base.pad(duration=30.0, direction='both'),
        base.mix(base, offset_other_by=5.0, snr=8),
    ]
    return CutSet.from_cuts(all_cuts)
def with_recording(
    self, sampling_rate: int, num_samples: int, use_zeros: bool = False
) -> Recording:
    """Write noise (or silence) to a temporary WAV and return a Recording
    pointing at it.

    The open handle is kept in ``self.files`` so the file survives until the
    owner cleans up.

    :param sampling_rate: sample rate of the generated audio.
    :param num_samples: number of samples in the single-channel waveform.
    :param use_zeros: when True, write silence instead of random noise.
    """
    import torchaudio  # torchaudio does not have issues on M1 macs unlike soundfile

    tmp = NamedTemporaryFile("wb", suffix=".wav")
    self.files.append(tmp)
    if use_zeros:
        waveform = torch.zeros((1, num_samples))
    else:
        waveform = torch.rand((1, num_samples))
    torchaudio.save(tmp.name, waveform, sample_rate=sampling_rate)
    # Ensure the data reaches the disk before anyone re-opens the file by name.
    tmp.flush()
    os.fsync(tmp)
    return Recording(
        id=str(uuid4()),
        sources=[AudioSource(type="file", channels=[0], source=tmp.name)],
        sampling_rate=sampling_rate,
        num_samples=num_samples,
        duration=num_samples / sampling_rate,
    )
def make_recording_callhome(
    sph_path: Pathlike,
    recording_id: Optional[str] = None,
    relative_path_depth: Optional[int] = None,
    sph2pipe_path: Optional[Pathlike] = None,
) -> Recording:
    """
    This function creates manifests for CallHome recordings that are compressed
    with shorten, a rare and mostly unsupported codec. You will need to install
    sph2pipe (e.g. using Kaldi) in order to read these files.

    :param sph_path: path to the ``.sph`` file.
    :param recording_id: ID for the resulting Recording; defaults to the file's stem.
    :param relative_path_depth: when set and > 0, keep only that many trailing
        path components in the sph2pipe command, making the manifest relocatable.
    :param sph2pipe_path: path to the sph2pipe binary; when None, ``sph2pipe``
        is assumed to be on ``$PATH``.
    :return: a Recording whose audio is decoded on-the-fly by a sph2pipe command.
    """
    try:
        from sphfile import SPHFile
    except ImportError as e:
        # Fix: chain the original error so users see why the import failed.
        raise ImportError(
            "Please install sphfile (pip install sphfile) instead and "
            "try preparing CallHome English again."
        ) from e
    if sph2pipe_path is None:
        sph2pipe_path = 'sph2pipe'
    else:
        sph2pipe_path = str(sph2pipe_path).strip()
    sph_path = Path(sph_path)
    # Read the SPH header for sample rate / count / channel layout.
    sphf = SPHFile(sph_path)
    return Recording(
        id=recording_id if recording_id is not None else sph_path.stem,
        sampling_rate=sphf.format['sample_rate'],
        num_samples=sphf.format['sample_count'],
        duration=sphf.format['sample_count'] / sphf.format['sample_rate'],
        sources=[
            AudioSource(
                type='command',
                channels=list(range(sphf.format['channel_count'])),
                # Decode via sph2pipe; optionally truncate the path to the last
                # ``relative_path_depth`` components for relocatable manifests.
                source=f'{sph2pipe_path} -f wav -p ' + (
                    '/'.join(sph_path.parts[-relative_path_depth:])
                    if relative_path_depth is not None and relative_path_depth > 0
                    else str(sph_path)
                )
            )
        ]
    )
def file_source():
    """Fixture: a single-channel, file-backed AudioSource for a mono WAV."""
    return AudioSource(
        type='file',
        channels=[0],
        source='test/fixtures/mono_c0.wav',
    )
def file_source():
    """Fixture: an AudioSource reading channel 0 from a mono WAV fixture file."""
    source = AudioSource(
        type="file",
        channels=[0],
        source="test/fixtures/mono_c0.wav",
    )
    return source