def perturb(self, data):
    # Draw a random impulse-response record from the manifest.
    impulse_record = self._rng.sample(self._manifest.data, 1)[0]
    impulse = AudioSegment.from_file(impulse_record['audio_filepath'], target_sr=data.sample_rate)
    logging.debug("impulse: %s", impulse_record['audio_filepath'])
    # Convolve the signal with the impulse response; "full" mode lengthens the output
    # by the impulse-response length minus one.
    data._samples = signal.fftconvolve(data.samples, impulse.samples, "full")
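# A minimal, self-contained sketch (not part of the class above) illustrating why the
# convolution mode matters here: fftconvolve with "full" returns len(x) + len(h) - 1
# samples, so the perturbed signal grows by the impulse-response length, while "same"
# keeps the original length. Shapes below are illustrative only.
import numpy as np
from scipy import signal

x = np.random.randn(16000)   # one second of audio at 16 kHz
h = np.random.randn(4000)    # a 0.25 s impulse response

full = signal.fftconvolve(x, h, "full")
same = signal.fftconvolve(x, h, "same")
assert full.shape[0] == x.shape[0] + h.shape[0] - 1
assert same.shape[0] == x.shape[0]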
def perturb(self, data):
    # Normalize to a fixed attenuation level before transcoding.
    att_factor = 0.8
    max_level = np.max(np.abs(data._samples))
    if max_level == 0:
        max_level = 1  # avoid division by zero on silent inputs
    norm_factor = att_factor / max_level
    norm_samples = norm_factor * data._samples

    # Write the normalized audio to a temporary 16 kHz wav file.
    orig_f = NamedTemporaryFile(suffix=".wav")
    sf.write(orig_f.name, norm_samples.transpose(), 16000)

    # Pick a codec at random and round-trip the audio through it with sox.
    codec_ind = random.randint(0, len(self._codecs) - 1)
    if self._codecs[codec_ind] == "amr-nb":
        transcoded_f = NamedTemporaryFile(suffix="_amr.wav")
        rates = list(range(0, 8))
        rate = rates[random.randint(0, len(rates) - 1)]
        _ = subprocess.check_output(
            f"sox {orig_f.name} -V0 -C {rate} -t amr-nb - | "
            f"sox -t amr-nb - -V0 -b 16 -r 16000 {transcoded_f.name}",
            shell=True,
        )
    elif self._codecs[codec_ind] == "g711":
        transcoded_f = NamedTemporaryFile(suffix="_g711.wav")
        _ = subprocess.check_output(
            f"sox {orig_f.name} -V0 -r 8000 -c 1 -e a-law {transcoded_f.name}", shell=True
        )

    # Reload the transcoded audio and truncate it to the original length.
    new_data = AudioSegment.from_file(transcoded_f.name, target_sr=16000)
    data._samples = new_data._samples[0:data._samples.shape[0]]
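# A standalone sketch of the g711 round trip used above, runnable without the class.
# It assumes the sox binary is on PATH; the tone and file names are illustrative only.
import subprocess
from tempfile import NamedTemporaryFile

import numpy as np
import soundfile as sf

sr = 16000
tone = 0.5 * np.sin(2 * np.pi * 440 * np.arange(sr) / sr)  # 1 s, 440 Hz

with NamedTemporaryFile(suffix=".wav") as orig_f, NamedTemporaryFile(suffix="_g711.wav") as coded_f:
    sf.write(orig_f.name, tone, sr)
    # Round-trip through 8 kHz A-law, the same command the perturbation issues.
    subprocess.check_output(f"sox {orig_f.name} -V0 -r 8000 -c 1 -e a-law {coded_f.name}", shell=True)
    degraded, coded_sr = sf.read(coded_f.name)
    print(coded_sr, degraded.shape)  # 8000 Hz here; the perturbation resamples back to 16 kHz on load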
def read_one_audiosegment(manifest, target_sr, rng, tarred_audio=False, audio_dataset=None):
    if tarred_audio:
        # Tarred datasets are iterated sequentially; map the file id back to its manifest entry.
        if audio_dataset is None:
            raise TypeError("Expected augmentation dataset but got None")
        audio_file, file_id = next(audio_dataset)
        manifest_idx = manifest.mapping[file_id]
        manifest_entry = manifest[manifest_idx]

        offset = 0 if manifest_entry.offset is None else manifest_entry.offset
        duration = 0 if manifest_entry.duration is None else manifest_entry.duration
    else:
        # Loose audio files can be sampled uniformly at random from the manifest.
        audio_record = rng.sample(manifest.data, 1)[0]
        audio_file = audio_record.audio_file

        offset = 0 if audio_record.offset is None else audio_record.offset
        duration = 0 if audio_record.duration is None else audio_record.duration

    return AudioSegment.from_file(audio_file, target_sr=target_sr, offset=offset, duration=duration)
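# An illustration of the manifest interface read_one_audiosegment assumes, using stand-in
# objects; the real manifest class comes from the surrounding project and may differ.
import random
from types import SimpleNamespace

entry = SimpleNamespace(audio_file="noise_0001.wav", offset=None, duration=None)
manifest = SimpleNamespace(data=[entry])

rng = random.Random(0)
record = rng.sample(manifest.data, 1)[0]       # the non-tarred path samples like this
offset = 0 if record.offset is None else record.offset
duration = 0 if record.duration is None else record.duration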
def perturb(self, data):
    # Sample a target SNR and a random noise recording from the manifest.
    snr_db = self._rng.uniform(self._min_snr_db, self._max_snr_db)
    noise_record = self._rng.sample(self._manifest.data, 1)[0]
    noise = AudioSegment.from_file(noise_record.audio_file, target_sr=data.sample_rate)
    noise_gain_db = min(data.rms_db - noise.rms_db - snr_db, self._max_gain_db)
    # logging.debug("noise: %s %s %s", snr_db, noise_gain_db, noise_record.audio_file)

    # Calculate the noise segment to use.
    start_time = self._rng.uniform(0.0, noise.duration - data.duration)
    if noise.duration > (start_time + data.duration):
        noise.subsegment(start_time=start_time, end_time=start_time + data.duration)

    # Adjust the gain for SNR purposes and superimpose the noise.
    noise.gain_db(noise_gain_db)
    if noise._samples.shape[0] < data._samples.shape[0]:
        # The noise clip is shorter than the signal: add it at a random offset.
        noise_idx = self._rng.randint(0, data._samples.shape[0] - noise._samples.shape[0])
        data._samples[noise_idx:noise_idx + noise._samples.shape[0]] += noise._samples
    else:
        data._samples += noise._samples
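# A quick check of the gain arithmetic above, assuming rms_db = 20 * log10(rms(x)):
# scaling the noise by noise_gain_db makes (clean_rms_db - new_noise_rms_db) equal the
# requested SNR whenever the max-gain cap does not kick in. Signals are synthetic.
import numpy as np

def rms_db(x):
    return 20 * np.log10(np.sqrt(np.mean(x ** 2)))

clean = np.random.randn(16000)
noise = 0.1 * np.random.randn(16000)
snr_db = 10.0

gain_db = rms_db(clean) - rms_db(noise) - snr_db
noise_scaled = noise * 10 ** (gain_db / 20)
assert abs((rms_db(clean) - rms_db(noise_scaled)) - snr_db) < 1e-6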
def perturb(self, data):
    impulse_record = self._rng.sample(self._manifest.data, 1)[0]
    impulse = AudioSegment.from_file(impulse_record.audio_file, target_sr=data.sample_rate)
    # logging.debug("impulse: %s", impulse_record.audio_file)

    # Min-max normalize the impulse response, then convolve; "same" mode keeps the
    # output the same length as the input signal.
    impulse_norm = (impulse.samples - min(impulse.samples)) / (max(impulse.samples) - min(impulse.samples))
    data._samples = signal.fftconvolve(data._samples, impulse_norm, "same")
def process(self, file_path, offset=0, duration=0, trim=False):
    audio = AudioSegment.from_file(
        file_path,
        target_sr=self.sample_rate,
        int_values=self.int_values,
        offset=offset,
        duration=duration,
        trim=trim,
    )
    return self.process_segment(audio)
def exposed_get_path_samples(self, file_path, target_sr, int_values, offset, duration, trim):
    print(f"loading.. {file_path}")
    audio = AudioSegment.from_file(
        file_path,
        target_sr=target_sr,
        int_values=int_values,
        offset=offset,
        duration=duration,
        trim=trim,
    )
    # print(f"returning.. {len(audio.samples)} items of type {type(audio.samples)}")
    # Pickle the sample array so it can be sent back over the RPC connection by value.
    return pickle.dumps(audio.samples)
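# A hedged sketch of the client side for the RPC method above, assuming it is exposed
# through an rpyc service; the host, port, and file path here are placeholders.
import pickle

import rpyc

conn = rpyc.connect("localhost", 18861)
payload = conn.root.get_path_samples(
    "sample.wav", target_sr=16000, int_values=False, offset=0, duration=0, trim=False
)
samples = pickle.loads(payload)  # the service pickles the sample array before returning it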
def perturb(self, data):
    # Sample a target SNR and a random noise recording from the manifest.
    snr_db = self._rng.uniform(self._min_snr_db, self._max_snr_db)
    noise_record = self._rng.sample(self._manifest.data, 1)[0]
    noise = AudioSegment.from_file(noise_record['audio_filepath'], target_sr=data.sample_rate)
    noise_gain_db = min(data.rms_db - noise.rms_db - snr_db, self._max_gain_db)
    logging.debug("noise: %s %s %s", snr_db, noise_gain_db, noise_record['audio_filepath'])

    # Calculate the noise segment to use; this assumes the noise recording is at
    # least as long as the input signal.
    start_time = self._rng.uniform(0.0, noise.duration - data.duration)
    noise.subsegment(start_time=start_time, end_time=start_time + data.duration)

    # Adjust the gain for SNR purposes and superimpose the noise.
    noise.gain_db(noise_gain_db)
    data._samples = data._samples + noise.samples