def downsample_wav(file, factor):
    """Write a copy of ``file`` resampled to ``1 / factor`` of its sample rate.

    The output is written next to the input, with a trailing ``.wav``
    replaced by ``_lr.wav`` (or ``_lr.wav`` appended when ``file`` does not
    end in ``.wav``).

    Args:
        file: Path of the audio file to downsample.
        factor: Divisor applied to the input sample rate.
    """
    hr_sample_rate = sox.file_info.sample_rate(file)
    lr_sample_rate = hr_sample_rate / factor

    # Strip only a *trailing* ".wav". Splitting on the first ".wav" would
    # truncate any path containing that text earlier (e.g. "a.wav.d/b.wav").
    stem = file[:-len('.wav')] if file.endswith('.wav') else file
    out_path = stem + "_lr.wav"

    tfm = sox.Transformer()
    tfm.rate(lr_sample_rate)
    tfm.build(file, out_path)
def ensure_samplerate(audio_path):
    """Resample ``audio_path`` in place to 44100 Hz if it has another rate.

    The converted audio is first written to a temporary ``.wav`` file and
    then moved over the original path. Files already at 44100 Hz are left
    untouched.

    Args:
        audio_path: Path of the audio file to check and, if needed, replace.
    """
    samplerate = sox.file_info.sample_rate(audio_path)
    if samplerate == 44100:
        return

    tfm = sox.Transformer()
    tfm.rate(44100)

    # mkstemp returns an *open* descriptor; close it immediately so it is not
    # leaked (sox writes to the path, not to the descriptor).
    fd, pyin_audio = tempfile.mkstemp(suffix='.wav')
    os.close(fd)
    try:
        tfm.build(audio_path, pyin_audio)
    except Exception:
        # Do not leave the temporary file behind when the conversion fails.
        if os.path.exists(pyin_audio):
            os.remove(pyin_audio)
        raise

    os.remove(audio_path)
    shutil.move(pyin_audio, audio_path)
def creat_vol_augmentation(filelist, source_folder, target_folder, vol_list):
    """Write one volume-scaled copy of every file for every gain value.

    Each output is named ``<stem>_<volume><ext>`` and placed under
    ``target_folder``.

    Args:
        filelist: File names, relative to ``source_folder``.
        source_folder: Prefix prepended to each name to locate the input
            (plain concatenation, so it should end with a path separator).
        target_folder: Prefix prepended to each output name (plain
            concatenation, so it should end with a path separator).
        vol_list: Gain values, each passed to ``Transformer.vol``.
    """
    import os  # local import: needed for splitext below

    for volume in vol_list:
        # A fresh transformer per volume: effect methods append to the
        # transformer's chain, so reusing one instance would stack every
        # previous gain on top of the current one.
        aug_generator = sox.Transformer()
        aug_generator.vol(volume)
        for files in filelist:
            # splitext keeps names with several dots intact ("a.b.wav").
            stem, ext = os.path.splitext(files)
            # Use the target_folder argument; the previous code ignored it
            # and read a module-level TARGET_FOLDER instead.
            save_filename = target_folder + stem + '_' + str(volume) + ext
            print(save_filename)
            aug_generator.build(source_folder + files, save_filename)
def creat_speed_vol_augmentation(filelist, target_folder, speed, volume):
    """Write a speed- and volume-modified copy of every file in ``filelist``.

    A single transformer applying ``vol(volume)`` followed by ``speed(speed)``
    is built once and reused for all files. Output names have the form
    ``s<speed>_v<volume>_<basename>`` under a two-level directory path.

    Args:
        filelist: '/'-separated input paths with at least two components.
        target_folder: Accepted but not read by this function.
        speed: Factor passed to ``Transformer.speed``.
        volume: Gain passed to ``Transformer.vol``.
    """
    aug_generator = sox.Transformer()
    aug_generator.vol(volume)
    aug_generator.speed(speed)

    for files in filelist:
        # NOTE(review): `TARGET_FOLDER` and `filename` are not parameters or
        # locals here; they must exist at module level. `filename` looks like
        # it may have been meant to be `files` -- confirm with the caller.
        save_filename = ''.join([
            TARGET_FOLDER,
            filename.split('.')[0].split('/')[1],
            '/',
            files.split('/')[-2],
            '/s',
            str(speed),
            '_v',
            str(volume),
            '_',
            files.split('/')[-1],
        ])
        aug_generator.build(files, save_filename)
def upsample_wav(file, factor):
    """Write a copy of ``file`` resampled to ``factor`` times its sample rate.

    The output is written next to the input, with a trailing ``.wav``
    replaced by ``_hr.wav`` (or ``_hr.wav`` appended when ``file`` does not
    end in ``.wav``).

    Args:
        file: Path of the audio file to upsample.
        factor: Multiplier applied to the input sample rate.
    """
    lr_sample_rate = sox.file_info.sample_rate(file)
    hr_sample_rate = lr_sample_rate * factor

    # Strip only a *trailing* ".wav". Splitting on the first ".wav" would
    # truncate any path containing that text earlier (e.g. "a.wav.d/b.wav").
    stem = file[:-len('.wav')] if file.endswith('.wav') else file
    out_path = stem + "_hr.wav"

    tfm = sox.Transformer()
    tfm.rate(hr_sample_rate)
    tfm.build(file, out_path)
def peak_hz(a_file):
    """Return the frequency of the strongest bin in the file's power spectrum.

    Args:
        a_file: Path of the audio file to analyse.

    Returns:
        The first element of the ``power_spectrum`` row with the largest
        second element (amplitude), or ``None`` when the spectrum is empty or
        no row has a positive amplitude. Previously those cases raised
        ``UnboundLocalError``.
    """
    tfm = sox.Transformer()
    dft = tfm.power_spectrum(a_file)
    if not dft:
        return None

    # max() returns the first of several equal maxima, which matches the
    # original strictly-greater-than scan.
    peak = max(dft, key=lambda row: row[1])
    if peak[1] <= 0:
        return None
    return peak[0]
def run_import_chain(in_file, out_file):
    """Run the silence/reverse import chain on a WAV file.

    The chain is: silence removal at location 1, reverse, silence removal at
    location 1 again, reverse again. The input is declared as ``wav``.

    Args:
        in_file: Path of the input file.
        out_file: Path the processed audio is written to.
    """
    chain = sox.Transformer()
    chain.set_input_format(file_type="wav")

    # Two identical passes; the second one operates on the reversed audio and
    # its trailing reverse restores the original direction.
    for _ in range(2):
        chain = chain.silence(location=1, buffer_around_silence=True)
        chain = chain.reverse()

    # Normalisation (`norm`) is currently disabled in this chain.
    chain.build(in_file, out_file)
def __normalize_audio_file(self, input_file, output_file):
    """Normalize ``input_file`` with ``norm(0)`` and write ``output_file``.

    Args:
        input_file: Path of the audio file to normalize.
        output_file: Path the normalized audio is written to.

    Returns:
        True when the build succeeded, False when any exception occurred
        (the exception is logged, not re-raised).
    """
    try:
        transformer = sox.Transformer()
        transformer.norm(0)
        transformer.build(input_file, output_file)
    except Exception as e:
        # Exceptions have no `.message` attribute on Python 3; formatting the
        # exception itself works everywhere and keeps the handler from
        # raising AttributeError while reporting the real error.
        self.__logger.error("error in __normalize_audio_file {0}".format(e))
        return False
    return True
def run_segment(item):
    """Cut one time range out of an audio file and save it as 16-bit FLAC.

    Args:
        item: A pair ``(uid, (infile, start_sec, end_sec, outfile))``. The
            ``uid`` is not used here.
    """
    _uid, (infile, start_sec, end_sec, outfile) = item

    segmenter = sox.Transformer()
    segmenter.set_output_format(
        file_type="flac",
        encoding="signed-integer",
        bits=16,
    )
    segmenter.trim(start_sec, end_sec)
    segmenter.build(infile, outfile)
def handle_file(file: UploadedFile, ops: List[Tuple[Callable, List]]):
    """Save an uploaded file, apply ``ops`` to a transformer and resolve paths.

    Args:
        file: The uploaded file; handed to ``save_file`` and ``get_format``.
        ops: Sequence of ``(callable, args)`` entries. Each callable is
            invoked as ``callable(tfm, *args)`` on one shared transformer.

    Returns:
        ``(full_path_input, full_path_output)`` where the output path is
        ``OUTPUT_DIRECTORY`` + name + format as returned by ``get_file``.
    """
    tfm = sox.Transformer()
    full_path_input, filename = save_file(file)
    extension = get_format(file)

    for entry in ops:
        operation, arguments = entry[0], entry[1]
        operation(tfm, *arguments)

    filename, extension = get_file(tfm, filename, extension)
    full_path_output = OUTPUT_DIRECTORY + filename + extension
    return full_path_input, full_path_output
def convert_wav(mp3_filename, wav_filename):
    """Convert ``mp3_filename`` to ``wav_filename`` at 16 kHz, once.

    Nothing happens when ``wav_filename`` already exists. A ``SoxError``
    during the build is reported with a short message and otherwise ignored.

    Args:
        mp3_filename: Path of the source audio.
        wav_filename: Path of the converted output.
    """
    if path.exists(wav_filename):
        return

    converter = sox.Transformer()
    converter.convert(samplerate=16000)
    try:
        converter.build(mp3_filename, wav_filename)
    except sox.core.SoxError:
        print("error")
def process(self, in_file):
    """Run the silence/reverse/normalize chain and write ``self.temp_wav``.

    The chain is: silence removal at location 1, reverse, the same two steps
    again, then ``norm``. The input is declared as ``wav``.

    Args:
        in_file: Path of the input file.
    """
    chain = sox.Transformer()

    # Two identical passes; the second one operates on the reversed audio and
    # its trailing reverse restores the original direction.
    for _ in range(2):
        chain.silence(location=1, buffer_around_silence=True)
        chain.reverse()

    chain.norm()
    chain.set_input_format(file_type="wav")
    chain.build(in_file, self.temp_wav)
def trim_song_in_middle(fname, output_fname, window=5):
    """Keep only ``window`` seconds around the middle of the song.

    Supposedly will speed up classification.

    Args:
        fname: Path of the input audio file.
        output_fname: Path the trimmed audio is written to.
        window: Length in seconds of the excerpt to keep. When the song is
            shorter than this, the excerpt is clamped to the whole song.
    """
    tfm = sox.Transformer()
    duration = sox.file_info.duration(fname)

    midpoint = duration / 2.0
    # Divide by a float so an integer window is not floor-divided on Python 2.
    half_window = window / 2.0

    # Clamp to the file bounds: for songs shorter than `window` the
    # unclamped start would be negative.
    start = max(0.0, midpoint - half_window)
    end = min(duration, midpoint + half_window)

    tfm.trim(start, end)
    tfm.build(fname, output_fname)
def splitMic():
    """Write channels 1-8 of ``filename_mic`` to separate mono files.

    Reads the module-level ``filename_mic``. Each output is named after it
    with the last four characters replaced by ``-ch-<channel>.wav``.
    """
    print('Splitting written mic channels...')

    for channel in range(1, 9):
        splitter = sox.Transformer()
        # Output channel 1 is fed from the single selected input channel.
        splitter.remix({1: [channel]})
        filename_mic_channel = '{}-ch-{}.wav'.format(filename_mic[:-4],
                                                     channel)
        splitter.build(filename_mic, filename_mic_channel)

    print('Splitted 8 channels to separate files')
def mp3_to_flac(data, dest_path, name):
    """Transcode in-memory MP3 bytes to a 16 kHz, stereo, 16-bit FLAC file.

    Args:
        data: Raw bytes of the MP3 file.
        dest_path: Directory the FLAC file is written into.
        name: Base name of the output file (``.flac`` is appended).

    Returns:
        The path of the written FLAC file.
    """
    import tempfile  # local import: only needed for the scratch file

    dest_filename = os.path.join(dest_path, name + '.flac')

    # A unique scratch file per call: the previous fixed /tmp path was shared
    # by every caller, so concurrent transcodes overwrote each other's input.
    fd, tmp_filename = tempfile.mkstemp(suffix='.mp3')
    try:
        with os.fdopen(fd, 'wb') as content_file:
            content_file.write(data)

        transformer = sox.Transformer()
        transformer.convert(samplerate=16000, n_channels=2, bitdepth=16)
        transformer.build(tmp_filename, dest_filename)
    finally:
        # The scratch input is no longer needed, whether or not sox succeeded.
        os.remove(tmp_filename)
    return dest_filename
def save_small_wav(out_path, y, fs):
    """Save signal ``y`` as a 16-bit wav file at ``out_path``.

    The signal is first written to a temporary wav file with librosa and
    then converted to 16-bit by sox.

    Args:
        out_path: Path of the final 16-bit file.
        y: Audio signal handed to ``librosa.output.write_wav``.
        fs: Sample rate handed to ``librosa.output.write_wav``.
    """
    fhandle, tmp_file = tempfile.mkstemp(suffix='.wav')
    # Only the path is used; close the descriptor right away so it is not
    # held open (or leaked on error) while other writers open the same path.
    os.close(fhandle)
    try:
        librosa.output.write_wav(tmp_file, y, fs)
        tfm = sox.Transformer()
        tfm.convert(bitdepth=16)
        tfm.build(tmp_file, out_path)
    finally:
        # Always remove the scratch file, even when writing or building fails.
        os.remove(tmp_file)
def creat_speed_augmentation(filelist, source_folder, target_folder,
                             speed_list):
    """Write one speed-changed copy of every file for every speed factor.

    Each output is named ``<stem>_<speed><ext>`` and placed under
    ``target_folder``.

    Args:
        filelist: File names, relative to ``source_folder``.
        source_folder: Prefix prepended to each name to locate the input
            (plain concatenation, so it should end with a path separator).
        target_folder: Prefix prepended to each output name (plain
            concatenation, so it should end with a path separator).
        speed_list: Speed factors, each passed to ``Transformer.speed``.
    """
    import os  # local import: needed for splitext below

    for speed in speed_list:
        # A fresh transformer per speed: effect methods append to the
        # transformer's chain, so reusing one instance would stack every
        # previous speed change on top of the current one.
        aug_generator = sox.Transformer()
        aug_generator.speed(speed)
        for files in filelist:
            # splitext keeps names with several dots intact ("a.b.wav").
            stem, ext = os.path.splitext(files)
            # Use the target_folder argument; the previous code ignored it
            # and read a module-level TARGET_FOLDER instead.
            save_filename = target_folder + stem + '_' + str(speed) + ext
            print(save_filename)
            aug_generator.build(source_folder + files, save_filename)
def resample_file(file_name, out_dir, out_sample_rate=0, out_bit_depth=16):
    """Convert ``file_name`` to the given sample rate and bit depth.

    The result keeps the input's base name and is written into ``out_dir``.

    Args:
        file_name: Path of the input audio file.
        out_dir: Directory for the converted file.
        out_sample_rate: Target sample rate; ``0`` means "ask
            ``get_target_sample_rate`` for this file".
        out_bit_depth: Target bit depth.
    """
    out_file_name = os.path.join(out_dir, os.path.basename(file_name))

    if out_sample_rate == 0:
        out_sample_rate = get_target_sample_rate(file_name)

    message = (
        'Converting... ( input: {}, output: {}, samplerate: {}, bitdepth: {} )'
    ).format(file_name, out_file_name, out_sample_rate, out_bit_depth)
    logging.info(message)

    converter = sox.Transformer()
    converter.convert(samplerate=out_sample_rate, bitdepth=out_bit_depth)
    converter.build(file_name, out_file_name)
def resample(self, file):
    """Resample a wav file with SoX to 16 kHz, 1 channel, 16 bit.

    That format is described as the ideal one for processing. The output is
    named ``<file>_sampled`` and both names are resolved through
    ``common.file_path``.

    Args:
        file: Name of the file to resample.

    Returns:
        The name (not the resolved path) of the resampled file.
    """
    resampled_file = '{0}_sampled'.format(file)

    sampler = sox.Transformer()
    sampler.convert(samplerate=16000, n_channels=1, bitdepth=16)

    resampled_path = common.file_path(resampled_file)
    source_path = common.file_path(file)
    sampler.build(source_path, resampled_path)

    # NOTE(review): the result of file_exists is not used here; presumably
    # the helper reports or raises on its own -- confirm.
    common.file_exists(resampled_path)
    return resampled_file
def _maybe_convert_wav(self, mp3_filename, wav_filename):
    """Convert ``mp3_filename`` to ``wav_filename`` unless it already exists.

    The output uses the module-level ``SAMPLE_RATE``, ``N_CHANNELS`` and
    ``BITDEPTH``. Conversion is best-effort: a ``SoxError`` is reported on
    stdout and does not propagate.

    Args:
        mp3_filename: Source audio path (converted with ``str``).
        wav_filename: Destination path (converted with ``str``).
    """
    if os.path.exists(wav_filename):
        return

    transformer = sox.Transformer()
    transformer.convert(samplerate=SAMPLE_RATE,
                        n_channels=N_CHANNELS,
                        bitdepth=BITDEPTH)
    try:
        transformer.build(str(mp3_filename), str(wav_filename))
    except sox.core.SoxError as ex:
        # Still best-effort, but say which file failed instead of dropping
        # the error silently.
        print("SoX processing error", ex, mp3_filename, wav_filename)
def _maybe_convert_wav(ogg_filename, wav_filename):
    """Convert ``ogg_filename`` to ``wav_filename`` unless it already exists.

    The output uses the module-level ``SAMPLE_RATE``, ``N_CHANNELS`` and
    ``BITDEPTH``. A ``SoxError`` is printed and does not propagate.

    Args:
        ogg_filename: Path of the source audio.
        wav_filename: Path of the converted output.
    """
    if os.path.exists(wav_filename):
        return

    converter = sox.Transformer()
    converter.convert(
        samplerate=SAMPLE_RATE,
        n_channels=N_CHANNELS,
        bitdepth=BITDEPTH,
    )
    try:
        converter.build(ogg_filename, wav_filename)
    except sox.core.SoxError as ex:
        print("SoX processing error", ex, ogg_filename, wav_filename)
def get_array(self) -> AudioArray:
    """Combine all inputs into the temp file and return it as an array.

    Each input provides a temp file; these are combined with the configured
    combine type into ``self._temp_filepath``, which is then read back.

    Returns:
        An ``AudioArray`` holding the combined samples and the sample rate
        sox reports for the combined file.
    """
    combiner = sox.Combiner()
    source_paths = [source.get_temp_file() for source in self.__inputs]
    combiner.build(
        input_filepath_list=source_paths,
        output_filepath=self._temp_filepath,
        combine_type=self.__combine_type,
    )

    samples = sox.Transformer().build_array(
        input_filepath=self._temp_filepath)
    rate = sox.file_info.sample_rate(input_filepath=self._temp_filepath)
    return AudioArray(array=samples, sample_rate=rate)
def __init__(self,
             decoder_model,
             resource=RESOURCE_FILE,
             sensitivity=[],
             audio_gain=1):
    """Create the hotword detector, its audio input stream and buffers.

    Args:
        decoder_model: A model path or a list of model paths; they are
            joined with "," for the detector.
        resource: Resource file passed to ``SnowboyDetect``.
        sensitivity: A single value or a list of values. A single value is
            repeated for every hotword when several models are given. An
            empty list leaves the detector's sensitivity untouched. (The
            list default is never mutated in place, only rebound.)
        audio_gain: Value passed to ``SetAudioGain``.

    Raises:
        AssertionError: If a non-empty ``sensitivity`` does not have one
            entry per hotword.
    """

    def audio_callback(in_data, frame_count, time_info, status):
        # Feed both buffers with the captured data and hand back a
        # placeholder buffer of the same length.
        self.ring_buffer.extend(in_data)
        self.ring_buffer_complete.extend(in_data)
        play_data = chr(0) * len(in_data)
        return play_data, pyaudio.paContinue

    if not isinstance(decoder_model, list):
        decoder_model = [decoder_model]
    if not isinstance(sensitivity, list):
        sensitivity = [sensitivity]
    model_str = ",".join(decoder_model)

    self.detector = snowboydetect.SnowboyDetect(resource_filename=resource,
                                                model_str=model_str)
    self.detector.SetAudioGain(audio_gain)
    self.num_hotwords = self.detector.NumHotwords()

    if len(decoder_model) > 1 and len(sensitivity) == 1:
        sensitivity = sensitivity * self.num_hotwords
    if len(sensitivity) != 0:
        assert self.num_hotwords == len(sensitivity), \
            "number of hotwords in decoder_model (%d) and sensitivity " \
            "(%d) does not match" % (self.num_hotwords, len(sensitivity))
        sensitivity_str = ",".join([str(t) for t in sensitivity])
        self.detector.SetSensitivity(sensitivity_str)

    # Both buffers must exist BEFORE the stream is opened: the stream starts
    # delivering callbacks as soon as it is open, and the callback writes to
    # both of them.
    self.ring_buffer = RingBuffer(self.detector.NumChannels() *
                                  self.detector.SampleRate() * 5)
    self.ring_buffer_complete = RingBuffer()

    self.audio = pyaudio.PyAudio()
    self.stream_in = self.audio.open(
        input=True,
        output=False,
        # Sample width in whole bytes; floor division keeps it an int.
        format=self.audio.get_format_from_width(
            self.detector.BitsPerSample() // 8),
        channels=self.detector.NumChannels(),
        rate=self.detector.SampleRate(),
        frames_per_buffer=2048,
        stream_callback=audio_callback)

    # create transformer
    self.tfm = sox.Transformer()
    self.tfm.set_input_format(rate=16000,
                              bits=16,
                              channels=1,
                              encoding='signed-integer')
def copy_sources(song_paths, split):
    """Copy every source stem of every song as a single-channel wav.

    For each song directory and each entry of the module-level ``sources``,
    ``<song_path>/<source>.wav`` is converted to one channel and written to
    ``data/musdb/<split>/<source>/<song_name>.wav``.

    Args:
        song_paths: '/'-separated song directory paths.
        split: Name of the dataset split used in the destination path.
    """
    mono = sox.Transformer()
    mono.convert(n_channels=1)

    for song_path in song_paths:
        # Last '/'-separated component of the path is the song name.
        song_name = song_path.rsplit('/', 1)[-1]
        for source in sources:
            source_file = os.path.join(song_path, source + '.wav')
            destination_file = os.path.join(
                'data', 'musdb', split, source, song_name + '.wav')
            print('Copying %s to %s' % (source_file, destination_file))
            mono.build(source_file, destination_file)
def convert_audio(src_audio_path,
                  dst_audio_path,
                  file_type=None,
                  audio_format=DEFAULT_FORMAT):
    """Convert an audio file to the given output format with sox.

    Args:
        src_audio_path: Path of the input file.
        dst_audio_path: Path of the converted file.
        file_type: Output file type handed to sox (``None`` by default).
        audio_format: Object with ``rate``, ``channels`` and ``width``
            attributes; ``width`` is multiplied by 8 to obtain bits.
    """
    import sox  # imported on use, as in the original

    converter = sox.Transformer()
    output_options = {
        'file_type': file_type,
        'rate': audio_format.rate,
        'channels': audio_format.channels,
        'bits': audio_format.width * 8,
    }
    converter.set_output_format(**output_options)
    converter.build(src_audio_path, dst_audio_path)
def resample_files(input_dir, output_dir):
    """Convert every entry of ``input_dir`` to 16-bit, 22050 Hz, mono.

    Each entry returned by ``os.listdir`` is processed (no extension filter)
    and written under the same name into ``output_dir``. An ``IOError``
    anywhere in the loop stops processing and prints a message.

    Args:
        input_dir: Directory containing the input files.
        output_dir: Directory receiving the converted files.
    """
    try:
        for filename in os.listdir(input_dir):
            print(filename)
            resampler = sox.Transformer()
            resampler.set_output_format(bits=16, rate=22050, channels=1)
            source_path = os.path.join(input_dir, filename)
            target_path = os.path.join(output_dir, filename)
            resampler.build(source_path, target_path)
    except IOError:
        print("There was an error trying to read input/output directory.")
def convert_audio(src_audio_path,
                  dst_audio_path,
                  file_type=None,
                  audio_format=DEFAULT_FORMAT):
    """Convert an audio file to the given output format with sox.

    Args:
        src_audio_path: Path of the input file.
        dst_audio_path: Path of the converted file.
        file_type: Output file type handed to sox (``None`` by default).
        audio_format: A 3-item sequence ``(sample_rate, channels, width)``;
            ``width`` is multiplied by 8 to obtain bits.
    """
    sample_rate, channels, width = audio_format

    converter = sox.Transformer()
    output_options = {
        'file_type': file_type,
        'rate': sample_rate,
        'channels': channels,
        'bits': width * 8,
    }
    converter.set_output_format(**output_options)
    converter.build(src_audio_path, dst_audio_path)
def get_array(self) -> AudioArray:
    """Apply the configured overdrive effect to the input's audio.

    Returns:
        An ``AudioArray`` with the processed samples and the same sample
        rate as the input array.
    """
    effect = sox.Transformer()
    effect.overdrive(gain_db=self.__gain_db, colour=self.__colour)

    source = self._input.get_array()
    processed = effect.build_array(
        input_array=source.array,
        sample_rate_in=source.sample_rate,
    )
    return AudioArray(array=processed, sample_rate=source.sample_rate)
def _maybe_convert_wav(self, orig_filename, wav_filename):
    """Convert ``orig_filename`` to ``wav_filename`` unless it already exists.

    The output uses the module-level ``SAMPLE_RATE``, ``N_CHANNELS`` and
    ``BITDEPTH``. ``SoxError`` and ``SoxiError`` are printed and do not
    propagate.

    Args:
        orig_filename: Source audio path (converted with ``str``).
        wav_filename: Destination path (converted with ``str``).
    """
    # MP2/MP3 (with optional libmad, libtwolame and libmp3lame libraries)
    # http://sox.sourceforge.net/Docs/Features
    if os.path.exists(wav_filename):
        return

    converter = sox.Transformer()
    converter.convert(
        samplerate=SAMPLE_RATE,
        n_channels=N_CHANNELS,
        bitdepth=BITDEPTH,
    )
    try:
        converter.build(str(orig_filename), str(wav_filename))
    except (sox.core.SoxError, sox.core.SoxiError) as ex:
        print("SoX processing error", ex, orig_filename, wav_filename)
def _resample_sox(self, sample_rate):
    """Return a new ``Audio`` resampled to ``sample_rate`` using sox.

    Args:
        sample_rate: Target sample rate.

    Returns:
        An ``Audio`` built (without validation) from the resampled data.

    Raises:
        ValueError: If sox fails or a ``ValueError`` occurs while resampling.
    """
    try:
        resampler = sox.Transformer()
        resampler.set_output_format(rate=sample_rate)
        resampled = resampler.build_array(
            input_array=self.data,
            sample_rate_in=self.sample_rate,
        )
        return Audio(resampled, sample_rate, validate=False)
    except (sox.core.SoxError, ValueError):
        raise ValueError(f'resampling at {sample_rate} failed!')