import shutil

from sox import Combiner, Transformer

# TempFile and segment_seconds are project-local helpers assumed to be
# importable from elsewhere in the codebase.


def cut(input_path, output_file, metadata):
    segments = metadata['segments']
    segments = [segment_seconds(segment) for segment in segments]
    # Open a temporary file to hold the audio between the two passes.
    with TempFile('.mp3') as temp_file:
        if segments:
            # First pass: cut the audio into segments and apply fade in/out.
            # Each segment gets its own temporary file.
            temp_segments = [TempFile('.mp3') for segment in segments]
            try:
                for index, segment in enumerate(segments):
                    sox = Transformer()
                    sox.channels(1)
                    sox.norm(-24)
                    sox.trim(*segment)
                    sox.fade(1, 2, 't')
                    sox.build(input_path, temp_segments[index].path)
                if len(segments) > 1:
                    # Concatenate the segments back together into the
                    # main temporary file.
                    Combiner().build(
                        [temp_segment.path for temp_segment in temp_segments],
                        temp_file.path,
                        'concatenate',
                    )
                else:
                    # Only one segment, so there is nothing to combine.
                    shutil.copyfile(temp_segments[0].path, temp_file.path)
            finally:
                # Clean up the per-segment temporary files even on error.
                for temp_segment in temp_segments:
                    temp_segment.close()
        # Second pass: filter, compress and EQ the audio in the temporary
        # file, writing the result to output_file.
        sox = Transformer()
        sox.highpass(100)
        sox.lowpass(10000)
        sox.compand(0.005, 0.12, 6, [
            (-90, -90),
            (-70, -55),
            (-50, -35),
            (-32, -32),
            (-24, -24),
            (0, -8),
        ])
        sox.equalizer(3000, 1000, 3)
        sox.equalizer(280, 120, 3)
        sox.build(temp_file.path, output_file)
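
# Usage sketch: the paths and metadata layout below are assumptions; each
# entry in metadata['segments'] is whatever segment_seconds() can turn into
# a (start, end) pair of seconds for Transformer.trim().
# metadata = {'segments': [...]}  # hypothetical shape
# cut('raw_episode.mp3', 'episode_final.mp3', metadata)
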
from pathlib import Path
from typing import Union

import numpy as np
from sox import Transformer


# Presumably decorated with @classmethod inside its original class,
# which defines cls.sample_rate.
def preprocess_wav(cls, fpath: Union[str, Path]) -> np.ndarray:
    """Load, resample, normalize and trim a waveform."""
    transformer = Transformer()
    transformer.norm()
    transformer.silence(silence_threshold=1, min_silence_duration=0.1)
    transformer.set_output_format(rate=cls.sample_rate, bits=16, channels=1)
    wav = transformer.build_array(input_filepath=str(fpath))
    # Scale 16-bit integer samples down to floats in [-1, 1).
    wav = wav / (2 ** 15)
    return wav.astype(np.float32)
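
# Usage sketch: the hosting class must supply sample_rate, which the snippet
# above reads as cls.sample_rate. The class name, rate, and file path here
# are assumptions:
class WavPreprocessor:
    sample_rate = 16000
    preprocess_wav = classmethod(preprocess_wav)

# wav = WavPreprocessor.preprocess_wav('utterance.wav')
# print(wav.dtype, wav.shape)  # float32 mono at 16 kHz
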
import torch
from sox import Transformer


def loadFile(data, max_timestep):
    transformer = Transformer()
    transformer.norm()
    # transformer.silence(silence_threshold=1, min_silence_duration=0.1)
    transformer.set_output_format(rate=16000, bits=16, channels=1)
    wav = transformer.build_array(input_filepath=str(data))
    # Scale 16-bit integer samples down to floats in [-1, 1).
    wav = torch.tensor(wav / (2 ** 15)).float()
    length = len(wav)
    if length > max_timestep:
        # Truncate to the first max_timestep samples.
        wav = wav[:max_timestep]
        length = max_timestep
    length = torch.tensor(length).long()
    return wav, length
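
# Usage sketch (hypothetical path; max_timestep counts samples, so at the
# 16 kHz output rate 16000 * 5 keeps at most five seconds of audio):
# wav, length = loadFile('utterance.flac', max_timestep=16000 * 5)
# print(wav.shape, length.item())
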
import random

import torch
from sox import Transformer


def loadFile_thread_exec(data):
    wavs = []
    lengths = []
    for fullPath in data:
        transformer = Transformer()
        transformer.norm()
        transformer.silence(silence_threshold=1, min_silence_duration=0.1)
        transformer.set_output_format(rate=16000, bits=16, channels=1)
        wav = transformer.build_array(input_filepath=str(fullPath))
        # Scale 16-bit integer samples down to floats in [-1, 1).
        wav = torch.tensor(wav / (2 ** 15)).float()
        length = len(wav)
        if length > max_timestep:
            # Crop a random window of max_timestep samples.
            # max_timestep is expected to be a module-level global.
            start = random.randint(0, int(length - max_timestep))
            end = start + max_timestep
            length = max_timestep
            wav = wav[start:end]
        wavs.append(wav)
        lengths.append(torch.tensor(length).long())
    return wavs, lengths
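
# Usage sketch: the function is shaped for a thread pool, with each worker
# handed a chunk of file paths; max_timestep must exist as a module-level
# global first (the pool size and chunking below are assumptions):
# from concurrent.futures import ThreadPoolExecutor
# max_timestep = 16000 * 5  # cap clips at five seconds of 16 kHz audio
# with ThreadPoolExecutor(max_workers=4) as pool:
#     for wavs, lengths in pool.map(loadFile_thread_exec, path_chunks):
#         ...
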
from os.path import join

from sox import Transformer


def _processSamples(sample_list):
    for sample in sample_list:
        sample_new_name = _renameSample(sample)
        _out = join(out_path, sample_new_name)
        processed_samples.append(_out)
        _in = sample
        # SoX processing via a pysox Transformer instance.
        tfm = Transformer()
        tfm.convert(samplerate=44100, n_channels=2, bitdepth=16)
        if NORMALIZE:
            tfm.norm(db_level=-3)
        if SILENCE:
            # location=-1 trims silence from the end of the file.
            tfm.silence(location=-1, silence_threshold=0.05,
                        min_silence_duration=0.1)
        if PADDING:
            # Append PADDING seconds of silence.
            tfm.pad(0, PADDING)
        tfm.build(_in, _out)
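
# Usage sketch: _processSamples leans on module-level configuration and a
# _renameSample helper, all assumed to be defined elsewhere in the script:
# out_path = 'processed'
# processed_samples = []
# NORMALIZE, SILENCE, PADDING = True, True, 0.5  # PADDING in seconds
# _processSamples(['samples/kick_01.wav', 'samples/snare_01.wav'])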