def load_audio_file(file_path: Path, sr: int, mono: Optional[bool] = True,
                    offset: Optional[float] = 0,
                    duration: Optional[float] = None) -> ndarray:
    """Wrapper for loading an audio file via librosa.

    TODO: Add handling of cases where librosa fails.

    :param file_path: File path of the audio file.
    :type file_path: pathlib.Path
    :param sr: Sampling frequency to be used. If different from actual,
        data are resampled.
    :type sr: int
    :param mono: Load file as mono? Defaults to True.
    :type mono: bool, optional
    :param offset: Offset for reading the file. Defaults to 0.0.
    :type offset: float, optional
    :param duration: Duration of the returned data. Defaults to None.
    :type duration: float|None, optional
    :return: Audio data as numpy array of shape (channels x samples),
        if channels >= 2, else (samples, ).
    :rtype: numpy.ndarray
    """
    # librosa.load returns (data, sr); only the samples are needed here.
    return librosa_load(path=str(file_path), sr=sr, mono=mono,
                        offset=offset, duration=duration)[0]
def __call__(self, audiopath):
    """Compute a CENS chromagram for an audio file.

    Loads the file as mono at ``self.samplerate``, computes chroma-CENS
    with ``self.step_size`` as hop length, transposes to (frames x bins),
    and rolls the result 3 positions along the pitch-class axis.

    Returns the chromagram together with its frame times
    (from ``self.get_frame_times``).
    """
    signal, _ = librosa_load(audiopath, sr=self.samplerate, mono=True)
    cens = chroma_cens(y=signal, sr=self.samplerate,
                       hop_length=self.step_size)
    # Transpose to time-major layout, then shift the pitch-class axis.
    cens = np.roll(np.swapaxes(cens, 0, 1), 3, axis=1)
    return cens, self.get_frame_times(cens)
def transform_playlist(wav_directory: str):
    """Compute several spectral transforms for each .wav file in a directory
    and save each transform as a .jpg image under transforms/<kind>/.

    :param wav_directory: Directory containing the input .wav files.
    """
    wav_list = keep_files(os_listdir(wav_directory), 'wav')
    transformer = Transform()
    transforms_dir = create_sub_dir(wav_directory, 'transforms')
    transforms_sub_dirs = ['preciser_spectrogram', 'preciser_melspectrogram',
                           'preciser_cqt', 'locator_v1_cqt',
                           'locator_v2_spectrogram',
                           'locator_v2_melspectrogram']
    for sub_dir in transforms_sub_dirs:
        create_sub_dir(transforms_dir, sub_dir)
    for wav_file in wav_list:
        samples, sample_rate = librosa_load(
            os_path_join(wav_directory, wav_file), mono=True, sr=44100)
        # 'preciser_cqt' and 'locator_v1_cqt' used identical parameters
        # (sample_rate, samples, 108); compute the CQT once and reuse it.
        cqt_108 = transformer.get_constant_q(sample_rate, samples, 108)
        transforms = {
            'preciser_spectrogram':
                transformer.get_spectrogram(sample_rate, samples, 10, 512),
            'preciser_melspectrogram':
                transformer.get_melspectrogram(sample_rate, samples, 10, 256),
            'preciser_cqt': cqt_108,
            'locator_v1_cqt': cqt_108,
            'locator_v2_spectrogram':
                transformer.get_spectrogram(sample_rate, samples, 100, 4411),
            'locator_v2_melspectrogram':
                transformer.get_melspectrogram(sample_rate, samples, 100, 256),
        }
        # Iterate items() instead of key-lookup per iteration.
        for name, data in transforms.items():
            imsave(os_path_join(transforms_dir, name, wav_file + '.jpg'),
                   np_transpose(data))
        print('Saved', wav_file + '.jpg')
# 1 = extract features from the audio files, 0 = load features already
# extracted to .npy files (translated from the original Chinese comment).
audio_read_flag = 0
mfcc_train_root = './mfcc_tmp/mfcc_train.npy'
mfcc_test_root = './mfcc_tmp/mfcc_test.npy'
if not os_path_exists('./mfcc_tmp/'):
    # No cache directory yet, so features must be (re)extracted.
    makedirs('./mfcc_tmp/')
    audio_read_flag = 1
# Feature layout per sample: [mean of each MFCC coeff, var of each MFCC coeff]
audio_train_data = np.empty([train_pos_num + train_neg_num, 2 * parameter_num])
audio_test_data = np.empty([test_num, 2 * parameter_num])
if audio_read_flag == 1:
    audio_data1 = np.empty([train_pos_num, 2 * parameter_num])
    audio_data2 = np.empty([train_neg_num, 2 * parameter_num])
    for i in range(train_pos_num):
        y, sr = librosa_load(
            dataset_root + '/train/positive/' + str(i) + '/audio.wav',
            sr=16000)  # initial sr=16000
        mfccs = mfcc(y=y, sr=sr, n_mfcc=parameter_num)
        # Per-coefficient mean/variance over time frames, vectorized
        # (replaces the original per-j Python loop; identical values).
        audio_data1[i, :parameter_num] = np.mean(mfccs, axis=1)
        audio_data1[i, parameter_num:] = np.var(mfccs, axis=1)
    print("Audio positive read finished")
    for i in range(train_neg_num):
        y, sr = librosa_load(
            dataset_root + '/train/negative/' + str(i) + '/audio.wav',
            sr=16000)
        mfccs = mfcc(y=y, sr=sr, n_mfcc=parameter_num)
        audio_data2[i, :parameter_num] = np.mean(mfccs, axis=1)
        audio_data2[i, parameter_num:] = np.var(mfccs, axis=1)
def check_file(in_path=""):
    """Validate that *in_path* can be decoded by librosa; delete it otherwise.

    :param in_path: Path of the audio file to check.
    """
    try:
        librosa_load(path=in_path, sr=_sr)
    except Exception:
        # Broad on purpose: any decode failure means the file is unusable,
        # so remove it and report. (Original bound the exception but never
        # used it.)
        remove(in_path)
        pp("Delete: " + basename(in_path))
def _load_file_raw(self, pkl):
    """Load self.input_file into self.y / self.sr, optionally caching to *pkl*.

    :param pkl: Path of the pickle file used as a cache when
        ``Values.USE_PKL`` is set.
    """
    self.y, self.sr = librosa_load(path=self.input_file, sr=self.sr,
                                   duration=self.load_duration)
    if Values.USE_PKL:
        # 'with' guarantees the handle is closed; the original
        # open(pkl, "wb") leaked the file object.
        with open(pkl, "wb") as fh:
            pickle_dump([self.y, self.sr], fh)