def pre_sff(self):
    """Extract "pre_sff" log-magnitude spectrogram features for both splits.

    For every row of ``self.dataset.train_data`` and ``self.dataset.test_data``
    the wav file named by ``row['cur_name']`` (relative to
    ``self.dataset['data_path']``) is read, converted with ``buf_to_float``,
    transformed with ``stft`` (n_fft=800, hop_length=160, win_length=320),
    truncated to the first 200 frequency bins, log-compressed and shifted so
    that its minimum is zero.

    The spectrograms, the labels produced by ``self._build_multilabel(row)``
    and the file names are handed to ``self._save_pickles`` together with the
    output directory ``<feature_path>/pre_sff``.
    """
    feature_path = os.path.join(self.dataset['feature_path'], 'pre_sff')
    # exist_ok avoids the check-then-create race and also creates missing
    # parent directories, which os.mkdir would not.
    os.makedirs(feature_path, exist_ok=True)

    def _collect(split_label, table):
        """Return (spectrograms, labels, file names) for one data split."""
        specs = []
        labels = []
        names = []
        for i, row in table.iterrows():
            print('[{}] {}) Getting pre_sff from {}...'.format(
                split_label, i, row['cur_name']), end='')
            wav_name = os.path.join(self.dataset['data_path'], row['cur_name'])
            _, wav_data = wavfile.read(wav_name)
            # Keep only the lowest 200 bins; the original note says "up to
            # 4 kHz" (presumably for 16 kHz input -- TODO confirm).
            spec = stft(buf_to_float(wav_data), n_fft=800, hop_length=160,
                        win_length=320)[:200, :]
            # The small epsilon keeps log() finite on silent bins.
            spec = np.log(np.abs(spec) + 1e-10)
            spec -= np.min(spec)
            specs.append(spec)
            labels.append(self._build_multilabel(row))
            names.append(row['cur_name'])
            print('done.')
        return specs, labels, names

    x_train, y_train, f_train = _collect('Train', self.dataset.train_data)
    x_test, y_test, f_test = _collect('Test', self.dataset.test_data)

    self._save_pickles(feature_path, x_train, y_train, f_train,
                       x_test, y_test, f_test)
def extract_feat_frame(self, frames, mode=0, file=None, sr=16000, n_fft=512, hop_length=512, n_mels=40, fmax=8000, convert_16I_to_32F=True):
    """Gain-normalise a group of frames and extract the requested feature.

    Parameters
    ----------
    frames : iterable of buffers, or an array when ``convert_16I_to_32F`` is
        False. With conversion enabled each element is passed through
        ``buf_to_float(frame, 2, np.float32)`` and the results are
        concatenated into one contiguous float32 array.
    mode : int
        0 -> ``self.extract_melspec_frame``,
        1 -> ``self.extract_wav_frame``,
        2 -> ``self.extract_log_spectrogram_frame``.
        Any other value prints a message and returns ``None``.
    file : forwarded unchanged to the selected extractor.
    sr, n_mels : forwarded to the mode 0 and mode 2 extractors.
    n_fft, hop_length, fmax : accepted but not used by this method.
    convert_16I_to_32F : bool
        Whether to run the buffer-to-float conversion described above.

    Returns
    -------
    Whatever the selected extractor returns, or ``None`` for an unknown mode.
    """
    if convert_16I_to_32F:
        # Convert every buffer to float32, then join them into one signal.
        as_float = [buf_to_float(chunk, 2, np.float32) for chunk in frames]
        joined = np.concatenate(as_float)
        frames = np.ascontiguousarray(joined, np.float32)

    frames = self.gain_norm(frames)

    if mode == 0:
        return self.extract_melspec_frame(frames, file=file, n_mels=n_mels, sr=sr)
    if mode == 1:
        return self.extract_wav_frame(frames, file=file)
    if mode == 2:
        return self.extract_log_spectrogram_frame(frames, file=file, n_mels=n_mels, sr=sr)

    print("non-supported feature type")
def get_features(self, sr):
    """Estimate the tuning offset ("pitch") of the audio held in ``self.bytes``.

    The raw buffer is converted to a float time series with ``buf_to_float``
    and passed to ``estimate_tuning`` (librosa, see
    https://librosa.github.io/librosa/). Tempo is not computed here.

    Parameters
    ----------
    sr : sampling rate of the audio, forwarded to ``estimate_tuning``.

    Returns
    -------
    list
        A single-element list ``[pitch]``.
    """
    timeseries = buf_to_float(self.bytes)
    # Pass by keyword: librosa >= 0.10 makes these arguments keyword-only,
    # and keywords are accepted by older releases as well.
    pitch = estimate_tuning(y=timeseries, sr=sr)
    return [pitch]
def load(path, sr=22050, mono=True, offset=0.0, duration=None, dtype=np.float32, res_type='kaiser_best'):
    """Load an audio file as a floating point time series.

    Parameters
    ----------
    path : str
        Path to the input file; opened through ``audioread.audio_open``.
    sr : number > 0 or None
        Target sampling rate. ``None`` keeps the file's native rate.
    mono : bool
        Down-mix to mono with ``to_mono``.
    offset : float
        Start reading after this time (in seconds).
    duration : float or None
        Only load up to this much audio (in seconds); ``None`` reads to the end.
    dtype : numeric type
        Data type of the returned array.
    res_type : str
        Resampling mode forwarded to ``resample``.

    Returns
    -------
    y : np.ndarray
        The decoded audio. Empty when no samples fall inside the requested
        window (e.g. an empty file or an offset past the end).
    sr : number
        Sampling rate of ``y``.
    """
    chunks = []
    with audioread.audio_open(os.path.realpath(path)) as input_file:
        sr_native = input_file.samplerate
        n_channels = input_file.channels

        # Window boundaries are expressed in interleaved samples.
        s_start = int(np.round(sr_native * offset)) * n_channels
        if duration is None:
            s_end = np.inf
        else:
            s_end = s_start + (int(np.round(sr_native * duration)) * n_channels)

        n = 0
        for frame in input_file:
            frame = util.buf_to_float(frame, dtype=dtype)
            n_prev = n
            n = n + len(frame)

            if n < s_start:
                # offset is after the current frame: keep reading
                continue
            if s_end < n_prev:
                # we're off the end: stop reading
                break
            if s_end < n:
                # the end is in this frame: crop
                frame = frame[:(s_end - n_prev)]
            if n_prev <= s_start <= n:
                # the beginning is in this frame
                frame = frame[(s_start - n_prev):]

            chunks.append(frame)

    if chunks:
        y = np.concatenate(chunks)
        # One '.' is written to stdout per file that yielded audio.
        print(".")
        if n_channels > 1:
            y = y.reshape((-1, n_channels)).T
            if mono:
                y = to_mono(y)
        if sr is not None:
            y = resample(y, sr_native, sr, res_type=res_type)
    else:
        # Nothing was decoded. Previously the result variable was never bound
        # on this path and the function crashed; return an empty signal.
        y = []

    if sr is None:
        sr = sr_native

    # Final cleanup for dtype and contiguity
    y = np.ascontiguousarray(y, dtype=dtype)
    return y, sr
def stream_to_np(bytes_io, sr=22050, mono=True, offset=0.0, duration=None, dtype=np.float32, res_type='kaiser_best'):
    """Decode an in-memory audio stream into a floating point time series.

    Rewrite of ``librosa.load`` that takes a BytesIO-like object instead of a
    file path: ``audioread.audio_open`` is replaced by the custom
    ``RawAudioStream`` class, because the former needs a file path.

    :param bytes_io: stream object handed to ``RawAudioStream``
    :param sr: target sampling rate; ``None`` keeps the native rate
    :param mono: down-mix multi-channel audio with ``to_mono``
    :param offset: start reading after this time (in seconds)
    :param duration: only load up to this much audio (in seconds)
    :param dtype: data type of the returned array
    :param res_type: resampling mode forwarded to ``resample``
    :return: tuple ``(y, sr)`` of the decoded array and its sampling rate
    """
    pieces = []
    with RawAudioStream(bytes_io) as stream:
        native_rate = stream.samplerate
        channels = stream.channels

        # Window boundaries, counted in interleaved samples.
        first = int(np.round(native_rate * offset)) * channels
        if duration is None:
            last = np.inf
        else:
            last = first + (int(np.round(native_rate * duration)) * channels)

        consumed = 0
        for raw in stream:
            block = util.buf_to_float(raw, dtype=dtype)
            block_start = consumed
            consumed = consumed + len(block)

            if consumed < first:
                # Still before the requested offset.
                continue
            if last < block_start:
                # Already past the requested end.
                break
            if last < consumed:
                # Requested end lies inside this block.
                block = block[:last - block_start]
            if block_start <= first <= consumed:
                # Requested start lies inside this block.
                block = block[(first - block_start):]

            pieces.append(block)

    if not pieces:
        # Nothing decoded: return an empty array and leave sr as given.
        return np.ascontiguousarray(pieces, dtype=dtype), sr

    signal = np.concatenate(pieces)
    if channels > 1:
        signal = signal.reshape((-1, channels)).T
        if mono:
            signal = to_mono(signal)

    if sr is None:
        sr = native_rate
    else:
        signal = resample(signal, native_rate, sr, res_type=res_type)

    # Enforce dtype and memory contiguity on the result.
    signal = np.ascontiguousarray(signal, dtype=dtype)
    return signal, sr
def load_yield_chunks(path, sr=22050, mono=True, offset=0.0, duration=None, dtype=np.float32, res_type='kaiser_best', choplenspls=0, hoplenspls=0):
    """Load an audio file and yield it as fixed-length chunks.

    This is MODIFIED from librosa's own load() function, to yield chunks
    one-by-one so they never all need to be loaded into memory.

    Parameters
    ----------
    path : string
        path to the input file. Any format supported by `audioread` will work.
    sr : number > 0 [scalar]
        target sampling rate. 'None' uses the native sampling rate
    mono : bool
        convert signal to mono
    offset : float
        start reading after this time (in seconds)
    duration : float
        only load up to this much audio (in seconds)
    dtype : numeric type
        data type of the yielded arrays
    res_type : str
        resample type, forwarded to `resample`
    choplenspls : int > 0
        number of samples in each chunk to be yielded. Must be given; the
        default of 0 is rejected.
    hoplenspls : int
        number of samples to advance between consecutive chunks. Values that
        are missing, non-positive or larger than `choplenspls` fall back to
        `choplenspls` (non-overlapping chunks).

    Yields
    ------
    (chunk, sr) : (np.ndarray, number)
        `chunk` holds exactly `choplenspls` samples along its last axis;
        `sr` is the sampling rate of the chunk.

    Raises
    ------
    ValueError
        If `choplenspls` is not a positive number. Because this is a
        generator, the error surfaces on the first iteration.

    Notes
    -----
    Channel reshaping, mono down-mix and resampling are applied to each
    decoded frame separately, not to the whole signal at once. Trailing
    samples that do not fill a complete chunk are dropped and a warning is
    printed.
    """
    if not choplenspls or choplenspls <= 0:
        # With a non-positive chunk length the yield loop below could never
        # terminate (it would emit empty chunks forever).
        raise ValueError(
            "choplenspls must be a positive number of samples, got %r" % (choplenspls,)
        )
    if not hoplenspls or (hoplenspls <= 0 or hoplenspls > choplenspls):
        hoplenspls = choplenspls

    # Samples decoded but not yet emitted; None until the first frame arrives.
    pending = None
    with audioread.audio_open(os.path.realpath(path)) as input_file:
        sr_native = input_file.samplerate
        n_channels = input_file.channels

        s_start = int(np.round(sr_native * offset)) * n_channels
        if duration is None:
            s_end = np.inf
        else:
            s_end = s_start + (int(np.round(sr_native * duration)) * n_channels)

        n = 0
        for frame in input_file:
            frame = util.buf_to_float(frame, dtype=dtype)
            n_prev = n
            n = n + len(frame)

            if n < s_start:
                # offset is after the current frame: keep reading
                continue
            if s_end < n_prev:
                # we're off the end: stop reading
                break
            if s_end < n:
                # the end is in this frame: crop
                frame = frame[:s_end - n_prev]
            if n_prev <= s_start <= n:
                # beginning is in this frame
                frame = frame[(s_start - n_prev):]

            # NB here we apply to one single frame the postprocessing that
            # librosa applies to the whole file at the end.
            if n_channels > 1:
                frame = frame.reshape((-1, n_channels)).T
                if mono:
                    frame = to_mono(frame)
            if sr is not None:
                frame = resample(frame, sr_native, sr, res_type=res_type)
            else:
                sr = sr_native

            # Final cleanup for dtype and contiguity
            frame = np.ascontiguousarray(frame, dtype=dtype)

            # Concatenate along the last (time) axis so that un-mixed
            # multi-channel frames of shape (channels, n) work as well.
            if pending is None:
                pending = frame
            else:
                pending = np.concatenate((pending, frame), axis=-1)

            while pending.shape[-1] >= choplenspls:
                yield (pending[..., :choplenspls], sr)
                pending = pending[..., hoplenspls:]

    if pending is not None and pending.shape[-1] != 0:
        print(
            "WARNING: load_yield_chunks() dropped %i final samples" % (pending.shape[-1])
        )
        # TODO can the final incomplete chunk be handled elegantly within the above loop?