def get_audio_features_dataframe(n_mfcc):
    """Extract the specified audio features from all audio files and store them as a CSV.

    Args:
        n_mfcc (int): number of MFCC coefficients to extract

    Returns:
        None
    """
    df = pd.DataFrame()
    for member, label in MEMBER_TO_LABEL.items():
        print(member, end=' ')
        files = os.listdir(f'data/{label}')
        files = [f'data/{label}/{x}' for x in files]
        rows = []
        for f in files:
            # Aggregated MFCC values, so that each audio file is represented
            # by a 1D array of length n_mfcc
            x, sample_rate = load_audio(file_path=f, sample_rate=44100 / 3)
            mfcc_feat = MFCC(y=x, sr=sample_rate, n_mfcc=n_mfcc)
            mfcc_mean = mfcc_feat.mean(axis=1)
            zero_crossing = sum(librosa.zero_crossings(y=x))
            centroids = librosa.feature.spectral_centroid(y=x, sr=sample_rate)

            # Each coefficient becomes its own feature column
            row = {'label': label, 'file_path': f}
            for number, coef in enumerate(mfcc_mean):
                row[f'mfcc_{n_mfcc}_{number + 1:02d}'] = coef
            row['zero_crossing'] = zero_crossing
            row['spectral_centroid'] = np.mean(centroids)
            rows.append(row)
        df = df.append(rows, ignore_index=True)
    df.to_csv('features.csv', index=False)

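# Hypothetical usage sketch (not part of the original source). It assumes the
# helpers referenced above (MEMBER_TO_LABEL, load_audio, MFCC) are defined in
# this module and that the audio files live under data/<label>/.
if __name__ == '__main__':
    # Write 13 aggregated MFCCs per file, plus the zero-crossing count and the
    # mean spectral centroid, to features.csv.
    get_audio_features_dataframe(n_mfcc=13)
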
def feature_extration(x, sr):
    # Zero Crossing Rate (counted over a short excerpt of the signal)
    n0 = 9000
    n1 = 9100
    zero_crossings = librosa.zero_crossings(x[n0:n1], pad=False)

    # Spectral Centroid
    spectral_centroids = librosa.feature.spectral_centroid(x, sr=sr)[0]

    # Spectral Rolloff (computed on the signal with a small constant offset, as in the original)
    spectral_rolloff = librosa.feature.spectral_rolloff(x + 0.01, sr=sr)[0]

    # Mel-Frequency Cepstral Coefficients, flattened to 1D
    mfccs = librosa.feature.mfcc(x, sr)
    mfccs = mfccs.flatten()

    # Chroma Frequencies
    hop_length = 512
    chromagram = librosa.feature.chroma_stft(x, sr=sr, hop_length=hop_length)
    chromagram = chromagram.flatten()

    # Concatenate the zero-crossing count with the first five values of each
    # remaining feature array into one flat feature vector.
    feature = [
        x for x_set in [[sum(zero_crossings)], spectral_centroids[:5],
                        spectral_rolloff[:5], mfccs[:5], chromagram[:5]]
        for x in x_set
    ]
    return np.array(feature)

def get_mean_zero_crossing(x):
    zero_crossing = librosa.zero_crossings(x, pad=False)
    converted_array = [int(elem) for elem in zero_crossing]  # convert True/False to 1/0
    bumped_array = [elem * 10000 for elem in converted_array]  # scale each indicator by 10000
    mean_zero_crossing = np.mean(bumped_array)
    return mean_zero_crossing

def pad_end(data, length):
    pad = data.loc[data.sample_count < length]
    cropped = []
    indexes = []
    samples = []
    for index, row in pad.iterrows():
        sample = row['raw_sounds']
        # Trim the recording to the span between its first and last zero crossing
        z = librosa.zero_crossings(sample)
        crossings = np.nonzero(z)
        begin = crossings[0][0]
        end = crossings[0][-1]
        y = sample[begin:end + 1]
        cropped.append(y)
        indexes.append(index)
        samples.append(len(y))
    output = pd.DataFrame(list(zip(cropped, indexes, samples)),
                          columns=['raw_sounds', 'index', 'sample_count']).set_index('index')
    data.update(output)  # Join resampled recordings to raw frame
    return data

def get_sound_feature(filename):
    # Zero-crossing count
    x, sr = librosa.load(filename)
    n0 = 9000
    n1 = 9100
    zero_crossings = librosa.zero_crossings(x[0:len(x) - 1], pad=False)

    # Spectral centroid
    spectral_centroids = librosa.feature.spectral_centroid(x, sr=sr)[0]

    def normalize(x, axis=0):
        return sklearn.preprocessing.minmax_scale(x, axis=axis)

    normalize_spectral_centroids = normalize(spectral_centroids)

    # Spectral rolloff
    spectral_rolloff = librosa.feature.spectral_rolloff(x + 0.01, sr=sr)[0]

    # Mel-frequency cepstral coefficients (per-row mean/variance available via mfccs.mean / mfccs.var)
    x, fs = librosa.load(filename)
    mfccs = librosa.feature.mfcc(x, sr=fs)
    mfccs = sklearn.preprocessing.scale(mfccs, axis=1)

    # Chroma frequencies
    x, sr = librosa.load(filename)
    chromagram = librosa.feature.chroma_stft(x, sr=sr)

    return sum(zero_crossings), spectral_centroids, spectral_rolloff, mfccs, chromagram, x

def zcr_features(fname):
    """Compute the zero-crossing count of a wav file."""
    rate, signal = wavfile.read(fname)
    zcr_feat = librosa.zero_crossings(signal, pad=False)
    return np.atleast_1d(np.sum(zcr_feat))

def extract_zero_crossing(y, sr, repeat_length):
    zero_crossings = librosa.zero_crossings(y, pad=False)
    zcrsum = sum(zero_crossings)
    # Repeat the scalar so it lines up with frame-wise features of length repeat_length
    zcrsum = np.repeat(zcrsum, repeat_length)
    zcrsum = np.reshape(zcrsum, (-1, 1))
    zcrsum = zcrsum.T
    return zcrsum

def compute_TFeatures(self):
    """Compute temporal features: the frame-wise zero-crossing rate and the
    indexes of the zero crossings.

    Returns:
        tuple: (ZCR as np.ndarray, zero-crossing indexes as a tuple of arrays)
    """
    return (lbr.feature.zero_crossing_rate(y=self.audio, frame_length=self.fft_len,
                                           hop_length=self.hop, center=False),
            np.nonzero(lbr.zero_crossings(y=self.audio)))

def audio_features(filename):
    hop_length = 512
    n_fft = 2048

    # Load file
    y, sr = librosa.load(filename)
    duration = float(librosa.core.get_duration(y))

    # Extract features with librosa
    tempo, beat_frames = librosa.beat.beat_track(y=y, sr=sr)
    beat_times = librosa.frames_to_time(beat_frames, sr=sr)
    y_harmonic, y_percussive = librosa.effects.hpss(y)
    mfcc = librosa.feature.mfcc(y=y, sr=sr, hop_length=hop_length, n_mfcc=13)
    mfcc_delta = librosa.feature.delta(mfcc)
    beat_mfcc_delta = librosa.util.sync(np.vstack([mfcc, mfcc_delta]), beat_frames)
    chromagram = librosa.feature.chroma_cqt(y=y_harmonic, sr=sr)
    beat_chroma = librosa.util.sync(chromagram, beat_frames, aggregate=np.median)
    beat_features = np.vstack([beat_chroma, beat_mfcc_delta])
    zero_crossings = librosa.zero_crossings(y)
    zero_crossing_time = librosa.feature.zero_crossing_rate(y)
    spectral_centroid = librosa.feature.spectral_centroid(y)
    spectral_bandwidth = librosa.feature.spectral_bandwidth(y)
    spectral_contrast = librosa.feature.spectral_contrast(y)
    spectral_rolloff = librosa.feature.spectral_rolloff(y)
    rmse = librosa.feature.rmse(y)
    poly_features = librosa.feature.poly_features(y)
    chroma_stft = librosa.feature.chroma_stft(y)
    chroma_cens = librosa.feature.chroma_cens(y)
    tonnetz = librosa.feature.tonnetz(y)

    # Summary statistics for each feature matrix
    mfcc_all = statlist(mfcc)
    mfccd_all = statlist(mfcc_delta)
    bmfccd_all = statlist(beat_mfcc_delta)
    cg_all = statlist(chromagram)
    bc_all = statlist(beat_chroma)
    bf_all = statlist(beat_features)
    zc_all = statlist(zero_crossings)
    scent_all = statlist(spectral_centroid)
    sb_all = statlist(spectral_bandwidth)
    scon_all = statlist(spectral_contrast)  # distinct name: the original reused sc_all here
    sr_all = statlist(spectral_rolloff)     # and silently overwrote the centroid statistics
    rmse_all = statlist(rmse)
    pf_all = statlist(poly_features)
    cstft_all = statlist(chroma_stft)
    ccens_all = statlist(chroma_cens)
    tonnetz_all = statlist(tonnetz)

    return [
        duration, float(tempo), beat_frames.tolist(), beat_times.tolist(),
        mfcc_all, mfccd_all, bmfccd_all, cg_all, bc_all, bf_all, zc_all,
        scent_all, sb_all, scon_all, sr_all, rmse_all, pf_all, cstft_all,
        ccens_all, tonnetz_all
    ]

def get_zero_crossing(audio):
    """Return the sum of the zero crossings of the input audio.

    inputs:
        - audio: the audio time series
    outputs:
        - the sum of the zero crossings of audio
    """
    return sum(librosa.zero_crossings(audio, pad=False))

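# Hypothetical usage sketch (not part of the original source): contrasts the
# total crossing count from get_zero_crossing() with librosa's frame-wise
# zero_crossing_rate. The sine parameters are arbitrary test values.
import numpy as np
import librosa

sr = 22050
t = np.linspace(0, 1.0, sr, endpoint=False)
tone = np.sin(2 * np.pi * 440 * t)                    # 440 Hz test tone
total = get_zero_crossing(tone)                       # roughly 880 crossings in 1 s
rate = librosa.feature.zero_crossing_rate(tone).mean()
print(total, rate)
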
def synthesize(beats, piano_roll, fmin=0, bins_per_octave=12, tuning=0.0, wave=None, n=None):
    '''Synthesize a weighted piano roll'''
    # Quantize the piano roll
    sr = 22050
    piano_roll = quantize_values(piano_roll)

    if wave is None:
        wave = functools.partial(scipy.signal.square, duty=0.5)

    # Integer division so these can be used as array indices below
    bins_per_semi = bins_per_octave // 12
    first_bin = bins_per_semi // 2

    frequencies = librosa.cqt_frequencies(n_bins=piano_roll.shape[0], fmin=fmin,
                                          bins_per_octave=bins_per_octave, tuning=tuning)

    beats -= beats[0]
    if n is None:
        n = int(beats[-1] + 0.5 * sr)

    beats = librosa.util.fix_frames(beats, x_min=0, x_max=n)
    beat_intervals = librosa.util.frame(beats, frame_length=2, hop_length=1).T

    output = np.zeros(n)

    correction = 2.0 ** (tuning / bins_per_octave)
    stream = correction * 2.0 * np.pi * np.arange(len(output)) / sr

    active_bins = piano_roll.sum(axis=1) > 0

    for bin_idx, freq in enumerate(frequencies):
        if not active_bins[bin_idx * bins_per_semi + first_bin]:
            continue

        my_f = freq * stream
        sine = wave(my_f)

        # Align beat timings to zero crossings of the waveform
        zc_mask = librosa.zero_crossings(sine)
        beat_f = match_zc(beat_intervals, zc_mask, freq * correction, sr)

        # Mask out this frequency wherever it's inactive
        for m, (start, end) in enumerate(beat_f):
            sine[start:end] *= piano_roll[bin_idx * bins_per_semi + first_bin, m]

        output += sine

    output = librosa.util.normalize(output)
    return output, sr

def cal_ZCR(self):
    n0 = 39000
    n1 = 39100
    # Plot the excerpt used for the zero-crossing count
    plt.figure(figsize=(20, 5))
    plt.plot(self.x[n0:n1])
    plt.grid()
    plt.savefig('zcrPortion.png')
    plt.show()
    zero_crossings = librosa.zero_crossings(y=self.x[n0:n1], pad=False)
    return sum(zero_crossings)

def ZeroCrossings(x, sr):
    zero_crossings = librosa.zero_crossings(x, pad=False)
    print("zero crossings:", sum(zero_crossings))
    librosa.display.waveplot(x, sr=sr)
    plt.figure(figsize=(14, 5))
    plt.plot(x)
    if EXPORT_PNG:
        plt.savefig("zero_crossings.png")
    return zero_crossings

def get_zero_crossing_rate(x, sr):
    zerocrossing_temp = librosa.zero_crossings(x, pad=False)[:660000]  # goal about 30k elements after downsampling
    converted_array = [int(elem) for elem in zerocrossing_temp]  # convert True/False to 1/0
    bumped_array = [elem * 10000 for elem in converted_array]
    # Mean of 5-element windows, taken every 20 elements
    flatten_zerocrossing = [
        np.mean(bumped_array[x:x + 5]) for x in range(0, len(bumped_array), 20)
    ]
    return flatten_zerocrossing

def extract_feature_sound(cls, path):
    # Load the sound
    x, sr = librosa.load(path)

    # Extract the zero-crossing feature and sum the result
    zero_crossing = sum(librosa.zero_crossings(x[9000:9100], pad=False))

    # Extract the spectral centroid feature and sum the result
    spectral_centroids = sum(librosa.feature.spectral_centroid(x, sr=sr)[0])

    # Extract the spectral rolloff feature and sum the result
    spectral_rolloff = sum(librosa.feature.spectral_rolloff(x + 0.01, sr=sr)[0])

    # Extract the mel spectrogram feature (output is an array)
    mel_spectrogram = librosa.feature.melspectrogram(x, sr=sr)
    # Sum the array values into a single value and average over the rows
    mel_spectrogram_count = 0
    for i in range(len(mel_spectrogram)):
        for j in range(len(mel_spectrogram[i])):
            mel_spectrogram_count = mel_spectrogram_count + int(mel_spectrogram[i][j])
    mel_spectrogram_count = mel_spectrogram_count / len(mel_spectrogram)

    # Extract the MFCC feature (output is an array)
    mfcc = librosa.feature.mfcc(S=librosa.power_to_db(mel_spectrogram), sr=sr)
    # Sum the array values into a single value and average over the rows
    mfcc_count = 0
    for i in range(len(mfcc)):
        for j in range(len(mfcc[i])):
            mfcc_count = mfcc_count + int(mfcc[i][j])
    mfcc_count = mfcc_count / len(mfcc)

    # Extract the chroma STFT feature (output is an array)
    chromagram = librosa.feature.chroma_stft(x, sr=sr, hop_length=512)
    # Sum the array values into a single value and average over the rows
    chromagram_count = 0
    for i in range(len(chromagram)):
        for j in range(len(chromagram[i])):
            chromagram_count = chromagram_count + int(chromagram[i][j])
    chromagram_count = chromagram_count / len(chromagram)

    # Decide whose sound it is from the file path ('d' at index 20 means dog)
    character = "Gato"
    if path[20] == 'd':
        character = 'Cachorro'

    # Show the file path in the console
    print('Caminho ', path)

    features = [
        int(zero_crossing),
        int(spectral_centroids),
        int(spectral_rolloff),
        int(mel_spectrogram_count),
        int(mfcc_count),
        int(chromagram_count),
        str(character)
    ]
    print('Características Extraidas', features)
    return features

def features(path):
    x, sr = librosa.load(path)

    tempo = librosa.beat.tempo(x, sr=sr)
    zero_crossings = sum(librosa.zero_crossings(x, pad=False))

    n_sb = librosa.util.normalize(librosa.feature.spectral_bandwidth(x, sr=sr)[0])
    spectral_bandwidth_mean = n_sb.mean()
    spectral_bandwidth_var = n_sb.var()

    n_sc = librosa.util.normalize(librosa.feature.spectral_contrast(x, sr=sr)[0])
    spectral_contrast_mean = n_sc.mean()
    spectral_contrast_var = n_sc.var()

    n_scc = librosa.util.normalize(librosa.feature.spectral_centroid(x, sr=sr)[0])
    spectral_centroids_mean = n_scc.mean()
    spectral_centroids_var = n_scc.var()

    n_sr = librosa.util.normalize(librosa.feature.spectral_rolloff(x, sr=sr)[0])
    spectral_rolloff_mean = n_sr.mean()
    spectral_rolloff_var = n_sr.var()

    n_sf = librosa.util.normalize(librosa.feature.spectral_flatness(x)[0])
    spectral_flatness_mean = n_sf.mean()
    spectral_flatness_var = n_sf.var()

    chroma_stft_mean, chroma_stft_var = mean_var_calculator(librosa.feature.chroma_stft(x, sr=sr))
    chroma_cqt_mean, chroma_cqt_var = mean_var_calculator(librosa.feature.chroma_cqt(x, sr=sr))
    chroma_cens_mean, chroma_cens_var = mean_var_calculator(librosa.feature.chroma_cens(x, sr=sr))
    mfcc_mean, mfcc_var = mean_var_calculator(librosa.feature.mfcc(x, sr=sr))

    features = [
        tempo[0], zero_crossings,
        spectral_bandwidth_mean, spectral_bandwidth_var,
        spectral_contrast_mean, spectral_contrast_var,
        spectral_centroids_mean, spectral_centroids_var,
        spectral_rolloff_mean, spectral_rolloff_var,
        spectral_flatness_mean, spectral_flatness_var
    ]
    features.extend(chroma_stft_mean)
    features.extend(chroma_stft_var)
    features.extend(chroma_cqt_mean)
    features.extend(chroma_cqt_var)
    features.extend(chroma_cens_mean)
    features.extend(chroma_cens_var)
    features.extend(mfcc_mean)
    features.extend(mfcc_var)
    return features

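# Hypothetical usage sketch (not part of the original source): builds a feature
# table from a folder of audio files. The folder name "audio" and the use of
# glob/pandas are assumptions; features() above passes the signal positionally
# and relies on the external mean_var_calculator helper, so this only runs on
# librosa versions that still accept a positional `y` argument.
import glob
import pandas as pd

def build_feature_table(folder='audio'):
    paths = sorted(glob.glob(f'{folder}/*.wav'))
    rows = [features(p) for p in paths]  # one flat feature vector per file
    return pd.DataFrame(rows, index=paths)
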
def get_zero_crossing_rate(self, outside_series=None):
    """
    Return the number of times the signal changes sign
    :return: Integer
    """
    y = self.select_series(outside_series)
    zero_crossing = zero_crossings(y, pad=False)
    return sum(zero_crossing)

def zero_indexes(sample):
    """
    Create zero crossing indexes.
    We use these in synthesis, and it is easier to make them here.
    """
    zero_indexes = []
    for channel_index in range(sample.num_channels):
        channel = sample.get_all_audio_data()[channel_index]
        zero_crossings = librosa.zero_crossings(channel)
        zero_index = np.nonzero(zero_crossings)[0]
        zero_indexes.append(zero_index)
    return zero_indexes

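# Hypothetical usage sketch (not part of the original source): given the
# per-channel zero-crossing indexes, snap an arbitrary cut point to the nearest
# zero crossing so a splice does not click. `sample` follows the interface
# assumed above (num_channels, get_all_audio_data()); the function name is
# illustrative only.
import numpy as np

def snap_to_zero_crossing(sample, channel, position):
    idx = zero_indexes(sample)[channel]
    # Index of the zero crossing closest to the requested sample position
    return int(idx[np.argmin(np.abs(idx - position))])
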
def get_zoomed_zero_crossing_rate(self, i, j):
    """
    Return the number of times the signal changes sign in the range [i, j]
    :param i: start point
    :param j: end point
    :return: Integer
    """
    zero_crossing = zero_crossings(self.y[i:j], pad=False)
    return sum(zero_crossing)

def _create_zero_indexes(self):
    """
    Create zero crossing indexes.
    We use these in synthesis, and it is easier to make them here.
    """
    zero_indexes = []
    for channel_index in range(self.num_channels):
        channel = self.raw_samples[channel_index]
        zero_crossings = librosa.zero_crossings(channel)
        zero_index = np.nonzero(zero_crossings)[0]
        zero_indexes.append(zero_index)
    return zero_indexes

def get_features(df):
    """Get features from sensor data.

    For each sensor column, zero-crossing, peak, prominence and periodogram
    features are computed.

    Parameters:
        df: pd.DataFrame
            Dataframe with 10 columns, corresponding to data from sensors

    Returns:
        features: list
            List with features
    """
    features = []

    # zero_crossings
    features.extend(librosa.zero_crossings(df.values, axis=0).sum(axis=0))

    # find_peaks: number of peaks per column
    features.extend(df.apply(find_peaks, axis=0).iloc[0, :].apply(len).values)

    # peak_widths_max
    λ0 = lambda x: np.max(peak_widths(x, find_peaks(x)[0])[0]) if len(find_peaks(x)[0]) != 0 else 0
    features.extend(df.apply(λ0).values)

    # peak_widths_mean
    λ01 = lambda x: np.mean(peak_widths(x, find_peaks(x)[0])[0]) if len(find_peaks(x)[0]) != 0 else 0
    features.extend(df.apply(λ01).values)

    # peak_prominences_max
    λ1 = lambda x: np.max(peak_prominences(x, find_peaks(x)[0])[0]) if len(find_peaks(x)[0]) != 0 else 0
    features.extend(df.apply(λ1).values)

    # peak_prominences_mean
    λ11 = lambda x: np.mean(peak_prominences(x, find_peaks(x)[0])[0]) if len(find_peaks(x)[0]) != 0 else 0
    features.extend(df.apply(λ11).values)

    # periodogram_max
    λ2 = lambda x: np.max(periodogram(x[~x.isna()], 100)[1]) if ~x.isna().all() else 0
    features.extend(np.sqrt(df.apply(λ2).values))  # an estimate of the RMS

    # periodogram_mean
    λ3 = lambda x: np.mean(periodogram(x[~x.isna()], 100)[1]) if ~x.isna().all() else 0
    features.extend(df.apply(λ3).values)

    return features

def plot_zcr(y, smp_rate, row=3, col=1, idx=2, **kwargs):
    zcrs = rft.zero_crossing_rate(y, hop_length=_G.HopLen,
                                  frame_length=_G.ZCR_FrameLen,
                                  center=_G.ZCR_Center)
    plt.subplot(row, col, idx)
    plt.plot(zcrs[0])
    plt.xticks([])
    plt.xlim([0, _G.PLT_XLIM])
    # plt.title('Zero-crossing Rate')
    zcs = librosa.zero_crossings(y, pad=False)
    return zcrs

def get_windowed_zcr(data, block_length):
    num_blocks = int(np.ceil(len(data) / block_length))
    w_zcr = []
    for i in range(0, num_blocks):
        start = i * block_length
        stop = np.min([(start + block_length - 1), len(data)])
        zcr = lib.zero_crossings(data[start:stop])
        # Count the crossings in this block; the original appended len(zcr),
        # which is just the block length since zero_crossings yields one
        # boolean per sample.
        w_zcr.append(np.count_nonzero(zcr))
    return np.asarray(w_zcr)

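# Hypothetical usage sketch (not part of the original source): computes a
# block-wise zero-crossing count for a loaded file. The file name and block
# length are placeholders.
import librosa as lib

y, sr = lib.load('example.wav', sr=None)
w_zcr = get_windowed_zcr(y, block_length=2048)  # one crossing count per 2048-sample block
print(w_zcr.shape, w_zcr[:5])
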
def extract_features(file, samp_type, features_arr):
    # Get samples per second and sound data from the wav file
    sample_rate, signal = wav.read(file)
    if len(signal.shape) == 2:  # if the audio is two-channel, average the channels
        signal = signal.sum(axis=1) / 2

    # Get data for the FFT
    n = signal.shape[0]            # number of samples
    secs = n / float(sample_rate)  # duration in seconds
    ts = 1.0 / sample_rate         # sampling interval (time)
    t = np.arange(0, secs, ts)     # equally spaced ticks over the duration

    # Perform the FFT
    frequency = fft.fftfreq(signal.size, t[1] - t[0])
    amplitude = abs(fft.fft(signal))

    if samp_type != -1:
        # Get the amplitude near 22 kHz: find the first index whose frequency
        # reaches 22 kHz and read the corresponding amplitude value.
        index = 0
        while frequency[index] < 21999.9:
            index += 1
        amp = amplitude[index]  # amplitude at 22 kHz

    # Count frequencies whose magnitude exceeds a threshold
    num_freq_above = 0
    threshold = 4000
    for i in amplitude:
        if i > threshold:
            num_freq_above += 1

    # Load the audio data with librosa
    file_path = file
    data, sr = librosa.load(file_path, sr=44100)

    # Zero crossings
    zero_crossings = sum(librosa.zero_crossings(data, pad=False))

    # Average decibels
    decibels = librosa.amplitude_to_db(data)
    avg_decibels = sum(decibels) / len(decibels)

    # Check whether the 22 kHz feature should be included
    if samp_type == -1:
        features_arr.append([num_freq_above, zero_crossings, avg_decibels])
    else:
        features_arr.append([amp, num_freq_above, zero_crossings, avg_decibels])

def draw_f0_tu_tuong_quan(self, y_or, sr, window_len):
    # Estimate and plot F0 over time using autocorrelation on sliding windows.
    duration = float(len(self.y_or)) / sr
    print("duration:", duration)
    print("yor", y_or)

    x = window_len
    time_ptr = []
    f0_list = []
    print("start draw ...")

    while x < duration - window_len * 1.01:
        try:
            y_windows = y_or[int((x - window_len / 2) * sr):int((x + window_len / 2) * sr)]

            # Treat windows with many zero crossings or low spectral energy as unvoiced (F0 = 0)
            z = librosa.zero_crossings(y_windows)
            N = len(y_windows)
            Xk = np.fft.fft(y_windows)
            E_fft = np.sum(np.abs(Xk) ** 2) / N
            if (len(np.nonzero(z)[0]) > 90) or (E_fft < 4):
                f0_list.append(0)
                time_ptr.append(x)
                x += window_len / 2
                continue

            # Locate the first two autocorrelation peaks; their spacing gives the period
            R_list = compute_arcf_list(y_windows, start=0, stop=301)
            d = diff(R_list)
            start = find(d > 0)[0]
            max_arcf_loc = argmax(R_list[start:]) + start

            R_second_list = R_list[(max_arcf_loc + 1):]
            d = diff(R_second_list)
            start = find(d > 0)[0]
            second_arcf_loc = argmax(R_second_list[start:]) + start

            f0_list.append(float(self.sr) / (second_arcf_loc + 1))
            print("f0: ", float(self.sr) / (second_arcf_loc + 1))
            time_ptr.append(x)
            x += window_len / 2
        except Exception as e:
            print("loi ham draw_f0_tu_tuong_quan::: ", e)
            time_ptr.append(x)
            f0_list.append(0)
            x += window_len / 2

    average = np.average(f0_list)
    print(average, "; ", np.median(f0_list))
    self.ax3.set_ylim([0, 400])
    self.ax3.scatter(time_ptr, f0_list, s=2)

def __test(data, threshold, ref_magnitude, pad, zp):
    zc = librosa.zero_crossings(y=data,
                                threshold=threshold,
                                ref_magnitude=ref_magnitude,
                                pad=pad,
                                zero_pos=zp)
    idx = np.flatnonzero(zc)
    if pad:
        idx = idx[1:]
    # Each detected crossing must correspond to an actual sign change
    for i in idx:
        assert np.sign(data[i]) != np.sign(data[i - 1])

def load_audio(audio):
    list_ = []
    index = 0
    cols = ["mfkk" + str(i) for i in range(20)]
    for row in ["zero", "centroid", "rolloff", "chroma"]:
        cols.append(row)
    x, sr = librosa.load(os.path.join(app.config['UPLOAD_FOLDER'], audio),
                         duration=5, res_type='kaiser_fast')
    # One row: 20 MFCC means, then zero-crossing count, centroid, rolloff and chroma means
    list_.append([np.mean(x) for x in librosa.feature.mfcc(x, sr=sr)])
    list_[index].append(sum(librosa.zero_crossings(x)))
    list_[index].append(np.mean(librosa.feature.spectral_centroid(x)))
    list_[index].append(np.mean(librosa.feature.spectral_rolloff(x, sr=sr)))
    list_[index].append(np.mean(librosa.feature.chroma_stft(x, sr=sr)))
    return pd.DataFrame(list_, columns=cols)

def zeroCrossings(data: np.ndarray) -> int:
    """Count how many times the amplitude of the wave crosses the zero threshold.

    Parameters
    ----------
    data : np.ndarray
        Audio time series. [shape=(n,)]

    Returns
    -------
    int
        How many times the wave crossed zero.
    """
    return sum(lr.zero_crossings(data, pad=False))

def get_FFT(self, audio):
    x, sr = librosa.load(audio, sr=16000)
    zero_crossings = librosa.zero_crossings(x, pad=False)
    spectral_centroids = librosa.feature.spectral_centroid(x, sr=sr)[0]
    spectral_rolloff = librosa.feature.spectral_rolloff(x, sr=sr)[0]
    contrast = librosa.feature.spectral_contrast(x, sr=sr)
    bandwidth = librosa.feature.spectral_bandwidth(x, sr=sr)
    # Average each feature into a single scalar
    result = np.array([
        np.average(zero_crossings),
        np.average(spectral_centroids),
        np.average(spectral_rolloff),
        np.average(contrast),
        np.average(bandwidth)
    ])
    return result

def plot_zero_crosing_rate(self):
    # Plot the signal
    plt.figure(figsize=(14, 5))
    librosa.display.waveplot(self.data, sr=self.sampling_rate)

    # Zoom in on a 100-sample excerpt one third of the way through
    tam = int(len(self.data) / 3)
    n0 = tam
    n1 = tam + 100
    print(">>>>>>>>>>>>>" + str(n1))
    plt.figure(figsize=(14, 5))
    plt.plot(self.data[n0:n1])
    plt.grid()
    plt.show()

    zero_crossings = librosa.zero_crossings(self.data[n0:n1], pad=False)
    print(sum(zero_crossings))

def read_song_file(path):
    data, sample_rate = librosa.load(path)
    total_samples = np.size(data)
    middle_samples = total_samples / 2

    # Take a 30-second window centred on the middle of the song
    from_pos = middle_samples - (15 * sample_rate)  # 15 seconds before the middle
    to_pos = middle_samples + (15 * sample_rate)    # 15 seconds after the middle
    middle_data = data[int(from_pos):int(to_pos)]

    # Calculate the zero crossing count
    zero_crossings = librosa.zero_crossings(y=middle_data, pad=False)
    zero_crossings = np.count_nonzero(zero_crossings)

    # Calculate the spectral centroid
    spectral_centroids = librosa.feature.spectral_centroid(y=middle_data, sr=sample_rate)

    # Calculate the spectral rolloff
    spectral_rolloff = librosa.feature.spectral_rolloff(y=middle_data, sr=sample_rate)

    # Calculate mel-frequency cepstral coefficients
    mfccs = librosa.feature.mfcc(y=middle_data, sr=sample_rate)

    # Calculate the chroma frequencies
    hop_length = 512
    chroma = librosa.feature.chroma_stft(y=middle_data, sr=sample_rate, hop_length=hop_length)

    # np.concatenate with axis=None flattens everything, so explicit flatten()
    # calls are unnecessary; np.insert returns a new array, so keep the result
    # (the original discarded it and never included the zero-crossing count).
    end_data = np.concatenate((spectral_centroids, spectral_rolloff, mfccs, chroma), axis=None)
    end_data = np.insert(end_data, 0, zero_crossings)
    return end_data

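# Hypothetical usage sketch (not part of the original source): stacks the
# per-song feature vectors into a 2-D array. The paths are placeholders, and
# vstack requires equal-length vectors, which holds when every song is at
# least 30 seconds long (so each middle window has the same frame count).
import numpy as np

song_paths = ['songs/a.mp3', 'songs/b.mp3']  # placeholder paths
X = np.vstack([read_song_file(p) for p in song_paths])
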
def classifier(rewrite=False):
    # Columns: file name, language, zero_crossing_rate, spectral_centroid,
    # spectral_rolloff, MFCC means/variances, chroma means/variances, label, genre
    if rewrite:
        columns = ['name', 'lang', 'zero_crossings', 'spectral_centroid',
                   'spectral_rolloff', 'mf1m', 'mf2m', 'mf3m', 'mf4m', 'mf5m',
                   'mf6m', 'mf7m', 'mf8m', 'mf9m', 'mf10m', 'mf11m', 'mf12m',
                   'mf13m', 'mf14m', 'mf15m', 'mf16m', 'mf17m', 'mf18m', 'mf19m',
                   'mf20m', 'mf1v', 'mf2v', 'mf3v', 'mf4v', 'mf5v', 'mf6v', 'mf7v',
                   'mf8v', 'mf9v', 'mf10v', 'mf11v', 'mf12v', 'mf13v', 'mf14v',
                   'mf15v', 'mf16v', 'mf17v', 'mf18v', 'mf19v', 'mf20v', 'cs1m',
                   'cs2m', 'cs3m', 'cs4m', 'cs5m', 'cs6m', 'cs7m', 'cs8m', 'cs9m',
                   'cs10m', 'cs11m', 'cs12m', 'cs1v', 'cs2v', 'cs3v',
                   'cs4v', 'cs5v', 'cs6v', 'cs7v', 'cs8v', 'cs9v', 'cs10v',
                   'cs11v', 'cs12v', 'label', 'genre']
        df = pd.DataFrame(columns=columns)
    else:
        df = pd.read_csv('songs.csv')

    directory = 'library/'
    w = os.walk(directory)
    folders = next(w)[1]

    for folder in tqdm(folders, desc="Folders", leave=False):
        songs = os.listdir(directory + folder)
        for song_name in tqdm(songs, desc="Songs", leave=False):
            if not song_name in list(df['name']):
                new_row = []
                new_row.append(song_name)
                new_row.append(langdetect.detect(song_name))
                song, sr = librosa.load(directory + folder + '/' + song_name)
                new_row.append(np.mean(librosa.zero_crossings(song)))
                new_row.append(np.mean(librosa.feature.spectral_centroid(song, sr=sr)[0]))
                new_row.append(np.mean(librosa.feature.spectral_rolloff(song + 0.01, sr=sr)[0]))
                new_row = new_row + list(librosa.feature.mfcc(song, sr=sr).mean(axis=1))
                new_row = new_row + list(librosa.feature.mfcc(song, sr=sr).var(axis=1))
                new_row = new_row + list(librosa.feature.chroma_stft(song, sr=sr).mean(axis=1))
                new_row = new_row + list(librosa.feature.chroma_stft(song, sr=sr).var(axis=1))
                if folder == 'rotation':
                    new_row.append(-1)
                else:
                    new_row.append(int(folder.split('_')[0]))
                new_row.append('[]')
                # df.index.max() is NaN on an empty frame, so compute the next index explicitly
                next_index = 0 if df.empty else df.index.max() + 1
                df.loc[next_index] = new_row
    df.to_csv('songs.csv', index=False)

def test_zero_indexes():
    channel = mono_audio.raw_samples[0]
    zero_crossings = librosa.zero_crossings(channel)
    zero_index = np.nonzero(zero_crossings)[0]
    # Compare the stored indexes element-wise; the original compared the two
    # arrays' .all() values, which does not test equality.
    assert np.array_equal(mono_audio.zero_indexes[0], zero_index)

def crossfade(audio1, audio2, slices):
    """
    Apply crossfading to 2 audio tracks. The fade function is randomly applied.

    :param audio1: your first signal
    :param audio2: your second signal
    :param slices: slices of intervals
    :returns:
      - crossfaded audio
    """
    def fade_out(audio):
        dbs = 20 * np.log10(abs(audio))
        thres = max(dbs)
        db_steps = np.arange(abs(thres), 120)
        start = 0
        try:
            sections = int(len(dbs) / len(db_steps))
        except Exception as e:
            return audio
        i = 0
        while (start + len(db_steps)) < len(dbs):
            dbs[start:sections + start] -= db_steps[i]
            start += sections
            i += 1
        if dbs.argmin() == 0:
            dbs = dbs[::-1]
        faded = 10 ** (dbs * 0.05)
        faded[audio < 0] *= -1
        return faded

    def fade_in(audio):
        dbs = 20 * np.log10(abs(audio))
        try:
            thres = max(dbs)
        except Exception as e:
            return audio
        dbs = dbs[::-1]
        db_steps = np.arange(abs(thres), 120)
        start = 0
        try:
            sections = int(len(dbs) / len(db_steps))
        except Exception as e:
            return audio
        i = 0
        while (start + len(db_steps)) < len(dbs):
            dbs[start:sections + start] -= db_steps[i]
            start += sections
            i += 1
        if dbs.argmin() != 0:
            dbs = dbs[::-1]
        faded = 10 ** (dbs * 0.05)
        faded[audio < 0] *= -1
        return faded

    # Match the requested slice boundaries to zero-crossing positions
    amp1 = np.nonzero(librosa.zero_crossings(audio1))[-1]
    amp2 = np.nonzero(librosa.zero_crossings(audio2))[-1]
    amp1 = amp1[librosa.util.match_events(slices[0], amp1)]
    amp2 = amp2[librosa.util.match_events(slices[1], amp2)]

    a = []
    for i in range(len(amp1)):
        a.append(list(audio1[slice(amp1[i][0], amp1[i][1])]))
    a_rev = []
    for i in range(len(amp2)):
        a_rev.append(list(audio2[slice(amp2[i][0], amp2[i][1])]))

    # Randomly decide which track fades out and which fades in
    if choice([0, 1]) == 0:
        amp1 = fade_out(np.concatenate(a))
        amp2 = fade_in(np.concatenate(a_rev))
    else:
        amp2 = fade_in(np.concatenate(a_rev))
        amp1 = fade_out(np.concatenate(a))

    size = min([len(amp1), len(amp2)])
    result = amp1[:size] + amp2[:size]
    return 0.5 * result / result.max()