def generate_cqt(i, file_path, offset=0, duration=None):
    print('[{}] Opening'.format(i), file_path)
    data, sample_rate = load(file_path, sr=None, offset=offset, duration=duration)
    print('[{}] Sample Rate:'.format(i), sample_rate, 'shape:', data.shape)
    if len(data.shape) == 2:
        with Timer('[{}] Converted to mono'.format(i)):
            print('[{}] Converting to mono channel...'.format(i))
            data = to_mono(data)
    with Timer('[{}] Resampling'.format(i)):
        print('[{}] Resampling to'.format(i), TARGET_SAMPLE_RATE, 'Hz...')
        downsampled_data = resample(data, orig_sr=sample_rate,
                                    target_sr=TARGET_SAMPLE_RATE)
        # downsampled_data = data
        print('[{}] Downsampled to'.format(i), TARGET_SAMPLE_RATE,
              'Hz shape is now', downsampled_data.shape)
    with Timer('[{}] CQT'.format(i)):
        print('[{}] Generating CQT...'.format(i))
        cqt_result = np.abs(
            cqt(downsampled_data,
                sr=TARGET_SAMPLE_RATE,
                hop_length=HOP_LENGTH,
                n_bins=TOTAL_BINS,
                bins_per_octave=BINS_PER_OCTAVE))
    return cqt_result
def generate_cqt(file_path, st_status):
    st_status.text('Opening {}'.format(file_path))
    data, sample_rate = auto_load(file_path, sr=None)
    print('Sample Rate:', sample_rate, 'shape:', data.shape)
    if len(data.shape) == 2:
        print('Converting to mono channel...')
        data = to_mono(data)
    st_status.text('Resampling to {} Hz...'.format(TARGET_SAMPLE_RATE))
    downsampled_data = resample(data, orig_sr=sample_rate,
                                target_sr=TARGET_SAMPLE_RATE)
    # downsampled_data = data
    st_status.text('Downsampled to {} Hz, shape is now {}'.format(
        TARGET_SAMPLE_RATE, downsampled_data.shape))
    st_status.text('Generating CQT...')
    cqt_result = np.abs(
        cqt(downsampled_data,
            sr=TARGET_SAMPLE_RATE,
            hop_length=HOP_LENGTH,
            n_bins=TOTAL_BINS,
            bins_per_octave=BINS_PER_OCTAVE))
    return cqt_result
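A minimal usage sketch for this Streamlit variant. The module-level constants and the status placeholder are not shown in the original, so the values below are assumptions for illustration only.

# Hypothetical invocation sketch: the constant values are assumed,
# not taken from the original project.
import numpy as np
import streamlit as st
from librosa import load as auto_load, to_mono, resample, cqt

TARGET_SAMPLE_RATE = 22050        # assumed
HOP_LENGTH = 512                  # assumed
BINS_PER_OCTAVE = 24              # assumed
TOTAL_BINS = BINS_PER_OCTAVE * 7  # assumed: 7 octaves

status = st.empty()  # a Streamlit placeholder exposing .text()
cqt_result = generate_cqt('example.wav', status)
st.write('CQT shape: {}'.format(cqt_result.shape))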
def calculateFeaturesForDownbeat(audio):
    datarow = []
    BINS_PER_OCTAVE = 12
    NUM_OCTAVES = 5
    spectogram = cqt(audio, sr=44100, bins_per_octave=BINS_PER_OCTAVE,
                     n_bins=BINS_PER_OCTAVE * NUM_OCTAVES, hop_length=512)
    # Magnitude in dB, then normalised to [0, 1]
    spec_db = librosa.amplitude_to_db(np.abs(spectogram), ref=np.max)
    max_spec = np.max(spec_db)
    min_spec = np.min(spec_db)
    if max_spec != min_spec:
        spec_db = (spec_db - min_spec) / (max_spec - min_spec)
    else:
        print('Max equals min!')
        spec_db = spec_db - min_spec

    # Statistics for each frequency bin (each row => aggregate over axis 1 = columns)
    NUM_FRAMES = spectogram.shape[1]
    # 31 overlapping time windows x 10 half-octave frequency bands
    window_features = np.zeros((4 * 4 * 2 - 1, NUM_OCTAVES * 2))
    for i_freq in range(NUM_OCTAVES * 2):
        # Window along the frequency axis: contiguous half-octave bands
        f_bin_start = (i_freq * BINS_PER_OCTAVE) // 2
        f_bin_end = ((i_freq + 1) * BINS_PER_OCTAVE) // 2
        for i_time in range(4 * 4 * 2 - 1):
            # Window along the time axis: quarter-beat frames with an
            # eighth-beat hop (32 subdivisions of a four-beat bar)
            frame_start = int(i_time * NUM_FRAMES / 32.0)
            frame_end = int((i_time + 2) * NUM_FRAMES / 32.0)
            # Mean level of the current patch of the spectrogram:
            # roughly how 'loud' this window is
            cur_w = spec_db[f_bin_start:f_bin_end, frame_start:frame_end]
            window_features[i_time, i_freq] = np.mean(cur_w)

    # Aggregate across frequency bands for each time window (4 stats x 31
    # windows), then summarise the per-band statistics along the time axis
    freq_axis_features = []
    freq_axis_features.extend(np.mean(window_features, axis=1))
    freq_axis_features.extend(np.var(window_features, axis=1))
    freq_axis_features.extend(skew(window_features, axis=1))
    freq_axis_features.extend(kurtosis(window_features, axis=1))
    time_means = np.mean(window_features, axis=0)
    time_vars = np.var(window_features, axis=0)
    freq_axis_features.extend([np.mean(time_means), np.var(time_means),
                               np.mean(time_vars), np.var(time_vars)])
    datarow = freq_axis_features

    # Statistics for each time point (each column => aggregate over axis 0 = rows)
    #~ datarow.extend(np.std(spec_db, axis=0))
    #~ datarow.extend(skew(spec_db, axis=0))
    #~ datarow.extend(kurtosis(spec_db, axis=0))

    # Show
    if isPlot:
        plt.figure()
        plt.subplot(2, 1, 1)
        librosa.display.specshow(spec_db, x_axis='time', y_axis='cqt_note')
        plt.subplot(2, 1, 2)
        #~ cosine_distances_t = [distance.cosine(spec_db[i+1,:], spec_db[i,:]) for i in range(spec_db.shape[0]-1)]
        plt.plot(datarow[::4])
        plt.plot(datarow[1::4])
        plt.plot(datarow[2::4])
        plt.plot(datarow[3::4])
    return datarow
def __call__(self, data):
    constantq = cqt(data,
                    sr=self.sampling_rate,
                    hop_length=self.hop_length,
                    n_bins=self.n_bins,
                    bins_per_octave=self.bins_per_octave,
                    window=self.window)
    # Return the magnitude spectrogram when is_abs is set, else the complex CQT
    return np.abs(constantq) if self.is_abs else constantq
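A minimal sketch of how such a callable transform might be wired up and applied. The original class's constructor is not shown, so the __init__ below simply stores the parameters that __call__ reads; all default values are assumptions.

# Hypothetical wrapper for illustration only: the real __init__ is not
# shown above, so this one just stores the attributes __call__ uses.
import numpy as np
import librosa
from librosa import cqt

class CQTTransform:
    def __init__(self, sampling_rate=22050, hop_length=512, n_bins=84,
                 bins_per_octave=12, window='hann', is_abs=True):
        self.sampling_rate = sampling_rate
        self.hop_length = hop_length
        self.n_bins = n_bins
        self.bins_per_octave = bins_per_octave
        self.window = window
        self.is_abs = is_abs

    def __call__(self, data):
        constantq = cqt(data, sr=self.sampling_rate, hop_length=self.hop_length,
                        n_bins=self.n_bins, bins_per_octave=self.bins_per_octave,
                        window=self.window)
        return np.abs(constantq) if self.is_abs else constantq

transform = CQTTransform()
# librosa's bundled example clip (downloaded on first use)
y, sr = librosa.load(librosa.ex('trumpet'), sr=22050)
print(transform(y).shape)  # (n_bins, n_frames)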
def calcAndWriteFeatureFile(audio, filename):
    '''
    Generate the CQT matrix and write it as a binary array to a file,
    so that it can later be read by TensorFlow with a FixedLengthRecordReader.
    '''
    spectogram = cqt(audio, sr=44100, bins_per_octave=12, n_bins=84)
    spec_db = librosa.amplitude_to_db(np.abs(spectogram), ref=np.max)
    #~ librosa.display.specshow(spec_db, sr=44100)
    #~ plt.show()
    spec_db.tofile(filename)
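A small counterpart sketch, not part of the original, showing how the binary file written above could be read back with NumPy. It assumes the 84-bin layout and the float32 dtype that librosa's amplitude_to_db produces here.

# Hypothetical read-back of the feature file written above; assumes
# 84 frequency bins and float32 data written row-major by .tofile().
import numpy as np

def readFeatureFile(filename, n_bins=84):
    flat = np.fromfile(filename, dtype=np.float32)
    return flat.reshape(n_bins, -1)  # (n_bins, n_frames)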
# FFT
fourier = fft.fft(channel1)
#print(fourier)
"""plt.figure()
plt.plot(fourier, alpha=0.9, color='blue')
plt.xlabel('k')
plt.ylabel('Amplitude')
plt.show()
"""

# CQT
# Load the wav file with librosa
x, sr = librosa.load("test.wav", sr=44100,
                     mono=True)  # mono=True downmixes the audio to mono (TODO)
cqt = librosa.cqt(x, sr=sr, bins_per_octave=36)
log_cqt = librosa.amplitude_to_db(np.abs(cqt))

# Spectrogram FFT
"""
plt.figure(2, figsize=(8,6))
plt.subplot(211)
Pxx, freqs, bins, im = plt.specgram(channel1, Fs=rate, NFFT=1024, cmap=plt.get_cmap('plasma'))
cbar = plt.colorbar(im)
plt.xlabel('Time (s)')
plt.ylabel('Frequency (Hz)')
cbar.set_label('Intensity dB')

plt.subplot(212)
Pxx, freqs, bins, im = plt.specgram(channel2, Fs=rate, NFFT=1024, cmap=plt.get_cmap('plasma'))
cbar = plt.colorbar(im)
plt.xlabel('Time (s)')
"""
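The snippet computes log_cqt but only plots the FFT spectrograms. A possible follow-up, not in the original, for displaying the log-CQT itself with librosa's plotting helper:

# Hypothetical addition: show the log-CQT computed above.
import matplotlib.pyplot as plt
import librosa.display

plt.figure()
librosa.display.specshow(log_cqt, sr=sr, x_axis='time', y_axis='cqt_note',
                         bins_per_octave=36)
plt.colorbar(format='%+2.0f dB')
plt.show()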
itemcnt = len(midilist)
i = 5000
for midifile in midilist:
    i += 1
    print("Processing %d" % (i))
    try:
        # Synthesise the waveform from the MIDI file (librosa's load cannot
        # read MIDI directly, so the audio comes from fluidsynth)
        pm = pretty_midi.PrettyMIDI(midifile)
        wav = pm.fluidsynth(fs=C.SR)
        target = chromatemplate.GetConvnetTargetFromPianoroll(
            utils.GetPianoroll(midifile))
        fmin = note_to_hz("C1")
        # Harmonic CQT: one CQT per harmonic, the h-th starting at (h+1) x C1
        spec = np.vstack([
            np.abs(
                cqt(wav,
                    sr=C.SR,
                    hop_length=C.H,
                    n_bins=C.BIN_CNT,
                    bins_per_octave=C.OCT_BIN,
                    fmin=fmin * (h + 1))).T.astype(np.float32)
            for h in range(C.CQT_H)
        ])
    except Exception:
        print("Got error. Skip...")
        continue
    minsz = min([spec.shape[1], target.shape[1]])
    np.savez(C.PATH_MIDIHCQT + "additional/" + "%06d.npz" % i,
             spec=spec[:minsz], target=target[:minsz])
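For context, a small illustration of the fmin series that this harmonic CQT uses: channel h starts at (h+1) times C1, i.e. at the h-th harmonic of C1. The channel count of 6 below is an assumed value for C.CQT_H, not taken from the original config.

# Illustration only: assumes C.CQT_H == 6.
from librosa import note_to_hz

fmin = note_to_hz("C1")
print([round(fmin * (h + 1), 2) for h in range(6)])
# [32.7, 65.41, 98.11, 130.81, 163.52, 196.22]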
import sys

import numpy as np
import matplotlib.pyplot as plt
import librosa
import librosa.display
from librosa.core import cqt

START = 44100 * 50  # skip the first 50 seconds
audio, sr = librosa.load(sys.argv[1], sr=44100)
audio = audio[START:START + int(sys.argv[2]) * 60480]
FRAME_SIZE = 1024
HOP_SIZE = FRAME_SIZE // 2
#~ spectogram = np.array([np.log(spectrum(w(frame))) for frame in frames])
spectogram = cqt(audio, sr=44100, bins_per_octave=12, n_bins=84)
spec_db = librosa.amplitude_to_db(np.abs(spectogram), ref=np.max)
print(np.shape(spec_db), spec_db.dtype)
librosa.display.specshow(spec_db, sr=44100)
#~ plt.imshow(spec_db, aspect='auto')
plt.show()
parser.add_argument("-d", help="Name of the BLSTM-CRF decoder parameter file. (default: 'nblstm_crf.model')",\ type=str, default="nblstm_crf.model", action="store") args = parser.parse_args() audio_list = find_files("Datas/audios_estimation") for audiofile in audio_list: fname = audiofile.split("/")[-1] print("Processing: %s" % fname) #load audio y,sr = load(audiofile,sr=C.SR) #extract Harmonic-CQT from audio fmin = note_to_hz("C1") hcqt = np.stack([np.abs(cqt(y,sr=C.SR,hop_length=C.H,n_bins=C.BIN_CNT,bins_per_octave=C.OCT_BIN,fmin=fmin*(h+1),filter_scale=2,tuning=None)).T.astype(np.float32) for h in range(C.CQT_H)]) #extract feature using trained CNN extractor cnn_feat_extractor = N.FullCNNFeatExtractor() cnn_feat_extractor.load(args.f) feat = cnn_feat_extractor.GetFeature(U.PreprocessSpec(hcqt)).data #decode label sequence decoder = N.NBLSTMCRF() decoder.load(args.d) labels = decoder.argmax(feat) #convert into .lab file labfile = os.path.join("Datas/labs_estimated",fname+".lab")
import chromatemplate

config.train = False
config.enable_backprop = False
audiofile = "/home/wuyiming/Projects/ChordData/Audio/16_RWC/050.wav"
wav, sr = load(audiofile, sr=C.SR)
fmin = note_to_hz("C1")
# Harmonic CQT: C.CQT_H stacked CQTs, the h-th starting at the h-th harmonic of C1
spec = U.PreprocessSpec(
    np.stack([
        np.abs(
            cqt(wav, sr=C.SR, hop_length=C.H, n_bins=C.BIN_CNT,
                bins_per_octave=C.OCT_BIN, fmin=fmin * (h + 1),
                filter_scale=2, tuning=None)).T.astype(np.float32)
        for h in range(C.CQT_H)
    ]))
# Single high-resolution CQT used as the spec_dnn input
spec_dnn = U.Embed(U.PreprocessSpec(
    np.abs(
        cqt(wav, sr=C.SR, hop_length=C.H, n_bins=144, bins_per_octave=24,
            filter_scale=2, tuning=None)).T.astype(np.float32)),
    size=1)
        timing.append(time_used)

    print("mean = ", np.mean(timing))
    print("std = ", np.std(timing))
    data = pd.DataFrame(timing, columns=['t_avg'])
    data['Type'] = f'torch_{args.device}'
    data.to_csv(Path(__file__).parent / f'./result/CQT2010v1_torch_{args.device}')

elif args.device == "librosa":
    spec_list = []
    timing = []
    for e in range(5):
        t_start = time.time()
        for i in tqdm.tqdm(y_list, leave=True):
            spec = cqt(i, sr=44100, n_bins=84, bins_per_octave=24, fmin=55)
            spec_list.append(abs(spec))
        time_used = time.time() - t_start
        print(time_used)
        timing.append(time_used)
    print("mean = ", np.mean(timing))
    print("std = ", np.std(timing))
    data = pd.DataFrame(timing, columns=['t_avg'])
    data['Type'] = 'librosa'
    data.to_csv(Path(__file__).parent / './result/librosa_CQT')
else:
    print("Please select a correct device")
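A self-contained reduction of the librosa branch above, runnable without the surrounding benchmark script. The input is synthetic and assumed (100 one-second clips at 44.1 kHz); the original y_list is not shown here.

# Standalone sketch of the librosa timing loop; clip count and length
# are assumptions, not the original benchmark data.
import time
import numpy as np
from librosa import cqt

y_list = [np.random.randn(44100).astype(np.float32) for _ in range(100)]
timing = []
for e in range(5):
    t_start = time.time()
    for y in y_list:
        spec = np.abs(cqt(y, sr=44100, n_bins=84, bins_per_octave=24, fmin=55))
    timing.append(time.time() - t_start)
print("mean = ", np.mean(timing))
print("std = ", np.std(timing))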