def read_all_audio(self):
    # Songs performed by each singer, indexed parallel to self.fld
    psongs = [['01', '09', 13, 18], ['05', '08', 11, 15], ['07', 15, 16, 20],
              ['04', 10, 12, 17], ['04', 10, 12, 17], ['05', 11, 19, 20],
              ['02', '03', '06', 14], ['07', 15, 16, 20], ['05', '08', 11, 15],
              ['01', '09', 13, 18], ['05', 11, 19, 20], ['02', '03', '06', 14]]
    for i in range(len(self.fld)):
        usr = self.fld[i]
        for snum in psongs[i]:
            file_path1 = self.root_dir + str(usr) + '/sing/' + str(snum) + '.wav'
            file_path2 = self.root_dir + str(usr) + '/read/' + str(snum) + '.wav'
            audio1 = core.load(file_path1, self.sr)[0]
            audio2 = core.load(file_path2, self.sr)[0]
            self.all_audio[file_path1] = audio1
            self.all_audio[file_path2] = audio2
    print("All audio read & stored")
    with open('NUS_data_dict.pkl', 'wb') as f:
        pickle.dump(self.all_audio, f)
def create_random_data(size=100, max_size=100):
    # Draw random track indices per stem so stems from different songs get mixed
    rand_voc = np.random.randint(max_size, size=size)
    rand_bass = np.random.randint(max_size, size=size)
    rand_drums = np.random.randint(max_size, size=size)
    rand_other = np.random.randint(max_size, size=size)
    count = 1
    print("Generating random mix...")
    for i_voc, i_bass, i_drums, i_other in zip(rand_voc, rand_bass,
                                               rand_drums, rand_other):
        inst_files = [FILE_DRUMS, FILE_BASS, FILE_OTHER, FILE_VOCAL]
        inst_files.remove(TRAIN_TARGET)
        y_train, _ = load(str(list_source_dir[i_voc] / TRAIN_TARGET), sr=None)
        y_inst1, _ = load(str(list_source_dir[i_bass] / inst_files[0]), sr=None)
        y_inst2, _ = load(str(list_source_dir[i_drums] / inst_files[1]), sr=None)
        y_inst3, _ = load(str(list_source_dir[i_other] / inst_files[2]), sr=None)
        # Truncate all stems to the shortest one before mixing
        minsize = min([y_train.size, y_inst1.size, y_inst2.size, y_inst3.size])
        y_train = y_train[:minsize]
        y_inst = y_inst1[:minsize] + y_inst2[:minsize] + y_inst3[:minsize]
        y_mix = y_train + y_inst
        fname = "dsd_random%02d" % count
        util.SaveSpectrogramA(y_mix, y_train, fname)
        print("Saved:" + fname)
        count += 1
def create_data(generate_high_data=False, aug=False):
    stretch = [1.1]
    for mix_dir, source_dir in zip(list_mix_dir, list_source_dir):
        assert mix_dir.name == source_dir.name
        fname = mix_dir.name
        print("Processing: " + fname)
        y_mix, sr = load(str(mix_dir / FILE_MIX), sr=None)
        y_train, _ = load(str(source_dir / TRAIN_TARGET), sr=None)
        assert y_mix.shape == y_train.shape
        util.SaveSpectrogramA(y_mix, y_train, fname, original_sr=sr,
                              generate_high_data=generate_high_data)
        if aug:
            for st in stretch:
                y_mix_stretch = stretch_sound(y_mix, st)
                y_train_stretch = stretch_sound(y_train, st)
                util.SaveSpectrogramA(
                    y_mix_stretch, y_train_stretch,
                    "%s_stretch%d" % (fname, int(st * 10)),
                    original_sr=sr)
def read_all_audio(self):
    print('Reading audio, to collect all of it in a dictionary')
    # List of all singers and their songs
    psongs = [['01', '09', 13, 18], ['05', '08', 11, 15], ['07', 15, 16, 20],
              ['04', 10, 12, 17], ['04', 10, 12, 17], ['05', 11, 19, 20],
              ['02', '03', '06', 14], ['07', 15, 16, 20], ['05', '08', 11, 15],
              ['01', '09', 13, 18], ['05', 11, 19, 20], ['02', '03', '06', 14]]
    for i in range(len(self.fld)):
        usr = self.fld[i]
        for snum in psongs[i]:
            file_path1 = self.root_dir + str(usr) + '/sing/' + str(snum) + '.wav'
            file_path2 = self.root_dir + str(usr) + '/read/' + str(snum) + '.wav'
            audio1 = core.load(file_path1, self.sr)[0]
            audio2 = core.load(file_path2, self.sr)[0]
            self.all_audio[file_path1] = audio1
            self.all_audio[file_path2] = audio2
    # Saved to and read from the project directory
    with open('NUS_data_dict.pkl', 'wb') as f:
        pickle.dump(self.all_audio, f)
    print("All audio read & stored")
def main(argv):
    os.makedirs(FLAGS.output_dir, exist_ok=True)

    # Initialize model
    unet = Unet()
    restore(net=unet, ckpt_path=FLAGS.ckpt_path)

    # Load data
    mix_wav, _ = load(FLAGS.original_wav, sr=SAMPLE_RATE)
    mix_wav_mag, mix_wav_phase = magphase(
        stft(mix_wav, n_fft=WINDOW_SIZE, hop_length=HOP_LENGTH))
    mix_wav_mag = mix_wav_mag[:, START:END]
    mix_wav_phase = mix_wav_phase[:, START:END]

    # Load ground truth, if provided
    if FLAGS.gt:
        gt_wav, _ = load(FLAGS.original_gt, sr=SAMPLE_RATE)
        gt_wav_mag, gt_wav_phase = magphase(
            stft(gt_wav, n_fft=WINDOW_SIZE, hop_length=HOP_LENGTH))
        gt_wav_mag = gt_wav_mag[:, START:END]
        gt_wav_phase = gt_wav_phase[:, START:END]

    # Save input spectrogram image and ground truth
    write_wav(FLAGS.output_dir + 'original_mix.wav',
              istft(mix_wav_mag * mix_wav_phase,
                    win_length=WINDOW_SIZE, hop_length=HOP_LENGTH),
              SAMPLE_RATE, norm=True)
    spectogram_librosa(FLAGS.output_dir + 'original_mix.wav', 0)
    if FLAGS.gt:
        write_wav(FLAGS.output_dir + 'gt.wav',
                  istft(gt_wav_mag * gt_wav_phase,
                        win_length=WINDOW_SIZE, hop_length=HOP_LENGTH),
                  SAMPLE_RATE, norm=True)
        spectogram_librosa(FLAGS.output_dir + 'gt.wav', 0)

    # Run the model: drop the DC bin so the input is 512 x 128
    inputs = mix_wav_mag[1:].reshape(1, 512, 128, 1)
    mask = unet(inputs).numpy().reshape(512, 128)
    predict = inputs.reshape(512, 128) * mask

    # Evaluation metrics
    if FLAGS.gt:
        expand_pre = np.expand_dims(predict.flatten(), axis=0)
        expand_gt = np.expand_dims(gt_wav_mag[1:].flatten(), axis=0)
        expand_input = np.expand_dims(inputs.flatten(), axis=0)
        (SDR, SIR, SAR, _) = mir_eval.separation.bss_eval_sources(expand_gt, expand_pre)
        (SDR2, _, _, _) = mir_eval.separation.bss_eval_sources(expand_gt, expand_input)
        NSDR = SDR - SDR2  # SDR(Se, Sr) - SDR(Sm, Sr)
        metrics = ('*****SDR = ' + str(SDR) + ', SIR = ' + str(SIR) +
                   ', SAR = ' + str(SAR) + ', NSDR = ' + str(NSDR) + '*****')
        print(metrics)
        with open(FLAGS.output_dir + 'metrics.txt', 'a') as fout:
            fout.write(metrics)

    # Convert model output to target magnitude: restore the dropped DC bin
    target_pred_mag = np.vstack((np.zeros((128)), predict))

    # Write vocal prediction audio files
    write_wav(FLAGS.output_dir + 'pred_vocal.wav',
              istft(target_pred_mag * mix_wav_phase,
                    win_length=WINDOW_SIZE, hop_length=HOP_LENGTH),
              SAMPLE_RATE, norm=True)
    spectogram_librosa(FLAGS.output_dir + 'pred_vocal.wav', 1)
def logmel(self):
    from librosa.feature import melspectrogram
    from librosa.core import load

    logmel_params = self.config['logmel_params']
    sr = logmel_params['sr']
    n_fft = logmel_params['n_fft']
    hop_length = logmel_params['hop_length']
    n_mels = logmel_params['n_mels']
    feature_path = os.path.join(
        self.dataset['feature_path'],
        'logmel_{}_{}_{}_{}'.format(sr, n_fft, hop_length, n_mels))
    if not os.path.exists(feature_path):
        os.mkdir(feature_path)

    x_train = []
    y_train = []
    f_train = []
    for i, row in self.dataset.train_data.iterrows():
        print('[Train] {}) Getting logmels from {}...'.format(
            i, row['cur_name']), end='')
        wav_name = os.path.join(self.dataset['data_path'], row['cur_name'])
        wav_data, sr = load(wav_name, sr=sr)
        x_train.append(melspectrogram(wav_data, sr=sr, n_fft=n_fft,
                                      hop_length=hop_length, n_mels=n_mels))
        y_train.append(self._build_multilabel(row))
        f_train.append(row['cur_name'])
        print('done.')

    x_test = []
    y_test = []
    f_test = []
    for i, row in self.dataset.test_data.iterrows():
        print('[Test] {}) Getting mels from {}...'.format(
            i, row['cur_name']), end='')
        wav_name = os.path.join(self.dataset['data_path'], row['cur_name'])
        wav_data, sr = load(wav_name, sr=sr)
        x_test.append(melspectrogram(wav_data, sr=sr, n_fft=n_fft,
                                     hop_length=hop_length, n_mels=n_mels))
        y_test.append(self._build_multilabel(row))
        f_test.append(row['cur_name'])
        print('done')

    self._save_pickles(feature_path, x_train, y_train, f_train,
                       x_test, y_test, f_test)
def comp_lsd(ref_file, pred_file):
    # sr, nfft, hop and wlen are module-level analysis settings
    ref = core.load(ref_file, sr=sr)[0]
    pred = core.load(pred_file, sr=sr)[0]
    stft_ref = np.abs(core.stft(ref, n_fft=nfft, hop_length=hop, win_length=wlen))
    stft_pred = np.abs(core.stft(pred, n_fft=nfft, hop_length=hop, win_length=wlen))
    logstft_ref = np.log(0.1 + stft_ref)
    logstft_pred = np.log(0.1 + stft_pred[:, :stft_ref.shape[1]])
    # Log-spectral distance, restricted to frequency bins 7-219
    lsd = np.mean(
        np.sqrt(np.sum((logstft_ref[7:220] - logstft_pred[7:220]) ** 2, axis=0)))
    return lsd
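# comp_lsd reads its analysis settings from module scope. A minimal sketch of
# how it might be wired up; the constants and file names below are
# illustrative assumptions, not the original project's configuration.
import numpy as np
from librosa import core

sr, nfft, hop, wlen = 16000, 512, 128, 512  # assumed analysis settings

score = comp_lsd('reference.wav', 'prediction.wav')  # hypothetical files
print('LSD: %.3f' % score)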
def main():
    # frame lengths: man 900, middle 750, default 500
    frame_length = 900
    parser = argparse.ArgumentParser()
    parser.add_argument('-s', '--speed', type=float, default=1.)
    parser.add_argument('-t', '--time', type=float, default=-10.)
    parser.add_argument('-o', '--output', type=str, default='output.wav')
    parser.add_argument('-i', '--input', type=str, default='input.wav')
    args = parser.parse_args()

    parameters = {}
    input_filename = args.input
    output_filename = args.output
    if not os.path.isfile(input_filename):
        raise RuntimeError('no input file')

    x, fs = core.load(input_filename)
    # f0, sp, ap = pw.wav2world(x, fs)
    frame_length = 1500  # 100000 // int(calculateF0(f0)) // 2 * 2
    y, sr = core.load(input_filename, sr=fs)
    onset_frames = onset.onset_detect(x, sr=sr, wait=1, pre_avg=1, post_avg=1,
                                      pre_max=1, post_max=1)
    onset_times = librosa.frames_to_time(onset_frames)
    plt.plot(y)
    for i in onset_times:
        # onset_times are in seconds; 22050 is librosa's default sample rate
        plt.plot([i * 22050, i * 22050], [-1, 1], color="red")
    S = librosa.stft(x)
    logS = librosa.amplitude_to_db(abs(S))
    plt.savefig('woman.png')

    if args.time < 0:
        parameters['origin_time'] = core.get_duration(y, sr)
        parameters['convert_time'] = parameters['origin_time'] / args.speed
    else:
        parameters['origin_time'] = core.get_duration(y, sr)
        parameters['convert_time'] = args.time
    parameters['sample_rate'] = sr
    parameters['frame_length'] = int(fs / 22050 * frame_length)
    # if parameters['convert_time'] / parameters['origin_time'] > 0.8:
    convert_upper_threshold(input_filename, output_filename, parameters)
def test_pqmf():
    w, sr = load(WAV_FILE)
    layer = PQMF(N=4, taps=62, cutoff=0.15, beta=9.0)
    w2 = torch.from_numpy(w[None, None, :])
    # Split into subbands, then reconstruct
    b2 = layer.analysis(w2)
    w2_ = layer.synthesis(b2)
    print(w2_.max())
    print(w2_.min())
    print(w2_.mean())
    sf.write('pqmf_output.wav', w2_.flatten().detach().numpy(), sr)
def test_pqmf():
    w, sr = load(WAV_FILE)
    layer = PQMF(N=4, taps=62, cutoff=0.15, beta=9.0)
    w2 = tf.convert_to_tensor(w[None, None, :])
    # Split into subbands, then reconstruct
    b2 = layer.analysis(w2)
    w2_ = layer.synthesis(b2)
    w2_ = w2_.numpy()  # convert the reconstruction (not the input) back to numpy
    print(w2_.max())
    print(w2_.min())
    print(w2_.mean())
    sf.write('tf_pqmf_output.wav', w2_.flatten(), sr)
def _prepare(self, apath):
    audio, _ = load(str(apath), self.sr)
    audio = self._normalize(audio)
    audio = audio.astype(np.float32)
    audio = self._random_crop(audio)
    return audio
def __getitem__(self, index):
    (seq, _) = load(self.file_names_wav[index], sr=None, mono=True)
    gap = max_len - seq.shape[0]
    # if not gap:
    wav_tensor = torch.cat([
        torch.LongTensor(self.hindsight).fill_(0.),  # TODO numpy/torch bridge
        utils.mu_law_encoding(torch.from_numpy(seq), self.q_levels)
    ])
    # else:
    #     wav_tensor = torch.cat([
    #         torch.LongTensor(self.hindsight).fill_(0.),  # TODO numpy/torch bridge
    #         utils.mu_law_encoding(torch.from_numpy(seq), self.q_levels),
    #         torch.LongTensor(gap).fill_(0.)
    #     ])
    spec_tensor = torch.from_numpy(
        np.load(self.file_names_spec[index], allow_pickle=False))
    # TODO add hindsight zeros to the spec_tensor
    return wav_tensor, spec_tensor
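# utils.mu_law_encoding above is project code; for reference, a minimal numpy
# sketch of the standard mu-law companding it presumably implements (the
# textbook formula, not the project's exact routine).
import numpy as np

def mu_law_encode(x, q_levels=256):
    """Compand x in [-1, 1] with mu-law, then quantize to q_levels integers."""
    mu = q_levels - 1
    y = np.sign(x) * np.log1p(mu * np.abs(x)) / np.log1p(mu)  # still in [-1, 1]
    return np.floor((y + 1) / 2 * mu + 0.5).astype(np.int64)  # in [0, mu]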
def main():
    # try ffmpeg -i input.mp3 output.wav
    files = glob(config.data_path + '/*.wav')  # + glob('data/*.mp3')
    data, meta = [], []
    for file_id, file in enumerate(files):
        print(f'reading: {file}')
        d, sample_rate = load(file, config.sample_rate)
        data.append(d)
        # synthesis
        # signal_recons = data_to_audio(data)
        # write(f'{file.split("/")[-1]}_{file_id}.wav', config.sample_rate, signal_recons)
        # signal_recons, sample_rate = load(f'{file.split("/")[-1]}_{file_id}.wav', config.sample_rate)
    # Min-max normalize across the whole corpus and remember the range
    data_min = min([min(d) for d in data])
    data_max = max([max(d) for d in data])
    data = [(d - data_min) / (data_max - data_min) for d in data]
    meta.extend([data_min, data_max])
    pickle_save([data, meta],
                config.data_path + ('.pk' if config.data_path[-3:] != '.pk' else ''))
    print('saved data.')
def __data_generation(self, list_IDs_temp):
    # Generates data containing batch_size samples
    # X : (n_samples, *dim, n_channels)
    # Initialization
    X = np.empty((self.batch_size, *self.dim, self.n_channels))
    Y = np.empty((self.batch_size, self.n_classes), dtype=bool)
    # Generate data
    for i, row in list_IDs_temp.iterrows():
        if row.path not in self.audio.keys():
            # print('{} - loading {}'.format(i, row.path))
            # sys.stdout.flush()
            aud, fs = load(row.path)
            coefs = melspectrogram(aud, sr=fs, n_fft=2**12, hop_length=2**11,
                                   n_mels=64, fmax=10000)
            self.audio[row.path] = coefs
            # print('{} - loaded!'.format(i))
            # sys.stdout.flush()
            # we've loaded one more track, add it to the counter
            self.pbar.update(1)
        # Take a random window of the cached spectrogram
        start_ind = np.random.randint(
            low=0, high=self.audio[row.path].shape[1] - self.window)
        clip = self.audio[row.path][:, start_ind:start_ind + self.window]
        X[i, :, :, 0] = clip
        Y[i, :] = row.iloc[2:-1].values.astype(np.int64)
    return X, Y
def get_features(filename, *, winlen, winstep, n_mcep, mcep_alpha, minf0,
                 maxf0, type):
    wav, sr = load(filename, sr=None)
    # get f0 with WORLD (harvest + stonemask refinement)
    x = wav.astype(float)
    _f0, t = world.harvest(x, sr, f0_floor=minf0, f0_ceil=maxf0,
                           frame_period=winstep * 1000)
    f0 = world.stonemask(x, _f0, t, sr)
    window_size = int(sr * winlen)
    hop_size = int(sr * winstep)
    # get spectral features: mel-cepstral coefficients or MFCCs
    if type == 'mcc':
        spec = world.cheaptrick(x, f0, t, sr, f0_floor=minf0)
        h = sptk.sp2mc(spec, n_mcep - 1, mcep_alpha).T
    else:
        h = mfcc(x, sr, n_mfcc=n_mcep, n_fft=window_size, hop_length=hop_size)
    h = np.vstack((h, f0))
    maxlen = len(x) // hop_size + 2
    h = repeat_last_padding(h, maxlen)
    id = os.path.basename(filename).replace(".wav", "")
    return (id, x, h)
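# A hypothetical invocation of get_features; the values are illustrative
# (winlen/winstep in seconds, f0 range in Hz, mcep_alpha is the common
# all-pass constant for 16 kHz speech), not the project's configuration.
file_id, wav, feats = get_features(
    'speaker1_001.wav', winlen=0.025, winstep=0.005, n_mcep=25,
    mcep_alpha=0.42, minf0=70, maxf0=500, type='mcc')
print(feats.shape)  # (n_mcep + 1, n_frames): cepstra stacked with f0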
def load_wavs(file_path, sr):
    files = librosa.util.find_files(file_path, ext="wav")
    # Load the wav files lazily (changed to a generator)
    wavs = (load(path=wav, sr=sr)[0] for wav in files)
    print('Wave Loading Complete')
    return wavs
def compute_features(data_loc='../data/genres/'):
    file_names = glob.glob(data_loc + '*/*.au')
    file_names.sort()
    assert len(file_names) == 1000, \
        "ERROR: Couldn't read files properly. Is your data_loc correct?"
    # Setup some vars
    sampleRate = 22050
    n_fft = 1024
    X = []
    genres_list = list(song_labels_dic.keys())
    genres_list.sort()
    genre_flag = 0
    if not os.path.exists('../ckpt'):
        os.makedirs('../ckpt')
    for file in file_names:
        song, _ = lc.load(file)
        song_dft = np.abs(lc.stft(song, n_fft=n_fft))
        X.append(song_dft)
        # 100 tracks per genre; flush one pickle per genre
        if len(X) == 100:
            print('Writing: ' + genres_list[genre_flag] + '.pkl file...')
            with open('../ckpt/' + genres_list[genre_flag] + '.pkl', 'wb') as f:
                pickle.dump(X, f)
            X = []
            genre_flag = genre_flag + 1
    return True
def plot_audio(filepath):
    x, fs = load(filepath, sr=None, mono=True)
    plt.figure(figsize=(16, 4))
    waveplot(x, sr=fs)
    plt.title("Waveform for {}".format(filepath))
    plt.tight_layout()
    plt.show()
def load_wavs(file_path, sr):
    wavs = []
    files = librosa.util.find_files(file_path, ext="wav")
    for wav in files:
        audio, _ = load(path=wav, sr=sr)
        wavs.append(audio)
    return wavs
def process_sounds(self):
    '''
    Processes downloaded files below self.root after running download_files().
    DEPRECATED -- don't use this for the pretrained VGGish!
    TODO: this should go to preprocessing if kept at all
    '''
    self.info_df = self.df[['gen', 'id']].copy()
    for path, dirs, files in os.walk(self.root):
        for file in files:
            if file.endswith('.mp3'):
                y, sr = load(os.path.join(path, file))
                if self.convert_to_wav:
                    write_wav(os.path.join(path, file.replace('.mp3', '.wav')),
                              y, self.input_sr)
                if self.make_mel_spec:
                    S = librosa.feature.melspectrogram(
                        y, sr=self.sr, n_mels=self.n_mels,
                        hop_length=self.hop_length)
                    log_S = librosa.amplitude_to_db(S, ref=np.max)
                    np.save(os.path.join(path, 'mel_spec.npy'), log_S)
                    if self.save_img:
                        scipy.misc.imsave(
                            os.path.join(path, 'mel_spec.jpg'), log_S)
                    if self.extract_chunks:
                        if log_S.shape[1] < self.len_chunks:
                            print('recording {} has length {} which is shorter '
                                  'than required chunk length.'.format(
                                      file, log_S.shape[1]))
                            continue
                        self.spec_chunks(log_S, path=path)
    self.info_df.to_csv(os.path.join(self.root, 'info.csv'), sep='\t')
def load_wav(file_path: str) -> Tuple[int, np.ndarray]:
    '''Reads in a .wav file; returns the sample rate and signal.'''
    y, sample_rate = load(file_path)
    return sample_rate, y
def calculate_spectrograms(audio_dir, out_dir, file_type='.mp3'):
    files = glob.glob(os.path.join(audio_dir, '*' + file_type))
    num_files = len(files)
    print(f'{num_files} audio files found')
    if not os.path.exists(out_dir):
        os.mkdir(out_dir)
    for i, file_name in enumerate(sorted(files)):
        start_time = time.time()
        track_name = os.path.basename(file_name)
        track_id = os.path.splitext(track_name)[0]
        try:
            song_name = track_to_song[track_id]
        except KeyError:
            continue
        if song_name in wmf_item2i.keys():
            audio_file = os.path.join(audio_dir, track_name)
            out_file = os.path.join(out_dir, track_id) + '.npy'
            if not os.path.exists(out_file):
                y, sr = load(audio_file)
                mel_spectrogram = melspectrogram(y=y, sr=sr, n_fft=1024,
                                                 hop_length=512, n_mels=128)
                # The original snippet computes the spectrogram but never
                # writes it; saving to out_file is the evident intent.
                np.save(out_file, mel_spectrogram)


wmf_item2i = pickle.load(open('../../index_dicts.pkl', 'rb'))['item2i']
track_to_song = pickle.load(open('../../track_to_song.pkl', 'rb'))
calculate_spectrograms(audio_dir='../../data/MillionSongSubset/audio',
                       out_dir='../../data/MillionSongSubset/spectrograms')
def mp3_spec_file(filename):
    x, sr = load(filename)
    S = librosa.stft(x, N_FFT)
    p = np.angle(S)
    S = np.log1p(np.abs(S))
    return S, sr
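# mp3_spec_file discards the phase (p is computed but unused), so S alone
# cannot be inverted exactly. A sketch of an approximate reconstruction via
# Griffin-Lim, assuming librosa >= 0.7; the output path is illustrative.
import numpy as np
import librosa
import soundfile as sf

def spec_to_audio(S, sr, out_path='reconstructed.wav'):
    mag = np.expm1(S)            # undo the log1p compression
    y = librosa.griffinlim(mag)  # estimate phase iteratively
    sf.write(out_path, y, sr)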
def one_wave_load_func(path):
    # data_dir, sr and const_length come from the enclosing scope
    wave = load(f'{data_dir}/{path[0]}', sr=sr)[0]
    wave = cut_pad_wave(wave, const_length)
    assert wave.shape[0] == const_length, f'{wave.shape[0]}, {const_length}'
    return wave.reshape((1, -1))
def get_beat_sync_spectrums(audio):
    """
    Returns a beat-sync 3-energy-band spectrogram
    :param audio: Path to the song
    :return: Array containing energy in band1, band2, band3
    """
    y, sr = core.load(audio, sr=44100)
    eql_y = EqualLoudness()(y)
    tempo, framed_dbn = self_tempo_estimation(y, sr)
    # np.append is not in-place; keep its result so the final beat
    # boundary (the end of the track) is actually added
    framed_dbn = np.append(framed_dbn, len(y) / sr)
    band1 = (0, 220)
    band2 = (220, 1760)
    band3 = (1760, sr / 2)
    band1list = []
    band2list = []
    band3list = []
    for i in range(1, len(framed_dbn)):
        # FFT of the equal-loudness signal between consecutive beats
        fft_eq = abs(np.fft.fft(
            eql_y[int(framed_dbn[i - 1] * sr):int(framed_dbn[i] * sr)]))
        freqs = np.fft.fftfreq(len(fft_eq), 1 / sr)
        band1list.append(np.sqrt(np.mean(sum(
            fft_eq[np.where(np.logical_and(freqs > band1[0], freqs < band1[1]))] ** 2))))
        band2list.append(np.sqrt(np.mean(sum(
            fft_eq[np.where(np.logical_and(freqs > band2[0], freqs < band2[1]))] ** 2))))
        band3list.append(np.sqrt(np.mean(sum(
            fft_eq[np.where(np.logical_and(freqs > band3[0], freqs < band3[1]))] ** 2))))
    band1list = np.array(band1list).transpose()
    band2list = np.array(band2list).transpose()
    band3list = np.array(band3list).transpose()
    return np.vstack([band1list, band2list, band3list])
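# A hypothetical call; assumes Essentia's EqualLoudness and the project's
# self_tempo_estimation are importable, and 'song.wav' is illustrative.
bands = get_beat_sync_spectrums('song.wav')
print(bands.shape)  # (3, n_beat_intervals): one energy row per band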
def generate_cqt(i, file_path, offset=0, duration=None):
    print('[{}] Opening'.format(i), file_path)
    data, sample_rate = load(file_path, sr=None, offset=offset, duration=duration)
    print('[{}] Sample Rate:'.format(i), sample_rate, 'shape:', data.shape)
    if len(data.shape) == 2:
        with Timer('[{}] Converted to mono'.format(i)):
            print('[{}] Converting to mono channel...'.format(i))
            data = to_mono(data)
    with Timer('[{}] Resampling'.format(i)):
        print('[{}] Resampling to'.format(i), TARGET_SAMPLE_RATE, 'Hz...')
        downsampled_data = resample(data, orig_sr=sample_rate,
                                    target_sr=TARGET_SAMPLE_RATE)
        # downsampled_data = data
        print('[{}] Downsampled to'.format(i), TARGET_SAMPLE_RATE,
              'Hz shape is now', downsampled_data.shape)
    with Timer('[{}] CQT'.format(i)):
        print('[{}] Generating CQT...'.format(i))
        cqt_result = np.abs(cqt(downsampled_data, sr=TARGET_SAMPLE_RATE,
                                hop_length=HOP_LENGTH, n_bins=TOTAL_BINS,
                                bins_per_octave=BINS_PER_OCTAVE))
    return cqt_result
def LoadAudio(fname):
    y, sr = load(fname, sr=C.SR)
    spec = stft(y, n_fft=C.FFT_SIZE, hop_length=C.H, win_length=C.FFT_SIZE)
    mag = np.abs(spec)
    mag /= np.max(mag)  # peak-normalize the magnitude
    phase = np.exp(1.j * np.angle(spec))  # unit-magnitude phase term
    return mag, phase
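# LoadAudio returns a peak-normalized magnitude and a unit phase term. A
# minimal sketch of the matching inverse under the same C.SR / C.H /
# C.FFT_SIZE settings; the name SaveAudio and the use of soundfile are
# assumptions, and the original scale is lost to the normalization above.
import soundfile as sf
from librosa.core import istft

def SaveAudio(fname, mag, phase):
    y = istft(mag * phase, hop_length=C.H, win_length=C.FFT_SIZE)
    sf.write(fname, y, C.SR)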
def train():
    print('Loading dataset: {} ...'.format(WAV_FILE))
    samples, _ = libcore.load(WAV_FILE, sr=SAMPLING_RATE)
    power = np.mean(samples ** 2) * 0.5
    print('Sampling training set nb_samples={}, size=({},{}) ...'.format(
        TS_SIZE, SEQ_LEN, INPUT_DIM))
    training_set = np.array([sample_chunk(samples, power).T
                             for _ in range(TS_SIZE)])
    print('Constructing autoencoder ...')
    # GRU encoder -> repeated bottleneck -> GRU decoder
    inputs = Input(shape=(SEQ_LEN, INPUT_DIM))
    enc_1 = GRU(128)(inputs)
    features = RepeatVector(SEQ_LEN)(enc_1)
    dec_0 = GRU(128, return_sequences=True)(features)
    dec_1 = GRU(INPUT_DIM, return_sequences=True)(dec_0)
    autoencoder = Model(inputs, dec_1)
    autoencoder.summary()
    autoencoder.compile(optimizer='rmsprop', loss='mse')
    model_cb = ModelCheckpoint(WEIGHT_FILE_PATTERN, monitor='val_loss',
                               verbose=0, save_best_only=False,
                               save_weights_only=False, mode='auto',
                               period=SAVE_AFTER)
    print('Training autoencoder for {} epochs. Save each {}th epoch ...'.format(
        T_EPOCHS, SAVE_AFTER))
    history = autoencoder.fit(training_set, training_set, nb_epoch=T_EPOCHS,
                              validation_split=0.1, callbacks=[model_cb])
def make_blocking_data(self):
    xData, yData = list(), list()
    path = self.featurePath + self.name + '/'
    for j, filename in enumerate(os.listdir(path)):
        print(f"{self.name} {filename} ({j + 1})")
        WavPath = path + filename
        y, sr = load(WavPath, mono=True)
        S = melspectrogram(y, sr).T
        # Trim to a multiple of 128 frames; S[:-0] would be empty,
        # so only trim when there is a remainder
        remainder = S.shape[0] % 128
        if remainder:
            S = S[:-remainder]
        num_chunk = S.shape[0] // 128
        data_chunks = np.split(S, num_chunk)
        xChunks, yChunks = list(), list()
        for unit in data_chunks:
            xChunks.append(unit)
            yChunks.append(self.labelDict[self.name])
        xData.append(xChunks)
        yData.append(yChunks)
    # Flatten the per-file chunk lists
    xData = [unit for record in xData for unit in record]
    yData = [unit for record in yData for unit in record]
    self.features = torch.tensor(data=xData, device=device)
    self.labels = torch.tensor(data=yData, device=device)
    print(self.features.shape)
    print(self.labels.shape)
    self.x_cat_data.append(self.features)
    self.y_cat_data.append(self.labels)
    return
def load_audio(fname):
    y = load(fname, sr=16000)[0]
    spec = stft(y, n_fft=1024, hop_length=512, win_length=1024)
    # Zero-pad the time axis to a multiple of 1024 frames
    spec = np.pad(spec, [(0, 0), (0, 1024 - spec.shape[1] % 1024)], 'constant')
    mag = np.abs(spec)
    mag /= np.max(mag)
    phase = np.exp(1.j * np.angle(spec))
    return mag, phase, y.shape[0]
def mix_maker(playlist, rootDir):
    """
    Creates a seamless mix of all the songs in a playlist.
    Songs crossfade into one another. The function assumes similar
    BPM and a sample rate of 44100 Hz.

    Input Parameters
    ------------------------
    playlist: list of paths to files containing songs for analysis and mixing.

    Returns
    ------------------------
    A continuous mix of all songs as one audio file.
    """
    # sample_list = np.zeros(len(playlist), dtype=object)
    # mix = []
    # Skip a leading non-mp3 entry (e.g. a hidden file) if present
    if '.mp3' in playlist[0]:
        print(1)
    else:
        playlist = playlist[1:]
    playlist_length = len(playlist)
    print('iteration 0:')
    samples, sr = load(rootDir + "/" + playlist[0], 44100)
    mix = samples
    playlist = playlist[1:]
    playlist_length = len(playlist)
    x = 0
    while playlist_length > 0:
        x = x + 1
        print("iteration: ")
        print(x)
        print('playlist_length')
        print(len(playlist))
        samples, sr = load(rootDir + "/" + playlist[0], 44100)
        mix = beat_match(mix, samples, sr)
        playlist = playlist[1:]
        playlist_length = len(playlist)
    return mix
def __computedata(self, path, samplemetadata):
    meta, pitch = samplemetadata
    audiodat = lrco.load(join(path, meta[0]), sr=self.sr,
                         offset=meta[1], duration=meta[2])
    audiodat = ExtractMonoAudioFiles.featurefunc(*audiodat).T
    # pitchvect = np.array([pitch] * audiodat.shape[0])
    # return (audiodat, pitch)
    return {'features': audiodat, 'label': pitch}
# Collect source and mixture directories from both DSD partitions
list_source_dir = [os.path.join(PATH_DSD_SOURCE[0], f)
                   for f in os.listdir(PATH_DSD_SOURCE[0])]
list_source_dir.extend([os.path.join(PATH_DSD_SOURCE[1], f)
                        for f in os.listdir(PATH_DSD_SOURCE[1])])
list_source_dir = sorted(list_source_dir)
list_mix_dir = [os.path.join(PATH_DSD_MIXTURE[0], f)
                for f in os.listdir(PATH_DSD_MIXTURE[0])]
list_mix_dir.extend([os.path.join(PATH_DSD_MIXTURE[1], f)
                     for f in os.listdir(PATH_DSD_MIXTURE[1])])
list_mix_dir = sorted(list_mix_dir)

for mix_dir, source_dir in zip(list_mix_dir, list_source_dir):
    assert mix_dir.split("/")[-1] == source_dir.split("/")[-1]
    fname = mix_dir.split("/")[-1]
    print("Processing: " + fname)
    y_mix, sr = load(os.path.join(mix_dir, FILE_MIX), sr=None)
    y_vocal, _ = load(os.path.join(source_dir, FILE_VOCAL), sr=None)
    y_inst = sum([load(os.path.join(source_dir, f), sr=None)[0]
                  for f in [FILE_DRUMS, FILE_BASS, FILE_OTHER]])
    assert y_mix.shape == y_vocal.shape
    assert y_mix.shape == y_inst.shape
    util.SaveSpectrogram(y_mix, y_vocal, y_inst, fname)

rand_voc = np.random.randint(100, size=50)
rand_bass = np.random.randint(100, size=50)
rand_drums = np.random.randint(100, size=50)
rand_other = np.random.randint(100, size=50)
def main():
    # Fragment: playlist, rootDir, fade and beat2 are defined elsewhere
    samples, sr = load(rootDir + "/" + playlist[0], 44100)
    song2 = fade(samples, type="in", end=beat2[32])
from os import path

import numpy as np
import soundfile as sf
from audio_helpers import play_audio
from librosa.effects import pitch_shift
from librosa.core import load

DIR = 'data/wave'
fn = '2.wav'
base_name, ext = path.splitext(fn)
FN = path.join(DIR, fn)
FN_NEW = path.join(DIR, '{}_shifted{}'.format(base_name, ext))

x, fs = load(FN)
print("Script loaded file with fs {}".format(fs))


def to_pcm(x):
    max_val = np.iinfo(np.int16).max
    return (x * max_val).astype(np.int16)


# Shift up by 2 semitones and write out
shifted = pitch_shift(x, fs, 2)
sf.write(FN_NEW, shifted, fs, subtype="PCM_24")
play_audio(FN_NEW)
# -*- coding: utf-8 -*-
"""
Created on Sat May 7 16:45:29 2016

@author: parallels
"""
import functions
from librosa.util import find_files
from librosa.core import load

audiofilelist = find_files("database/audios/", ext="wav")
print("saving peaks....")
for audiofile in audiofilelist:
    y, sr = load(audiofile)
    filename = audiofile.split("/")[-1] + ".npy"  # -1 takes the last path component
    functions.save_maximum_array(y, filename)
    print("saved: " + filename)
# -*- coding: utf-8 -*-
"""
Created on Sat May 7 13:51:42 2016

@author: parallels
"""
import numpy as np
from librosa.core import load, stft
import matplotlib.pyplot as plt
from librosa.display import specshow
import functions
# from scipy.spatial.distance import euclidean

y, sr = load("wiwym.wav")
rec, sr = load("recording.wav")
y = y[:sr * 30]
spec = np.abs(stft(y, n_fft=4960, hop_length=512))
query = np.abs(stft(rec, n_fft=4960, hop_length=512))
# find_peak presumably lives in the project's `functions` module
maximum_spec = functions.find_peak(spec, 30)
maximum_query = functions.find_peak(query, 30)
# `overlap` is computed by a matching step omitted from this snippet
# plt.plot(overlap)
stem_voc = []   # vocal stems for this song
stem_inst = []  # instrumental stems for this song
stems_path = os.path.join(PATH_MENDLEY, songname, data["stem_dir"])
mixfilename = data["mix_filename"]
for s in data["stems"]:
    stem = data["stems"][s]
    fname = stem["filename"]
    print("stem: %s %s %s" % (fname, stem["component"], stem["instrument"]))
    # "male" also matches "female", so both singer genders are caught here
    if (stem["instrument"].find("male") >= 0) or \
            (stem["instrument"].find("singer") > 0):
        stem_voc.append(fname)
        all_voctracks.append(fname)
        print("Is vocal!")
    else:
        stem_inst.append(fname)
        all_insttracks.append(fname)
print("detected vocals:")
print(stem_voc)
if (len(stem_voc) == 0) or (len(stem_inst) == 0):
    print("empty vocal or inst...skip")
    continue
audio_vocal = sum([load(os.path.join(stems_path, f), sr=None, mono=True)[0]
                   for f in stem_voc])
audio_inst = sum([load(os.path.join(stems_path, f), sr=None, mono=True)[0]
                  for f in stem_inst])
audio_mix, _ = load(os.path.join(PATH_MENDLEY, songname, mixfilename),
                    sr=None, mono=True)
util.SaveSpectrogram(audio_mix, audio_vocal, audio_inst, songname)