def _msnet_forward(Net, batch_x, GPU):
    """Run one forward pass of ``Net`` on a NumPy input and return a NumPy prediction.

    A leading axis is prepended to ``batch_x`` before it is converted to a
    float tensor. The second value returned by the network is discarded.
    """
    batch_x = torch.from_numpy(batch_x[np.newaxis, :]).float()
    if GPU:
        batch_x = batch_x.cuda()
    # Inference only: skip autograd bookkeeping so no graph is kept in memory.
    with torch.no_grad():
        pred_y, _emb = Net(batch_x)
    return pred_y.cpu().detach().numpy()


def MeExt(filepath, model_type='vocal', model_path='./pretrain_model/MSnet_vocal', GPU=True, mode='std'):
    """Extract a melody estimate from an audio file with a pretrained MSnet model.

    Args:
        filepath: Path of the audio file, forwarded to ``cfp_process``.
        model_type: Selects the network; must contain ``'vocal'`` or
            ``'melody'`` (substring match).
        model_path: Checkpoint path handed to ``torch.load``.
        GPU: When true, the network and its inputs are moved to CUDA;
            otherwise everything stays on the CPU.
        mode: Must contain ``'std'`` (``cfp_process`` defaults) or ``'fast'``
            (``sr=22050, hop=512``).

    Returns:
        The value returned by ``est(pred_y, CenFreq, time_arr)``, or ``None``
        when ``mode`` or ``model_type`` is not recognised (an error message is
        printed in that case).
    """
    if 'std' in mode:
        data, CenFreq, time_arr = cfp_process(filepath, model_type=model_type)
    elif 'fast' in mode:
        data, CenFreq, time_arr = cfp_process(filepath, model_type=model_type, sr=22050, hop=512)
    else:
        # Previously fell through and crashed later with an unbound `data`.
        print("Error: Wrong mode. Please assign mode = 'std' or 'fast'")
        return None

    print('Melody extraction with Melodic Segnet ...')

    if 'vocal' in model_type:
        Net = model.MSnet_vocal()
    elif 'melody' in model_type:
        Net = model.MSnet_melody()
    else:
        print(
            "Error: Wrong type of model. Please assign model_type = 'vocal' or 'melody'"
        )
        return None

    if GPU:
        Net.cuda()
    else:
        Net.cpu()
    Net.float()
    Net.eval()
    # Deserialise onto the CPU so CUDA-saved checkpoints also load on CPU-only
    # hosts; load_state_dict then copies the weights onto the network's device.
    Net.load_state_dict(torch.load(model_path, map_location='cpu'))

    frames = data.shape[-1]
    if frames > 5120:
        # Long inputs go through seg()/iseg(); presumably these split and
        # re-join along the frame axis — confirm against their definitions.
        seg_y = [_msnet_forward(Net, batch_x, GPU) for batch_x in seg(data)]
        pred_y = iseg(seg_y)
    else:
        pred_y = _msnet_forward(Net, data, GPU)

    est_arr = est(pred_y, CenFreq, time_arr)
    return est_arr
def main(data_folder, model_type, output_folder):
    """Build the training HDF5 file and validation pickles from ``synth_mix_*`` data.

    For every training id the CFP features from ``cfp_process`` are paired with
    a ground-truth map from ``seq2map`` and accumulated with ``batchize``; the
    result is written as datasets ``'x'`` and ``'y'`` of an HDF5 file. For every
    validation id the ``batchize_val`` output and the reference array are
    collected and pickled.

    Args:
        data_folder: Dataset root containing ``audio/`` and ``annotations/``.
        model_type: Must contain ``'vocal'`` or ``'melody'`` (substring match).
            ``'vocal'`` reads references with ``select_vocal_track``,
            ``'melody'`` with ``csv2ref``.
        output_folder: Destination of the validation pickles and, for
            ``'melody'``, of ``train.h5``.

    Returns:
        None. An unrecognised ``model_type`` prints an error and writes nothing.
    """
    melody_dir = data_folder + '/annotations/melody/synth_mix_'

    if 'vocal' in model_type:
        train_ids, val_ids, _ = get_split_lists_vocal(data_folder)
        # NOTE(review): this path ignores output_folder, unlike every other
        # output of this function; kept as-is in case downstream code reads it.
        train_h5_path = './data/train_vocal.h5'
        tag = '_vocal'

        def load_reference(chunk_id):
            ypath = melody_dir + chunk_id + '.csv'
            lpath = data_folder + '/annotations/activations/synth_mix_' + chunk_id + '.lab'
            return select_vocal_track(ypath, lpath)
    elif 'melody' in model_type:
        train_ids, val_ids, _ = get_split_lists()
        train_h5_path = output_folder + '/train.h5'
        tag = ''

        def load_reference(chunk_id):
            return csv2ref(melody_dir + chunk_id + '.csv')
    else:
        print(
            "Error: Wrong type of model. Please assign model_type = 'vocal' or 'melody'"
        )
        return None

    def load_features(chunk_id):
        filepath = data_folder + '/audio/synth_mix_' + chunk_id + '.wav'
        data, CenFreq, _ = cfp_process(filepath, model_type=model_type, sr=44100, hop=256)
        return data, CenFreq

    # ---- training set -> HDF5 ----
    xlist, ylist = [], []
    for chunk_id in train_ids:
        data, CenFreq = load_features(chunk_id)
        ref_arr = load_reference(chunk_id)
        gt_map = seq2map(ref_arr[:, 1], CenFreq)
        xlist, ylist = batchize(data, gt_map, xlist, ylist, size=430)
    train_x = np.array(xlist)
    train_y = np.array(ylist)
    # Context manager guarantees the file is closed even if a write fails.
    with h5py.File(train_h5_path, 'w') as hf:
        hf.create_dataset('x', data=train_x)
        hf.create_dataset('y', data=train_y)

    # ---- validation set -> pickles ----
    val_x, val_y = [], []
    for chunk_id in val_ids:
        data, _ = load_features(chunk_id)
        val_x.append(batchize_val(data))
        val_y.append(load_reference(chunk_id))
    with open(output_folder + '/val_x' + tag + '.pickle', 'wb') as fp:
        pickle.dump(val_x, fp)
    with open(output_folder + '/val_y' + tag + '.pickle', 'wb') as fp:
        pickle.dump(val_y, fp)
def main(data_folder, model_type, output_folder):
    """Build the training HDF5 file and validation pickles from ``*_MIX.wav`` songs.

    For every training song the CFP features from ``cfp_process`` are paired
    with a ground-truth map from ``seq2map`` and accumulated with ``batchize``;
    the result is written as datasets ``'x'`` and ``'y'`` of an HDF5 file. For
    every validation song the ``batchize_val`` output and the reference array
    are collected and pickled.

    Args:
        data_folder: Dataset root containing ``Audio/`` and ``Annotations/``.
        model_type: Must contain ``'vocal'`` or ``'melody'`` (substring match).
            ``'vocal'`` reads references with ``select_vocal_track``,
            ``'melody'`` with ``csv2ref``.
        output_folder: Destination of the validation pickles and, for
            ``'melody'``, of ``train.h5``.

    Returns:
        None. An unrecognised ``model_type`` prints an error and writes nothing.
    """
    melody_dir = data_folder + '/Annotations/Melody_Annotations/MELODY2/'

    if 'vocal' in model_type:
        train_songlist, val_songlist, _ = getlist_mdb_vocal()
        # NOTE(review): this path ignores output_folder, unlike every other
        # output of this function; kept as-is in case downstream code reads it.
        train_h5_path = './data/train_vocal.h5'
        tag = '_vocal'

        def load_reference(songname):
            ypath = melody_dir + songname + '_MELODY2.csv'
            lpath = (data_folder + '/Annotations/Instrument_Activations/SOURCEID/'
                     + songname + '_SOURCEID.lab')
            return select_vocal_track(ypath, lpath)
    elif 'melody' in model_type:
        train_songlist, val_songlist, _ = getlist_mdb()
        train_h5_path = output_folder + '/train.h5'
        tag = ''

        def load_reference(songname):
            return csv2ref(melody_dir + songname + '_MELODY2.csv')
    else:
        print(
            "Error: Wrong type of model. Please assign model_type = 'vocal' or 'melody'"
        )
        return None

    def load_features(songname):
        filepath = data_folder + '/Audio/' + songname + '/' + songname + '_MIX.wav'
        data, CenFreq, _ = cfp_process(filepath, model_type=model_type, sr=44100, hop=256)
        return data, CenFreq

    # ---- training set -> HDF5 ----
    xlist, ylist = [], []
    # Fixed: the loop used the undefined name `train_list` instead of the
    # training split unpacked above.
    for songname in train_songlist:
        data, CenFreq = load_features(songname)
        ref_arr = load_reference(songname)
        gt_map = seq2map(ref_arr[:, 1], CenFreq)
        xlist, ylist = batchize(data, gt_map, xlist, ylist, size=430)
    train_x = np.array(xlist)
    train_y = np.array(ylist)
    # Context manager guarantees the file is closed even if a write fails.
    with h5py.File(train_h5_path, 'w') as hf:
        hf.create_dataset('x', data=train_x)
        hf.create_dataset('y', data=train_y)

    # ---- validation set -> pickles ----
    val_x, val_y = [], []
    for songname in val_songlist:
        data, _ = load_features(songname)
        val_x.append(batchize_val(data))
        val_y.append(load_reference(songname))
    with open(output_folder + '/val_x' + tag + '.pickle', 'wb') as fp:
        pickle.dump(val_x, fp)
    with open(output_folder + '/val_y' + tag + '.pickle', 'wb') as fp:
        pickle.dump(val_y, fp)
import os
import wave as we

import numpy as np

from MSnet.cfp import cfp_process

# Batch script: compute CFP features for every .wav file in `rootdir` and
# store them as one .npz archive per file in `npz_dir`.
rootdir = "./raw/"
npz_dir = './train_NPZ/'

# np.savez does not create missing directories; make sure the target exists
# before any (expensive) feature extraction runs.
os.makedirs(npz_dir, exist_ok=True)

list1 = os.listdir(rootdir)
# Only real .wav entries are processed; other directory entries are skipped
# instead of being turned into non-existent "<stem>.wav" paths.
wav_files = [name for name in list1 if os.path.splitext(name)[1].lower() == '.wav']
# splitext keeps dots inside the stem ("take.01.wav" -> "take.01"), whereas
# split('.')[0] would truncate it to "take".
wav_list = [os.path.splitext(name)[0] for name in wav_files]

for filename, namelist in zip(wav_files, wav_list):
    print(namelist)
    filepath = os.path.join(rootdir, filename)
    data, CenFreq_useless, time_arr = cfp_process(filepath, model_type='vocal')
    np_save_name = os.path.join(npz_dir, namelist + ".npz")
    np.savez(np_save_name, data=data, CenFreq=CenFreq_useless, time_arr=time_arr)