def makechunks_for_predict(x, window_len):
    """Slice a 2-D feature matrix into non-overlapping chunks along time.

    Args:
        x: 2-D array of shape (n_features, n_frames).
        window_len: width (in frames) of each chunk.

    Returns:
        Array of shape (chunk_total, n_features, window_len), where
        chunk_total = (n_frames - window_len) // window_len.
    """
    # Bug fix: the original called get_config() here and overwrote the
    # window_len parameter AFTER chunk_total had already been computed
    # from the caller's value, so chunk width and chunk count could
    # disagree. The parameter is now honored throughout.
    chunk_total = int((x.shape[1] - window_len) / window_len)
    chunks = np.zeros([chunk_total, x.shape[0], window_len])
    start = 0
    for i_frame in range(chunk_total):
        chunks[i_frame] = x[:, start:start + window_len]
        start += window_len
    return chunks
def get_data_from_npy(data_path, label_path):
    """Build a class-balanced training set from saved .npy files.

    Loads a mel-spectrogram matrix and its per-frame 0/1 onset labels,
    chunks the spectrogram with makechunks(), undersamples the negative
    (label 0) frames so both classes are equally represented, and returns
    the selected chunks in shuffled order.

    Args:
        data_path: path to the .npy mel-spectrogram file.
        label_path: path to the .npy file with one 0/1 label per frame.

    Returns:
        (selected_trainX, selected_trainY): float chunk array of shape
        (n_selected, trainX.shape[1], trainX.shape[2]) and the matching
        int label vector.
    """
    # Mel-spectrogram data.
    melgram1All = np.load(data_path, allow_pickle=True)
    # Label data (one 0/1 value per frame).
    labels = np.load(label_path, allow_pickle=True).astype(int)
    # Cleanup: the original fetched get_mean_and_std() into unused
    # variables and carried large blocks of commented-out debug code;
    # both were removed (get_mean_and_std looked side-effect free —
    # TODO confirm).
    true_labels = [i for i, x in enumerate(labels) if x == 1]   # positive-sample indices
    false_labels = [i for i, x in enumerate(labels) if x == 0]  # negative-sample indices
    # Draw as many negatives as there are positives (undersampling).
    random_indexs = int_random_v2(0, len(false_labels),
                                  len(labels) - len(false_labels))
    selected_false_labels = [false_labels[i] for i in random_indexs]
    # Combined positive + undersampled negative indices, then shuffled.
    selected_labels = sorted(true_labels + selected_false_labels)
    np.random.shuffle(selected_labels)
    contextlen, duration, window_len = get_config()
    trainX = makechunks(melgram1All, duration)
    # Swap the last two axes; presumably (chunk, time, mel) — TODO confirm.
    trainX = np.transpose(trainX, (0, 2, 1))
    trainY = labels.reshape(len(labels), )
    selected_trainX = np.zeros(
        (len(selected_labels), trainX.shape[1], trainX.shape[2]), dtype=float)
    selected_trainY = np.zeros((len(selected_labels)), dtype=int)
    for i, x in enumerate(selected_labels):
        selected_trainX[i, :, :] = trainX[x, :, :]
        selected_trainY[i] = trainY[x]
    return selected_trainX, selected_trainY
def get_chunks_from_file_v2(filename):
    """Load an audio file and produce normalized mel-spectrogram chunks
    for prediction.

    Args:
        filename: path to the audio file to analyze.

    Returns:
        Chunk array produced by makechunks_for_predict().
    """
    # Per-band normalization statistics precomputed offline.
    stats = np.load('./data/means_stds_jj.npy')
    means = stats[0]
    stds = stats[1]
    contextlen, duration, window_len = get_config()
    x, fs = librosa.load(filename, sr=44100)
    melgram1 = librosa.feature.melspectrogram(x, sr=fs, n_fft=1024,
                                              hop_length=441, n_mels=80,
                                              fmin=27.5, fmax=16000)
    # Power -> dB; the epsilon guards against log(0).
    melgram1 = 10 * np.log10(1e-10 + melgram1)
    # Normalize each band with the stored mean/std.
    melgram1 = (melgram1 - np.atleast_2d(means[0]).T) / np.atleast_2d(
        stds[0]).T  # normalize
    melgram1 = zeropad2d(melgram1, contextlen)  # zero pad ends
    # Cleanup: the original called get_config() a second time here and
    # discarded nothing new; one call suffices.
    trainX = makechunks_for_predict(melgram1, window_len)
    return trainX
def makechunks_v2(x, y, window_len):
    """Slice features into 50%-overlapping chunks and label each chunk.

    A chunk is labeled 1 when any frame inside it has a positive label.

    Args:
        x: 2-D array of shape (n_features, n_frames).
        y: per-frame labels covering at least the chunked frames.
        window_len: chunk width in frames; the hop is window_len // 2.

    Returns:
        (trainX, trainY): chunk array of shape
        (chunk_total, n_features, window_len) and a per-chunk 0/1 vector.
    """
    hop_len = int(window_len * 0.5)
    chunk_total = int((x.shape[1] - window_len) / hop_len)
    # Bug fix: the original re-read window_len from get_config() here,
    # overwriting the parameter AFTER hop_len and chunk_total had been
    # derived from it, so hop and chunk width could disagree. The
    # parameter is now used consistently.
    trainX = np.zeros([chunk_total, x.shape[0], window_len])
    trainY = np.zeros([chunk_total])
    start = 0
    for i_frame in range(chunk_total):
        trainX[i_frame] = x[:, start:start + window_len]
        # Positive chunk if any covered frame is positive.
        if np.max(y[start:start + window_len]) > 0:
            trainY[i_frame] = 1
        start += hop_len
    return trainX, trainY
def get_cqt_chunks_from_file(filename):
    """Compute normalized CQT features for an audio file and chunk them.

    Args:
        filename: path to the audio file.

    Returns:
        Chunk array produced by makechunks().
    """
    # Per-bin normalization statistics precomputed offline.
    norm_stats = np.load('./data/cqt_means_stds_ourself.npy',
                         allow_pickle=True)
    band_means = norm_stats[0]
    band_stds = norm_stats[1]
    contextlen, duration, _window_len = get_config()
    audio, sample_rate = librosa.load(filename, sr=44100)
    # CQT magnitude converted to dB relative to the peak.
    cqt_db = librosa.amplitude_to_db(
        librosa.cqt(audio, sr=sample_rate, hop_length=512, n_bins=80),
        ref=np.max)
    # Normalize each bin with the stored mean/std.
    cqt_db = (cqt_db - np.atleast_2d(band_means[0]).T) / np.atleast_2d(
        band_stds[0]).T  # normalize
    padded = zeropad2d(cqt_db, contextlen)  # zero pad ends
    return makechunks(padded, duration)
def get_data_from_npy_v2(data_path, label_path):
    """Build a class-balanced training set using overlapping chunks.

    Like get_data_from_npy(), but chunks with makechunks_v2() (50% hop)
    and undersamples negatives at the chunk level rather than the frame
    level.

    Args:
        data_path: path to the .npy mel-spectrogram file.
        label_path: path to the .npy file with one 0/1 label per frame.

    Returns:
        (selected_trainX, selected_trainY): float chunk array and the
        matching int label vector, in shuffled order.
    """
    # Mel-spectrogram data.
    melgram1All = np.load(data_path, allow_pickle=True)
    # Label data (one 0/1 value per frame).
    labels = np.load(label_path, allow_pickle=True).astype(int)
    contextlen, duration, window_len = get_config()
    trainX, trainY = makechunks_v2(melgram1All, labels, window_len)
    print(trainX.shape, trainY.shape)
    # From here on "labels" means per-CHUNK labels.
    labels = trainY
    true_labels = [i for i, x in enumerate(labels) if x == 1]   # positive-chunk indices
    false_labels = [i for i, x in enumerate(labels) if x == 0]  # negative-chunk indices
    # Draw as many negatives as there are positives (undersampling).
    random_indexs = int_random_v2(0, len(false_labels),
                                  len(labels) - len(false_labels))
    selected_false_labels = [false_labels[i] for i in random_indexs]
    selected_labels = true_labels + selected_false_labels
    # Bug fix: the original called sorted(selected_labels) and discarded
    # the result — a no-op, and pointless before the shuffle below — so
    # the dead call was removed.
    np.random.shuffle(selected_labels)
    print(selected_labels)
    # Swap the last two axes; presumably (chunk, time, mel) — TODO confirm.
    trainX = np.transpose(trainX, (0, 2, 1))
    trainY = labels.reshape(len(labels), )
    print(trainX.shape)
    print(trainY.shape)
    selected_trainX = np.zeros(
        (len(selected_labels), trainX.shape[1], trainX.shape[2]), dtype=float)
    selected_trainY = np.zeros((len(selected_labels)), dtype=int)
    for i, x in enumerate(selected_labels):
        selected_trainX[i, :, :] = trainX[x, :, :]
        selected_trainY[i] = trainY[x]
    print(selected_trainX.shape)
    print(selected_trainY.shape)
    trainy_tmp = np.squeeze(selected_trainY)
    print(Counter(trainy_tmp))
    return selected_trainX, selected_trainY
def get_chunks_from_file(filename):
    """Load an audio file, compute a normalized mel spectrogram, and
    chunk it with makechunks().

    Args:
        filename: path to the audio file.

    Returns:
        Chunk array produced by makechunks().
    """
    # Resolve the statistics file relative to this module.
    root_path = os.path.abspath(os.path.dirname(__file__))
    data_path = os.path.join(root_path, './data/means_stds_ourself.npy')
    norm_stats = np.load(data_path)
    band_means = norm_stats[0]
    band_stds = norm_stats[1]
    contextlen, duration, _window_len = get_config()
    audio, sample_rate = librosa.load(filename, sr=44100)
    mel = librosa.feature.melspectrogram(audio, sr=sample_rate, n_fft=1024,
                                         hop_length=441, n_mels=80,
                                         fmin=27.5, fmax=16000)
    # Power -> dB; the epsilon guards against log(0).
    mel = 10 * np.log10(1e-10 + mel)
    # Normalize each mel band with the stored mean/std.
    mel = (mel - np.atleast_2d(band_means[0]).T) / np.atleast_2d(
        band_stds[0]).T  # normalize
    padded = zeropad2d(mel, contextlen)  # zero pad ends
    return makechunks(padded, duration)