def detect_click(class_path,
                 class_name,
                 snr_threshold_low=5,
                 snr_threshold_high=20,
                 tar_fs=192000):
    """Run the CNN click detector on every wav file found under a class folder.

    Each sub-folder returned by ``find_click.list_files(class_path)`` is
    processed separately; when there is none, ``class_path`` itself is used.
    Results are written by ``run_cnn_detection`` to
    ``./CNN_Det12_WK5/<class_name>/<folder name>``.

    Args:
        class_path: Directory holding the recordings (or sub-folders of them).
        class_name: Name used for the class level of the output directory.
        snr_threshold_low: Forwarded unchanged to ``run_cnn_detection``.
        snr_threshold_high: Forwarded unchanged to ``run_cnn_detection``.
        tar_fs: Forwarded unchanged to ``run_cnn_detection``.
    """
    # Fall back to the class directory itself when it has no sub-folders.
    folder_list = find_click.list_files(class_path) or [class_path]
    save_npy = True

    for folder in folder_list:
        print(folder)
        wav_files = find_click.list_wav_files(folder)

        # normpath strips a trailing separator first; a plain
        # ``folder.split('/')[-1]`` returns '' for "some/dir/", which would
        # drop the per-folder level from the output path.
        path_name = os.path.basename(os.path.normpath(folder))
        dst_path = "./CNN_Det12_WK5/%(class)s/%(type)s" % {
            'class': class_name,
            'type': path_name
        }
        if not os.path.exists(dst_path):
            mkdir(dst_path)

        for file_name in wav_files:
            run_cnn_detection(file_name, snr_threshold_low,
                              snr_threshold_high, save_npy, dst_path, tar_fs)
def load_data(data_path, n_class, batch_num=20, n_total=500):
    """Load energy-normalised clicks per class and build train/test matrices.

    Class ``c`` is read from ``<data_path>/<c>``. At most
    ``batch_num * n_total`` files are read per class (at least one file is
    always read when the folder is not empty). The clicks are divided with
    ``split_data`` and sampled with ``random_crop``.

    Args:
        data_path: Root directory containing one numbered sub-folder per class.
        n_class: Number of classes (sub-folders ``0 .. n_class - 1``).
        batch_num: Forwarded to ``random_crop``; also scales the file cap.
        n_total: 4/5 of it is requested for training and 1/5 for testing.

    Returns:
        ``(train_xs, train_ys, test_xs, test_ys)``; the ``xs`` arrays have
        192 columns and the ``ys`` arrays are one-hot rows of width
        ``n_class``.

    Raises:
        ValueError: If a wav file does not flatten to exactly 320 samples.
    """
    max_files = batch_num * n_total

    train_xs = np.empty((0, 192))
    train_ys = np.empty((0, n_class))
    test_xs = np.empty((0, 192))
    test_ys = np.empty((0, n_class))

    for c in range(n_class):
        path = "%(path)s/%(class)d" % {'path': data_path, 'class': c}
        wav_files = find_click.list_wav_files(path)
        print("load data : %s, the number of files : %d" %
              (path, len(wav_files)))

        label = np.zeros(n_class)
        label[c] = 1

        # Collect the rows and stack once: calling np.vstack inside the loop
        # copies the whole matrix for every file (quadratic in file count).
        # The empty float64 seed keeps the (0, 320) shape and the dtype of
        # the result when no file is read.
        rows = [np.empty((0, 320))]
        for count, pathname in enumerate(wav_files, start=1):
            wave_data, _ = find_click.read_wav_file(pathname)
            # Normalise each click to unit energy.
            # NOTE(review): an all-zero click gives energy == 0 and a NaN row.
            energy = np.sqrt(np.sum(wave_data**2))
            wave_data /= energy
            rows.append(np.reshape(wave_data, [-1]))
            if count >= max_files:
                break
        xs = np.vstack(rows)

        xs0, xs1 = split_data(xs)
        temp_train_xs = random_crop(xs0, batch_num, int(n_total * 4 / 5))
        temp_test_xs = random_crop(xs1, batch_num, int(n_total / 5))

        temp_train_ys = np.tile(label, (temp_train_xs.shape[0], 1))
        temp_test_ys = np.tile(label, (temp_test_xs.shape[0], 1))

        train_xs = np.vstack((train_xs, temp_train_xs))
        train_ys = np.vstack((train_ys, temp_train_ys))
        test_xs = np.vstack((test_xs, temp_test_xs))
        test_ys = np.vstack((test_ys, temp_test_ys))

    return train_xs, train_ys, test_xs, test_ys
def detect_save_click(class_path,
                      class_name,
                      snr_threshold_low=5,
                      snr_threshold_high=100):
    """Detect clicks with the TKEO/FDR detector and save them per wav file.

    For every wav file under ``class_path`` (or under each of its
    sub-folders) the clicks returned by ``find_click.find_click_fdr_tkeo``
    are filtered by signal-to-noise ratio, passed through ``resample`` and
    ``cut_data``, converted to int16 and saved as a single ``.npy`` array in
    ``./TKEO_wk3_complete/<class_name>/<folder name>/``. The file is named
    ``<wav name>_N<number of kept clicks>.npy``.

    Args:
        class_path: Directory holding the recordings (or sub-folders of them).
        class_name: Name used for the class level of the output directory.
        snr_threshold_low: Clicks with an SNR (dB) below this are dropped.
        snr_threshold_high: Clicks with an SNR (dB) above this are dropped.
    """
    tar_fs = 96000  # target rate handed to resample()
    signal_len = 320  # length handed to the detector and to cut_data()
    noise_len = 256  # samples taken on each side of a click as noise

    # Detector settings, forwarded as-is to find_click.find_click_fdr_tkeo.
    fl = 5000
    fwhm = 0.0004
    fdr_threshold = 0.65

    # Fall back to the class directory itself when it has no sub-folders.
    folder_list = find_click.list_files(class_path)
    if not folder_list:
        folder_list = [class_path]

    for folder in folder_list:
        print(folder)
        count = 0
        wav_files = find_click.list_wav_files(folder)

        # normpath strips a trailing separator so the folder name is never ''.
        path_name = os.path.basename(os.path.normpath(folder))
        dst_path = "./TKEO_wk3_complete/%(class)s/%(type)s" % {
            'class': class_name,
            'type': path_name
        }
        if not os.path.exists(dst_path):
            mkdir(dst_path)

        for pathname in wav_files:
            print(pathname)
            wave_data, frameRate = find_click.read_wav_file(pathname)

            # File name up to its first dot, used as prefix of the output.
            wavname = os.path.split(pathname)[1].split('.')[0]

            click_index, xn = find_click.find_click_fdr_tkeo(
                wave_data, frameRate, fl, fwhm, fdr_threshold, signal_len, 8)

            # Rescale in place so the largest sample becomes 2**12 - 1.
            # One vectorised assignment replaces the per-sample Python loop
            # and keeps xn's dtype.
            scale = (2**12 - 1) / np.max(xn)
            xn[:] = xn * scale

            click_arr = []
            for j in range(click_index.shape[0]):
                start, stop = click_index[j][0], click_index[j][1]
                click_data = xn[start:stop]

                # --- SNR filtering ---
                detected_clicks_energy = calcu_click_energy(
                    click_data.reshape(1, -1))
                # Clamp the window start at 0: a negative start would be
                # read as an offset from the end of xn and give an empty
                # (wrapped) window for clicks near the beginning of the file.
                noise_estimate1 = xn[max(start - noise_len, 0):start]
                noise_estimate2 = xn[stop + 1:stop + 1 + noise_len]
                noise_estimate = np.hstack((noise_estimate1, noise_estimate2))
                noise_energy = calcu_energy(noise_estimate)
                if noise_energy <= 0 or detected_clicks_energy <= 0:
                    # log10 is undefined for these; skip the click.
                    continue
                snr = 10 * math.log10(detected_clicks_energy / noise_energy)
                if snr < snr_threshold_low or snr > snr_threshold_high:
                    continue

                # Original note here read "before the front TKEO" — meaning
                # unclear, TODO confirm.
                click_data = resample(click_data, frameRate, tar_fs)
                click_data = cut_data(click_data, signal_len)
                click_data = click_data.astype(np.short)
                click_arr.append(click_data)
                count = count + 1

            dst = "%(path)s/%(pre)s_N%(num)d.npy" \
                  % {'path': dst_path, 'pre': wavname, 'num': len(click_arr)}
            print(dst)
            np.save(dst, np.array(click_arr, dtype=np.short))

        print("count = %(count)d" % {'count': count})
def load_lwy_data(batch_num=20, n_total=500):
    """Load clicks of the eight hard-coded classes into train/test matrices.

    Works like a class-by-class loader over fixed directories: class ``c`` is
    read from the ``c``-th path below, without energy normalisation. At most
    ``(batch_num + 10) * n_total`` files are read per class. The clicks are
    divided with ``split_data`` and sampled with ``random_crop``.

    Args:
        batch_num: Forwarded to ``random_crop``; also scales the file cap.
        n_total: 4/5 of it is requested for training and 1/5 for testing.

    Returns:
        ``(train_xs, train_ys, test_xs, test_ys)``; the ``xs`` arrays have
        192 columns and the ``ys`` arrays are one-hot rows of width 8.

    Raises:
        ValueError: If a wav file does not flatten to exactly 320 samples.
    """
    # Index in this tuple == class label. (A tuple also avoids shadowing the
    # builtin ``dict`` with a local variable.)
    class_paths = (
        "/home/fish/ROBB/CNN_click/click/WavData/BBW/Blainvilles_beaked_whale_(Mesoplodon_densirostris)",
        "/home/fish/ROBB/CNN_click/click/WavData/Gm/Pilot_whale_(Globicephala_macrorhynchus)",
        "/home/fish/ROBB/CNN_click/click/WavData/Gg/Rissos_(Grampus_grisieus)",
        "/home/fish/ROBB/CNN_click/click/WavData/Tt/palmyra2006",
        "/home/fish/ROBB/CNN_click/click/WavData/Dc/Dc",
        "/home/fish/ROBB/CNN_click/click/WavData/Dd/Dd",
        "/home/fish/ROBB/CNN_click/click/WavData/Melon/palmyra2006",
        "/home/fish/ROBB/CNN_click/click/WavData/Spinner/palmyra2006",
    )
    n_class = len(class_paths)
    max_files = (batch_num + 10) * n_total

    train_xs = np.empty((0, 192))
    train_ys = np.empty((0, n_class))
    test_xs = np.empty((0, 192))
    test_ys = np.empty((0, n_class))

    for c, path in enumerate(class_paths):
        wav_files = find_click.list_wav_files(path)
        print("load data : %s, the number of files : %d, class: %d" %
              (path, len(wav_files), c))

        label = np.zeros(n_class)
        label[c] = 1

        # Collect the rows and stack once: calling np.vstack inside the loop
        # copies the whole matrix for every file (quadratic in file count).
        # The empty float64 seed keeps the (0, 320) shape and the dtype of
        # the result when no file is read.
        rows = [np.empty((0, 320))]
        for count, pathname in enumerate(wav_files, start=1):
            wave_data, _ = find_click.read_wav_file(pathname)
            rows.append(np.reshape(wave_data, [-1]))
            if count >= max_files:
                break
        xs = np.vstack(rows)

        xs0, xs1 = split_data(xs)
        temp_train_xs = random_crop(xs0, batch_num, int(n_total * 4 / 5))
        temp_test_xs = random_crop(xs1, batch_num, int(n_total / 5))

        temp_train_ys = np.tile(label, (temp_train_xs.shape[0], 1))
        temp_test_ys = np.tile(label, (temp_test_xs.shape[0], 1))

        train_xs = np.vstack((train_xs, temp_train_xs))
        train_ys = np.vstack((train_ys, temp_train_ys))
        test_xs = np.vstack((test_xs, temp_test_xs))
        test_ys = np.vstack((test_ys, temp_test_ys))

    return train_xs, train_ys, test_xs, test_ys
dict[ "6"] = "/media/ywy/本地磁盘/Data/MobySound/5th_Workshop/5th_DCL_data_melon-headed" dict[ "7"] = "/media/ywy/本地磁盘/Data/MobySound/5th_Workshop/5th_DCL_data_spinner" dict["0"] = "/media/ywy/本地磁盘/Data/MobySound/3rd_Workshop/" \ "Training_Data/Blainvilles_beaked_whale_(Mesoplodon_densirostris)" dict[ "1"] = "/media/ywy/本地磁盘/Data/MobySound/3rd_Workshop/Training_Data/Pilot_whale_(Globicephala_macrorhynchus)" dict[ "2"] = "/media/ywy/本地磁盘/Data/MobySound/3rd_Workshop/Training_Data/Rissos_(Grampus_grisieus)" for key in dict: print(dict[key]) count = 0 wav_files = find_click.list_wav_files(dict[key]) dst_path = "./Data/ClickC8/%(class)s" % {'class': key} mkdir(dst_path) for pathname in wav_files: print(pathname) wave_data, frameRate = find_click.read_wav_file(pathname) fl = 5000 fwhm = 0.0008 fdr_threshold = 0.62 click_index, xn = find_click.find_click_fdr_tkeo( wave_data, frameRate, fl, fwhm, fdr_threshold, signal_len, 8)
def load_data_lstm(data_path, n_class, batch_num=20, n_total=500):
    """Build LSTM train/test samples from CNN features of click groups.

    The CNN stored in ``params/cnn_net.ckpt`` is rebuilt and restored. For
    each class, clicks are read from ``<data_path>/<class>``, normalised to
    unit energy and divided with ``split_data``. Groups of ``batch_num``
    clicks are then cropped to 192 samples at a random offset in [64, 96)
    and fed one by one through the network; the 256 outputs of the first
    fully connected layer are the features of a click.

    As a sanity check the restored CNN is also evaluated on the raw test
    crops with per-group majority voting, and the accuracy is printed.

    Args:
        data_path: Root directory containing one numbered sub-folder per class.
        n_class: Number of classes (sub-folders ``0 .. n_class - 1``).
        batch_num: Number of clicks per group.
        n_total: ``int(n_total * 4 / 5)`` training and ``int(n_total / 5)``
            test groups are built per class.

    Returns:
        ``(train, test)``: lists of ``[features, label]`` samples where
        ``features`` has shape ``(1, batch_num, 256)`` and ``label`` is a
        one-hot array of shape ``(1, n_class)``.

    Raises:
        ZeroDivisionError: If ``split_data`` yields an empty part for a class
            while groups are requested from it.
    """
    train = []
    test = []

    # --- CNN graph -------------------------------------------------------
    # The creation order is left exactly as it was: restoring the checkpoint
    # may depend on the automatically generated variable names.
    x_in = tf.placeholder("float", [None, 192])  # input
    x_image = tf.reshape(x_in, [-1, 1, 192, 1])

    # first convolutional layer
    W_conv1 = weight_variable([1, 5, 1, 32])
    b_conv1 = bias_variable([32])
    h_conv1 = tf.nn.relu(conv2d(x_image, W_conv1) + b_conv1)
    h_pool1 = max_pool_1x2(h_conv1)

    # second convolutional layer
    W_conv2 = weight_variable([1, 5, 32, 32])
    b_conv2 = bias_variable([32])
    h_conv2 = tf.nn.relu(conv2d(h_pool1, W_conv2) + b_conv2)
    h_pool2 = max_pool_1x2(h_conv2)

    # densely connected layer
    W_fc1 = weight_variable([1 * 48 * 32, 256])
    b_fc1 = bias_variable([256])
    h_pool2_flat = tf.reshape(h_pool2, [-1, 1 * 48 * 32])
    h_fc1 = tf.nn.relu(tf.matmul(h_pool2_flat, W_fc1) + b_fc1)

    # dropout
    keep_prob = tf.placeholder("float")
    h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob=keep_prob)

    # output layer
    W_fc2 = weight_variable([256, n_class])
    b_fc2 = bias_variable([n_class])
    y = tf.nn.softmax(tf.matmul(h_fc1_drop, W_fc2) + b_fc2)

    init = tf.global_variables_initializer()
    saver = tf.train.Saver()

    with tf.Session() as sess:
        sess.run(init)
        saver.restore(sess, "params/cnn_net.ckpt")  # load trained parameters

        def build_group(pool, group_idx):
            """Return (CNN features, raw crops) for one group of clicks."""
            pool_size = pool.shape[0]
            # Empty float64 seeds keep shape and dtype for an empty group.
            feats = [np.empty((0, 256))]
            crops = [np.empty((0, 192))]
            for j in range(batch_num * group_idx, batch_num * (group_idx + 1)):
                click = pool[j % pool_size]  # wrap around a short pool
                beg_idx = np.random.randint(64, (64 + 32))
                crop_x = np.reshape(click[beg_idx:(beg_idx + 192)], [1, 192])
                # output of the first fully connected layer
                feats.append(sess.run(h_fc1, feed_dict={x_in: crop_x}))
                crops.append(crop_x)
            return np.vstack(feats), np.vstack(crops)

        test_cnn = []  # raw test crops, used only for the CNN check below
        for c in range(n_class):
            path = "%(path)s/%(class)d" % {'path': data_path, 'class': c}
            wav_files = find_click.list_wav_files(path)
            print("load data : %s, the number of files : %d" %
                  (path, len(wav_files)))

            # Collect the rows and stack once instead of re-copying the
            # whole matrix with np.vstack for every file.
            rows = [np.empty((0, 320))]
            for count, pathname in enumerate(wav_files, start=1):
                wave_data, _ = find_click.read_wav_file(pathname)
                energy = np.sqrt(np.sum(wave_data**2))
                wave_data /= energy
                rows.append(np.reshape(wave_data, [-1]))
                if count > batch_num * n_total:
                    break
            xs = np.vstack(rows)

            xs0, xs1 = split_data(xs)

            one_hot = [0] * n_class
            one_hot[c] = 1

            for i in range(int(n_total * 4 / 5)):
                frames, _ = build_group(xs0, i)
                train.append([frames[np.newaxis], np.array([one_hot])])

            for i in range(int(n_total / 5)):
                frames, crops = build_group(xs1, i)
                label = np.array([one_hot])
                test.append([frames[np.newaxis], label])
                test_cnn.append([crops[np.newaxis], label])

        # Majority vote of the CNN over the clicks of every test group.
        count = 0
        for group_xs, ref_y in test_cnn:
            votes = np.zeros(n_class)
            for j in range(group_xs.shape[1]):
                txs = np.reshape(group_xs[0, j, :], [1, 192])
                out_y = sess.run(y, feed_dict={x_in: txs, keep_prob: 1.0})
                votes[np.argmax(out_y, 1)] += 1
            if np.argmax(votes) == np.argmax(ref_y):
                count += 1

        # Without any test group there is nothing to average; skipping the
        # report avoids a ZeroDivisionError that would lose the result.
        if test_cnn:
            print('cnn test accuracy: ', round(count / len(test_cnn), 3))

    return train, test
def test_cnn_batch_data(data_path, n_class, batch_num=20, n_total=500):
    """Evaluate the stored CNN on groups of clicks with three voting schemes.

    For each class, clicks from the tail of the file list of
    ``<data_path>/<class>`` are normalised to unit energy and
    ``int(n_total / 5)`` groups of ``batch_num`` random 192-sample crops are
    built. The network in ``params/cnn_net.ckpt`` is restored and the group
    accuracy is printed for majority voting, summed pre-softmax outputs
    ("weight voting") and summed softmax outputs.

    Args:
        data_path: Root directory containing one numbered sub-folder per class.
        n_class: Number of classes (sub-folders ``0 .. n_class - 1``).
        batch_num: Number of clicks per group.
        n_total: One fifth of it is the number of groups built per class.

    Raises:
        ZeroDivisionError: If no click is loaded for a class while groups
            are requested from it.
    """
    click_batch = []
    for c in range(n_class):
        path = "%(path)s/%(class)d" % {'path': data_path, 'class': c}
        wav_files = find_click.list_wav_files(path)
        print("load data : %s, the number of files : %d" %
              (path, len(wav_files)))

        # To avoid testing on clicks that were used to train the network
        # (mirroring the train/test split made at training time), the test
        # samples are built from the last 1/5 of all clicks.
        split_idx = int(len(wav_files) * 4 / 5)

        # Collect the rows and stack once instead of re-copying the whole
        # matrix with np.vstack for every file.
        rows = [np.empty((0, 320))]
        for count, pathname in enumerate(wav_files, start=1):
            if count < split_idx:
                continue
            wave_data, _ = find_click.read_wav_file(pathname)
            energy = np.sqrt(np.sum(wave_data ** 2))
            wave_data /= energy
            rows.append(np.reshape(wave_data, [-1]))
            # NOTE(review): ``count`` is the position in the file list, not
            # the number of clicks loaded, so once split_idx reaches
            # batch_num * n_total only a single click is loaded. Kept as is;
            # confirm whether the cap was meant to apply to loaded clicks.
            if count >= batch_num * n_total:
                break
        xs = np.vstack(rows)

        sample_num = xs.shape[0]
        one_hot = [0] * n_class
        one_hot[c] = 1
        for i in range(int(n_total / 5)):
            crops = [np.empty((0, 192))]
            for j in range(batch_num * i, batch_num * (i + 1)):
                click = xs[j % sample_num]  # wrap around a short pool
                beg_idx = np.random.randint(64, (64 + 32))
                crops.append(
                    np.reshape(click[beg_idx:(beg_idx + 192)], [1, 192]))
            # sample = [crops of shape (1, batch_num, 192), one-hot (1, n_class)]
            click_batch.append(
                [np.vstack(crops)[np.newaxis], np.array([one_hot])])

    # --- CNN graph -------------------------------------------------------
    # The creation order is left exactly as it was: restoring the checkpoint
    # may depend on the automatically generated variable names.
    x = tf.placeholder("float", [None, 192])  # input
    x_image = tf.reshape(x, [-1, 1, 192, 1])

    # first convolutional layer
    W_conv1 = weight_variable([1, 5, 1, 32])
    b_conv1 = bias_variable([32])
    h_conv1 = tf.nn.relu(conv2d(x_image, W_conv1) + b_conv1)
    h_pool1 = max_pool_1x2(h_conv1)

    # second convolutional layer
    W_conv2 = weight_variable([1, 5, 32, 32])
    b_conv2 = bias_variable([32])
    h_conv2 = tf.nn.relu(conv2d(h_pool1, W_conv2) + b_conv2)
    h_pool2 = max_pool_1x2(h_conv2)

    # densely connected layer
    W_fc1 = weight_variable([1 * 48 * 32, 256])
    b_fc1 = bias_variable([256])
    h_pool2_flat = tf.reshape(h_pool2, [-1, 1 * 48 * 32])
    h_fc1 = tf.nn.relu(tf.matmul(h_pool2_flat, W_fc1) + b_fc1)

    # dropout
    keep_prob = tf.placeholder("float")
    h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob=keep_prob)

    # output layer
    W_fc2 = weight_variable([256, n_class])
    b_fc2 = bias_variable([n_class])
    y = tf.nn.softmax(tf.matmul(h_fc1_drop, W_fc2) + b_fc2)

    init = tf.global_variables_initializer()
    saver = tf.train.Saver()

    with tf.Session() as sess:
        sess.run(init)
        saver.restore(sess, "params/cnn_net.ckpt")  # load trained parameters

        def group_accuracy(fetch, count_votes):
            """Return the fraction of groups whose combined output is right.

            With ``count_votes`` each click adds one vote for its arg-max
            class; otherwise the fetched outputs themselves are summed.
            """
            correct = 0
            for group_xs, ref_y in click_batch:
                score = np.zeros(n_class)
                for j in range(group_xs.shape[1]):
                    txs = np.reshape(group_xs[0, j, :], [1, 192])
                    out = sess.run(fetch, feed_dict={x: txs, keep_prob: 1.0})
                    if count_votes:
                        score[np.argmax(out, 1)] += 1
                    else:
                        score = score + np.reshape(out, score.shape)
                if np.argmax(score) == np.argmax(ref_y):
                    correct += 1
            return round(correct / len(click_batch), 3)

        # Without any group there is nothing to average; skipping the report
        # avoids a ZeroDivisionError.
        if click_batch:
            print('cnn test accuracy (majority voting): ',
                  group_accuracy(y, True))

            # pre-softmax output of the last layer
            weight = tf.matmul(h_fc1_drop, W_fc2) + b_fc2
            print('cnn test accuracy (weight voting): ',
                  group_accuracy(weight, False))

            print('cnn test accuracy (sum of softmax voting): ',
                  group_accuracy(y, False))