import matplotlib.pyplot as plt
from matplotlib.backends.backend_pdf import PdfPages

from PreProcess import PreProcess


def genPdf(fullpath_name):
    my_data = PreProcess(fullpath_name).process()
    # note: papertype= and keep_empty= target an older matplotlib release
    # (papertype was removed in matplotlib 3.5)
    with PdfPages(fullpath_name.replace('xlsx', 'pdf'), keep_empty=True) as pdf:
        plt.figure(figsize=(10, 7))
        plt.subplots_adjust(bottom=0.05, top=0.95, left=0.05, right=0.95, hspace=0.05)
        subplot_index = -1
        slice_data_len = 2500
        while my_data.size > 0:
            print(my_data.size)
            subplot_index += 1
            subplot_index %= 4
            my_plot(subplot_index + 1, my_data.head(slice_data_len))
            # slice_shift() was deprecated in pandas 1.2; this targets an older pandas
            my_data = my_data.slice_shift(-slice_data_len)
            if subplot_index == 3:
                # each page holds 4 plots
                pdf.savefig(papertype='a4')
                # start a new page
                plt.figure(figsize=(10, 7))
                plt.subplots_adjust(bottom=0.05, top=0.95, left=0.05, right=0.95, hspace=0.05)
        # save the remaining plots
        if subplot_index != 3:
            pdf.savefig(papertype='a4')
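# Hedged usage sketch; the path is illustrative, not from the source. genPdf()
# paginates the signal into pages of four 2500-sample subplots and writes
# <name>.pdf next to the input xlsx (my_plot is defined in the plotting script below).
genPdf('C:\\Users\\user\\Downloads\\processed_ecg.xlsx')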
def data_preprocess():
    print('Loading data...')
    preprocess = PreProcess()
    train_X_w2v, train_Y = preprocess.data_preprocess(train_data_path)  # training data
    dev_X_w2v, dev_Y = preprocess.data_preprocess(dev_data_path)  # cross-validation data
    return train_X_w2v, train_Y, dev_X_w2v, dev_Y
def find_foot_y(foot_x, height, imgs_path, png_names, reverse, left_reverse):
    foot_y_list = []
    for img_name in png_names:
        depth_bg = cv2.imread(imgs_path + "/" + img_name, -1)
        pp = PreProcess()
        img2 = pp.exec(depth_bg)
        img2 = img2.astype("uint8")
        ret, img2 = cv2.threshold(img2, 0, 255, 16)  # 16 == cv2.THRESH_TRIANGLE
        if left_reverse:
            img2 = np.flip(img2, axis=1)
        if reverse:
            img2 = ~img2
        img_array = img2.transpose()
        mmm = np.where(img_array[foot_x] == 0)[0]
        if len(mmm) == 0:
            # no foot pixel in this frame: reuse the previous estimate
            foot_y = foot_y_list[-1]
        else:
            foot_y = height - mmm[0]
        print("foot_y", foot_y)
        foot_y_list.append(foot_y)
    foot_y_list.sort()
    # return the median foot_y over the sampled frames
    return foot_y_list[len(foot_y_list) // 2]
def preprocess(self):
    preprocessor = PreProcess(self.h_params)
    for data_name in self.h_params.data.name_list:
        data_output_root = os.path.join(self.h_params.data.root_path, data_name) + "/Preprocessed"
        data_root = self.h_params.data.original_data_path + "/" + data_name
        test_song_list = np.genfromtxt(data_root + "/test_song.txt", dtype="str")
        song_list = [fname.split("_mix")[0] for fname in os.listdir(data_root + "/mix")]
        preprocessor.pre_process(data_root, data_output_root, song_list, test_song_list)
def main():
    PreProcess.set_all_terms()
    evaluation = Evaluation()

    # Naive Bayes:
    # train = PreProcess('train', 0, 1)
    test = PreProcess('test', 0, 1)
    # naive_bayes = NaiveBayes(train.x, train.y, train.all_terms)
    # print(evaluation.get_accuracy(naive_bayes.test(train.x[0:200]), test.y[0:200]))

    # SVM:
    train = PreProcess('train', 0, .9)
    validation = PreProcess('train', .9, 1)
    out = []
def preProcess(inputFile):
    pre = PreProcess(inputFile)
    pre.preprocessText()
    text = pre.getCorpus()
    ids = pre.getColumn('id')
    del pre
    return text, ids
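# Hypothetical call; the input file name is illustrative, not from the source.
text, ids = preProcess('documents.csv')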
def prediction():
    test_X, test_Y = data_preprocess()
    test_Y = np.argmax(test_Y, axis=1)
    print('\nPrediction\n')
    checkpoint_file = tf.train.latest_checkpoint(FLAGS.checkpoint_dir)
    graph = tf.Graph()
    with graph.as_default():
        session_conf = tf.ConfigProto(
            allow_soft_placement=FLAGS.allow_soft_placement,
            log_device_placement=FLAGS.log_device_placement)
        sess = tf.Session(config=session_conf)
        with sess.as_default():
            # load the saved meta graph and restore variables
            saver = tf.train.import_meta_graph('{}.meta'.format(checkpoint_file))
            saver.restore(sess, checkpoint_file)

            # get the placeholders from the graph by name
            input_x = graph.get_operation_by_name('input_x').outputs[0]
            dropout_keep_prob = graph.get_operation_by_name('dropout_keep_prob').outputs[0]

            # tensor we want to evaluate
            predictions = graph.get_operation_by_name('output/predictions').outputs[0]

            # generate batches for one epoch
            batches = PreProcess().batch_iter(list(test_X), FLAGS.batch_size, 1, shuffle=False)

            # collect the predictions here
            all_predictions = []
            for x_batch in batches:
                batch_predictions = sess.run(
                    predictions,
                    feed_dict={input_x: x_batch, dropout_keep_prob: 1.0})
                all_predictions = np.concatenate([all_predictions, batch_predictions])

    correct_predictions = float(sum(all_predictions == test_Y))
    print('Total test samples: {}'.format(len(test_Y)))
    print('Accuracy: {:.3%}'.format(correct_predictions / float(len(test_Y))))
class Tester(ABC):
    def __init__(self, model, h_params: HParams):
        self.h_params: HParams = h_params
        self.device = h_params.resource.device
        self.model = model
        self.pretrained_load(
            f"{self.h_params.test.pretrain_path}/{self.h_params.test.pretrain_dir_name}/{self.h_params.test.pretrain_module_name}.pth"
        )
        self.preprocessor = PreProcess(h_params)
        self.time_log = datetime.now().strftime('%y%m%d-%H%M%S')

    '''
    ==============================================================
    abstract method start
    ==============================================================
    '''

    @abstractmethod
    def read_test_data(self, meta_data, data_name):
        '''Must return a dict that includes {"model_input": input feature, "seg_dim_size": ...}.'''
        pass

    @abstractmethod
    def post_processing(self, model_output, test_data):
        pass

    '''
    ==============================================================
    abstract method end
    ==============================================================
    '''

    def test_test_set(self, data_name):
        meta_data_list = self.preprocessor.get_train_test_meta_data_list(data_name)
        test_meta_data_list = [
            meta_data for meta_data in meta_data_list
            if meta_data["data_type"] == "test"
        ]
        for i, test_meta_data in enumerate(test_meta_data_list):
            print(f"{i+1}/{len(test_meta_data_list)} {test_meta_data['name']}")
            self.test_one_sample(test_meta_data, data_name)

    def test_one_sample(self, meta_data, data_name):
        self.set_output_path(meta_data=meta_data)
        test_data = self.read_test_data(meta_data, data_name)
        batch_input = self.make_batch(test_data["model_input"],
                                      test_data["seg_dim_size"],
                                      self.h_params.dataset.segment_size)
        output = self.make_output(batch_input)
        self.post_processing(output, test_data)

    def pretrained_load(self, pretrain_path):
        best_model_load = torch.load(pretrain_path, map_location='cpu')
        self.model.load_state_dict(best_model_load)
        self.model.to(self.device)

    def set_output_path(self, meta_data):
        dataname = meta_data['name']
        model_info = self.h_params.test.pretrain_dir_name
        model_load_info = self.h_params.test.pretrain_module_name
        self.output_path = f"{self.h_params.test.output_path}/{model_info}({model_load_info})/{dataname}"
        os.makedirs(self.output_path, exist_ok=True)

    def make_batch(self, input: dict, segment_dim_size: int, segment_size: int):
        batch_data = dict()
        for feature in input:
            batch_data[feature] = None
        total_size = segment_dim_size
        for start_idx in range(0, total_size, segment_size):
            end_idx = start_idx + segment_size
            for feature in input:
                if type(input[feature]) not in [list, np.ndarray]:
                    continue
                feature_seg = input[feature][..., start_idx:end_idx]
                if feature_seg.shape[-1] != segment_size:
                    # zero-pad the last segment up to a full segment_size
                    padding = segment_size - feature_seg.shape[-1]
                    if len(feature_seg.shape) == 1:
                        feature_seg = np.pad(feature_seg, (0, padding),
                                             'constant', constant_values=0)
                    elif len(feature_seg.shape) == 2:
                        feature_seg = np.pad(feature_seg, ((0, 0), (0, padding)),
                                             'constant', constant_values=0)
                    else:
                        continue
                feature_seg = np.expand_dims(feature_seg, axis=0)
                batch_data[feature] = feature_seg if batch_data[feature] is None \
                    else np.vstack((batch_data[feature], feature_seg))
        return batch_data

    def make_output(self, batch_input):
        total_pred_features = dict()
        batch_size = 16
        for start_idx in range(0, batch_input['mix_stft'].shape[0], batch_size):
            end_idx = start_idx + batch_size
            batch_input_torch = torch.from_numpy(
                batch_input['mix_stft'][start_idx:end_idx, ...]).to(self.device).float()
            with torch.no_grad():
                pred_features = self.model(batch_input_torch)
            for feature_name in pred_features:
                if feature_name not in total_pred_features:
                    total_pred_features[feature_name] = None
                total_pred_features[feature_name] = self.unzip_batch(
                    pred_features[feature_name],
                    total_pred_features[feature_name])
        return total_pred_features

    def unzip_batch(self, batch_data, unzip_data):
        # concatenate the per-item predictions back into one time axis
        numpy_batch = batch_data.detach().cpu().numpy()
        for i in range(0, numpy_batch.shape[0]):
            unzip_data = numpy_batch[i] if unzip_data is None \
                else np.concatenate((unzip_data, numpy_batch[i]), axis=-1)
        return unzip_data
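# Hypothetical concrete Tester, sketched to show the subclass contract; the
# feature file layout and meta_data fields are assumptions, not the source API.
class VocalMaskTester(Tester):
    def read_test_data(self, meta_data, data_name):
        mix_stft = np.load(f"{meta_data['path']}/mix_stft.npy")  # assumed layout
        return {"model_input": {"mix_stft": mix_stft},
                "seg_dim_size": mix_stft.shape[-1]}

    def post_processing(self, model_output, test_data):
        # model_output maps feature names to the concatenated predictions
        np.save(f"{self.output_path}/vocal_mask.npy", model_output["vocal_mask"])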
def train():
    print('start training...')
    train_X, train_Y, dev_X, dev_Y = data_preprocess()
    with tf.Graph().as_default():
        session_conf = tf.ConfigProto(
            allow_soft_placement=FLAGS.allow_soft_placement,
            log_device_placement=FLAGS.log_device_placement)
        sess = tf.Session(config=session_conf)
        with sess.as_default():
            cnn = TextCNN(
                sequence_length=train_X.shape[1],
                num_classes=train_Y.shape[1],
                embedding_size=FLAGS.embedding_dim,
                filter_sizes=list(map(int, FLAGS.filter_sizes.split(','))),
                num_filters=FLAGS.num_filters,
                l2_reg_lambda=FLAGS.l2_reg_lambda)

            # define training procedure
            global_step = tf.Variable(0, name='global_step', trainable=False)
            optimizer = tf.train.AdamOptimizer(1e-3)  # Adam optimization algorithm
            grads_and_vars = optimizer.compute_gradients(cnn.loss)
            train_op = optimizer.apply_gradients(grads_and_vars, global_step=global_step)

            # keep track of gradient values and sparsity (optional)
            grad_summaries = []
            for g, v in grads_and_vars:
                if g is not None:
                    grad_hist_summary = tf.summary.histogram('{}/grad/hist'.format(v.name), g)
                    sparsity_summary = tf.summary.scalar('{}/grad/sparsity'.format(v.name),
                                                         tf.nn.zero_fraction(g))
                    grad_summaries.append(grad_hist_summary)
                    grad_summaries.append(sparsity_summary)
            grad_summaries_merged = tf.summary.merge(grad_summaries)

            # output directory for models and summaries
            timestamp = str(int(time.time()))
            out_dir = os.path.abspath(os.path.join(os.path.curdir, 'logs', timestamp))
            print('Writing to {}\n'.format(out_dir))

            # summaries for loss and accuracy
            loss_summary = tf.summary.scalar('loss', cnn.loss)
            acc_summary = tf.summary.scalar('accuracy', cnn.accuracy)

            # train summaries
            train_summary_op = tf.summary.merge([loss_summary, acc_summary, grad_summaries_merged])
            train_summary_dir = os.path.join(out_dir, 'summaries', 'train')
            train_summary_writer = tf.summary.FileWriter(train_summary_dir, sess.graph)

            # dev summaries
            dev_summary_op = tf.summary.merge([loss_summary, acc_summary])
            dev_summary_dir = os.path.join(out_dir, 'summaries', 'dev')
            dev_summary_writer = tf.summary.FileWriter(dev_summary_dir, sess.graph)

            # checkpoint directory: tensorflow assumes this directory already
            # exists, so we need to create it
            checkpoint_dir = os.path.abspath(os.path.join(out_dir, 'checkpoint'))
            checkpoint_prefix = os.path.join(checkpoint_dir, 'model')
            if not os.path.exists(checkpoint_dir):
                os.makedirs(checkpoint_dir)
            saver = tf.train.Saver(tf.global_variables(), max_to_keep=FLAGS.num_checkpoints)

            # initialize all variables
            sess.run(tf.global_variables_initializer())

            def train_step(x_batch, y_batch):
                # a single training step
                feed_dict = {
                    cnn.input_x: x_batch,
                    cnn.input_y: y_batch,
                    cnn.dropout_keep_prob: FLAGS.dropout_keep_prob
                }
                _, step, summaries, loss, accuracy, result = sess.run(
                    [train_op, global_step, train_summary_op, cnn.loss, cnn.accuracy, cnn.result],
                    feed_dict)
                # print the result
                time_str = datetime.datetime.now().isoformat()
                print('{}: train step {}, loss: {:g}, accuracy: {:g}'.format(
                    time_str, step, loss, accuracy))
                train_summary_writer.add_summary(summaries, step)

            def dev_step(x_batch, y_batch, writer=None):
                # evaluates the model on the dev set
                feed_dict = {
                    cnn.input_x: x_batch,
                    cnn.input_y: y_batch,
                    cnn.dropout_keep_prob: 1.0
                }
                step, summaries, loss, accuracy = sess.run(
                    [global_step, dev_summary_op, cnn.loss, cnn.accuracy],
                    feed_dict)
                time_str = datetime.datetime.now().isoformat()
                print('{}: dev step {}, loss: {:g}, accuracy: {:g}'.format(
                    time_str, step, loss, accuracy))
                if writer:
                    writer.add_summary(summaries, step)

            # generate batches
            batches = PreProcess().batch_iter(
                list(zip(train_X, train_Y)), FLAGS.batch_size, FLAGS.num_epochs)

            # training loop: for each batch...
            for batch in batches:
                x_batch, y_batch = zip(*batch)
                train_step(x_batch, y_batch)
                current_step = tf.train.global_step(sess, global_step)
                if current_step % FLAGS.evaluate_every == 0:
                    print('\nEvaluation:')
                    dev_step(dev_X, dev_Y, writer=dev_summary_writer)
                    print('')
                if current_step % FLAGS.checkpoint_every == 0:
                    path = saver.save(sess, checkpoint_prefix, global_step=current_step)
                    print('Saved model checkpoint to {}\n'.format(path))
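# Hedged sketch of the FLAGS definitions that train() and prediction() expect.
# The flag names are inferred from usage above; the default values are
# illustrative assumptions, not taken from the source.
tf.flags.DEFINE_integer('batch_size', 64, 'Batch size')
tf.flags.DEFINE_integer('num_epochs', 10, 'Number of training epochs')
tf.flags.DEFINE_integer('evaluate_every', 100, 'Evaluate on the dev set every N steps')
tf.flags.DEFINE_integer('checkpoint_every', 100, 'Save a checkpoint every N steps')
tf.flags.DEFINE_integer('num_checkpoints', 5, 'Number of checkpoints to keep')
tf.flags.DEFINE_integer('embedding_dim', 128, 'Word embedding dimension')
tf.flags.DEFINE_string('filter_sizes', '3,4,5', 'Comma-separated conv filter sizes')
tf.flags.DEFINE_integer('num_filters', 128, 'Filters per filter size')
tf.flags.DEFINE_float('dropout_keep_prob', 0.5, 'Dropout keep probability')
tf.flags.DEFINE_float('l2_reg_lambda', 0.0, 'L2 regularization lambda')
tf.flags.DEFINE_string('checkpoint_dir', '', 'Checkpoint dir used by prediction()')
tf.flags.DEFINE_boolean('allow_soft_placement', True, 'Allow soft device placement')
tf.flags.DEFINE_boolean('log_device_placement', False, 'Log device placement')
FLAGS = tf.flags.FLAGS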
import matplotlib.pyplot as plt

from PreProcess import PreProcess
import setupaxis


def my_plot(layout, data):
    print(layout)
    plt.subplot(4, 1, layout)
    plt.plot(data.get_values(), color='black', linewidth=0.5)
    setupaxis.setup_axis()


if __name__ == '__main__':
    path = 'C:\\Users\\user\\Downloads\\'
    filename = 'processed_ecg.xlsx'
    # filename = '29.xlsx'
    # path = r'C:\Users\user\Desktop\整机\典型927\朱媛媛\\'
    # filename = r'16:57.xlsx'
    fullpath_name = path + filename
    my_data = PreProcess(fullpath_name).process(False)
    plt.figure(figsize=(20, 4))
    plt.subplots_adjust(bottom=0, top=1, left=0, right=1, hspace=0)
    plt.plot(my_data.get_values(), color='black', linewidth=0.5)
    setupaxis.setup_axis(False)
    # save before show(): once the shown window closes, the figure may be
    # cleared and savefig() would write a blank image
    plt.savefig(filename.replace('xlsx', 'png'))
    plt.show()
    # return the model with the lowest val_mse
    least_mse = np.min(history.history['val_mean_squared_error'])
    model.load_weights(self.save_dir + 'checkpoint-val_mse_%.6f.hdf5' % least_mse)
    # predict on the test set
    Yhat = model.predict(np.transpose(Xtest, (0, 2, 1)))
    del model
    return least_mse, Yhat


if __name__ == '__main__':
    # force tensorflow onto the CPU
    os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
    os.environ["CUDA_VISIBLE_DEVICES"] = "-1"
    preprocess = PreProcess()
    cnn = TrafficCNN()
    num_features = 228
    cand_list = preprocess.select_k_nearest(num_features)
    Model_dir = 'cnnModel_all/'
    if not os.path.exists(Model_dir):
        os.mkdir(Model_dir)
    for predictday in [14, 17, 20]:
        Xtrain, Ytrain, Xdev, Ydev, Xtest = preprocess.readdata(predictday)
        # create the model save path
        if not os.path.exists(Model_dir + 'predictday%i' % predictday):
            os.mkdir(Model_dir + 'predictday%i' % predictday)
        cnn.save_dir = Model_dir + 'predictday%i/' % predictday
        mse, Yhat = cnn.model(Xtrain, Ytrain, Xdev, Ydev, Xtest)
        Yhat = preprocess.data_inv_tf(Yhat)
    # return the model with the lowest val_mse
    least_mse = np.min(history.history['val_mean_squared_error'])
    model.load_weights(self.save_dir + 'checkpoint-val_mse_%.6f.hdf5' % least_mse)
    # predict on the test set
    Yhat = model.predict(np.transpose(Xtest, (0, 2, 1)))
    del model
    return least_mse, Yhat


if __name__ == '__main__':
    # force tensorflow onto the CPU
    os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
    os.environ["CUDA_VISIBLE_DEVICES"] = "-1"
    preprocess = PreProcess()
    cnn = TrafficCNN()
    # num_features = 30
    cand_list = preprocess.select_nearest()
    Model_dir = 'cnnModel/'
    if not os.path.exists(Model_dir):
        os.mkdir(Model_dir)
    test_file = 'csv_file/test.txt'
    # with open(test_file, 'a') as f:
    #     f.write(str(num_features) + '\n')
    for predictday in [20]:
        Xtrain, Ytrain, Xdev, Ydev, Xtest = preprocess.readdata(predictday)
        # create the model save path
        if not os.path.exists(Model_dir + 'predictday%i' % predictday):
            os.mkdir(Model_dir + 'predictday%i' % predictday)
def main(dir_pic, dir_out_pic, start_num, left_reverse):
    '''
    Input: depth images.  Output: the judgment result.
    Step 1: run the first frame through the dynamic library to get its RGB
            matrix (the intermediate RGB images are not saved) and compute the
            initial x-coordinates of the foot and the shoulder.
    Step 2: from then on, pixels are 0 or 255 (black or white respectively).
    '''
    start = time.time()
    foot_threshold_start = 0.05
    foot_threshold_end = 0.15
    knee_expand_length = 20
    thigh_threshold = 0.720
    back1_threshold = 0.770
    back2_threshold = 0.880
    shoulder_threshold_start = 0.6
    shoulder_threshold_end = 1
    shoulder_height_threshold = 15
    source_path = r"C:\Users\yanhao\Desktop\taishan_project\test_0_norm _del"
    P_list = ["P003_3", "P004", "P005", "P006", "P007", "P008", "P009", "P010"]
    P = "P003_3"
    origin = "origin4"
    suffix_png = ".png"
    suffix_json = ".json"
    reverse = True
    foot_y_criterion = -1
    foot_x = -1
    knee_x = -1
    shoulder_x = -1
    shoulder_y_list = []
    shoulder_y_reverse_list = []
    angels_list = []
    knee_angel_list = []
    hip_angel_list = []
    shoulder_angel_list = []
    standard = "standard"
    # knee angle, hip angle, depth gap, standard accuracy
    standard_dict = {standard: [155, 148, 7, 0.8]}
    imgs_path = os.path.join(dir_pic)
    count = 1
    start_images = start_num
    png_names = os.listdir(imgs_path)
    png_names = natsorted(png_names, alg=ns.PATH)
    nums = 0
    foot_y_list = []
    foot_count = 0

    # warm up the preprocessor on one frame
    pp = PreProcess()
    depth_first = cv2.imread(imgs_path + "/" + png_names[start_images + 60], -1)
    pp.exec(depth_first)

    for img_name in png_names[start_images:]:
        print("img_name", img_name)
        nums += 1
        depth_bg = cv2.imread(imgs_path + "/" + img_name, -1)
        img2 = pp.exec(depth_bg)
        img2 = img2.astype("uint8")
        ret, img2 = cv2.threshold(img2, 0, 255, 16)
        if left_reverse:
            img2 = np.flip(img2, axis=1)
        if reverse:
            img2 = ~img2
        img_array = img2.transpose()
        width, height = img_array.shape

        # first usable frame: locate foot, knee, and shoulder x-coordinates
        if foot_x <= 0 or knee_x <= 0 or shoulder_x <= 0:
            start3 = time.time()
            foot_x = get_foot_points(img_array,
                                     foot_threshold_start=foot_threshold_start,
                                     foot_threshold_end=foot_threshold_end)
            foot_y = height - np.where(img_array[foot_x] == 0)[0][0]
            print("foot_x", foot_x)
            print("foot_y", foot_y)
            knee_x = get_knee_point(img_array, foot_x, back1_threshold=back1_threshold)
            shoulder_x = get_shoulder_point(
                img_array,
                shoulder_threshold_start=shoulder_threshold_start,
                shoulder_threshold_end=shoulder_threshold_end)
            print("shoulder_x", shoulder_x)
            end3 = time.time()
            all_time3 = (end3 - start3) * 1000
            print("First-frame initialization took", all_time3, "ms")
            foot_y_criterion = find_foot_y(foot_x, height, imgs_path,
                                           png_names[start_images + 50:start_images + 60],
                                           reverse=reverse, left_reverse=left_reverse)
            foot_y_list.append(foot_y_criterion)

        # stop after 6 consecutive frames with no foot pixels
        foot_count += 1
        if foot_count == 6:
            print("-----------------------foot_image_name", img_name)
            break
        if len(np.where((~img_array)[foot_x] == 255)[0]) == 0:
            print("-----------------------image_name", img_name)
            continue
        foot_count = 0

        knee_angel, hip_angel, shoulder_angel, coordinate_keys = find_angel(
            ~img_array, foot_x, knee_x, foot_y_criterion, foot_y_list,
            knee_expand_length=knee_expand_length,
            thigh_threshold=thigh_threshold,
            back1_threshold=back1_threshold,
            back2_threshold=back2_threshold)
        angel = [knee_angel, hip_angel, shoulder_angel]
        shuchutupian(img2, img_array, angel, coordinate_keys, count,
                     result_path=dir_out_pic)
        count += 1
        knee_angel_list.append(knee_angel)
        hip_angel_list.append(hip_angel)
        shoulder_angel_list.append(shoulder_angel)
        try:
            shoulder_y = np.where(img_array[shoulder_x] == 0)[0][0]
        except IndexError:
            print(img_name)
            break
        shoulder_y_list.append(shoulder_y)
        shoulder_y_reverse_list.append(height - shoulder_y)

    print("knee_angel_list", knee_angel_list)
    print("hip_angel_list", hip_angel_list)
    print("shoulder_angel_list", shoulder_angel_list)
    print(shoulder_y_list)
    print(shoulder_y_reverse_list)

    # use the 3rd-smallest of the first five shoulder heights, minus a margin,
    # as the peak/trough detection baseline
    shoulder_y_list_sort = sorted(shoulder_y_list[:5])
    shoulder_y_reverse_list_sort = sorted(shoulder_y_reverse_list[:5])
    trough_list = get_peak(shoulder_y_list,
                           height=shoulder_y_list_sort[2] - shoulder_height_threshold,
                           distance=20, window_length=15, polyorder=2)
    peak_list = get_peak(shoulder_y_reverse_list,
                         height=shoulder_y_reverse_list_sort[2] - shoulder_height_threshold,
                         distance=20, window_length=15, polyorder=2)  # peaks
    print("trough_list", trough_list)
    print("peak_list", peak_list)
    # The same lists were observed whether the images were read via Image.open,
    # skimage.io, or OpenCV, e.g.:
    # trough_list [113 159 203 228 275 339]
    # peak_list   [ 37  71 138 182 216 246 304 362]

    action_list, action_right_dict = get_action_list(peak_list, trough_list)
    print("action_list", action_list)
    action_list_new_by_gap, action_right_dict = judge_by_gap_angel(
        shoulder_angel_list, action_list, action_right_dict,
        gap_right_angel=standard_dict[standard][2])
    action_list_new, action_right_dict = judge_by_angel(
        knee_angel_list, hip_angel_list, action_list_new_by_gap,
        action_right_dict,
        knee_right_angel=standard_dict[standard][0],
        hip_right_angel=standard_dict[standard][1],
        right_threshold=0.8)
    print("action_right_dict", action_right_dict)
    num = len(action_list_new)
    # e.g. [[71, 137], [138, 181], [182, 215], [216, 245], [246, 303], [304, 361]]
    print("You did", num, "push-ups -- nice work!")
    end = time.time()
    all_time = (end - start) * 1000
    avg_time = all_time / nums
    print("Push-up action intervals:", action_list)
    print("Confidence for each push-up interval:", action_right_dict)
    print("Total runtime:", all_time, "ms")
    print("Processed", nums, "images")
    print("Average per image:", avg_time, "ms")
    return num, action_list, action_right_dict
def MainProcess(logger, uperList, saveRootPath, concurrency=3):
    pp = None
    try:
        # --------------------------------------------------------------
        # fetch the video page count for each uploader
        pp = PreProcess(logger=logger, uperList=uperList)
        pp.ScanLoclInfo(saveRootPath)
        pp.Process()
        # --------------------------------------------------------------
        # crawl the urls of the videos to download
        for uper in pp.uperList:
            logger.info(uper.UserName + " Spider Start···")
            OneSpiderRetryTimes = 0
            # compare the planned download count against the count fetched from the network
            while ((uper.NeedDownloadFilmCount > len(uper.VideoInfoDic_NetFileName)
                    or len(uper.ErrorUrl_Dic) > 0)
                   and OneSpiderRetryTimes <= 10):
                BiliSpider.start(logger=logger,
                                 uper=uper,
                                 saveRootPath=saveRootPath,
                                 concurrency=concurrency,
                                 middleware=middleware)
                OneSpiderRetryTimes = OneSpiderRetryTimes + 1
                logger.info("Try Spider " + uper.UserName + " " +
                            str(OneSpiderRetryTimes) + " times.")
                RandomSleep()
            logger.info(uper.UserName + " Spider Done.")
            if OneSpiderRetryTimes > 10:
                logger.error(uper.UserName + " Spider Retry " +
                             str(OneSpiderRetryTimes) + " times.")
                logger.error("Error Url:")
                for eUrl in uper.ErrorUrl_Dic:
                    logger.error(eUrl)
            else:
                # warn when local existing + planned downloads != network total
                if len(uper.VideoInfoDic_NetFileName) != len(uper.VideoInfoDic_loaclFileName):
                    logger.warn("VideoInfoDic_NetFileName Count: " +
                                str(len(uper.VideoInfoDic_NetFileName)) +
                                " != VideoInfoDic_loaclFileName Count: " +
                                str(len(uper.VideoInfoDic_loaclFileName)))
            uper.ErrorUrl_Dic.clear()
        logger.info("Spider All Done.")
        # --------------------------------------------------------------
        logger.info("Start Download" + "----" * 20)
        # before downloading, sync the local dictionary with the network dictionary
        logger.info("Start Sync Dic")
        for uper in pp.uperList:
            iNeedDl = 0
            for fileName, oneVideo in zip(uper.VideoInfoDic_loaclFileName.keys(),
                                          uper.VideoInfoDic_loaclFileName.values()):
                # only process entries that match a network-side item
                findList = list(filter(lambda d: d.split('_')[1] == fileName,
                                       uper.VideoInfoDic_NetFileName.keys()))
                if any(findList):
                    uper.VideoInfoDic_NetFileName[findList[0]].isDownloaded = oneVideo.isDownloaded
                if oneVideo.isDownloaded == False:
                    iNeedDl = iNeedDl + 1
            logger.info(uper.UserName + "NetFile / LocalFile -- NeedDl: " +
                        str(len(uper.VideoInfoDic_NetFileName)) + " / " +
                        str(len(uper.VideoInfoDic_loaclFileName)) + " -- " + str(iNeedDl))
        logger.info("End Sync Dic")
        for uper in pp.uperList:
            directory = os.path.join(saveRootPath, uper.UserName)
            for fileName, oneVideo in zip(uper.VideoInfoDic_NetFileName.keys(),
                                          uper.VideoInfoDic_NetFileName.values()):
                if oneVideo.isDownloaded == True:
                    continue
                DownloadRetryTimes = 0
                oneRe = False
                while oneRe is False and DownloadRetryTimes <= 10:
                    oneRe = Downloader(logger, directory, oneVideo.time,
                                       oneVideo.title, oneVideo.url).ProcessOne()
                    DownloadRetryTimes = DownloadRetryTimes + 1
                    logger.info("Try Download " + str(DownloadRetryTimes) + " times.")
                    RandomSleep()
                # (the original checked OneSpiderRetryTimes here, a likely copy-paste slip)
                if DownloadRetryTimes > 10:
                    logger.error("Retry Download " + str(DownloadRetryTimes) + " times.")
                    logger.error("Error Url: " + oneVideo.url)
                # mark the download as finished
                if oneRe:
                    oneVideo.isDownloaded = True
                    uper.ThisTimeDownloadCount = uper.ThisTimeDownloadCount + 1
    except Exception as ex:
        errInfo = "Catch Exception: " + str(ex)
        logger.error(errInfo)
    finally:
        logger.info("finally" + "----" * 20)
        for uper in pp.uperList:
            logger.info("This Time Download: " + uper.UserName + " -- " +
                        str(uper.ThisTimeDownloadCount))
        for uper in pp.uperList:
            for fileName, oneVideo in zip(uper.VideoInfoDic_NetFileName.keys(),
                                          uper.VideoInfoDic_NetFileName.values()):
                if oneVideo.isDownloaded == False:
                    logger.error('Download Fail:' + uper.UserName)
                    logger.error(oneVideo.url)
        logger.info("This Time Done.")
# Code written by Kevin Peters, Michael Simmons, Michael West, Nikky Rajavasireddy, Blake Crowther
import numpy as np
from sklearn.tree import DecisionTreeClassifier

from PreProcess import PreProcess


class d_tree:
    def __init__(self, data):
        self.dtree = DecisionTreeClassifier(criterion='entropy')
        # the last column holds the class label
        self.cutoff = data.shape[1] - 1
        self.dtree.fit(data[:, 0:self.cutoff],
                       np.asarray(data[:, self.cutoff], int))

    def test_score(self, test_data):
        return self.dtree.score(test_data[:, 0:self.cutoff],
                                test_data[:, self.cutoff])


data = PreProcess('income-training.csv', True).fitted_data
test_data = PreProcess('income-test.csv', False).fitted_data
tree = d_tree(data)
print(tree.test_score(test_data))
def run_bootstrap(N, delta_error, V_dir, V_Fs, alignment='aligned'):
    V_fileinfo = os.listdir(V_dir)
    V_Err = []
    V_Err_noise = []
    NA = []
    V_UpperLimb_data_arr = []
    DistData = []
    Steps = 10
    for i in range(0, len(V_fileinfo)):
        V_fn = V_dir + os.sep + V_fileinfo[i]
        temp = pd.read_csv(V_fn, skiprows=4)
        temp = temp.values
        V_time = temp[:, 0] / V_Fs
        V_data = temp[:, 2:]
        del temp
        V_data = PreProcess(V_data)
        V_UpperLimb_data_arr.append(get_VICON_UL_prop(V_data, V_Fs, -V_Fs) / 1000.0)
    for i in range(0, N):
        V_Err_noise = []
        V_Err = []
        NA = []
        for j in range(0, len(V_fileinfo)):
            V_UpperLimb_data = V_UpperLimb_data_arr[j]
            # time-series noise injection disabled in favor of the mean-position noise below
            V_UpperLimb_data_noise = V_UpperLimb_data  # + delta_error + 0.01*np.random.randn(...)
            N_joints = 8
            V_MeanPos = np.squeeze(np.mean(V_UpperLimb_data, axis=1))
            V_MeanPos_noise = np.squeeze(np.mean(V_UpperLimb_data_noise, axis=1))
            # white-noise error
            arr1 = np.random.normal(loc=0., scale=[0.009, 0.004, 0.002],
                                    size=V_MeanPos_noise.shape)
            # joint-mismatch error
            arr2 = np.random.normal(loc=0., scale=[0.01, 0.01, 0.01],
                                    size=V_MeanPos_noise.shape)
            V_MeanPos_noise = V_MeanPos_noise + arr1 + arr2
            if alignment == 'aligned':
                V_MeanPos_noise, V_MeanPos, _, _ = proprioception_align(
                    V_MeanPos_noise, V_MeanPos)
                v_err_noise, v_err, norm_angle = proprioception_error_combo(
                    V_MeanPos_noise, V_MeanPos)
            else:
                v_err_noise, v_err, norm_angle = proprioception_error_combo_unaligned(
                    V_MeanPos_noise, V_MeanPos)
            V_Err_noise.append(v_err_noise)
            V_Err.append(v_err)
            NA.append(norm_angle)
        if (i + 1) % (N // Steps) == 0:
            print((100 * (i + 1) / N), "% completed...")
        V_Err_noise = np.array(V_Err_noise)
        V_Err = np.array(V_Err)
        NA = np.array(NA)
        DistData.append(V_Err_noise - V_Err)
    return DistData
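# Hypothetical invocation sketch; the directory and sampling rate are
# illustrative assumptions, not from the source.
dist = run_bootstrap(N=1000, delta_error=0.0, V_dir='./VICON_csv', V_Fs=100,
                     alignment='aligned')
dist = np.concatenate(dist)  # pooled bootstrap distribution of error differences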
class Word2Vec:
    data_index = 0

    def __init__(self, filepath, fileReadMode='zip', vocabSize=20000):
        self.PreProcessObj = PreProcess(filepath, fileReadMode)
        data, word2Int, int2Word = self.PreProcessObj.processCorpus(vocabSize)
        self.data = data
        self.word2Int = word2Int
        self.int2Word = int2Word
        self.vocabSize = self.PreProcessObj.getVocabSize()

    def configure(self, training_steps=200000, batch_size=128, valid_size=70,
                  validSampStInd=100, validSampEndInd=200, embedding_size=128,
                  skip_window=3, num_skips=2, lossfunction='nce',
                  optimiser='gradient_descent', learning_rate=1.0,
                  num_sampled_nce=64):
        self.training_steps = training_steps
        self.batch_size = batch_size
        self.valid_size = valid_size
        self.validSampStIndex = validSampStInd
        self.validSampEndIndex = validSampEndInd
        self.embedding_size = embedding_size  # dimension of the embedding vector
        self.skip_window = skip_window  # how many words to consider left and right
        self.num_skips = num_skips
        self.vocabSize = self.PreProcessObj.getVocabSize()
        self.lossfunction = lossfunction
        self.optimiser = optimiser
        self.learning_rate = learning_rate
        self.num_sampled_nce = num_sampled_nce

    ###########################################################################
    def generate_batch(self):
        """Generate a training batch for the skip-gram model."""
        assert self.batch_size % self.num_skips == 0
        assert self.num_skips <= 2 * self.skip_window
        batch = np.ndarray(shape=(self.batch_size), dtype=np.int32)
        labels = np.ndarray(shape=(self.batch_size, 1), dtype=np.int32)
        span = 2 * self.skip_window + 1  # [ skip_window target skip_window ]
        # create a buffer to hold the current context
        buffer = collections.deque(maxlen=span)
        for _ in range(span):
            buffer.append(self.data[Word2Vec.data_index])
            Word2Vec.data_index = (Word2Vec.data_index + 1) % len(self.data)
        for i in range(self.batch_size // self.num_skips):
            target = self.skip_window  # target label at the center of the buffer
            targets_to_avoid = [self.skip_window]
            for j in range(self.num_skips):
                while target in targets_to_avoid:
                    target = random.randint(0, span - 1)
                targets_to_avoid.append(target)
                batch[i * self.num_skips + j] = buffer[self.skip_window]
                labels[i * self.num_skips + j, 0] = buffer[target]
            buffer.append(self.data[Word2Vec.data_index])
            Word2Vec.data_index = (Word2Vec.data_index + 1) % len(self.data)
        # backtrack a little to avoid skipping words at the end of a batch
        Word2Vec.data_index = (Word2Vec.data_index + len(self.data) - span) % len(self.data)
        return batch, labels

    def createValidationSet(self):
        # random set of words to evaluate similarity on; only pick dev samples
        # from the head of the distribution
        valid_window = range(self.validSampStIndex, self.validSampEndIndex)
        self.valid_examples = np.random.choice(valid_window, self.valid_size, replace=False)
        self.valid_dataset = tf.constant(self.valid_examples, dtype=tf.int32)

    ###########################################################################
    def createEmbeddingMatrix(self):
        return tf.Variable(
            tf.random_uniform([self.vocabSize, self.embedding_size], -1.0, 1.0),
            name='Embedding')

    def weight_variable(self):
        return tf.Variable(
            tf.truncated_normal([self.vocabSize, self.embedding_size],
                                stddev=1.0 / math.sqrt(self.embedding_size)))

    def bias_variable(self):
        return tf.Variable(tf.zeros([self.vocabSize]))

    def lossFunction(self, embeddings, train_inputs, train_labels, num_sampled_nce=64):
        embed = tf.nn.embedding_lookup(embeddings, train_inputs)
        if self.lossfunction == 'CE':
            # NOTE: the original CE branch passed embedding_size as the logits and
            # referenced self.train_labels, which cannot run; this reconstruction
            # projects the embeddings to vocabulary logits before the softmax
            softmax_weights = self.weight_variable()
            softmax_biases = self.bias_variable()
            logits = tf.matmul(embed, softmax_weights, transpose_b=True) + softmax_biases
            train_one_hot = tf.one_hot(tf.squeeze(train_labels, axis=1), self.vocabSize)
            loss = tf.reduce_mean(
                tf.nn.softmax_cross_entropy_with_logits(logits=logits,
                                                        labels=train_one_hot))
        else:
            nce_weights = self.weight_variable()
            nce_biases = self.bias_variable()
            loss = tf.reduce_mean(
                tf.nn.nce_loss(weights=nce_weights,
                               biases=nce_biases,
                               labels=train_labels,
                               inputs=embed,
                               num_sampled=num_sampled_nce,
                               num_classes=self.vocabSize))
        return loss

    def chooseOptimiser(self, loss, mode='gradient_descent'):
        if mode == 'gradient_descent':
            optimizer = tf.train.GradientDescentOptimizer(
                learning_rate=self.learning_rate).minimize(loss)
        else:
            optimizer = tf.train.AdamOptimizer(
                learning_rate=self.learning_rate).minimize(loss)
        return optimizer

    ###########################################################################
    def writeLookUpTableToFile(self, dirToSave):
        with open(dirToSave + '/metadata.tsv', 'w') as file:
            file.write('Word\tID\n')
            for id, word in self.int2Word.items():
                file.write(word + '\t' + str(id) + '\n')

    def setUpTensorBoard(self, dirToSave, final_embeddings):
        from tensorflow.contrib.tensorboard.plugins import projector
        summary_writer = tf.summary.FileWriter(dirToSave)
        config = projector.ProjectorConfig()
        embedding = config.embeddings.add()
        embedding.tensor_name = final_embeddings.name
        # link this tensor to its metadata file (e.g. labels)
        embedding.metadata_path = os.path.join(dirToSave, 'metadata.tsv')
        # save a configuration file that TensorBoard reads during startup
        projector.visualize_embeddings(summary_writer, config)

    def trainWord2Vec(self, dirToSave, modelToLoad='', saveIntermediateModels=False):
        # create a dataflow graph
        graph = tf.Graph()
        with graph.as_default():
            train_inputs = tf.placeholder(tf.int32, shape=[self.batch_size])
            train_labels = tf.placeholder(tf.int32, shape=[self.batch_size, 1])
            self.createValidationSet()
            embeddings = self.createEmbeddingMatrix()
            # NCE loss is pinned to the CPU because of a missing GPU implementation
            if self.lossfunction == 'nce':
                with tf.device('/cpu:0'):
                    loss = self.lossFunction(embeddings, train_inputs, train_labels,
                                             num_sampled_nce=self.num_sampled_nce)
            else:
                with tf.device('/gpu:0'):
                    loss = self.lossFunction(embeddings, train_inputs, train_labels,
                                             num_sampled_nce=self.num_sampled_nce)
            optimiser = self.chooseOptimiser(loss, mode=self.optimiser)
            # compute the cosine similarity between minibatch examples and all embeddings
            norm = tf.sqrt(tf.reduce_sum(tf.square(embeddings), 1, keep_dims=True))
            normalized_embeddings = embeddings / norm
            valid_embeddings = tf.nn.embedding_lookup(normalized_embeddings,
                                                      self.valid_dataset)
            similarity = tf.matmul(valid_embeddings, normalized_embeddings,
                                   transpose_b=True)
            # add the variable initializer
            init = tf.global_variables_initializer()

        # link the Python program to the C++ interface and execute operations on the graph
        num_steps = self.training_steps
        with graph.as_default():
            saver = tf.train.Saver()
        with tf.Session(graph=graph) as session:
            # we must initialize all variables before we use them
            init.run()
            if modelToLoad != '':
                saver.restore(session, modelToLoad)
                print('Initialized from intermediate model')
            average_loss = 0
            for step in xrange(num_steps):
                batch_inputs, batch_labels = self.generate_batch()
                feed_dict = {train_inputs: batch_inputs, train_labels: batch_labels}
                _, loss_val = session.run([optimiser, loss], feed_dict=feed_dict)
                average_loss += loss_val
                if step % 2000 == 0:
                    if step > 0:
                        average_loss /= 2000
                    # the average loss is an estimate over the last 2000 batches
                    print('Average loss at step ', step, ': ', average_loss)
                    average_loss = 0
                # note: this is expensive (~20% slowdown if computed every 500 steps)
                if step % 10000 == 0:
                    sim = similarity.eval()
                    for i in xrange(self.valid_size):
                        valid_word = self.int2Word[self.valid_examples[i]]
                        top_k = 8  # number of nearest neighbors
                        nearest = (-sim[i, :]).argsort()[1:top_k + 1]
                        log_str = 'Nearest to %s:' % valid_word
                        for k in xrange(top_k):
                            close_word = self.int2Word[nearest[k]]
                            log_str = '%s %s,' % (log_str, close_word)
                        print(log_str)
                    if saveIntermediateModels:
                        saver.save(session, os.path.join(dirToSave, 'model_intermediate.ckpt'))
            final_embeddings = normalized_embeddings.eval()
            # save the model
            self.writeLookUpTableToFile(dirToSave)
            saver.save(session, os.path.join(dirToSave, 'model.ckpt'))
            self.setUpTensorBoard(dirToSave, embeddings)
        return final_embeddings, self.word2Int

    def plot_with_labels(self, low_dim_embs, labels, filename='tsne.png'):
        assert low_dim_embs.shape[0] >= len(labels), 'More labels than embeddings'
        plt.figure(figsize=(18, 18))  # in inches
        for i, label in enumerate(labels):
            x, y = low_dim_embs[i, :]
            plt.scatter(x, y)
            plt.annotate(label,
                         xy=(x, y),
                         xytext=(5, 2),
                         textcoords='offset points',
                         ha='right',
                         va='bottom')
        plt.savefig(filename)

    def displayResults(self, embeddings, validSampEndInd):
        try:
            tsne = TSNE(perplexity=30, n_components=2, init='pca', n_iter=5000)
            low_dim_embs = tsne.fit_transform(embeddings[:validSampEndInd + 500, :])
            labels = [self.int2Word[i] for i in xrange(validSampEndInd + 500)]
            self.plot_with_labels(low_dim_embs, labels)
        except ImportError:
            print('Please install sklearn, matplotlib, and scipy to show embeddings.')
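# Minimal usage sketch; the corpus path and save directory are hypothetical,
# not from the source.
w2v = Word2Vec('text8.zip', fileReadMode='zip', vocabSize=20000)
w2v.configure(training_steps=100001, batch_size=128, embedding_size=128,
              skip_window=3, num_skips=2, lossfunction='nce',
              optimiser='gradient_descent')
embeddings, word2Int = w2v.trainWord2Vec('./w2v_model')
w2v.displayResults(embeddings, w2v.validSampEndIndex)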
for predictday in [14, 17, 20]:
    if not os.path.exists(Model_dir + 'predictday%i/' % predictday):
        os.mkdir(Model_dir + 'predictday%i/' % predictday)
    for i in range(8):
        if not os.path.exists(Model_dir + 'predictday%i/%i/' % (predictday, i)):
            os.mkdir(Model_dir + 'predictday%i/%i/' % (predictday, i))

# result file paths
mse_file = 'csv_file/bignet_mse.txt'
submit_file = 'csv_file/lstm.csv'

# miscellaneous
model = bigNet()
model.epochs = 25
p = PreProcess()


def main(predictday, i):
    Xtrain, Wtrain, Ytrain, Xdev, Wdev, Ydev, Xtest, Wtest = p.generate_data4bignet(
        i, max([0, 36 * i - 1]), 36 * i + 2, predictday, 60, return_weight=True)
    model.input_dim = Xtrain.shape[1]
    model.save_dir = Model_dir + 'predictday%i/%i/' % (predictday, i)
    model.val_mse_min = np.inf
    # train the model
        max_features=12)
    model.fit(Xtrain, Ytrain)
    return model


def each_xgb_model(Xtrain, Ytrain):
    param = {}
    dtrain = xgb.DMatrix(Xtrain)
    dtest = xgb.DMatrix(Xtest)
    model = None
    return model


mse_file = 'csv_file/lasso_mse.csv'

if __name__ == '__main__':
    prepro = PreProcess()
    cand_list = prepro.select_nearest()
    Ysubmit = []
    mse_mat = []
    for predictday in [14, 17, 20]:
        Xtrain, Ytrain, Xdev, Ydev, Xtest = prepro.readdata(predictday)
        mse_station = []
        Vy = []
        # fit the models
        Yhat = []
        Vy = []
        for i in range(228):
            Xtrain0, Ytrain0 = Xtrain[:, cand_list[i], :].reshape(
                Xtrain.shape[0], -1), Ytrain[:, i]
            Xdev0, Ydev0 = Xdev[:, cand_list[i], :].reshape(
                Xdev.shape[0], -1), Ydev[:, i]
import numpy as np
import xgboost as xgb

from PreProcess import PreProcess

p = PreProcess()

params = {
    'booster': 'gbtree',
    'objective': 'reg:linear',   # linear-regression objective
    'gamma': 0.1,                # controls post-pruning; larger is more conservative (typically 0.1-0.2)
    'max_depth': 12,             # tree depth; deeper trees overfit more easily
    'lambda': 2,                 # L2 regularization on weights; larger values resist overfitting
    'subsample': 0.7,            # row subsampling of the training data
    'colsample_bytree': 0.7,     # column subsampling when growing each tree
    'min_child_weight': 3,
    'silent': 0,                 # 1 suppresses run-time messages; 0 is usually preferable
    'eta': 0.01,                 # analogous to the learning rate
    'seed': 1000,
    'nthread': 4,                # number of CPU threads
    'eval_metric': 'rmse'
}
xgb.Booster()


def modelfit(alg, dtrain, predictors, useTrainCV=True, cv_folds=5,
             early_stopping_rounds=50):
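# Since the snippet cuts off at the modelfit header, here is a hedged sketch of
# feeding params to xgboost's native API; the data variables and round count
# are assumptions (presumably from p.readdata(...)), not from the source.
dtrain = xgb.DMatrix(Xtrain, label=Ytrain)
ddev = xgb.DMatrix(Xdev, label=Ydev)
watchlist = [(dtrain, 'train'), (ddev, 'dev')]
booster = xgb.train(params, dtrain, num_boost_round=500,
                    evals=watchlist, early_stopping_rounds=50)
Yhat = booster.predict(xgb.DMatrix(Xdev))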
if __name__ == '__main__':
    from NeuroIO import NeuroIO
    from PreProcess import PreProcess
    from Cluster import Cluster

    neuroread = NeuroIO(r"C:\Users\USERNAME\Downloads\Brainbow-demo.tif")
    img_data = neuroread.img_data_return()[0]
    img_path = neuroread.img_data_return()[1]
    pre_processed_data = PreProcess(im_data=img_data,
                                    filepath=img_path,
                                    snapshotpath=r"C:\UROP\\",
                                    multiprocess=True,
                                    cores="auto")
    pre_processed_data.blur(blur_sigma=0.5, xyz_scale=(1, 1, 1))
    pre_processed_data.find_threshold(method="isodata", snapshot=True)
    refined_mask = pre_processed_data.sobel_watershed(threshold="last", snapshot=True)
    pre_processed_data.lab_mode()
    img_lab = pre_processed_data.return_data()
    segment = Cluster(img_data=img_lab, mask=refined_mask)
    cluster_results = segment.super_pixel(start_frame=0,
                                          end_frame=100,
                                          size_threshold=75,
                                          max_dist=19,
                                          min_samp=10,
                                          dist_weight=3.0,
                                          color_weight=18.0,
                                          metric="euclidean",
                                          algo="auto",
                                          multiprocess=True,
                                          num_cores="auto",
def preprocess(self):
    preprocessor = PreProcess(self.h_params)
    for data_name in self.h_params.data.name_list:
        preprocessor.preprocess_data(data_name)
def validate_VICON_PEACK(P_dir, P_Fs, V_dir, V_Fs, alignment='aligned'):
    P_fileinfo = os.listdir(P_dir)
    V_fileinfo = os.listdir(V_dir)
    if len(P_fileinfo) != len(V_fileinfo):
        print('Error: Mismatch in number of records for PEACK and VICON system')
    P_Err = []
    V_Err = []
    NA = []
    PA_Err = []
    VA_Err = []
    RotMatrices = []
    CenterCoords = []
    for i in range(0, len(P_fileinfo)):
        P_fn = P_dir + os.sep + P_fileinfo[i]
        V_fn = V_dir + os.sep + V_fileinfo[i]
        temp = pd.read_csv(P_fn)
        temp = temp.values
        P_time = temp[:, 0]
        P_data = temp[:, 1:]
        temp = pd.read_csv(V_fn, skiprows=4)
        temp = temp.values
        V_time = temp[:, 0] / V_Fs
        V_data = temp[:, 2:]
        del temp
        P_data = PreProcess(P_data)
        V_data = PreProcess(V_data)
        P_UpperLimb_data = get_PEACK_UL_prop(P_data, P_Fs, -P_Fs)
        V_UpperLimb_data = get_VICON_UL_prop(V_data, V_Fs, -V_Fs) / 1000.0
        N_joints = 8
        for j in range(0, N_joints):
            joint = np.squeeze(P_UpperLimb_data[j, :, :])
            joint = medfilt(joint, [99, 1])
            P_UpperLimb_data[j, :, :] = savgol_filter(joint, 29, 3, axis=0)
        V_assym = []
        for t in range(0, V_UpperLimb_data.shape[1]):
            joint = np.squeeze(V_UpperLimb_data[:, t, :])
            # err = Asymmetry_Error2(joint, 'VICON')  # alternative metric
            err = proprioception_angles(joint)
            V_assym.append(err)
        V_assym = np.array(V_assym)
        P_assym = []
        for t in range(0, P_UpperLimb_data.shape[1]):
            joint = np.squeeze(P_UpperLimb_data[:, t, :])
            # err = Asymmetry_Error2(joint, 'PEACK')  # alternative metric
            err = proprioception_angles(joint)
            P_assym.append(err)
        P_assym = np.array(P_assym)
        t1 = P_time[P_Fs - 1:]
        t2 = V_time[V_Fs - 1:]
        # average the error over the window from 3 s to 1 s before the end
        pinched = P_assym[-P_Fs * 3:-P_Fs * 1]
        P_Err.append(np.mean(pinched))
        pinched = V_assym[-V_Fs * 3:-V_Fs * 1]
        V_Err.append(np.mean(pinched))
        # plot_error_timeseries(100*P_assym, 100*V_assym, t1, t2)
    P_Err = np.array(P_Err)
    V_Err = np.array(V_Err)
    return P_Err, V_Err, NA, 0, 0
def data_preprocess():
    print('Loading data...')
    preprocess = PreProcess()
    test_X_w2v, test_Y = preprocess.data_preprocess(test_data_path)  # test data
    return test_X_w2v, test_Y
def process_VICON(dir, Fs, Method='Pose'):
    fileinfo = os.listdir(dir)
    Vsym = []
    NA = []
    A_Err = []
    CenterCoords = []
    for i in range(0, len(fileinfo)):
        if fileinfo[i].endswith(".csv") != True:
            continue
        fn = dir + os.sep + fileinfo[i]
        temp = pd.read_csv(fn, skiprows=4)
        temp = temp.values
        time = temp[:, 0] / Fs
        data = temp[:, 2:]
        del temp
        data = PreProcess(data)
        UpperLimb_data = get_VICON_UL_prop(data, Fs, -Fs) / 1000.0
        N_joints = 8
        for j in range(0, N_joints):
            joint = np.squeeze(UpperLimb_data[j, :, :])
            joint = butter_lowpass_filter(joint, 6, Fs, order=5)
            UpperLimb_data[j, :, :] = joint
        assym = []
        if Method != 'Stability':
            for t in range(0, UpperLimb_data.shape[1]):
                joint = np.squeeze(UpperLimb_data[:, t, :])
                if Method == 'Pose':
                    err = Asymmetry_Error2(joint, 'VICON')
                else:
                    err = proprioception_angles(joint)
                assym.append(err)
            assym = np.array(assym)
            t1 = time[Fs - 1:]
            # average over the window from 3 s to 1 s before the end of the recording
            pinched = assym[-Fs * 3:-Fs * 1]
            Vsym.append(np.mean(pinched))
        else:
            Jt = UpperLimb_data[:, -Fs * 3:-Fs * 1, :]
            dist = 100 * joint_stability(Jt)  # converting to cm
            Vsym.append(dist)
            t1 = time[-Fs * 2:-Fs * 1]
    Vsym = np.array(Vsym)
    return Vsym
os.environ["CUDA_VISIBLE_DEVICES"] = "-1" # 模型保存路径 Model_dir = 'crnnModel/' if not os.path.exists(Model_dir): os.mkdir(Model_dir) for predictday in [14, 17, 20]: if not os.path.exists(Model_dir + 'predictday%i'%predictday): os.mkdir(Model_dir + 'predictday%i'%predictday) # 结果文件路径 mse_file = 'csv_file/crnn_mse_1.txt' submit_file = 'csv_file/submit_crnn_1.csv' # 训练数据等 preprocess = PreProcess() model = TrafficCRNN() cand_list = preprocess.select_nearest() Train_data = dict() for predictday in [14, 17, 20]: Train_data[predictday] = preprocess.readdata(predictday) def main(predictday, i): Xtrain, Ytrain, Xdev, Ydev, Xtest = Train_data[predictday] Xtrain, Ytrain = Xtrain[:, cand_list[i], :], Ytrain[:, i] Xdev, Ydev = Xdev[:, cand_list[i], :], Ydev[:, i] Xtest = Xtest[:, cand_list[i], :] # 模型参数 model.input_dim = len(cand_list[i]) model.val_mse_min = np.inf
class Test():
    def __init__(self, h_params: HParams):
        self.pre_processor = PreProcess(h_params)
        self.device = h_params.resource.device
        self.h_params = h_params
        self.util = Util()
        self.batch_size = self.h_params.train.batch_size
        if h_params.train.model == "JDCUNET":
            self.model = JDCPlusUnet(self.h_params.resource.device).to(
                self.h_params.resource.device)
        if h_params.train.model == "UNETONLY":
            self.model = UnetOnly(self.h_params.resource.device).to(
                self.h_params.resource.device)
        self.phase = None
        self.normalize_value = 0
        self.output_path = None
        self.output_name = ""

    def output(self, pretrain_path: str, audio_input_path, audio_name):
        print("load pretrained model")
        pretrain_name = pretrain_path.split("/")[-1].replace(".pth", "") + "/"
        self.output_path = self.h_params.test.output_path + "/" + pretrain_name
        os.makedirs(self.output_path, exist_ok=True)
        self.output_name = self.h_params.log.time + audio_name.replace(".wav", "")
        best_model_load = torch.load(pretrain_path, map_location='cpu')
        self.model.load_state_dict(best_model_load)
        self.model.to(self.device)
        # self.input_to_output(audio_input_path)

        if self.h_params.train.model == "JDCUNET":
            input_jdc, input_unet = self.make_model_input_jdcunet(audio_input_path)
        if self.h_params.train.model == "UNETONLY":
            input_unet = self.make_model_input(audio_input_path)

        mask_vocal = None
        mask_accom = None
        just_test_input = None
        for start_idx in range(0, input_unet.shape[0], self.batch_size):
            print(f"making mask {start_idx}/{input_unet.shape[0]}")
            if self.h_params.train.model == "JDCUNET":
                input_seg = input_unet[start_idx:start_idx + self.batch_size]
                input_seg = input_seg.to(self.device)
                input_jdc_seg = input_jdc[start_idx:start_idx + self.batch_size]
                input_jdc_seg = input_jdc_seg.to(self.device)
                with torch.no_grad():
                    mask = self.make_mask_JDCUNET(input_jdc_seg, input_seg)
            if self.h_params.train.model == "UNETONLY":
                input_seg = input_unet[start_idx:start_idx + self.batch_size]
                input_seg = input_seg.to(self.device)
                with torch.no_grad():
                    mask = self.make_mask_UNET(input_seg)
            if self.h_params.test.is_binary_mask:
                mask[mask > 0.5] = 1
                mask[mask <= 0.5] = 0
            # the accompaniment mask is the complement of the vocal mask
            mask_a = torch.ones_like(mask) - mask
            just_test_input = self.make_output_spectro(just_test_input, input_seg)
            mask_vocal = self.make_output_spectro(mask_vocal, mask)
            mask_accom = self.make_output_spectro(mask_accom, mask_a)

        just_test_input_numpy = self.torch_to_np_spec(just_test_input)
        mask_v_numpy = self.torch_to_np_spec(mask_vocal)
        mask_a_numpy = self.torch_to_np_spec(mask_accom)
        self.mask_histogram(mask_v_numpy)
        masked_v_numpy = just_test_input_numpy * mask_v_numpy * self.normalize_value
        masked_a_numpy = just_test_input_numpy * mask_a_numpy * self.normalize_value
        just_test_input_numpy = just_test_input_numpy * self.normalize_value
        self.plot_spec(masked_v_numpy, "vocal")
        # self.plot_spec(just_test_input_numpy, "input")
        # self.inverse_stft_griffin_lim(masked_v_numpy, "vocal")
        # self.inverse_stft_griffin_lim(masked_a_numpy, "accom")
        self.inverse_stft(masked_v_numpy * self.phase[:, :masked_v_numpy.shape[1]], "vocal")
        self.inverse_stft(masked_a_numpy * self.phase[:, :masked_a_numpy.shape[1]], "accom")
        self.inverse_stft(just_test_input_numpy * self.phase[:, :masked_v_numpy.shape[1]],
                          "restored_input")

    def make_output_spectro(self, masking_spectro, output_model):
        result = masking_spectro
        output_model = output_model.to(torch.device('cpu'))
        for i in range(0, output_model.shape[0]):
            output_seg = (output_model[i].squeeze(0)).transpose(0, 1)
            result = self.concatenation(result, output_seg)
        return result

    def concatenation(self, x, y, dimension=1):
        if x is None:
            return y
        return torch.cat((x, y), 1)

    def make_mask_JDCUNET(self, jdc_seg, unet_seg):
        pitch, voice, vocal_mask, accom_mask = self.model(jdc_seg, unet_seg)
        return vocal_mask

    def make_mask_UNET(self, unet_seg):
        vocal_mask, accom_mask = self.model(unet_seg)
        return vocal_mask

    def make_model_input(self, audio_input_path):
        mag_unet, normalize_value, phase = self.util.magnitude_spectrogram(
            audio_input_path,
            self.h_params.preprocess.sample_rate,
            self.h_params.preprocess.nfft,
            self.h_params.preprocess.window_size,
            self.h_params.preprocess.hop_length,
            get_pahse=True)
        self.phase = phase
        self.normalize_value = normalize_value
        input_unet = []
        for start_idx in range(0, mag_unet.shape[1],
                               self.h_params.preprocess.model_input_time_frame_size):
            end_idx = start_idx + self.h_params.preprocess.model_input_time_frame_size
            input_unet_seg = np.swapaxes(mag_unet[1:, start_idx:end_idx], axis1=0, axis2=1)
            # drop trailing segments shorter than the model input size
            if input_unet_seg.shape[0] != self.h_params.preprocess.model_input_time_frame_size:
                continue
            input_unet.append([input_unet_seg])
        input_unet_tensor = torch.tensor(input_unet)
        return input_unet_tensor

    def make_model_input_jdcunet(self, audio_input_path):
        mag_unet, normalize_value, phase = self.util.magnitude_spectrogram(
            audio_input_path,
            self.h_params.preprocess.sample_rate,
            self.h_params.preprocess.nfft,
            self.h_params.preprocess.window_size,
            self.h_params.preprocess.hop_length,
            get_pahse=True)
        self.phase = phase
        self.normalize_value = normalize_value
        jdc_mag_mix, _ = self.util.magnitude_spectrogram(
            audio_input_path,
            self.h_params.preprocess.jdc_sampling_rate,
            self.h_params.preprocess.jdc_nfft,
            self.h_params.preprocess.jdc_window_size,
            self.h_params.preprocess.jdc_hop_length)
        input_unet = []
        input_jdc = []
        for start_idx in range(0, mag_unet.shape[1],
                               self.h_params.preprocess.model_input_time_frame_size):
            end_idx = start_idx + self.h_params.preprocess.model_input_time_frame_size
            input_unet_seg = np.transpose(mag_unet[1:, start_idx:end_idx])
            input_jdc_seg = np.transpose(jdc_mag_mix[:, start_idx:end_idx])
            if input_unet_seg.shape[0] != self.h_params.preprocess.model_input_time_frame_size:
                continue
            input_unet.append([input_unet_seg])
            input_jdc.append([input_jdc_seg])
        input_unet_tensor = torch.tensor(input_unet)
        input_jdc_tensor = torch.tensor(input_jdc)
        return input_jdc_tensor, input_unet_tensor

    def inverse_stft_griffin_lim(self, stft_mat, name):
        filename = (self.output_path + self.h_params.time_for_output +
                    "_griffin_" + self.output_name + "_" + name + ".wav")
        istft_mat = librosa.griffinlim(abs(stft_mat),
                                       hop_length=self.h_params.stft_hop_length,
                                       win_length=self.h_params.stft_window_size)
        sf.write(filename, istft_mat, self.h_params.down_sample_rate)
        return istft_mat

    def inverse_stft(self, stft_mat, name):
        filename = self.output_path + self.output_name + "_" + name + ".wav"
        istft_mat = librosa.core.istft(stft_mat,
                                       hop_length=self.h_params.preprocess.hop_length,
                                       win_length=self.h_params.preprocess.window_size)
        sf.write(filename, istft_mat, self.h_params.preprocess.sample_rate)
        return istft_mat

    def plot_spec(self, spectro, name):
        filename = self.output_path + self.output_name + "_" + name + ".png"
        plt.figure()
        librosa.display.specshow(librosa.amplitude_to_db(np.abs(spectro), ref=np.max))
        plt.colorbar()
        plt.savefig(filename, dpi=600)

    def mask_histogram(self, mask, name="vocal_mask_histogram"):
        plt.hist(mask.flatten())
        plt.savefig(self.output_path + "_" + name + ".png", dpi=300)

    def torch_to_np_spec(self, torch_spec):
        numpy_spec = torch_spec.detach().cpu().numpy()
        # re-insert the DC bin that was dropped when building the model input
        zero_padd = np.zeros((1, numpy_spec.shape[1]))
        numpy_spec = np.concatenate((zero_padd, numpy_spec), 0)
        return numpy_spec

    def input_to_output(self, audio_input_path):
        y = self.pre_processor.audio_load(audio_input_path,
                                          self.h_params.test_audio_sample_rate)
        filename = self.output_path + "_original_input.wav"
        sf.write(filename, y, self.h_params.down_sample_rate)
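# Hypothetical driver; the checkpoint and audio paths are illustrative, and
# HParams() is assumed to carry the defaults used above.
if __name__ == '__main__':
    h_params = HParams()
    tester = Test(h_params)
    tester.output(pretrain_path="./pretrained/JDCUNET_best.pth",
                  audio_input_path="./samples/mix.wav",
                  audio_name="mix.wav")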