def demo(self, pt):
    img_ori = cv2.imread(pt)
    if self.resize:
        img, resize_ratio, dw, dh = letterbox_resize(
            img_ori, self.new_size[0], self.new_size[1])
    else:
        height_ori, width_ori = img_ori.shape[:2]
        img = cv2.resize(img_ori, tuple(self.new_size))
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    img = np.asarray(img, np.float32)
    img = img[np.newaxis, :] - 127.5

    boxes_, scores_, labels_ = self.sess.run(
        [self.boxes, self.scores, self.labels],
        feed_dict={self.input_data: img})

    # rescale the coordinates to the original image
    # (was `if letterbox_resize:`, which tested the imported function object
    # and was therefore always truthy; the flag is self.resize)
    if self.resize:
        boxes_[:, [0, 2]] = (boxes_[:, [0, 2]] - dw) / resize_ratio
        boxes_[:, [1, 3]] = (boxes_[:, [1, 3]] - dh) / resize_ratio
    else:
        boxes_[:, [0, 2]] *= (width_ori / float(self.new_size[0]))
        boxes_[:, [1, 3]] *= (height_ori / float(self.new_size[1]))

    for i in range(len(boxes_)):
        x0, y0, x1, y1 = boxes_[i].astype(int)  # np.int is removed in recent NumPy
        cv2.rectangle(img_ori, (x0, y0), (x1, y1), (0, 200, 255), 4)
        res, con = HyperLPR_plate_recognition(img_ori, (x0, y0, x1, y1))
        label = 'Confidence: {:.2f}%\n'.format(
            scores_[i] * 100) + self.get_time(res)
        img_ori = self.drawTest(img_ori, label, 10, 10)
    cv2.imshow('result', img_ori)
    cv2.waitKey(0)
def demo(input_image):
    img_ori = cv2.imread(input_image)
    if resize:
        img, resize_ratio, dw, dh = letterbox_resize(
            img_ori, new_size[0], new_size[1])
    else:
        height_ori, width_ori = img_ori.shape[:2]
        img = cv2.resize(img_ori, tuple(new_size))
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    img = np.asarray(img, np.float32)
    img = img[np.newaxis, :] - 127.5

    boxes_, scores_, labels_ = sess.run(
        [boxes, scores, labels], feed_dict={input_data: img})

    # rescale the coordinates to the original image
    # (was `if letterbox_resize:` -- that tested the function, not the flag)
    if resize:
        boxes_[:, [0, 2]] = (boxes_[:, [0, 2]] - dw) / resize_ratio
        boxes_[:, [1, 3]] = (boxes_[:, [1, 3]] - dh) / resize_ratio
    else:
        boxes_[:, [0, 2]] *= (width_ori / float(new_size[0]))
        boxes_[:, [1, 3]] *= (height_ori / float(new_size[1]))

    print("box coords:")
    print(boxes_)
    print('*' * 30)
    print("scores:")
    print(scores_)
    print('*' * 30)
    print("labels:")
    print(labels_)

    for i in range(len(boxes_)):
        x0, y0, x1, y1 = boxes_[i]
        plot_one_box(img_ori, [x0, y0, x1, y1],
                     label=classes[labels_[i]] + ', {:.2f}%'.format(scores_[i] * 100),
                     color=color_table[labels_[i]])
    cv2.imshow('result', img_ori)
    cv2.waitKey(0)
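# Every snippet in this collection calls `letterbox_resize` and relies on its
# (padded_image, resize_ratio, dw, dh) return convention. For reference, a
# minimal sketch of such a helper, assuming centered gray padding and the
# (width, height) argument order used above -- an illustration, not necessarily
# the exact utils.data_aug implementation:
import cv2
import numpy as np

def letterbox_resize(img, new_width, new_height, interp=cv2.INTER_LINEAR):
    # scale uniformly so the image fits inside new_width x new_height
    ori_height, ori_width = img.shape[:2]
    resize_ratio = min(new_width / ori_width, new_height / ori_height)
    resize_w = int(resize_ratio * ori_width)
    resize_h = int(resize_ratio * ori_height)
    resized = cv2.resize(img, (resize_w, resize_h), interpolation=interp)

    # paste the resized image onto a gray canvas, centered
    padded = np.full((new_height, new_width, 3), 128, np.uint8)
    dw = (new_width - resize_w) // 2
    dh = (new_height - resize_h) // 2
    padded[dh:dh + resize_h, dw:dw + resize_w, :] = resized
    return padded, resize_ratio, dw, dh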
def GetBoundingBox(self, img_ori, half):
    from utils.data_aug import letterbox_resize
    # drop the alpha channel if present
    if img_ori.shape[2] == 4:
        img_ori = img_ori[:, :, :3]
    if half:
        # Grab the left half of the image
        height, width = img_ori.shape[:2]
        start_row, start_col = 0, 0
        end_row, end_col = height, width // 2
        img_ori = img_ori[start_row:end_row, start_col:end_col]
    self.height_ori, self.width_ori = img_ori.shape[:2]

    if self.letterbox_resize:
        img, resize_ratio, dw, dh = letterbox_resize(
            img_ori, self.new_size[0], self.new_size[1])
    else:
        height_ori, width_ori = img_ori.shape[:2]
        img = cv2.resize(img_ori, tuple(self.new_size))
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    img = np.asarray(img, np.float32)
    img = img[np.newaxis, :] / 255.

    boxes_, scores_, labels_, map4_ = self.sess.run(
        [self.boxes, self.scores, self.labels, self.map4],
        feed_dict={self.input_data: img})

    # rescale the coordinates to the original image
    if self.letterbox_resize:
        boxes_[:, [0, 2]] = (boxes_[:, [0, 2]] - dw) / resize_ratio
        boxes_[:, [1, 3]] = (boxes_[:, [1, 3]] - dh) / resize_ratio
    else:
        boxes_[:, [0, 2]] *= (width_ori / float(self.new_size[0]))
        boxes_[:, [1, 3]] *= (height_ori / float(self.new_size[1]))
    return boxes_, scores_, map4_[0]
"--restore_path", type=str, default= "/media/ubutnu/fc1a3be7-9b03-427e-9cc9-c4b242cefbff/YOLOv3_TensorFlow/checkpoint/model-epoch_90_step_175083_loss_0.4213_lr_1e-05", help="The path of the weights to restore.") args = parser.parse_args() args.anchors = parse_anchors(args.anchor_path) args.classes = read_class_names(args.class_name_path) args.num_class = len(args.classes) color_table = get_color_table(args.num_class) img_ori = cv2.imread(args.input_image) if args.letterbox_resize: img, resize_ratio, dw, dh = letterbox_resize(img_ori, args.new_size[0], args.new_size[1]) else: height_ori, width_ori = img_ori.shape[:2] img = cv2.resize(img_ori, tuple(args.new_size)) img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) img = np.asarray(img, np.float32) img = img[np.newaxis, :] / 255. with tf.Session() as sess: input_data = tf.placeholder(tf.float32, [1, args.new_size[1], args.new_size[0], 3], name='input_data') yolo_model = yolov3(args.num_class, args.anchors) with tf.variable_scope('yolov3'): pred_feature_maps = yolo_model.forward(input_data, False) pred_boxes, pred_confs, pred_probs = yolo_model.predict(pred_feature_maps)
def yolodet(image_path,
            anchor_path=rootpath + "/yolo/data/yolo_anchors.txt",
            new_size=[416, 416],
            letterbox=True,
            class_name_path=rootpath + "/yolo/data/coco.names",
            restore_path=rootpath + "/yolo/data/best_model"):
    anchors = parse_anchors(anchor_path)
    classes = read_class_names(class_name_path)
    num_class = len(classes)
    color_table = get_color_table(num_class)

    img_ori = cv2.imread(image_path)
    if letterbox:
        img, resize_ratio, dw, dh = letterbox_resize(img_ori, new_size[0], new_size[1])
    else:
        height_ori, width_ori = img_ori.shape[:2]
        img = cv2.resize(img_ori, tuple(new_size))
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    img = np.asarray(img, np.float32)
    img = img[np.newaxis, :] / 255.

    with tf.Session() as sess:
        input_data = tf.placeholder(tf.float32, [1, new_size[1], new_size[0], 3],
                                    name='input_data')
        yolo_model = yolov3(num_class, anchors)
        with tf.variable_scope('yolov3'):
            pred_feature_maps = yolo_model.forward(input_data, False)
        pred_boxes, pred_confs, pred_probs = yolo_model.predict(pred_feature_maps)
        pred_scores = pred_confs * pred_probs

        boxes, scores, labels = gpu_nms(pred_boxes, pred_scores, num_class,
                                        max_boxes=200, score_thresh=0.3, nms_thresh=0.45)

        saver = tf.train.Saver()
        saver.restore(sess, restore_path)

        boxes_, scores_, labels_ = sess.run([boxes, scores, labels],
                                            feed_dict={input_data: img})

        # rescale the coordinates to the original image
        if letterbox:
            boxes_[:, [0, 2]] = (boxes_[:, [0, 2]] - dw) / resize_ratio
            boxes_[:, [1, 3]] = (boxes_[:, [1, 3]] - dh) / resize_ratio
        else:
            boxes_[:, [0, 2]] *= (width_ori / float(new_size[0]))
            boxes_[:, [1, 3]] *= (height_ori / float(new_size[1]))

        tf.reset_default_graph()

        # flatten detections into one list:
        # (#1 class, #1 conf, #1 xmin, #1 ymin, #1 xmax, #1 ymax, #2 class, #2 conf, ...)
        flat_boxes = []  # renamed from `boxes`, which shadowed the NMS output tensor
        for i in range(np.shape(boxes_)[0]):
            flat_boxes.append(labels_[i])
            flat_boxes.append(scores_[i])
            flat_boxes.extend(boxes_[i, :])
        return flat_boxes
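# The flat list returned by yolodet above packs six values per detection
# (class, conf, xmin, ymin, xmax, ymax), so a caller steps through it in
# strides of six. A hypothetical usage sketch ("test.jpg" is a placeholder):
dets = yolodet("test.jpg")
for k in range(0, len(dets), 6):
    cls_id, conf, xmin, ymin, xmax, ymax = dets[k:k + 6]
    print(cls_id, conf, xmin, ymin, xmax, ymax)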
def yolodet(anchor_path, image_path, new_size, letterbox, class_name_path, restore_path):
    anchors = parse_anchors(anchor_path)
    classes = read_class_names(class_name_path)
    num_class = len(classes)
    color_table = get_color_table(num_class)

    img_ori = cv2.imread(image_path)
    if letterbox:
        img, resize_ratio, dw, dh = letterbox_resize(img_ori, new_size[0], new_size[1])
    else:
        height_ori, width_ori = img_ori.shape[:2]
        img = cv2.resize(img_ori, tuple(new_size))
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    img = np.asarray(img, np.float32)
    img = img[np.newaxis, :] / 255.

    with tf.Session() as sess:
        input_data = tf.placeholder(tf.float32, [1, new_size[1], new_size[0], 3],
                                    name='input_data')
        yolo_model = yolov3(num_class, anchors)
        with tf.variable_scope('yolov3'):
            pred_feature_maps = yolo_model.forward(input_data, False)
        pred_boxes, pred_confs, pred_probs = yolo_model.predict(pred_feature_maps)
        pred_scores = pred_confs * pred_probs

        boxes, scores, labels = gpu_nms(pred_boxes, pred_scores, num_class,
                                        max_boxes=200, score_thresh=0.3, nms_thresh=0.45)

        saver = tf.train.Saver()
        saver.restore(sess, restore_path)

        boxes_, scores_, labels_ = sess.run([boxes, scores, labels],
                                            feed_dict={input_data: img})

        # rescale the coordinates to the original image
        if letterbox:
            boxes_[:, [0, 2]] = (boxes_[:, [0, 2]] - dw) / resize_ratio
            boxes_[:, [1, 3]] = (boxes_[:, [1, 3]] - dh) / resize_ratio
        else:
            boxes_[:, [0, 2]] *= (width_ori / float(new_size[0]))
            boxes_[:, [1, 3]] *= (height_ori / float(new_size[1]))

        # Optional visualization, kept for reference:
        # for i in range(len(boxes_)):
        #     x0, y0, x1, y1 = boxes_[i]
        #     plot_one_box(img_ori, [x0, y0, x1, y1],
        #                  label=classes[labels_[i]] + ', {:.2f}%'.format(scores_[i] * 100),
        #                  color=color_table[labels_[i]])
        # cv2.imshow('Detection result', img_ori)
        # cv2.imwrite('detection_result.jpg', img_ori)
        # cv2.waitKey(0)

        tf.reset_default_graph()
        return boxes_, scores_, labels_
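# The letterbox un-mapping above is repeated verbatim in nearly every snippet in
# this section. A small helper could factor it out; a minimal sketch (the name
# `rescale_boxes` and its signature are mine, not from the source):
import numpy as np

def rescale_boxes(boxes, letterbox, resize_ratio=None, dw=None, dh=None,
                  ori_size=None, new_size=None):
    """Map [x0, y0, x1, y1] boxes from network-input space back to the original
    image. `ori_size` and `new_size` are (width, height) pairs."""
    boxes = np.asarray(boxes, np.float32)
    if letterbox:
        # undo the padding offset, then the uniform scale
        boxes[:, [0, 2]] = (boxes[:, [0, 2]] - dw) / resize_ratio
        boxes[:, [1, 3]] = (boxes[:, [1, 3]] - dh) / resize_ratio
    else:
        # a plain resize stretches each axis independently
        boxes[:, [0, 2]] *= ori_size[0] / float(new_size[0])
        boxes[:, [1, 3]] *= ori_size[1] / float(new_size[1])
    return boxes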
feature_map_1 = sess.graph.get_tensor_by_name('yolov4tiny/head/feature_map_1:0')
feature_map_2 = sess.graph.get_tensor_by_name('yolov4tiny/head/feature_map_2:0')
pred_boxes, pred_confs, pred_probs = yolo_model.predict([feature_map_1, feature_map_2])
pred_scores = pred_confs * pred_probs

boxes, scores, labels = gpu_nms(pred_boxes, pred_scores, class_num,
                                max_boxes=200, score_thresh=0.3, nms_thresh=0.45)

img_ori = cv2.imread("./data/demo_data/messi.jpg")
img, resize_ratio, dw, dh = letterbox_resize(img_ori, img_size[0], img_size[1])
img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
img = np.asarray(img, np.float32)
img = img[np.newaxis, :] / 255.

# `input` is the graph's input placeholder fetched earlier (it shadows the builtin)
boxes_, scores_, labels_ = sess.run([boxes, scores, labels], feed_dict={input: img})

# rescale the coordinates to the original image
boxes_[:, [0, 2]] = (boxes_[:, [0, 2]] - dw) / resize_ratio
boxes_[:, [1, 3]] = (boxes_[:, [1, 3]] - dh) / resize_ratio

for i in range(len(boxes_)):
    x0, y0, x1, y1 = boxes_[i]
    plot_one_box(img_ori, [x0, y0, x1, y1],
                 label=classes[labels_[i]] + ', {:.2f}%'.format(scores_[i] * 100),
                 color=color_table[labels_[i]])
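# The snippet above fetches tensors by name, so it presupposes that a frozen
# yolov4-tiny graph was already imported into sess.graph and that `input` is the
# feed placeholder. A minimal loading sketch under those assumptions -- the .pb
# path and the placeholder name are hypothetical:
import tensorflow as tf

pb_path = './data/yolov4tiny_frozen.pb'  # hypothetical path
with tf.gfile.GFile(pb_path, 'rb') as f:
    graph_def = tf.GraphDef()
    graph_def.ParseFromString(f.read())

sess = tf.Session()
# name='' assumes the 'yolov4tiny/...' scope is already baked into the graph
tf.import_graph_def(graph_def, name='')
input = sess.graph.get_tensor_by_name('input_data:0')  # hypothetical tensor name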
def test_display_one_img(img_path):
    print(img_path)
    img_ori = cv2.imread(img_path)
    print(img_ori.shape)
    if args.letterbox_resize:
        img, resize_ratio, dw, dh = letterbox_resize(img_ori, args.new_size[0], args.new_size[1])
    else:
        height_ori, width_ori = img_ori.shape[:2]
        img = cv2.resize(img_ori, tuple(args.new_size))
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    img = np.asarray(img, np.float32)
    img = img[np.newaxis, :] / 255.

    with sess_yolo.as_default():
        with graph_yolo.as_default():
            boxes_, scores_, labels_ = sess_yolo.run(
                [boxes, scores, labels], feed_dict={input_data: img})

    # rescale the coordinates to the original image
    if args.letterbox_resize:
        boxes_[:, [0, 2]] = (boxes_[:, [0, 2]] - dw) / resize_ratio
        boxes_[:, [1, 3]] = (boxes_[:, [1, 3]] - dh) / resize_ratio
    else:
        boxes_[:, [0, 2]] *= (width_ori / float(args.new_size[0]))
        boxes_[:, [1, 3]] *= (height_ori / float(args.new_size[1]))

    for j in range(len(boxes_)):
        x0, y0, x1, y1 = boxes_[j]
        x0 = np.maximum(x0, 0)
        y0 = np.maximum(y0, 0)
        x1 = np.maximum(x1, 0)
        y1 = np.maximum(y1, 0)
        label_index = labels_[j]
        # Crop the detected traffic signs (class 0) if the box is big enough
        if x1 - x0 > 10 and y1 - y0 > 10 and labels_[j] == 0:
            img_cropped = img_ori[int(y0):int(y1), int(x0):int(x1)]
            if img_cropped.shape[0] < 10 or img_cropped.shape[1] < 10:
                continue
            img_cropped = cv2.resize(img_cropped, (params.image_size, params.image_size))
            img_cropped = cv2.cvtColor(img_cropped, cv2.COLOR_BGR2RGB)
            img_cropped = img_cropped / 255.0
            if img_cropped.any():
                # classify the crop with the triplet network
                with graph_triplet.as_default():
                    with sess_triplet.as_default():
                        image_input = test_input_fn(img_cropped, params)
                        image_input = sess_triplet.run(image_input)
                        label_index = sess_triplet.run(
                            predict_labels, feed_dict={inputs: image_input})
                        label_index = label_index[0] + 3
                        print(label_index)
        plot_one_box(img_ori, [x0, y0, x1, y1],
                     label_index=label_index,
                     label=args.classes_all[label_index] + ', {:.2f}%'.format(scores_[j] * 100),
                     color=color_table[labels_[j]])

    cv2.namedWindow('Detection result', 0)
    cv2.resizeWindow('Detection result', 2400, 1800)
    cv2.imshow('Detection result', img_ori)
    cv2.imwrite('detection_result.jpg', img_ori)
    cv2.waitKey(0)
def test_one_img(img_path):
    img_ori = cv2.imread(img_path)
    img_name = img_path.strip().split('\\')[-1]
    img_name = img_name.split('.')[0]
    if args.letterbox_resize:
        img, resize_ratio, dw, dh = letterbox_resize(img_ori, args.new_size[0], args.new_size[1])
    else:
        height_ori, width_ori = img_ori.shape[:2]
        img = cv2.resize(img_ori, tuple(args.new_size))
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    img = np.asarray(img, np.float32)
    img = img[np.newaxis, :] / 255.

    with sess_yolo.as_default():
        with graph_yolo.as_default():
            boxes_, scores_, labels_ = sess_yolo.run(
                [boxes, scores, labels], feed_dict={input_data: img})

    # rescale the coordinates to the original image
    if args.letterbox_resize:
        boxes_[:, [0, 2]] = (boxes_[:, [0, 2]] - dw) / resize_ratio
        boxes_[:, [1, 3]] = (boxes_[:, [1, 3]] - dh) / resize_ratio
    else:
        boxes_[:, [0, 2]] *= (width_ori / float(args.new_size[0]))
        boxes_[:, [1, 3]] *= (height_ori / float(args.new_size[1]))

    for j in range(len(boxes_)):
        x0, y0, x1, y1 = boxes_[j]
        x0 = np.maximum(x0, 0)
        y0 = np.maximum(y0, 0)
        x1 = np.maximum(x1, 0)
        y1 = np.maximum(y1, 0)
        label_index = labels_[j]
        # Crop the detected traffic signs; the bbox must be big enough
        if x1 - x0 > 10 and y1 - y0 > 10 and labels_[j] == 0:
            img_cropped = img_ori[int(y0):int(y1), int(x0):int(x1)]
            if img_cropped.shape[0] < 10 or img_cropped.shape[1] < 10:
                continue
            img_cropped = cv2.resize(img_cropped, (params.image_size, params.image_size))
            img_cropped = cv2.cvtColor(img_cropped, cv2.COLOR_BGR2RGB)
            img_cropped = img_cropped / 255.0
            if img_cropped.any():
                with graph_triplet.as_default():
                    with sess_triplet.as_default():
                        image_input = test_input_fn(img_cropped, params)
                        image_input = sess_triplet.run(image_input)
                        label_index = sess_triplet.run(
                            predict_labels, feed_dict={inputs: image_input})
                        label_index = label_index[0] + 3
            if isinstance(label_index, np.ndarray):
                label_index = label_index[0]
            with open('D:\\Data\\TrafficSigns\\test_images/detect_result.txt', 'a+') as f:
                f.write(img_path + ' ' + str(x0) + ' ' + str(y0) + ' ' +
                        str(x1) + ' ' + str(y1) + ' ' + str(label_index) + '\n')
def estimatePose():
    parser = argparse.ArgumentParser(description="YOLO-V3 video test procedure.")
    # parser.add_argument("input_video", type=str,
    #                     help="The path of the input video.")
    parser.add_argument("--anchor_path", type=str, default="./data/yolo_anchors.txt",
                        help="The path of the anchor txt file.")
    parser.add_argument("--new_size", nargs='*', type=int, default=[416, 416],
                        help="Resize the input image with `new_size`, size format: [width, height]")
    parser.add_argument("--letterbox_resize", type=lambda x: (str(x).lower() == 'true'),
                        default=True, help="Whether to use the letterbox resize.")
    parser.add_argument("--class_name_path", type=str, default="./data/my_data/YOLOPose.names",
                        help="The path of the class names.")
    parser.add_argument("--restore_path", type=str, default="./data/pose_weights/lunge_best",
                        help="The path of the weights to restore.")
    parser.add_argument("--save_video", type=lambda x: (str(x).lower() == 'true'),
                        default=True, help="Whether to save the video detection results.")
    args = parser.parse_args()

    args.anchors = parse_anchors(args.anchor_path)
    args.classes = read_class_names(args.class_name_path)
    args.num_class = len(args.classes)
    color_table = get_color_table(args.num_class)

    # vid = cv2.VideoCapture(args.input_video)
    vid = cv2.VideoCapture('./data/demo/lunge_03.mp4')
    # vid = cv2.VideoCapture(r'C:\Users\soma\SMART_Referee\SMART_Referee_DL\data\lunge\video\lunge_03.mp4')
    video_frame_cnt = int(vid.get(7))
    video_width = int(vid.get(3))
    video_height = int(vid.get(4))
    video_fps = int(vid.get(5))

    trainer_pose = pd.read_csv('./data/ground_truth/output_right.csv', header=None)
    trainer_pose = trainer_pose.loc[:, [0, 1, 2, 3, 4, 17, 18, 19, 20, 21, 22,
                                        23, 24, 25, 26, 27, 28]]
    pca_df = trainer_pose.loc[:, [1, 2, 3, 4, 17, 18, 19, 20, 21, 22,
                                  23, 24, 25, 26, 27, 28]]
    # odd columns hold x coordinates, even columns hold y; scale from 416x416 to video size
    pca_df.loc[:, [c for c in pca_df.columns if c % 2 == 1]] = \
        pca_df.loc[:, [c for c in pca_df.columns if c % 2 == 1]] * video_width / 416
    pca_df.loc[:, [c for c in pca_df.columns if c % 2 == 0]] = \
        pca_df.loc[:, [c for c in pca_df.columns if c % 2 == 0]] * video_height / 416
    pca_df = pca_df.astype(int)
    pca_df = pca_df.replace(0, np.nan)
    pca_df = pca_df.dropna()

    pca = PCA(n_components=1)
    pca.fit(pca_df)

    size = [video_width, video_height]
    list_p = []
    waist_err = 0
    critical_point = 0
    past_idx = 0
    startTrig = 0
    cntdown = 90
    t = 0
    TRLEN = len(trainer_pose)
    modify_ankle = pca_df.iloc[0, :].values
    base_rect = [(int(video_width / 4), int(video_height / 10)),
                 (int(video_width * 3 / 4), int(video_height * 19 / 20))]
    c_knee = c_waist = c_speed = 0

    if args.save_video:
        fourcc = cv2.VideoWriter_fourcc('m', 'p', '4', 'v')
        videoWriter = cv2.VideoWriter('video_result.mp4', fourcc, video_fps,
                                      (video_width, video_height))

    with tf.Session() as sess:
        input_data = tf.placeholder(tf.float32, [1, args.new_size[1], args.new_size[0], 3],
                                    name='input_data')
        yolo_model = yolov3(args.num_class, args.anchors)
        with tf.variable_scope('yolov3'):
            pred_feature_maps = yolo_model.forward(input_data, False)
        pred_boxes, pred_confs, pred_probs = yolo_model.predict(pred_feature_maps)
        pred_scores = pred_confs * pred_probs

        boxes, scores, labels = gpu_nms(pred_boxes, pred_scores, args.num_class,
                                        max_boxes=200, score_thresh=0.3, nms_thresh=0.45)

        saver = tf.train.Saver()
        saver.restore(sess, args.restore_path)

        for i in range(video_frame_cnt):
            ret, img_ori = vid.read()
            if args.letterbox_resize:
                img, resize_ratio, dw, dh = letterbox_resize(
                    img_ori, args.new_size[0], args.new_size[1])
            else:
                height_ori, width_ori = img_ori.shape[:2]
                img = cv2.resize(img_ori, tuple(args.new_size))
            img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
            img = np.asarray(img, np.float32)
            img = img[np.newaxis, :] / 255.

            start_time = time.time()
            boxes_, scores_, labels_ = sess.run([boxes, scores, labels],
                                                feed_dict={input_data: img})

            # rescale the coordinates to the original image
            if args.letterbox_resize:
                boxes_[:, [0, 2]] = (boxes_[:, [0, 2]] - dw) / resize_ratio
                boxes_[:, [1, 3]] = (boxes_[:, [1, 3]] - dh) / resize_ratio
            else:
                boxes_[:, [0, 2]] *= (width_ori / float(args.new_size[0]))
                boxes_[:, [1, 3]] *= (height_ori / float(args.new_size[1]))

            people_pose = get_people_pose(boxes_, labels_, base_rect)  # list of dicts
            people_pose = np.array([p[1] for p in people_pose[0]]).flatten()  # dict of tuples -> flat array
            people_pose = people_pose[[0, 1, 2, 3, 16, 17, 18, 19, 20, 21, 22,
                                       23, 24, 25, 26, 27]]

            # Start trigger
            if startTrig == 2:
                pass
            elif startTrig == 0:  # waiting: draw the reference box
                cv2.rectangle(img_ori, base_rect[0], base_rect[1], (0, 0, 255), 2)
                if isInBox(people_pose, base_rect[0], base_rect[1]):
                    # t_resize_pose = resize_pose(people_pose, trainer_pose.iloc[0, 1:].values)
                    t_resize_pose = resize_pose(people_pose, pca_df.iloc[0, :].values)
                    img_ori = draw_ground_truth(img_ori, t_resize_pose)
                    # img_ori = draw_ground_truth(img_ori, pca_df.iloc[0, :].values)
                    startTrig = isStart(people_pose, trainer_pose.iloc[0, 1:].values, size)
                    cv2.imshow('image', img_ori)
                    if cv2.waitKey(1) & 0xFF == ord('q'):
                        break
                    continue
                else:
                    print("Please step inside the box!!")
                    continue
            elif startTrig == 1:  # countdown before the exercise starts
                img_ori = draw_ground_truth(img_ori, pca_df.iloc[0, :].values)
                cv2.putText(img_ori, str(int(cntdown / 30)), (100, 300),
                            cv2.FONT_HERSHEY_SIMPLEX, 10, (255, 0, 0), 10)
                cv2.imshow('image', img_ori)
                cntdown -= 1
                if cntdown == 0:
                    startTrig = 2
                if cv2.waitKey(1) & 0xFF == ord('q'):
                    break
                continue

            # check ankle: if the deviation exceeds 40, fall back to the previous value
            people_pose = check_ankle(list_p, people_pose, modify_ankle, size)
            # f = open('user.csv', 'a', encoding='utf-8', newline='')
            # wr = csv.writer(f)
            # wr.writerow(people_pose)

            # draw the ground truth
            list_p.append(people_pose)
            img_ori = draw_ground_truth(img_ori, pca_df.iloc[t, :].values)

            if check_waist(people_pose):
                waist_err += 1
            if waist_err == 60:  # give feedback after 60 waist errors (`is 60` compared identity, not value)
                feedback_waist()
                c_waist += 1
                waist_err = 0

            if trainer_pose.iloc[t, 0] == 1:  # t is a key time point + i frames
                critical_point += 1
                if critical_point % 2 == 0:
                    my_pose = makeMypose_df(list_p)
                    c_speed = check_speed(my_pose, trainer_pose.iloc[past_idx:t + 1, 1:],
                                          pca, c_speed)
                    c_knee = check_knee(people_pose, c_knee)
                modify_ankle = list_p[-1]
                list_p = []
                past_idx = t

            t += 1
            if t == TRLEN:
                break

            # img_ori = draw_body(img_ori, boxes_, labels_)
            # for i in range(len(boxes_)):
            #     x0, y0, x1, y1 = boxes_[i]
            #     plot_one_box(img_ori, [x0, y0, x1, y1],
            #                  label=args.classes[labels_[i]] + ', {:.2f}%'.format(scores_[i] * 100),
            #                  color=color_table[labels_[i]])
            # draw the user's pose
            # img_ori = draw_truth(img_ori, people_pose)

            end_time = time.time()
            cv2.putText(img_ori, '{:.2f}ms'.format((end_time - start_time) * 1000),
                        (40, 40), 0, fontScale=1, color=(0, 255, 0), thickness=2)
            cv2.imshow('image', img_ori)
            if args.save_video:
                videoWriter.write(img_ori)
            if cv2.waitKey(1) & 0xFF == ord('q'):
                cv2.destroyAllWindows()
                break

    vid.release()
    cv2.destroyAllWindows()
    if args.save_video:
        videoWriter.release()

    with open('./data/score/result.csv', 'a', encoding='utf-8', newline='') as f:
        wr = csv.writer(f)
        d = datetime.today().strftime("%Y/%m/%d")
        t = datetime.today().strftime("%H:%M:%S")
        wr.writerow([d, t, c_knee, c_waist, c_speed])
def test_display_one_img(img_path):
    img_ori = cv2.imread(img_path)
    if args.letterbox_resize:
        img, resize_ratio, dw, dh = letterbox_resize(img_ori, args.new_size[0], args.new_size[1])
    else:
        height_ori, width_ori = img_ori.shape[:2]
        img = cv2.resize(img_ori, tuple(args.new_size))
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    img = np.asarray(img, np.float32)
    img = img[np.newaxis, :] / 255.

    with sess_yolo.as_default():
        with graph_yolo.as_default():
            boxes_, scores_, labels_ = sess_yolo.run(
                [boxes, scores, labels], feed_dict={input_data: img})

    # rescale the coordinates to the original image
    if args.letterbox_resize:
        boxes_[:, [0, 2]] = (boxes_[:, [0, 2]] - dw) / resize_ratio
        boxes_[:, [1, 3]] = (boxes_[:, [1, 3]] - dh) / resize_ratio
    else:
        boxes_[:, [0, 2]] *= (width_ori / float(args.new_size[0]))
        boxes_[:, [1, 3]] *= (height_ori / float(args.new_size[1]))

    for j in range(len(boxes_)):
        x0, y0, x1, y1 = boxes_[j]
        x0 = np.maximum(x0, 0)
        y0 = np.maximum(y0, 0)
        x1 = np.maximum(x1, 0)
        y1 = np.maximum(y1, 0)
        label_index = labels_[j]
        # Crop the detected traffic signs
        if x1 - x0 > 10 and y1 - y0 > 10 and labels_[j] == 0:
            img_ori_ = cv2.cvtColor(img_ori, cv2.COLOR_BGR2RGB).astype(np.float32)
            img_cropped = img_ori_[int(y0):int(y1), int(x0):int(x1)]
            if img_cropped.any():
                # classify the crop with a SiameseNet in a fresh graph/session
                tf.reset_default_graph()
                new_graph = tf.Graph()
                with new_graph.as_default():
                    with tf.Session(graph=new_graph) as new_sess:
                        siamese_model = SiameseNet()
                        siamese_model.load_weights(
                            '/home/tracy/PycharmProjects/SiameseNet/checkpoint/RGBscaled/best/my_model')
                        img1, img2 = dataloader(img_cropped)
                        label_pred, label_score, _ = siamese_model.prediction(img1, img2)
                        label_pred_, label_score_ = new_sess.run([label_pred, label_score])

                # choose the label with the highest score
                pred_labels = np.nonzero(label_pred_)
                pred_scores = label_score_[pred_labels]
                if len(pred_scores) > 0:
                    label_index = np.argmax(pred_scores)
                    label_index = pred_labels[0][label_index] + 2

        plot_one_box(img_ori, [x0, y0, x1, y1],
                     label_index=label_index,
                     label=args.classes_all[label_index] + ', {:.2f}%'.format(scores_[j] * 100),
                     color=color_table[labels_[j]])

    cv2.namedWindow('Detection result', 0)
    cv2.resizeWindow('Detection result', 2400, 1800)
    cv2.imshow('Detection result', img_ori)
    cv2.imwrite('detection_result.jpg', img_ori)
    cv2.waitKey(0)
async def detection(self, img_ori, mode, detection_marker):
    if self.letterbox_resizes:
        img, resize_ratio, dw, dh = letterbox_resize(
            img_ori, self.new_size[0], self.new_size[1])
    else:
        height_ori, width_ori = img_ori.shape[:2]
        img = cv2.resize(img_ori, tuple(self.new_size))
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    img = np.asarray(img, np.float32)
    img = img[np.newaxis, :] / 255.

    start = time.time()
    boxes_, scores_, labels_ = self.sess.run(
        [self.boxes, self.scores, self.labels],
        feed_dict={self.input_data: img})
    end = time.time()
    print(end - start)

    # rescale the coordinates to the original image
    if self.letterbox_resizes:
        boxes_[:, [0, 2]] = (boxes_[:, [0, 2]] - dw) / resize_ratio
        boxes_[:, [1, 3]] = (boxes_[:, [1, 3]] - dh) / resize_ratio
    else:
        boxes_[:, [0, 2]] *= (width_ori / float(self.new_size[0]))
        boxes_[:, [1, 3]] *= (height_ori / float(self.new_size[1]))

    # SORT -- one tracker per person; SORT expects [x0, y0, x1, y1, score].
    # boxes_ already holds corner coordinates, so they are passed through
    # directly (the original unpacked them as x/y/w/h and re-added the corners).
    dets = []
    if len(boxes_) > 0:
        for i in range(len(boxes_)):
            x0, y0, x1, y1 = boxes_[i]
            dets.append([x0, y0, x1, y1, scores_[i]])
    dets = np.asarray(dets)
    tracks = self.tracker.update(dets)

    new_boxes = []
    indexIDs = []
    previous = self.memory.copy()
    self.memory = {}
    for track in tracks:
        new_boxes.append([track[0], track[1], track[2], track[3]])
        indexIDs.append(int(track[4]))
        self.memory[indexIDs[-1]] = new_boxes[-1]

    if len(new_boxes) > 0:
        i = 0
        for box in new_boxes:
            x = int(box[0])
            y = int(box[1])
            color = [int(c) for c in self.COLORS[indexIDs[i] % len(self.COLORS)]]
            if indexIDs[i] in previous:
                previous_box = previous[indexIDs[i]]
                (x2, y2) = (int(previous_box[0]), int(previous_box[1]))
                p0 = (int(x + 10), int(y + 100))
                p1 = (int(x2 + 10), int(y2 + 100))
                cv2.line(img_ori, p0, p1, color, 3)  # tracker line
                # count a crossing when the track segment intersects the marker line
                if intersect(p0, p1,
                             (detection_marker.X1, detection_marker.Y1),
                             (detection_marker.X2, detection_marker.Y2)):
                    self.counter += 1
                    if mode == 'PH':
                        if self.classes[labels_[i]] not in ('PHV', 'PH'):
                            self.violation += 1
                    elif mode == 'PV':
                        if self.classes[labels_[i]] not in ('PHV', 'PV'):
                            self.violation += 1
                    elif self.classes[labels_[i]] != mode:
                        self.violation += 1
            i += 1

    cv2.line(img_ori,
             (detection_marker.X1, detection_marker.Y1),
             (detection_marker.X2, detection_marker.Y2),
             (0, 255, 255), 3)

    for i in range(len(boxes_)):
        x0, y0, x1, y1 = boxes_[i]
        plot_one_box(img_ori, [x0, y0, x1, y1],
                     label=self.classes[labels_[i]] + ', {:.2f}%'.format(scores_[i] * 100),
                     color=self.color_table[labels_[i]])
        if mode == 'PH':
            if self.classes[labels_[i]] not in ('PHV', 'PH'):
                cv2.putText(img_ori, 'Please wear: a helmet', (550, 40), 0, 1, (0, 0, 255), 2)
        elif mode == 'PV':
            if self.classes[labels_[i]] not in ('PHV', 'PV'):
                cv2.putText(img_ori, 'Please wear: a safety vest', (550, 40), 0, 1, (0, 0, 255), 2)
        elif mode == 'PLC' and self.classes[labels_[i]] != 'PLC':
            cv2.putText(img_ori, 'Please wear: a lab coat', (550, 40), 0, 1, (0, 0, 255), 2)
        elif mode == 'PHV' and self.classes[labels_[i]] != 'PHV':
            cv2.putText(img_ori, 'Please wear: a helmet and a safety vest', (550, 40), 0, 1, (0, 0, 255), 2)
        elif self.classes[labels_[i]] != mode:
            cv2.putText(img_ori, 'Please wear: ' + str(mode), (550, 40), 0, 1, (0, 0, 255), 2)

    # print({'TotalViolation': self.violation, 'TotalPeople': self.counter})
    # cv2.putText(img_ori, mode + ' Mode', (300, 40), 0, fontScale=1, color=(0, 255, 0), thickness=2)
    # cv2.putText(img_ori, 'People Count: ' + str(self.counter), (40, 620), 0, 1, (255, 255, 255), 2)
    # cv2.putText(img_ori, 'Violation Count: ' + str(self.violation), (40, 660), 0, 1, (255, 255, 255), 2)
    return {'TotalViolation': self.violation, 'TotalPeople': self.counter}, img_ori
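# `intersect` is not defined in the snippet above; in line-crossing counters like
# this one it is usually the standard CCW segment-intersection test. A minimal
# sketch under that assumption:
def ccw(a, b, c):
    # True if points a, b, c wind counter-clockwise
    return (c[1] - a[1]) * (b[0] - a[0]) > (b[1] - a[1]) * (c[0] - a[0])

def intersect(a, b, c, d):
    # segments ab and cd cross iff a and b lie on opposite sides of cd
    # and c and d lie on opposite sides of ab
    return ccw(a, c, d) != ccw(b, c, d) and ccw(a, b, c) != ccw(a, b, d)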
def video_detect(input_args):
    vid = cv2.VideoCapture(input_args.input_video)
    video_frame_cnt = int(vid.get(7))
    video_width = int(vid.get(3))
    video_height = int(vid.get(4))
    video_fps = int(vid.get(5))

    fourcc = cv2.VideoWriter_fourcc('m', 'p', '4', 'v')
    # pred_args is assumed to be a module-level config object
    video_writer = cv2.VideoWriter(pred_args.output_video, fourcc, video_fps,
                                   (video_width, video_height))

    with tf.Session() as sess:
        input_data = tf.placeholder(tf.float32,
                                    [1, pred_args.new_size[1], pred_args.new_size[0], 3],
                                    name='input_data')
        yolo_model = yolov3(pred_args.num_class, pred_args.anchors)
        with tf.variable_scope('yolov3'):
            pred_feature_maps = yolo_model.forward(input_data, False)
        pred_boxes, pred_confs, pred_probs = yolo_model.predict(pred_feature_maps)
        pred_scores = pred_confs * pred_probs

        boxes, scores, labels = gpu_nms(pred_boxes, pred_scores, pred_args.num_class,
                                        max_boxes=200, score_thresh=0.3, nms_thresh=0.45)

        saver = tf.train.Saver()
        saver.restore(sess, pred_args.weight_path)

        for i in range(video_frame_cnt):
            ret, img_ori = vid.read()
            if input_args.use_letterbox_resize:
                img, resize_ratio, dw, dh = letterbox_resize(
                    img_ori, pred_args.new_size[0], pred_args.new_size[1])
            else:
                height_ori, width_ori = img_ori.shape[:2]
                img = cv2.resize(img_ori, tuple(pred_args.new_size))
            img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
            img = np.asarray(img, np.float32)
            img = img[np.newaxis, :] / 255.

            start_time = time.time()
            boxes_, scores_, labels_ = sess.run([boxes, scores, labels],
                                                feed_dict={input_data: img})
            end_time = time.time()

            # rescale the coordinates to the original image
            if input_args.use_letterbox_resize:
                boxes_[:, [0, 2]] = (boxes_[:, [0, 2]] - dw) / resize_ratio
                boxes_[:, [1, 3]] = (boxes_[:, [1, 3]] - dh) / resize_ratio
            else:
                boxes_[:, [0, 2]] *= (width_ori / float(pred_args.new_size[0]))
                boxes_[:, [1, 3]] *= (height_ori / float(pred_args.new_size[1]))

            for j in range(len(boxes_)):  # renamed from `i`, which shadowed the frame index
                x0, y0, x1, y1 = boxes_[j]
                plot_one_box(img_ori, [x0, y0, x1, y1],
                             label=pred_args.classes[labels_[j]] + ', {:.2f}%'.format(scores_[j] * 100),
                             color=pred_args.color_table[labels_[j]])
            cv2.putText(img_ori, '{:.2f}ms'.format((end_time - start_time) * 1000),
                        (40, 40), 0, fontScale=1, color=(0, 255, 0), thickness=2)
            cv2.imshow('Detection result', img_ori)
            video_writer.write(img_ori)
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break

    vid.release()
    video_writer.release()
def img_detect(input_args):
    """Single-image detection.

    :param input_args:
    :return:
    """
    img_ori = cv2.imread(input_args.input_image)  # load with OpenCV
    if input_args.use_letterbox_resize:
        img, resize_ratio, dw, dh = letterbox_resize(
            img_ori, pred_args.new_size[0], pred_args.new_size[1])
    else:
        height_ori, width_ori = img_ori.shape[:2]
        img = cv2.resize(img_ori, tuple(pred_args.new_size))

    # convert img to RGB and float, then normalize
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    img = np.asarray(img, np.float32)
    img = img[np.newaxis, :] / 255.

    sess = tf.Session()
    input_data = tf.placeholder(tf.float32,
                                [1, pred_args.new_size[1], pred_args.new_size[0], 3],
                                name='input_data')
    with tf.variable_scope('yolov3'):
        yolo_model = yolov3(pred_args.num_class, pred_args.anchors)
        pred_feature_maps = yolo_model.forward(input_data, False)
    pred_boxes, pred_confs, pred_probs = yolo_model.predict(pred_feature_maps)
    pred_scores = pred_confs * pred_probs

    boxes, scores, labels = gpu_nms(pred_boxes, pred_scores, pred_args.num_class,
                                    max_boxes=200, score_thresh=0.3, nms_thresh=0.45)

    saver = tf.train.Saver()
    saver.restore(sess, pred_args.weight_path)

    boxes_, scores_, labels_ = sess.run([boxes, scores, labels],
                                        feed_dict={input_data: img})

    # rescale the coordinates to the original image
    if input_args.use_letterbox_resize:
        boxes_[:, [0, 2]] = (boxes_[:, [0, 2]] - dw) / resize_ratio
        boxes_[:, [1, 3]] = (boxes_[:, [1, 3]] - dh) / resize_ratio
    else:
        boxes_[:, [0, 2]] *= (width_ori / float(pred_args.new_size[0]))
        boxes_[:, [1, 3]] *= (height_ori / float(pred_args.new_size[1]))

    print('box coords:', boxes_, '\n' + '*' * 30)
    print('scores:', scores_, '\n' + '*' * 30)
    print('labels:', labels_)

    for i in range(len(boxes_)):
        x0, y0, x1, y1 = boxes_[i]
        plot_one_box(img_ori, [x0, y0, x1, y1],
                     label=pred_args.classes[labels_[i]] + ', {:.2f}%'.format(scores_[i] * 100),
                     color=pred_args.color_table[labels_[i]])
    cv2.imshow('Detection result', img_ori)
    cv2.imwrite(pred_args.output_image, img_ori)
    cv2.waitKey(0)
    sess.close()
def single_image_test(imgname):
    parser = argparse.ArgumentParser(description="YOLO-V3 test single image test procedure.")
    parser.add_argument("--input_image", type=str,
                        default="./static/uploads/beforeimg/" + imgname,
                        help="The path of the input image.")
    parser.add_argument("--anchor_path", type=str, default="./data/yolo_anchors.txt",
                        help="The path of the anchor txt file.")
    parser.add_argument("--new_size", nargs='*', type=int, default=[416, 416],
                        help="Resize the input image with `new_size`, size format: [width, height]")
    parser.add_argument("--letterbox_resize", type=lambda x: (str(x).lower() == 'true'),
                        default=True, help="Whether to use the letterbox resize.")
    parser.add_argument("--class_name_path", type=str, default="./data/coco.names",
                        help="The path of the class names.")
    parser.add_argument("--restore_path", type=str, default="./data/darknet_weights/yolov3.ckpt",
                        help="The path of the weights to restore.")
    args = parser.parse_args()

    args.anchors = parse_anchors(args.anchor_path)
    args.classes = read_class_names(args.class_name_path)
    args.num_class = len(args.classes)
    color_table = get_color_table(args.num_class)

    img_ori = cv2.imread(args.input_image)
    if args.letterbox_resize:
        img, resize_ratio, dw, dh = letterbox_resize(img_ori, args.new_size[0], args.new_size[1])
    else:
        height_ori, width_ori = img_ori.shape[:2]
        img = cv2.resize(img_ori, tuple(args.new_size))
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    img = np.asarray(img, np.float32)
    img = img[np.newaxis, :] / 255.

    with tf.Session() as sess:
        input_data = tf.placeholder(tf.float32, [1, args.new_size[1], args.new_size[0], 3],
                                    name='input_data')
        yolo_model = yolov3(args.num_class, args.anchors)
        with tf.variable_scope('yolov3'):
            pred_feature_maps = yolo_model.forward(input_data, False)
        pred_boxes, pred_confs, pred_probs = yolo_model.predict(pred_feature_maps)
        pred_scores = pred_confs * pred_probs

        boxes, scores, labels = gpu_nms(pred_boxes, pred_scores, args.num_class,
                                        max_boxes=200, score_thresh=0.3, nms_thresh=0.45)

        saver = tf.train.Saver()
        saver.restore(sess, args.restore_path)

        boxes_, scores_, labels_ = sess.run([boxes, scores, labels],
                                            feed_dict={input_data: img})

        # rescale the coordinates to the original image
        if args.letterbox_resize:
            boxes_[:, [0, 2]] = (boxes_[:, [0, 2]] - dw) / resize_ratio
            boxes_[:, [1, 3]] = (boxes_[:, [1, 3]] - dh) / resize_ratio
        else:
            boxes_[:, [0, 2]] *= (width_ori / float(args.new_size[0]))
            boxes_[:, [1, 3]] *= (height_ori / float(args.new_size[1]))

        print("box coords:")
        print(boxes_)
        print('*' * 30)
        print("scores:")
        print(scores_)
        print('*' * 30)
        print("labels:")
        print(labels_)

        for i in range(len(boxes_)):
            x0, y0, x1, y1 = boxes_[i]
            plot_one_box(img_ori, [x0, y0, x1, y1],
                         label=args.classes[labels_[i]] + ', {:.2f}%'.format(scores_[i] * 100),
                         color=color_table[labels_[i]])
        # cv2.imshow('Detection result', img_ori)
        cv2.imwrite('static/uploads/afterimg/' + imgname, img_ori)
        # cv2.waitKey(0)

        doc = []
        doc.append("Found:")
        item = ["helmet", "person without a helmet"]
        if len(labels_) == 0:
            doc.append("Nothing was detected.")
        else:
            for i in range(len(labels_)):
                doc.append(item[labels_[i]] + ", box: " + str(boxes_[i]) +
                           ", confidence: " + str(scores_[i]))
        return doc
def detect_in_video(video_path):
    # VideoWriter creates a copy of the video with the detection overlays.
    # Keep in mind the frame size has to be the same as the original video.
    # out = cv2.VideoWriter('../temp/' + 'WIN_20191218_11_03_57_Pro.mp4',
    #                       cv2.VideoWriter_fourcc('M', 'J', 'P', 'G'), 10, (1280, 720))
    if is_yolo:
        print('yolo!')
        configuration = tf.ConfigProto(device_count={"GPU": 0})
        sess = tf.Session(config=configuration)
        input_data = tf.placeholder(tf.float32, [1, new_size[1], new_size[0], 3],
                                    name='input_data')
        yolo_model = yolov3(num_class, anchors)
        with tf.variable_scope('yolov3'):
            pred_feature_maps = yolo_model.forward(input_data, False)
        pred_boxes, pred_confs, pred_probs = yolo_model.predict(pred_feature_maps)
        pred_scores = pred_confs * pred_probs
        boxes, scores, labels = gpu_nms(pred_boxes, pred_scores, num_class,
                                        max_boxes=1, score_thresh=0.2, nms_thresh=0.45)
        saver = tf.train.Saver()
        saver.restore(sess, restore_path)
    else:
        detection_graph = tf.Graph()
        with detection_graph.as_default():
            od_graph_def = tf.GraphDef()
            with tf.gfile.GFile(PATH_TO_CKPT, 'rb') as fid:
                serialized_graph = fid.read()
                od_graph_def.ParseFromString(serialized_graph)
                tf.import_graph_def(od_graph_def, name='')
        configuration = tf.ConfigProto(device_count={"GPU": 0})
        sess = tf.Session(config=configuration, graph=detection_graph)

        # Definite input and output tensors for detection_graph
        image_tensor = detection_graph.get_tensor_by_name('image_tensor:0')
        # Each box represents a part of the image where a particular object was detected.
        detection_boxes = detection_graph.get_tensor_by_name('detection_boxes:0')
        # Each score represents the level of confidence for each object;
        # it is shown on the result image together with the class label.
        detection_scores = detection_graph.get_tensor_by_name('detection_scores:0')
        detection_classes = detection_graph.get_tensor_by_name('detection_classes:0')
        num_detections = detection_graph.get_tensor_by_name('num_detections:0')

        label_map = label_map_util.load_labelmap(PATH_TO_LABELS)
        categories = label_map_util.convert_label_map_to_categories(
            label_map, max_num_classes=NUM_CLASSES, use_display_name=True)
        category_index = label_map_util.create_category_index(categories)

    frame_statistics = []
    frame_id = 1
    is_skip_frame = True
    frame_skip_count = 0

    # Create a directory of frames for the given video
    video_base_name = os.path.basename(video_path)
    video_name = os.path.splitext(video_base_name)[0]
    video_dir = join(os.path.dirname(video_path), video_name)
    images_dir = "images"
    video_images_dir = join(video_dir, images_dir)
    if not os.path.exists(video_images_dir):
        os.makedirs(video_images_dir)
    else:
        # Remove all frames from the target directory
        remove_files_in_dir(video_images_dir)
    video_images_dir_rat = join(video_images_dir, 'rat')
    video_images_dir_mouse = join(video_images_dir, 'mouse')
    os.makedirs(video_images_dir_rat, exist_ok=True)
    os.makedirs(video_images_dir_mouse, exist_ok=True)
    remove_files_in_dir(video_images_dir_rat)
    remove_files_in_dir(video_images_dir_mouse)

    # Load the video
    cap = cv2.VideoCapture(video_path)
    video_frame_cnt = int(cap.get(7))
    video_fps = int(cap.get(5))
    # Query the video resolution
    video_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    video_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))

    # Frame images are written relative to the video images directory
    cur_dir = os.getcwd()
    os.chdir(video_images_dir)

    while cap.isOpened():
        # Read the frame
        ret, frame = cap.read()
        if frame is not None:
            # By default, OpenCV uses the BGR color space. This short blog post
            # explains this better:
            # https://www.learnopencv.com/why-does-opencv-use-bgr-color-format/
            # color_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            if not is_skip_frame:
                if is_yolo:
                    if is_letterbox_resize:
                        img, resize_ratio, dw, dh = letterbox_resize(
                            frame, new_size[0], new_size[1])
                    else:
                        height_ori, width_ori = frame.shape[:2]
                        img = cv2.resize(frame, tuple(new_size))
                    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
                    img = np.asarray(img, np.float32)
                    img = img[np.newaxis, :] / 255.

                    start_time = time.time()
                    boxes_, scores_, labels_ = sess.run(
                        [boxes, scores, labels], feed_dict={input_data: img})
                    end_time = time.time()

                    # rescale the coordinates to the original image
                    if is_letterbox_resize:
                        boxes_[:, [0, 2]] = (boxes_[:, [0, 2]] - dw) / resize_ratio
                        boxes_[:, [1, 3]] = (boxes_[:, [1, 3]] - dh) / resize_ratio
                    else:
                        boxes_[:, [0, 2]] *= (width_ori / float(new_size[0]))
                        boxes_[:, [1, 3]] *= (height_ori / float(new_size[1]))

                    for i in range(len(boxes_)):
                        if scores_[i] == max(scores_):
                            x0, y0, x1, y1 = boxes_[i]
                            plot_one_box(frame, [x0, y0, x1, y1],
                                         label=classes_yolo[labels_[i]] + ', {:.2f}%'.format(scores_[i] * 100),
                                         color=color_table[labels_[i]])
                            rodent_confidence = scores_[i]
                            rodent_class_id = labels_[i] + 1
                            rodent_class_name = classes_yolo[labels_[i]]
                            if rodent_confidence >= .20:
                                frame_statistics.append({
                                    'frame_id': frame_id,
                                    'confidence': rodent_confidence,
                                    'rodent_class_id': rodent_class_id,
                                    'rodent_class_name': rodent_class_name,
                                })
                                # Save the frame
                                frame_name = rodent_class_name + '/image' + str(frame_id) + '.jpg'
                                cv2.imwrite(frame_name, frame)
                                # Optionally save an xml annotation:
                                # scores = np.squeeze(scores[0])
                                # bbox_coords = boxes[0]
                                # writer = Writer('.', video_width, video_height)
                                # writer.addObject(rodent_class_name,
                                #                  bbox_coords[1] * video_width,
                                #                  bbox_coords[0] * video_height,
                                #                  bbox_coords[3] * video_width,
                                #                  bbox_coords[2] * video_height)
                                # writer.save('image' + str(frame_id) + '.xml')

                    cv2.putText(frame, '{:.2f}ms'.format((end_time - start_time) * 1000),
                                (40, 40), 0, fontScale=1, color=(0, 255, 0), thickness=2)
                else:
                    image_np_expanded = np.expand_dims(frame, axis=0)
                    # Actual detection
                    (boxes, scores, classes, num) = sess.run(
                        [detection_boxes, detection_scores, detection_classes, num_detections],
                        feed_dict={image_tensor: image_np_expanded})
                    # Visualization of the results of a detection;
                    # note: perform the detections using a higher threshold
                    vis_util.visualize_boxes_and_labels_on_image_array(
                        frame,
                        np.squeeze(boxes[0]),
                        np.squeeze(classes[0]).astype(np.int32),
                        np.squeeze(scores[0]),
                        category_index,
                        use_normalized_coordinates=True,
                        line_thickness=8,
                        max_boxes_to_draw=1,
                        min_score_thresh=.20)

            cv2.imshow('frame', cv2.resize(frame, (800, 600)))
            output_rgb = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
            # out.write(output_rgb)

            # Skip the frame if requested
            if is_skip_frame:
                while 1:
                    key = cv2.waitKey(1)
                    if key == 32:  # 'space' pressed
                        frame_skip_count += 1
                        print("You have skipped " + str(frame_skip_count) + " frame(s)")
                        break
                    elif key == 113 or key == 233:  # 'q' (or Cyrillic 'й') pressed
                        is_skip_frame = False
                        break

            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
            frame_id += 1
        else:
            break  # end of the video stream

    # out.release()
    os.chdir(cur_dir)
    cap.release()
    cv2.destroyAllWindows()

    statistics = {
        'frame_count': frame_id,               # number of frames
        'frame_skip_count': frame_skip_count,  # number of skipped frames
        'frame_rodent_count': 0,               # frames with a rodent
        'frame_rat_count': 0,                  # frames with a rat
        'frame_mouse_count': 0,                # frames with a mouse
        'sum_confidence_rat': 0,               # summed rat confidence over the video
        'sum_confidence_mouse': 0,             # summed mouse confidence over the video
        'mean_confidence_rat': 0,              # mean rat confidence over the video
        'mean_confidence_mouse': 0             # mean mouse confidence over the video
    }
    for frame_statistic in frame_statistics:
        if frame_statistic['rodent_class_name'] == 'rat':
            statistics['frame_rodent_count'] += 1
            statistics['frame_rat_count'] += 1
            statistics['sum_confidence_rat'] += frame_statistic['confidence']
            statistics['mean_confidence_rat'] = (
                statistics['sum_confidence_rat'] / statistics['frame_rat_count'])
        elif frame_statistic['rodent_class_name'] == 'mouse':
            statistics['frame_rodent_count'] += 1
            statistics['frame_mouse_count'] += 1
            statistics['sum_confidence_mouse'] += frame_statistic['confidence']
            statistics['mean_confidence_mouse'] = (
                statistics['sum_confidence_mouse'] / statistics['frame_mouse_count'])

    print('----->>> Detection results <<<-----')
    print('Number of frames: ' + str(statistics['frame_count']))
    print('Number of skipped frames: ' + str(statistics['frame_skip_count']))
    print('Number of frames with a rodent: ' + str(statistics['frame_rodent_count']))
    print('Number of frames with a rat: ' + str(statistics['frame_rat_count']))
    print('Number of frames with a mouse: ' + str(statistics['frame_mouse_count']))
    print('Mean rat confidence over the video: ' + str(statistics['mean_confidence_rat']))
    print('Mean mouse confidence over the video: ' + str(statistics['mean_confidence_mouse']))