img = img[np.newaxis, :] / 255.

with tf.Session() as sess:
    input_data = tf.placeholder(tf.float32, [1, args.new_size[1], args.new_size[0], 3],
                                name='input_data')
    yolo_model = yolov3(args.num_class, args.anchors)
    with tf.variable_scope('yolov3'):
        pred_feature_maps = yolo_model.forward(input_data, False)
    pred_boxes, pred_confs, pred_probs = yolo_model.predict(pred_feature_maps)

    pred_scores = pred_confs * pred_probs

    boxes, scores, labels = gpu_nms(pred_boxes, pred_scores, args.num_class,
                                    max_boxes=200, score_thresh=0.2, nms_thresh=0.4)

    saver = tf.train.Saver()
    saver.restore(sess, args.restore_path)
    print('Restored weights from:', args.restore_path)

    boxes_, scores_, labels_ = sess.run([boxes, scores, labels],
                                        feed_dict={input_data: img})

    # Rescale the coordinates to the original image.
    if args.letterbox_resize:
        boxes_[:, [0, 2]] = (boxes_[:, [0, 2]] - dw) / resize_ratio
        boxes_[:, [1, 3]] = (boxes_[:, [1, 3]] - dh) / resize_ratio
    else:
        boxes_[:, [0, 2]] *= (width_ori / float(args.new_size[0]))
        boxes_[:, [1, 3]] *= (height_ori / float(args.new_size[1]))
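
# --- Illustration (a minimal sketch, assuming the common letterbox
# implementation; not taken from the original script): where resize_ratio, dw
# and dh in the inverse mapping above come from.
import cv2
import numpy as np

def letterbox_resize_sketch(img, new_width, new_height):
    # Scale so the image fits inside (new_width, new_height) without distortion.
    height_ori, width_ori = img.shape[:2]
    resize_ratio = min(new_width / width_ori, new_height / height_ori)
    resize_w = int(resize_ratio * width_ori)
    resize_h = int(resize_ratio * height_ori)
    resized = cv2.resize(img, (resize_w, resize_h))
    # Center the resized image on a gray canvas; dw and dh are the padding
    # offsets that box coordinates are shifted by before dividing by the ratio.
    padded = np.full((new_height, new_width, 3), 128, np.uint8)
    dw = (new_width - resize_w) // 2
    dh = (new_height - resize_h) // 2
    padded[dh:dh + resize_h, dw:dw + resize_w, :] = resized
    return padded, resize_ratio, dw, dh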
# Save info for later plotting:
y_axis_mAP[idx] = mAP
y_axis_mIT[idx] = np.mean(mean_it)
y_axis_mAPperIT[idx] = mAP / np.mean(mean_it)

# LOAD YOLOv3 MODEL
with tf.Session() as sess:
    input_data = tf.placeholder(tf.float32, [1, args.new_size[1], args.new_size[0], 3],
                                name='input_data')
    yolo_model = yolov3(args.num_class, args.anchors)
    with tf.variable_scope('yolov3'):
        pred_feature_maps = yolo_model.forward(input_data, False)
    pred_boxes, pred_confs, pred_probs = yolo_model.predict(pred_feature_maps)
    pred_scores = pred_confs * pred_probs
    boxes, scores, labels = gpu_nms(pred_boxes, pred_scores, args.num_class,
                                    max_boxes=50, score_thresh=0.4, nms_thresh=0.5)

    saver = tf.train.Saver()
    saver.restore(sess, args.restore_path)

    # DETECTION LOOP FOR ALL IMAGES
    annot = []
    first_it = np.zeros(len(TEST_IMAGE_PATHS))
    mean_it = np.zeros(len(TEST_IMAGE_PATHS))
    total_it = np.zeros(len(TEST_IMAGE_PATHS))
    for index, input_image in enumerate(TEST_IMAGE_PATHS):
        # DETECTION
        print('Running inference for model {}...'.format(model))
        iterations = 5
        inf_times = np.zeros(iterations)
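
# --- Illustration (hypothetical helper, not in the original file): one way the
# first_it/mean_it/total_it arrays above could be filled per image. The first
# run is tracked separately because it includes one-time graph warm-up cost.
import time
import numpy as np

def time_inference(run_once, iterations=5):
    inf_times = np.empty(iterations)
    for i in range(iterations):
        start = time.time()
        run_once()  # e.g. lambda: sess.run([boxes, scores, labels], feed_dict=...)
        inf_times[i] = time.time() - start
    # Return (first run incl. warm-up, steady-state mean, total).
    return inf_times[0], inf_times[1:].mean(), inf_times.sum()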
from model import yolov3

# Set up logging.
logging.basicConfig(level=logging.DEBUG,
                    format='%(asctime)s %(levelname)s %(message)s',
                    datefmt='%a, %d %b %Y %H:%M:%S',
                    filename=args.progress_log_path,
                    filemode='w')

# Set up placeholders.
is_training = tf.placeholder(tf.bool, name="phase_train")
handle_flag = tf.placeholder(tf.string, [], name='iterator_handle_flag')
# Register the GPU NMS operation here for the evaluation scheme below.
pred_boxes_flag = tf.placeholder(tf.float32, [1, None, None])
pred_scores_flag = tf.placeholder(tf.float32, [1, None, None])
gpu_nms_op = gpu_nms(pred_boxes_flag, pred_scores_flag, args.class_num,
                     args.nms_topk, args.score_threshold, args.nms_threshold)

##################
# tf.data pipeline
##################
train_dataset = tf.data.TextLineDataset(args.train_file)
train_dataset = train_dataset.shuffle(args.train_img_cnt)
train_dataset = train_dataset.batch(args.batch_size)
train_dataset = train_dataset.map(
    lambda x: tf.py_func(get_batch_data,
                         inp=[x, args.class_num, args.img_size, args.anchors, 'train',
                              args.multi_scale_train, args.use_mix_up,
                              args.letterbox_resize],
                         Tout=[tf.int64, tf.float32, tf.float32, tf.float32, tf.float32]),
    num_parallel_calls=args.num_threads)
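
# --- Illustration (assumption, based on the standard TF1 feedable-iterator
# pattern; not confirmed from the original file): how the iterator_handle_flag
# placeholder above is typically wired, so a single get_next op can serve both
# the training and validation pipelines.
train_iterator = train_dataset.make_one_shot_iterator()
iterator = tf.data.Iterator.from_string_handle(
    handle_flag, train_dataset.output_types, train_dataset.output_shapes)
batch = iterator.get_next()
# At run time, feed the dataset's string handle to choose which pipeline to pull:
# train_handle = sess.run(train_iterator.string_handle())
# sess.run(batch, feed_dict={handle_flag: train_handle, is_training: True})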
def Run(self, img_name, half=True):
    # Lazily build the TF graph on the first call.
    if self.sess is None:
        import tensorflow as tf
        from model import yolov3
        from utils.misc_utils import parse_anchors, read_class_names
        from utils.nms_utils import gpu_nms

        self.anchors = parse_anchors('./model/yolo_anchors.txt')
        self.classes = read_class_names('./model/coco.names')
        self.num_class = len(self.classes)
        self.sess = tf.Session()
        self.input_data = tf.placeholder(tf.float32,
                                         [1, self.new_size[1], self.new_size[0], 3],
                                         name='input_data')
        self.yolo_model = yolov3(self.num_class, self.anchors)
        with tf.variable_scope('yolov3'):
            pred_feature_maps = self.yolo_model.forward(self.input_data, False)
        self.pred_boxes, self.pred_confs, self.pred_probs, self.map4 = \
            self.yolo_model.predict(pred_feature_maps)
        self.pred_scores = self.pred_confs * self.pred_probs
        self.boxes, self.scores, self.labels = gpu_nms(self.pred_boxes, self.pred_scores,
                                                       self.num_class, max_boxes=200,
                                                       score_thresh=0.3, nms_thresh=0.45)
        self.saver = tf.train.Saver()
        self.saver.restore(self.sess, self.MODEL_NAME)
        # self.sess.run(tf.global_variables_initializer())
        print('TensorFlow initialized')

    boxes, scores, feature_map = self.GetBoundingBox(img_name, half)
    if len(boxes) != 0:
        # Use the highest-scoring box to estimate distance and angle.
        dist, angle = self.getDistanceAndAngle(boxes[np.argmax(scores)],
                                               self.width_ori, self.height_ori)
        self.lastNDistances.append(dist)
        self.lastNAngles.append(angle)
        # Exponential moving average; the first measurement is taken as-is.
        alpha = self.alpha if len(self.lastNDistances) > 1 else 1
        self.exponentialMovingAverageDist = alpha * dist + \
            (1 - alpha) * self.exponentialMovingAverageDist
        self.exponentialMovingAverageAngle = alpha * angle + \
            (1 - alpha) * self.exponentialMovingAverageAngle
        angle, _ = self.segmentation.FindPossibleAngle(boxes[np.argmax(scores)], angle,
                                                       feature_map, self.width_ori,
                                                       self.height_ori)
    else:
        print('No box found')
        dist, angle = self.Extrapolate()
        angle, _ = self.segmentation.FindPossibleAngle(boxes, angle, feature_map,
                                                       self.width_ori, self.height_ori)
    self.KeepLastN()
    angle = self.LimitAngles(angle)
    dist = self.LimitDistance(dist)
    return dist, angle
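
# --- Illustration (standalone rewrite of the smoothing above; the alpha value
# is an arbitrary assumption): an exponential moving average trusts the newest
# measurement more as alpha approaches 1 and smooths harder as it approaches 0.
def ema_update(prev_ema, new_value, alpha=0.3):
    return alpha * new_value + (1 - alpha) * prev_ema

# Example: smoothing a noisy distance series, seeded with the first measurement.
ema = 10.0
for measurement in [10.4, 9.7, 10.1, 11.0]:
    ema = ema_update(ema, measurement)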
def detect_in_video(video_path):
    # VideoWriter would be responsible for creating a copy of the video with the
    # detection overlays. Keep in mind the frame size has to match the original video.
    # out = cv2.VideoWriter('../temp/' + 'WIN_20191218_11_03_57_Pro.mp4',
    #                       cv2.VideoWriter_fourcc('M', 'J', 'P', 'G'), 10, (1280, 720))

    if is_yolo:
        print('yolo!')
        configuration = tf.ConfigProto(device_count={"GPU": 0})  # run on CPU only
        sess = tf.Session(config=configuration)
        input_data = tf.placeholder(tf.float32, [1, new_size[1], new_size[0], 3],
                                    name='input_data')
        yolo_model = yolov3(num_class, anchors)
        with tf.variable_scope('yolov3'):
            pred_feature_maps = yolo_model.forward(input_data, False)
        pred_boxes, pred_confs, pred_probs = yolo_model.predict(pred_feature_maps)
        pred_scores = pred_confs * pred_probs
        boxes, scores, labels = gpu_nms(pred_boxes, pred_scores, num_class,
                                        max_boxes=1, score_thresh=0.2, nms_thresh=0.45)
        saver = tf.train.Saver()
        saver.restore(sess, restore_path)
    else:
        # Load the frozen detection graph.
        detection_graph = tf.Graph()
        with detection_graph.as_default():
            od_graph_def = tf.GraphDef()
            with tf.gfile.GFile(PATH_TO_CKPT, 'rb') as fid:
                serialized_graph = fid.read()
                od_graph_def.ParseFromString(serialized_graph)
                tf.import_graph_def(od_graph_def, name='')

        configuration = tf.ConfigProto(device_count={"GPU": 0})
        sess = tf.Session(config=configuration, graph=detection_graph)

        # Define input and output tensors for detection_graph.
        image_tensor = detection_graph.get_tensor_by_name('image_tensor:0')
        # Each box represents a part of the image where a particular object was detected.
        detection_boxes = detection_graph.get_tensor_by_name('detection_boxes:0')
        # Each score represents the confidence for each object; it is shown on the
        # result image together with the class label.
        detection_scores = detection_graph.get_tensor_by_name('detection_scores:0')
        detection_classes = detection_graph.get_tensor_by_name('detection_classes:0')
        num_detections = detection_graph.get_tensor_by_name('num_detections:0')

        label_map = label_map_util.load_labelmap(PATH_TO_LABELS)
        categories = label_map_util.convert_label_map_to_categories(
            label_map, max_num_classes=NUM_CLASSES, use_display_name=True)
        category_index = label_map_util.create_category_index(categories)

    frame_statistics = []
    frame_id = 1
    is_skip_frame = True
    frame_skip_count = 0

    # Create a directory of frames for the given video.
    video_base_name = os.path.basename(video_path)
    video_name = os.path.splitext(video_base_name)[0]
    video_dir = join(os.path.dirname(video_path), video_name)
    images_dir = "images"
    video_images_dir = join(video_dir, images_dir)
    if not os.path.exists(video_images_dir):
        os.makedirs(video_images_dir)
    else:
        # Remove all frames from the target directory.
        remove_files_in_dir(video_images_dir)
    video_images_dir_rat = join(video_images_dir, 'rat')
    video_images_dir_mouse = join(video_images_dir, 'mouse')
    os.makedirs(video_images_dir_rat, exist_ok=True)
    os.makedirs(video_images_dir_mouse, exist_ok=True)
    remove_files_in_dir(video_images_dir_rat)
    remove_files_in_dir(video_images_dir_mouse)

    # Load the video and query its properties.
    cap = cv2.VideoCapture(video_path)
    video_frame_cnt = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    video_fps = int(cap.get(cv2.CAP_PROP_FPS))
    video_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    video_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))

    # Frames are saved relative to the images directory.
    cur_dir = os.getcwd()
    os.chdir(video_images_dir)

    while cap.isOpened():
        # Read the next frame.
        ret, frame = cap.read()
        if frame is not None:
            # By default, OpenCV uses the BGR color space. This short blog post
            # explains this better:
            # https://www.learnopencv.com/why-does-opencv-use-bgr-color-format/
            # color_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            if not is_skip_frame:
                if is_yolo:
                    print('yoloo!!')
                    if is_letterbox_resize:
                        img, resize_ratio, dw, dh = letterbox_resize(frame, new_size[0],
                                                                     new_size[1])
                    else:
                        height_ori, width_ori = frame.shape[:2]
                        img = cv2.resize(frame, tuple(new_size))
                    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
                    img = np.asarray(img, np.float32)
                    img = img[np.newaxis, :] / 255.
                    start_time = time.time()
                    boxes_, scores_, labels_ = sess.run([boxes, scores, labels],
                                                        feed_dict={input_data: img})
                    end_time = time.time()

                    # Rescale the coordinates to the original image.
                    if is_letterbox_resize:
                        boxes_[:, [0, 2]] = (boxes_[:, [0, 2]] - dw) / resize_ratio
                        boxes_[:, [1, 3]] = (boxes_[:, [1, 3]] - dh) / resize_ratio
                    else:
                        boxes_[:, [0, 2]] *= (width_ori / float(new_size[0]))
                        boxes_[:, [1, 3]] *= (height_ori / float(new_size[1]))

                    # Keep only the highest-scoring detection.
                    for i in range(len(boxes_)):
                        if scores_[i] == max(scores_):
                            x0, y0, x1, y1 = boxes_[i]
                            plot_one_box(frame, [x0, y0, x1, y1],
                                         label=classes_yolo[labels_[i]] +
                                         ', {:.2f}%'.format(scores_[i] * 100),
                                         color=color_table[labels_[i]])
                            rodent_confidence = scores_[i]
                            rodent_class_id = labels_[i] + 1
                            rodent_class_name = classes_yolo[labels_[i]]
                            if rodent_confidence >= .20:
                                frame_statistics.append({
                                    'frame_id': frame_id,
                                    'confidence': rodent_confidence,
                                    'rodent_class_id': rodent_class_id,
                                    'rodent_class_name': rodent_class_name,
                                })

                                # Save the frame.
                                frame_name = rodent_class_name + '/image' + \
                                    str(frame_id) + '.jpg'
                                cv2.imwrite(frame_name, frame)

                                # Save the XML file.
                                # scores = np.squeeze(scores[0])
                                # bbox_coords = boxes[0]
                                # writer = Writer('.', video_width, video_height)
                                # writer.addObject(rodent_class_name,
                                #                  bbox_coords[1] * video_width,
                                #                  bbox_coords[0] * video_height,
                                #                  bbox_coords[3] * video_width,
                                #                  bbox_coords[2] * video_height)
                                # writer.save('image' + str(frame_id) + '.xml')
                            # else:
                            #     # Save the frame.
                            #     frame_name = 'image' + str(frame_id) + '.jpg'
                            #     cv2.imwrite(frame_name, frame)

                    cv2.putText(frame, '{:.2f}ms'.format((end_time - start_time) * 1000),
                                (40, 40), 0, fontScale=1, color=(0, 255, 0), thickness=2)
                else:
                    image_np_expanded = np.expand_dims(frame, axis=0)
                    # Actual detection.
                    (boxes, scores, classes, num) = sess.run(
                        [detection_boxes, detection_scores, detection_classes,
                         num_detections],
                        feed_dict={image_tensor: image_np_expanded})
                    # Visualize the detection results.
                    # Note: perform the detections using a higher threshold.
                    vis_util.visualize_boxes_and_labels_on_image_array(
                        frame,
                        np.squeeze(boxes[0]),
                        np.squeeze(classes[0]).astype(np.int32),
                        np.squeeze(scores[0]),
                        category_index,
                        use_normalized_coordinates=True,
                        line_thickness=8,
                        max_boxes_to_draw=1,
                        min_score_thresh=.20)
                    # rodent_confidence = np.squeeze(scores[0])[0]
                    # rodent_class_id = np.squeeze(classes[0]).astype(np.int32)[0]
                    # rodent_class_name = category_index[rodent_class_id]['name']
                    # if rodent_confidence > .20:
                    #     frame_statistics.append({'frame_id': frame_id,
                    #                              'confidence': rodent_confidence,
                    #                              'rodent_class_id': rodent_class_id,
                    #                              'rodent_class_name': rodent_class_name,
                    #                              })
                    #
                    #     # Save the frame.
                    #     frame_name = rodent_class_name + '/image' + str(frame_id) + '.jpg'
                    #     cv2.imwrite(frame_name, frame)
                    #
                    #     # Save the XML file.
                    #     scores = np.squeeze(scores[0])
                    #     for i in range(min(1, np.squeeze(boxes[0]).shape[0])):
                    #         if scores is None or scores[i] > .20:
                    #             boxes = tuple(boxes[i].tolist())
                    #
                    #     bbox_coords = boxes[0]
                    #     writer = Writer('.', video_width, video_height)
                    #     writer.addObject(rodent_class_name, bbox_coords[1] * video_width,
                    #                      bbox_coords[0] * video_height,
                    #                      bbox_coords[3] * video_width,
                    #                      bbox_coords[2] * video_height)
                    #     writer.save('image' + str(frame_id) + '.xml')
                    # else:
                    #     # Save the frame.
                    #     frame_name = 'image' + str(frame_id) + '.jpg'
                    #     cv2.imwrite(frame_name, frame)

            cv2.imshow('frame', cv2.resize(frame, (800, 600)))
            output_rgb = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
            # out.write(output_rgb)

            # Skip frames until the user starts detection.
            if is_skip_frame:
                while 1:
                    key = cv2.waitKey(1)
                    if key == 32:  # The 'space' key skips the frame.
                        frame_skip_count += 1
                        print('You have skipped ' + str(frame_skip_count) + ' frames')
                        break
                    elif key == 113 or key == 233:  # 'q' ('й') starts detection.
                        is_skip_frame = False
                        break

            if cv2.waitKey(1) & 0xFF == ord('q'):
                break

            frame_id += 1
        else:
            # End of the video stream.
            break

    # out.release()
    os.chdir(cur_dir)
    cap.release()
    cv2.destroyAllWindows()

    statistics = {
        'frame_count': frame_id,               # number of frames
        'frame_skip_count': frame_skip_count,  # number of skipped frames
        'frame_rodent_count': 0,               # number of frames with a rodent
        'frame_rat_count': 0,                  # number of frames with a rat
        'frame_mouse_count': 0,                # number of frames with a mouse
        'sum_confidence_rat': 0,               # sum of rat confidences over the video
        'sum_confidence_mouse': 0,             # sum of mouse confidences over the video
        'mean_confidence_rat': 0,              # mean rat confidence over the video
        'mean_confidence_mouse': 0,            # mean mouse confidence over the video
    }
    for frame_statistic in frame_statistics:
        if frame_statistic['rodent_class_name'] == 'rat':
            statistics['frame_rodent_count'] += 1
            statistics['frame_rat_count'] += 1
            statistics['sum_confidence_rat'] += frame_statistic['confidence']
            statistics['mean_confidence_rat'] = (statistics['sum_confidence_rat'] /
                                                 statistics['frame_rat_count'])
        elif frame_statistic['rodent_class_name'] == 'mouse':
            statistics['frame_rodent_count'] += 1
            statistics['frame_mouse_count'] += 1
            statistics['sum_confidence_mouse'] += frame_statistic['confidence']
            statistics['mean_confidence_mouse'] = (statistics['sum_confidence_mouse'] /
                                                   statistics['frame_mouse_count'])

    print('----->>> Detection results <<<-----')
    print('Number of frames: ' + str(statistics['frame_count']))
    print('Number of skipped frames: ' + str(statistics['frame_skip_count']))
    print('Number of frames with a rodent: ' + str(statistics['frame_rodent_count']))
    print('Number of frames with a rat: ' + str(statistics['frame_rat_count']))
    print('Number of frames with a mouse: ' + str(statistics['frame_mouse_count']))
    print('Mean rat confidence over the video: ' +
          str(statistics['mean_confidence_rat']))
    print('Mean mouse confidence over the video: ' +
          str(statistics['mean_confidence_mouse']))
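
# --- Illustration (equivalent rewrite, not in the original script): the same
# per-class counts and mean confidences as above, computed with a defaultdict
# to make the aggregation logic explicit.
from collections import defaultdict

def summarize(frame_statistics):
    counts, conf_sums = defaultdict(int), defaultdict(float)
    for fs in frame_statistics:
        counts[fs['rodent_class_name']] += 1
        conf_sums[fs['rodent_class_name']] += fs['confidence']
    # Mean confidence per class; counts doubles as the per-class frame totals.
    return {cls: conf_sums[cls] / counts[cls] for cls in counts}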