def main(_argv):
    if FLAGS.tiny:
        STRIDES = np.array(cfg.YOLO.STRIDES_TINY)
        ANCHORS = utils.get_anchors(cfg.YOLO.ANCHORS_TINY, FLAGS.tiny)
    else:
        STRIDES = np.array(cfg.YOLO.STRIDES)
        if FLAGS.model == 'yolov4':
            ANCHORS = utils.get_anchors(cfg.YOLO.ANCHORS, FLAGS.tiny)
        else:
            ANCHORS = utils.get_anchors(cfg.YOLO.ANCHORS_V3, FLAGS.tiny)
    NUM_CLASS = len(utils.read_class_names(cfg.YOLO.CLASSES))
    XYSCALE = cfg.YOLO.XYSCALE
    input_size = FLAGS.size
    video_path = FLAGS.video

    print("Video from: ", video_path)
    vid = cv2.VideoCapture(video_path)

    if FLAGS.framework == 'tf':
        input_layer = tf.keras.layers.Input([input_size, input_size, 3])
        if FLAGS.tiny:
            feature_maps = YOLOv3_tiny(input_layer, NUM_CLASS)
            bbox_tensors = []
            for i, fm in enumerate(feature_maps):
                bbox_tensor = decode(fm, NUM_CLASS, i)
                bbox_tensors.append(bbox_tensor)
            model = tf.keras.Model(input_layer, bbox_tensors)
            utils.load_weights_tiny(model, FLAGS.weights)
        else:
            if FLAGS.model == 'yolov3':
                feature_maps = YOLOv3(input_layer, NUM_CLASS)
                bbox_tensors = []
                for i, fm in enumerate(feature_maps):
                    bbox_tensor = decode(fm, NUM_CLASS, i)
                    bbox_tensors.append(bbox_tensor)
                model = tf.keras.Model(input_layer, bbox_tensors)
                utils.load_weights_v3(model, FLAGS.weights)
            elif FLAGS.model == 'yolov4':
                feature_maps = YOLOv4(input_layer, NUM_CLASS)
                bbox_tensors = []
                for i, fm in enumerate(feature_maps):
                    bbox_tensor = decode(fm, NUM_CLASS, i)
                    bbox_tensors.append(bbox_tensor)
                model = tf.keras.Model(input_layer, bbox_tensors)
                if FLAGS.weights.split(".")[len(FLAGS.weights.split(".")) - 1] == "weights":
                    utils.load_weights(model, FLAGS.weights)
                else:
                    model.load_weights(FLAGS.weights).expect_partial()
        model.summary()
    else:
        # Load TFLite model and allocate tensors.
        interpreter = tf.lite.Interpreter(model_path=FLAGS.weights)
        interpreter.allocate_tensors()
        # Get input and output tensors.
        input_details = interpreter.get_input_details()
        output_details = interpreter.get_output_details()
        print(input_details)
        print(output_details)

    while True:
        return_value, frame = vid.read()
        if return_value:
            frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            image = Image.fromarray(frame)
        else:
            raise ValueError("No image! Try with another video format")
        frame_size = frame.shape[:2]
        image_data = utils.image_preprocess(np.copy(frame), [input_size, input_size])
        image_data = image_data[np.newaxis, ...].astype(np.float32)
        prev_time = time.time()

        if FLAGS.framework == 'tf':
            pred_bbox = model.predict(image_data)
        else:
            interpreter.set_tensor(input_details[0]['index'], image_data)
            interpreter.invoke()
            pred_bbox = [interpreter.get_tensor(output_details[i]['index'])
                         for i in range(len(output_details))]

        if FLAGS.model == 'yolov4':
            pred_bbox = utils.postprocess_bbbox(pred_bbox, ANCHORS, STRIDES, XYSCALE)
        else:
            pred_bbox = utils.postprocess_bbbox(pred_bbox, ANCHORS, STRIDES)
        bboxes = utils.postprocess_boxes(pred_bbox, frame_size, input_size, 0.25)
        bboxes = utils.nms(bboxes, 0.213, method='nms')

        image = utils.draw_bbox(frame, bboxes)
        curr_time = time.time()
        exec_time = curr_time - prev_time
        result = np.asarray(image)
        info = "time: %.2f ms" % (1000 * exec_time)
        print(info)

        cv2.namedWindow("result", cv2.WINDOW_AUTOSIZE)
        result = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
        cv2.imshow("result", result)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
def main(_argv):
    if FLAGS.tiny:
        STRIDES = np.array(cfg.YOLO.STRIDES_TINY)
        ANCHORS = utils.get_anchors(cfg.YOLO.ANCHORS_TINY, FLAGS.tiny)
    else:
        STRIDES = np.array(cfg.YOLO.STRIDES)
        if FLAGS.model == 'yolov4':
            ANCHORS = utils.get_anchors(cfg.YOLO.ANCHORS, FLAGS.tiny)
        else:
            ANCHORS = utils.get_anchors(cfg.YOLO.ANCHORS_V3, FLAGS.tiny)
    NUM_CLASS = len(utils.read_class_names(cfg.YOLO.CLASSES))
    XYSCALE = cfg.YOLO.XYSCALE

    config = ConfigProto()
    config.gpu_options.allow_growth = True
    # session = InteractiveSession(config=config)
    input_size = FLAGS.size

    physical_devices = tf.config.experimental.list_physical_devices('GPU')
    if len(physical_devices) > 0:
        tf.config.experimental.set_memory_growth(physical_devices[0], True)

    if FLAGS.framework == 'tf':
        input_layer = tf.keras.layers.Input([input_size, input_size, 3])
        if FLAGS.tiny:
            feature_maps = YOLOv3_tiny(input_layer, NUM_CLASS)
            bbox_tensors = []
            for i, fm in enumerate(feature_maps):
                bbox_tensor = decode(fm, NUM_CLASS, i)
                bbox_tensors.append(bbox_tensor)
            model = tf.keras.Model(input_layer, bbox_tensors)
            utils.load_weights_tiny(model, FLAGS.weights)
        else:
            if FLAGS.model == 'yolov3':
                feature_maps = YOLOv3(input_layer, NUM_CLASS)
                bbox_tensors = []
                for i, fm in enumerate(feature_maps):
                    bbox_tensor = decode(fm, NUM_CLASS, i)
                    bbox_tensors.append(bbox_tensor)
                model = tf.keras.Model(input_layer, bbox_tensors)
                utils.load_weights_v3(model, FLAGS.weights)
            elif FLAGS.model == 'yolov4':
                feature_maps = YOLOv4(input_layer, NUM_CLASS)
                bbox_tensors = []
                for i, fm in enumerate(feature_maps):
                    bbox_tensor = decode(fm, NUM_CLASS, i)
                    bbox_tensors.append(bbox_tensor)
                model = tf.keras.Model(input_layer, bbox_tensors)
                utils.load_weights(model, FLAGS.weights)
    elif FLAGS.framework == 'trt':
        saved_model_loaded = tf.saved_model.load(FLAGS.weights, tags=[tag_constants.SERVING])
        signature_keys = list(saved_model_loaded.signatures.keys())
        print(signature_keys)
        infer = saved_model_loaded.signatures['serving_default']

    logging.info('weights loaded')

    @tf.function
    def run_model(x):
        return model(x)

    # Test the TensorFlow Lite model on random input data.
    sum = 0
    original_image = cv2.imread(FLAGS.image)
    original_image = cv2.cvtColor(original_image, cv2.COLOR_BGR2RGB)
    original_image_size = original_image.shape[:2]

    image_data = utils.image_preprocess(np.copy(original_image), [FLAGS.size, FLAGS.size])
    image_data = image_data[np.newaxis, ...].astype(np.float32)
    img_raw = tf.image.decode_image(open(FLAGS.image, 'rb').read(), channels=3)
    img_raw = tf.expand_dims(img_raw, 0)
    img_raw = tf.image.resize(img_raw, (FLAGS.size, FLAGS.size))
    batched_input = tf.constant(image_data)

    for i in range(1000):
        prev_time = time.time()
        # pred_bbox = model.predict(image_data)
        if FLAGS.framework == 'tf':
            pred_bbox = []
            result = run_model(image_data)
            for value in result:
                value = value.numpy()
                pred_bbox.append(value)
            if FLAGS.model == 'yolov4':
                pred_bbox = utils.postprocess_bbbox(pred_bbox, ANCHORS, STRIDES, XYSCALE)
            else:
                pred_bbox = utils.postprocess_bbbox(pred_bbox, ANCHORS, STRIDES)
            bboxes = utils.postprocess_boxes(pred_bbox, original_image_size, input_size, 0.25)
            bboxes = utils.nms(bboxes, 0.213, method='nms')
        elif FLAGS.framework == 'trt':
            pred_bbox = []
            result = infer(batched_input)
            for key, value in result.items():
                value = value.numpy()
                pred_bbox.append(value)
            if FLAGS.model == 'yolov4':
                pred_bbox = utils.postprocess_bbbox(pred_bbox, ANCHORS, STRIDES, XYSCALE)
            else:
                pred_bbox = utils.postprocess_bbbox(pred_bbox, ANCHORS, STRIDES)
            bboxes = utils.postprocess_boxes(pred_bbox, original_image_size, input_size, 0.25)
            bboxes = utils.nms(bboxes, 0.213, method='nms')
        # pred_bbox = pred_bbox.numpy()
        curr_time = time.time()
        exec_time = curr_time - prev_time
        if i == 0:
            continue
        sum += (1 / exec_time)
        info = str(i) + " time:" + str(round(exec_time, 3)) \
            + " average FPS:" + str(round(sum / i, 2)) \
            + ", FPS: " + str(round((1 / exec_time), 1))
        print(info)
predict_result_path = os.path.join(predicted_dir_path, str(num) + '.txt')

# Predict Process
image_size = image.shape[:2]
image_data = utils.image_preporcess(np.copy(image), [INPUT_SIZE, INPUT_SIZE])
image_data = image_data[np.newaxis, ...].astype(np.float32)

pred_bbox = model.predict(image_data)
pred_bbox = [p for i, p in enumerate(pred_bbox) if i % 2 == 1]
pred_bbox = [tf.reshape(x, (-1, tf.shape(x)[-1])) for x in pred_bbox]
pred_bbox = tf.concat(pred_bbox, axis=0)
bboxes = utils.postprocess_boxes(pred_bbox, image_size, INPUT_SIZE, cfg.TEST.SCORE_THRESHOLD)
bboxes = utils.nms(bboxes, cfg.TEST.IOU_THRESHOLD, method='nms')

if cfg.TEST.DECTECTED_IMAGE_PATH is not None:
    image = utils.draw_bbox(image, bboxes)
    cv2.imwrite(cfg.TEST.DECTECTED_IMAGE_PATH + image_name, image)

with open(predict_result_path, 'w') as f:
    for bbox in bboxes:
        coor = np.array(bbox[:4], dtype=np.int32)
        score = bbox[4]
        class_ind = int(bbox[5])
        class_name = CLASSES[class_ind]
        score = '%.4f' % score
        xmin, ymin, xmax, ymax = list(map(str, coor))
        bbox_mess = ' '.join([class_name, score, xmin, ymin, xmax, ymax]) + '\n'
        prev_time = time.time()

        if FLAGS.framework == 'tf':
            pred_bbox = model.predict(image_data)
        else:
            interpreter.set_tensor(input_details[0]['index'], image_data)
            interpreter.invoke()
            pred_bbox = [interpreter.get_tensor(output_details[i]['index'])
                         for i in range(len(output_details))]

        if FLAGS.model == 'yolov4':
            pred_bbox = utils.postprocess_bbbox(pred_bbox, ANCHORS, STRIDES, XYSCALE)
        else:
            pred_bbox = utils.postprocess_bbbox(pred_bbox, ANCHORS, STRIDES)
        bboxes = utils.postprocess_boxes(pred_bbox, frame_size, input_size, 0.25)
        bboxes = utils.nms(bboxes, 0.213, method='nms')

        image = utils.draw_bbox(frame, bboxes)
        curr_time = time.time()
        exec_time = curr_time - prev_time
        result = np.asarray(image)
        info = "time: %.2f ms" % (1000 * exec_time)
        print(info)

        cv2.namedWindow("result", cv2.WINDOW_AUTOSIZE)
        result = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
        cv2.imshow("result", result)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break


if __name__ == '__main__':
    try:
        app.run(main)
    except SystemExit:
        pass
def main(_argv):
    if FLAGS.tiny:
        STRIDES = np.array(cfg.YOLO.STRIDES_TINY)
        ANCHORS = utils.get_anchors(cfg.YOLO.ANCHORS_TINY, FLAGS.tiny)
        XYSCALE = cfg.YOLO.XYSCALE_TINY
    else:
        STRIDES = np.array(cfg.YOLO.STRIDES)
        if FLAGS.model == 'yolov4':
            ANCHORS = utils.get_anchors(cfg.YOLO.ANCHORS, FLAGS.tiny)
        else:
            ANCHORS = utils.get_anchors(cfg.YOLO.ANCHORS_V3, FLAGS.tiny)
        XYSCALE = cfg.YOLO.XYSCALE
    NUM_CLASS = len(utils.read_class_names(cfg.YOLO.CLASSES))
    input_size = FLAGS.size
    image_path = FLAGS.image

    original_image = cv2.imread(image_path)
    original_image = cv2.cvtColor(original_image, cv2.COLOR_BGR2RGB)
    original_image_size = original_image.shape[:2]

    image_data = utils.image_preprocess(np.copy(original_image), [input_size, input_size])
    image_data = image_data[np.newaxis, ...].astype(np.float32)

    if FLAGS.framework == 'tf':
        input_layer = tf.keras.layers.Input([input_size, input_size, 3])
        if FLAGS.tiny:
            if FLAGS.model == 'yolov3':
                feature_maps = YOLOv3_tiny(input_layer, NUM_CLASS)
            else:
                feature_maps = YOLOv4_tiny(input_layer, NUM_CLASS)
            bbox_tensors = []
            for i, fm in enumerate(feature_maps):
                bbox_tensor = decode(fm, NUM_CLASS, i)
                bbox_tensors.append(bbox_tensor)
            model = tf.keras.Model(input_layer, bbox_tensors)
            model.summary()
            utils.load_weights_tiny(model, FLAGS.weights, FLAGS.model)
        else:
            if FLAGS.model == 'yolov3':
                feature_maps = YOLOv3(input_layer, NUM_CLASS)
                bbox_tensors = []
                for i, fm in enumerate(feature_maps):
                    bbox_tensor = decode(fm, NUM_CLASS, i)
                    bbox_tensors.append(bbox_tensor)
                model = tf.keras.Model(input_layer, bbox_tensors)
                utils.load_weights_v3(model, FLAGS.weights)
            elif FLAGS.model == 'yolov4':
                feature_maps = YOLOv4(input_layer, NUM_CLASS)
                bbox_tensors = []
                for i, fm in enumerate(feature_maps):
                    bbox_tensor = decode(fm, NUM_CLASS, i)
                    bbox_tensors.append(bbox_tensor)
                model = tf.keras.Model(input_layer, bbox_tensors)
                if FLAGS.weights.split(".")[len(FLAGS.weights.split(".")) - 1] == "weights":
                    utils.load_weights(model, FLAGS.weights)
                else:
                    model.load_weights(FLAGS.weights).expect_partial()
            model.summary()
        pred_bbox = model.predict(image_data)
    else:
        # Load TFLite model and allocate tensors.
        interpreter = tf.lite.Interpreter(model_path=FLAGS.weights)
        interpreter.allocate_tensors()
        # Get input and output tensors.
        input_details = interpreter.get_input_details()
        output_details = interpreter.get_output_details()
        print(input_details)
        print(output_details)
        interpreter.set_tensor(input_details[0]['index'], image_data)
        interpreter.invoke()
        pred_bbox = [interpreter.get_tensor(output_details[i]['index'])
                     for i in range(len(output_details))]

    if FLAGS.model == 'yolov4':
        if FLAGS.tiny:
            pred_bbox = utils.postprocess_bbbox(pred_bbox, ANCHORS, STRIDES, XYSCALE)
        else:
            pred_bbox = utils.postprocess_bbbox(pred_bbox, ANCHORS, STRIDES, XYSCALE)
    else:
        pred_bbox = utils.postprocess_bbbox(pred_bbox, ANCHORS, STRIDES)
    bboxes = utils.postprocess_boxes(pred_bbox, original_image_size, input_size, 0.25)
    bboxes = utils.nms(bboxes, 0.213, method='nms')

    image = utils.draw_bbox(original_image, bboxes)
    image = Image.fromarray(image)
    image.show()
    image = cv2.cvtColor(np.array(image), cv2.COLOR_BGR2RGB)
    cv2.imwrite(FLAGS.output, image)
model = tf.keras.models.load_model('SavedModel/YOLOv3_model')

image = cv2.imread(path_to_image)
image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
image_size = image.shape[:2]
image_data = utils.image_preporcess(np.copy(image), [INPUT_SIZE, INPUT_SIZE])
image_data = image_data[np.newaxis, ...].astype(np.float32)  # (1, width, height, 3)

pred_bbox = model.predict(image_data)
pred_bbox = [tf.reshape(x, (-1, tf.shape(x)[-1])) for x in pred_bbox]
pred_bbox = tf.concat(pred_bbox, axis=0)
bboxes = utils.postprocess_boxes(pred_bbox, image_size, INPUT_SIZE, SCORE_THRESHOLD)
bboxes = utils.nms(bboxes, IOU_THRESHOLD, method='nms')

image_with_detections = utils.draw_bbox(image, bboxes)
image_with_detections = cv2.cvtColor(image_with_detections, cv2.COLOR_RGB2BGR)
print('image_size = ', image_size)

# If image size is too big then resize the image for display purposes
display_shape = (image_size[1], image_size[0])
if image_size[0] > 1500 or image_size[1] > 1500:
    display_shape = (int(image_size[1] / 4), int(image_size[0] / 4))

image_with_detections = cv2.resize(image_with_detections, display_shape)
cv2.imshow('detections', image_with_detections)
cv2.waitKey(0)
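# NOTE: the load_model() call above assumes a Keras SavedModel was exported earlier.
# A minimal, hedged sketch of that export step (the build_yolov3_model helper name is
# hypothetical; any built tf.keras.Model works the same way):
#
#     model = build_yolov3_model()              # build the model and load weights as in the snippets above
#     model.save('SavedModel/YOLOv3_model')     # writes the SavedModel directory read back by load_model()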
def predict(self, image_path, result_dir='.', save_img=True, image_name=None):
    try:
        if not (os.path.exists(image_path)):
            print('No such file or directory', image_path)
            # return None
        else:
            original_image = cv2.imread(image_path)
            print('Shape1', original_image.shape)
    except:
        original_image = image_path
        print('Shape2', original_image.shape)

    if self.tiny:
        STRIDES = np.array(cfg.YOLO.STRIDES_TINY)
        ANCHORS = utils.get_anchors(cfg.YOLO.ANCHORS_TINY, self.tiny)
        XYSCALE = cfg.YOLO.XYSCALE_TINY
    else:
        STRIDES = np.array(cfg.YOLO.STRIDES)
        if self.model == 'yolov4':
            ANCHORS = utils.get_anchors(cfg.YOLO.ANCHORS, self.tiny)
        else:
            ANCHORS = utils.get_anchors(cfg.YOLO.ANCHORS_V3, self.tiny)
        XYSCALE = cfg.YOLO.XYSCALE

    input_size = self.size

    try:
        # print('image:', original_image)
        original_image = cv2.cvtColor(original_image, cv2.COLOR_BGR2RGB)
        original_image_size = original_image.shape[:2]
    except:
        return pd.DataFrame()

    image_data = utils.image_preprocess(np.copy(original_image), [self.size, self.size])  # [input_size, input_size]
    image_data = image_data[np.newaxis, ...].astype(np.float32)

    if self.framework == 'tf':
        model = self.instanciated_model
        # model.summary()
        pred_bbox = model.predict(image_data)
    else:
        interpreter = self.instanciated_model
        # Get input and output tensors.
        input_details = interpreter.get_input_details()
        output_details = interpreter.get_output_details()
        print(input_details)
        print(output_details)
        interpreter.set_tensor(input_details[0]['index'], image_data)
        interpreter.invoke()
        pred_bbox = [interpreter.get_tensor(output_details[i]['index'])
                     for i in range(len(output_details))]

    if self.model == 'yolov4':
        if self.tiny:
            pred_bbox = utils.postprocess_bbbox(pred_bbox, ANCHORS, STRIDES, XYSCALE, RESIZE=1.5)
        else:
            pred_bbox = utils.postprocess_bbbox(pred_bbox, ANCHORS, STRIDES, XYSCALE)
    else:
        pred_bbox = utils.postprocess_bbbox(pred_bbox, ANCHORS, STRIDES)
    bboxes = utils.postprocess_boxes(pred_bbox, original_image_size, input_size, 0.25)
    bboxes = utils.nms(bboxes, 0.213, method='nms')

    image = utils.draw_bbox(original_image, bboxes)
    image = Image.fromarray(image)
    # image.show()

    classes = utils.read_class_names(cfg.YOLO.CLASSES)
    list_bboxes = []
    for i, bbox in enumerate(bboxes):
        coor = np.array(bbox[:4], dtype=np.int32)
        score = bbox[4]
        class_ind = int(bbox[5])
        # print('type bbox', type(bbox))
        # print('bbox', bbox[:4])
        # print('coor', list(coor))
        bbox_info = {'coor': list(coor), 'probability': score, 'class': classes[class_ind]}
        list_bboxes.append(bbox_info)

    output_name = os.path.join(result_dir + '/out_' + str(image_name) + '.jpg')
    if save_img:
        image.save(output_name)
        # cv2.imwrite(output_name, img)
        print('Img saved to', output_name)

    try:
        output_name = os.path.join(result_dir + '/out_' + os.path.basename(image_path))
        if save_img:
            image.save(output_name)
            # cv2.imwrite(output_name, img)
            print('Img saved to', output_name)

        output = pd.DataFrame(list_bboxes)
        # print('image_path', image_path)
        output_name = '.'.join(output_name.split('.')[:2]) + '.xlsx'
        # output_name = 'results/out_' + image_path.split('\\')[-1].split('.')[0] + '.xlsx'
        print('Result file saved to', output_name)
        output.to_excel(output_name)
        return output
    except Exception as e:
        print(e)
        return pd.DataFrame()


# yolo = YoloV4()
# yolo.predict('1fc35a5149379fff131e939f18257341.7.jpeg')
pb_file = "./saved_model.pb" image_path = './test_images/' + files[100] num_classes = 1 input_size = 416 graph = tf.Graph() original_image = cv2.imread(image_path) original_image = cv2.cvtColor(original_image, cv2.COLOR_BGR2RGB) original_image_size = original_image.shape[:2] image_data = utils.image_preporcess(np.copy(original_image), [input_size, input_size]) image_data = image_data[np.newaxis, ...] return_tensors = utils.read_pb_return_tensors(graph, pb_file, return_elements) with tf.Session(graph=graph) as sess: pred_sbbox, pred_mbbox, pred_lbbox = sess.run( [return_tensors[1], return_tensors[2], return_tensors[3]], feed_dict={ return_tensors[0]: image_data}) pred_bbox = np.concatenate([np.reshape(pred_sbbox, (-1, 5 + num_classes)), np.reshape(pred_mbbox, (-1, 5 + num_classes)), np.reshape(pred_lbbox, (-1, 5 + num_classes))], axis=0) bboxes = utils.postprocess_boxes(pred_bbox, original_image_size, input_size, 0.3) #score_threshold = 0.3 bboxes = utils.nms(bboxes, 0.45, method='nms') #iou_threshold = 0.45 image = utils.draw_bbox(original_image, bboxes) image = Image.fromarray(image) image
def main(video_path):
    global end, init
    return_elements = ["input/input_data:0", "pred_sbbox/concat_2:0",
                       "pred_mbbox/concat_2:0", "pred_lbbox/concat_2:0"]
    pb_file = "./yolov3_coco.pb"
    num_classes = 80  # using the original pretrained weights, which cover 80 classes
    input_size = 416
    graph = tf.Graph()  # computation graph: a dataflow graph used for TensorFlow ops; it does not run anything by itself
    return_tensors = utils.read_pb_return_tensors(graph, pb_file, return_elements)
    framenumber = 1
    junzhi = 10

    with tf.Session(graph=graph) as sess:
        vid = cv2.VideoCapture(video_path)  # open the video
        init = 0
        fourcc = cv2.VideoWriter_fourcc('X', 'V', 'I', 'D')
        size = (int(vid.get(cv2.CAP_PROP_FRAME_WIDTH)), int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT)))
        out = cv2.VideoWriter(os.path.join(path_, 'result2.avi'), fourcc, 30.0, size)
        while (1):
            return_value, frame = vid.read()
            # frame = cv2.flip(frame, 1)  # flip horizontally
            framenumber = framenumber + 1
            currentFrame = framenumber
            if currentFrame % 6 != 0:
                out.write(frame)
                continue
            if return_value:
                J = dehaze.DeHaze(frame)
                # frame = cv2.cvtColor(J, cv2.COLOR_BGR2RGB)
                # image = Image.fromarray(J)
                frame = J
            else:
                raise ValueError("No image!")
            frame1 = Pedestrian_Detection.detect(frame)  # detect pedestrians
            frame_size = frame.shape[:2]
            image_data = utils.image_preporcess(np.copy(frame), [input_size, input_size])
            image_data = image_data[np.newaxis, ...]
            # prev_time = time.time()

            pred_sbbox, pred_mbbox, pred_lbbox = sess.run(
                [return_tensors[1], return_tensors[2], return_tensors[3]],
                feed_dict={return_tensors[0]: image_data})

            # np.concatenate((a1, a2, ...), axis=0) joins several arrays in one call;
            # np.reshape(a, newshape) gives a new shape (e.g. (2, 3) for 2 rows, 3 columns),
            # which must be compatible with the original number of elements.
            pred_bbox = np.concatenate([np.reshape(pred_sbbox, (-1, 5 + num_classes)),
                                        np.reshape(pred_mbbox, (-1, 5 + num_classes)),
                                        np.reshape(pred_lbbox, (-1, 5 + num_classes))], axis=0)

            bboxes = utils.postprocess_boxes(pred_bbox, frame_size, input_size, 0.3)  # pred_bbox: predicted boxes, frame_size: frame dimensions
            bboxes = utils.nms(bboxes, 0.45, method='nms')
            image, junzhi = utils.draw_bbox(frame1, bboxes, junzhi)  # draw the boxes

            for i, bbox in enumerate(bboxes):
                coor = np.array(bbox[:4], dtype=np.int32)
                if coor[2] - coor[0] > junzhi * 1.5:
                    winsound.Beep(600, 100)
                    # pygame.mixer.music.play(1)
            out.write(image)
            # result = np.asarray(image)
            # cv2.namedWindow("result", cv2.WINDOW_AUTOSIZE)
            # result = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
            pilImage = Image.fromarray(image)
            pilImage = pilImage.resize((700, 380), Image.ANTIALIAS)
            tkImage = ImageTk.PhotoImage(image=pilImage)
            bj.create_image(0, 0, anchor='nw', image=tkImage)
            window.update_idletasks()
            window.update()
            str1.set('进行中')  # "In progress"
            if end == 1:
                str1.set('欢迎使用')  # "Welcome"
                end = 0
                init = 1
                window.update()
                sess.close()
            if cv2.waitKey(1) & 0xFF == ord('q'):
                str1.set('欢迎使用')
                window.update()
                init = 1
                break
        str1.set('欢迎使用')
        window.update()
        init = 1
        end = 0
        return
def main(_argv):
    # Use an if statement to check whether this is the YOLO-tiny version.
    if FLAGS.tiny:
        STRIDES = np.array(cfg.YOLO.STRIDES_TINY)
        ANCHORS = utils.get_anchors(cfg.YOLO.ANCHORS_TINY, FLAGS.tiny)
    else:
        STRIDES = np.array(cfg.YOLO.STRIDES)
        # If this is not the tiny version, pick the YOLOv4 model and load its anchor box information.
        if FLAGS.model == 'yolov4':
            ANCHORS = utils.get_anchors(cfg.YOLO.ANCHORS, FLAGS.tiny)
        else:
            ANCHORS = utils.get_anchors(cfg.YOLO.ANCHORS_V3, FLAGS.tiny)

    # Read the number of classes and the box XYSCALE from the YOLOv4 cfg, and take
    # input_size and image_path from the size and image values of the predefined FLAGS object.
    NUM_CLASS = len(utils.read_class_names(cfg.YOLO.CLASSES))
    XYSCALE = cfg.YOLO.XYSCALE
    input_size = FLAGS.size
    image_path = FLAGS.image

    # Load the image with cv2 and convert it from BGR to RGB:
    # OpenCV stores color images in BGR order, while matplotlib expects RGB.
    original_image = cv2.imread(image_path)
    original_image = cv2.cvtColor(original_image, cv2.COLOR_BGR2RGB)
    original_image_size = original_image.shape[:2]

    # Turn the image data into an array and cast it to float32.
    image_data = utils.image_preporcess(np.copy(original_image), [input_size, input_size])
    image_data = image_data[np.newaxis, ...].astype(np.float32)

    # When the framework is 'tf', the model that gets built depends on how FLAGS.model is set.
    # Since this walkthrough covers YOLOv4, only the FLAGS.model == 'yolov4' case is discussed.
    if FLAGS.framework == 'tf':
        input_layer = tf.keras.layers.Input([input_size, input_size, 3])
        if FLAGS.tiny:
            feature_maps = YOLOv3_tiny(input_layer, NUM_CLASS)
            bbox_tensors = []
            for i, fm in enumerate(feature_maps):
                bbox_tensor = decode(fm, NUM_CLASS, i)
                bbox_tensors.append(bbox_tensor)
            model = tf.keras.Model(input_layer, bbox_tensors)
            utils.load_weights_tiny(model, FLAGS.weights)
        else:
            if FLAGS.model == 'yolov3':
                feature_maps = YOLOv3(input_layer, NUM_CLASS)
                bbox_tensors = []
                for i, fm in enumerate(feature_maps):
                    bbox_tensor = decode(fm, NUM_CLASS, i)
                    bbox_tensors.append(bbox_tensor)
                model = tf.keras.Model(input_layer, bbox_tensors)
                utils.load_weights_v3(model, FLAGS.weights)
            # Feed the input layer and class count into YOLOv4 to produce the feature maps,
            # and declare a list for the predicted bounding-box tensors.
            # Loop over the feature maps, decode each one into the list, then build the model
            # from the input layer and these tensors, and finally load the pretrained weights.
            elif FLAGS.model == 'yolov4':
                feature_maps = YOLOv4(input_layer, NUM_CLASS)
                bbox_tensors = []
                for i, fm in enumerate(feature_maps):
                    bbox_tensor = decode(fm, NUM_CLASS, i)
                    bbox_tensors.append(bbox_tensor)
                model = tf.keras.Model(input_layer, bbox_tensors)
                utils.load_weights(model, FLAGS.weights)

            model.summary()

        # Run the model on the original image data to predict the bounding boxes.
        pred_bbox = model.predict(image_data)
    else:
        interpreter = tf.lite.Interpreter(model_path=FLAGS.weights)
        interpreter.allocate_tensors()
        input_details = interpreter.get_input_details()
        output_details = interpreter.get_output_details()
        print(input_details)
        print(output_details)
        interpreter.set_tensor(input_details[0]['index'], image_data)
        interpreter.invoke()
        pred_bbox = [interpreter.get_tensor(output_details[i]['index'])
                     for i in range(len(output_details))]

    # Keep only the valid bounding boxes among the predictions and store the final result in pred_bbox.
    if FLAGS.model == 'yolov4':
        pred_bbox = utils.postprocess_bbbox(pred_bbox, ANCHORS, STRIDES, XYSCALE)
    else:
        pred_bbox = utils.postprocess_bbbox(pred_bbox, ANCHORS, STRIDES)
    bboxes = utils.postprocess_boxes(pred_bbox, original_image_size, input_size, 0.25)
    bboxes = utils.nms(bboxes, 0.213, method='nms')

    # Draw the predicted bounding boxes on the image and display it.
    image = utils.draw_bbox(original_image, bboxes)
    image = Image.fromarray(image)
    image.show()
def deal_data(conn, addr):
    print('Accept new connection from {0}'.format(addr))
    # conn.settimeout(500)
    conn.send('Hi, Welcome to the server!'.encode("utf-8"))

    while 1:
        fileinfo_size = struct.calcsize('128sq')
        buf = conn.recv(fileinfo_size)
        if buf:
            # filename, filesize = struct.unpack('128sq', buf)
            filesize = buf
            fn = 'rgb.jpg'  # filename.strip(b"\x00").decode("utf-8")
            new_filename = os.path.join('./', 'new_' + fn)
            print(new_filename, filesize)
            print('file new name is {0}, filesize if {1}'.format(new_filename, filesize))

            recvd_size = 0  # size of the data received so far
            fp = open(new_filename, 'wb')
            print('start receiving...')
            while not recvd_size == filesize:
                if filesize - recvd_size > 1024:
                    data = conn.recv(1024)
                    recvd_size += len(data)
                else:
                    data = conn.recv(filesize - recvd_size)
                    recvd_size = filesize
                fp.write(data)
            fp.close()
            print('end receive...')

            tic = time.time()
            original_image = cv2.imread(new_filename)

            # rotate the image
            height, width = original_image.shape[:2]
            matRotate = cv2.getRotationMatrix2D((height * 0.5, width * 0.5), -90, 1)
            dst = cv2.warpAffine(original_image, matRotate, (width, height * 2))
            rows, cols = dst.shape[:2]
            for col in range(0, cols):
                if dst[:, col].any():
                    left = col
                    break
            for col in range(cols - 1, 0, -1):
                if dst[:, col].any():
                    right = col
                    break
            for row in range(0, rows):
                if dst[row, :].any():
                    up = row
                    break
            for row in range(rows - 1, 0, -1):
                if dst[row, :].any():
                    down = row
                    break
            res_widths = abs(right - left)
            res_heights = abs(down - up)
            res = np.zeros([res_heights, res_widths, 3], np.uint8)
            for res_width in range(res_widths):
                for res_height in range(res_heights):
                    res[res_height, res_width] = dst[up + res_height, left + res_width]
            original_image = res

            original_image = cv2.cvtColor(original_image, cv2.COLOR_BGR2RGB)
            original_image_size = original_image.shape[:2]
            image_data = utils.image_preporcess(np.copy(original_image), [input_size, input_size])
            image_data = image_data[np.newaxis, ...].astype(np.float32)

            pred_bbox = model.predict(image_data)
            pred_bbox = [tf.reshape(x, (-1, tf.shape(x)[-1])) for x in pred_bbox]
            pred_bbox = tf.concat(pred_bbox, axis=0)
            bboxes = utils.postprocess_boxes(pred_bbox, original_image_size, input_size, 0.3)
            bboxes = utils.nms(bboxes, 0.45, method='nms')

            strbox = str(len(bboxes)) + ','
            for i in range(0, len(bboxes)):
                for j in range(0, 5):
                    strbox = strbox + str(bboxes[i][j]) + ','
                strbox = strbox + classes[int(bboxes[i][5])] + ','

            image = utils.draw_bbox(original_image, bboxes)
            image = Image.fromarray(image)
            image.show()
            toc = time.time()
            print(toc - tic)

            conn.send(strbox.encode("utf-8"))
            print(conn.recv(1024).decode('utf-8'))
            conn.close()
            break
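# NOTE: deal_data() expects a fixed-size header (struct.calcsize('128sq')) followed by the raw
# image bytes. The exact header contents are not shown above (the unpack call is commented out),
# so the fields and the port below are assumptions; this is only a rough sketch of a matching
# client-side sender, not the project's actual client.
import os
import socket
import struct

def send_image(path, host='127.0.0.1', port=9999):
    """Send one file as a 128s + q header (name, size) followed by the raw bytes."""
    sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    sock.connect((host, port))
    print(sock.recv(1024).decode('utf-8'))   # server greeting
    header = struct.pack('128sq',
                         os.path.basename(path).encode('utf-8'),
                         os.path.getsize(path))
    sock.sendall(header)
    with open(path, 'rb') as fp:
        while True:
            chunk = fp.read(1024)
            if not chunk:
                break
            sock.sendall(chunk)
    print(sock.recv(1024).decode('utf-8'))   # detection results string
    sock.close()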
def evaluate(model_path):
    INPUT_SIZE = 416
    NUM_CLASS = len(utils.read_class_names(cfg.YOLO.CLASSES))
    CLASSES = utils.read_class_names(cfg.YOLO.CLASSES)

    predicted_dir_path = './data/mAP/predicted'
    ground_truth_dir_path = './data/mAP/ground-truth'
    if os.path.exists(predicted_dir_path):
        shutil.rmtree(predicted_dir_path)
    if os.path.exists(ground_truth_dir_path):
        shutil.rmtree(ground_truth_dir_path)
    if os.path.exists(cfg.TEST.DECTECTED_IMAGE_PATH):
        shutil.rmtree(cfg.TEST.DECTECTED_IMAGE_PATH)

    os.mkdir(predicted_dir_path)
    os.mkdir(ground_truth_dir_path)
    os.mkdir(cfg.TEST.DECTECTED_IMAGE_PATH)

    # Build Model
    model = yolov3.build_for_test()
    model.load_weights(model_path)
    # utils.load_weights(model, "./weight/yolov3-voc_10000.weights")  # to load weights trained with darknet, use utils.load_weights
    print(model.summary())

    with open(cfg.TEST.ANNOT_PATH, 'r') as annotation_file:
        for num, line in enumerate(annotation_file):
            annotation = line.strip().split()
            image_path = annotation[0]
            image_name = image_path.split('/')[-1]
            image = cv2.imread(image_path)
            image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
            bbox_data_gt = np.array([list(map(int, box.split(','))) for box in annotation[1:]])

            if len(bbox_data_gt) == 0:
                bboxes_gt = []
                classes_gt = []
            else:
                bboxes_gt, classes_gt = bbox_data_gt[:, :4], bbox_data_gt[:, 4]
            ground_truth_path = os.path.join(ground_truth_dir_path, str(num) + '.txt')

            print('=> ground truth of %s:' % image_name)
            num_bbox_gt = len(bboxes_gt)
            with open(ground_truth_path, 'w') as f:
                for i in range(num_bbox_gt):
                    class_name = CLASSES[classes_gt[i]]
                    xmin, ymin, xmax, ymax = list(map(str, bboxes_gt[i]))
                    bbox_mess = ' '.join([class_name, xmin, ymin, xmax, ymax]) + '\n'
                    f.write(bbox_mess)
                    print('\t' + str(bbox_mess).strip())

            print('=> predict result of %s:' % image_name)
            predict_result_path = os.path.join(predicted_dir_path, str(num) + '.txt')

            # Predict Process
            image_size = image.shape[:2]
            image_data = utils.image_preporcess(np.copy(image), [INPUT_SIZE, INPUT_SIZE])
            image_data = image_data[np.newaxis, ...].astype(np.float32)

            pred_bbox = model.predict(image_data)
            pred_bbox = [tf.reshape(x, (-1, tf.shape(x)[-1])) for x in pred_bbox]
            pred_bbox = tf.concat(pred_bbox, axis=0)
            bboxes = utils.postprocess_boxes(pred_bbox, image_size, INPUT_SIZE, cfg.TEST.SCORE_THRESHOLD)
            bboxes = utils.nms(bboxes, cfg.TEST.IOU_THRESHOLD, method='nms')

            if cfg.TEST.DECTECTED_IMAGE_PATH is not None:
                image = utils.draw_bbox(image, bboxes)
                cv2.imwrite(cfg.TEST.DECTECTED_IMAGE_PATH + image_name, image)

            with open(predict_result_path, 'w') as f:
                for bbox in bboxes:
                    coor = np.array(bbox[:4], dtype=np.int32)
                    score = bbox[4]
                    class_ind = int(bbox[5])
                    class_name = CLASSES[class_ind]
                    score = '%.4f' % score
                    xmin, ymin, xmax, ymax = list(map(str, coor))
                    bbox_mess = ' '.join([class_name, score, xmin, ymin, xmax, ymax]) + '\n'
                    f.write(bbox_mess)
                    print('\t' + str(bbox_mess).strip())
def detection(vid):
    with tf.Session(graph=graph) as sess:
        return_value, frame = vid.read()
        if return_value:
            frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            image = Image.fromarray(frame)
        else:
            raise ValueError("No image!")
        frame_size = frame.shape[:2]
        image_data = utils.image_preporcess(np.copy(frame), [input_size, input_size])
        image_data = image_data[np.newaxis, ...]
        prev_time = time.time()

        pred_sbbox, pred_mbbox, pred_lbbox = sess.run(
            [return_tensors[1], return_tensors[2], return_tensors[3]],
            feed_dict={return_tensors[0]: image_data})

        pred_bbox = np.concatenate([np.reshape(pred_sbbox, (-1, 5 + num_classes)),
                                    np.reshape(pred_mbbox, (-1, 5 + num_classes)),
                                    np.reshape(pred_lbbox, (-1, 5 + num_classes))], axis=0)

        bboxes = utils.postprocess_boxes(pred_bbox, frame_size, input_size, 0.3)
        bboxes = utils.nms(bboxes, 0.45, method='nms')
        image, detected = utils.draw_bbox(frame, bboxes)
        detected = np.asarray(detected)
        print("------- frame i ---------")

        class_count = []
        for i in range(len(obj_classes)):  # 80
            obj_count = 0
            for j in range(len(detected)):
                if int(detected[j][5]) == i:
                    obj_count += 1
            class_count = np.append(class_count, obj_count)

        curr_time = time.time()
        exec_time = curr_time - prev_time
        result = np.asarray(image)
        info = "time: %.2f ms" % (1000 * exec_time)
        # cv2.namedWindow("result", cv2.WINDOW_AUTOSIZE)
        result = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
        return result, class_count


# if __name__ == "__main__":
#     while True:
#         result = detection(vid)
#         cv2.imshow("result", result)
#         if cv2.waitKey(1) & 0xFF == ord('q'): break
def main(_argv):
    import os
    os.environ["CUDA_VISIBLE_DEVICES"] = "0"

    if FLAGS.tiny:
        STRIDES = np.array(cfg.YOLO.STRIDES_TINY)
        ANCHORS = utils.get_anchors(cfg.YOLO.ANCHORS_TINY, FLAGS.tiny)
    else:
        STRIDES = np.array(cfg.YOLO.STRIDES)
        if FLAGS.model == 'yolov4':
            ANCHORS = utils.get_anchors(cfg.YOLO.ANCHORS, FLAGS.tiny)
        else:
            ANCHORS = utils.get_anchors(cfg.YOLO.ANCHORS_V3, FLAGS.tiny)
    NUM_CLASS = len(utils.read_class_names(cfg.YOLO.CLASSES))
    XYSCALE = cfg.YOLO.XYSCALE
    input_size = FLAGS.size

    if FLAGS.framework == 'tf':
        input_layer = tf.keras.layers.Input([input_size, input_size, 3])
        if FLAGS.tiny:
            feature_maps = YOLOv3_tiny(input_layer, NUM_CLASS)
            bbox_tensors = []
            for i, fm in enumerate(feature_maps):
                bbox_tensor = decode(fm, NUM_CLASS, i)
                bbox_tensors.append(bbox_tensor)
            model = tf.keras.Model(input_layer, bbox_tensors)
            utils.load_weights_tiny(model, FLAGS.weights)
        else:
            if FLAGS.model == 'yolov3':
                feature_maps = YOLOv3(input_layer, NUM_CLASS)
                bbox_tensors = []
                for i, fm in enumerate(feature_maps):
                    bbox_tensor = decode(fm, NUM_CLASS, i)
                    bbox_tensors.append(bbox_tensor)
                model = tf.keras.Model(input_layer, bbox_tensors)
                utils.load_weights_v3(model, FLAGS.weights)
            elif FLAGS.model == 'yolov4':
                feature_maps = YOLOv4(input_layer, NUM_CLASS)
                bbox_tensors = []
                for i, fm in enumerate(feature_maps):
                    bbox_tensor = decode(fm, NUM_CLASS, i)
                    bbox_tensors.append(bbox_tensor)
                model = tf.keras.Model(input_layer, bbox_tensors)
                utils.load_weights(model, FLAGS.weights)
        model.summary()
    else:
        # Load TFLite model and allocate tensors.
        interpreter = tf.lite.Interpreter(model_path=FLAGS.weights)
        interpreter.allocate_tensors()
        # Get input and output tensors.
        input_details = interpreter.get_input_details()
        output_details = interpreter.get_output_details()
        print(input_details)
        print(output_details)

    while True:
        frames = pipeline.wait_for_frames()
        depth_frame = frames.get_depth_frame()

        # Align the depth frame to color frame
        aligned_frames = align.process(frames)

        # Get aligned frames
        depth_frame = aligned_frames.get_depth_frame()
        color_frame = aligned_frames.get_color_frame()
        if not depth_frame or not color_frame:
            continue

        depth_intrin = depth_frame.profile.as_video_stream_profile().intrinsics
        color_intrin = color_frame.profile.as_video_stream_profile().intrinsics
        depth_to_color_extrin = depth_frame.profile.get_extrinsics_to(color_frame.profile)

        depth_image = np.asanyarray(depth_frame.get_data())
        color_image = np.asanyarray(color_frame.get_data())

        frame = cv2.cvtColor(color_image, cv2.COLOR_BGR2RGB)
        image = Image.fromarray(frame)
        frame_size = frame.shape[:2]
        image_data = utils.image_preprocess(np.copy(frame), [input_size, input_size])
        image_data = image_data[np.newaxis, ...].astype(np.float32)
        prev_time = time.time()

        scaled_depth = cv2.convertScaleAbs(depth_image, alpha=0.08)
        depth_colormap = cv2.applyColorMap(scaled_depth, cv2.COLORMAP_JET)

        if FLAGS.framework == 'tf':
            pred_bbox = model.predict(image_data)
        else:
            interpreter.set_tensor(input_details[0]['index'], image_data)
            interpreter.invoke()
            pred_bbox = [interpreter.get_tensor(output_details[i]['index'])
                         for i in range(len(output_details))]

        if FLAGS.model == 'yolov4':
            pred_bbox = utils.postprocess_bbbox(pred_bbox, ANCHORS, STRIDES, XYSCALE)
        else:
            pred_bbox = utils.postprocess_bbbox(pred_bbox, ANCHORS, STRIDES)
        bboxes = utils.postprocess_boxes(pred_bbox, frame_size, input_size, 0.25)
        bboxes = utils.nms(bboxes, 0.213, method='nms')

        view2d = np.zeros((480, 640, 3), np.uint8)
        for box in bboxes:
            x_mid = int((box[0] + box[2]) / 2)
            y_mid = int((box[1] + box[3]) / 2)
            pixel_depths = []
            for i in range(3):
                for j in range(3):
                    pixel_depths.append(
                        depth_frame.get_distance(int(x_mid + i - 1), int(y_mid + j - 1)))
            object_depth = statistics.median(pixel_depths)
            object_point = rs.rs2_deproject_pixel_to_point(depth_intrin, [x_mid, y_mid], object_depth)

            if box[5] == 67.0:
                print('found phone')
            if object_depth == 0.0:
                print('depth not found')

            depth_colormap[max(0, min(y_mid, 479)), max(0, min(x_mid, 639))] = [0, 255, 0]
            view2d[max(0, min(480 - int(object_point[2] * 350), 479)),
                   max(0, min(int(object_point[0] * 350) + 320, 639))] = [0, 255, 0]

            # print('x_min', box[0])
            # print('y_min', box[1])
            # print('x_max', box[2])
            # print('y_max', box[3])
            # print('probability', box[4])
            # print('object_id', box[5])
            # print('point', object_point)
            # print('-----')

        # curr_time = time.time()
        # exec_time = curr_time - prev_time
        # info = "time: %.2f ms" % (1000 * exec_time)
        # print(info)

        cv2.namedWindow("result", cv2.WINDOW_AUTOSIZE)
        image_color = utils.draw_bbox(frame, bboxes)
        result = cv2.cvtColor(image_color, cv2.COLOR_RGB2BGR)
        image_depth = utils.draw_bbox(depth_colormap, bboxes)
        images = np.hstack((view2d, image_depth))
        cv2.imshow("result", images)
        print('-----')
        if cv2.waitKey(1) & 0xFF == ord('q'):
            pipeline.stop()
            break
def detect_inference(self):
    self.update_process_message.emit('Detection Process')
    graph = tf.Graph()
    detections = []
    return_tensors = utils.read_pb_return_tensors(
        graph, self.window.pb_file, self.window.return_elements)
    # create the progress bar
    pbar = tqdm(total=self.window.total_frame_counter)

    with tf.Session(graph=graph) as sess:
        if self.window.writeVideo_flag:
            isOutput = True if self.window.output_path != "" else False
            if isOutput:
                video_FourCC = cv2.VideoWriter_fourcc(*'MPEG')
                out = cv2.VideoWriter(self.window.output_path, video_FourCC,
                                      self.window.media_fps, self.window.media_size)
                list_file = open('detection.txt', 'w')
                frame_index = -1

        while True:
            while not self.window.is_on:
                pass
            return_value, frame = self.window.vid.read()
            if return_value != True:
                break
            if return_value:
                image = Image.fromarray(frame)
                self.window.frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            else:
                raise ValueError("No image!")
            frame_size = frame.shape[:2]
            image_data = utils.image_preporcess(
                np.copy(frame), [self.window.input_size, self.window.input_size])
            image_data = image_data[np.newaxis, ...]

            pred_sbbox, pred_mbbox, pred_lbbox = sess.run(
                [return_tensors[1], return_tensors[2], return_tensors[3]],
                feed_dict={return_tensors[0]: image_data})

            pred_bbox = np.concatenate([np.reshape(pred_sbbox, (-1, 5 + self.window.num_classes)),
                                        np.reshape(pred_mbbox, (-1, 5 + self.window.num_classes)),
                                        np.reshape(pred_lbbox, (-1, 5 + self.window.num_classes))], axis=0)

            bboxes = utils.postprocess_boxes(pred_bbox, frame_size, self.window.input_size, 0.45)
            bboxes = utils.nms(bboxes, 0.45, method='nms')

            # save images
            # save_image.save_image(annotation_file, frame, vid.get(1), bboxes)
            image = utils.draw_bbox(frame, bboxes)
            # image = utils.draw_bbox(frame, bboxes, vid.get(1))

            # save in iou_tracker format
            detections = save_mod(bboxes, 0.6)

            result = np.asarray(image)
            if self.window.writeVideo_flag:
                # save a frame
                out.write(result)
            if self.window.showVideo_flag:
                self.window.update_graphic_viewer(result)
                pbar.update(1)
                self.window.ui.processrate.setText(str(pbar))
            else:
                pbar.update(1)
                self.window.ui.processrate.setText(str(pbar))

        # Release everything if job is finished
        out.release()
        pbar.close()

    # multi-object tracking
    trackers = track_viou_video(self.window.media_path, detections, 0.5, 0.6, 0.1, 23, 16, 'MEDIANFLOW', 1.0)

    # save the trackers
    with open(self.window.pickle_file_path, 'wb') as pk_f:
        pickle.dump(trackers, pk_f)
    self.window.ui.processrate.setText('=> saved trackers to pk file.')
def detect_pnet(self, im):
    """Get face candidates through pnet

    Parameters:
    ----------
    im: numpy array
        input image array, one batch

    Returns:
    -------
    boxes: numpy array
        detected boxes before calibration
    boxes_align: numpy array
        boxes after calibration
    """
    # im = self.unique_image_format(im)

    # original wider face data
    h, w, c = im.shape
    net_size = 12
    current_scale = float(net_size) / self.min_face_size  # find initial scale
    im_resized = self.resize_image(im, current_scale)  # scale = 1.0
    current_height, current_width, _ = im_resized.shape

    # fcn
    all_boxes = list()
    i = 0
    # print('-------------------------- >>> ')
    while min(current_height, current_width) >= net_size:
        feed_imgs = []
        img = torch.from_numpy(im_resized)
        img = img.unsqueeze_(0)
        feed_imgs = img.permute(0, 3, 1, 2)

        if torch.cuda.is_available():
            feed_imgs = feed_imgs.cuda()

        # receptive field is 12×12
        # 12×12 --> score
        # 12×12 --> bounding box
        cls_map, reg = self.pnet_detector(feed_imgs.float())

        cls_map_np = image_tools.convert_chwTensor_to_hwcNumpy(cls_map.cpu())
        reg_np = image_tools.convert_chwTensor_to_hwcNumpy(reg.cpu())

        boxes = self.generate_bounding_box(cls_map_np[0, :, :], reg_np, current_scale, self.thresh[0])
        # cv2.imshow('pnet_image', im_resized)
        # cv2.waitKey(0)
        # cv2.destroyAllWindows()

        # generate pyramid images
        current_scale *= self.scale_factor  # self.scale_factor = 0.709
        im_resized = self.resize_image(im, current_scale)
        current_height, current_width, _ = im_resized.shape

        if boxes.size == 0:
            continue

        # non-maximum suppresion
        keep = utils.nms(boxes[:, :5], 0.2, 'Union')
        boxes = boxes[keep]
        # print(boxes.shape)
        all_boxes.append(boxes)
        # i += 1

    if len(all_boxes) == 0:
        return None, None

    all_boxes = np.vstack(all_boxes)
    # print("shape of all boxes {0}".format(all_boxes.shape))
    # time.sleep(5)

    # merge the detection from first stage
    keep = utils.nms(all_boxes[:, 0:5], 0.3, 'Union')
    all_boxes = all_boxes[keep]
    # boxes = all_boxes[:, :5]

    # x2 - x1
    # y2 - y1
    bw = all_boxes[:, 2] - all_boxes[:, 0] + 1
    bh = all_boxes[:, 3] - all_boxes[:, 1] + 1

    boxes = np.vstack([
        all_boxes[:, 0],
        all_boxes[:, 1],
        all_boxes[:, 2],
        all_boxes[:, 3],
        all_boxes[:, 4],
    ])
    boxes = boxes.T

    # boxes = [x1, y1, x2, y2, score], reg = [px1, py1, px2, py2] (in prediction)
    align_topx = all_boxes[:, 0] + all_boxes[:, 5] * bw
    align_topy = all_boxes[:, 1] + all_boxes[:, 6] * bh
    align_bottomx = all_boxes[:, 2] + all_boxes[:, 7] * bw
    align_bottomy = all_boxes[:, 3] + all_boxes[:, 8] * bh

    # refine the boxes
    boxes_align = np.vstack([
        align_topx,
        align_topy,
        align_bottomx,
        align_bottomy,
        all_boxes[:, 4],
    ])
    boxes_align = boxes_align.T

    return boxes, boxes_align
def video_without_saving(ip, threshold):
    # socket to the peer
    des_socket = socket(AF_INET, SOCK_STREAM)
    # connect to the server
    des_socket.connect(('127.0.0.1', 8000))

    classes = utils.read_class_names(cfg.YOLO.CLASSES)
    num_classes = len(classes)
    return_elements = ["input/input_data:0", "pred_sbbox/concat_2:0",
                       "pred_mbbox/concat_2:0", "pred_lbbox/concat_2:0"]
    pb_file = "./yolov3_coco.pb"
    video_path = ip
    input_size = 416
    graph = tf.Graph()
    return_tensors = utils.read_pb_return_tensors(graph, pb_file, return_elements)

    with tf.Session(graph=graph) as sess:
        messageId = 0
        encode_param = [int(cv2.IMWRITE_JPEG_QUALITY), 50]  # to be changed later
        # vid = cv2.VideoCapture(video_path)
        vid = cv2.VideoCapture(0)
        while True:
            # time.sleep(0.01)
            curr_time = datetime.datetime.now()
            timestamp = '%s-%s-%s %s:%s:%s' % (curr_time.year, curr_time.month, curr_time.day,
                                               curr_time.hour, curr_time.minute, curr_time.second)
            return_value, frame = vid.read()
            result_, imgencode = cv2.imencode('.jpg', frame, encode_param)
            data = np.array(imgencode)
            stringData = data.tostring()
            length = len(stringData)
            if return_value:
                frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                image = Image.fromarray(frame)
            else:
                raise ValueError("No image!")
            frame_size = frame.shape[:2]
            image_data = utils.image_preporcess(np.copy(frame), [input_size, input_size])
            image_data = image_data[np.newaxis, ...]

            pred_sbbox, pred_mbbox, pred_lbbox = sess.run(
                [return_tensors[1], return_tensors[2], return_tensors[3]],
                feed_dict={return_tensors[0]: image_data})

            pred_bbox = np.concatenate([np.reshape(pred_sbbox, (-1, 5 + num_classes)),
                                        np.reshape(pred_mbbox, (-1, 5 + num_classes)),
                                        np.reshape(pred_lbbox, (-1, 5 + num_classes))], axis=0)

            bboxes = utils.postprocess_boxes(pred_bbox, frame_size, input_size, threshold)
            bboxes = utils.nms(bboxes, 0.45, method='nms')
            image = utils.draw_bbox(frame, bboxes)
            result = np.asarray(image)
            cv2.namedWindow("result", cv2.WINDOW_AUTOSIZE)
            result = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
            cv2.imshow("result", result)
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
            messageId += 1
            mess_send(des_socket, bboxes, timestamp, messageId, ip, length, stringData)
def detect_rnet(self, im, dets):
    """Get face candidates using rnet

    Parameters:
    ----------
    im: numpy array
        input image array
    dets: numpy array
        detection results of pnet

    Returns:
    -------
    boxes: numpy array
        detected boxes before calibration
    boxes_align: numpy array
        boxes after calibration
    """
    # im: an input image
    h, w, c = im.shape

    if dets is None:
        return None, None

    # return square boxes
    dets = self.square_bbox(dets)
    # rounds
    dets[:, 0:4] = np.round(dets[:, 0:4])

    [dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph] = self.pad(dets, w, h)
    num_boxes = dets.shape[0]

    '''
    # helper for setting RNet batch size
    batch_size = self.rnet_detector.batch_size
    ratio = float(num_boxes) / batch_size
    if ratio > 3 or ratio < 0.3:
        print "You may need to reset RNet batch size if this info appears frequently, \
face candidates:%d, current batch_size:%d"%(num_boxes, batch_size)
    '''

    cropped_ims_tensors = []
    for i in range(num_boxes):
        tmp = np.zeros((tmph[i], tmpw[i], 3), dtype=np.uint8)
        tmp[dy[i]:edy[i] + 1, dx[i]:edx[i] + 1, :] = im[y[i]:ey[i] + 1, x[i]:ex[i] + 1, :]
        crop_im = cv2.resize(tmp, (24, 24), interpolation=cv2.INTER_LINEAR)
        # crop_im = (crop_im - 127.5) / 128.
        # print('------------>>>>>. rrr')
        crop_im_tensor = torch.from_numpy(crop_im)
        # cropped_ims_tensors[i, :, :, :] = crop_im_tensor
        cropped_ims_tensors.append(crop_im_tensor)

    # feed_imgs = Variable(torch.stack(cropped_ims_tensors))
    feed_imgs = (torch.stack(cropped_ims_tensors))
    feed_imgs = feed_imgs.permute(0, 3, 1, 2)

    # -------------------------------------------------------------------------
    if self.rnet_detector.use_cuda:
        feed_imgs = feed_imgs.cuda()

    cls_map, reg = self.rnet_detector(feed_imgs.float())

    cls_map = cls_map.cpu().data.numpy()
    reg = reg.cpu().data.numpy()
    # landmark = landmark.cpu().data.numpy()

    keep_inds = np.where(cls_map > self.thresh[1])[0]
    if len(keep_inds) > 0:
        boxes = dets[keep_inds]
        cls = cls_map[keep_inds]
        reg = reg[keep_inds]
        # landmark = landmark[keep_inds]
    else:
        return None, None

    keep = utils.nms(boxes, 0.4)
    if len(keep) == 0:
        return None, None

    keep_cls = cls[keep]
    keep_boxes = boxes[keep]
    keep_reg = reg[keep]
    # keep_landmark = landmark[keep]

    bw = keep_boxes[:, 2] - keep_boxes[:, 0] + 1
    bh = keep_boxes[:, 3] - keep_boxes[:, 1] + 1

    boxes = np.vstack([
        keep_boxes[:, 0],
        keep_boxes[:, 1],
        keep_boxes[:, 2],
        keep_boxes[:, 3],
        keep_cls[:, 0]
    ])

    align_topx = keep_boxes[:, 0] + keep_reg[:, 0] * bw
    align_topy = keep_boxes[:, 1] + keep_reg[:, 1] * bh
    align_bottomx = keep_boxes[:, 2] + keep_reg[:, 2] * bw
    align_bottomy = keep_boxes[:, 3] + keep_reg[:, 3] * bh

    boxes_align = np.vstack([
        align_topx,
        align_topy,
        align_bottomx,
        align_bottomy,
        keep_cls[:, 0]
    ])

    boxes = boxes.T
    boxes_align = boxes_align.T

    return boxes, boxes_align
def car_alarm(image, car_detect, cfg, mask_path_list, alarm_list):
    # img_file = get_latest_image(cfg['scene/' + cfg['camera_no']])
    # image = cv2.imread(img_file)
    # if image is None:
    #     return

    # input_data transform
    input_size = cfg['input_size']
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    original_image = image
    original_image_size = original_image.shape[:2]
    image_data = utils.image_preporcess(np.copy(original_image), [input_size, input_size])
    image_data = image_data[np.newaxis, ...]

    # model infer
    pred_sbbox, pred_mbbox, pred_lbbox = car_detect.infer(image_data)

    # post process get final bboxes
    num_classes = cfg['num_classes']
    pred_bbox = np.concatenate([np.reshape(pred_sbbox, (-1, 5 + num_classes)),
                                np.reshape(pred_mbbox, (-1, 5 + num_classes)),
                                np.reshape(pred_lbbox, (-1, 5 + num_classes))], axis=0)
    bboxes = utils.postprocess_boxes(pred_bbox, original_image_size, input_size, 0.5)
    bboxes = utils.nms(bboxes, 0.45, method='nms')

    # # set judgement_matrix and estimate
    # obj_dict = cfg['obj_dict']
    # mask_dict = cfg['mask_dict']
    # estimate = get_obj_num(bboxes, mask_path_list, obj_dict)
    # judgement_matrix = [[0, 1, 1],   # area where parking is allowed
    #                     [1, 1, 1]]   # no-parking area
    # alarm = 0
    # for i, obj in obj_dict.items():
    #     for j, mask in mask_dict.items():
    #         if judgement_matrix[j][i] == 1 and estimate[j, i] != 0:
    #             # print('on %s there are %d %s' % (mask_dict[j], estimate[j, i], obj_dict[i][0]))
    #             alarm = 1
    # res = {'alarm': alarm}

    res = {}
    objs = []
    for bbox in bboxes:
        objs.append({
            'x1': bbox[0],
            'y1': bbox[1],
            'x2': bbox[2],
            'y2': bbox[3],
            'confidence': bbox[4],
            'class': bbox[5]
        })
    res['objs'] = objs
    return res
def detect_onet(self, im, dets):
    """Get face candidates using onet

    Parameters:
    ----------
    im: numpy array
        input image array
    dets: numpy array
        detection results of rnet

    Returns:
    -------
    boxes_align: numpy array
        boxes after calibration
    landmarks_align: numpy array
        landmarks after calibration
    """
    h, w, c = im.shape

    if dets is None:
        return None, None

    dets = self.square_bbox(dets)
    dets[:, 0:4] = np.round(dets[:, 0:4])

    [dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph] = self.pad(dets, w, h)
    num_boxes = dets.shape[0]

    cropped_ims_tensors = []
    for i in range(num_boxes):
        tmp = np.zeros((tmph[i], tmpw[i], 3), dtype=np.uint8)
        tmp[dy[i]:edy[i] + 1, dx[i]:edx[i] + 1, :] = im[y[i]:ey[i] + 1, x[i]:ex[i] + 1, :]
        crop_im = cv2.resize(tmp, (48, 48), interpolation=cv2.INTER_LINEAR)
        crop_im_tensor = torch.from_numpy(crop_im)
        # cropped_ims_tensors[i, :, :, :] = crop_im_tensor
        cropped_ims_tensors.append(crop_im_tensor)

    # feed_imgs = Variable(torch.stack(cropped_ims_tensors))
    feed_imgs = (torch.stack(cropped_ims_tensors))
    feed_imgs = feed_imgs.permute(0, 3, 1, 2)

    if self.rnet_detector.use_cuda:
        feed_imgs = feed_imgs.cuda()

    cls_map, reg = self.onet_detector(feed_imgs.float())

    cls_map = cls_map.cpu().data.numpy()
    reg = reg.cpu().data.numpy()
    # landmark = landmark.cpu().data.numpy()

    keep_inds = np.where(cls_map > self.thresh[2])[0]
    if len(keep_inds) > 0:
        boxes = dets[keep_inds]
        cls = cls_map[keep_inds]
        reg = reg[keep_inds]
        # landmark = landmark[keep_inds]
    else:
        return None, None

    keep = utils.nms(boxes, 0.6, mode="Minimum")
    if len(keep) == 0:
        return None, None

    keep_cls = cls[keep]
    keep_boxes = boxes[keep]
    keep_reg = reg[keep]
    # keep_landmark = landmark[keep]

    bw = keep_boxes[:, 2] - keep_boxes[:, 0] + 1
    bh = keep_boxes[:, 3] - keep_boxes[:, 1] + 1

    align_topx = keep_boxes[:, 0] + keep_reg[:, 0] * bw
    align_topy = keep_boxes[:, 1] + keep_reg[:, 1] * bh
    align_bottomx = keep_boxes[:, 2] + keep_reg[:, 2] * bw
    align_bottomy = keep_boxes[:, 3] + keep_reg[:, 3] * bh
    # align_landmark_topx = keep_boxes[:, 0]
    # align_landmark_topy = keep_boxes[:, 1]

    boxes_align = np.vstack([
        align_topx,
        align_topy,
        align_bottomx,
        align_bottomy,
        keep_cls[:, 0],
    ])
    boxes_align = boxes_align.T

    return None, boxes_align
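# NOTE: utils.nms is called throughout these detectors but its implementation is not part of
# this excerpt. For reference, a minimal NumPy sketch of greedy IoU-based NMS over rows of
# [x1, y1, x2, y2, score] (a generic version, not necessarily identical to the utils.nms above):
import numpy as np

def nms(boxes, iou_threshold):
    """Greedy NMS: keep the highest-scoring boxes, drop overlaps above the IoU threshold."""
    x1, y1, x2, y2, scores = boxes[:, 0], boxes[:, 1], boxes[:, 2], boxes[:, 3], boxes[:, 4]
    areas = (x2 - x1 + 1) * (y2 - y1 + 1)
    order = scores.argsort()[::-1]          # indices sorted by descending score
    keep = []
    while order.size > 0:
        i = order[0]
        keep.append(i)
        # intersection of the current best box with all remaining boxes
        xx1 = np.maximum(x1[i], x1[order[1:]])
        yy1 = np.maximum(y1[i], y1[order[1:]])
        xx2 = np.minimum(x2[i], x2[order[1:]])
        yy2 = np.minimum(y2[i], y2[order[1:]])
        w = np.maximum(0.0, xx2 - xx1 + 1)
        h = np.maximum(0.0, yy2 - yy1 + 1)
        inter = w * h
        iou = inter / (areas[i] + areas[order[1:]] - inter)
        # keep only the boxes whose overlap with the chosen box is below the threshold
        order = order[1:][iou <= iou_threshold]
    return keep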
prev_time = time.time()
# make predictions, output [[b,52,52,3,5+c], [b,26,26,3,5+c], [b,13,13,3,5+c]]
pred_bbox = model.predict_on_batch(image_data)
curr_time = time.time()
exec_time = curr_time - prev_time

pred_bbox = [tf.reshape(x, (-1, tf.shape(x)[-1])) for x in pred_bbox]  # reshape to [[-1,5+c], [-1,5+c], [-1,5+c]]
pred_bbox = tf.concat(pred_bbox, axis=0)  # concat into [-1, 5+c]
bboxes = utils.postprocess_boxes(pred_bbox, frame_size, input_size, 0.3)  # discard low score and out-of-bound boxes
bboxes = utils.nms(bboxes, 0.45, method='nms')  # discard duplicate boxes which point at the same object

image = utils.draw_bbox(frame, bboxes)  # draw boxes on the image
result = np.asarray(image)  # convert into numpy array
info = "time: %.2f ms" % (1000 * exec_time)
cv2.putText(result, text=info, org=(50, 70), fontFace=cv2.FONT_HERSHEY_SIMPLEX,
            fontScale=1, color=(255, 0, 0), thickness=2)

cv2.namedWindow("result", cv2.WINDOW_AUTOSIZE)
result = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
cv2.imshow("result", result)
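# NOTE: as a quick standalone sanity check of the shape bookkeeping described in the comments
# above (dummy tensors only, with an assumed batch size of 1 and 80 classes; not part of the
# detection pipeline itself):
import tensorflow as tf

b, c = 1, 80  # assumed batch size and class count for the check
dummy_pred = [tf.zeros((b, 52, 52, 3, 5 + c)),
              tf.zeros((b, 26, 26, 3, 5 + c)),
              tf.zeros((b, 13, 13, 3, 5 + c))]

flat = [tf.reshape(x, (-1, tf.shape(x)[-1])) for x in dummy_pred]
merged = tf.concat(flat, axis=0)

# 52*52*3 + 26*26*3 + 13*13*3 = 10647 candidate boxes, each with 5 + c values
print(merged.shape)   # (10647, 85)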
def test(model, SCORE_THRESHOLD, draw_boxes=False):
    INPUT_SIZE = 416
    CLASSES = utils.read_class_names(cfg.YOLO.CLASSES)

    # TODO cups: changed sections are marked with TODO
    predicted_dir_path = "./mAP/predicted"
    ground_truth_dir_path = "./mAP/ground-truth"
    if os.path.exists(predicted_dir_path):
        shutil.rmtree(predicted_dir_path)
    if os.path.exists(ground_truth_dir_path):
        shutil.rmtree(ground_truth_dir_path)
    if os.path.exists(cfg.TEST.DECTECTED_IMAGE_PATH):
        shutil.rmtree(cfg.TEST.DECTECTED_IMAGE_PATH)
    os.mkdir(cfg.TEST.DECTECTED_IMAGE_PATH)
    os.mkdir(predicted_dir_path)
    os.mkdir(ground_truth_dir_path)

    # import show_tools
    time_sum = 0
    with open(cfg.TEST.ANNOT_PATH, "r") as annotation_file:
        ori_list = annotation_file.readlines()
        '''
        ori_fifty_list = ori_list[-50:]
        other_list = ori_list[:-50]
        other_25_list = random.choices(other_list, k=25)
        annotation_file_list = ori_fifty_list + other_25_list
        '''
        test_sum = len(ori_list)
        print("test " + str(test_sum) + " pictures")
        j = "|"
        k = "="
        kong = " "
        for num, line in enumerate(ori_list):
            # show_tools.view_bar('image_enhancement', num, len(annotation_file.readlines()))
            annotation = line.strip().split()
            image_path = annotation[0]
            image_name = image_path.split("/")[-1]
            img_name = image_name.split(".")[0]
            bbox_data_gt = np.array(
                [list(map(int, box.split(","))) for box in annotation[1:]], dtype=np.int16)

            if len(bbox_data_gt) == 0:
                bboxes_gt = []
                classes_gt = []
            else:
                bboxes_gt, classes_gt = bbox_data_gt[:, :4], bbox_data_gt[:, 4]
            ground_truth_path = os.path.join(ground_truth_dir_path, img_name + ".txt")

            # TODO cups
            # print('=> ground truth of %s:' % image_name)
            num_bbox_gt = len(bboxes_gt)
            with open(ground_truth_path, "w") as f:
                for i in range(num_bbox_gt):
                    class_name = CLASSES[classes_gt[i] - 1]
                    xmin, ymin, xmax, ymax = list(map(str, bboxes_gt[i]))
                    bbox_mess = " ".join([class_name, xmin, ymin, xmax, ymax]) + "\n"
                    f.write(bbox_mess)

            # TODO cups
            # print('=> predict result of %s:' % image_name)
            predict_result_path = os.path.join(predicted_dir_path, img_name + ".txt")

            # Predict Process
            image = cv2.imread(image_path)
            image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
            image_size = image.shape[:2]
            image_data = utils.image_preporcess(np.copy(image), [INPUT_SIZE, INPUT_SIZE])
            image_data = image_data[np.newaxis, ...].astype(np.float32)

            predict_model = tf.keras.Model(
                inputs=model.get_layer('input_1').input,
                outputs=[
                    model.get_layer('tf_op_layer_concat_4').output,
                    model.get_layer('tf_op_layer_concat_7').output,
                    model.get_layer('tf_op_layer_concat_10').output
                ])

            start = timeit.default_timer()
            pred_bbox = predict_model(image_data)
            pred_bbox = [tf.reshape(x, (-1, tf.shape(x)[-1])) for x in pred_bbox]
            pred_bbox = tf.concat(pred_bbox, axis=0)
            bboxes = utils.postprocess_boxes(pred_bbox, image_size, INPUT_SIZE, SCORE_THRESHOLD)
            bboxes = utils.nms(bboxes, cfg.TEST.IOU_THRESHOLD, method="nms")
            end = timeit.default_timer()
            time_sum += end - start

            if cfg.TEST.DECTECTED_IMAGE_PATH is not None and draw_boxes == True:
                image = utils.draw_bbox(image, bboxes)
                cv2.imwrite(cfg.TEST.DECTECTED_IMAGE_PATH + image_name, image)

            with open(predict_result_path, "w") as f:
                for bbox in bboxes:
                    coor = np.array(bbox[:4], dtype=np.int32)
                    score = bbox[4]
                    class_ind = int(bbox[5])
                    class_name = CLASSES[class_ind]
                    score = "%.4f" % score
                    xmin, ymin, xmax, ymax = list(map(str, coor))
                    bbox_mess = (" ".join([class_name, score, xmin, ymin, xmax, ymax]) + "\n")
                    f.write(bbox_mess)

            final_str = j + k * (int((num + 1) / 3) + 1) + ">" + kong * (int(
                (test_sum - 1 - num) / 3)) + "| %d" % (int((num + 1) / test_sum * 100)) + "%"
print(final_str, end='\r') print("") print("average inference time:", time_sum / (num + 1)) print("test prediction done")
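# The loop above renders its progress bar by hand with j/k/kong; a compact
# equivalent helper (a sketch, not part of the original script) that prints
# "|====>    | 42%" on a single line.
def progress_bar(done, total, width=40):
    filled = int(width * done / total)
    pct = int(100 * done / total)
    return "|" + "=" * filled + ">" + " " * (width - filled) + "| %d%%" % pct

# Usage inside the loop:
# print(progress_bar(num + 1, test_sum), end='\r')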
def main(_argv): INPUT_SIZE = FLAGS.size if FLAGS.tiny: STRIDES = np.array(cfg.YOLO.STRIDES_TINY) ANCHORS = utils.get_anchors(cfg.YOLO.ANCHORS_TINY, FLAGS.tiny) else: STRIDES = np.array(cfg.YOLO.STRIDES) if FLAGS.model == 'yolov4': ANCHORS = utils.get_anchors(cfg.YOLO.ANCHORS, FLAGS.tiny) else: ANCHORS = utils.get_anchors(cfg.YOLO.ANCHORS_V3, FLAGS.tiny) NUM_CLASS = len(utils.read_class_names(cfg.YOLO.CLASSES)) CLASSES = utils.read_class_names(cfg.YOLO.CLASSES) predicted_dir_path = './mAP/predicted' ground_truth_dir_path = './mAP/ground-truth' if os.path.exists(predicted_dir_path): shutil.rmtree(predicted_dir_path) if os.path.exists(ground_truth_dir_path): shutil.rmtree(ground_truth_dir_path) if os.path.exists(cfg.TEST.DECTECTED_IMAGE_PATH): shutil.rmtree(cfg.TEST.DECTECTED_IMAGE_PATH) os.mkdir(predicted_dir_path) os.mkdir(ground_truth_dir_path) os.mkdir(cfg.TEST.DECTECTED_IMAGE_PATH) # Build Model if FLAGS.framework == 'tf': input_layer = tf.keras.layers.Input([INPUT_SIZE, INPUT_SIZE, 3]) if FLAGS.tiny: feature_maps = YOLOv3_tiny(input_layer, NUM_CLASS) bbox_tensors = [] for i, fm in enumerate(feature_maps): bbox_tensor = decode(fm, NUM_CLASS, i) bbox_tensors.append(bbox_tensor) model = tf.keras.Model(input_layer, bbox_tensors) utils.load_weights_tiny(model, FLAGS.weights) else: if FLAGS.model == 'yolov3': feature_maps = YOLOv3(input_layer, NUM_CLASS) bbox_tensors = [] for i, fm in enumerate(feature_maps): bbox_tensor = decode(fm, NUM_CLASS, i) bbox_tensors.append(bbox_tensor) model = tf.keras.Model(input_layer, bbox_tensors) utils.load_weights_v3(model, FLAGS.weights) elif FLAGS.model == 'yolov4': feature_maps = YOLOv4(input_layer, NUM_CLASS) bbox_tensors = [] for i, fm in enumerate(feature_maps): bbox_tensor = decode(fm, NUM_CLASS, i) bbox_tensors.append(bbox_tensor) model = tf.keras.Model(input_layer, bbox_tensors) utils.load_weights(model, FLAGS.weights) else: # Load TFLite model and allocate tensors. interpreter = tf.lite.Interpreter(model_path=FLAGS.weights) interpreter.allocate_tensors() # Get input and output tensors. 
input_details = interpreter.get_input_details() output_details = interpreter.get_output_details() print(input_details) print(output_details) num_lines = sum(1 for line in open(FLAGS.annotation_path)) with open(cfg.TEST.ANNOT_PATH, 'r') as annotation_file: for num, line in enumerate(annotation_file): annotation = line.strip().split() image_path = annotation[0] image_name = image_path.split('/')[-1] image = cv2.imread(image_path) image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) bbox_data_gt = np.array( [list(map(int, box.split(','))) for box in annotation[1:]]) if len(bbox_data_gt) == 0: bboxes_gt = [] classes_gt = [] else: bboxes_gt, classes_gt = bbox_data_gt[:, :4], bbox_data_gt[:, 4] ground_truth_path = os.path.join(ground_truth_dir_path, str(num) + '.txt') print('=> ground truth of %s:' % image_name) num_bbox_gt = len(bboxes_gt) with open(ground_truth_path, 'w') as f: for i in range(num_bbox_gt): class_name = CLASSES[classes_gt[i]] xmin, ymin, xmax, ymax = list(map(str, bboxes_gt[i])) bbox_mess = ' '.join([class_name, xmin, ymin, xmax, ymax ]) + '\n' f.write(bbox_mess) print('\t' + str(bbox_mess).strip()) print('=> predict result of %s:' % image_name) predict_result_path = os.path.join(predicted_dir_path, str(num) + '.txt') # Predict Process image_size = image.shape[:2] image_data = utils.image_preporcess(np.copy(image), [INPUT_SIZE, INPUT_SIZE]) image_data = image_data[np.newaxis, ...].astype(np.float32) if FLAGS.framework == "tf": pred_bbox = model.predict(image_data) else: interpreter.set_tensor(input_details[0]['index'], image_data) interpreter.invoke() pred_bbox = [ interpreter.get_tensor(output_details[i]['index']) for i in range(len(output_details)) ] if FLAGS.model == 'yolov3': pred_bbox = utils.postprocess_bbbox(pred_bbox, ANCHORS, STRIDES) elif FLAGS.model == 'yolov4': XYSCALE = cfg.YOLO.XYSCALE pred_bbox = utils.postprocess_bbbox(pred_bbox, ANCHORS, STRIDES, XYSCALE=XYSCALE) pred_bbox = tf.concat(pred_bbox, axis=0) bboxes = utils.postprocess_boxes(pred_bbox, image_size, INPUT_SIZE, cfg.TEST.SCORE_THRESHOLD) bboxes = utils.nms(bboxes, cfg.TEST.IOU_THRESHOLD, method='nms') if cfg.TEST.DECTECTED_IMAGE_PATH is not None: image = utils.draw_bbox(image, bboxes) cv2.imwrite(cfg.TEST.DECTECTED_IMAGE_PATH + image_name, image) with open(predict_result_path, 'w') as f: for bbox in bboxes: coor = np.array(bbox[:4], dtype=np.int32) score = bbox[4] class_ind = int(bbox[5]) class_name = CLASSES[class_ind] score = '%.4f' % score xmin, ymin, xmax, ymax = list(map(str, coor)) bbox_mess = ' '.join( [class_name, score, xmin, ymin, xmax, ymax]) + '\n' f.write(bbox_mess) print('\t' + str(bbox_mess).strip()) print(num, num_lines)
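# The annotation file read above uses one image per line in the form
# "image_path x1,y1,x2,y2,cls x1,y1,x2,y2,cls ...". A small parsing sketch
# (the helper name is illustrative, not an existing utils function):
def parse_annotation_line(line):
    parts = line.strip().split()
    image_path = parts[0]
    boxes = [list(map(int, box.split(','))) for box in parts[1:]]  # [x1, y1, x2, y2, cls]
    return image_path, boxes

# parse_annotation_line("img/001.jpg 48,240,195,371,11 8,12,352,498,14")
# -> ("img/001.jpg", [[48, 240, 195, 371, 11], [8, 12, 352, 498, 14]])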
def main(_argv): physical_devices = tf.config.experimental.list_physical_devices('GPU') for physical_device in physical_devices: tf.config.experimental.set_memory_growth(physical_device, True) if FLAGS.tiny: STRIDES = np.array(cfg.YOLO.STRIDES_TINY) XYSCALE = cfg.YOLO.XYSCALE_TINY if FLAGS.model == 'yolov4': ANCHORS = utils.get_anchors(cfg.YOLO.ANCHORS_TINY, FLAGS.tiny) else: ANCHORS = utils.get_anchors(cfg.YOLO.ANCHORS_TINY_V3, FLAGS.tiny) else: STRIDES = np.array(cfg.YOLO.STRIDES) XYSCALE = cfg.YOLO.XYSCALE if FLAGS.model == 'yolov4': ANCHORS = utils.get_anchors(cfg.YOLO.ANCHORS, FLAGS.tiny) else: ANCHORS = utils.get_anchors(cfg.YOLO.ANCHORS_V3, FLAGS.tiny) CLASSES = utils.read_class_names(cfg.YOLO.CLASSES) NUM_CLASSES = len(CLASSES) input_size = FLAGS.size image_path = FLAGS.image original_image = cv2.imread(image_path) original_image = cv2.cvtColor(original_image, cv2.COLOR_BGR2RGB) original_image_size = original_image.shape[:2] image_data = utils.image_preprocess(np.copy(original_image), [input_size, input_size]) image_data = image_data[np.newaxis, ...].astype(np.float32) if FLAGS.framework == 'tf': input_layer = tf.keras.layers.Input([input_size, input_size, 3]) if FLAGS.tiny: if FLAGS.model == 'yolov3': feature_maps = YOLOv3_tiny(input_layer, NUM_CLASSES) else: feature_maps = YOLOv4_tiny(input_layer, NUM_CLASSES) bbox_tensors = [] for i, fm in enumerate(feature_maps): bbox_tensor = decode(fm, NUM_CLASSES, i) bbox_tensors.append(bbox_tensor) model = tf.keras.Model(input_layer, bbox_tensors) utils.load_weights_tiny(model, FLAGS.weights, FLAGS.model) else: if FLAGS.model == 'yolov3': feature_maps = YOLOv3(input_layer, NUM_CLASSES) bbox_tensors = [] for i, fm in enumerate(feature_maps): bbox_tensor = decode(fm, NUM_CLASSES, i) bbox_tensors.append(bbox_tensor) model = tf.keras.Model(input_layer, bbox_tensors) utils.load_weights_v3(model, FLAGS.weights) elif FLAGS.model == 'yolov4': feature_maps = YOLOv4(input_layer, NUM_CLASSES) bbox_tensors = [] for i, fm in enumerate(feature_maps): bbox_tensor = decode(fm, NUM_CLASSES, i) bbox_tensors.append(bbox_tensor) model = tf.keras.Model(input_layer, bbox_tensors) if FLAGS.weights.split(".")[len(FLAGS.weights.split(".")) - 1] == "weights": utils.load_weights(model, FLAGS.weights) else: model.load_weights(FLAGS.weights).expect_partial() model.summary() pred_bbox = model.predict(image_data) elif FLAGS.framework == 'tflite': interpreter = tf.lite.Interpreter(model_path=FLAGS.weights) interpreter.allocate_tensors() input_details = interpreter.get_input_details() output_details = interpreter.get_output_details() interpreter.set_tensor(input_details[0]['index'], image_data) interpreter.invoke() pred_bbox = [ interpreter.get_tensor(output_details[i]['index']) for i in range(len(output_details)) ] elif FLAGS.framework == 'trt': saved_model_loaded = tf.saved_model.load(FLAGS.weights, tags=[tag_constants.SERVING]) infer = saved_model_loaded.signatures['serving_default'] batched_input = tf.constant(image_data) pred_bbox = [] result = infer(batched_input) for _, value in result.items(): value = value.numpy() pred_bbox.append(value) if FLAGS.model == 'yolov4': pred_bbox = utils.postprocess_bbbox(pred_bbox, ANCHORS, STRIDES, XYSCALE) else: pred_bbox = utils.postprocess_bbbox(pred_bbox, ANCHORS, STRIDES) bboxes = utils.postprocess_boxes(pred_bbox, original_image_size, input_size, 0.5) # 0.25 bboxes = utils.nms(bboxes, 0.5, method='nms') # 0.213 image = utils.draw_bbox(original_image, bboxes, classes=CLASSES) image = cv2.cvtColor(np.array(image), 
cv2.COLOR_RGB2BGR) cv2.imwrite(FLAGS.output, image)
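# utils.image_preprocess above is typically a letterbox resize: scale to fit,
# pad with gray, normalize to [0, 1]. A sketch under that assumption (the
# real implementation may differ in padding value or ordering):
import cv2
import numpy as np

def letterbox(image, target_size):
    th, tw = target_size
    h, w = image.shape[:2]
    scale = min(tw / w, th / h)
    nw, nh = int(scale * w), int(scale * h)
    resized = cv2.resize(image, (nw, nh))
    padded = np.full((th, tw, 3), 128.0, dtype=np.float32)
    dw, dh = (tw - nw) // 2, (th - nh) // 2
    padded[dh:dh + nh, dw:dw + nw, :] = resized
    return padded / 255.0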
if os.path.isfile(img_path_file): img = cv2.imread(img_path_file) img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) img_size = img.shape[:2] image_data = utils.image_preporcess(np.copy(img), [input_size, input_size]) image_data = image_data[np.newaxis, ...] pred_sbbox, pred_mbbox, pred_lbbox = sess.run([return_tensors[1], return_tensors[2], return_tensors[3]], feed_dict={ return_tensors[0]: image_data}) pred_bbox = np.concatenate([np.reshape(pred_sbbox, (-1, 5 + num_classes)), np.reshape(pred_mbbox, (-1, 5 + num_classes)), np.reshape(pred_lbbox, (-1, 5 + num_classes))], axis=0) bboxes = utils.postprocess_boxes(pred_bbox, img_size, input_size, score_thresh) bboxes = utils.nms(bboxes, iou_thresh, method='nms') if len(bboxes) > 0: image = utils.draw_bbox(img, bboxes) #image = Image.fromarray(image) #image.show() out_img = np.asarray(image) score = bboxes[0][4] file_path, file_name = os.path.split(img_path_file) file, postfix = os.path.splitext(file_name) out_file = os.path.join(out_path, str(score) + '_' + file_name) cv2.imwrite(out_file, out_img) elif os.path.isdir(img_path_file): img_files = os.listdir(img_path_file)
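# utils.nms is used as a black box above; a minimal sketch of greedy per-class
# NMS over a float array with rows [x1, y1, x2, y2, score, cls] (assumed
# layout), to show what the iou_thresh parameter controls:
import numpy as np

def iou(box, boxes):
    x1 = np.maximum(box[0], boxes[:, 0]); y1 = np.maximum(box[1], boxes[:, 1])
    x2 = np.minimum(box[2], boxes[:, 2]); y2 = np.minimum(box[3], boxes[:, 3])
    inter = np.maximum(0, x2 - x1) * np.maximum(0, y2 - y1)
    area_a = (box[2] - box[0]) * (box[3] - box[1])
    area_b = (boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1])
    return inter / np.maximum(area_a + area_b - inter, 1e-9)

def greedy_nms(dets, iou_thresh):
    keep = []
    for cls in np.unique(dets[:, 5]):
        d = dets[dets[:, 5] == cls]
        d = d[np.argsort(-d[:, 4])]                  # highest score first
        while len(d):
            best, d = d[0], d[1:]
            keep.append(best)
            d = d[iou(best, d[:, :4]) < iou_thresh]  # drop overlapping boxes
    return np.array(keep)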
def work_frame(filename, count): global flag,model,server_addr,input_size start_time = datetime.datetime.now() # GET the frame from the server via its /static route video_id = "video_" + str(filename) + "_frame_" + str(count) + ".jpg" ref_file = "static/" + video_id response = requests.get(server_addr + ref_file) # Decode the downloaded bytes into an OpenCV image arr = np.asarray(bytearray(response.content), dtype=np.uint8) original_image = cv2.imdecode(arr, -1) ###### OBJECT DETECTION CODE ####### # Read class names class_names = {} with open(cfg.YOLO.CLASSES, 'r') as data: for ID, name in enumerate(data): class_names[ID] = name.strip('\n') # Set up TensorFlow, Keras and YOLOv3 original_image = cv2.cvtColor(original_image, cv2.COLOR_BGR2RGB) original_image_size = original_image.shape[:2] image_data = utils.image_preporcess(np.copy(original_image), [input_size, input_size]) image_data = image_data[np.newaxis, ...].astype(np.float32) if flag: input_layer = tf.keras.layers.Input([input_size, input_size, 3]) feature_maps = YOLOv3(input_layer) bbox_tensors = [] for i, fm in enumerate(feature_maps): bbox_tensor = decode(fm, i) bbox_tensors.append(bbox_tensor) model = tf.keras.Model(input_layer, bbox_tensors) utils.load_weights(model, "./yolov3.weights") flag = False pred_bbox = model.predict(image_data) pred_bbox = [tf.reshape(x, (-1, tf.shape(x)[-1])) for x in pred_bbox] pred_bbox = tf.concat(pred_bbox, axis=0) bboxes = utils.postprocess_boxes(pred_bbox, original_image_size, input_size, 0.3) bboxes = utils.nms(bboxes, 0.45, method='nms') # The objects are detected and boxed; collect their class names into a list objects_detected = [] for x0, y0, x1, y1, prob, class_id in bboxes: objects_detected.append(class_names[int(class_id)]) ### END OF OBJECT DETECTION CODE ### print(objects_detected) final_time = datetime.datetime.now() - start_time # Build the JSON with frame info and post it to the server's /return route final_dict = {} people_count = 0 for obj in objects_detected: if str(obj) == "person": people_count += 1 if str(obj) in final_dict: final_dict[str(obj)] += 1 else: final_dict[str(obj)] = 1 final_json = { "video_id": filename, "frame_no": count, "processing_time": str(final_time), "people_detected": people_count, "objects_detected": json.dumps(final_dict) } requests.post(server_addr + "return", json=final_json) return "\nDONE frame n. " + str(count) + " of video " + filename + "!\n"
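# The counting loop above can be expressed with collections.Counter; this
# sketch builds the same payload fields as final_json (it is not a drop-in
# replacement for the original function):
import json
from collections import Counter

def build_payload(filename, count, elapsed, objects_detected):
    counts = Counter(objects_detected)
    return {
        "video_id": filename,
        "frame_no": count,
        "processing_time": str(elapsed),
        "people_detected": counts.get("person", 0),
        "objects_detected": json.dumps(dict(counts)),
    }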
def main(_argv): import os os.environ["CUDA_VISIBLE_DEVICES"] = "0" if FLAGS.tiny: STRIDES = np.array(cfg.YOLO.STRIDES_TINY) ANCHORS = utils.get_anchors(cfg.YOLO.ANCHORS_TINY, FLAGS.tiny) else: STRIDES = np.array(cfg.YOLO.STRIDES) if FLAGS.model == 'yolov4': ANCHORS = utils.get_anchors(cfg.YOLO.ANCHORS, FLAGS.tiny) else: ANCHORS = utils.get_anchors(cfg.YOLO.ANCHORS_V3, FLAGS.tiny) NUM_CLASS = len(utils.read_class_names(cfg.YOLO.CLASSES)) XYSCALE = cfg.YOLO.XYSCALE input_size = FLAGS.size video_path = FLAGS.video if video_path == 'none': possible_camera_index = [5, 6, 7, 8] print("Searching for camera...") for camera_index in possible_camera_index: vid = cv2.VideoCapture(camera_index) return_value, frame = vid.read() if frame is not None: print("Camera found at index", camera_index) break else: print("Video from: ", video_path) vid = cv2.VideoCapture(video_path) if FLAGS.framework == 'tf': input_layer = tf.keras.layers.Input([input_size, input_size, 3]) if FLAGS.tiny: feature_maps = YOLOv3_tiny(input_layer, NUM_CLASS) bbox_tensors = [] for i, fm in enumerate(feature_maps): bbox_tensor = decode(fm, NUM_CLASS, i) bbox_tensors.append(bbox_tensor) model = tf.keras.Model(input_layer, bbox_tensors) utils.load_weights_tiny(model, FLAGS.weights) else: if FLAGS.model == 'yolov3': feature_maps = YOLOv3(input_layer, NUM_CLASS) bbox_tensors = [] for i, fm in enumerate(feature_maps): bbox_tensor = decode(fm, NUM_CLASS, i) bbox_tensors.append(bbox_tensor) model = tf.keras.Model(input_layer, bbox_tensors) utils.load_weights_v3(model, FLAGS.weights) elif FLAGS.model == 'yolov4': feature_maps = YOLOv4(input_layer, NUM_CLASS) bbox_tensors = [] for i, fm in enumerate(feature_maps): bbox_tensor = decode(fm, NUM_CLASS, i) bbox_tensors.append(bbox_tensor) model = tf.keras.Model(input_layer, bbox_tensors) utils.load_weights(model, FLAGS.weights) model.summary() else: # Load TFLite model and allocate tensors. interpreter = tf.lite.Interpreter(model_path=FLAGS.weights) interpreter.allocate_tensors() # Get input and output tensors. input_details = interpreter.get_input_details() output_details = interpreter.get_output_details() print(input_details) print(output_details) while True: return_value, frame = vid.read() if return_value: frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) image = Image.fromarray(frame) else: raise ValueError("No image! 
Try with another video format") frame_size = frame.shape[:2] image_data = utils.image_preprocess(np.copy(frame), [input_size, input_size]) image_data = image_data[np.newaxis, ...].astype(np.float32) prev_time = time.time() if FLAGS.framework == 'tf': pred_bbox = model.predict(image_data) else: interpreter.set_tensor(input_details[0]['index'], image_data) interpreter.invoke() pred_bbox = [ interpreter.get_tensor(output_details[i]['index']) for i in range(len(output_details)) ] if FLAGS.model == 'yolov4': pred_bbox = utils.postprocess_bbbox(pred_bbox, ANCHORS, STRIDES, XYSCALE) else: pred_bbox = utils.postprocess_bbbox(pred_bbox, ANCHORS, STRIDES) bboxes = utils.postprocess_boxes(pred_bbox, frame_size, input_size, 0.25) bboxes = utils.nms(bboxes, 0.213, method='nms') for box in bboxes: print('x_min', box[0]) print('y_min', box[1]) print('x_max', box[2]) print('y_max', box[3]) print('probability', box[4]) print('object_id', box[5]) print('-----') image = utils.draw_bbox(frame, bboxes) curr_time = time.time() exec_time = curr_time - prev_time result = np.asarray(image) info = "time: %.2f ms" % (1000 * exec_time) print(info) cv2.namedWindow("result", cv2.WINDOW_AUTOSIZE) result = cv2.cvtColor(image, cv2.COLOR_RGB2BGR) cv2.imshow("result", result) print(cv2.getWindowImageRect('result')) if cv2.waitKey(1) & 0xFF == ord('q'): vid.release() break
def main(): if not os.path.exists(args.output): os.mkdir(args.output) testset = Dataset('test') test_generator = tf.data.Dataset.from_generator( lambda: testset, (tf.float32, (tf.string, tf.int32, tf.int32))).batch(cfg.TEST.BATCH_SIZE) classes = utils.read_class_names(cfg.YOLO.CLASSES) STRIDES = np.array(cfg.YOLO.STRIDES) ANCHORS = utils.get_anchors(cfg.YOLO.ANCHORS) NUM_CLASS = len(classes) XYSCALE = cfg.YOLO.XYSCALE input_size = cfg.TEST.INPUT_SIZE model = YOLOv4(NUM_CLASS, STRIDES, ANCHORS, XYSCALE, 'test') if args.pretrained: dummy_input = np.ones((1, input_size, input_size, 3)) model.predict(dummy_input) model.load_weights('./weights/pretrained.h5') print('Pretrained weights loaded') elif args.weights is not None: ckpt = tf.train.Checkpoint(model=model) ckpt_manager = tf.train.CheckpointManager(ckpt, args.weights, max_to_keep=3) if ckpt_manager.latest_checkpoint: ckpt.restore(ckpt_manager.latest_checkpoint).expect_partial() print('Latest checkpoint restored') else: print('Failed to load latest checkpoint') times = 0.0 for index, (image_data, image_meta) in enumerate(test_generator): if index % 100 == 0 or index == 0: print('Processing {}/{} images...'.format(index + 1, len(testset))) original_image_size = (image_meta[1].numpy().item(), image_meta[2].numpy().item()) start_time = time.time() pred_bbox = model.predict(image_data) pred_bbox = utils.postprocess_bbbox(pred_bbox, ANCHORS, STRIDES, XYSCALE) bboxes = utils.postprocess_boxes(pred_bbox, original_image_size, input_size, 0.25) bboxes = utils.nms(bboxes, 0.213, method='nms') times += (time.time() - start_time) bboxes = sorted(bboxes, key=lambda x: -x[4]) f = open( os.path.join( args.output, image_meta[0].numpy().item().decode( 'utf-8').split('/')[-1][:-4] + '.txt'), 'w') for i, b in enumerate(bboxes): class_name = classes[int(b[5])] conf = str(b[4]) xmin, ymin, xmax, ymax = str(int(b[0])), str(int(b[1])), str( int(b[2])), str(int(b[3])) predicted = [class_name, conf, xmin, ymin, xmax, ymax, '\n'] if i < len(bboxes) - 1: f.write(' '.join(predicted)) else: f.write(' '.join(predicted[:-1])) f.close() print('All test images were processed. FPS is {:.2f}'.format( (len(testset) / times)))
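# Each detection line written above is "class_name confidence xmin ymin xmax
# ymax"; a sketch of reading those files back (e.g. in a separate mAP script).
# Parsing from the right tolerates class names that contain spaces.
def read_detections(txt_path):
    dets = []
    with open(txt_path) as f:
        for line in f:
            tokens = line.split()
            if len(tokens) < 6:
                continue
            name = " ".join(tokens[:-5])
            conf, xmin, ymin, xmax, ymax = map(float, tokens[-5:])
            dets.append((name, conf, xmin, ymin, xmax, ymax))
    return dets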
pred_sbbox, pred_mbbox, pred_lbbox = sess.run( [return_tensors[1], return_tensors[2], return_tensors[3]], feed_dict={return_tensors[0]: image_data}) pred_bbox = np.concatenate([ np.reshape(pred_sbbox, (-1, 5 + num_classes)), np.reshape(pred_mbbox, (-1, 5 + num_classes)), np.reshape(pred_lbbox, (-1, 5 + num_classes)) ], axis=0) bboxes = utils.postprocess_boxes(pred_bbox, original_image_size, input_size, confidence, only_save_one) bboxes = utils.nms(bboxes, 0.45, method='nms') image = Image.fromarray( cv2.cvtColor(original_image, cv2.COLOR_RGB2BGR)) # Save originals orig_path = os.path.join(os.path.join(output_dir, 'orig'), str(index_dict['index']) + '.jpg') image.save(orig_path) row_string = orig_path plate_num = plate_rules(url_dict['plates'][int( index_dict['index'])]) for bbox in bboxes: """ bboxes: [x_min, y_min, x_max, y_max, probability, cls_id] format coordinates.
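# The (truncated) docstring above describes rows of [x_min, y_min, x_max,
# y_max, probability, cls_id]; a sketch of cropping each detection out of the
# original image, with bounds clamping (the downstream plate handling is not
# reproduced here):
def crop_detections(image, bboxes):
    h, w = image.shape[:2]
    crops = []
    for bbox in bboxes:
        x_min, y_min, x_max, y_max = map(int, bbox[:4])
        x_min, y_min = max(0, x_min), max(0, y_min)
        x_max, y_max = min(w, x_max), min(h, y_max)
        crops.append(image[y_min:y_max, x_min:x_max].copy())
    return crops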
def main(argv): NUM_CLASS = 2 ANCHORS = [ 12, 16, 19, 36, 40, 28, 36, 75, 76, 55, 72, 146, 142, 110, 192, 243, 459, 401 ] ANCHORS = np.array(ANCHORS, dtype=np.float32) ANCHORS = ANCHORS.reshape(3, 3, 2) STRIDES = [8, 16, 32] XYSCALE = [1.2, 1.1, 1.05] input_size = FLAGS.size video_path = FLAGS.video_path score_thresh = FLAGS.score_thresh iou_thresh = FLAGS.iou_thresh save_path = FLAGS.save_path print(f'[DEBUG][video] input_size : {input_size}') print(f'[DEBUG][video] video_path : {video_path}') print(f'[DEBUG][video] score_thresh : {score_thresh}') print(f'[DEBUG][video] iou_thresh : {iou_thresh}') print(f'[DEBUG][video] save_path : {save_path}') print('[INFO] Building YOLOv4 architecture') tic = time.perf_counter() input_layer = tf.keras.layers.Input([input_size, input_size, 3]) print(f'[INFO][video] Created input_layer of size {input_size}') print(f'[DEBUG][video] input_layer : {input_layer}') feature_maps = YOLOv4(input_layer, NUM_CLASS) print(f'[DEBUG][video] feature_maps : {feature_maps}') bbox_tensors = [] for i, fm in enumerate(feature_maps): bbox_tensors.append(decode(fm, NUM_CLASS, i)) model = tf.keras.Model(input_layer, bbox_tensors) utils.load_weights(model, FLAGS.weights) toc = time.perf_counter() print(f'[INFO] Architecture built.') print(f'[DEBUG][video] Execution took {(1000 * (toc - tic)):0.4f} ms') vid = cv2.VideoCapture(video_path) if save_path: width = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH)) height = int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT)) fps = int(vid.get(cv2.CAP_PROP_FPS)) print(f"[DEBUG][video] Video CODEC : {FLAGS.save_path.split('.')[1]}") codec = cv2.VideoWriter_fourcc(*'MJPG') out = cv2.VideoWriter(FLAGS.save_path, codec, fps, (width, height)) while True: return_value, frame = vid.read() if return_value: print(f'[DEBUG] Got video capture') frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) image = Image.fromarray(frame) else: print(f"[DEBUG][video] Video Over") vid.release() if save_path: out.release() break #raise ValueError("No image! Try with another video format") frame_size = frame.shape[:2] image_data = utils.image_preprocess(np.copy(frame), [input_size, input_size]) image_data = image_data[np.newaxis, ...].astype(np.float32) prev_time = time.perf_counter() pred_bbox = model.predict(image_data) print(f'[INFO][video] Finished initial prediction on image') pred_bbox = utils.postprocess_bbbox(pred_bbox, ANCHORS, STRIDES, XYSCALE) bboxes = utils.postprocess_boxes(pred_bbox, frame_size, input_size, score_thresh) bboxes = utils.nms(bboxes, iou_thresh, method='nms') image = utils.draw_bbox(frame, bboxes) curr_time = time.perf_counter() exec_time = curr_time - prev_time result = np.asarray(image) info = "time: %.2f ms" % (1000 * exec_time) print(info) cv2.namedWindow("result", cv2.WINDOW_AUTOSIZE) result = cv2.cvtColor(image, cv2.COLOR_RGB2BGR) cv2.imshow("result", result) print(result.shape) if save_path: out.write(result) if cv2.waitKey(1) & 0xFF == ord('q'): break
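# FourCC codes are exactly four characters ('MJPG', 'mp4v', 'XVID'); a sketch
# of picking one from the output extension, matching the writer setup above.
# Codec availability still depends on the local OpenCV/FFmpeg build.
import cv2

def make_writer(save_path, fps, width, height):
    ext = save_path.rsplit('.', 1)[-1].lower()
    fourcc = cv2.VideoWriter_fourcc(*('mp4v' if ext == 'mp4' else 'MJPG'))
    return cv2.VideoWriter(save_path, fourcc, fps, (width, height))

# out = make_writer(FLAGS.save_path, fps, width, height)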