def parse_annotation(self, annotation):
    """Load one annotated sample and prepare it for the network.

    Args:
        annotation: One annotation line. The first whitespace-separated
            token is the image path; every following token is one box
            written as comma-separated numbers.

    Returns:
        The ``(image, bboxes)`` pair produced by ``utils.image_preprocess``.

    Raises:
        KeyError: If the image path does not exist on disk.
        ValueError: If ``self.dataset_type`` is neither ``"converted_coco"``
            nor ``"yolo"``, or if OpenCV cannot decode the image.
    """
    line = annotation.split()
    image_path = line[0]
    if not os.path.exists(image_path):
        raise KeyError("%s does not exist ... " % image_path)

    # Reject unknown dataset types explicitly; previously this surfaced
    # further down as an UnboundLocalError on ``bboxes``.
    if self.dataset_type not in ("converted_coco", "yolo"):
        raise ValueError("unsupported dataset_type: %r" % (self.dataset_type,))

    image = cv2.imread(image_path)
    if image is None:
        # cv2.imread reports an unreadable or corrupt file by returning None.
        raise ValueError("%s could not be decoded as an image" % image_path)

    if self.dataset_type == "converted_coco":
        bboxes = np.array(
            [list(map(int, box.split(","))) for box in line[1:]])
    else:  # "yolo"
        height, width, _ = image.shape
        bboxes = np.array(
            [list(map(float, box.split(","))) for box in line[1:]])
        # Scale the first four columns by the image size; the fifth column
        # is multiplied by 1 and therefore left as is.
        bboxes = bboxes * np.array([width, height, width, height, 1])
        bboxes = bboxes.astype(np.int64)

    if self.data_aug:
        image, bboxes = self.random_horizontal_flip(
            np.copy(image), np.copy(bboxes))
        image, bboxes = self.random_crop(np.copy(image), np.copy(bboxes))
        image, bboxes = self.random_translate(np.copy(image), np.copy(bboxes))

    # OpenCV loads images as BGR; convert before preprocessing.
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    image, bboxes = utils.image_preprocess(
        np.copy(image),
        [self.train_input_size, self.train_input_size],
        np.copy(bboxes),
    )
    return image, bboxes
def parse_annotation(self, annotation):
    """Turn one annotation line into a preprocessed ``(image, bboxes)`` pair.

    The first whitespace-separated token is used as a key into
    ``self.images``; each remaining token is one box written as
    comma-separated numbers.

    Args:
        annotation: The annotation line to parse.

    Returns:
        The ``(image, bboxes)`` pair produced by ``utils.image_preprocess``.
    """
    fields = annotation.split()
    image = self.images[fields[0]]
    box_fields = fields[1:]

    if self.dataset_type == "converted_coco":
        bboxes = np.array(
            [[int(value) for value in box.split(",")] for box in box_fields])
    elif self.dataset_type == "yolo":
        height, width, _ = image.shape
        raw_boxes = np.array(
            [[float(value) for value in box.split(",")] for box in box_fields])
        # First four columns are scaled by the image size, the fifth by 1.
        scaled_boxes = raw_boxes * np.array([width, height, width, height, 1])
        bboxes = scaled_boxes.astype(np.int64)

    if self.data_aug:
        image, bboxes = self.random_horizontal_flip(image, bboxes)
        image, bboxes = self.random_crop(image, bboxes)
        image, bboxes = self.random_translate(image, bboxes)

    rgb_image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    target_size = [self.train_input_size, self.train_input_size]
    image, bboxes = utils.image_preprocess(rgb_image, target_size, bboxes)
    return image, bboxes
def predict(self, image):
    """Run the session on one image and return the filtered boxes.

    The image is preprocessed to ``self.input_size`` square, the three
    prediction tensors are evaluated with ``self.trainable`` fed as False,
    flattened to rows of ``5 + self.num_classes`` values, and passed through
    ``utils.postprocess_boxes`` and ``utils.nms``.

    Args:
        image: Input image; its first two dimensions are used as the
            original height and width.

    Returns:
        Whatever ``utils.nms`` returns for the post-processed boxes.
    """
    original_height, original_width, _ = np.copy(image).shape

    side = self.input_size
    network_input = utils.image_preprocess(image, [side, side])
    network_input = network_input[np.newaxis, ...]

    fetches = [self.pred_sbbox, self.pred_mbbox, self.pred_lbbox]
    feed = {self.input_data: network_input, self.trainable: False}
    pred_sbbox, pred_mbbox, pred_lbbox = self.sess.run(fetches, feed_dict=feed)

    row_width = 5 + self.num_classes
    flattened = [
        np.reshape(prediction, (-1, row_width))
        for prediction in (pred_sbbox, pred_mbbox, pred_lbbox)
    ]
    pred_bbox = np.concatenate(flattened, axis=0)

    candidates = utils.postprocess_boxes(
        pred_bbox, (original_height, original_width), self.input_size,
        self.score_threshold)
    return utils.nms(candidates, self.iou_threshold)
def detect_images(model, image_path, box=None, output_path="", id=0,
                  write_file=True, show=False):
    """Run the YOLO model on one image, then save and/or show the result.

    The image is optionally cropped to ``box``, preprocessed, run through
    ``model.predict_on_batch``, post-processed (score threshold 0.3, NMS
    IoU threshold 0.45) and annotated with ``utils.draw_bbox``.

    Args:
        model: The YOLO model to be used.
        image_path: Path to the image.
        box: Optional crop region as ``[x1, y1, x2, y2]``.
        output_path: Path to write the annotated image to. Nothing is
            written when it is empty.
        id: Index of the bounding box for a given frame. When non-zero it
            is appended to the file name as ``_<id>`` before the extension.
        write_file: Whether to write the annotated image to ``output_path``.
        show: Whether to display the annotated image and wait for a key.

    Raises:
        ValueError: If OpenCV cannot read ``image_path``.
    """
    original_image = cv2.imread(image_path)
    if original_image is None:
        # cv2.imread returns None instead of raising on a bad path/file.
        raise ValueError("%s could not be read as an image" % image_path)
    if box:
        original_image = original_image[box[1]:box[3], box[0]:box[2]]
    original_image = cv2.cvtColor(original_image, cv2.COLOR_BGR2RGB)

    image_data, old_image_size, new_image_size = utils.image_preprocess(
        np.copy(original_image))
    image_data = image_data[np.newaxis, ...].astype(np.float32)

    pred_bbox = model.predict_on_batch(image_data)
    pred_bbox = [tf.reshape(x, (-1, tf.shape(x)[-1])) for x in pred_bbox]
    pred_bbox = tf.concat(pred_bbox, axis=0)
    bboxes = utils.postprocess_boxes(pred_bbox, old_image_size,
                                     new_image_size, 0.3)
    bboxes = utils.nms(bboxes, 0.45, method='nms')

    image = utils.draw_bbox(original_image, bboxes)
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    if output_path and id:
        # splitext only looks at the final path component, so a dot in a
        # directory name or a missing extension no longer mangles the path.
        root, extension = os.path.splitext(output_path)
        output_path = root + '_' + str(id) + extension

    if output_path and write_file:
        # A bare file name has no directory part; only create one if present.
        output_directory = os.path.dirname(output_path)
        if output_directory:
            os.makedirs(output_directory, exist_ok=True)
        cv2.imwrite(output_path, image)

    if show:
        cv2.imshow("predicted image", image)
        # Block until a key is pressed, then close the window.
        cv2.waitKey(0)
        cv2.destroyAllWindows()
def frame_to_gpu(frame, gpu):
    """Preprocess a BGR frame and create the input tensor under ``gpu``.

    Args:
        frame: Image in BGR channel order, as accepted by ``cv2.cvtColor``.
        gpu: Device specification passed to ``tf.device``.

    Returns:
        A tensor built from the preprocessed frame with a leading batch
        axis and dtype float32.
    """
    rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    preprocessed = utils.image_preprocess(
        np.copy(rgb_frame), [INPUT_SIZE, INPUT_SIZE])
    batch = preprocessed[np.newaxis, ...].astype(np.float32)
    with tf.device(gpu):
        return tf.convert_to_tensor(batch)
def representative_data_gen():
    """Yield preprocessed calibration images listed in ``FLAGS.dataset``.

    The dataset file is split on whitespace and the first 100 tokens (or
    fewer, if the file is shorter) are examined. Every token that names an
    existing file is loaded, converted to RGB, preprocessed to
    ``FLAGS.input_size`` square and yielded; all other tokens are skipped.

    Yields:
        A one-element list holding a float32 array with a leading batch axis.
    """
    # Close the file deterministically instead of leaking the handle.
    with open(FLAGS.dataset) as dataset_file:
        fimage = dataset_file.read().split()

    # Slicing tolerates files with fewer than 100 tokens, which previously
    # raised IndexError.
    for input_value, candidate in enumerate(fimage[:100]):
        if not os.path.exists(candidate):
            continue
        original_image = cv2.imread(candidate)
        original_image = cv2.cvtColor(original_image, cv2.COLOR_BGR2RGB)
        image_data = utils.image_preprocess(
            np.copy(original_image), [FLAGS.input_size, FLAGS.input_size])
        img_in = image_data[np.newaxis, ...].astype(np.float32)
        print(input_value)
        yield [img_in]
def parse_annotation(self, annotation):
    """Read the image and integer boxes described by one annotation line.

    Args:
        annotation: Whitespace-separated line: the image path followed by
            boxes written as comma-separated integers.

    Returns:
        The ``(image, bboxes)`` pair produced by ``utils.image_preprocess``.

    Raises:
        KeyError: If the image path does not exist on disk.
    """
    tokens = annotation.split()
    image_path = tokens[0]
    if not os.path.exists(image_path):
        raise KeyError("%s does not exist ... " % image_path)

    image = np.array(cv2.imread(image_path))
    bboxes = np.array(
        [[int(value) for value in box.split(',')] for box in tokens[1:]])

    if self.data_aug:
        # Each augmentation works on copies of the current image and boxes.
        image, bboxes = self.random_horizontal_flip(
            np.copy(image), np.copy(bboxes))
        image, bboxes = self.random_crop(np.copy(image), np.copy(bboxes))
        image, bboxes = self.random_translate(np.copy(image), np.copy(bboxes))

    target_size = [self.train_input_size, self.train_input_size]
    image, bboxes = utils.image_preprocess(
        np.copy(image), target_size, np.copy(bboxes))
    return image, bboxes
def main(_argv):
    """Detect objects in ``FLAGS.image`` through the gRPC client.

    The image is preprocessed to ``FLAGS.size`` square, sent with
    ``grpc_yolov4_client``, post-processed (score threshold 0.25, NMS
    threshold 0.213), drawn, shown, and written to ``FLAGS.output``.
    """
    yolo_cfg = cfg.YOLO
    if FLAGS.tiny:
        strides = np.array(yolo_cfg.STRIDES_TINY)
        anchors = utils.get_anchors(yolo_cfg.ANCHORS_TINY, FLAGS.tiny)
        xyscale = yolo_cfg.XYSCALE_TINY
    else:
        strides = np.array(yolo_cfg.STRIDES)
        if FLAGS.model == 'yolov4':
            anchors = utils.get_anchors(yolo_cfg.ANCHORS, FLAGS.tiny)
        else:
            anchors = utils.get_anchors(yolo_cfg.ANCHORS_V3, FLAGS.tiny)
        xyscale = yolo_cfg.XYSCALE
    # The class count is not used below; the class file is still read here,
    # exactly as before.
    _num_class = len(utils.read_class_names(yolo_cfg.CLASSES))

    side = FLAGS.size
    rgb_image = cv2.cvtColor(cv2.imread(FLAGS.image), cv2.COLOR_BGR2RGB)
    image_hw = rgb_image.shape[:2]

    network_input = utils.image_preprocess(np.copy(rgb_image), [side, side])
    network_input = network_input[np.newaxis, ...].astype(np.float32)

    raw_prediction = grpc_yolov4_client(
        FLAGS.host, FLAGS.model, network_input, shape_size=FLAGS.size)

    # The tiny and full yolov4 variants are decoded with the same call.
    if FLAGS.model == 'yolov4':
        decoded = utils.postprocess_bbbox(
            raw_prediction, anchors, strides, xyscale)
    else:
        decoded = utils.postprocess_bbbox(raw_prediction, anchors, strides)

    candidates = utils.postprocess_boxes(decoded, image_hw, side, 0.25)
    detections = utils.nms(candidates, 0.213, method='nms')

    annotated = Image.fromarray(utils.draw_bbox(rgb_image, detections))
    annotated.show()
    # Swap the channel order back before handing the image to OpenCV.
    to_write = cv2.cvtColor(np.array(annotated), cv2.COLOR_BGR2RGB)
    cv2.imwrite(FLAGS.output, to_write)
def main():
    """Detect objects in ``args.input`` and write ``result.png``.

    Builds a ``YOLOv4`` model in ``'test'`` mode, loads either the
    pretrained ``.h5`` weights or the latest checkpoint under
    ``args.weights``, runs one prediction and writes the annotated image
    into ``args.output``.
    """
    yolo_cfg = cfg.YOLO
    strides = np.array(yolo_cfg.STRIDES)
    anchors = utils.get_anchors(yolo_cfg.ANCHORS)
    num_classes = len(utils.read_class_names(yolo_cfg.CLASSES))
    xyscale = yolo_cfg.XYSCALE

    side = args.size
    rgb_image = cv2.cvtColor(cv2.imread(args.input), cv2.COLOR_BGR2RGB)
    image_hw = rgb_image.shape[:2]

    network_input = utils.image_preprocess(np.copy(rgb_image), [side, side])
    network_input = network_input[np.newaxis, ...].astype(np.float32)

    model = YOLOv4(num_classes, strides, anchors, xyscale, 'test')

    if args.pretrained:
        # One prediction on a dummy batch precedes load_weights.
        model.predict(np.ones((1, args.size, args.size, 3)))
        model.load_weights('./weights/pretrained.h5')
        print('Pretrained weights loaded')
    elif args.weights is not None:
        checkpoint = tf.train.Checkpoint(model=model)
        manager = tf.train.CheckpointManager(
            checkpoint, args.weights, max_to_keep=3)
        if not manager.latest_checkpoint:
            print('Failed to load latest checkpoint')
        else:
            checkpoint.restore(manager.latest_checkpoint).expect_partial()
            print('Latest checkpoint restored')

    decoded = utils.postprocess_bbbox(
        model.predict(network_input), anchors, strides, xyscale)
    candidates = utils.postprocess_boxes(decoded, image_hw, side, 0.25)
    detections = utils.nms(candidates, 0.213, method='nms')

    class_names = utils.read_class_names(yolo_cfg.CLASSES)
    annotated = Image.fromarray(
        utils.draw_bbox(rgb_image, detections, class_names))
    # Swap the channel order back before handing the image to OpenCV.
    to_write = cv2.cvtColor(np.array(annotated), cv2.COLOR_BGR2RGB)
    cv2.imwrite(os.path.join(args.output, 'result.png'), to_write)
def parse_data(self, image_meta, annos):
    """Load an image with its boxes, optionally augment, then preprocess.

    Args:
        image_meta: Sequence whose first element is the image path.
        annos: Box annotations; per the original note, shaped
            ``[num_bboxes, 5]`` as ``(minx, miny, maxx, maxy, class)``.

    Returns:
        The ``(image, bboxes)`` pair produced by ``utils.image_preprocess``.

    Raises:
        KeyError: If the image path does not exist on disk.
    """
    if not os.path.exists(image_meta[0]):
        raise KeyError("%s does not exist ..." % image_meta[0])

    image = cv2.imread(image_meta[0], cv2.IMREAD_COLOR)
    bboxes = np.array(annos)

    if self.data_aug:
        # Applied in this fixed order, each step on fresh copies.
        image, bboxes = self.random_crop(np.copy(image), np.copy(bboxes))
        image, bboxes = self.random_scale(np.copy(image), np.copy(bboxes))
        image, bboxes = self.random_translate(np.copy(image), np.copy(bboxes))
        image, bboxes = self.random_rotate(np.copy(image), np.copy(bboxes))

    rgb_image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    target_size = [self.train_input_size, self.train_input_size]
    image, bboxes = utils.image_preprocess(
        np.copy(rgb_image), target_size, np.copy(bboxes))
    return image, bboxes
def detect_yolov3(vid, model, input_size):
    """Read one frame, run detection, and publish the annotated frame.

    The annotated frame is stored in the module-level ``outputFrame`` while
    holding ``lock``.

    Args:
        vid: Capture object whose ``read()`` returns ``(ok, frame)``.
        model: Model whose ``predict`` returns the raw prediction tensors.
        input_size: Side length the frame is preprocessed to.

    Raises:
        ValueError: If no frame could be read from ``vid``.
    """
    # Without this declaration the assignment at the end only bound a local
    # name that was discarded when the function returned.
    global outputFrame

    return_value, frame = vid.read()
    if not return_value:
        raise ValueError("No image!")
    frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

    frame_size = frame.shape[:2]
    image_data = utils.image_preprocess(np.copy(frame),
                                        [input_size, input_size])
    image_data = image_data[np.newaxis, ...].astype(np.float32)

    pred_bbox = model.predict(image_data)
    # Flatten every output to 2-D, keeping the last axis, and stack them.
    pred_bbox = [tf.reshape(x, (-1, tf.shape(x)[-1])) for x in pred_bbox]
    pred_bbox = tf.concat(pred_bbox, axis=0)

    # NOTE(review): sibling scripts call utils.postprocess_boxes here;
    # confirm that utils.preprocess_boxes is the intended helper.
    bboxes = utils.preprocess_boxes(pred_bbox, frame_size, input_size, 0.3)
    bboxes = utils.nms(bboxes, 0.45, method='nms')
    image = utils.draw_bbox(frame, bboxes)

    with lock:
        outputFrame = image
def parse_annotations(self, annotation):
    """Parse one annotation line into a preprocessed ``(image, bboxes)`` pair.

    Args:
        annotation: Whitespace-separated line: the image path followed by
            boxes written as comma-separated integers.

    Returns:
        The ``(image, bboxes)`` pair produced by ``utils.image_preprocess``.

    Raises:
        KeyError: If the image path does not exist on disk.
    """
    parts = annotation.split()
    image_path = parts[0]
    if not os.path.exists(image_path):
        raise KeyError("%s does not exist..." % image_path)

    image = cv2.imread(image_path)
    # Original note (translated): there is a dimension-expansion step here
    # too; without it the augmentation code fails with
    # "too many indices for array".
    bboxes = np.array(
        [[int(value) for value in box.split(",")] for box in parts[1:]])

    if self.data_aug:
        image, bboxes = self.random_horizontal_flip(
            np.copy(image), np.copy(bboxes))
        image, bboxes = self.random_crop(np.copy(image), np.copy(bboxes))
        image, bboxes = self.random_translate(np.copy(image), np.copy(bboxes))

    rgb_image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    image, bboxes = utils.image_preprocess(
        np.copy(rgb_image), self.train_input_size, np.copy(bboxes))
    return image, bboxes
def parse_annotation(self, annotation):
    """Parse an annotation line whose image path is the second token.

    Tokens from index 4 onwards are read as a flat list of integers and
    grouped into rows of five.

    Args:
        annotation: Whitespace-separated annotation line.

    Returns:
        The ``(image, bboxes)`` pair produced by ``utils.image_preprocess``.

    Raises:
        KeyError: If the image path does not exist on disk.
    """
    tokens = annotation.split()
    image_path = tokens[1]
    if not os.path.exists(image_path):
        raise KeyError("%s does not exist ... " % image_path)

    image = cv2.imread(image_path)
    flat_values = np.array(list(tokens[4:]))
    bboxes = flat_values.reshape((-1, 5)).astype('int')

    if self.data_aug:
        image, bboxes = self.random_horizontal_flip(
            np.copy(image), np.copy(bboxes))
        image, bboxes = self.random_crop(np.copy(image), np.copy(bboxes))
        image, bboxes = self.random_translate(np.copy(image), np.copy(bboxes))

    rgb_image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    target_size = [self.train_input_size, self.train_input_size]
    image, bboxes = utils.image_preprocess(
        np.copy(rgb_image), target_size, np.copy(bboxes))
    return image, bboxes
def parse_annotation(self, annotation):
    """Load the image and boxes for one annotation line and preprocess them.

    Box values may be written as floats; each is truncated to an integer
    with ``int(float(value))``.

    Args:
        annotation: Whitespace-separated line: the image path followed by
            boxes written as comma-separated numbers.

    Returns:
        The ``(image, bboxes)`` pair produced by ``utils.image_preprocess``.

    Raises:
        KeyError: If the image path does not exist on disk.
        ValueError: If OpenCV cannot decode the image.
    """
    line = annotation.split()
    image_path = line[0]
    if not os.path.exists(image_path):
        # Message typo fixed ("exits" -> "exist").
        raise KeyError("{} does not exist.".format(image_path))

    image = cv2.imread(image_path)
    if image is None:
        # Wrapping the result in np.array() used to turn a failed read into
        # a 0-d object array that only broke later, far from the cause.
        raise ValueError("{} could not be decoded as an image.".format(image_path))

    bboxes = np.array(
        [[int(float(value)) for value in box.split(',')] for box in line[1:]])

    if self.data_aug:
        image, bboxes = self.random_horizontal_flip(
            np.copy(image), np.copy(bboxes))
        image, bboxes = self.random_crop(np.copy(image), np.copy(bboxes))
        image, bboxes = self.random_translate(np.copy(image), np.copy(bboxes))

    image, bboxes = utils.image_preprocess(
        np.copy(image), [self.train_input_size, self.train_input_size],
        np.copy(bboxes))
    return image, bboxes
def representative_data_gen():
    """Yield preprocessed calibration images from ``FLAGS.dataset``.

    The first 10 lines of the dataset file are examined; the text before
    the first space on each line is taken as the image path. Existing images
    are loaded, converted to RGB, preprocessed to ``FLAGS.input_size``
    square and yielded. Missing files are reported and skipped.

    Yields:
        A one-element list holding a float32 array with a leading batch axis.

    Raises:
        ValueError: After the scan, if fewer than 10 images were found. This
            now also covers a dataset file with fewer than 10 lines, which
            previously raised IndexError.
    """
    samples = 10
    # Close the file deterministically instead of leaking the handle.
    with open(FLAGS.dataset) as dataset_file:
        lines = dataset_file.read().split("\n")

    found = 0
    for line_number, entry in enumerate(lines[:samples], start=1):
        image_file = entry.split(" ")[0]
        if not os.path.exists(image_file):
            print("File does not exist %s in %s at line %d" %
                  (image_file, FLAGS.dataset, line_number))
            continue
        original_image = cv2.imread(image_file)
        original_image = cv2.cvtColor(original_image, cv2.COLOR_BGR2RGB)
        image_data = utils.image_preprocess(
            np.copy(original_image), [FLAGS.input_size, FLAGS.input_size])
        img_in = image_data[np.newaxis, ...].astype(np.float32)
        print("Reading calibration image {}".format(image_file))
        found += 1
        yield [img_in]

    if found < samples:
        raise ValueError(
            "Failed to read %d calibration sample images from %s" %
            (samples, FLAGS.dataset))
def parse_annotation(self, annotation):
    """Load one annotated sample, tolerating one space in the image path.

    Splitting on whitespace breaks file names such as ``"____ (19).png"``
    because everything after the first space is expected to be a box. When
    the second token contains ``.png`` it is therefore joined back onto the
    first token with a single space.

    Args:
        annotation: One annotation line: the image path followed by
            whitespace-separated boxes written as comma-separated numbers.

    Returns:
        The ``(image, bboxes)`` pair produced by ``utils.image_preprocess``.

    Raises:
        KeyError: If the image path does not exist on disk.
        ValueError: If ``self.dataset_type`` is neither ``"converted_coco"``
            nor ``"yolo"``, or if OpenCV cannot decode the image.
    """
    line = annotation.split()
    image_path = line[0]
    # Case 1: line[0] already ends the file name. Case 2: the ".png" part
    # landed in line[1]. The length check keeps an annotation without any
    # boxes from raising IndexError.
    if len(line) > 1 and '.png' in line[1]:
        image_path = image_path + ' ' + line[1]
        del line[1]

    if not os.path.exists(image_path):
        raise KeyError("%s does not exist ... " % image_path)

    # Reject unknown dataset types explicitly; previously this surfaced
    # further down as an UnboundLocalError on ``bboxes``.
    if self.dataset_type not in ("converted_coco", "yolo"):
        raise ValueError("unsupported dataset_type: %r" % (self.dataset_type,))

    image = cv2.imread(image_path)
    if image is None:
        # cv2.imread reports an unreadable or corrupt file by returning None.
        raise ValueError("%s could not be decoded as an image" % image_path)

    if self.dataset_type == "converted_coco":
        bboxes = np.array(
            [list(map(int, box.split(","))) for box in line[1:]])
    else:  # "yolo"
        height, width, _ = image.shape
        bboxes = np.array(
            [list(map(float, box.split(","))) for box in line[1:]])
        # Scale the first four columns by the image size; the fifth column
        # is multiplied by 1 and therefore left as is.
        bboxes = bboxes * np.array([width, height, width, height, 1])
        bboxes = bboxes.astype(np.int64)

    if self.data_aug:
        image, bboxes = self.random_horizontal_flip(
            np.copy(image), np.copy(bboxes))
        image, bboxes = self.random_crop(np.copy(image), np.copy(bboxes))
        image, bboxes = self.random_translate(np.copy(image), np.copy(bboxes))

    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    image, bboxes = utils.image_preprocess(
        np.copy(image),
        [self.train_input_size, self.train_input_size],
        np.copy(bboxes),
    )
    return image, bboxes
def main(_argv):
    """Benchmark detection speed by running ``FLAGS.image`` 1000 times.

    With ``FLAGS.framework == 'tf'`` a Keras model is built from the
    selected backbone and the given weights. With ``'trt'`` the
    ``serving_default`` signature of the saved model at ``FLAGS.weights``
    is used. Each iteration times inference plus box post-processing and
    NMS. The first iteration is excluded from the running FPS average.
    """
    if FLAGS.tiny:
        STRIDES = np.array(cfg.YOLO.STRIDES_TINY)
        ANCHORS = utils.get_anchors(cfg.YOLO.ANCHORS_TINY, FLAGS.tiny)
    else:
        STRIDES = np.array(cfg.YOLO.STRIDES)
        if FLAGS.model == 'yolov4':
            ANCHORS = utils.get_anchors(cfg.YOLO.ANCHORS, FLAGS.tiny)
        else:
            ANCHORS = utils.get_anchors(cfg.YOLO.ANCHORS_V3, FLAGS.tiny)
    NUM_CLASS = len(utils.read_class_names(cfg.YOLO.CLASSES))
    XYSCALE = cfg.YOLO.XYSCALE

    config = ConfigProto()
    config.gpu_options.allow_growth = True
    # Created for its side effects; not referenced again in this function.
    session = InteractiveSession(config=config)  # noqa: F841
    input_size = FLAGS.size
    physical_devices = tf.config.experimental.list_physical_devices('GPU')
    if len(physical_devices) > 0:
        tf.config.experimental.set_memory_growth(physical_devices[0], True)

    def build_model(input_layer, backbone, weight_loader):
        # Build the backbone, decode every feature map, load the weights.
        feature_maps = backbone(input_layer, NUM_CLASS)
        bbox_tensors = [
            decode(fm, NUM_CLASS, i) for i, fm in enumerate(feature_maps)
        ]
        keras_model = tf.keras.Model(input_layer, bbox_tensors)
        weight_loader(keras_model, FLAGS.weights)
        return keras_model

    if FLAGS.framework == 'tf':
        input_layer = tf.keras.layers.Input([input_size, input_size, 3])
        if FLAGS.tiny:
            model = build_model(input_layer, YOLOv3_tiny,
                                utils.load_weights_tiny)
        elif FLAGS.model == 'yolov3':
            model = build_model(input_layer, YOLOv3, utils.load_weights_v3)
        elif FLAGS.model == 'yolov4':
            model = build_model(input_layer, YOLOv4, utils.load_weights)
    elif FLAGS.framework == 'trt':
        saved_model_loaded = tf.saved_model.load(
            FLAGS.weights, tags=[tag_constants.SERVING])
        signature_keys = list(saved_model_loaded.signatures.keys())
        print(signature_keys)
        infer = saved_model_loaded.signatures['serving_default']

    logging.info('weights loaded')

    @tf.function
    def run_model(x):
        # ``model`` is only bound on the 'tf' path, the only caller.
        return model(x)

    original_image = cv2.imread(FLAGS.image)
    original_image = cv2.cvtColor(original_image, cv2.COLOR_BGR2RGB)
    original_image_size = original_image.shape[:2]
    image_data = utils.image_preprocess(np.copy(original_image),
                                        [FLAGS.size, FLAGS.size])
    image_data = image_data[np.newaxis, ...].astype(np.float32)
    # The unused ``img_raw`` decode/resize was removed: its result was never
    # read and it left the image file handle open.
    batched_input = tf.constant(image_data)

    fps_total = 0  # was named ``sum``, shadowing the builtin
    for i in range(1000):
        # perf_counter is monotonic and high-resolution, so very short
        # iterations do not produce a zero interval.
        prev_time = time.perf_counter()

        if FLAGS.framework == 'tf':
            pred_bbox = [value.numpy() for value in run_model(image_data)]
        elif FLAGS.framework == 'trt':
            pred_bbox = [
                value.numpy() for value in infer(batched_input).values()
            ]
        else:
            pred_bbox = None

        if pred_bbox is not None:
            if FLAGS.model == 'yolov4':
                pred_bbox = utils.postprocess_bbbox(pred_bbox, ANCHORS,
                                                    STRIDES, XYSCALE)
            else:
                pred_bbox = utils.postprocess_bbbox(pred_bbox, ANCHORS,
                                                    STRIDES)
            bboxes = utils.postprocess_boxes(pred_bbox, original_image_size,
                                             input_size, 0.25)
            bboxes = utils.nms(bboxes, 0.213, method='nms')

        exec_time = time.perf_counter() - prev_time
        if i == 0:
            # Skip the first run, which includes tracing/warm-up cost.
            continue
        fps_total += (1 / exec_time)
        info = str(i) + " time:" + str(round(
            exec_time, 3)) + " average FPS:" + str(round(
                fps_total / i, 2)) + ", FPS: " + str(round((1 / exec_time), 1))
        print(info)
def main(argv):
    """Run YOLOv4 detection on webcam 0 and show (optionally record) it.

    Frames are read from ``cv2.VideoCapture(0)``, preprocessed to
    ``FLAGS.size`` square, run through the model and displayed in a window
    named ``"result"`` until ``q`` is pressed. When ``FLAGS.save_path`` is
    set, the annotated frames are also written to that file.

    Raises:
        ValueError: If a frame cannot be read from the capture device.
    """
    NUM_CLASS = 2
    ANCHORS = np.array([
        12, 16, 19, 36, 40, 28, 36, 75, 76, 55, 72, 146, 142, 110, 192, 243,
        459, 401
    ], dtype=np.float32).reshape(3, 3, 2)
    STRIDES = [8, 16, 32]
    XYSCALE = [1.2, 1.1, 1.05]

    input_size = FLAGS.size
    score_thresh = FLAGS.score_thresh
    iou_thresh = FLAGS.iou_thresh
    save_path = FLAGS.save_path
    print(f'[DEBUG][webcam] input_size : {input_size}')
    print(f'[DEBUG][webcam] score_thresh : {score_thresh}')
    print(f'[DEBUG][webcam] iou_thresh : {iou_thresh}')

    print('[INFO] Bulding Yolov4 architecture')
    tic = time.perf_counter()
    input_layer = tf.keras.layers.Input([input_size, input_size, 3])
    print(f'[INFO][webcam] Created input_layer of size {input_size}')
    print(f'[DEBUG][webcam] input_layer : {input_layer}')
    feature_maps = YOLOv4(input_layer, NUM_CLASS)
    print(f'[DEBUG][webcam] feature_maps : {feature_maps}')
    bbox_tensors = [
        decode(fm, NUM_CLASS, i) for i, fm in enumerate(feature_maps)
    ]
    model = tf.keras.Model(input_layer, bbox_tensors)
    utils.load_weights(model, FLAGS.weights)
    toc = time.perf_counter()
    print(f'[INFO] Architecture built.')
    print(f'[DEBUG][webcam] Execution took {(1000 * (toc - tic)):0.4f} ms')

    vid = cv2.VideoCapture(0)
    # Stays None unless a save path was given, so cleanup can tell whether
    # a writer exists (the unconditional out.release() raised NameError).
    out = None
    try:
        if save_path:
            width = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH))
            height = int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT))
            fps = int(vid.get(cv2.CAP_PROP_FPS))
            # rsplit never raises for a path without a dot, and takes the
            # text after the LAST dot rather than the first.
            print(f"[DEBUG][video] Video CODEC : {save_path.rsplit('.', 1)[-1]}")
            codec = cv2.VideoWriter_fourcc(*'XVID')
            out = cv2.VideoWriter(save_path, codec, fps, (width, height))

        while True:
            return_value, frame = vid.read()
            if not return_value:
                raise ValueError("No image! Try with another video format")
            print(f'[DEBUG] Got video capture')
            frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

            frame_size = frame.shape[:2]
            image_data = utils.image_preprocess(np.copy(frame),
                                                [input_size, input_size])
            image_data = image_data[np.newaxis, ...].astype(np.float32)

            prev_time = time.perf_counter()
            pred_bbox = model.predict(image_data)
            print(f'[INFO][webcam] Finished initial predication on image')
            pred_bbox = utils.postprocess_bbbox(pred_bbox, ANCHORS, STRIDES,
                                                XYSCALE)
            bboxes = utils.postprocess_boxes(pred_bbox, frame_size,
                                             input_size, score_thresh)
            bboxes = utils.nms(bboxes, iou_thresh, method='nms')
            image = utils.draw_bbox(frame, bboxes)
            exec_time = time.perf_counter() - prev_time

            info = "fdpms: %.2f ms" % (1000 * exec_time)
            print(info)

            cv2.namedWindow("result", cv2.WINDOW_AUTOSIZE)
            # Back to BGR for OpenCV display and writing.
            result = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
            cv2.imshow("result", result)
            print(result.shape)
            if out is not None:
                out.write(result)
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
    finally:
        # Release the devices on normal exit and on errors alike.
        vid.release()
        if out is not None:
            out.release()
def main(argv):
    """Detect objects in ``FLAGS.image_path`` with YOLOv4 and show the result.

    The image is preprocessed to ``FLAGS.size`` square, the model is built
    and loaded from ``FLAGS.weights``, and the annotated image is shown and,
    if ``FLAGS.save_path`` is set, saved there.
    """
    NUM_CLASS = 2
    anchor_values = [
        12, 16, 19, 36, 40, 28, 36, 75, 76, 55, 72, 146, 142, 110, 192, 243,
        459, 401
    ]
    ANCHORS = np.array(anchor_values, dtype=np.float32).reshape(3, 3, 2)
    STRIDES = [8, 16, 32]
    XYSCALE = [1.2, 1.1, 1.05]

    input_size = FLAGS.size
    image_path = FLAGS.image_path
    score_thresh = FLAGS.score_thresh
    iou_thresh = FLAGS.iou_thresh
    save_path = FLAGS.save_path
    print(f'[DEBUG][image] input_size : {input_size}')
    print(f'[DEBUG][image] image_path : {image_path}')
    print(f'[DEBUG][image] score_thresh : {score_thresh}')
    print(f'[DEBUG][image] iou_thresh : {iou_thresh}')

    original_image = cv2.cvtColor(cv2.imread(image_path), cv2.COLOR_BGR2RGB)
    original_image_size = original_image.shape[:2]
    print(f'[DEBUG][image] original_image_size : {original_image_size}')

    preprocessed = utils.image_preprocess(np.copy(original_image),
                                          [input_size, input_size])
    image_data = preprocessed[np.newaxis, ...].astype(np.float32)

    print('[INFO] Bulding Yolov4 architecture')
    tic = time.perf_counter()
    input_layer = tf.keras.layers.Input([input_size, input_size, 3])
    print(f'[INFO][image] Created input_layer of size {input_size}')
    print(f'[DEBUG][image] input_layer : {input_layer}')
    feature_maps = YOLOv4(input_layer, NUM_CLASS)
    print(f'[DEBUG][image] feature_maps : {feature_maps}')
    bbox_tensors = [
        decode(fm, NUM_CLASS, i) for i, fm in enumerate(feature_maps)
    ]
    model = tf.keras.Model(input_layer, bbox_tensors)
    utils.load_weights(model, FLAGS.weights)
    toc = time.perf_counter()
    print(f'[INFO] Architecture built.')
    print(f'[DEBUG][image] Execution took {(1000 * (toc - tic)):0.4f} ms')

    raw_prediction = model.predict(image_data)
    print(f'[INFO][image] Finished initial predication on image')
    decoded = utils.postprocess_bbbox(raw_prediction, ANCHORS, STRIDES,
                                      XYSCALE)
    candidates = utils.postprocess_boxes(decoded, original_image_size,
                                         input_size, score_thresh)
    detections = utils.nms(candidates, iou_thresh, method='nms')

    annotated = Image.fromarray(utils.draw_bbox(original_image, detections))
    annotated.show()
    if save_path:
        annotated.save(save_path)
        print(f'[INFO][image] Detected image saved to {save_path}')
for i, fm in enumerate(feature_maps): bbox_tensor = decode(fm, i) bbox_tensors.append(bbox_tensor) model = tf.keras.Model(input_layer, bbox_tensors) utils.load_weights(model, "./yolov3.weights") model.summary() vid = cv2.VideoCapture(0) while True: return_value, frame = vid.read() if return_value: frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) else: raise ValueError("No image!") frame_size = frame.shape[:2] image_data = utils.image_preprocess(np.copy(frame), [input_size, input_size]) image_data = image_data[np.newaxis, ...].astype(np.float32) prev_time = time.time() pred_bbox = model.predict(image_data) curr_time = time.time() exec_time = curr_time - prev_time pred_bbox = [tf.reshape(x, (-1, tf.shape(x)[-1])) for x in pred_bbox] pred_bbox = tf.concat(pred_bbox, axis=0) bboxes = utils.preprocess_boxes(pred_bbox, frame_size, input_size, 0.3) bboxes = utils.nms(bboxes, 0.45, method='nms') image = utils.draw_bbox(frame, bboxes) result = np.asarray(image) info = "time: %.2f ms" %(1000*exec_time)
def main(_argv):
    """Detect objects in ``FLAGS.image`` and export the image plus a table.

    Runs either a Keras model (``FLAGS.framework == 'tf'``) or a TFLite
    interpreter on the image, then writes ``results/out_<image name>`` and
    a matching ``.xlsx`` file with one row per detection (coordinates,
    probability, class name). Errors while writing are printed, not raised.
    """
    print('Arguments', _argv)
    print('Flags', flags)
    # NOTE(review): this unconditionally overrides the --tiny flag, so the
    # tiny branches below never run; confirm whether that is intended.
    FLAGS.tiny = False
    print('Tiny ', FLAGS.tiny)
    if FLAGS.tiny:
        STRIDES = np.array(cfg.YOLO.STRIDES_TINY)
        ANCHORS = utils.get_anchors(cfg.YOLO.ANCHORS_TINY, FLAGS.tiny)
        XYSCALE = cfg.YOLO.XYSCALE_TINY
    else:
        STRIDES = np.array(cfg.YOLO.STRIDES)
        if FLAGS.model == 'yolov4':
            ANCHORS = utils.get_anchors(cfg.YOLO.ANCHORS, FLAGS.tiny)
        else:
            ANCHORS = utils.get_anchors(cfg.YOLO.ANCHORS_V3, FLAGS.tiny)
        XYSCALE = cfg.YOLO.XYSCALE
    NUM_CLASS = len(utils.read_class_names(cfg.YOLO.CLASSES))
    input_size = FLAGS.size
    image_path = FLAGS.image

    original_image = cv2.imread(image_path)
    print('image:', original_image)
    original_image = cv2.cvtColor(original_image, cv2.COLOR_BGR2RGB)
    original_image_size = original_image.shape[:2]

    image_data = utils.image_preprocess(np.copy(original_image),
                                        [input_size, input_size])
    image_data = image_data[np.newaxis, ...].astype(np.float32)

    if FLAGS.framework == 'tf':
        input_layer = tf.keras.layers.Input([input_size, input_size, 3])
        if FLAGS.tiny:
            if FLAGS.model == 'yolov3':
                feature_maps = YOLOv3_tiny(input_layer, NUM_CLASS)
            else:
                feature_maps = YOLOv4_tiny(input_layer, NUM_CLASS)
            bbox_tensors = [
                decode(fm, NUM_CLASS, i) for i, fm in enumerate(feature_maps)
            ]
            model = tf.keras.Model(input_layer, bbox_tensors)
            model.summary()
            utils.load_weights_tiny(model, FLAGS.weights, FLAGS.model)
        else:
            if FLAGS.model == 'yolov3':
                feature_maps = YOLOv3(input_layer, NUM_CLASS)
                bbox_tensors = [
                    decode(fm, NUM_CLASS, i)
                    for i, fm in enumerate(feature_maps)
                ]
                model = tf.keras.Model(input_layer, bbox_tensors)
                utils.load_weights_v3(model, FLAGS.weights)
            elif FLAGS.model == 'yolov4':
                feature_maps = YOLOv4(input_layer, NUM_CLASS)
                bbox_tensors = [
                    decode(fm, NUM_CLASS, i)
                    for i, fm in enumerate(feature_maps)
                ]
                model = tf.keras.Model(input_layer, bbox_tensors)
                # Darknet ".weights" files need the custom loader; any other
                # file goes through Keras.
                if FLAGS.weights.split(".")[-1] == "weights":
                    utils.load_weights(model, FLAGS.weights)
                else:
                    model.load_weights(FLAGS.weights).expect_partial()

        model.summary()
        pred_bbox = model.predict(image_data)
    else:
        # Load the TFLite model, feed the image and collect every output.
        interpreter = tf.lite.Interpreter(model_path=FLAGS.weights)
        interpreter.allocate_tensors()
        input_details = interpreter.get_input_details()
        output_details = interpreter.get_output_details()
        print(input_details)
        print(output_details)
        interpreter.set_tensor(input_details[0]['index'], image_data)
        interpreter.invoke()
        pred_bbox = [
            interpreter.get_tensor(detail['index'])
            for detail in output_details
        ]

    if FLAGS.model == 'yolov4':
        if FLAGS.tiny:
            pred_bbox = utils.postprocess_bbbox(pred_bbox, ANCHORS, STRIDES,
                                                XYSCALE, RESIZE=1.5)
        else:
            pred_bbox = utils.postprocess_bbbox(pred_bbox, ANCHORS, STRIDES,
                                                XYSCALE)
    else:
        pred_bbox = utils.postprocess_bbbox(pred_bbox, ANCHORS, STRIDES)
    bboxes = utils.postprocess_boxes(pred_bbox, original_image_size,
                                     input_size, 0.25)
    bboxes = utils.nms(bboxes, 0.213, method='nms')

    image = utils.draw_bbox(original_image, bboxes)
    image = Image.fromarray(image)
    print('Image path', image_path)
    print('Type Image path', type(image_path))
    print('Bboxes type', type(bboxes))

    classes = utils.read_class_names(cfg.YOLO.CLASSES)
    list_bboxes = []
    for bbox in bboxes:
        # Each row is indexed as four coordinates, a score and a class index.
        coor = np.array(bbox[:4], dtype=np.int32)
        list_bboxes.append({
            'coor': list(coor),
            'probability': bbox[4],
            'class': classes[int(bbox[5])]
        })

    try:
        # The results directory was never created, so every save failed on a
        # fresh checkout and the error was merely printed.
        os.makedirs('results', exist_ok=True)
        output_name = 'results/out_' + os.path.basename(image_path)
        image.save(output_name)
        print('Img saved to', output_name)

        output = pd.DataFrame(list_bboxes)
        print('image_path', image_path)
        # Strip only the final extension. The former split('.')[:2] join
        # kept the image extension ("out_x.jpg.xlsx") and broke on names
        # containing additional dots.
        output_name = os.path.splitext(output_name)[0] + '.xlsx'
        print('output_name', output_name)
        output.to_excel(output_name)
    except Exception as e:
        # Best-effort export: report the problem and finish normally.
        print(e)
def main(_argv):
    """Run detection on every frame of ``FLAGS.video`` and display it.

    Uses a Keras model when ``FLAGS.framework == 'tf'`` and a TFLite
    interpreter otherwise. Frames are shown in a window named ``"result"``
    until ``q`` is pressed.

    Raises:
        ValueError: When a frame cannot be read (including end of stream).
    """
    if FLAGS.tiny:
        STRIDES = np.array(cfg.YOLO.STRIDES_TINY)
        ANCHORS = utils.get_anchors(cfg.YOLO.ANCHORS_TINY, FLAGS.tiny)
    else:
        STRIDES = np.array(cfg.YOLO.STRIDES)
        if FLAGS.model == 'yolov4':
            ANCHORS = utils.get_anchors(cfg.YOLO.ANCHORS, FLAGS.tiny)
        else:
            ANCHORS = utils.get_anchors(cfg.YOLO.ANCHORS_V3, FLAGS.tiny)
    NUM_CLASS = len(utils.read_class_names(cfg.YOLO.CLASSES))
    XYSCALE = cfg.YOLO.XYSCALE
    input_size = FLAGS.size
    video_path = FLAGS.video

    print("Video from: ", video_path)
    vid = cv2.VideoCapture(video_path)

    try:
        if FLAGS.framework == 'tf':
            input_layer = tf.keras.layers.Input([input_size, input_size, 3])
            if FLAGS.tiny:
                feature_maps = YOLOv3_tiny(input_layer, NUM_CLASS)
                bbox_tensors = [
                    decode(fm, NUM_CLASS, i)
                    for i, fm in enumerate(feature_maps)
                ]
                model = tf.keras.Model(input_layer, bbox_tensors)
                utils.load_weights_tiny(model, FLAGS.weights)
            else:
                if FLAGS.model == 'yolov3':
                    feature_maps = YOLOv3(input_layer, NUM_CLASS)
                    bbox_tensors = [
                        decode(fm, NUM_CLASS, i)
                        for i, fm in enumerate(feature_maps)
                    ]
                    model = tf.keras.Model(input_layer, bbox_tensors)
                    utils.load_weights_v3(model, FLAGS.weights)
                elif FLAGS.model == 'yolov4':
                    feature_maps = YOLOv4(input_layer, NUM_CLASS)
                    bbox_tensors = [
                        decode(fm, NUM_CLASS, i)
                        for i, fm in enumerate(feature_maps)
                    ]
                    model = tf.keras.Model(input_layer, bbox_tensors)
                    # Darknet ".weights" files need the custom loader; any
                    # other file goes through Keras.
                    if FLAGS.weights.split(".")[-1] == "weights":
                        utils.load_weights(model, FLAGS.weights)
                    else:
                        model.load_weights(FLAGS.weights).expect_partial()

            model.summary()
        else:
            # Load the TFLite model and look up its input/output tensors.
            interpreter = tf.lite.Interpreter(model_path=FLAGS.weights)
            interpreter.allocate_tensors()
            input_details = interpreter.get_input_details()
            output_details = interpreter.get_output_details()
            print(input_details)
            print(output_details)

        while True:
            return_value, frame = vid.read()
            if not return_value:
                # Window cleanup happens in ``finally``. Calling
                # destroyWindow("result") here could itself raise when no
                # window had been created yet, hiding this message.
                raise ValueError("No image! Try with another video format")
            frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

            frame_size = frame.shape[:2]
            image_data = utils.image_preprocess(np.copy(frame),
                                                [input_size, input_size])
            image_data = image_data[np.newaxis, ...].astype(np.float32)
            prev_time = time.time()

            if FLAGS.framework == 'tf':
                pred_bbox = model.predict(image_data)
            else:
                interpreter.set_tensor(input_details[0]['index'], image_data)
                interpreter.invoke()
                pred_bbox = [
                    interpreter.get_tensor(detail['index'])
                    for detail in output_details
                ]

            if FLAGS.model == 'yolov4':
                pred_bbox = utils.postprocess_bbbox(pred_bbox, ANCHORS,
                                                    STRIDES, XYSCALE)
            else:
                pred_bbox = utils.postprocess_bbbox(pred_bbox, ANCHORS,
                                                    STRIDES)

            bboxes = utils.postprocess_boxes(pred_bbox, frame_size,
                                             input_size, 0.25)
            bboxes = utils.nms(bboxes, 0.213, method='nms')

            image = utils.draw_bbox(frame, bboxes)
            exec_time = time.time() - prev_time
            info = "time: %.2f ms" % (1000 * exec_time)
            print(info)

            cv2.namedWindow("result", cv2.WINDOW_AUTOSIZE)
            # Back to BGR for OpenCV display.
            result = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
            cv2.imshow("result", result)
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
    finally:
        # The capture was never released before; destroyAllWindows is safe
        # even when no window exists.
        vid.release()
        cv2.destroyAllWindows()
def main(_argv):
    """Run a YOLO model over an annotation file and dump mAP input files.

    For every line of ``cfg.TEST.ANNOT_PATH`` (image path followed by
    comma-separated boxes) this writes one ground-truth file under
    ``./mAP/ground-truth`` and one prediction file under
    ``./mAP/predicted``, and saves the image with drawn detections under
    ``cfg.TEST.DECTECTED_IMAGE_PATH``.  All three directories are deleted
    and recreated first.

    Only ground-truth boxes whose class equals the class of the image's
    first box are written, and only predictions of that same class that
    are also in the hard-coded ``classes`` list are written.

    Args:
        _argv: Unused.

    Raises:
        ValueError: If ``FLAGS.framework`` is ``'tf'``, ``FLAGS.tiny`` is
            false and ``FLAGS.model`` is neither ``'yolov3'`` nor
            ``'yolov4'``.
    """
    INPUT_SIZE = FLAGS.size
    if FLAGS.tiny:
        STRIDES = np.array(cfg.YOLO.STRIDES_TINY)
        ANCHORS = utils.get_anchors(cfg.YOLO.ANCHORS_TINY, FLAGS.tiny)
    else:
        STRIDES = np.array(cfg.YOLO.STRIDES)
        if FLAGS.model == 'yolov4':
            ANCHORS = utils.get_anchors(cfg.YOLO.ANCHORS, FLAGS.tiny)
        else:
            ANCHORS = utils.get_anchors(cfg.YOLO.ANCHORS_V3, FLAGS.tiny)
    # Read the class-name table once and derive the count from it.
    CLASSES = utils.read_class_names(cfg.YOLO.CLASSES)
    NUM_CLASS = len(CLASSES)

    predicted_dir_path = './mAP/predicted'
    ground_truth_dir_path = './mAP/ground-truth'
    output_dirs = (predicted_dir_path, ground_truth_dir_path,
                   cfg.TEST.DECTECTED_IMAGE_PATH)
    # Remove all stale outputs first, then recreate, in the same order.
    for dir_path in output_dirs:
        if os.path.exists(dir_path):
            shutil.rmtree(dir_path)
    for dir_path in output_dirs:
        os.mkdir(dir_path)

    times = []
    classes = [
        'Book', 'Bottle', 'Computer keyboard', 'Computer mouse', 'Laptop',
        'Mobile phone', 'Backpack'
    ]

    # Build Model
    if FLAGS.framework == 'tf':
        input_layer = tf.keras.layers.Input([INPUT_SIZE, INPUT_SIZE, 3])
        if FLAGS.tiny:
            build_network, load_weights = YOLOv3_tiny, utils.load_weights_tiny
        elif FLAGS.model == 'yolov3':
            build_network, load_weights = YOLOv3, utils.load_weights_v3
        elif FLAGS.model == 'yolov4':
            build_network, load_weights = YOLOv4, utils.load_weights
        else:
            raise ValueError("Unsupported model: %s" % FLAGS.model)

        feature_maps = build_network(input_layer, NUM_CLASS)
        bbox_tensors = [
            decode(fm, NUM_CLASS, i) for i, fm in enumerate(feature_maps)
        ]
        model = tf.keras.Model(input_layer, bbox_tensors)
        load_weights(model, FLAGS.weights)
    else:
        # Load TFLite model and allocate tensors.
        interpreter = tf.lite.Interpreter(model_path=FLAGS.weights)
        interpreter.allocate_tensors()
        # Get input and output tensors.
        input_details = interpreter.get_input_details()
        output_details = interpreter.get_output_details()
        print(input_details)
        print(output_details)

    # NOTE(review): the progress total is counted from FLAGS.annotation_path
    # while the loop below reads cfg.TEST.ANNOT_PATH -- confirm both refer
    # to the same file.
    with open(FLAGS.annotation_path) as count_file:
        num_lines = sum(1 for _ in count_file)

    with open(cfg.TEST.ANNOT_PATH, 'r') as annotation_file:
        for num, line in enumerate(annotation_file):
            annotation = line.strip().split()
            image_path = annotation[0]
            image_name = image_path.split('/')[-1]
            image = cv2.imread(image_path)
            image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
            bbox_data_gt = np.array(
                [list(map(float, box.split(','))) for box in annotation[1:]])

            if len(bbox_data_gt) == 0:
                bboxes_gt = []
                classes_gt = []
            else:
                bboxes_gt, classes_gt = bbox_data_gt[:, :4], bbox_data_gt[:, 4]
            ground_truth_path = os.path.join(ground_truth_dir_path,
                                             str(num) + '.txt')

            # Class of the first ground-truth box; stays '' when the image
            # has no boxes, in which case nothing is written for it.
            current_class = ''

            print('=> ground truth of %s:' % image_name)
            num_bbox_gt = len(bboxes_gt)
            with open(ground_truth_path, 'w') as f:
                for i in range(num_bbox_gt):
                    class_name = CLASSES[classes_gt[i]]
                    if i == 0:
                        current_class = class_name
                    if class_name == current_class:
                        xmin, ymin, xmax, ymax = list(map(str, bboxes_gt[i]))
                        bbox_mess = ' '.join(
                            [class_name, xmin, ymin, xmax, ymax]) + '\n'
                        f.write(bbox_mess)
                        print('\t' + str(bbox_mess).strip())

            print('=> predict result of %s:' % image_name)
            predict_result_path = os.path.join(predicted_dir_path,
                                               str(num) + '.txt')

            # Predict Process
            image_size = image.shape[:2]
            image_data = utils.image_preprocess(np.copy(image),
                                                [INPUT_SIZE, INPUT_SIZE])
            image_data = image_data[np.newaxis, ...].astype(np.float32)

            if FLAGS.framework == "tf":
                startTime = time.time()
                pred_bbox = model.predict(image_data)
                times.append(time.time() - startTime)
            else:
                interpreter.set_tensor(input_details[0]['index'], image_data)
                interpreter.invoke()
                pred_bbox = [
                    interpreter.get_tensor(output_details[i]['index'])
                    for i in range(len(output_details))
                ]

            if FLAGS.model == 'yolov3':
                pred_bbox = utils.postprocess_bbbox(pred_bbox, ANCHORS,
                                                    STRIDES)
            elif FLAGS.model == 'yolov4':
                XYSCALE = cfg.YOLO.XYSCALE
                pred_bbox = utils.postprocess_bbbox(pred_bbox, ANCHORS,
                                                    STRIDES, XYSCALE=XYSCALE)

            pred_bbox = tf.concat(pred_bbox, axis=0)
            bboxes = utils.postprocess_boxes(pred_bbox, image_size,
                                             INPUT_SIZE,
                                             cfg.TEST.SCORE_THRESHOLD)
            bboxes = utils.nms(bboxes, cfg.TEST.IOU_THRESHOLD, method='nms')

            if cfg.TEST.DECTECTED_IMAGE_PATH is not None:
                # NOTE(review): `image` was converted to RGB above and
                # cv2.imwrite expects BGR -- confirm the saved colours.
                image = utils.draw_bbox(image, bboxes)
                cv2.imwrite(cfg.TEST.DECTECTED_IMAGE_PATH + image_name, image)

            with open(predict_result_path, 'w') as f:
                for bbox in bboxes:
                    class_ind = int(bbox[5])
                    class_name = CLASSES[class_ind]
                    if class_name in classes and class_name == current_class:
                        coor = np.array(bbox[:4], dtype=np.int32)
                        score = '%.4f' % bbox[4]
                        xmin, ymin, xmax, ymax = list(map(str, coor))
                        bbox_mess = ' '.join(
                            [class_name, score, xmin, ymin, xmax, ymax]) + '\n'
                        f.write(bbox_mess)
                        print('\t' + str(bbox_mess).strip())
            print(num, num_lines)

    # Timings are only recorded on the 'tf' path; avoid dividing by zero
    # for TFLite runs or an empty annotation file.
    if times:
        print("Elapsed time: " + str(sum(times) / len(times)))
    else:
        print("Elapsed time: no timings recorded")
# Fragment of a per-image evaluation step.  It relies on names bound
# outside this excerpt (image, image_name, bboxes_gt, classes_gt, CLASSES,
# model, num, INPUT_SIZE and the output paths) and is cut off inside the
# final loop.

# Write one "<class> xmin ymin xmax ymax" line per ground-truth box.
print('=> ground truth of %s:' % image_name)
num_bbox_gt = len(bboxes_gt)
with open(ground_truth_path, 'w') as f:
    for i in range(num_bbox_gt):
        class_name = CLASSES[classes_gt[i]]
        xmin, ymin, xmax, ymax = list(map(str, bboxes_gt[i]))
        bbox_mess = ' '.join([class_name, xmin, ymin, xmax, ymax ]) + '\n'
        f.write(bbox_mess)
        print('\t' + str(bbox_mess).strip())
print('=> predict result of %s:' % image_name)
predict_result_path = os.path.join(predicted_dir_path, str(num) + '.txt')
# Predict Process
image_size = image.shape[:2]
image_data = utils.image_preprocess(np.copy(image), [INPUT_SIZE, INPUT_SIZE])
# Add a leading axis and cast to float32 before calling the model.
image_data = image_data[np.newaxis, ...].astype(np.float32)
pred_bbox = model.predict(image_data)
bboxes = utils.postprocess_boxes(pred_bbox, image_size, INPUT_SIZE, cfg.TEST.SCORE_THRESHOLD)
bboxes = utils.nms(bboxes, cfg.TEST.IOU_THRESHOLD, method='nms')
# Save the image with the detections drawn on it.
if cfg.TEST.DECTECTED_IMAGE_PATH is not None:
    image = utils.draw_bbox(image, bboxes)
    cv2.imwrite(cfg.TEST.DECTECTED_IMAGE_PATH + image_name, image)
with open(predict_result_path, 'w') as f:
    for bbox in bboxes:
        # First four entries are read as integer coordinates, the fifth
        # as the score.
        coor = np.array(bbox[:4], dtype=np.int32)
        score = bbox[4]
# Fragment of a per-image evaluation step.  It relies on names bound
# outside this excerpt (image, image_name, bboxes_gt, classes_gt, CLASSES,
# model, num, INPUT_SIZE and the output paths).
num_bbox_gt = len(bboxes_gt)
# Write the ground-truth box information to file.
with open(ground_truth_path, 'w') as f:
    # Iterate over the box count (the original passed the box container
    # itself to range(), which raises TypeError).
    for i in range(num_bbox_gt):
        class_name = CLASSES[classes_gt[i]]
        # File content must be text, so stringify the coordinates.
        xmin, ymin, xmax, ymax = list(map(str, bboxes_gt[i]))
        gt_bbox_mess = ' '.join([class_name, xmin, ymin, xmax, ymax]) + '\n'
        f.write(gt_bbox_mess)
        print('\t' + str(gt_bbox_mess).strip())

# predict process
print('predict result of %s:' % image_name)
predicted_result_path = os.path.join(predicted_dir_path, str(num) + '.txt')
image_size = image.shape[:2]
# NOTE(review): sibling code passes [INPUT_SIZE, INPUT_SIZE] here -- confirm
# that this project's image_preprocess accepts INPUT_SIZE as given.
image_data = utils.image_preprocess(np.copy(image), INPUT_SIZE)
# Add the leading axis to the *preprocessed* data (the original indexed the
# raw `image`, discarding the preprocessing result).
image_data = image_data[np.newaxis, ...].astype(np.float32)
pred_bbox = model.predict(image_data)
# Flatten every model output to rows of its last dimension, then stack
# them.  tf.reshape takes (tensor, shape); the original omitted the tensor.
pred_bbox = [tf.reshape(x, (-1, tf.shape(x)[-1])) for x in pred_bbox]
pred_bbox = tf.concat(pred_bbox, axis=0)
bboxes = utils.postprocess_boxes(pred_bbox, image_size, INPUT_SIZE, cfg.TEST.SCORE_THRESHOLD)
bboxes = utils.nms(bboxes, cfg.TEST.IOU_THRESHOLD, method='nms')
# Write the annotated image to the configured output path.
if cfg.TEST.DETECTED_IMAGE_PATH is not None:
    image = utils.draw_bboxes(image, bboxes)
    cv2.imwrite(cfg.TEST.DETECTED_IMAGE_PATH + image_name, image)
def main(_argv):
    """Run YOLO detection on a video source, display it, optionally save it.

    ``FLAGS.video`` is used as a camera index when it parses as an
    integer, otherwise it is passed to ``cv2.VideoCapture`` unchanged.
    Inference runs through a Keras model (``'tf'``), a TFLite interpreter
    (``'tflite'``) or a SavedModel signature (``'trt'``) depending on
    ``FLAGS.framework``.  Annotated frames are shown in a "Detections"
    window and, when ``FLAGS.output`` is set, written with
    ``cv2.VideoWriter``.  The loop ends on an unreadable frame or when
    ``q`` is pressed.

    Args:
        _argv: Unused.

    Raises:
        ValueError: If ``FLAGS.framework`` is not one of the three values
            above, or a non-tiny ``FLAGS.model`` is neither ``'yolov3'``
            nor ``'yolov4'``.
    """
    physical_devices = tf.config.experimental.list_physical_devices('GPU')
    for physical_device in physical_devices:
        tf.config.experimental.set_memory_growth(physical_device, True)

    if FLAGS.tiny:
        STRIDES = np.array(cfg.YOLO.STRIDES_TINY)
        XYSCALE = cfg.YOLO.XYSCALE_TINY
        if FLAGS.model == 'yolov4':
            ANCHORS = utils.get_anchors(cfg.YOLO.ANCHORS_TINY, FLAGS.tiny)
        else:
            ANCHORS = utils.get_anchors(cfg.YOLO.ANCHORS_TINY_V3, FLAGS.tiny)
    else:
        STRIDES = np.array(cfg.YOLO.STRIDES)
        XYSCALE = cfg.YOLO.XYSCALE
        if FLAGS.model == 'yolov4':
            ANCHORS = utils.get_anchors(cfg.YOLO.ANCHORS, FLAGS.tiny)
        else:
            ANCHORS = utils.get_anchors(cfg.YOLO.ANCHORS_V3, FLAGS.tiny)
    CLASSES = utils.read_class_names(cfg.YOLO.CLASSES)
    NUM_CLASSES = len(CLASSES)
    input_size = FLAGS.size

    # Only the integer conversion is guarded, so unrelated failures are no
    # longer swallowed by a bare except.
    try:
        video_source = int(FLAGS.video)
    except (TypeError, ValueError):
        video_source = FLAGS.video
    vid = cv2.VideoCapture(video_source)

    times = []
    out = None
    # Release the capture, writer and windows on every exit path.
    try:
        if FLAGS.output:
            width = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH))
            height = int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT))
            fps = int(vid.get(cv2.CAP_PROP_FPS))
            codec = cv2.VideoWriter_fourcc(*FLAGS.output_format)
            # TODO: switch to get vertical
            out = cv2.VideoWriter(FLAGS.output, codec, fps, (width, height))

        if FLAGS.framework == 'tf':
            input_layer = tf.keras.layers.Input([input_size, input_size, 3])
            if FLAGS.tiny:
                build_network = (YOLOv3_tiny
                                 if FLAGS.model == 'yolov3' else YOLOv4_tiny)
            elif FLAGS.model == 'yolov3':
                build_network = YOLOv3
            elif FLAGS.model == 'yolov4':
                build_network = YOLOv4
            else:
                raise ValueError("Unsupported model: %s" % FLAGS.model)

            feature_maps = build_network(input_layer, NUM_CLASSES)
            bbox_tensors = [
                decode(fm, NUM_CLASSES, i)
                for i, fm in enumerate(feature_maps)
            ]
            model = tf.keras.Model(input_layer, bbox_tensors)

            if FLAGS.tiny:
                utils.load_weights_tiny(model, FLAGS.weights, FLAGS.model)
            elif FLAGS.model == 'yolov3':
                utils.load_weights_v3(model, FLAGS.weights)
            elif FLAGS.weights.split(".")[-1] == "weights":
                utils.load_weights(model, FLAGS.weights)
            else:
                model.load_weights(FLAGS.weights).expect_partial()

            model.summary()
        elif FLAGS.framework == 'tflite':
            interpreter = tf.lite.Interpreter(model_path=FLAGS.weights)
            interpreter.allocate_tensors()
            input_details = interpreter.get_input_details()
            output_details = interpreter.get_output_details()
        elif FLAGS.framework == 'trt':
            saved_model_loaded = tf.saved_model.load(
                FLAGS.weights, tags=[tag_constants.SERVING])
            infer = saved_model_loaded.signatures['serving_default']
        else:
            raise ValueError("Unsupported framework: %s" % FLAGS.framework)

        logging.info("Model loaded!")

        while True:
            return_value, frame = vid.read()
            # TODO: rotate here, e.g.
            # frame = cv2.rotate(frame, cv2.ROTATE_90_CLOCKWISE)
            if not return_value:
                logging.warning("Empty Frame")
                break

            frame_size = frame.shape[:2]
            frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            image_data = utils.image_preprocess(np.copy(frame),
                                                [input_size, input_size])
            image_data = image_data[np.newaxis, ...].astype(np.float32)

            prev_time = time.time()
            if FLAGS.framework == 'tf':
                pred_bbox = model.predict(image_data)
            elif FLAGS.framework == 'tflite':
                interpreter.set_tensor(input_details[0]['index'], image_data)
                interpreter.invoke()
                pred_bbox = [
                    interpreter.get_tensor(output_details[i]['index'])
                    for i in range(len(output_details))
                ]
            else:  # 'trt' (validated above)
                batched_input = tf.constant(image_data)
                result = infer(batched_input)
                pred_bbox = [value.numpy() for value in result.values()]
            curr_time = time.time()

            # Keep the last 20 inference timings for a rolling average.
            times.append(curr_time - prev_time)
            times = times[-20:]

            if FLAGS.model == 'yolov4':
                pred_bbox = utils.postprocess_bbbox(pred_bbox, ANCHORS,
                                                    STRIDES, XYSCALE)
            else:
                pred_bbox = utils.postprocess_bbbox(pred_bbox, ANCHORS,
                                                    STRIDES)

            bboxes = utils.postprocess_boxes(pred_bbox, frame_size,
                                             input_size, 0.5)  # 0.25
            bboxes = utils.nms(bboxes, 0.213, method='nms')  # 0.213

            image = utils.draw_bbox(frame, bboxes, classes=CLASSES)
            image = cv2.putText(
                image,
                "Time: {:.2f}ms".format(sum(times) / len(times) * 1000),
                (0, 24),  # 24
                cv2.FONT_HERSHEY_SIMPLEX,
                0.7,  # 0.7
                (0, 0, 255),
                2)

            # Frames are processed as RGB; convert back for display/writing.
            image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
            cv2.namedWindow("Detections", cv2.WINDOW_AUTOSIZE)
            cv2.imshow("Detections", image)
            if out is not None:
                out.write(image)
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
    finally:
        vid.release()
        if out is not None:
            out.release()
        cv2.destroyAllWindows()
from PIL import Image

# Frozen-graph demo: load one image, run it through the three prediction
# heads of a .pb graph and stack their outputs into a single array.

# Tensor names fetched from the graph: the input placeholder followed by
# the small / medium / large box prediction tensors.
return_elements = [
    "input/input_data:0",
    "pred_sbbox/concat_2:0",
    "pred_mbbox/concat_2:0",
    "pred_lbbox/concat_2:0",
]
pb_file = "./yolov3_coco.pb"
image_path = "./docs/images/example.jpg"
num_classes = 80
input_size = 416

# Read the image and switch it from OpenCV's BGR order to RGB.
original_image = cv2.imread(image_path)
original_image = cv2.cvtColor(original_image, cv2.COLOR_BGR2RGB)
original_image_size = original_image.shape[:2]

# Preprocess a copy to the network input size and add a leading axis.
image_data = utils.image_preprocess(
    np.copy(original_image),
    [input_size, input_size],
)
image_data = image_data[np.newaxis, ...]

graph = tf.Graph()
return_tensors = utils.read_pb_return_tensors(graph, pb_file, return_elements)

with tf.Session(graph=graph) as sess:
    pred_sbbox, pred_mbbox, pred_lbbox = sess.run(
        [return_tensors[1], return_tensors[2], return_tensors[3]],
        feed_dict={return_tensors[0]: image_data},
    )

# Each head is flattened to rows of width 5 + num_classes before stacking.
pred_bbox = np.concatenate(
    [
        np.reshape(head_output, (-1, 5 + num_classes))
        for head_output in (pred_sbbox, pred_mbbox, pred_lbbox)
    ],
    axis=0,
)
def main(_argv):
    """Detect objects in a video source and track them across frames.

    ``FLAGS.video`` is used as a camera index when it parses as an
    integer, otherwise it is passed to ``cv2.VideoCapture`` unchanged.
    Per frame, detections from the YOLO model (``'tf'``, ``'tflite'`` or
    ``'trt'`` backend, chosen by ``FLAGS.framework``) are turned into
    ``Detection`` objects with appearance features from the box encoder
    and fed to the ``Tracker``.  Confirmed, recently updated tracks are
    drawn, shown in a "Detections" window and, when ``FLAGS.output`` is
    set, written with ``cv2.VideoWriter``.  The loop ends on an unreadable
    frame or when ``q`` is pressed.

    Args:
        _argv: Unused.

    Raises:
        ValueError: If ``FLAGS.framework`` is not one of the three values
            above, or a non-tiny ``FLAGS.model`` is neither ``'yolov3'``
            nor ``'yolov4'``.
    """
    physical_devices = tf.config.experimental.list_physical_devices('GPU')
    for physical_device in physical_devices:
        tf.config.experimental.set_memory_growth(physical_device, True)

    if FLAGS.tiny:
        STRIDES = np.array(cfg.YOLO.STRIDES_TINY)
        XYSCALE = cfg.YOLO.XYSCALE_TINY
        if FLAGS.model == 'yolov4':
            ANCHORS = utils.get_anchors(cfg.YOLO.ANCHORS_TINY, FLAGS.tiny)
        else:
            ANCHORS = utils.get_anchors(cfg.YOLO.ANCHORS_TINY_V3, FLAGS.tiny)
    else:
        STRIDES = np.array(cfg.YOLO.STRIDES)
        XYSCALE = cfg.YOLO.XYSCALE
        if FLAGS.model == 'yolov4':
            ANCHORS = utils.get_anchors(cfg.YOLO.ANCHORS, FLAGS.tiny)
        else:
            ANCHORS = utils.get_anchors(cfg.YOLO.ANCHORS_V3, FLAGS.tiny)
    CLASSES = utils.read_class_names(cfg.YOLO.CLASSES)
    NUM_CLASSES = len(CLASSES)
    input_size = FLAGS.size

    # Only the integer conversion is guarded, so unrelated failures are no
    # longer swallowed by a bare except.
    try:
        video_source = int(FLAGS.video)
    except (TypeError, ValueError):
        video_source = FLAGS.video
    vid = cv2.VideoCapture(video_source)

    times = []
    out = None
    # Release the capture, writer and windows on every exit path.
    try:
        if FLAGS.output:
            width = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH))
            height = int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT))
            fps = int(vid.get(cv2.CAP_PROP_FPS))
            codec = cv2.VideoWriter_fourcc(*FLAGS.output_format)
            out = cv2.VideoWriter(FLAGS.output, codec, fps, (width, height))

        if FLAGS.framework == 'tf':
            input_layer = tf.keras.layers.Input([input_size, input_size, 3])
            if FLAGS.tiny:
                build_network = (YOLOv3_tiny
                                 if FLAGS.model == 'yolov3' else YOLOv4_tiny)
            elif FLAGS.model == 'yolov3':
                build_network = YOLOv3
            elif FLAGS.model == 'yolov4':
                build_network = YOLOv4
            else:
                raise ValueError("Unsupported model: %s" % FLAGS.model)

            feature_maps = build_network(input_layer, NUM_CLASSES)
            bbox_tensors = [
                decode(fm, NUM_CLASSES, i)
                for i, fm in enumerate(feature_maps)
            ]
            model = tf.keras.Model(input_layer, bbox_tensors)

            if FLAGS.tiny:
                utils.load_weights_tiny(model, FLAGS.weights, FLAGS.model)
            elif FLAGS.model == 'yolov3':
                utils.load_weights_v3(model, FLAGS.weights)
            elif FLAGS.weights.split(".")[-1] == "weights":
                utils.load_weights(model, FLAGS.weights)
            else:
                model.load_weights(FLAGS.weights).expect_partial()

            model.summary()
        elif FLAGS.framework == 'tflite':
            interpreter = tf.lite.Interpreter(model_path=FLAGS.weights)
            interpreter.allocate_tensors()
            input_details = interpreter.get_input_details()
            output_details = interpreter.get_output_details()
        elif FLAGS.framework == 'trt':
            saved_model_loaded = tf.saved_model.load(
                FLAGS.weights, tags=[tag_constants.SERVING])
            infer = saved_model_loaded.signatures['serving_default']
        else:
            raise ValueError("Unsupported framework: %s" % FLAGS.framework)

        # Tracker setup.
        max_cosine_distance = 0.7  # 0.5 / 0.7
        nn_budget = None
        model_filename = './weights/tracker/mars-small128.pb'
        encoder = gdet.create_box_encoder(model_filename, batch_size=1)
        metric = nn_matching.NearestNeighborDistanceMetric(
            "cosine", max_cosine_distance, nn_budget)
        tracker = Tracker(metric)

        # Parallel key/value lists used to map a class name back to its key.
        key_list = list(CLASSES.keys())
        val_list = list(CLASSES.values())

        # Class names to track; an empty list means every class is tracked.
        Track_only = []

        logging.info("Models loaded!")

        while True:
            return_value, frame = vid.read()
            if not return_value:
                logging.warning("Empty Frame")
                break

            frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            frame_size = frame.shape[:2]
            image_data = utils.image_preprocess(np.copy(frame),
                                                [input_size, input_size])
            image_data = image_data[np.newaxis, ...].astype(np.float32)

            t1 = time.time()
            if FLAGS.framework == 'tf':
                pred_bbox = model.predict(image_data)
            elif FLAGS.framework == 'tflite':
                interpreter.set_tensor(input_details[0]['index'], image_data)
                interpreter.invoke()
                pred_bbox = [
                    interpreter.get_tensor(output_details[i]['index'])
                    for i in range(len(output_details))
                ]
            else:  # 'trt' (validated above)
                batched_input = tf.constant(image_data)
                result = infer(batched_input)
                pred_bbox = [value.numpy() for value in result.values()]
            t2 = time.time()

            # Keep the last 20 inference timings for a rolling average.
            times.append(t2 - t1)
            times = times[-20:]

            if FLAGS.model == 'yolov4':
                pred_bbox = utils.postprocess_bbbox(pred_bbox, ANCHORS,
                                                    STRIDES, XYSCALE)
            else:
                pred_bbox = utils.postprocess_bbbox(pred_bbox, ANCHORS,
                                                    STRIDES)

            bboxes = utils.postprocess_boxes(pred_bbox, frame_size,
                                             input_size, 0.5)  # 0.25
            bboxes = utils.nms(bboxes, 0.5, method='nms')  # 0.213

            # Convert corner boxes (x1, y1, x2, y2) to (x, y, w, h) for the
            # encoder, keeping only the requested classes.
            boxes, scores, names = [], [], []
            for bbox in bboxes:
                class_name = CLASSES[int(bbox[5])]
                if not Track_only or class_name in Track_only:
                    left = bbox[0].astype(int)
                    top = bbox[1].astype(int)
                    boxes.append([
                        left,
                        top,
                        bbox[2].astype(int) - left,
                        bbox[3].astype(int) - top,
                    ])
                    scores.append(bbox[4])
                    names.append(class_name)

            boxes = np.array(boxes)
            names = np.array(names)
            scores = np.array(scores)
            features = np.array(encoder(frame, boxes))
            detections = [
                Detection(bbox, score, class_name, feature)
                for bbox, score, class_name, feature in zip(
                    boxes, scores, names, features)
            ]

            tracker.predict()
            tracker.update(detections)

            tracked_bboxes = []
            for track in tracker.tracks:
                # Skip unconfirmed tracks and tracks not updated recently.
                if not track.is_confirmed() or track.time_since_update > 1:  # 1 / 5
                    continue
                bbox = track.to_tlbr()
                class_name = track.get_class()
                tracking_id = track.track_id
                index = key_list[val_list.index(class_name)]
                tracked_bboxes.append(bbox.tolist() + [tracking_id, index])

            image = utils.draw_bbox(frame,
                                    tracked_bboxes,
                                    classes=CLASSES,
                                    tracking=True)
            image = cv2.putText(
                image,
                "Time: {:.2f}ms".format(sum(times) / len(times) * 1000),
                (0, 36),  # 24
                cv2.FONT_HERSHEY_SIMPLEX,
                1.5,
                (0, 0, 255),
                2)

            # Frames are processed as RGB; convert back to BGR for
            # display/writing.
            image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
            cv2.namedWindow("Detections", cv2.WINDOW_AUTOSIZE)
            cv2.imshow("Detections", image)
            if out is not None:
                out.write(image)
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
    finally:
        vid.release()
        if out is not None:
            out.release()
        cv2.destroyAllWindows()
def main(_argv):
    """Run YOLO detection on a single image and show the result.

    The image at ``FLAGS.image`` is preprocessed to ``FLAGS.size`` and run
    through a Keras model (``FLAGS.framework == 'tf'``) or a TFLite
    interpreter (any other framework value) loaded from ``FLAGS.weights``.
    Detections are filtered with NMS, drawn onto the image and shown with
    ``PIL.Image.show``.

    Args:
        _argv: Unused.

    Raises:
        ValueError: If the image cannot be read, or if ``FLAGS.model`` is
            not one this function can build (non-tiny models other than
            ``'yolov3'`` / ``'yolov4'``).
    """
    if FLAGS.tiny:
        STRIDES = np.array(cfg.YOLO.STRIDES_TINY)
        ANCHORS = utils.get_anchors(cfg.YOLO.ANCHORS_TINY, FLAGS.tiny)
    else:
        STRIDES = np.array(cfg.YOLO.STRIDES)
        if FLAGS.model == 'yolov4':
            ANCHORS = utils.get_anchors(cfg.YOLO.ANCHORS, FLAGS.tiny)
        else:
            ANCHORS = utils.get_anchors(cfg.YOLO.ANCHORS_V3, FLAGS.tiny)
    NUM_CLASS = len(utils.read_class_names(cfg.YOLO.CLASSES))
    XYSCALE = cfg.YOLO.XYSCALE
    input_size = FLAGS.size
    image_path = FLAGS.image

    original_image = cv2.imread(image_path)
    if original_image is None:
        # cv2.imread signals failure by returning None; fail here with a
        # clear message instead of inside cvtColor.
        raise ValueError("Could not read image: {}".format(image_path))
    original_image = cv2.cvtColor(original_image, cv2.COLOR_BGR2RGB)
    original_image_size = original_image.shape[:2]

    image_data = utils.image_preprocess(np.copy(original_image),
                                        [input_size, input_size])
    image_data = image_data[np.newaxis, ...].astype(np.float32)

    if FLAGS.framework == 'tf':
        input_layer = tf.keras.layers.Input([input_size, input_size, 3])

        # Pick the network constructor once; the decode/Model steps are
        # identical for every architecture.
        if FLAGS.tiny:
            build_network = YOLOv3_tiny
        elif FLAGS.model == 'yolov3':
            build_network = YOLOv3
        elif FLAGS.model == 'yolov4':
            build_network = YOLOv4
        else:
            raise ValueError("Unsupported model: %s" % FLAGS.model)

        feature_maps = build_network(input_layer, NUM_CLASS)
        bbox_tensors = [
            decode(fm, NUM_CLASS, i) for i, fm in enumerate(feature_maps)
        ]
        model = tf.keras.Model(input_layer, bbox_tensors)

        if FLAGS.tiny:
            utils.load_weights_tiny(model, FLAGS.weights)
        elif FLAGS.model == 'yolov3':
            utils.load_weights_v3(model, FLAGS.weights)
        elif FLAGS.weights.split(".")[-1] == "weights":
            # File extension "weights": use the project's own loader.
            utils.load_weights(model, FLAGS.weights)
        else:
            model.load_weights(FLAGS.weights).expect_partial()

        model.summary()
        pred_bbox = model.predict(image_data)
    else:
        # Load TFLite model and allocate tensors.
        interpreter = tf.lite.Interpreter(model_path=FLAGS.weights)
        interpreter.allocate_tensors()
        # Get input and output tensors.
        input_details = interpreter.get_input_details()
        output_details = interpreter.get_output_details()
        print(input_details)
        print(output_details)
        interpreter.set_tensor(input_details[0]['index'], image_data)
        interpreter.invoke()
        pred_bbox = [
            interpreter.get_tensor(detail['index'])
            for detail in output_details
        ]

    if FLAGS.model == 'yolov4':
        pred_bbox = utils.postprocess_bbbox(pred_bbox, ANCHORS, STRIDES,
                                            XYSCALE)
    else:
        pred_bbox = utils.postprocess_bbbox(pred_bbox, ANCHORS, STRIDES)
    bboxes = utils.postprocess_boxes(pred_bbox, original_image_size,
                                     input_size, 0.25)
    bboxes = utils.nms(bboxes, 0.213, method='nms')

    image = utils.draw_bbox(original_image, bboxes)
    image = Image.fromarray(image)
    image.show()
def main():
    """Run YOLOv4 on a hard-coded video, log boxes to a text file, show frames.

    Builds a YOLOv4 Keras model, loads ``./data/yolov4.weights``, starts a
    daemon thread running ``MyLoop`` with the capture and a queue, then
    consumes frames from that queue: each frame is preprocessed, run
    through the model, post-processed with NMS, passed to
    ``utils.video_write_info`` together with the open output file, and
    displayed in a "result" window until ``q`` is pressed or the queue is
    found empty.

    NOTE(review): block nesting below was reconstructed from a
    whitespace-collapsed source; confirm which statements belong under
    ``strategy.scope()``, ``tf.device`` and ``try``.
    """
    # The return value is discarded, so this call has no visible effect here.
    tf.executing_eagerly()
    strategy = tf.distribute.OneDeviceStrategy(device="/gpu:0")
    with strategy.scope():
        # if True:
        STRIDES = np.array(cfg.YOLO.STRIDES)
        ANCHORS = utils.get_anchors(cfg.YOLO.ANCHORS)
        NUM_CLASS = len(utils.read_class_names(cfg.YOLO.CLASSES))
        XYSCALE = cfg.YOLO.XYSCALE
        WEIGHTS = './data/yolov4.weights'  # must end in .weights
        # The second assignment wins; the first is immediately overwritten.
        video_path = './data/road.mp4'
        video_path = './data/AOTsample3.mp4'
        # Alternative inputs:
        # video_path = './data/vtest.avi'
        # video_path = './data/20190422_153844_DA4A.mkv'
        print("Video from: ", video_path )
        # vid = cv2.VideoCapture(video_path)
        print('thread started')
        INPUT_SIZE = 419  # 608 # 230

        # Open the output file: the video path with its last three
        # characters replaced by 'txt'.
        output_f = video_path[:-3] + 'txt'
        f = open(output_f, 'w')
        print('file started')

        # Generate the model.
        input_layer = tf.keras.Input([INPUT_SIZE, INPUT_SIZE, 3])
        print('tensors started 1')
        feature_maps = YOLOv4(input_layer, NUM_CLASS)
        print('tensors started 2')
        bbox_tensors = []
        print('tensors started 3')
        for i, fm in enumerate(feature_maps):
            bbox_tensor = decode(fm, NUM_CLASS, i)
            bbox_tensors.append(bbox_tensor)
        print('tensors started 4')
        model = tf.keras.Model(input_layer, bbox_tensors)
        print('model built')

        # Force the model to run eagerly, then print which mode is reported.
        model.run_eagerly = True
        if model.run_eagerly:
            print ('yeeyee')
        else:
            print ('hawhaw')
        utils.load_weights(model, WEIGHTS)

        with tf.device('/GPU:0'):
            # Bounded queue handed to the MyLoop thread together with the
            # capture; presumably MyLoop fills it with frames -- confirm
            # against MyLoop.
            buf = Queue(maxsize=8)
            # buf = VidThread(video_path)
            # buf.start()
            vid = cv2.VideoCapture(video_path)
            coord = tf.train.Coordinator()
            t = Thread(target=MyLoop, args=(video_path, buf,vid, coord))
            t.daemon = True
            # coord.register_thread(t)
            t.start()
            # Give the thread a one-second head start before consuming.
            time.sleep(1.0)

            try:
                # NOTE(review): the loop stops as soon as the queue is seen
                # empty; if the producer is slower than this consumer that
                # may happen before the video ends -- confirm.
                while not buf.empty():
                    frame = buf.get()
                    frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                    # `image` is reassigned below before it is used.
                    image = Image.fromarray(frame)
                    # Timestamp string handed to utils.video_write_info.
                    dt = str(datetime.datetime.now())
                    frame_size = frame.shape[:2]

                    # Resize the image and add another dimension.
                    cur_frame = np.copy(frame)
                    image_data = utils.image_preprocess(cur_frame, [INPUT_SIZE, INPUT_SIZE])
                    image_data = image_data[np.newaxis, ...].astype(np.float32)

                    # time1: tensor conversion only.
                    prev_time = time.time()
                    with tf.device('/GPU:0'):
                        image_data = tf.convert_to_tensor(image_data)
                    print(image_data.device)
                    curr_time = time.time()
                    exec_time = curr_time - prev_time
                    info = "time1: %.2f ms" %(1000*exec_time)
                    print(info)

                    # time2: inference, post-processing, file output, drawing.
                    prev_time = time.time()
                    # Make the bounding boxes.
                    pred_bbox = model.predict(image_data)
                    pred_bbox = utils.postprocess_bbbox(pred_bbox, ANCHORS, STRIDES, XYSCALE)
                    bboxes = utils.postprocess_boxes(pred_bbox, frame_size, INPUT_SIZE, 0.25)
                    bboxes = utils.nms(bboxes, 0.213, method='nms')

                    # Output bbox info to the file and draw the image, then
                    # calculate and display the time the frame took.
                    utils.video_write_info(frame, f, bboxes, dt)
                    image = utils.draw_some_bbox(frame, bboxes)
                    curr_time = time.time()
                    exec_time = curr_time - prev_time
                    info = "time2: %.2f ms" %(1000*exec_time)
                    print(info)

                    result = np.asarray(image)
                    cv2.namedWindow("result", cv2.WINDOW_NORMAL)
                    # Original author's note: swapped image with result, not
                    # sure what the effect was.
                    result = cv2.cvtColor(result, cv2.COLOR_RGB2BGR)
                    cv2.imshow("result", result)
                    if cv2.waitKey(1) & 0xFF == ord('q'):
                        break

                # End the video, close the viewer, stop writing to the file.
                vid.release()
                cv2.destroyAllWindows()
                f.close()
            # If interrupted, end the video, close the viewer and stop
            # writing to the file.
            # NOTE(review): a bare except also catches KeyboardInterrupt and
            # SystemExit, and only the exception type is printed; the
            # cleanup is duplicated rather than placed in a finally clause.
            except:
                print("Unexpected error:", sys.exc_info()[0])
                vid.release()
                cv2.destroyAllWindows()
                f.close()