def save_tf():
    """Build the YOLO Keras model selected by FLAGS and save it to FLAGS.output.

    The network is chosen from ``FLAGS.tiny`` / ``FLAGS.model``, every feature
    map is passed through ``decode``, the weights at ``FLAGS.weights`` are
    loaded, the model summary is printed and the model is saved.  For a
    non-tiny model other than 'yolov3' or 'yolov4' a message is printed and
    nothing is built or saved.
    """
    class_count = len(utils.read_class_names(cfg.YOLO.CLASSES))
    net_input = tf.keras.layers.Input([FLAGS.input_size, FLAGS.input_size, 3])

    # Pick the backbone and the matching weight loader in a single pass.
    if FLAGS.tiny:
        heads = YOLOv3_tiny(net_input, class_count)
        load_weights = utils.load_weights_tiny
    elif FLAGS.model == 'yolov3':
        heads = YOLOv3(net_input, class_count)
        load_weights = utils.load_weights_v3
    elif FLAGS.model == 'yolov4':
        heads = YOLOv4(net_input, class_count)
        load_weights = utils.load_weights
    else:
        print("model option can be only 'yolov3' or 'yolov4'.")
        return

    # One decoded output tensor per feature map, in the order they are produced.
    decoded = [decode(head, class_count, idx) for idx, head in enumerate(heads)]
    model = tf.keras.Model(net_input, decoded)
    load_weights(model, FLAGS.weights)

    model.summary()
    model.save(FLAGS.output)
def ocr(img, data):
    """Run Tesseract OCR on each detected region of ``img`` and print the text.

    Args:
        img: Image array indexed as ``img[row, col]``.  It is converted with
            ``cv2.COLOR_RGB2GRAY``, so it is treated as RGB.
        data: ``(boxes, scores, classes, num_objects)``.  Each box is
            ``(xmin, ymin, xmax, ymax)``; ``classes[i]`` indexes the names
            read from ``cfg.YOLO.CLASSES``.

    Returns:
        None.  Results are printed, one line per detection.
    """
    boxes, scores, classes, num_objects = data
    class_names = read_class_names(cfg.YOLO.CLASSES)
    img_h, img_w = img.shape[:2]
    for i in range(num_objects):
        # get class name for detection
        class_name = class_names[int(classes[i])]
        # separate coordinates from box
        xmin, ymin, xmax, ymax = boxes[i]
        # Take 5 extra pixels on each side, clamped to the image.  Without the
        # clamp a box near the top/left edge yields a negative start index,
        # which wraps around and produces an empty or wrong crop.
        top = max(int(ymin) - 5, 0)
        bottom = min(int(ymax) + 5, img_h)
        left = max(int(xmin) - 5, 0)
        right = min(int(xmax) + 5, img_w)
        if bottom <= top or right <= left:
            # Degenerate box: nothing to read.
            continue
        box = img[top:bottom, left:right]
        # grayscale region within bounding box
        gray = cv2.cvtColor(box, cv2.COLOR_RGB2GRAY)
        # threshold the image using Otsu's method to preprocess for tesseract
        thresh = cv2.threshold(gray, 0, 255,
                               cv2.THRESH_BINARY | cv2.THRESH_OTSU)[1]
        # perform a median blur to smooth image slightly
        blur = cv2.medianBlur(thresh, 3)
        # double the size, as tesseract does better with certain text sizes
        blur = cv2.resize(blur, None, fx=2, fy=2,
                          interpolation=cv2.INTER_CUBIC)
        # run tesseract and convert image text to string
        try:
            text = pytesseract.image_to_string(blur, config='--psm 11 --oem 3')
        except Exception as err:
            # OCR stays best-effort per detection, but the failure is reported
            # instead of being silently dropped.
            print("Class: {}, OCR failed: {}".format(class_name, err))
            continue
        print("Class: {}, Text Extracted: {}".format(class_name, text))
def crop_objects(img, data, path, allowed_classes):
    """Save a padded crop of every allowed detection and pass the paths to OCR.

    Args:
        img: Image array indexed as ``img[row, col]``.
        data: ``(boxes, scores, classes, num_objects)``; each box is
            ``(xmin, ymin, xmax, ymax)``.
        path: Directory the crops are written to.
        allowed_classes: Container of class names that should be cropped.

    Side effects:
        Writes ``<class>_<n>.png`` files under ``path`` and calls
        ``license_plate_easy_ocr.license_plate_detection`` with the
        accumulated ``{count: image_path}`` mapping after every saved crop.
    """
    boxes, scores, classes, num_objects = data
    class_names = read_class_names(cfg.YOLO.CLASSES)
    img_h, img_w = img.shape[:2]
    # per-class counters, used as part of the image name
    counts = dict()
    cropped_images = dict()
    for i in range(num_objects):
        class_name = class_names[int(classes[i])]
        if class_name not in allowed_classes:
            continue
        # get box coords
        xmin, ymin, xmax, ymax = boxes[i]
        # Take 5 extra pixels around all edges, clamped to the image.  A
        # negative start index would wrap around and give an empty or wrong
        # crop, which makes cv2.imwrite fail.
        top = max(int(ymin) - 5, 0)
        bottom = min(int(ymax) + 5, img_h)
        left = max(int(xmin) - 5, 0)
        right = min(int(xmax) + 5, img_w)
        if bottom <= top or right <= left:
            # Degenerate box: skip it without consuming a counter value.
            continue
        counts[class_name] = counts.get(class_name, 0) + 1
        cropped_img = img[top:bottom, left:right]
        # construct image name and join it to path for saving crop properly
        img_name = class_name + '_' + str(counts[class_name]) + '.png'
        img_path = os.path.join(path, img_name)
        # NOTE(review): the key is the per-class counter, so entries from
        # different classes with the same count overwrite each other --
        # confirm this is what the OCR stage expects.
        cropped_images[counts[class_name]] = img_path
        # save image
        cv2.imwrite(img_path, cropped_img)
        license_plate_easy_ocr.license_plate_detection(cropped_images)
def __init__(self, dataset_type):
    """Configure the dataset from ``cfg`` and load its annotations.

    Args:
        dataset_type: ``'train'`` selects the ``cfg.TRAIN`` settings; any
            other value selects ``cfg.TEST``.  The value is also forwarded
            to ``self.load_annotations``.
    """
    # All split-dependent settings come from the same config section.
    split_cfg = cfg.TRAIN if dataset_type == 'train' else cfg.TEST
    self.annot_path = split_cfg.ANNOT_PATH
    self.input_sizes = split_cfg.INPUT_SIZE
    self.batch_size = split_cfg.BATCH_SIZE
    self.data_aug = split_cfg.DATA_AUG

    self.train_input_sizes = cfg.TRAIN.INPUT_SIZE
    self.strides = np.array(cfg.YOLO.STRIDES)
    self.classes = utils.read_class_names(cfg.YOLO.CLASSES)
    self.num_classes = len(self.classes)
    # Example value recorded by the original author:
    #   array([[[ 10.,  13.], [ 16.,  30.], [ 33.,  23.]],
    #          [[ 30.,  61.], [ 62.,  45.], [ 59., 119.]],
    #          [[116.,  90.], [156., 198.], [373., 326.]]], dtype=float32)
    self.anchors = np.array(utils.get_anchors(cfg.YOLO.ANCHORS))
    self.anchor_per_scale = cfg.YOLO.ANCHOR_PER_SCALE
    self.max_bbox_per_scale = 150

    self.annotations = self.load_annotations(dataset_type)
    self.num_samples = len(self.annotations)
    # Round up so a final partial batch is still counted.
    self.num_batchs = int(np.ceil(self.num_samples / self.batch_size))
    self.batch_count = 0
def __init__(self, input_data, trainable):
    """Build the network on ``input_data`` and decode its three outputs.

    Args:
        input_data: Input passed to the network builder.
        trainable: Stored on the instance as ``self.trainable``.

    Raises:
        NotImplementedError: If building the network fails; the original
            error is attached as the cause.
    """
    self.trainable = trainable
    self.classes = utils.read_class_names(cfg.YOLO.CLASSES)
    self.num_class = len(self.classes)
    self.strides = np.array(cfg.YOLO.STRIDES)
    self.anchors = utils.get_anchors(cfg.YOLO.ANCHORS)
    self.anchor_per_scale = cfg.YOLO.ANCHOR_PER_SCALE
    self.iou_loss_thresh = cfg.YOLO.IOU_LOSS_THRESH
    self.upsample_method = cfg.YOLO.UPSAMPLE_METHOD

    try:
        self.conv_lbbox, self.conv_mbbox, self.conv_sbbox = self.__build_nework(
            input_data)
    except Exception as err:
        # Catch Exception only, so KeyboardInterrupt/SystemExit propagate
        # unchanged, and keep the root cause attached for debugging.
        raise NotImplementedError("Can not build up yolov3 network!") from err

    # Each decoded output lives in its own variable scope; scale k uses
    # anchors[k] and strides[k].
    with tf.variable_scope('pred_sbbox'):
        self.pred_sbbox = self.decode(self.conv_sbbox, self.anchors[0],
                                      self.strides[0])
    with tf.variable_scope('pred_mbbox'):
        self.pred_mbbox = self.decode(self.conv_mbbox, self.anchors[1],
                                      self.strides[1])
    with tf.variable_scope('pred_lbbox'):
        self.pred_lbbox = self.decode(self.conv_lbbox, self.anchors[2],
                                      self.strides[2])
def __init__(self, is_training: bool, dataset_type: str = "converted_coco"):
    """Configure the dataset from ``cfg`` and load its annotations.

    Args:
        is_training: When true the ``cfg.TRAIN`` settings are used,
            otherwise the ``cfg.TEST`` settings.
        dataset_type: Stored as ``self.dataset_type``.
    """
    self.tiny = False
    # load_config() returns four values; only the first two are kept.
    self.strides, self.anchors, _num_class, _xyscale = utils.load_config()
    self.dataset_type = dataset_type

    split_cfg = cfg.TRAIN if is_training else cfg.TEST
    self.annot_path = split_cfg.ANNOT_PATH
    self.input_sizes = split_cfg.INPUT_SIZE
    self.batch_size = split_cfg.BATCH_SIZE
    self.data_aug = split_cfg.DATA_AUG

    self.train_input_sizes = cfg.TRAIN.INPUT_SIZE
    self.classes = utils.read_class_names(cfg.YOLO.CLASSES)
    self.num_classes = len(self.classes)
    self.anchor_per_scale = cfg.YOLO.ANCHOR_PER_SCALE
    self.max_bbox_per_scale = 150

    self.annotations = self.load_annotations()
    self.num_samples = len(self.annotations)
    # Round up so a final partial batch is still counted.
    self.num_batchs = int(np.ceil(self.num_samples / self.batch_size))
    self.batch_count = 0
def __init__(self):
    """Read image/annotation locations, class names and split settings from cfg."""
    common = cfg.COMMON

    # Image directory and image file extension.
    self.image_path = common.IMAGE_PATH
    self.image_extension = common.IMAGE_EXTENSION
    # Directory of the xml annotations.
    self.annotation_path = common.ANNOTATION_PATH

    # Class names as a dict ...
    self.classes_dir = utils.read_class_names(common.CLASS_FILE_PATH)
    self.classes_len = len(self.classes_dir)
    # ... and as a list ordered by the integer keys 0..classes_len-1.
    self.classes_list = [
        self.classes_dir[index] for index in range(self.classes_len)
    ]

    # Fractions of the data used for the test and validation splits.
    self.test_percent = common.TEST_PERCENT
    self.val_percent = common.VAL_PERCENT

    # Where each split is saved.
    self.train_data_path = cfg.TRAIN.TRAIN_DATA_PATH
    self.val_data_path = cfg.TRAIN.VAL_DATA_PATH
    self.test_data_path = cfg.TEST.TEST_DATA_PATH
def ocr(img, data):
    """Run Tesseract OCR on each detected region of ``img`` and print the text.

    Args:
        img: Image array indexed as ``img[row, col]``.  It is converted with
            ``cv2.COLOR_RGB2GRAY``, so it is treated as RGB.
        data: ``(boxes, scores, classes, num_objects)``.  Each box is
            ``(xmin, ymin, xmax, ymax)``; ``classes[i]`` indexes the names
            read from ``cfg.YOLO.CLASSES``.

    Returns:
        None.  Results are printed, one line per detection.
    """
    boxes, scores, classes, num_objects = data
    class_names = read_class_names(cfg.YOLO.CLASSES)
    img_h, img_w = img.shape[:2]
    for i in range(num_objects):
        class_name = class_names[int(classes[i])]
        xmin, ymin, xmax, ymax = boxes[i]
        # Pad the box by 5 pixels per side, clamped to the image.  Without
        # the clamp a box near the top/left edge yields a negative start
        # index, which wraps around and produces an empty or wrong crop.
        top = max(int(ymin) - 5, 0)
        bottom = min(int(ymax) + 5, img_h)
        left = max(int(xmin) - 5, 0)
        right = min(int(xmax) + 5, img_w)
        if bottom <= top or right <= left:
            # Degenerate box: nothing to read.
            continue
        box = img[top:bottom, left:right]
        # Preprocess for tesseract: grayscale, Otsu threshold, light median
        # blur, then upscale 2x.
        gray = cv2.cvtColor(box, cv2.COLOR_RGB2GRAY)
        thresh = cv2.threshold(gray, 0, 255,
                               cv2.THRESH_BINARY | cv2.THRESH_OTSU)[1]
        blur = cv2.medianBlur(thresh, 3)
        blur = cv2.resize(blur, None, fx=2, fy=2,
                          interpolation=cv2.INTER_CUBIC)
        try:
            text = pytesseract.image_to_string(blur, config='--psm 11 --oem 3')
        except Exception as err:
            # OCR stays best-effort per detection, but the failure is reported
            # instead of being silently dropped.
            print("Class: {}, OCR failed: {}".format(class_name, err))
            continue
        print("Class: {}, Text Extracted: {}".format(class_name, text))
def __init__(self, dataset_type):
    """Configure the dataset from ``cfg`` and load its annotations.

    Args:
        dataset_type: ``'train'`` uses the ``cfg.TRAIN`` settings;
            ``'second_train'`` is treated as ``'train'`` but with
            ``cfg.TRAIN.SECONDBATCH_SIZE`` as batch size; any other value
            uses the ``cfg.TEST`` settings.
    """
    # Resolve the training batch size first; 'second_train' is then handled
    # exactly like 'train' for everything else.
    if dataset_type == 'second_train':
        train_batch_size = cfg.TRAIN.SECONDBATCH_SIZE
        dataset_type = 'train'
    else:
        train_batch_size = cfg.TRAIN.BATCH_SIZE

    use_train = dataset_type == 'train'
    split_cfg = cfg.TRAIN if use_train else cfg.TEST
    self.annot_path = split_cfg.ANNOT_PATH
    self.input_sizes = split_cfg.INPUT_SIZE
    self.batch_size = train_batch_size if use_train else cfg.TEST.BATCH_SIZE
    self.data_aug = split_cfg.DATA_AUG

    self.train_input_sizes = cfg.TRAIN.INPUT_SIZE
    self.strides = np.array(cfg.YOLO.STRIDES)
    self.classes = utils.read_class_names(cfg.YOLO.CLASSES)
    self.num_classes = len(self.classes)
    self.anchors = np.array(utils.get_anchors(cfg.YOLO.ANCHORS))
    self.anchor_per_scale = cfg.YOLO.ANCHOR_PER_SCALE
    self.max_bbox_per_scale = 150

    self.annotations = self.load_annotations(dataset_type)
    self.num_samples = len(self.annotations)
    # Round up so a final partial batch is still counted.
    self.num_batchs = int(np.ceil(self.num_samples / self.batch_size))
    self.batch_count = 0
def main(_argv):
    """Build the YOLO model selected by FLAGS, export it and run the demo.

    The network is chosen from ``FLAGS.tiny`` / ``FLAGS.model``, its feature
    maps are decoded, the weights at ``FLAGS.weights`` are loaded, and the
    model is handed to ``save_model`` and ``save_tflite`` before ``demo``
    is called.

    Args:
        _argv: Unused.

    Raises:
        ValueError: If ``FLAGS.tiny`` is false and ``FLAGS.model`` is neither
            'yolov3' nor 'yolov4'.  (Previously this case fell through and
            failed later with an unbound ``model`` variable.)
    """
    num_class = len(utils.read_class_names(cfg.YOLO.CLASSES))
    input_layer = tf.keras.layers.Input([FLAGS.input_size, FLAGS.input_size, 3])

    # Select backbone and matching weight loader; the decode/build steps are
    # identical for every variant.
    if FLAGS.tiny:
        feature_maps = YOLOv3_tiny(input_layer, num_class)
        load_weights = utils.load_weights_tiny
    elif FLAGS.model == 'yolov3':
        feature_maps = YOLOv3(input_layer, num_class)
        load_weights = utils.load_weights_v3
    elif FLAGS.model == 'yolov4':
        feature_maps = YOLOv4(input_layer, num_class)
        load_weights = utils.load_weights
    else:
        raise ValueError("model option can be only 'yolov3' or 'yolov4'.")

    bbox_tensors = [decode(fm, num_class, i) for i, fm in enumerate(feature_maps)]
    model = tf.keras.Model(input_layer, bbox_tensors)
    load_weights(model, FLAGS.weights)

    save_model(model)
    save_tflite(model)
    demo()
def __call__(self, frame: NDArray[(Any, Any, 3), np.float32]) -> List[ObjectDetection]:
    """Detect objects in a single frame.

    The frame is resized to ``self.input_dim``, divided by 255, run through
    the model's ``'serving_default'`` signature and filtered with combined
    non-max suppression using the thresholds stored on the instance.

    Args:
        frame: Image array whose ``shape`` unpacks to (height, width, channels).

    Returns:
        One ``ObjectDetection`` per valid detection of the first (only)
        batch entry, with ``id`` and ``pt`` left as ``None``.
    """
    scaled = cv2.resize(frame, self.input_dim) / 255
    batch_data = tf.constant(np.array([scaled], np.float32))
    serving_fn = self.model.signatures['serving_default']
    pred_bbox = serving_fn(batch_data)

    # Split each output into box coordinates and confidences; the tensors of
    # the last output entry are the ones used below.
    for value in pred_bbox.values():
        boxes = value[:, :, 0:4]
        pred_conf = value[:, :, 4:]

    nms_boxes = tf.reshape(boxes, (tf.shape(boxes)[0], -1, 1, 4))
    nms_scores = tf.reshape(
        pred_conf, (tf.shape(pred_conf)[0], -1, tf.shape(pred_conf)[-1]))
    boxes, scores, classes, valid_detections = tf.image.combined_non_max_suppression(
        boxes=nms_boxes,
        scores=nms_scores,
        max_output_size_per_class=self.max_output_size_per_class,
        max_total_size=self.max_total_size,
        iou_threshold=self.iou_threshold,
        score_threshold=self.score_threshold,
    )

    class_names = list(utils.read_class_names(cfg.YOLO.CLASSES).values())
    frame_h, frame_w, _channels = frame.shape

    detections = []
    for det_idx in range(valid_detections[0]):
        class_id = int(classes[0][det_idx].numpy())
        detections.append(
            ObjectDetection(
                id=None,
                bbox=convert_bbox(boxes[0][det_idx], frame_w, frame_h),
                obj_class=class_names[class_id],
                prob=scores[0][det_idx].numpy(),
                pt=None,
            ))
    return detections
def __init__(self, dataset_type):
    """Configure the dataset from ``cfg`` and load CIFAR-10 into memory.

    Args:
        dataset_type: ``'train'`` selects the ``cfg.TRAIN`` settings; any
            other value selects ``cfg.TEST``.

    Annotation loading is disabled here; the samples come from
    ``cifar10.load_data()`` instead.
    """
    split_cfg = cfg.TRAIN if dataset_type == 'train' else cfg.TEST
    self.annot_path = split_cfg.ANNOT_PATH
    self.input_sizes = split_cfg.INPUT_SIZE
    self.batch_size = split_cfg.BATCH_SIZE
    self.data_aug = split_cfg.DATA_AUG

    self.train_input_size = cfg.TRAIN.INPUT_SIZE[0]
    self.strides = np.array(cfg.YOLO.STRIDES)
    self.classes = utils.read_class_names(cfg.YOLO.CLASSES)
    self.num_classes = len(self.classes)

    (train_images, train_labels), (test_images, test_labels) = cifar10.load_data()
    # Images are divided by 255; labels become one-hot vectors of length 10.
    self.trainset = train_images / 255.
    self.testset = test_images / 255.
    self.y_train = tf.keras.utils.to_categorical(train_labels, 10)
    self.y_test = tf.keras.utils.to_categorical(test_labels, 10)
    self.run_flag = 'train'

    self.num_samples = len(self.trainset)
    self.num_samples_test = len(self.testset)
    # Round down: a trailing partial batch is not counted.
    self.num_batchs = int(np.floor(self.num_samples / self.batch_size))
    self.num_batchs_test = int(
        np.floor(self.num_samples_test / self.batch_size))
    self.batch_count = 0
def __init__(self, input_data, trainable,score_threshold=0.3,iou_threshold=0.45):
    """Build the network, decode its three outputs and the final boxes.

    Args:
        input_data: Input passed to the network builder and to
            ``self._get_pred_bboxes``.
        trainable: Stored on the instance as ``self.trainable``.
        score_threshold: Forwarded to ``self._get_pred_bboxes``.
        iou_threshold: Forwarded to ``self._get_pred_bboxes``.

    Raises:
        NotImplementedError: If building the network fails; the original
            error is attached as the cause.
    """
    self.trainable = trainable
    self.classes = utils.read_class_names(cfg.YOLO.CLASSES)  # class names
    self.num_class = len(self.classes)  # number of classes
    self.strides = np.array(cfg.YOLO.STRIDES)
    self.anchors = utils.get_anchors(cfg.YOLO.ANCHORS)  # anchors
    self.anchor_per_scale = cfg.YOLO.ANCHOR_PER_SCALE
    self.iou_loss_thresh = cfg.YOLO.IOU_LOSS_THRESH
    self.upsample_method = cfg.YOLO.UPSAMPLE_METHOD  # upsampling method
    # Maximum number of detections per class in one image.
    self.per_cls_maxboxes = 200

    try:
        self.conv_lbbox, self.conv_mbbox, self.conv_sbbox = self.__build_network(input_data)
    except Exception as err:
        # Catch Exception only, so KeyboardInterrupt/SystemExit propagate
        # unchanged, and keep the root cause attached for debugging.
        raise NotImplementedError("Can not build up yolov3 network!") from err

    # The outputs are created inside named scopes so the nodes can be found
    # by name when the model is frozen into a .pb file.
    with tf.variable_scope('pred_sbbox'):
        # Small-object head, paired with anchors[0].  Original author's note:
        # conv_sbbox is (batch_size, 52, 52, 255) and strides[0] = 8 for a
        # 416 input (52 * 8 = 416), so one feature point covers an 8x8 patch
        # and anchors[0] must hold the small-object anchor sizes.
        self.pred_sbbox = self.decode(self.conv_sbbox, self.anchors[0], self.strides[0])
    with tf.variable_scope('pred_mbbox'):
        self.pred_mbbox = self.decode(self.conv_mbbox, self.anchors[1], self.strides[1])
    with tf.variable_scope('pred_lbbox'):
        self.pred_lbbox = self.decode(self.conv_lbbox, self.anchors[2], self.strides[2])
    with tf.variable_scope('pred_res'):
        # Final detection result.
        self.pred_res_boxes = self._get_pred_bboxes(input_data, score_threshold, iou_threshold)
def __init__(self):
    """Read the test settings, build the YOLOV3 graph and restore its weights.

    Placeholders for the input and the ``trainable`` flag are created, the
    model is built on them, and a session is opened whose variables are
    restored from ``cfg.TEST.WEIGHT_FILE`` through the exponential moving
    average variable mapping.
    """
    test_cfg = cfg.TEST
    yolo_cfg = cfg.YOLO

    # Settings taken from the TEST section.
    self.input_size = test_cfg.INPUT_SIZE
    self.score_threshold = test_cfg.SCORE_THRESHOLD
    self.iou_threshold = test_cfg.IOU_THRESHOLD
    self.annotation_path = test_cfg.ANNOT_PATH
    self.weight_file = test_cfg.WEIGHT_FILE
    self.write_image = test_cfg.WRITE_IMAGE
    self.write_image_path = test_cfg.WRITE_IMAGE_PATH
    self.show_label = test_cfg.SHOW_LABEL

    # Settings taken from the YOLO section.
    self.anchor_per_scale = yolo_cfg.ANCHOR_PER_SCALE
    self.classes = utils.read_class_names(yolo_cfg.CLASSES)
    self.num_classes = len(self.classes)
    self.anchors = np.array(utils.get_anchors(yolo_cfg.ANCHORS))
    self.moving_ave_decay = yolo_cfg.MOVING_AVE_DECAY

    with tf.name_scope('input'):
        self.input_data = tf.placeholder(dtype=tf.float32, name='input_data')
        self.trainable = tf.placeholder(dtype=tf.bool, name='trainable')

    net = YOLOV3(self.input_data, self.trainable)
    self.pred_sbbox = net.pred_sbbox
    self.pred_mbbox = net.pred_mbbox
    self.pred_lbbox = net.pred_lbbox

    with tf.name_scope('ema'):
        ema_obj = tf.train.ExponentialMovingAverage(self.moving_ave_decay)

    session_config = tf.ConfigProto(allow_soft_placement=True)
    self.sess = tf.Session(config=session_config)
    self.saver = tf.train.Saver(ema_obj.variables_to_restore())
    self.saver.restore(self.sess, self.weight_file)
def save_tflite():
    """Build the YOLO model selected by FLAGS and write it as a TFLite file.

    The network is chosen from ``FLAGS.tiny`` / ``FLAGS.model``, the weights
    at ``FLAGS.weights`` are loaded, the Keras model is converted with
    ``tf.lite.TFLiteConverter`` using the quantization selected by
    ``FLAGS.quantize_mode`` ('int8', 'float16' or 'full_int8'; any other
    value converts without extra options) and written to ``FLAGS.output``.

    Raises:
        ValueError: If ``FLAGS.tiny`` is false and ``FLAGS.model`` is neither
            'yolov3' nor 'yolov4'.  (Previously this case failed later with
            an unbound ``model`` variable.)
    """
    num_class = len(utils.read_class_names(cfg.YOLO.CLASSES))
    input_layer = tf.keras.layers.Input(
        [FLAGS.input_size, FLAGS.input_size, 3])

    # Select backbone and matching weight loader; the decode/build steps are
    # identical for every variant.
    if FLAGS.tiny:
        if FLAGS.model == 'yolov3':
            feature_maps = YOLOv3_tiny(input_layer, num_class)
        else:
            feature_maps = YOLOv4_tiny(input_layer, num_class)
        load_weights = utils.load_weights_tiny
    elif FLAGS.model == 'yolov3':
        feature_maps = YOLOv3(input_layer, num_class)
        load_weights = utils.load_weights_v3
    elif FLAGS.model == 'yolov4':
        feature_maps = YOLOv4(input_layer, num_class)
        load_weights = utils.load_weights
    else:
        raise ValueError("model option can be only 'yolov3' or 'yolov4'.")

    bbox_tensors = [decode(fm, num_class, i) for i, fm in enumerate(feature_maps)]
    model = tf.keras.Model(input_layer, bbox_tensors)
    load_weights(model, FLAGS.weights)
    model.summary()

    converter = tf.lite.TFLiteConverter.from_keras_model(model)

    # Compare the version numerically: as strings, '2.10.0' sorts before
    # '2.2.0' and the check would be skipped on newer releases.
    major_minor = tuple(int(part) for part in tf.__version__.split('.')[:2])
    if major_minor >= (2, 2):
        converter.experimental_new_converter = False

    if FLAGS.quantize_mode == 'int8':
        converter.optimizations = [tf.lite.Optimize.DEFAULT]
    elif FLAGS.quantize_mode == 'float16':
        converter.optimizations = [tf.lite.Optimize.DEFAULT]
        converter.target_spec.supported_types = [
            tf.compat.v1.lite.constants.FLOAT16
        ]
    elif FLAGS.quantize_mode == 'full_int8':
        converter.optimizations = [tf.lite.Optimize.DEFAULT]
        # NOTE(review): the original first assigned
        # [tf.lite.OpsSet.TFLITE_BUILTINS_INT8] and immediately overwrote it
        # with the list below; only the effective value is kept.  Confirm
        # that INT8-only ops were not the intended target.
        converter.target_spec.supported_ops = [
            tf.lite.OpsSet.TFLITE_BUILTINS, tf.lite.OpsSet.SELECT_TF_OPS
        ]
        converter.allow_custom_ops = True
        converter.representative_dataset = representative_data_gen

    tflite_model = converter.convert()
    # Context manager guarantees the file is flushed and closed.
    with open(FLAGS.output, 'wb') as out_file:
        out_file.write(tflite_model)

    logging.info("model saved to: {}".format(FLAGS.output))
def main(_argv):
    """Benchmark inference speed of the selected YOLO Keras model on one image.

    The model is built from ``FLAGS.tiny`` / ``FLAGS.model`` with the weights
    at ``FLAGS.weights``; ``FLAGS.image`` is preprocessed once and
    ``model.predict`` is timed 1000 times.  The first run is treated as
    warm-up and excluded; after every other run the running average and the
    current FPS are printed.

    Args:
        _argv: Unused.

    Raises:
        ValueError: If ``FLAGS.framework`` is not 'tf', or ``FLAGS.tiny`` is
            false and ``FLAGS.model`` is neither 'yolov3' nor 'yolov4'.
            (Previously these cases failed with an undefined ``model``.)
        FileNotFoundError: If OpenCV cannot read ``FLAGS.image``.
    """
    num_class = len(utils.read_class_names(cfg.YOLO.CLASSES))
    input_size = FLAGS.size
    physical_devices = tf.config.experimental.list_physical_devices('GPU')
    if len(physical_devices) > 0:
        tf.config.experimental.set_memory_growth(physical_devices[0], True)

    if FLAGS.framework != 'tf':
        raise ValueError("framework option can be only 'tf'.")

    input_layer = tf.keras.layers.Input([input_size, input_size, 3])
    # Select backbone and matching weight loader; the decode/build steps are
    # identical for every variant.
    if FLAGS.tiny:
        feature_maps = YOLOv3_tiny(input_layer, num_class)
        load_weights = utils.load_weights_tiny
    elif FLAGS.model == 'yolov3':
        feature_maps = YOLOv3(input_layer, num_class)
        load_weights = utils.load_weights_v3
    elif FLAGS.model == 'yolov4':
        feature_maps = YOLOv4(input_layer, num_class)
        load_weights = utils.load_weights
    else:
        raise ValueError("model option can be only 'yolov3' or 'yolov4'.")

    bbox_tensors = [decode(fm, num_class, i) for i, fm in enumerate(feature_maps)]
    model = tf.keras.Model(input_layer, bbox_tensors)
    load_weights(model, FLAGS.weights)
    logging.info('weights loaded')

    # The input never changes, so load and preprocess it once instead of on
    # every iteration (the original also re-opened and decoded the file each
    # time without using the result or closing the handle).
    original_image = cv2.imread(FLAGS.image)
    if original_image is None:
        raise FileNotFoundError(
            "could not read image: {}".format(FLAGS.image))
    original_image = cv2.cvtColor(original_image, cv2.COLOR_BGR2RGB)
    image_data = utils.image_preporcess(np.copy(original_image),
                                        [input_size, input_size])
    image_data = image_data[np.newaxis, ...].astype(np.float32)

    fps_total = 0  # named to avoid shadowing the builtin sum()
    for i in range(1000):
        prev_time = time.time()
        model.predict(image_data)
        exec_time = time.time() - prev_time
        if i == 0:
            # Skip the first run: it is not included in the average.
            continue
        fps = 1000 / (1000 * exec_time)
        fps_total += fps
        info = "average FPS:" + str(round(fps_total / i, 2)) + ", FPS: " + str(
            round(fps, 1))
        print(info)
def main():
    """Run YOLOv4 on ``args.input`` and write the annotated result image.

    Weights come from ``./weights/pretrained.h5`` when ``args.pretrained`` is
    set, otherwise from the latest checkpoint under ``args.weights`` (if
    given).  The image with drawn boxes is written to
    ``<args.output>/result.png``.

    Raises:
        FileNotFoundError: If OpenCV cannot read ``args.input``.
    """
    strides = np.array(cfg.YOLO.STRIDES)
    anchors = utils.get_anchors(cfg.YOLO.ANCHORS)
    num_class = len(utils.read_class_names(cfg.YOLO.CLASSES))
    xyscale = cfg.YOLO.XYSCALE
    input_size = args.size
    image_path = args.input

    original_image = cv2.imread(image_path)
    if original_image is None:
        # cv2.imread signals failure by returning None; fail here with a
        # clear message instead of inside cvtColor.
        raise FileNotFoundError("could not read image: {}".format(image_path))
    original_image = cv2.cvtColor(original_image, cv2.COLOR_BGR2RGB)
    original_image_size = original_image.shape[:2]

    image_data = utils.image_preprocess(np.copy(original_image),
                                        [input_size, input_size])
    image_data = image_data[np.newaxis, ...].astype(np.float32)

    model = YOLOv4(num_class, strides, anchors, xyscale, 'test')

    if args.pretrained:
        # Run one dummy prediction before loading the .h5 weights.
        dummy_input = np.ones((1, args.size, args.size, 3))
        model.predict(dummy_input)
        model.load_weights('./weights/pretrained.h5')
        print('Pretrained weights loaded')
    elif args.weights is not None:
        ckpt = tf.train.Checkpoint(model=model)
        ckpt_manager = tf.train.CheckpointManager(ckpt,
                                                  args.weights,
                                                  max_to_keep=3)
        if ckpt_manager.latest_checkpoint:
            ckpt.restore(ckpt_manager.latest_checkpoint).expect_partial()
            print('Latest checkpoint restored')
        else:
            # Inference continues with whatever weights the model has.
            print('Failed to load latest checkpoint')

    pred_bbox = model.predict(image_data)
    pred_bbox = utils.postprocess_bbbox(pred_bbox, anchors, strides, xyscale)
    bboxes = utils.postprocess_boxes(pred_bbox, original_image_size,
                                     input_size, 0.25)
    bboxes = utils.nms(bboxes, 0.213, method='nms')

    image = utils.draw_bbox(original_image, bboxes,
                            utils.read_class_names(cfg.YOLO.CLASSES))
    image = Image.fromarray(image)
    # Swap the channel order back before handing the array to cv2.imwrite.
    image = cv2.cvtColor(np.array(image), cv2.COLOR_BGR2RGB)
    cv2.imwrite(os.path.join(args.output, 'result.png'), image)
def crop_objects(img, data, path, allowed_classes):
    """Crop allowed detections, find text regions in them and OCR each one.

    For every detection whose class is in ``allowed_classes`` the box is
    cropped from ``img`` and passed to ``text_detector``; each returned text
    crop is written to ``path`` as a ``.png`` and handed to ``ocr_for_crop``
    together with a matching ``.txt`` path.  Failures on a single text crop
    are reported with a printed message and do not stop processing.

    Args:
        img: Image array indexed as ``img[row, col]``.
        data: ``(boxes, scores, classes, num_objects)``; each box is
            ``(xmin, ymin, xmax, ymax)``.
        path: Directory the output files are written to.
        allowed_classes: Container of class names to process.
    """
    boxes, scores, classes, num_objects = data
    class_names = read_class_names(cfg.YOLO.CLASSES)
    # per-class counters, used as part of the file names
    counts = dict()
    for i in range(num_objects):
        class_name = class_names[int(classes[i])]
        if class_name not in allowed_classes:
            continue
        counts[class_name] = counts.get(class_name, 0) + 1
        # get box coords
        xmin, ymin, xmax, ymax = boxes[i]
        # Crop without padding.  Coordinates are clamped at 0 because a
        # negative index would wrap around to the other side of the image.
        top, bottom = max(int(ymin), 0), max(int(ymax), 0)
        left, right = max(int(xmin), 0), max(int(xmax), 0)
        cropped_img = img[top:bottom, left:right]

        # EAST text detection; the CRAFT variant of text_detector takes the
        # detection index as a second argument.
        text_crop = text_detector(cropped_img)

        count = 0
        for text_cropped in text_crop:
            try:
                # NOTE(review): the two counters are concatenated without a
                # separator, so e.g. (1, 12) and (11, 2) give the same name.
                stem = class_name + '_' + str(counts[class_name]) + str(count)
                img_path = os.path.join(path, stem + '.png')
                txt_path = os.path.join(path, stem + '.txt')
                cv2.imwrite(img_path, text_cropped)
                count += 1
                try:
                    final_text_from_crop = ocr_for_crop(text_cropped, txt_path)
                    print(final_text_from_crop)
                except Exception:
                    # Exception only, so Ctrl-C still interrupts the loop.
                    print("error from text crop")
            except Exception:
                print("error caused by: ", text_cropped)
def video_without_saving():
    """Run the frozen YOLOv3 graph on a video stream and display the result.

    Frames are read from capture source ``0``, run through
    ``./yolov3_coco.pb``, post-processed (score threshold 0.3, NMS 0.45) and
    shown in a window named "result" until 'q' is pressed.

    Raises:
        ValueError: When a frame cannot be read from the capture.
    """
    classes = utils.read_class_names(cfg.YOLO.CLASSES)
    num_classes = len(classes)
    return_elements = [
        "input/input_data:0", "pred_sbbox/concat_2:0", "pred_mbbox/concat_2:0",
        "pred_lbbox/concat_2:0"
    ]
    pb_file = "./yolov3_coco.pb"
    # Capture source 0; the original also listed the sample file
    # docs/images/racoon.mp4, which was immediately overridden.
    video_path = 0
    input_size = 416
    graph = tf.Graph()
    return_tensors = utils.read_pb_return_tensors(graph, pb_file,
                                                  return_elements)

    with tf.Session(graph=graph) as sess:
        vid = cv2.VideoCapture(video_path)
        try:
            while True:
                return_value, frame = vid.read()
                if not return_value:
                    raise ValueError("No image!")
                frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                frame_size = frame.shape[:2]
                image_data = utils.image_preporcess(np.copy(frame),
                                                    [input_size, input_size])
                image_data = image_data[np.newaxis, ...]

                pred_sbbox, pred_mbbox, pred_lbbox = sess.run(
                    [return_tensors[1], return_tensors[2], return_tensors[3]],
                    feed_dict={return_tensors[0]: image_data})

                # Flatten the three scales into one (N, 5 + num_classes) array.
                pred_bbox = np.concatenate([
                    np.reshape(pred_sbbox, (-1, 5 + num_classes)),
                    np.reshape(pred_mbbox, (-1, 5 + num_classes)),
                    np.reshape(pred_lbbox, (-1, 5 + num_classes))
                ], axis=0)

                bboxes = utils.postprocess_boxes(pred_bbox, frame_size,
                                                 input_size, 0.3)
                bboxes = utils.nms(bboxes, 0.45, method='nms')
                image = utils.draw_bbox(frame, bboxes)

                cv2.namedWindow("result", cv2.WINDOW_AUTOSIZE)
                result = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
                cv2.imshow("result", result)
                if cv2.waitKey(1) & 0xFF == ord('q'):
                    break
        finally:
            # Release the capture and close the window on every exit path,
            # including the "No image!" error.
            vid.release()
            cv2.destroyAllWindows()
def __init__(self):
    """Load class names, strides, anchors and loss settings from cfg.YOLO."""
    yolo_cfg = cfg.YOLO
    class_map = utils.read_class_names(yolo_cfg.CLASSES)
    self.classes = class_map
    self.num_class = len(class_map)
    self.strides = np.array(yolo_cfg.STRIDES)
    self.anchors = utils.get_anchors(yolo_cfg.ANCHORS)
    self.anchor_per_scale = yolo_cfg.ANCHOR_PER_SCALE
    self.iou_loss_thresh = yolo_cfg.IOU_LOSS_THRESH
    self.upsample_method = yolo_cfg.UPSAMPLE_METHOD
def __init__(self,input_data,trainable):
    """Store the COCO settings and build the darknet53 output.

    Args:
        input_data: Input passed to ``self._darknet53``.
        trainable: Stored on the instance and passed to ``ModelSkeleton``.
    """
    self.nnlib = ModelSkeleton(trainable)
    self.trainable = trainable

    # COCO-specific configuration.
    self.strides = np.array(cfg.COCO_STRIDES)
    self.anchors = utils.get_anchors(cfg.COCO_ANCHORS)
    class_map = utils.read_class_names(cfg.COCO_NAMES)
    self.classes = class_map
    self.num_class = len(class_map)

    # Built last, after the attributes above are set.
    self.darknet53_output = self._darknet53(input_data)
def count_objects(data, by_class=False, allowed_classes=list( read_class_names(cfg.YOLO.CLASSES).values())): boxes, scores, classes, num_objects = data #create dictionary to hold count of objects counts = dict() # if by_class = True then count objects per class if by_class: class_names = read_class_names(cfg.YOLO.CLASSES) # loop through total number of objects found for i in range(num_objects): # grab class index and convert into corresponding class name if scores[i] >= 0.9: class_index = int(classes[i]) class_name = class_names[class_index] if class_name in allowed_classes: if (class_name in final_output) == False: final_output.update({class_name: 1}) print(final_output) objects = OrderedDict() objects = update(boxes, class_name) #final_output.update({ class_name: len(objects)}) #print(class_name) print(objects) #print(final_output) #counts[class_name] = counts.get(class_name, 0) + 1 total_output.update(final_output) else: continue else: continue print(final_output) # else count total objects found else: counts['total object'] = num_objects return counts
def __init__(self, input_data, trainable):
    """Store the YOLO settings and, for the mobile backbone, build the network.

    The network and its decoded outputs are only created when
    ``cfg.YOLO.BACKBONE_MOBILE`` is truthy.

    Args:
        input_data: Input passed to the network builder.
        trainable: Stored on the instance as ``self.trainable``.

    Raises:
        NotImplementedError: If building the network fails; the original
            error is attached as the cause.
    """
    self.trainable = trainable
    self.classes = utils.read_class_names(cfg.YOLO.CLASSES)
    self.num_class = len(self.classes)
    self.strides = np.array(cfg.YOLO.STRIDES)
    self.anchors = utils.get_anchors(cfg.YOLO.ANCHORS)
    self.anchor_per_scale = cfg.YOLO.ANCHOR_PER_SCALE
    self.iou_loss_thresh = cfg.YOLO.IOU_LOSS_THRESH
    self.upsample_method = cfg.YOLO.UPSAMPLE_METHOD
    self.mobile = cfg.YOLO.BACKBONE_MOBILE
    self.gt_per_grid = cfg.YOLO.GT_PER_GRID

    if self.mobile:
        try:
            self.conv_lbbox, self.conv_mbbox, self.conv_sbbox = self.__build_nework_mobile(
                input_data)
        except Exception as err:
            # Catch Exception only, so KeyboardInterrupt/SystemExit propagate
            # unchanged, and keep the root cause attached for debugging.
            raise NotImplementedError(
                "Can not build up yolov3 network!") from err

        with tf.variable_scope('pred_sbbox'):
            self.pred_sbbox = self.decode_mobile(
                conv_output=self.conv_sbbox,
                num_classes=self.num_class,
                stride=self.strides[0])
        with tf.variable_scope('pred_mbbox'):
            self.pred_mbbox = self.decode_mobile(
                conv_output=self.conv_mbbox,
                num_classes=self.num_class,
                stride=self.strides[1])
        with tf.variable_scope('pred_lbbox'):
            self.pred_lbbox = self.decode_mobile(
                conv_output=self.conv_lbbox,
                num_classes=self.num_class,
                stride=self.strides[2])

        # An earlier variant flattened each prediction to [-1, 85] before
        # concatenating; it is disabled in favour of the version below.
        # Hand-coded dimensions: if 608, use 19; if 416, use 13.
        # NOTE(review): all three scales are reshaped to the same 19x19 grid
        # and 85 is hard-coded rather than derived from self.num_class --
        # confirm both match what decode_mobile produces.
        with tf.variable_scope('pred_multi_scale'):
            self.pred_multi_scale = tf.concat([
                tf.reshape(self.pred_sbbox, [-1, 19, 19, 85]),
                tf.reshape(self.pred_mbbox, [-1, 19, 19, 85]),
                tf.reshape(self.pred_lbbox, [-1, 19, 19, 85])
            ], axis=0, name='concat')
def __init__(self, input_data, trainable):
    """Store the YOLO settings and build the network on ``input_data``.

    Args:
        input_data: Input passed to the network builder.
        trainable: Stored on the instance as ``self.trainable``.

    Raises:
        NotImplementedError: If building the network fails; the original
            error is attached as the cause.
    """
    self.trainable = trainable
    self.classes = utils.read_class_names(cfg.YOLO.CLASSES)
    self.num_class = len(self.classes)
    self.strides = np.array(cfg.YOLO.STRIDES)
    self.upsample_method = cfg.YOLO.UPSAMPLE_METHOD

    try:
        self.conv_s = self.__build_nework(input_data)
    except Exception as err:
        # Catch Exception only, so KeyboardInterrupt/SystemExit propagate
        # unchanged, and keep the root cause attached for debugging.
        raise NotImplementedError("Can not build up yolov3 network!") from err
def run_DL(_frame):
    """Run the detector on one BGR frame and return the processed detections.

    The frame is converted to RGB, resized to ``cfg.YOLO.INPUT_SIZE``, scaled
    by 1/255 and passed to ``infer``.  The predictions are filtered with
    combined non-max suppression using the ``pt_cfg.POLYTRACK`` limits and
    thresholds, the boxes are rescaled with ``utils.format_boxes`` and the
    result is handed to ``dl_detections_process``.

    Dark-spot masking (``pt_cfg.POLYTRACK.DL_DARK_SPOTS``) is currently
    disabled in this function.

    Args:
        _frame: Image array in BGR channel order.

    Returns:
        Whatever ``dl_detections_process`` returns for
        ``[bboxes, scores, classes, valid_detections]``.
    """
    _frame = cv2.cvtColor(_frame, cv2.COLOR_BGR2RGB)

    image_data = cv2.resize(_frame,
                            (cfg.YOLO.INPUT_SIZE, cfg.YOLO.INPUT_SIZE))
    image_data = image_data / 255.
    image_data = image_data[np.newaxis, ...].astype(np.float32)
    batch_data = tf.constant(image_data)

    pred_bbox = infer(batch_data)
    # Split each output into box coordinates and confidences; the tensors of
    # the last output entry are the ones used below.
    for value in pred_bbox.values():
        boxes = value[:, :, 0:4]
        pred_conf = value[:, :, 4:]

    boxes, scores, classes, valid_detections = tf.image.combined_non_max_suppression(
        boxes=tf.reshape(boxes, (tf.shape(boxes)[0], -1, 1, 4)),
        scores=tf.reshape(
            pred_conf, (tf.shape(pred_conf)[0], -1, tf.shape(pred_conf)[-1])),
        max_output_size_per_class=pt_cfg.POLYTRACK.MAX_OUTPUT_SIZE_PER_CLASS,
        max_total_size=pt_cfg.POLYTRACK.MAX_TOTAL_SIZE,
        iou_threshold=pt_cfg.POLYTRACK.DL_IOU_THRESHOLD,
        score_threshold=pt_cfg.POLYTRACK.DL_SCORE_THRESHOLD)

    # format bounding boxes from normalized ymin, xmin, ymax, xmax
    # ---> xmin, ymin, xmax, ymax
    original_h, original_w, _ = _frame.shape
    bboxes = utils.format_boxes(boxes.numpy()[0], original_h, original_w)

    # The unused PIL conversion, frame-size local and per-call class-name
    # file read of the original were dropped: none of them fed the result.
    pred_bbox = [
        bboxes,
        scores.numpy()[0],
        classes.numpy()[0],
        valid_detections.numpy()[0]
    ]
    return dl_detections_process(pred_bbox)
def __init__(self):
    """Copy the evaluation settings from cfg.TEST and cfg.YOLO onto the instance."""
    test_cfg = cfg.TEST
    yolo_cfg = cfg.YOLO

    # Model description.
    self.input_size = test_cfg.INPUT_SIZE
    self.anchor_per_scale = yolo_cfg.ANCHOR_PER_SCALE
    class_map = utils.read_class_names(yolo_cfg.CLASSES)
    self.classes = class_map
    self.num_classes = len(class_map)
    self.anchors = np.array(utils.get_anchors(yolo_cfg.ANCHORS))

    # Detection thresholds and weight averaging.
    self.score_threshold = test_cfg.SCORE_THRESHOLD
    self.iou_threshold = test_cfg.IOU_THRESHOLD
    self.moving_ave_decay = yolo_cfg.MOVING_AVE_DECAY

    # Input and output locations.
    self.annotation_path = test_cfg.ANNOT_PATH
    self.weight_file = test_cfg.WEIGHT_FILE
    self.write_image = test_cfg.WRITE_IMAGE
    self.write_image_path = test_cfg.WRITE_IMAGE_PATH
    self.show_label = test_cfg.SHOW_LABEL
def __init__(self):
    """Read the test/runtime settings, load the frozen graph and open a session.

    The graph is loaded from ``cfg.TEST.MODEL_FILE``, its input and output
    tensors are looked up by name, and a session is created with the MLU
    options taken from ``cfg.RUNTIME``.
    """
    test_cfg = cfg.TEST
    yolo_cfg = cfg.YOLO
    runtime_cfg = cfg.RUNTIME

    self.input_size = test_cfg.INPUT_SIZE
    self.anchor_per_scale = yolo_cfg.ANCHOR_PER_SCALE
    self.classes = utils.read_class_names(yolo_cfg.CLASSES)
    self.num_classes = len(self.classes)
    self.anchors = np.array(utils.get_anchors(yolo_cfg.ANCHORS))
    self.score_threshold = test_cfg.SCORE_THRESHOLD
    self.iou_threshold = test_cfg.IOU_THRESHOLD
    self.moving_ave_decay = yolo_cfg.MOVING_AVE_DECAY
    self.annotation_path = test_cfg.ANNOT_PATH
    self.number = test_cfg.NUMBER
    self.weight_file = test_cfg.WEIGHT_FILE
    self.model_file = test_cfg.MODEL_FILE
    self.write_image = test_cfg.WRITE_IMAGE
    self.write_image_path = test_cfg.WRITE_IMAGE_PATH
    self.show_label = test_cfg.SHOW_LABEL
    self.batch_size = test_cfg.BATCH_SIZE

    self.core_version = runtime_cfg.CORE_VERSION
    self.precision = runtime_cfg.PRECISION
    self.data_parallelism = runtime_cfg.DATA_PARALLELISM
    self.model_parallelism = runtime_cfg.MODEL_PARALLELISM
    self.core_num = runtime_cfg.CORE_NUM

    # The existence check is informational only: loading is attempted
    # regardless of its outcome.
    model_found = os.path.exists(self.model_file)
    print("model is exit" if model_found else "please check out model_file")

    graph = load_graph(self.model_file)
    lookup = graph.get_tensor_by_name
    self.input_data = lookup("import/input/input_data:0")
    self.pred_sbbox = lookup("import/pred_sbbox/concat_2:0")
    self.pred_mbbox = lookup("import/pred_mbbox/concat_2:0")
    self.pred_lbbox = lookup("import/pred_lbbox/concat_2:0")
    self.bbox_raw = lookup("import/Yolov3DetectionOutput:0")

    config = tf.ConfigProto(allow_soft_placement=True,
                            inter_op_parallelism_threads=1,
                            intra_op_parallelism_threads=1)
    mlu = config.mlu_options
    mlu.data_parallelism = self.data_parallelism
    mlu.model_parallelism = self.model_parallelism
    mlu.core_num = self.core_num
    mlu.core_version = self.core_version
    mlu.precision = self.precision
    mlu.save_offline_model = True
    mlu.offline_model_name = "yolov3_int8.cambricon"

    self.sess = tf.Session(config=config, graph=graph)
def count_objects(data, cords, by_class=False, allowed_classes=list( read_class_names(cfg.YOLO.CLASSES).values())):
    """Count detections, optionally per class and restricted to a region.

    Args:
        data: ``(boxes, scores, classes, num_objects)``.
        cords: Four values compared against each box as
            ``cords[0] < box[0]``, ``cords[1] < box[1]``,
            ``cords[2] > box[2]`` and ``cords[3] > box[3]``.  The exact
            value ``(0, 0, 720, 480)`` disables the region test, so every
            allowed detection is counted.
        by_class: When true, return per-class counts; otherwise only the
            total number of objects.
        allowed_classes: Class names to count.  The default is computed
            once, when the function is defined, from ``cfg.YOLO.CLASSES``.

    Returns:
        ``{class_name: count}`` when ``by_class`` is true, otherwise
        ``{'total object': num_objects}``.
    """
    boxes, scores, classes, num_objects = data
    if not by_class:
        # Only the overall total is reported.
        return {'total object': num_objects}

    tally = dict()
    names_by_index = read_class_names(cfg.YOLO.CLASSES)
    for idx in range(num_objects):
        label = names_by_index[int(classes[idx])]
        if label not in allowed_classes:
            continue
        # The region test is skipped for the exact (0, 0, 720, 480) value;
        # otherwise the box must lie strictly inside cords.
        whole_frame = (cords[0] == 0 and cords[1] == 0
                       and cords[2] == 720 and cords[3] == 480)
        if whole_frame or (cords[0] < boxes[idx][0]
                           and cords[1] < boxes[idx][1]
                           and cords[2] > boxes[idx][2]
                           and cords[3] > boxes[idx][3]):
            tally[label] = tally.get(label, 0) + 1
    return tally
def load_model_yolov4(input_size=608, weights='./data/yolov4_original_last.weights'):
    """Build a YOLOv4 Keras model, load its weights and return it.

    Args:
        input_size: Side length of the square, 3-channel input layer.
        weights: Path passed to ``utils.load_weights``.

    Returns:
        The ``tf.keras.Model`` with one decoded output per feature map.  The
        model summary is printed before returning.
    """
    class_count = len(utils.read_class_names(cfg.YOLO.CLASSES))
    net_input = tf.keras.layers.Input([input_size, input_size, 3])
    heads = YOLOv4(net_input, class_count)
    # One decoded tensor per feature map, in the order they are produced.
    decoded = [decode(head, class_count, idx) for idx, head in enumerate(heads)]
    yolo = tf.keras.Model(net_input, decoded)
    utils.load_weights(yolo, weights)
    yolo.summary()
    return yolo
def count_objects(data, by_class=False, allowed_classes=list( read_class_names(cfg.YOLO.CLASSES).values())):
    """Count detections, either in total or per allowed class.

    Args:
        data: ``(boxes, scores, classes, num_objects)``.
        by_class: When true, return per-class counts; otherwise only the
            total number of objects.
        allowed_classes: Class names to count.  The default is computed
            once, when the function is defined, from ``cfg.YOLO.CLASSES``.

    Returns:
        ``{class_name: count}`` when ``by_class`` is true, otherwise
        ``{'total object': num_objects}``.
    """
    boxes, scores, classes, num_objects = data
    if not by_class:
        # Only the overall total is reported.
        return {'total object': num_objects}

    tally = dict()
    names_by_index = read_class_names(cfg.YOLO.CLASSES)
    for idx in range(num_objects):
        label = names_by_index[int(classes[idx])]
        if label in allowed_classes:
            tally[label] = tally.get(label, 0) + 1
    return tally