def load_model(self, model_path, classes_list, use_gpu=True):
    # Infer the compound coefficient (d0-d7) from the weights file name.
    for coef in range(8):
        if f"d{coef}" in model_path:
            self.system_dict["params"]["compound_coef"] = coef
            self.system_dict["params"]["weights_file"] = model_path
            break

    self.system_dict["params"]["obj_list"] = classes_list
    self.system_dict["params"]["use_cuda"] = use_gpu
    self.system_dict["local"]["color_list"] = standard_to_bgr(STANDARD_COLORS)

    # tf bilinear interpolation is different from any other's, just make do
    input_sizes = [512, 640, 768, 896, 1024, 1280, 1280, 1536]
    if self.system_dict["params"]["force_input_size"] is None:
        self.system_dict["local"]["input_size"] = \
            input_sizes[self.system_dict["params"]["compound_coef"]]
    else:
        self.system_dict["local"]["input_size"] = \
            self.system_dict["params"]["force_input_size"]

    self.system_dict["local"]["model"] = EfficientDetBackbone(
        compound_coef=self.system_dict["params"]["compound_coef"],
        num_classes=len(self.system_dict["params"]["obj_list"]),
        ratios=self.system_dict["params"]["anchor_ratios"],
        scales=self.system_dict["params"]["anchor_scales"])
    self.system_dict["local"]["model"].load_state_dict(
        torch.load(self.system_dict["params"]["weights_file"]))
    self.system_dict["local"]["model"].requires_grad_(False)
    self.system_dict["local"]["model"] = self.system_dict["local"]["model"].eval()

    if self.system_dict["params"]["use_cuda"]:
        self.system_dict["local"]["model"] = \
            self.system_dict["local"]["model"].cuda()
    if self.system_dict["params"]["use_float16"]:
        self.system_dict["local"]["model"] = \
            self.system_dict["local"]["model"].half()
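A usage sketch for load_model, assuming a hypothetical wrapper class `Infer` whose constructor initializes `system_dict`; the class name, weights path, class list, and the pre-set params keys are assumptions — only the call signature comes from the code above.

# Hypothetical usage sketch; `Infer` and the values below are placeholders.
detector = Infer()
detector.system_dict["params"]["force_input_size"] = None   # assumed default
detector.system_dict["params"]["use_float16"] = False       # assumed default
detector.system_dict["params"]["anchor_ratios"] = [(1.0, 1.0), (1.4, 0.7), (0.7, 1.4)]
detector.system_dict["params"]["anchor_scales"] = [2 ** 0, 2 ** (1.0 / 3.0), 2 ** (2.0 / 3.0)]
# "d4" in the file name selects compound_coef=4.
detector.load_model("efficientdet-d4.pth", classes_list=["person", "car"], use_gpu=True)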
def display(preds, imgs, compound_coef, obj_list=None, imshow=True,
            imwrite=False, debug=False):
    if obj_list is None:
        obj_list = ['person']
    color_list = standard_to_bgr(STANDARD_COLORS)
    for i in range(len(imgs)):
        if len(preds[i]['rois']) == 0:
            if debug:
                cv2.imshow('img', imgs[i])
                cv2.waitKey(0)
            continue
        imgs[i] = imgs[i].copy()
        for j in range(len(preds[i]['rois'])):
            # np.int was removed in NumPy 1.24; plain int works everywhere.
            x1, y1, x2, y2 = preds[i]['rois'][j].astype(int)
            obj = obj_list[preds[i]['class_ids'][j]]
            score = float(preds[i]['scores'][j])
            plot_one_box(imgs[i], [x1, y1, x2, y2], label=obj, score=score,
                         color=color_list[get_index_label(obj, obj_list)])
        if imshow:
            cv2.imshow('img', imgs[i])
            cv2.waitKey(0)
        if imwrite:
            os.makedirs('./assets/predictions', exist_ok=True)
            cv2.imwrite(
                f'./assets/predictions/img_inferred_d{compound_coef}_this_repo_{i}.jpg',
                imgs[i])
    if imwrite:
        image_folder = './assets/predictions'
        # Sort so the video frames follow the image numbering;
        # os.listdir order is arbitrary.
        image_files = sorted(image_folder + '/' + img
                             for img in os.listdir(image_folder)
                             if img.endswith('.jpg'))
        clip = moviepy.video.io.ImageSequenceClip.ImageSequenceClip(
            image_files, fps=1)
        clip.write_videofile('./assets/predictions_testset.mp4')
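display expects one prediction dict per image, each with 'rois', 'class_ids', and 'scores' arrays — the structure postprocess and invert_affine produce elsewhere in this repo. A minimal sketch with synthetic inputs (all values below are made up):

# Synthetic example; the dict layout mirrors what postprocess returns.
import numpy as np
fake_preds = [{
    'rois': np.array([[40.0, 60.0, 200.0, 320.0]]),  # one box, (x1, y1, x2, y2)
    'class_ids': np.array([0]),                      # index into obj_list
    'scores': np.array([0.91]),
}]
fake_imgs = [np.zeros((480, 640, 3), dtype=np.uint8)]  # one blank BGR frame
display(fake_preds, fake_imgs, compound_coef=0,
        obj_list=['person'], imshow=False, imwrite=True)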
def __save_image(self, preds, imgs, imwrite=True):
    color_list = standard_to_bgr(STANDARD_COLORS)
    for i in range(len(imgs)):
        if len(preds[i]['rois']) == 0:
            continue
        imgs[i] = imgs[i].copy()
        for j in range(len(preds[i]['rois'])):
            # np.int was removed in NumPy 1.24; plain int works everywhere.
            x1, y1, x2, y2 = preds[i]['rois'][j].astype(int)
            obj = self.obj_list[preds[i]['class_ids'][j]]
            score = float(preds[i]['scores'][j])
            plot_one_box(imgs[i], [x1, y1, x2, y2], label=obj, score=score,
                         color=color_list[get_index_label(obj, self.obj_list)])
        if imwrite:
            cv2.imwrite(
                f'test/img_inferred_d{self.compound_coef}_this_repo_{i}.jpg',
                imgs[i])
# replace this part with your project's anchor config
anchor_ratios = eval(params.anchor_ratios)
anchor_scales = eval(params.anchor_scales)
crop_size = params.crop_size
threshold = params.threshold
iou_threshold = params.iou_threshold

config = InferenceConfig()
use_cuda = True
use_float16 = False
cudnn.fastest = True
cudnn.benchmark = True

color_list = standard_to_bgr(STANDARD_COLORS)
input_sizes = [512, 640, 768, 896, 1024, 1280, 1280, 1536]

model = EfficientDetBackbone(compound_coef=config.compound_coef,
                             num_classes=len(config.obj_list),
                             ratios=config.anchor_ratios,
                             scales=config.anchor_scales)
model.load_state_dict(torch.load(opt.weights))
model.requires_grad_(False)
model.eval()

if opt.command == 'report':
    square_size = params.square_size
    # Square of side square_size centered in the crop, as (x1, y1, x2, y2).
    red_box = ((config.crop_size - square_size) / 2,
               (config.crop_size - square_size) / 2,
               (config.crop_size + square_size) / 2,
               (config.crop_size + square_size) / 2)
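This script references params, opt, and InferenceConfig without defining them. A minimal stand-in for the shape the code assumes — attribute names inferred from the usage above, all values hypothetical:

# Hypothetical stand-in so the snippet above can run in isolation.
class InferenceConfig:
    compound_coef = 0
    crop_size = 512
    obj_list = ['person']                                 # project-specific classes
    anchor_ratios = [(1.0, 1.0), (1.4, 0.7), (0.7, 1.4)]
    anchor_scales = [2 ** 0, 2 ** (1.0 / 3.0), 2 ** (2.0 / 3.0)]

Since params.anchor_ratios and params.anchor_scales arrive as strings, ast.literal_eval would be a safer drop-in for eval above.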
def inference():
    compound_coef = 1  # d1, matching the weights loaded below
    force_input_size = None  # set None to use default size
    img_path = 'test/original_img.jpg'

    # replace this part with your project's anchor config
    anchor_ratios = [(1.0, 1.0), (1.4, 0.7), (0.7, 1.4)]
    anchor_scales = [2 ** 0, 2 ** (1.0 / 3.0), 2 ** (2.0 / 3.0)]

    threshold = 0.2
    iou_threshold = 0.2

    use_cuda = True
    use_float16 = False
    cudnn.fastest = True
    cudnn.benchmark = True

    # COCO classes; the empty strings keep the 90-id numbering aligned.
    obj_list = ['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train',
                'truck', 'boat', 'traffic light', 'fire hydrant', '', 'stop sign',
                'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep',
                'cow', 'elephant', 'bear', 'zebra', 'giraffe', '', 'backpack',
                'umbrella', '', '', 'handbag', 'tie', 'suitcase', 'frisbee', 'skis',
                'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove',
                'skateboard', 'surfboard', 'tennis racket', 'bottle', '', 'wine glass',
                'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple', 'sandwich',
                'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake',
                'chair', 'couch', 'potted plant', 'bed', '', 'dining table', '', '',
                'toilet', '', 'tv', 'laptop', 'mouse', 'remote', 'keyboard',
                'cell phone', 'microwave', 'oven', 'toaster', 'sink', 'refrigerator',
                '', 'book', 'clock', 'vase', 'scissors', 'teddy bear', 'hair drier',
                'toothbrush']

    color_list = standard_to_bgr(STANDARD_COLORS)

    # tf bilinear interpolation is different from any other's, just make do
    input_sizes = [512, 640, 768, 896, 1024, 1280, 1280, 1536, 1536]
    input_size = input_sizes[compound_coef] if force_input_size is None else force_input_size
    ori_imgs, framed_imgs, framed_metas = preprocess(img_path, max_size=input_size)

    if use_cuda:
        x = torch.stack([torch.from_numpy(fi).cuda() for fi in framed_imgs], 0)
    else:
        x = torch.stack([torch.from_numpy(fi) for fi in framed_imgs], 0)
    x = x.to(torch.float32 if not use_float16 else torch.float16).permute(0, 3, 1, 2)

    model = EfficientDet_semanticBackbone(compound_coef=compound_coef,
                                          num_classes=len(obj_list),
                                          ratios=anchor_ratios,
                                          scales=anchor_scales)
    model.load_state_dict(torch.load('model_weight/model_1_epoch_80.pth'))
    if use_cuda:
        model = model.cuda()

    with torch.no_grad():
        features, regression, classification, anchors, sem_out = model(x)

        regressBoxes = BBoxTransform()
        clipBoxes = ClipBoxes()

        out = postprocess(x, anchors, regression, classification,
                          regressBoxes, clipBoxes, threshold, iou_threshold)

    out = invert_affine(framed_metas, out)
    out = box(out, ori_imgs, color_list, obj_list, imshow=False, imwrite=False)

    # Semantic head: per-pixel scores -> label map -> color overlay.
    outputs = sem_out.data.cpu().numpy()  # shape: (batch_size, num_classes, img_h, img_w)
    pred_label_imgs = np.argmax(outputs, axis=1)  # shape: (batch_size, img_h, img_w)
    pred_label_imgs = pred_label_imgs.astype(np.uint8)
    z = cv2.resize(pred_label_imgs[0], (ori_imgs[0].shape[1], ori_imgs[0].shape[0]))

    from semantic_utils.utils import label_img_to_color
    pred_label_img_color = label_img_to_color(z)
    overlayed_img = 0.35 * out + 0.65 * pred_label_img_color
    overlayed_img = overlayed_img.astype(np.uint8)  # JPEG writing requires 8-bit data
    flag = cv2.imwrite('test/semantic_img_1.jpg', overlayed_img)
    return flag
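label_img_to_color is imported from semantic_utils.utils and its implementation isn't shown here. A sketch of what this code assumes it does — map each class id in an (H, W) label image to a fixed BGR color — with a made-up palette; the real version may differ:

import numpy as np

def label_img_to_color_sketch(label_img):
    # Assumed behavior only; palette and class meanings are placeholders.
    palette = np.array([[0, 0, 0],        # class 0: background
                        [128, 64, 128],   # class 1: e.g. road
                        [70, 70, 70]],    # class 2: e.g. building
                       dtype=np.uint8)
    return palette[label_img]  # (H, W) int labels -> (H, W, 3) BGR image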
def test(opt):
    compound_coef = 2
    force_input_size = None  # set None to use default size
    img_id = opt.img_id
    img_path = opt.img_path + str(img_id) + '.jpg'

    # replace this part with your project's anchor config
    anchor_ratios = [(1.0, 1.0), (1.4, 0.7), (0.7, 1.4)]
    anchor_scales = [2 ** 0, 2 ** (1.0 / 3.0), 2 ** (2.0 / 3.0)]

    threshold = 0.2
    iou_threshold = 0.2

    use_cuda = True
    use_float16 = False
    cudnn.fastest = True
    cudnn.benchmark = True

    obj_list = ['02010001', '02010002']
    color_list = standard_to_bgr(STANDARD_COLORS)

    input_sizes = [512, 640, 768, 896, 1024, 1280, 1280, 1536, 1536]
    input_size = input_sizes[compound_coef] if force_input_size is None else force_input_size
    ori_imgs, framed_imgs, framed_metas = preprocess(img_path, max_size=input_size)

    if use_cuda:
        x = torch.stack([torch.from_numpy(fi).cuda() for fi in framed_imgs], 0)
    else:
        x = torch.stack([torch.from_numpy(fi) for fi in framed_imgs], 0)
    x = x.to(torch.float32 if not use_float16 else torch.float16).permute(0, 3, 1, 2)

    model = EfficientDetBackbone(compound_coef=compound_coef,
                                 num_classes=len(obj_list),
                                 ratios=anchor_ratios,
                                 scales=anchor_scales)
    model.load_state_dict(torch.load(opt.weights, map_location='cpu'))
    model.requires_grad_(False)
    model.eval()

    if use_cuda:
        model = model.cuda()
    if use_float16:
        model = model.half()

    with torch.no_grad():
        features, regression, classification, anchors = model(x)

        regressBoxes = BBoxTransform()
        clipBoxes = ClipBoxes()

        out = postprocess(x, anchors, regression, classification,
                          regressBoxes, clipBoxes, threshold, iou_threshold)

    def display(preds, imgs, imshow=True, imwrite=False, img_id=1):
        for i in range(len(imgs)):
            if len(preds[i]['rois']) == 0:
                continue
            imgs[i] = imgs[i].copy()
            imgs[i] = cv2.cvtColor(imgs[i], cv2.COLOR_BGR2RGB)
            for j in range(len(preds[i]['rois'])):
                # np.int was removed in NumPy 1.24; plain int works everywhere.
                x1, y1, x2, y2 = preds[i]['rois'][j].astype(int)
                obj = obj_list[preds[i]['class_ids'][j]]
                score = float(preds[i]['scores'][j])
                plot_one_box(imgs[i], [x1, y1, x2, y2], label=obj, score=score,
                             color=color_list[get_index_label(obj, obj_list)])
            if imshow:
                cv2.imshow('img', imgs[i])
                cv2.waitKey(0)
            if imwrite:
                cv2.imwrite('test/' + str(img_id) + '.jpg', imgs[i])

    out = invert_affine(framed_metas, out)
    display(out, ori_imgs, imshow=False, imwrite=True, img_id=img_id)

    print('running speed test...')
    with torch.no_grad():
        print('test1: model inferring and postprocessing')
        print('inferring image for 10 times...')
        t1 = time.time()
        for _ in range(10):
            _, regression, classification, anchors = model(x)
            out = postprocess(x, anchors, regression, classification,
                              regressBoxes, clipBoxes, threshold, iou_threshold)
            out = invert_affine(framed_metas, out)
        t2 = time.time()
        tact_time = (t2 - t1) / 10
        print(f'{tact_time} seconds, {1 / tact_time} FPS, @batch_size 1')

    # Convert the final detections to COCO-style results:
    # bbox is (x, y, width, height) rather than corner coordinates.
    tempList = []
    for j in range(len(out[0]['class_ids'])):
        tempout = {
            'image_id': img_id,
            'category_id': 2 if out[0]['class_ids'][j] == 1 else 1,
            'score': out[0]['scores'][j].astype(np.float64),
            'bbox': [
                out[0]['rois'][j][0].astype(np.float64),
                out[0]['rois'][j][1].astype(np.float64),
                out[0]['rois'][j][2].astype(np.float64) - out[0]['rois'][j][0].astype(np.float64),
                out[0]['rois'][j][3].astype(np.float64) - out[0]['rois'][j][1].astype(np.float64),
            ],
        }
        tempList.append(tempout)

    with open('test/' + str(img_id) + '.json', 'w') as f:
        json.dump(tempList, f)
    print('Saved the annotated image (' + str(img_id) + '.jpg) and JSON ('
          + str(img_id) + '.json) to the test folder...')
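A sketch of the opt object test() consumes; the flag names mirror the attributes used above (img_id, img_path, weights), but the defaults are assumptions and the actual CLI wiring lives elsewhere in the repo:

# Hypothetical CLI entry point; flag names inferred from test(opt).
import argparse

if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--img_id', type=int, default=1)
    parser.add_argument('--img_path', type=str, default='test/')
    parser.add_argument('--weights', type=str, default='weights/efficientdet-d2.pth')
    test(parser.parse_args())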