def check_detector(self, img_name):
    """
    Detect bboxes in one image.

    Input: 'str', full path of the image.
    Output: '[{"category_id": int, "score": float, "bbox": [x, y, w, h], "image_id": int}, ...]'
    The results follow the COCO results format, except that image_id is the integer
    parsed from the image file name rather than a zero-padded COCO %012d id.
    """
    args = self.detector_opt
    _CUDA = True
    if args:
        if args.gpus[0] < 0:
            _CUDA = False
    if not self.model:
        self.load_model()
    if isinstance(self.model, torch.nn.DataParallel):
        self.model = self.model.module
    dets_results = []
    # pre-process (scale, normalize, ...) the image
    img, orig_img, img_dim_list = prep_image(img_name, self.inp_dim)
    with torch.no_grad():
        img_dim_list = torch.FloatTensor([img_dim_list]).repeat(1, 2)
        img = img.to(args.device) if args else img.cuda()
        # scale factor mapping the network input size back to the original image size
        scaling_factor = torch.FloatTensor([
            1 / min(self.inp_dim / orig_dim[0], self.inp_dim / orig_dim[1])
            for orig_dim in img_dim_list
        ]).view(-1, 1)
        scaling_factor = scaling_factor.to(args.device) if args else scaling_factor.cuda()
        output = self.model(img, scaling_factor)
        output = output.cpu()
        for index, sample in enumerate(output):
            image_id = int(os.path.basename(img_name).split('.')[0])
            for det in sample:
                score = float(det[4])
                if score < .001:  # stop when below this threshold, scores are in descending order
                    break
                # uncomment to keep only human detections
                # if int(det[5]) != 1 or score < self.confidence:
                #     continue
                coco_det = dict(image_id=image_id,
                                bbox=det[0:4].tolist(),
                                score=score,
                                category_id=int(det[5]))
                dets_results.append(coco_det)
    return dets_results
def image_preprocess(self, img_source):
    """
    Pre-process the image before it is fed to the object detection network.

    Input: image name (str) or raw image data (ndarray or torch.Tensor, BGR channel order).
    Output: pre-processed image data (torch.FloatTensor, (1, 3, h, w)).
    """
    if isinstance(img_source, str):
        img, orig_img, im_dim_list = prep_image(img_source, self.inp_dim)
    elif isinstance(img_source, (torch.Tensor, np.ndarray)):
        img, orig_img, im_dim_list = prep_frame(img_source, self.inp_dim)
    else:
        raise IOError('Unknown image source type: {}'.format(type(img_source)))
    return img
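# A minimal usage sketch (comments only, not part of the API): the `detector`
# instance name and image path below are hypothetical placeholders, and it
# assumes cv2 is available for reading a raw BGR frame.
#
#   import cv2
#   inp = detector.image_preprocess('examples/demo/1.jpg')   # from a file path
#   frame = cv2.imread('examples/demo/1.jpg')                 # raw BGR ndarray
#   inp = detector.image_preprocess(frame)                    # from raw image data
#   # either way, inp is a torch.FloatTensor of shape (1, 3, h, w)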
def detect_one_img(self, img_name):
    """
    Detect bboxes in one image.

    Input: 'str', full path of the image.
    Output: '[{"category_id": 1, "score": float, "bbox": [x, y, w, h], "image_id": int}, ...]'
    The results follow the COCO results format, except that image_id is the integer
    parsed from the image file name rather than a zero-padded COCO %012d id.
    """
    args = self.detector_opt
    _CUDA = True
    if args:
        if args.gpus[0] < 0:
            _CUDA = False
    if not self.model:
        self.load_model()
    if isinstance(self.model, torch.nn.DataParallel):
        self.model = self.model.module
    dets_results = []
    # pre-process (scale, normalize, ...) the image
    img, orig_img, img_dim_list = prep_image(img_name, self.inp_dim)
    with torch.no_grad():
        img_dim_list = torch.FloatTensor([img_dim_list]).repeat(1, 2)
        img = img.to(args.device) if args else img.cuda()
        scaling_factor = torch.FloatTensor([
            1 / min(self.inp_dim / orig_dim[0], self.inp_dim / orig_dim[1])
            for orig_dim in img_dim_list
        ]).view(-1, 1)
        scaling_factor = scaling_factor.to(args.device) if args else scaling_factor.cuda()
        prediction = self.model(img, scaling_factor)
        # run nms on the detection results; only the human category is kept
        dets = self.dynamic_get_results(prediction,
                                        self.confidence,
                                        self.num_classes,
                                        nms=True,
                                        nms_conf=self.nms_thres)
        if isinstance(dets, int) or dets.shape[0] == 0:
            return None
        dets = dets.cpu()
        img_dim_list = torch.index_select(img_dim_list, 0, dets[:, 0].long())
        for i in range(dets.shape[0]):
            # clamp boxes to the original image boundaries
            dets[i, [1, 3]] = torch.clamp(dets[i, [1, 3]], 0.0, img_dim_list[i, 0])
            dets[i, [2, 4]] = torch.clamp(dets[i, [2, 4]], 0.0, img_dim_list[i, 1])
            # write results: convert (x1, y1, x2, y2) to COCO-style (x, y, w, h)
            det_dict = {}
            x = float(dets[i, 1])
            y = float(dets[i, 2])
            w = float(dets[i, 3] - dets[i, 1])
            h = float(dets[i, 4] - dets[i, 2])
            det_dict["category_id"] = 1
            det_dict["score"] = float(dets[i, 5])
            det_dict["bbox"] = [x, y, w, h]
            det_dict["image_id"] = int(os.path.basename(img_name).split('.')[0])
            dets_results.append(det_dict)
    return dets_results
def detect_one_img(self, img_name):
    """
    Detect bboxes in one image.

    Input: 'str', full path of the image.
    Output: '[{"category_id": 1, "score": float, "bbox": [x, y, w, h], "image_id": int}, ...]'
    The results follow the COCO results format, except that image_id is the integer
    parsed from the image file name rather than a zero-padded COCO %012d id.
    """
    args = self.detector_opt
    _CUDA = True
    if args:
        if args.gpus[0] < 0:
            _CUDA = False
    if not self.model:
        self.load_model()
    if isinstance(self.model, torch.nn.DataParallel):
        self.model = self.model.module
    dets_results = []
    # pre-process (scale, normalize, ...) the image
    img, orig_img, img_dim_list = prep_image(img_name, self.inp_dim)
    with torch.no_grad():
        img_dim_list = torch.FloatTensor([img_dim_list]).repeat(1, 2)
        img = img.to(args.device) if args else img.cuda()
        scaling_factor = torch.FloatTensor([
            1 / min(self.inp_dim / orig_dim[0], self.inp_dim / orig_dim[1])
            for orig_dim in img_dim_list
        ]).view(-1, 1)
        scaling_factor = scaling_factor.to(args.device) if args else scaling_factor.cuda()
        prediction = self.model(img, scaling_factor)
        # change the prediction format to the alphapose one (nms has already been done inside the effdet model)
        prediction = prediction.cpu()
        write = False
        for index, sample in enumerate(prediction):
            for det in sample:
                score = float(det[4])
                if score < .001:  # stop when below this threshold, scores are in descending order
                    break
                if int(det[5]) != 1 or score < self.confidence:
                    continue  # keep only confident human detections
                det_new = prediction.new(1, 8)
                det_new[0, 0] = index                   # index of img
                det_new[0, 1:3] = det[0:2]              # bbox x1, y1
                det_new[0, 3:5] = det[0:2] + det[2:4]   # bbox x2, y2
                det_new[0, 5:7] = det[4]                # conf (column 5 is read back as the score below)
                det_new[0, 7] = det[5]                  # cls idx
                if not write:
                    dets = det_new
                    write = True
                else:
                    dets = torch.cat((dets, det_new))
        if not write:
            return None
        img_dim_list = torch.index_select(img_dim_list, 0, dets[:, 0].long())
        for i in range(dets.shape[0]):
            # clamp boxes to the original image boundaries
            dets[i, [1, 3]] = torch.clamp(dets[i, [1, 3]], 0.0, img_dim_list[i, 0])
            dets[i, [2, 4]] = torch.clamp(dets[i, [2, 4]], 0.0, img_dim_list[i, 1])
            # write results: convert (x1, y1, x2, y2) to COCO-style (x, y, w, h)
            det_dict = {}
            x = float(dets[i, 1])
            y = float(dets[i, 2])
            w = float(dets[i, 3] - dets[i, 1])
            h = float(dets[i, 4] - dets[i, 2])
            det_dict["category_id"] = 1
            det_dict["score"] = float(dets[i, 5])
            det_dict["bbox"] = [x, y, w, h]
            det_dict["image_id"] = int(os.path.basename(img_name).split('.')[0])
            dets_results.append(det_dict)
    return dets_results
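# A minimal usage sketch (comments only, not part of the API): `detector` and the
# image path are hypothetical placeholders; it assumes the image file name stem is
# numeric, since image_id is parsed from it with int().
#
#   results = detector.detect_one_img('data/000000001000.jpg')
#   if results is not None:
#       for det in results:
#           x, y, w, h = det['bbox']   # COCO-style box in original image coordinates
#           print(det['image_id'], det['score'], (x, y, w, h))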