def detect(self, frame):
    timers = defaultdict(Timer)
    mask_frame = frame
    with c2_utils.NamedCudaScope(0):
        cls_boxes, cls_segms, cls_keyps = infer_engine.im_detect_all(
            self.model, mask_frame, None, timers=timers)
    # check whether the image should be discarded
    boxes, sgms, keypts, classes = vis_utils.convert_from_cls_format(
        cls_boxes, cls_segms, cls_keyps)
    bbox_list = []
    if len(boxes) > 0:
        indexes_big = box_utils.filter_big_boxes(boxes, 300)
        indexes_small = box_utils.filter_small_boxes(boxes, 100)
        for i in range(len(boxes)):
            if i in indexes_big and i in indexes_small:
                if classes[i] in [1, 2, 3] and boxes[i, 4] > 0.7:
                    box = boxes[i]
                    # list.append takes a single argument, so pack the
                    # (x, y, w, h) box and its class into one tuple
                    bbox_list.append(([
                        int(box[0]),
                        int(box[1]),
                        int(box[2]) - int(box[0]),
                        int(box[3]) - int(box[1])
                    ], classes[i]))
    # mask_frame = vis_utils.vis_one_image_opencv(
    #     mask_frame, cls_boxes, cls_segms, cls_keyps, thresh=0.8,
    #     kp_thresh=2, show_box=True, dataset=CocoNames, show_class=True)
    #     # , hiden_indexes=True, indexes_shown=[1])
    return bbox_list
def get_detections(self, image):
    cls_boxes, cls_depths, cls_segms, cls_keyps = infer_engine.im_detect_all(
        self.model, image, None)
    boxes, depths, _segms, _keyps, classes = convert_from_cls_format(
        cls_boxes, cls_depths, None, None)
    detections = []
    for i in range(len(classes)):
        detection = {}
        detection["bbox"] = boxes[i, :4]
        detection["score"] = boxes[i, -1]
        detection["depth"] = depths[i]
        detection["category_id"] = classes[i]
        if detection["score"] > self.cla_thresholds[
                self.classnames[detection["category_id"]]]:
            detections.append(detection)
    if self.filter_detections:
        filter_inside_boxes(detections,
                            inside_ratio_thresh=self.inside_box_ratio)
    return detections
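# `filter_inside_boxes` above is defined elsewhere. As an illustration only,
# a minimal inside-box suppression could look like the sketch below: drop any
# detection whose box is mostly contained inside another detection's box.
# The function name, the score tie-break, and the ratio semantics are
# assumptions inferred from the call site, not the real implementation.
def filter_inside_boxes_sketch(detections, inside_ratio_thresh=0.8):
    def inside_ratio(inner, outer):
        # intersection area divided by the inner box's own area
        x1 = max(inner[0], outer[0])
        y1 = max(inner[1], outer[1])
        x2 = min(inner[2], outer[2])
        y2 = min(inner[3], outer[3])
        inter = max(0.0, x2 - x1) * max(0.0, y2 - y1)
        area = max(1e-6, (inner[2] - inner[0]) * (inner[3] - inner[1]))
        return inter / area

    keep = []
    for i, det in enumerate(detections):
        swallowed = any(
            j != i
            and other["score"] >= det["score"]
            and inside_ratio(det["bbox"], other["bbox"]) > inside_ratio_thresh
            for j, other in enumerate(detections))
        if not swallowed:
            keep.append(det)
    detections[:] = keep  # mutate the list in place, as the call site implies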
def post_processing(self, cls_boxes):
    dets = []
    if cls_boxes is None:
        return {'detections': []}
    #print(cls_boxes)
    boxes, _, _, classes = vis_utils.convert_from_cls_format(
        cls_boxes, None, None)
    #print(boxes)
    if boxes is None:
        return {'detections': []}
    areas = (boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1])
    sorted_inds = np.argsort(-areas)
    #logger.info(len(sorted_inds))
    #logger.info(sorted_inds)
    #logger.info(boxes)
    #logger.info(classes)
    for i in sorted_inds:
        bbox = boxes[i, :4]
        score = boxes[i, -1]
        threshold = self.thresholds[classes[i]]
        if score < threshold:
            continue
        name = self._get_class_string(classes[i], self.dummy_coco_dataset)
        x1, y1, x2, y2 = bbox.astype(float)
        dets.append({
            'name': name,
            'score': float(score),
            'box': [x1, y1, x2, y2],
        })
    return {'detections': dets}
def run_model_cfg(args, im, check_blobs):
    workspace.ResetWorkspace()
    model, _ = load_model(args)
    with c2_utils.NamedCudaScope(0):
        cls_boxes, cls_segms, cls_keyps = test_engine.im_detect_all(
            model, im, None, None,
        )
    boxes, segms, keypoints, classes = vis_utils.convert_from_cls_format(
        cls_boxes, cls_segms, cls_keyps)
    # sort the results based on score for comparison
    boxes, segms, keypoints, classes = _sort_results(
        boxes, segms, keypoints, classes)

    # write final results back to workspace
    def _ornone(res):
        return np.array(res) if res is not None else np.array(
            [], dtype=np.float32)

    with c2_utils.NamedCudaScope(0):
        workspace.FeedBlob(core.ScopedName('result_boxes'), _ornone(boxes))
        workspace.FeedBlob(core.ScopedName('result_segms'), _ornone(segms))
        workspace.FeedBlob(core.ScopedName('result_keypoints'),
                           _ornone(keypoints))
        workspace.FeedBlob(core.ScopedName('result_classids'),
                           _ornone(classes))

    # get result blobs
    with c2_utils.NamedCudaScope(0):
        ret = _get_result_blobs(check_blobs)

    return ret
def extract_person(image, background_image, model, dataset):
    timers = defaultdict(Timer)
    t = time.time()
    with c2_utils.NamedCudaScope(0):
        cls_boxes, cls_segms, cls_keyps = infer_engine.im_detect_all(
            model, image, None, timers=timers)
    boxes, segms, keypoints, classes = vis_utils.convert_from_cls_format(
        cls_boxes, cls_segms, cls_keyps,
    )
    masks = mask_util.decode(segms)
    masks = np.moveaxis(masks, 2, 0)
    output_image = np.copy(background_image)
    for box, mask, c in zip(boxes, masks, classes):
        score = box[-1]
        if score < 0.9:
            continue
        if dataset.classes[c] != 'person':
            continue
        idx = np.where(mask != 0)
        output_image[idx[0], idx[1], :] = image[idx[0], idx[1], :]
    return output_image
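# Possible usage of extract_person (the file names here are hypothetical, and
# the model/dataset objects are assumed to be initialized elsewhere, e.g. via
# infer_engine.initialize_model_from_cfg and dummy_datasets.get_coco_dataset):
# image = cv2.imread('frame.jpg')
# background = cv2.imread('background.jpg')
# composite = extract_person(image, background, model, dummy_coco_dataset)
# cv2.imwrite('composite.jpg', composite)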
def process_images(
    args_weights, args_im_or_folder, args_image_ext
):
    assert not cfg.MODEL.RPN_ONLY, \
        'RPN models are not supported'
    assert not cfg.TEST.PRECOMPUTED_PROPOSALS, \
        'Models that require precomputed proposals are not supported'
    model = infer_engine.initialize_model_from_cfg(args_weights)
    dummy_coco_dataset = dummy_datasets.get_coco_dataset()
    if os.path.isdir(args_im_or_folder):
        im_list = glob.iglob(args_im_or_folder + '/*.' + args_image_ext)
    else:
        im_list = [args_im_or_folder]
    for i, im_name in enumerate(im_list):
        im = cv2.imread(im_name)
        timers = defaultdict(Timer)
        t = time.time()
        with c2_utils.NamedCudaScope(0):
            cls_boxes, cls_segms, cls_keyps = infer_engine.im_detect_all(
                model, im, None, timers=timers
            )
        boxes, segms, keyps, classes = vis_utils.convert_from_cls_format(
            cls_boxes, cls_segms, cls_keyps)
        if classes is not None:
            class_strs = [dummy_coco_dataset.classes[c] for c in classes]
        im_name = im_name.split('/')[-1].split('.' + args_image_ext)[0]
        yield im_name, boxes, segms, class_strs, im.shape[:2]
def infer(par, thresh):
    logger = logging.getLogger(__name__)
    merge_cfg_from_file(par['config_path'])
    cfg.NUM_GPUS = 1
    par['weight_path'] = cache_url(par['weight_path'], cfg.DOWNLOAD_CACHE)
    assert_and_infer_cfg(cache_urls=False)
    assert not cfg.MODEL.RPN_ONLY, \
        'RPN models are not supported'
    assert not cfg.TEST.PRECOMPUTED_PROPOSALS, \
        'Models that require precomputed proposals are not supported'
    model = infer_engine.initialize_model_from_cfg(par['weight_path'])
    if os.path.isdir(par['input_img_path']):
        im_list = glob.iglob(par['input_img_path'] + '/*.jpg')
    else:
        im_list = [par['input_img_path']]
    count = 0
    t_total = 0
    np.set_printoptions(suppress=True)  # keep numpy from printing in scientific notation
    for i, im_name in enumerate(im_list):  # i is the index, im_name the image path
        # note: rstrip(".jpg") would strip any trailing '.', 'j', 'p', 'g'
        # characters, so use splitext to drop the extension instead
        out_name = os.path.join(
            par['output_xml_path'],
            '{}'.format(os.path.splitext(os.path.basename(im_name))[0] + '.xml')
        )
        #logger.info('Processing {} -> {}'.format(im_name, out_name))
        im = cv2.imread(im_name)
        w = float(im.shape[1])
        h = float(im.shape[0])
        # start timing
        timers = defaultdict(Timer)
        t_start = time.time()
        with c2_utils.NamedCudaScope(par['gpu_id']):
            cls_boxes, cls_segms, cls_keyps = infer_engine.im_detect_all(
                model, im, None, timers=timers
            )
        boxes, segms, keypoints, classes = vis_utils.convert_from_cls_format(
            cls_boxes, cls_segms, cls_keyps)
        if boxes is not None:
            boxes = np.array(boxes)
            # coordinate normalization (disabled) ↓
            # boxes[:, 0:4] = boxes[:, 0:4] / np.array([col, row, col, row])
            # boxes = np.maximum(boxes, 0)
            # boxes = np.minimum(boxes, 1)
            classes_ = np.array(classes, dtype=int)
            classes_temp = classes_.reshape(1, -1)
            classes = np.transpose(classes_temp)
            res = np.hstack((classes, boxes))
            # in res, column 1 is the class, columns 2-5 are the box
            # coordinates, and column 6 is the score
            res = res[res[:, -1] > thresh]
        else:
            res = []
        # stop timing
        t_end = time.time()
        t_total = t_total + (t_end - t_start)
        count = count + 1
        make_xml_file(par, res, w, h, out_name)
    print("Average detection time:", int(1000 * t_total / count), "ms/img")
def infer_one_frame(image, model, img_path, pose_path):
    with c2_utils.NamedCudaScope(0):
        cls_boxes, cls_segms, cls_keyps, cls_bodys = infer_engine.im_detect_all(
            model, image, None, None
        )
    boxes, segms, keypoints, classes = vis_utils.convert_from_cls_format(
        cls_boxes, cls_segms, cls_keyps)
    bodys = cls_bodys[1]
    valid_detection_inds = boxes[:, 4] > 0.9
    if boxes.shape[0] == 0 or np.count_nonzero(valid_detection_inds) < 2:
        return {}
    areas = mask_util.area(segms)
    top2_inds = np.argsort(areas * valid_detection_inds)[-2:]
    # decide foreground player
    id1, id2 = top2_inds[0], top2_inds[1]
    top2_inds = [id2, id1] if boxes[id1][1] < boxes[id2][1] else [id1, id2]
    result = {'boxes': [boxes[i][:4].astype(int) for i in top2_inds],
              'segms': [segms[i] for i in top2_inds],
              'keyps': [keypoints[i].astype(int) for i in top2_inds],
              }  # 'bodys': [bodys[i] for i in top2_inds]}
    # crop foreground player
    body_box = boxes[top2_inds[0]][:4].astype(int)  # x1, y1, x2, y2
    uv_patch = bodys[top2_inds[0]].transpose([1, 2, 0])
    uv_full = np.zeros(image.shape)
    uv_full[body_box[1]: body_box[1] + uv_patch.shape[0],
            body_box[0]: body_box[0] + uv_patch.shape[1], :] = uv_patch
    uv_full[:, :, 1:3] = 255. * uv_full[:, :, 1:3]
    uv_full[uv_full > 255] = 255.
    uv_full = uv_full.astype(np.uint8)
    cx, cy = (body_box[0] + body_box[2]) // 2, (body_box[1] + body_box[3]) // 2
    crop_width = 640  # image.shape[0] // 2
    crop_box = [cx - crop_width // 2,
                min(body_box[3] + 30, image.shape[0]) - crop_width]
    crop_box += [crop_box[0] + crop_width, crop_box[1] + crop_width]
    # body center box
    # crop_box = [cx - W // 2, cy - W // 2, W, W]
    if crop_box[0] < 0 or crop_box[2] > image.shape[1] \
            or crop_box[1] < 0 or crop_box[3] > image.shape[0]:
        return result
    # visualize crop
    # cv2.rectangle(image, (crop_box[0], crop_box[1]), (crop_box[2], crop_box[3]), (255,255,255), 5)
    # cv2.rectangle(uv_full, (crop_box[0], crop_box[1]), (crop_box[2], crop_box[3]), (255,255,255), 5)
    # cv2.imwrite(img_path, image)
    # cv2.imwrite(pose_path, uv_full)
    cv2.imwrite(img_path,
                image[crop_box[1]: crop_box[3], crop_box[0]: crop_box[2], :])
    cv2.imwrite(pose_path,
                uv_full[crop_box[1]: crop_box[3], crop_box[0]: crop_box[2], :])
    result['crop_box'] = crop_box
    result['img_path'] = img_path
    result['densepose_path'] = pose_path
    return result
def main(args):
    logger = logging.getLogger(__name__)
    merge_cfg_from_file(args.cfg)
    cfg.NUM_GPUS = 1
    args.weights = cache_url(args.weights, cfg.DOWNLOAD_CACHE)
    assert_and_infer_cfg(cache_urls=False)
    assert not cfg.MODEL.RPN_ONLY, \
        'RPN models are not supported'
    assert not cfg.TEST.PRECOMPUTED_PROPOSALS, \
        'Models that require precomputed proposals are not supported'
    model = infer_engine.initialize_model_from_cfg(args.weights)
    dummy_coco_dataset = dummy_datasets.get_coco_dataset()
    if os.path.isdir(args.im_or_folder):
        im_list = glob.iglob(args.im_or_folder + '/*.' + args.image_ext)
    else:
        im_list = [args.im_or_folder]
    print(im_list)
    for i, im_name in enumerate(im_list):
        im_basename = os.path.basename(im_name)
        out_name = os.path.join(
            args.output_dir, '{}'.format(im_basename + '.' + args.output_ext))
        logger.info('Processing {} -> {}'.format(im_name, out_name))
        im = cv2.imread(im_name)
        timers = defaultdict(Timer)
        t = time.time()
        with c2_utils.NamedCudaScope(0):
            cls_boxes, cls_segms, cls_keyps = infer_engine.im_detect_all(
                model, im, None, timers=timers)
        result_file = os.path.join(args.output_dir, im_basename + '_bbox.pkl')
        with open(result_file, 'wb') as f:
            boxes, _, _, _ = vis_utils.convert_from_cls_format(
                cls_boxes, cls_segms, cls_keyps)
            print(boxes.shape)
            pickle.dump(cls_boxes, f)
        logger.info('Inference time: {:.3f}s'.format(time.time() - t))
        for k, v in timers.items():
            logger.info(' | {}: {:.3f}s'.format(k, v.average_time))
        if i == 0:
            logger.info(
                ' \ Note: inference on the first image will be slower than the '
                'rest (caches and auto-tuning need to warm up)')
def do_one_image_opencv(
        im, boxes, segms=None, keypoints=None, thresh=0.9, reply=None):
    masks = None
    classes = None
    if isinstance(boxes, list):
        boxes, segms, keypoints, classes = vis_utils.convert_from_cls_format(
            boxes, segms, keypoints)
    if boxes is None or boxes.shape[0] == 0 or max(boxes[:, 4]) < thresh:
        reply["status"] = 0
        reply["boxes"] = None
        #reply["keypoints"] = None
        reply["classes"] = None
        return None
    if segms is not None and len(segms) > 0:
        masks = mask_util.decode(segms)
    # Display in largest to smallest order to reduce occlusion
    areas = (boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1])
    sorted_inds = np.argsort(-areas)
    # Empty image (zero-initialized; np.empty would leave garbage values
    # that np.maximum below would then keep)
    s0, s1, _ = im.shape
    result = np.zeros(shape=(s0, s1), dtype=np.uint8)
    _boxes = []
    _classes = []
    for i in sorted_inds:
        bbox = boxes[i, :4]
        score = boxes[i, -1]
        # score too low => skip
        if score < thresh:
            continue
        # only humans
        #if classes[i] != 1:
        #    continue
        if segms is not None and len(segms) > i:
            result = np.maximum(result, i * masks[..., i])
        _boxes.append(boxes[i].tolist())
        _classes.append(classes[i])
    reply["status"] = 0
    reply["boxes"] = _boxes
    #reply["keypoints"] = keypoints.tolist()
    reply["classes"] = _classes
    return result
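# A possible way to inspect the instance-index image that do_one_image_opencv
# returns (illustrative only; colorize_label_image is not part of the original
# code). Assumes OpenCV and numpy; `label_im` is the uint8 array in which each
# pixel holds the index of the detection covering it, with 0 as background.
def colorize_label_image(label_im):
    # Spread the small instance indices over the full 8-bit range so the
    # colormap yields visually distinct colors per instance.
    scale = 255.0 / max(int(label_im.max()), 1)
    scaled = (label_im.astype(np.float32) * scale).astype(np.uint8)
    return cv2.applyColorMap(scaled, cv2.COLORMAP_JET)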
def getInfoByModel_orig(self, image, thresh=0.6):
    timers = defaultdict(Timer)
    with c2_utils.NamedCudaScope(0):
        cls_boxes, cls_segms, cls_keyps = infer_engine.im_detect_all(
            self.model, image, None, timers=timers)
    boxes, segms, keypoints, classes = vis_utils.convert_from_cls_format(
        cls_boxes, cls_segms, cls_keyps)
    boxes_top = []
    classes_top = []
    if boxes is not None:
        for i, box in enumerate(boxes):
            if box[-1] > thresh:
                boxes_top.append(box)
                classes_top.append(self.labelmap[classes[i]])
    return boxes_top, classes_top
def main(args):
    logger = logging.getLogger(__name__)
    merge_cfg_from_file(args.cfg)
    cfg.NUM_GPUS = 1
    args.weights = cache_url(args.weights, cfg.DOWNLOAD_CACHE)
    assert_and_infer_cfg(cache_urls=False)
    assert not cfg.MODEL.RPN_ONLY, \
        'RPN models are not supported'
    assert not cfg.TEST.PRECOMPUTED_PROPOSALS, \
        'Models that require precomputed proposals are not supported'
    model = infer_engine.initialize_model_from_cfg(args.weights)
    dummy_coco_dataset = dummy_datasets.get_coco_dataset()
    if os.path.isdir(args.im_or_folder):
        im_list = glob.iglob(args.im_or_folder + '/*.' + args.image_ext)
    else:
        im_list = [args.im_or_folder]
    for i, im_name in enumerate(im_list):
        #if i > 10:
        #    continue
        out_name = os.path.join(
            args.output_dir,
            '{}'.format(os.path.basename(im_name)[:-len(args.image_ext)] + 'mat')
        )
        im = cv2.imread(im_name)
        timers = defaultdict(Timer)
        t = time.time()
        with c2_utils.NamedCudaScope(0):
            cls_boxes, cls_segms, cls_keyps = infer_engine.im_detect_all(
                model, im, None, timers=timers
            )
        boxes, segms, keyps, classes = vis_utils.convert_from_cls_format(
            cls_boxes, cls_segms, cls_keyps)
        if boxes is None:
            continue
        segms = vis_utils.mask_util.decode(segms)
        valid_inds = np.greater(boxes[:, 4], 0.5)
        boxes = boxes[valid_inds, :]
        segms = segms[:, :, valid_inds]
        classes = np.array(classes)[valid_inds]
        class_names = np.asarray(
            [coco_to_pascal_name(coco_classes[c - 1]) for c in classes],
            dtype='object')
        sio.savemat(out_name,
                    {'masks': segms, 'boxes': boxes, 'classes': class_names})
def infer_image(self, req):
    # bridge = CvBridge()
    # image = bridge.imgmsg_to_cv2(req.rgb_image, desired_encoding='bgr8')
    path = self.dir + "/" + str(req.num)
    image = cv2.imread(path + "/image.png")
    category = []
    mask_image_all = []
    timers = defaultdict(Timer)
    t = time.time()
    with c2_utils.NamedCudaScope(0):
        cls_boxes, cls_segms, cls_keyps = infer_engine.im_detect_all(
            self.model, image, None, timers=timers)
    self.logger.info('Inference time: {:.3f}s'.format(time.time() - t))
    for k, v in timers.items():
        self.logger.info(' | {}: {:.3f}s'.format(k, v.average_time))
    boxes, segms, keypoints, classes = vis_utils.convert_from_cls_format(
        cls_boxes, cls_segms, None)
    if segms is not None and len(segms) > 0:
        masks = mask_util.decode(segms)
    file_name = path + "/image.txt"
    file = open(file_name, 'w')
    file.write(str(0) + str(" "))
    file.close()
    ground_image = np.ones((480, 640, 1), np.uint8)
    class_num = 1
    for item in range(len(boxes)):
        score = boxes[item, -1]
        if score < self.thresh:
            continue
        idx = np.nonzero(masks[..., item])
        mask_image = np.zeros((480, 640, 1), np.uint8)
        mask_image[idx[0], idx[1], :] = 255
        ground_image[idx[0], idx[1], :] = 0
        # mask_msg = bridge.cv2_to_imgmsg(mask_image, encoding='bgr8')
        # category.append(classes[item])
        # mask_image_all.append(mask_image)
        cv2.imwrite(path + "/" + str(classes[item]) + ".png", mask_image)
        file_name = path + "/image.txt"
        f = open(file_name, 'a')
        f.write(str(classes[item]) + str(" "))
        f.close()
        class_num += 1
    cv2.imwrite(path + "/0.png", ground_image)
    return class_num
def getInfoByModel(self, image, thresh=0.7):
    top_labels = []
    top_xmin = []
    top_ymin = []
    top_xmax = []
    top_ymax = []
    top_scores = []
    timers = defaultdict(Timer)
    with c2_utils.NamedCudaScope(0):
        cls_boxes, cls_segms, cls_keyps = infer_engine.im_detect_all(
            self.model, image, None, timers=timers)
    boxes, segms, keypoints, classes = vis_utils.convert_from_cls_format(
        cls_boxes, cls_segms, cls_keyps)
    specific_thresh = {'sdf': 0.6, 'def': 0.6}
    exclusiveGroups = [['truck', 'car', 'tanker', 'bus', 'motorcycle'],
                       ['blue', 'yellow', 'red', 'gray', 'security', 'other']]
    boxes, classes = self.nms_exclusive_boxes(boxes, classes,
                                              exclusiveGroups, thresh)
    try:
        for i, box in enumerate(boxes):
            # print(box)
            temp_thresh = thresh
            if self.labelmap[classes[i]] in specific_thresh:
                temp_thresh = specific_thresh[self.labelmap[classes[i]]]
            if box[-1] > temp_thresh:
                try:
                    top_labels.append(self.labelmap[classes[i]])
                except:
                    top_labels.append(str(classes[i]))
                # print(classes)
                # print(self.labelmap[classes[i]])
                top_xmin.append(float(box[0]) / float(image.shape[1]))
                top_ymin.append(float(box[1]) / float(image.shape[0]))
                top_xmax.append(float(box[2]) / float(image.shape[1]))
                top_ymax.append(float(box[3]) / float(image.shape[0]))
                top_scores.append(box[4])
    except:
        pass
    return top_labels, top_xmin, top_ymin, top_xmax, top_ymax, top_scores
def format_and_save(cls_boxes, cls_segms, cls_keyps, i, output_file):
    boxes, segms, keypoints, classes = vis_utils.convert_from_cls_format(
        cls_boxes, cls_segms, cls_keyps)
    if segms is not None:
        masks = mask_util.decode(segms)
    else:
        masks = np.asarray([[[]]])  # an empty array with shape[2] == 0
    all_contours = []  # This might not be getting reset
    for mask_idx in range(masks.shape[2]):
        #print("shapes are {}".format(masks[..., mask_idx].shape))
        _, contours, _ = cv2.findContours(
            masks[..., mask_idx].copy(), cv2.RETR_LIST,
            cv2.CHAIN_APPROX_SIMPLE)  # why is this getting copied
        all_contours.append(contours)
        # this code is more general and allows for multiple contours,
        # but there aren't any
    if boxes is None:
        boxes = []
    else:
        boxes = boxes.tolist()
    print("classes are {}".format(classes))

    # create the MOT formatted row
    def mot_row(i, boxes, classes):
        """<frame>, <id=class>, <bb_left>, <bb_top>, <bb_width>, <bb_height>,
        <conf>, <x=-1>, <y=-1>, <z=-1>
        """
        assert len(boxes) == len(classes), \
            "the boxes weren't the same length as the classes"
        out_ = np.empty((0, 10), float)
        for box_id, box_ in enumerate(boxes):
            class_ = classes[box_id]
            # check that the conversion is correct
            # and that conf is where I think it is
            out_ = np.append(out_,
                             np.array([[i, classes[box_id], box_[0], box_[1],
                                        box_[2] - box_[0], box_[3] - box_[1],
                                        box_[4], -1.0, -1.0, -1.0]]),
                             axis=0)
        return out_

    frame_data = {
        'frame': i,
        'boxes': boxes,
        'classes': classes,
        'contours': [[c.tolist() for c in some_contours]
                     for some_contours in all_contours]
    }
    #print(frame_data)
    with h5py.File(output_file, 'a') as file_handler:
        file_handler.create_dataset(str("{:09d}".format(i)),
                                    data=json.dumps(frame_data))
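# A quick sanity check of mot_row above, with hypothetical toy values (not
# from any real dataset): one box [x1, y1, x2, y2, conf] = [10, 20, 50, 80, 0.9]
# of class 1 in frame 3 becomes the row
#     [3, 1, 10, 20, 40, 60, 0.9, -1, -1, -1]
# i.e. <frame>, <id=class>, <bb_left>, <bb_top>, <bb_width> = x2 - x1,
# <bb_height> = y2 - y1, <conf>, and three -1 placeholders.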
def _write_to_txt(boxes, segms, keypoints, im_name, dataset):
    #output_txt_dir = os.path.join(args.output_dir, 'txtFiles')
    output_caronly_dir = os.path.join(args.output_dir, 'txtFilesCarOnly')
    #if not os.path.exists(output_txt_dir):
    #    os.makedirs(output_txt_dir)
    if not os.path.exists(output_caronly_dir):
        os.makedirs(output_caronly_dir)
    if isinstance(boxes, list):
        boxes, _, _, classes = vis_utils.convert_from_cls_format(
            boxes, segms, keypoints)
    if (boxes is None or boxes.shape[0] == 0) and not args.out_when_no_box:
        return
    image_name, _ = os.path.splitext(os.path.basename(im_name))
    result_file_car_only = open(
        os.path.join(output_caronly_dir,
                     os.path.basename(im_name).replace(args.image_ext, 'txt')),
        'w')
    #result_file = open(
    #    os.path.join(output_txt_dir,
    #                 os.path.basename(im_name).replace(args.image_ext, 'txt')),
    #    'w')
    for i in range(boxes.shape[0]):
        bbox = boxes[i, :4]
        score = boxes[i, -1]
        label = dataset.classes[classes[i]] if dataset is not None else \
            'id{:d}'.format(classes[i])
        #result_file.write(
        #    "%s -1 -1 -10 %.3f %.3f %.3f %.3f -1 -1 -1 -1000 -1000 -1000 -10 %.8f\n"
        #    % (label, bbox[0], bbox[1], bbox[2], bbox[3], score))
        if classes[i] == 3:
            label = 'Car'
            result_file_car_only.write(
                "%s -1 -1 -10 %.3f %.3f %.3f %.3f -1 -1 -1 -1000 -1000 -1000 -10 %.8f\n"
                % (label, bbox[0], bbox[1], bbox[2], bbox[3], score))
def detect(self, im):
    timers = defaultdict(Timer)
    t = time.time()
    with c2_utils.NamedCudaScope(0):
        cls_boxes, cls_segms, cls_keyps = infer_engine.im_detect_all(
            self.model, im, None, timers=timers)
    self.logger.info('Inference time: {:.3f}s'.format(time.time() - t))
    for k, v in timers.items():
        self.logger.info(' | {}: {:.3f}s'.format(k, v.average_time))
    im_show = vis_utils.vis_one_image_opencv(
        im, cls_boxes, cls_segms, cls_keyps, dataset=self.dummy_coco_dataset)
    boxes, segms, keypoints, classes = vis_utils.convert_from_cls_format(
        cls_boxes, cls_segms, cls_keyps)
    ids = [i for i, c in enumerate(classes) if c == 1]
    valid = len(ids) > 0
    kps = -np.ones((3, 17))
    if valid:
        scores = boxes[ids, -1]
        kps = keypoints[ids[np.argmax(scores)]][:3]
    return valid, kps, im_show
def infer(im, i, output_name='None', video='None', mot_output=[],
          json_output=[], masks_output=[]):
    timers = defaultdict(Timer)
    t = time.time()
    with c2_utils.NamedCudaScope(0):
        cls_boxes, cls_segms, cls_keyps = infer_engine.im_detect_all(
            model, im, None, timers=timers)
    logger.info('Inference time: {:.3f}s'.format(time.time() - t))
    for k, v in timers.items():
        logger.info(' | {}: {:.3f}s'.format(k, v.average_time))
    if i == 0:
        logger.info(
            ' \ Note: inference on the first image will be slower than the '
            'rest (caches and auto-tuning need to warm up)')
    boxes, segms, keypoints, classes = vis_utils.convert_from_cls_format(
        cls_boxes, cls_segms, cls_keyps)
    if segms is not None:
        masks = mask_util.decode(segms)
    else:
        masks = np.asarray([[[]]])  # an empty array with shape[2] == 0
    all_contours = []  # This might not be getting reset
    for mask_idx in range(masks.shape[2]):
        #print("shapes are {}".format(masks[..., mask_idx].shape))
        _, contours, _ = cv2.findContours(
            masks[..., mask_idx].copy(), cv2.RETR_LIST,
            cv2.CHAIN_APPROX_SIMPLE)  # why is this getting copied
        all_contours.append(contours)
        # this code is more general and allows for multiple contours,
        # but there aren't any
    if boxes is None:
        boxes = []
    else:
        boxes = boxes.tolist()
    print("classes are {}".format(classes))

    # create the MOT formatted row
    def mot_row(i, boxes, classes):
        """<frame>, <id=class>, <bb_left>, <bb_top>, <bb_width>, <bb_height>,
        <conf>, <x=-1>, <y=-1>, <z=-1>
        """
        assert len(boxes) == len(classes), \
            "the boxes weren't the same length as the classes"
        out_ = np.empty((0, 10), float)
        for box_id, box_ in enumerate(boxes):
            class_ = classes[box_id]
            # check that the conversion is correct
            # and that conf is where I think it is
            out_ = np.append(out_,
                             np.array([[i, classes[box_id], box_[0], box_[1],
                                        box_[2] - box_[0], box_[3] - box_[1],
                                        box_[4], -1.0, -1.0, -1.0]]),
                             axis=0)
        return out_

    if args.mot_format:
        #TODO actually write out the class of the detection
        try:
            mot_output = np.append(mot_output, mot_row(i, boxes, classes),
                                   axis=0)
        except ValueError:
            import pdb
            pdb.set_trace()
        json_output.append({
            'video': video,
            'frame': i,
            'boxes': boxes,
            'classes': classes,
            'contours': [[c.tolist() for c in some_contours]
                         for some_contours in all_contours]
        })
        #masks_output[i] = [[c.tolist() for c in some_contours]
        #                   for some_contours in all_contours]
        # this will be a mapping from frame to contours
    else:
        json_output.append({
            'video': video,
            'frame': i,
            'boxes': boxes,
            'classes': classes,
            'contours': [[c.tolist() for c in some_contours]
                         for some_contours in all_contours]
        })
    if i % 100 == 0:
        print("about to save files")
        # so this keeps all the data in memory which seems terrible
        save_files(args, im_name, output_basename, mot_output, json_output,
                   masks_output)
        json_output = []
        mot_output = []
        masks_output = []
        if len(json_output) != 0 or len(masks_output) != 0 or \
                len(mot_output) != 0:
            import pdb
            pdb.set_trace()  # MOD
    print('output name {}'.format(output_name))
    #HACK
    VISUALIZATION_FREQUENCY = 100
    if args.visualize and i % VISUALIZATION_FREQUENCY == 0:
        start_time = time.time()
        vis_utils.vis_one_image(
            im[:, :, ::-1],  # BGR -> RGB for visualization
            output_name,
            "{}/images".format(args.output_dir),
            cls_boxes,
            cls_segms,
            cls_keyps,
            dataset=dummy_coco_dataset,
            box_alpha=0.3,
            show_class=True,
            thresh=0.4,  # change the display confidence
            kp_thresh=2,
            ext=args.output_ext,
            out_when_no_box=args.out_when_no_box)
        print("only visualizing every {}th frame, it took {} seconds".format(
            VISUALIZATION_FREQUENCY, time.time() - start_time))
    if args.mot_format:
        return mot_output
def vis_extract_func(im, im_name, output_dir, boxes, segms=None,
                     keypoints=None, cls_feats=None, thresh=0.9, kp_thresh=2,
                     dpi=200, box_alpha=0.0, dataset=None, show_class=False,
                     ext='pdf', out_when_no_box=False):
    """Visual debugging of detections."""
    #ADDED declare variables to return
    textbox_assigned = 0
    textbox_feats = None
    one_human_assigned = 0
    human_feats = None
    textbox = None
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)
    if isinstance(boxes, list):
        boxes, segms, keypoints, classes = convert_from_cls_format(
            boxes, segms, keypoints)
    #ADDED similar to convert_from_cls_format, but for feats_list
    feats_list = [b for b in cls_feats if len(b) > 0]
    if len(feats_list) > 0:
        feats = np.concatenate(feats_list)
    else:
        feats = None
    if (boxes is None or boxes.shape[0] == 0
            or max(boxes[:, 4]) < thresh) and not out_when_no_box:
        # return the same five values as the normal exit path
        return None, None, 0, None, 0
    dataset_keypoints, _ = keypoint_utils.get_keypoints()
    if segms is not None and len(segms) > 0:
        masks = mask_util.decode(segms)
    color_list = colormap(rgb=True) / 255
    kp_lines = kp_connections(dataset_keypoints)
    cmap = plt.get_cmap('rainbow')
    colors = [cmap(i) for i in np.linspace(0, 1, len(kp_lines) + 2)]
    fig = plt.figure(frameon=False)
    fig.set_size_inches(im.shape[1] / dpi, im.shape[0] / dpi)
    ax = plt.Axes(fig, [0., 0., 1., 1.])
    ax.axis('off')
    fig.add_axes(ax)
    ax.imshow(im)
    if boxes is None:
        sorted_inds = []  # avoid crash when 'boxes' is None
    else:
        # Display in largest to smallest order to reduce occlusion
        areas = (boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1])
        sorted_inds = np.argsort(-areas)
    mask_color_id = 0
    for i in sorted_inds:
        bbox = boxes[i, :4]
        score = boxes[i, -1]
        if score < thresh:
            continue
        #ADDED the text material can be either: tv (i=63), laptop (64),
        # cell phone (68), book (74)
        is_txtmtrl = (classes[i] == 63 or classes[i] == 64
                      or classes[i] == 68 or classes[i] == 74)
        if is_txtmtrl and not textbox_assigned:
            textbox_feats = feats[i]
            normbbox = bbox / 256
            textbox_xmid = (normbbox[2] + normbbox[0]) / 2
            textbox_ymid = (normbbox[3] + normbbox[1]) / 2
            textbox = np.concatenate(
                (normbbox, textbox_xmid, textbox_ymid), axis=None)
        # show box (off by default)
        ax.add_patch(
            plt.Rectangle((bbox[0], bbox[1]),
                          bbox[2] - bbox[0],
                          bbox[3] - bbox[1],
                          fill=False,
                          edgecolor='g',
                          linewidth=0.5,
                          alpha=box_alpha))
        if show_class:
            ax.text(bbox[0], bbox[1] - 2,
                    get_class_string(classes[i], score, dataset),
                    fontsize=3,
                    family='serif',
                    bbox=dict(facecolor='g', alpha=0.4, pad=0,
                              edgecolor='none'),
                    color='white')
        # show mask
        if segms is not None and len(segms) > i:
            img = np.ones(im.shape)
            color_mask = color_list[mask_color_id % len(color_list), 0:3]
            mask_color_id += 1
            w_ratio = .4
            for c in range(3):
                color_mask[c] = color_mask[c] * (1 - w_ratio) + w_ratio
            for c in range(3):
                img[:, :, c] = color_mask[c]
            e = masks[:, :, i]
            contour, hier = cv2.findContours(e.copy(), cv2.RETR_CCOMP,
                                             cv2.CHAIN_APPROX_NONE)
            for c in contour:
                polygon = Polygon(c.reshape((-1, 2)),
                                  fill=True,
                                  facecolor=color_mask,
                                  edgecolor='w',
                                  linewidth=1.2,
                                  alpha=0.5)
                ax.add_patch(polygon)
            textbox_assigned = 1
        #ADDED human features are extracted, although we prefer to use
        # "infer_simple_extract_human" for that
        if classes[i] == 1 and not one_human_assigned:
            human_feats = feats[i]
            one_human_assigned = 1
        # show keypoints
        if keypoints is not None and len(keypoints) > i:
            kps = keypoints[i]
            plt.autoscale(False)
            for l in range(len(kp_lines)):
                i1 = kp_lines[l][0]
                i2 = kp_lines[l][1]
                if kps[2, i1] > kp_thresh and kps[2, i2] > kp_thresh:
                    x = [kps[0, i1], kps[0, i2]]
                    y = [kps[1, i1], kps[1, i2]]
                    line = plt.plot(x, y)
                    plt.setp(line, color=colors[l], linewidth=1.0, alpha=0.7)
                if kps[2, i1] > kp_thresh:
                    plt.plot(kps[0, i1], kps[1, i1], '.',
                             color=colors[l], markersize=3.0, alpha=0.7)
                if kps[2, i2] > kp_thresh:
                    plt.plot(kps[0, i2], kps[1, i2], '.',
                             color=colors[l], markersize=3.0, alpha=0.7)
            # add mid shoulder / mid hip for better visualization
            mid_shoulder = (
                kps[:2, dataset_keypoints.index('right_shoulder')] +
                kps[:2, dataset_keypoints.index('left_shoulder')]) / 2.0
            sc_mid_shoulder = np.minimum(
                kps[2, dataset_keypoints.index('right_shoulder')],
                kps[2, dataset_keypoints.index('left_shoulder')])
            mid_hip = (kps[:2, dataset_keypoints.index('right_hip')] +
                       kps[:2, dataset_keypoints.index('left_hip')]) / 2.0
            sc_mid_hip = np.minimum(
                kps[2, dataset_keypoints.index('right_hip')],
                kps[2, dataset_keypoints.index('left_hip')])
            if (sc_mid_shoulder > kp_thresh
                    and kps[2, dataset_keypoints.index('nose')] > kp_thresh):
                x = [mid_shoulder[0], kps[0, dataset_keypoints.index('nose')]]
                y = [mid_shoulder[1], kps[1, dataset_keypoints.index('nose')]]
                line = plt.plot(x, y)
                plt.setp(line, color=colors[len(kp_lines)], linewidth=1.0,
                         alpha=0.7)
            if sc_mid_shoulder > kp_thresh and sc_mid_hip > kp_thresh:
                x = [mid_shoulder[0], mid_hip[0]]
                y = [mid_shoulder[1], mid_hip[1]]
                line = plt.plot(x, y)
                plt.setp(line, color=colors[len(kp_lines) + 1],
                         linewidth=1.0, alpha=0.7)
    if textbox_assigned:
        output_name = os.path.basename(im_name) + '.' + ext
        fig.savefig(os.path.join(output_dir, '{}'.format(output_name)),
                    dpi=dpi)
    plt.close('all')
    return textbox, textbox_feats, textbox_assigned, human_feats, \
        one_human_assigned
def main(args):
    logger = logging.getLogger(__name__)
    merge_cfg_from_file(args.cfg)
    cfg.NUM_GPUS = 1
    args.weights = cache_url(args.weights, cfg.DOWNLOAD_CACHE)
    assert_and_infer_cfg(cache_urls=False)
    assert not cfg.MODEL.RPN_ONLY, \
        'RPN models are not supported'
    assert not cfg.TEST.PRECOMPUTED_PROPOSALS, \
        'Models that require precomputed proposals are not supported'
    model = infer_engine.initialize_model_from_cfg(args.weights)
    dummy_coco_dataset = dummy_datasets.get_coco_dataset()
    if os.path.isdir(args.im_or_folder):
        im_list = glob.iglob(args.im_or_folder + '/*.' + args.image_ext)
    else:
        im_list = [args.im_or_folder]
    json_dict = {}
    for i, im_name in enumerate(im_list):
        basename = os.path.basename(im_name)
        page = int(basename[-7:-4])
        pdf_name = basename[:-8]
        #out_name = os.path.join(
        #    args.output_dir, '{}'.format(os.path.basename(im_name) + '.' + args.output_ext)
        #    args.output_dir, '{}'.format(os.path.basename(im_name) + '.tables.json')
        #)
        #logger.info('Processing {} -> {}'.format(im_name, out_name))
        im = cv2.imread(im_name)
        timers = defaultdict(Timer)
        t = time.time()
        with c2_utils.NamedCudaScope(0):
            cls_boxes, cls_segms, cls_keyps = infer_engine.im_detect_all(
                model, im, None, timers=timers
            )
        logger.info('Inference time: {:.3f}s'.format(time.time() - t))
        for k, v in timers.items():
            logger.info(' | {}: {:.3f}s'.format(k, v.average_time))
        if i == 0:
            logger.info(
                ' \ Note: inference on the first image will be slower than the '
                'rest (caches and auto-tuning need to warm up)'
            )
        # Skip writing PDF output
        #vis_utils.vis_one_image(
        #    im[:, :, ::-1],  # BGR -> RGB for visualization
        #    im_name,
        #    args.output_dir,
        #    cls_boxes,
        #    cls_segms,
        #    cls_keyps,
        #    dataset=dummy_coco_dataset,
        #    box_alpha=0.3,
        #    show_class=True,
        #    thresh=args.thresh,
        #    kp_thresh=args.kp_thresh,
        #    ext=args.output_ext,
        #    out_when_no_box=args.out_when_no_box
        #)
        boxes, segms, keypoints, classes = vis_utils.convert_from_cls_format(
            cls_boxes, cls_segms, cls_keyps)
        if boxes is not None:
            for box in boxes:
                b = box.tolist()
                if b[4] < 0.9:
                    continue
                w = b[2] - b[0] + 1
                h = b[3] - b[1] + 1
                json_dict.setdefault(pdf_name, []).append({
                    'page': page, 'x': b[0], 'y': b[1],
                    'w': w, 'h': h, 'score': b[4]})
    # args.output_dir + '/boxes.json'
    for pdf_name, jsons in json_dict.items():
        out_name = os.path.join(args.im_or_folder,
                                '{}'.format(pdf_name + '.tables.json'))
        if len(jsons) > 0:
            with open(out_name, 'w') as outfile:
                json.dump(jsons, outfile, indent=4)
def main(args):
    logger = logging.getLogger(__name__)
    merge_cfg_from_file(args.cfg)
    cfg.NUM_GPUS = 1
    args.weights = cache_url(args.weights, cfg.DOWNLOAD_CACHE)
    assert_and_infer_cfg(cache_urls=False)
    assert not cfg.MODEL.RPN_ONLY, \
        'RPN models are not supported'
    assert not cfg.TEST.PRECOMPUTED_PROPOSALS, \
        'Models that require precomputed proposals are not supported'
    model = infer_engine.initialize_model_from_cfg(args.weights)
    dummy_coco_dataset = dummy_datasets.get_coco_dataset()
    if os.path.isdir(args.im_or_folder):
        im_list = glob.iglob(args.im_or_folder + '/*.' + args.image_ext)
    else:
        im_list = [args.im_or_folder]
    for i, im_name in enumerate(im_list):
        im_basename = os.path.splitext(os.path.basename(im_name))[0]
        out_name = os.path.join(
            args.output_dir, '{}'.format(im_basename + '.' + args.output_ext)
        )
        logger.info('Processing {} -> {}'.format(im_name, out_name))
        im = cv2.imread(im_name)
        timers = defaultdict(Timer)
        t = time.time()
        with c2_utils.NamedCudaScope(0):
            cls_boxes, cls_segms, cls_keyps = infer_engine.im_detect_all(
                model, im, None, timers=timers
            )
        logger.info('Inference time: {:.3f}s'.format(time.time() - t))
        for k, v in timers.items():
            logger.info(' | {}: {:.3f}s'.format(k, v.average_time))
        if i == 0:
            logger.info(
                ' \ Note: inference on the first image will be slower than the '
                'rest (caches and auto-tuning need to warm up)'
            )
        vis_utils.vis_one_image(
            im[:, :, ::-1],  # BGR -> RGB for visualization
            im_name,
            args.output_dir,
            cls_boxes,
            cls_segms,
            cls_keyps,
            dataset=dummy_coco_dataset,
            box_alpha=0.3,
            show_class=True,
            thresh=args.thresh,
            kp_thresh=args.kp_thresh,
            ext=args.output_ext,
            out_when_no_box=args.out_when_no_box
        )
        # save bounding box information to a file
        box_results = os.path.join(args.output_dir,
                                   '{}.csv'.format(im_basename))
        box_fh = open(box_results, 'w')
        # convert class bounding boxes, segments, keypoints
        boxes, segms, keypoints, classes = vis_utils.convert_from_cls_format(
            cls_boxes, cls_segms, cls_keyps
        )
        if boxes is not None:
            for i in range(len(boxes)):
                left = int(round(boxes[i, 0]))
                top = int(round(boxes[i, 1]))
                right = int(round(boxes[i, 2]))
                bottom = int(round(boxes[i, 3]))
                score = boxes[i, -1]
                if score < args.thresh:
                    continue
                class_name = dummy_coco_dataset.classes[classes[i]]
                box_fh.write('{},{:.2f},{},{},{},{}\n'.format(
                    class_name, score, left, top, right, bottom))
        box_fh.close()
def vis_densepose(img,
                  cls_boxes,
                  cls_bodys,
                  show_human_index=False,
                  show_uv=True,
                  show_grid=False,
                  show_border=False,
                  border_thick=1,
                  alpha=0.4):
    """Construct a numpy array showing the densepose detection.

    # Arguments:
        img: image used for densepose inference
        cls_boxes: bounding boxes found during inference of image img
        cls_bodys: UV of each body parts found during inference of image img
        show_uv: show the UV fields
        show_grid: show the isocontours of the UV fields
        alpha: how much blended the densepose visualisation is with the
            original image img

    # Return:
        A numpy image array showing the densepose detection
    """
    if isinstance(cls_boxes, list):
        boxes, _, _, _ = vis_utils.convert_from_cls_format(
            cls_boxes, None, None)
    if boxes is None or boxes.shape[0] == 0 or max(boxes[:, 4]) < 0.9:
        return img
    ## Get full IUV image out
    IUV_fields = cls_bodys[1]
    #
    all_coords = np.zeros(img.shape)  # I, U and V channels
    all_inds = np.zeros([img.shape[0], img.shape[1]])
    # all_inds stores index of corresponding human (background=0)
    ##
    inds = np.argsort(boxes[:, 4])
    ##
    for i, ind in enumerate(inds):
        entry = boxes[ind, :]
        if entry[4] > 0.65:  # second threshold on human proba to be in box
            entry = entry[0:4].astype(int)
            ####
            output = IUV_fields[ind]
            ####
            all_coords_box = all_coords[
                entry[1]:entry[1] + output.shape[1],
                entry[0]:entry[0] + output.shape[2], :]
            all_coords_box[all_coords_box == 0] = output.transpose(
                [1, 2, 0])[all_coords_box == 0]
            all_coords[entry[1]:entry[1] + output.shape[1],
                       entry[0]:entry[0] + output.shape[2], :] = all_coords_box
            ###
            CurrentMask = (output[0, :, :] > 0).astype(np.float32)
            all_inds_box = all_inds[entry[1]:entry[1] + output.shape[1],
                                    entry[0]:entry[0] + output.shape[2]]
            all_inds_box[all_inds_box == 0] = \
                CurrentMask[all_inds_box == 0] * (i + 1)
            all_inds[entry[1]:entry[1] + output.shape[1],
                     entry[0]:entry[0] + output.shape[2]] = all_inds_box
    #
    all_coords[:, :, 1:3] = 255. * all_coords[:, :, 1:3]
    all_coords[all_coords > 255] = 255.
    all_coords = all_coords.astype(np.uint8)
    all_inds = all_inds.astype(np.uint8)
    res = img  # initialise image result to input image
    if show_human_index:
        all_inds_vis = all_inds * (210.0 / all_inds.max())
        # normalise all_inds values between 0. and 210.
        all_inds_stacked = np.stack((all_inds_vis,) * 3, axis=-1)
        res = all_inds_stacked
    elif show_grid:
        res = vis_isocontour(img, all_coords)
        alpha = 0.
    elif show_uv:
        res = all_coords  # I, U and V channels
        alpha = 0.
    if show_border:
        res = vis_mask(res,
                       all_inds,
                       np.array([150., 20., 200.]),
                       alpha=alpha,
                       show_border=show_border,
                       border_thick=border_thick)
    return res
def main(args):
    logger = logging.getLogger(__name__)
    merge_cfg_from_file(args.cfg)
    cfg.NUM_GPUS = 1
    args.weights = cache_url(args.weights, cfg.DOWNLOAD_CACHE)
    assert_and_infer_cfg(cache_urls=False)
    assert not cfg.MODEL.RPN_ONLY, \
        'RPN models are not supported'
    assert not cfg.TEST.PRECOMPUTED_PROPOSALS, \
        'Models that require precomputed proposals are not supported'
    model = infer_engine.initialize_model_from_cfg(args.weights)
    dummy_coco_dataset = dummy_datasets.get_coco_dataset()
    if os.path.isdir(args.im_or_folder):
        im_list = glob.iglob(args.im_or_folder + '/*.' + args.image_ext)
    else:
        im_list = [args.im_or_folder]
    for i, im_name in enumerate(im_list):
        out_name = os.path.join(
            args.output_dir,
            '{}'.format(os.path.basename(im_name) + '.' + args.output_ext)
        )
        logger.info('Processing {} -> {}'.format(im_name, out_name))
        im = cv2.imread(im_name)
        timers = defaultdict(Timer)
        t = time.time()
        with c2_utils.NamedCudaScope(0):
            cls_boxes, cls_segms, cls_keyps = infer_engine.im_detect_all(
                model, im, None, timers=timers
            )
        logger.info('Inference time: {:.3f}s'.format(time.time() - t))
        for k, v in timers.items():
            logger.info(' | {}: {:.3f}s'.format(k, v.average_time))
        if i == 0:
            logger.info(
                ' \ Note: inference on the first image will be slower than the '
                'rest (caches and auto-tuning need to warm up)'
            )
        # vis_utils.vis_one_image(
        #     im[:, :, ::-1],  # BGR -> RGB for visualization
        #     im_name,
        #     args.output_dir,
        #     cls_boxes,
        #     cls_segms,
        #     cls_keyps,
        #     dataset=dummy_coco_dataset,
        #     box_alpha=0.3,
        #     show_class=True,
        #     thresh=args.thresh,
        #     kp_thresh=args.kp_thresh,
        #     ext=args.output_ext,
        #     out_when_no_box=args.out_when_no_box
        # )
        boxes, segms, keypoints, classes = vis_utils.convert_from_cls_format(
            cls_boxes, cls_segms, cls_keyps)
        print(classes)
        with open(os.path.join(args.output_dir, "out.csv"), "a+",
                  newline='') as csv_out:
            csv_writer = csv.writer(csv_out, delimiter=';', quotechar='"')
            # Save one box per line
            for i, box in enumerate(boxes):
                if box[-1] < 0.8:
                    continue
                if classes[i] not in [5]:
                    continue
                row = [im_name.split(os.path.sep)[-1]]
                # Format box x1, y1, x2, y2, p
                for coord in box[:4]:
                    row.append(int(round(coord)))
                row.append(classes[i])
                csv_writer.writerow(row)
def save_submit(im,
                im_name,
                output_dir,
                boxes,
                segms=None,
                keypoints=None,
                thresh=0.9,
                kp_thresh=2,
                dpi=200,
                box_alpha=0.0,
                dataset=None,
                show_class=False,
                ext='pdf',
                out_when_no_box=False):
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)
    if isinstance(boxes, list):
        boxes, segms, keypoints, classes = vis_utils.convert_from_cls_format(
            boxes, segms, keypoints)
    if boxes is None:
        sorted_inds = []  # avoid crash when 'boxes' is None
    else:
        # Display in largest to smallest order to reduce occlusion
        areas = (boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1])
        sorted_inds = np.argsort(-areas)
    rects = []
    for i in sorted_inds:
        bbox = boxes[i, :4]
        score = boxes[i, -1]
        if score < thresh:
            continue
        #print(vis_utils.get_class_string(classes[i], score, dataset))
        #print(bbox)
        rect = {
            "xmin": bbox[0],
            "xmax": bbox[2],
            "ymin": bbox[1],
            "ymax": bbox[3],
            "confidence": score,
            "label": "defect" + str(classes[i] - 1)
        }
        rects.append(rect)
        # show box (off by default)
        '''
        ax.add_patch(
            plt.Rectangle((bbox[0], bbox[1]),
                          bbox[2] - bbox[0],
                          bbox[3] - bbox[1],
                          fill=False,
                          edgecolor='g',
                          linewidth=0.5,
                          alpha=box_alpha))
        if show_class:
            ax.text(
                bbox[0], bbox[1] - 2,
                get_class_string(classes[i], score, dataset),
                fontsize=3,
                family='serif',
                bbox=dict(
                    facecolor='g', alpha=0.4, pad=0, edgecolor='none'),
                color='white')
        '''
    return rects
filename = "svm.p" clf = pickle.load(open(filename, "rb")) try: video = cv2.VideoCapture("data_processing/test.mp4") if not video.isOpened(): exit("Couldn't access webcam (check permissions?)") _, im = video.read() #im = cv2.imread("image.jpg") while im is not None: #Detect stuff and convert to usable form cls_boxes, cls_segms, cls_keyps = kpdetection.detect(im) boxes, segms, keyps, classes = \ vis_utils.convert_from_cls_format(cls_boxes, cls_segms, cls_keyps) #Remove keypoints below thresholds keyps, boxes = kpdetection.prune(keyps, boxes) #Draw keypoints on frame visualize = True if visualize: vis = vis_utils.vis_one_image_opencv(im, cls_boxes, keypoints=cls_keyps) cv2.imshow("image", vis) #SVM if len(keyps) > 0: #if anything detected
def main(args):
    logger = logging.getLogger(__name__)
    merge_cfg_from_file(args.cfg)
    cfg.NUM_GPUS = 1
    args.weights = cache_url(args.weights, cfg.DOWNLOAD_CACHE)
    assert_and_infer_cfg(cache_urls=False)
    model = infer_engine.initialize_model_from_cfg(args.weights)
    dummy_coco_dataset = dummy_datasets.get_coco_dataset()
    if os.path.isdir(args.im_or_folder):
        im_list = glob.iglob(args.im_or_folder + '/*.' + args.image_ext)
    else:
        im_list = [args.im_or_folder]
    for i, im_name in enumerate(im_list):
        out_name = os.path.join(
            args.output_dir, '{}'.format(os.path.basename(im_name) + '.pdf'))
        logger.info('Processing {} -> {}'.format(im_name, out_name))
        im = cv2.imread(im_name)
        timers = defaultdict(Timer)
        t = time.time()
        with c2_utils.NamedCudaScope(0):
            cls_boxes, cls_segms, cls_keyps = infer_engine.im_detect_all(
                model, im, None, timers=timers)
        logger.info('Inference time: {:.3f}s'.format(time.time() - t))
        for k, v in timers.items():
            logger.info(' | {}: {:.3f}s'.format(k, v.average_time))
        if i == 0:
            logger.info(
                ' \ Note: inference on the first image will be slower than the '
                'rest (caches and auto-tuning need to warm up)')
        # vis_utils.vis_one_image(
        #     im[:, :, ::-1],  # BGR -> RGB for visualization
        #     im_name,
        #     args.output_dir,
        #     cls_boxes,
        #     cls_segms,
        #     cls_keyps,
        #     dataset=dummy_coco_dataset,
        #     box_alpha=0.3,
        #     show_class=True,
        #     thresh=0.7,
        #     kp_thresh=2
        # )
        import pycocotools.mask as mask_util
        boxes, segms, keypoints, classes = vis_utils.convert_from_cls_format(
            cls_boxes, cls_segms, cls_keyps)
        if segms is not None and len(segms) > 0:
            score = boxes[:, -1]
            index = [i for i, _sc in enumerate(score) if _sc > 0.7]
            mask = mask_util.decode(segms)
            for i in index:
                class_text = get_class_string(classes[i], dummy_coco_dataset)
                if not os.path.exists(
                        '{}/{}'.format(args.output_dir, class_text)):
                    os.mkdir('{}/{}'.format(args.output_dir, class_text))
                cv2.imwrite(
                    '{}/{}/{}_{}.jpg'.format(args.output_dir, class_text,
                                             im_name.split('/')[-1][:-4], i),
                    mask[:, :, i] * 255.0)
                print(class_text)
def main(args):
    logger = logging.getLogger(__name__)
    merge_cfg_from_file(args.cfg)
    cfg.NUM_GPUS = 1
    args.weights = cache_url(args.weights, cfg.DOWNLOAD_CACHE)
    assert_and_infer_cfg(cache_urls=False)
    assert not cfg.MODEL.RPN_ONLY, \
        'RPN models are not supported'
    assert not cfg.TEST.PRECOMPUTED_PROPOSALS, \
        'Models that require precomputed proposals are not supported'
    model = infer_engine.initialize_model_from_cfg(args.weights)
    dummy_coco_dataset = dummy_datasets.get_coco_dataset()
    if os.path.isdir(args.im_or_folder):
        im_list = glob.iglob(args.im_or_folder + '/*.' + args.image_ext)
    else:
        im_list = [args.im_or_folder]
    for i, im_name in enumerate(im_list):
        out_name = os.path.join(args.output_dir,
                                '{}'.format(os.path.basename(im_name)))
        logger.info('Processing {} -> {}'.format(im_name, out_name))
        im = cv2.imread(im_name)
        timers = defaultdict(Timer)
        t = time.time()
        with c2_utils.NamedCudaScope(0):
            cls_boxes, cls_segms, cls_keyps = infer_engine.im_detect_all(
                model, im, None, timers=timers)
        logger.info('Inference time: {:.3f}s'.format(time.time() - t))
        for k, v in timers.items():
            logger.info(' | {}: {:.3f}s'.format(k, v.average_time))
        if i == 0:
            logger.info(
                ' \ Note: inference on the first image will be slower than the '
                'rest (caches and auto-tuning need to warm up)')
        # pdb.set_trace()
        boxes, segms, keyps, classes = vis_utils.convert_from_cls_format(
            cls_boxes, cls_segms, cls_keyps)
        if boxes is None:
            continue
        segms = vis_utils.mask_util.decode(segms)
        valid_inds = np.greater(boxes[:, 4], 0.5)
        boxes = boxes[valid_inds, :]
        segms = segms[:, :, valid_inds]
        classes = np.array(classes)[valid_inds]
        out_mat_name = os.path.join(args.output_dir,
                                    '{}'.format(os.path.basename(im_name)))
        class_names = np.asarray(
            [coco_to_pascal_name(coco_classes[c - 1]) for c in classes],
            dtype='object')
        sio.savemat(out_mat_name, {
            'masks': segms,
            'boxes': boxes,
            'classes': class_names
        })
def main(args):
    logger = logging.getLogger(__name__)
    merge_cfg_from_file(args.cfg)
    #cfg.TEST.WEIGHTS = args.weights
    cfg.NUM_GPUS = 1
    args.weights = cache_url(args.weights, cfg.DOWNLOAD_CACHE)
    assert_and_infer_cfg(cache_urls=False)
    model = infer_engine.initialize_model_from_cfg(args.weights)
    dummy_coco_dataset = dummy_datasets.get_coco_dataset()
    if os.path.isdir(args.im_or_folder):
        im_list = glob.iglob(args.im_or_folder + '/*.' + args.image_ext)
    else:
        im_list = [args.im_or_folder]
    # Sort frames by number
    im_list = list(im_list)
    im_list.sort()
    json_output = []
    for i, im_name in enumerate(im_list):
        out_name = os.path.join(
            args.output_dir, '{}'.format(os.path.basename(im_name) + '.pdf'))
        logger.info('Processing {} -> {}'.format(im_name, out_name))
        im = cv2.imread(im_name)
        timers = defaultdict(Timer)
        t = time.time()
        with c2_utils.NamedCudaScope(0):
            cls_boxes, cls_segms, cls_keyps = infer_engine.im_detect_all(
                model, im, None, timers=timers)
        logger.info('Inference time: {:.3f}s'.format(time.time() - t))
        for k, v in timers.items():
            logger.info(' | {}: {:.3f}s'.format(k, v.average_time))
        if i == 0:
            logger.info(
                ' \ Note: inference on the first image will be slower than the '
                'rest (caches and auto-tuning need to warm up)')
        boxes, segms, keypoints, classes = vis_utils.convert_from_cls_format(
            cls_boxes, cls_segms, cls_keyps)
        if boxes is None:
            boxes = []
        else:
            boxes = boxes.tolist()
        dictionary = {'Bounding boxes': boxes}
        # Skip writing PDF output
        # vis_utils.vis_one_image(
        #     im[:, :, ::-1],  # BGR -> RGB for visualization
        #     im_name,
        #     args.output_dir,
        #     cls_boxes,
        #     cls_segms,
        #     cls_keyps,
        #     dataset=dummy_coco_dataset,
        #     box_alpha=0.3,
        #     show_class=True,
        #     thresh=0.7,
        #     kp_thresh=2
        # )
        with open(
                args.output_dir + '/' +
                os.path.splitext(os.path.basename(im_name))[0] + '.json',
                'w') as outfile:
            json.dump(dictionary, outfile, indent=4)
def main(args):
    logger = logging.getLogger(__name__)
    merge_cfg_from_file(args.cfg)
    cfg.NUM_GPUS = 1
    args.weights = cache_url(args.weights, cfg.DOWNLOAD_CACHE)
    assert_and_infer_cfg(cache_urls=False)
    assert not cfg.MODEL.RPN_ONLY, \
        'RPN models are not supported'
    assert not cfg.TEST.PRECOMPUTED_PROPOSALS, \
        'Models that require precomputed proposals are not supported'
    model = infer_engine.initialize_model_from_cfg(args.weights)
    dummy_coco_dataset = dummy_datasets.get_coco_dataset()
    if os.path.isdir(args.im_or_folder):
        im_list = glob.iglob(args.im_or_folder + '/*.' + args.image_ext)
    else:
        im_list = [args.im_or_folder]
    jsonboxes = []
    for i, im_name in enumerate(im_list):
        out_name = os.path.join(
            args.output_dir,
            '{}'.format(os.path.basename(im_name) + '.' + args.output_ext))
        file_name = os.path.basename(im_name)
        (image_name, ext) = os.path.splitext(os.path.basename(im_name))
        logger.info('Processing {} -> {}'.format(im_name, out_name))
        im = cv2.imread(im_name)
        timers = defaultdict(Timer)
        t = time.time()
        with c2_utils.NamedCudaScope(0):
            cls_boxes, cls_segms, cls_keyps = infer_engine.im_detect_all(
                model, im, None, timers=timers)
        boxes, segms, keypoints, classes = convert_from_cls_format(
            cls_boxes, cls_segms, cls_keyps)
        jsonboxes_perimage = {
            'boxes': boxes,
            'classes': classes,
            'image_id': int(image_name),
            'file_name': file_name
        }
        jsonboxes_perimage['boxes'] = jsonboxes_perimage['boxes'].tolist()
        jsonboxes.append(jsonboxes_perimage)
        logger.info('Inference time: {:.3f}s'.format(time.time() - t))
        for k, v in timers.items():
            logger.info(' | {}: {:.3f}s'.format(k, v.average_time))
        if i == 0:
            logger.info(
                ' \ Note: inference on the first image will be slower than the '
                'rest (caches and auto-tuning need to warm up)')
        vis_utils.vis_one_image(
            im[:, :, ::-1],  # BGR -> RGB for visualization
            im_name,
            args.output_dir,
            cls_boxes,
            cls_segms,
            cls_keyps,
            dataset=dummy_coco_dataset,
            box_alpha=0.3,
            show_class=True,
            thresh=args.thresh,
            kp_thresh=args.kp_thresh,
            ext=args.output_ext,
            out_when_no_box=args.out_when_no_box)
    Dataset_name = args.dataset_name
    direction = '/home/tecnimaq/Gabriela/Detectron/json_dump'
    jsonboxesfile = 'jsonboxes_' + Dataset_name + '.json'
    with open(os.path.join(direction, jsonboxesfile), 'w') as thisfile:
        json.dump(jsonboxes, thisfile)
    dir = os.path.join(direction, jsonboxesfile)
    info.get_info(dir, Dataset_name)
def main(args):
    logger = logging.getLogger(__name__)
    merge_cfg_from_file(args.cfg)
    #cfg.TEST.WEIGHTS = args.weights
    cfg.NUM_GPUS = 1
    args.weights = cache_url(args.weights, cfg.DOWNLOAD_CACHE)
    assert_and_infer_cfg(cache_urls=False)
    #assert_and_infer_cfg()
    model = infer_engine.initialize_model_from_cfg(args.weights)
    dummy_coco_dataset = dummy_datasets.get_coco_dataset()
    if os.path.isdir(args.im_or_folder):
        im_list = glob.iglob(args.im_or_folder + '/*.' + args.image_ext)
    else:
        im_list = [args.im_or_folder]
    for ii, im_name in enumerate(im_list):
        out_name = os.path.join(
            args.output_dir, '{}'.format(os.path.basename(im_name) + '.pdf'))
        logger.info('Processing {} -> {}'.format(im_name, out_name))
        im = cv2.imread(im_name)
        h, w = im.shape[:2]
        subimages = []
        for x in range(3):
            for y in range(3):
                x1, y1 = x * h // 4, y * w // 4
                x2, y2 = (x + 2) * h // 4, (y + 2) * w // 4
                subimages.append([x1, y1, x2, y2])
        timers = defaultdict(Timer)
        t = time.time()
        with c2_utils.NamedCudaScope(0):
            cls_boxes = []
            cls_segms = []
            cls_keyps = []
            for index in range(len(subimages)):
                x1, y1, x2, y2 = subimages[index]
                _cls_boxes, _cls_segms, _cls_keyps = infer_engine.im_detect_all(
                    model, im[x1:x2, y1:y2, :], None, timers=timers)
                cls_boxes.append(_cls_boxes)
                cls_segms.append(_cls_segms)
                cls_keyps.append(_cls_keyps)
        logger.info('Inference time: {:.3f}s'.format(time.time() - t))
        if ii == 0:
            logger.info(
                ' \ Note: inference on the first image will be slower than the '
                'rest (caches and auto-tuning need to warm up)')
        t = time.time()
        out_name_yml = os.path.join(
            args.output_dir,
            '{}'.format(os.path.basename(im_name)[:-4] + '.yml'))
        _mask = np.zeros((h, w), dtype=np.uint8)
        all_boxes = np.zeros((0, 5))
        all_classes = []
        all_segs = []
        for index in range(len(subimages)):
            x1, y1, x2, y2 = subimages[index]
            boxes, segms, keyps, classes = vis_utils.convert_from_cls_format(
                cls_boxes[index], cls_segms[index], cls_keyps[index])
            if boxes is None:
                continue
            for i in range(boxes.shape[0]):
                _tmp = np.zeros((h, w), dtype=np.uint8, order='F')
                __segm = mask_util.decode(segms[i])
                _tmp[x1:x2, y1:y2] = __segm
                __tmp = mask_util.encode(_tmp)
                all_segs.append(__tmp)
                _mask[x1:x2, y1:y2] += __segm
                all_classes.append(classes[i])
            boxes[:, 0] += y1
            boxes[:, 2] += y1
            boxes[:, 1] += x1
            boxes[:, 3] += x1
            all_boxes = np.vstack((all_boxes, boxes))
        _mask = _mask.astype(bool).astype(int)
        out_name_mask = os.path.join(
            args.output_dir,
            '{}'.format(os.path.basename(im_name)[:-4] + '.png'))
        cv2.imwrite(out_name_mask, _mask * 255)
        with open(out_name_yml, 'a+') as outfile:
            yaml.dump({
                'boxes': all_boxes,
                'segms': all_segs,
                'classes': all_classes
            }, outfile, default_flow_style=False)
        logger.info('Saving time: {:.3f}s'.format(time.time() - t))
        for k, v in timers.items():
            logger.info(' | {}: {:.3f}s'.format(k, v.average_time))