def predict(self, im):
    data_list = []
    with c2_utils.NamedCudaScope(self.gpu_id):
        cls_boxes, cls_segms, cls_keyps = infer_engine.im_detect_all(
            self.model, im, None, None)
    # Get boxes and classes.
    if isinstance(cls_boxes, list):
        boxes, segms, keypoints, classes = self.convert_from_cls_format(
            cls_boxes, cls_segms, cls_keyps)
    if boxes is None or boxes.shape[0] == 0 or max(
            boxes[:, 4]) < self.score_thresh:
        return data_list
    # Sort detections by box area, largest first.
    areas = (boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1])
    sorted_inds = np.argsort(-areas)
    # NMS between classes.
    # im2 = cv2.cvtColor(im, cv2.COLOR_RGB2BGR)
    # result2 = im2.copy()
    if len(sorted_inds) > 0:
        # Threshold is 0.9; the larger the threshold, the fewer boxes are
        # suppressed.
        nmsIndex = self.nms_between_classes(boxes, self.class_nms_thresh)
        for i in range(len(nmsIndex)):
            bbox = boxes[nmsIndex[i], :4]
            score = boxes[nmsIndex[i], -1]
            if score < self.score_thresh:
                continue
            # Get the class string.
            class_str = self.get_class_string(classes[nmsIndex[i]], score,
                                              self.dummy_coco_dataset)
            # Per-class score thresholds.
            if self.per_class_thresh:
                per_class_thresholds = {
                    'autotruck': self.autotruck_score_thresh,
                    'forklift': self.forklift_score_thresh,
                    'digger': self.digger_score_thresh,
                    'car': self.car_score_thresh,
                    'bus': self.bus_score_thresh,
                    'tanker': self.tanker_score_thresh,
                    'person': self.person_score_thresh,
                    'minitruck': self.minitruck_score_thresh,
                    'minibus': self.minibus_score_thresh,
                }
                if score < per_class_thresholds.get(class_str,
                                                    self.score_thresh):
                    continue
            single_data = {
                "cls": class_str,
                "score": float('%.2f' % score),
                "bbox": {
                    "xmin": int(bbox[0]),
                    "ymin": int(bbox[1]),
                    "xmax": int(bbox[2]),
                    "ymax": int(bbox[3])
                }
            }
            data_list.append(single_data)
    # Construction of data_list is complete.
    return data_list
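
# A minimal usage sketch for the predict() method above. The `detector`
# object and run_single_image() are assumptions for illustration: `detector`
# is assumed to be an instance of the class defining predict(), already
# carrying .model, .gpu_id, .score_thresh and the other attributes used there.
import cv2

def run_single_image(detector, image_path):
    im = cv2.imread(image_path)  # BGR image, as expected by im_detect_all
    detections = detector.predict(im)
    for det in detections:
        print('%s %.2f @ (%d, %d, %d, %d)' % (
            det['cls'], det['score'],
            det['bbox']['xmin'], det['bbox']['ymin'],
            det['bbox']['xmax'], det['bbox']['ymax']))
    return detections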
def initialize_gpu_from_weights_file(model, weights_file, gpu_id=0):
    """Initialize a network with ops on a specific GPU.

    If you use CUDA_VISIBLE_DEVICES to target specific GPUs, Caffe2 will
    automatically map logical GPU ids (starting from 0) to the physical GPUs
    specified in CUDA_VISIBLE_DEVICES.
    """
    logger.info('Loading weights from: {}'.format(weights_file))
    ws_blobs = workspace.Blobs()
    with open(weights_file, 'rb') as f:
        src_blobs = pickle.load(f, encoding='latin1')
    if 'cfg' in src_blobs:
        saved_cfg = yaml.load(src_blobs['cfg'])
        configure_bbox_reg_weights(model, saved_cfg)
    if 'blobs' in src_blobs:
        # Backwards compat--dictionary used to be only blobs, now they are
        # stored under the 'blobs' key
        src_blobs = src_blobs['blobs']
    # Initialize weights on GPU gpu_id only
    unscoped_param_names = OrderedDict()  # Print these out in model order
    for blob in model.params:
        unscoped_param_names[c2_utils.UnscopeName(str(blob))] = True
    with c2_utils.NamedCudaScope(gpu_id):
        for unscoped_param_name in list(unscoped_param_names.keys()):
            if (unscoped_param_name.find(']_') >= 0 and
                    unscoped_param_name not in src_blobs):
                # Special case for sharing initialization from a pretrained
                # model:
                # If a blob named '_[xyz]_foo' is in model.params and not in
                # the initialization blob dictionary, then load source blob
                # 'foo' into destination blob '_[xyz]_foo'
                src_name = unscoped_param_name[
                    unscoped_param_name.find(']_') + 2:]
            else:
                src_name = unscoped_param_name
            if src_name not in src_blobs:
                logger.info('{:s} not found'.format(src_name))
                continue
            dst_name = core.ScopedName(unscoped_param_name)
            has_momentum = src_name + '_momentum' in src_blobs
            has_momentum_str = ' [+ momentum]' if has_momentum else ''
            logger.debug(
                '{:s}{:} loaded from weights file into {:s}: {}'.format(
                    src_name, has_momentum_str, dst_name,
                    src_blobs[src_name].shape))
            if dst_name in ws_blobs:
                # If the blob is already in the workspace, make sure that it
                # matches the shape of the loaded blob
                ws_blob = workspace.FetchBlob(dst_name)
                assert ws_blob.shape == src_blobs[src_name].shape, \
                    ('Workspace blob {} with shape {} does not match '
                     'weights file shape {}').format(
                        src_name, ws_blob.shape, src_blobs[src_name].shape)
            workspace.FeedBlob(
                dst_name,
                src_blobs[src_name].astype(np.float32, copy=False))
            if has_momentum:
                workspace.FeedBlob(
                    dst_name + '_momentum',
                    src_blobs[src_name + '_momentum'].astype(
                        np.float32, copy=False))
    # We preserve blobs that are in the weights file but not used by the
    # current model. We load these into CPU memory under the '__preserve__/'
    # namescope. These blobs will be stored when saving a model to a weights
    # file. This feature allows for alternating optimization of Faster R-CNN
    # in which blobs unused by one step can still be preserved forward and
    # used to initialize another step.
    for src_name in list(src_blobs.keys()):
        if (src_name not in unscoped_param_names and
                not src_name.endswith('_momentum') and
                src_blobs[src_name] is not None):
            with c2_utils.CpuScope():
                workspace.FeedBlob(
                    '__preserve__/{:s}'.format(src_name), src_blobs[src_name])
                logger.debug(
                    '{:s} preserved in workspace (unused)'.format(src_name))
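
# A minimal sketch of calling the loader above. model_builder.create matches
# Detectron's packaged API, but the import paths may differ on older
# checkouts, and the weights path is a placeholder assumption.
from detectron.core.config import cfg
from detectron.modeling import model_builder

def build_and_load(weights_path, gpu_id=0):
    # Build an inference-mode model from the (already merged) global cfg,
    # then populate its parameters from the pickled weights file.
    model = model_builder.create(cfg.MODEL.TYPE, train=False, gpu_id=gpu_id)
    initialize_gpu_from_weights_file(model, weights_path, gpu_id=gpu_id)
    return model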
def test_net(output_dir, ind_range=None, gpu_id=0):
    """Run inference on all images in a dataset or over an index range of
    images in a dataset using a single GPU.
    """
    assert cfg.TEST.WEIGHTS != '', \
        'TEST.WEIGHTS must be set to the model file to test'
    assert not cfg.MODEL.RPN_ONLY, \
        'Use rpn_generate to generate proposals from RPN-only models'
    assert cfg.TEST.DATASET != '', \
        'TEST.DATASET must be set to the dataset name to test'
    roidb, dataset, start_ind, end_ind, total_num_images = \
        get_roidb_and_dataset(ind_range)
    model = initialize_model_from_cfg(gpu_id=gpu_id)
    num_images = len(roidb)
    num_classes = cfg.MODEL.NUM_CLASSES
    all_boxes, all_segms, all_keyps = empty_results(num_classes, num_images)
    timers = defaultdict(Timer)
    for i, entry in enumerate(roidb):
        if cfg.TEST.PRECOMPUTED_PROPOSALS:
            # The roidb may contain ground-truth rois (for example, if the
            # roidb comes from the training or val split). We only want to
            # evaluate detection on the *non*-ground-truth rois. We select
            # only the rois that have the gt_classes field set to 0, which
            # means there's no ground truth.
            box_proposals = entry['boxes'][entry['gt_classes'] == 0]
            if len(box_proposals) == 0:
                continue
        else:
            # Faster R-CNN type models generate proposals on-the-fly with an
            # in-network RPN; 1-stage models don't require proposals.
            box_proposals = None
        im = cv2.imread(entry['image'])
        depth = cv2.imread(entry['depth']) if cfg.MODEL.DEPTH else None
        with c2_utils.NamedCudaScope(gpu_id):
            cls_boxes_i, cls_segms_i, cls_keyps_i = im_detect_all(
                model, im, box_proposals, timers, depth=depth)
        extend_results(i, all_boxes, cls_boxes_i)
        if cls_segms_i is not None:
            extend_results(i, all_segms, cls_segms_i)
        if cls_keyps_i is not None:
            extend_results(i, all_keyps, cls_keyps_i)
        if i % 10 == 0:  # Reduce log file size
            ave_total_time = np.sum([t.average_time for t in timers.values()])
            eta_seconds = ave_total_time * (num_images - i - 1)
            eta = str(datetime.timedelta(seconds=int(eta_seconds)))
            det_time = (timers['im_detect_bbox'].average_time +
                        timers['im_detect_mask'].average_time +
                        timers['im_detect_keypoints'].average_time)
            misc_time = (timers['misc_bbox'].average_time +
                         timers['misc_mask'].average_time +
                         timers['misc_keypoints'].average_time)
            logger.info(('im_detect: range [{:d}, {:d}] of {:d}: '
                         '{:d}/{:d} {:.3f}s + {:.3f}s (eta: {})').format(
                             start_ind + 1, end_ind, total_num_images,
                             start_ind + i + 1, start_ind + num_images,
                             det_time, misc_time, eta))
        if cfg.VIS:
            im_name = os.path.splitext(os.path.basename(entry['image']))[0]
            vis_utils.vis_one_image(im[:, :, ::-1],
                                    '{:d}_{:s}'.format(i, im_name),
                                    os.path.join(output_dir, 'vis'),
                                    cls_boxes_i,
                                    segms=cls_segms_i,
                                    keypoints=cls_keyps_i,
                                    thresh=cfg.VIS_TH,
                                    box_alpha=0.8,
                                    dataset=dataset,
                                    show_class=True)
    cfg_yaml = yaml.dump(cfg)
    if ind_range is not None:
        det_name = 'detection_range_%s_%s.pkl' % tuple(ind_range)
    else:
        det_name = 'detections.pkl'
    det_file = os.path.join(output_dir, det_name)
    save_object(
        dict(all_boxes=all_boxes,
             all_segms=all_segms,
             all_keyps=all_keyps,
             cfg=cfg_yaml), det_file)
    logger.info('Wrote detections to: {}'.format(os.path.abspath(det_file)))
    return all_boxes, all_segms, all_keyps
def camera(cam, queue, width, height, fps, args):
    global running
    logger = logging.getLogger(__name__)
    merge_cfg_from_file(args.cfg)
    cfg.TEST.WEIGHTS = args.weights
    cfg.NUM_GPUS = 1
    assert_and_infer_cfg()
    model = infer_engine.initialize_model_from_cfg()
    dummy_coco_dataset = dummy_datasets.get_coco_dataset()
    start_time = 0
    count = 0
    # class_names = [
    #     '__background__', 'person', 'bicycle', 'car', 'motorcycle',
    #     'airplane', 'bus', 'train', 'truck']
    # color_list = [[0, 0, 0], [255, 0, 0], [0, 255, 0], [0, 0, 255],
    #               [255, 255, 0], [0, 255, 255], [255, 255, 0],
    #               [255, 0, 255], [255, 255, 255]]
    # Chinese display labels: person, bicycle, vehicle, motorcycle,
    # (airplane), vehicle, (train), vehicle.
    class_names = [
        '__background__', u'人', u'自行车', u'车', u'摩托车', 'airplane',
        u'车', 'train', u'车'
    ]
    color_list = [[0, 0, 0], [255, 0, 0], [0, 255, 0], [0, 0, 255],
                  [255, 255, 0], [0, 0, 255], [0, 0, 255], [255, 0, 255],
                  [0, 0, 255]]
    cls_sel = [1, 2, 3, 4, 6, 8]
    cls_thresh = [1, 0.8, 0.6, 0.9, 0.6, 0.9, 0.8, 0.9, 0.8]
    if count == 0:
        logger.info(
            ' \ Note: inference on the first image will be slower than the '
            'rest (caches and auto-tuning need to warm up)')
    capture = cv2.VideoCapture(cam)
    # capture.set(cv2.CAP_PROP_FRAME_WIDTH, width)
    # capture.set(cv2.CAP_PROP_FRAME_HEIGHT, height)
    # capture.set(cv2.CAP_PROP_FPS, fps)
    # size = (int(capture.get(cv2.CAP_PROP_FRAME_WIDTH)),
    #         int(capture.get(cv2.CAP_PROP_FRAME_HEIGHT)))
    # print(cv2.__version__)
    total = 0
    while True:
        frame = {}
        ret, im = capture.read()
        # timers = defaultdict(Timer)
        # Detect one image.
        if not running:
            frame["img"] = im
            count = 0
        else:
            if count == 5:
                start_time = time.time()
            count = count + 1
            with c2_utils.NamedCudaScope(0):
                cls_boxes, _, _ = infer_engine.im_detect_all(
                    model, im, None, timers=None)
            demo_vis_one_imageboxes_opencv(im, cls_boxes,
                                           thresh=cls_thresh,
                                           show_box=True,
                                           dataset=dummy_coco_dataset,
                                           show_class=True,
                                           class_names=class_names,
                                           color_list=color_list,
                                           cls_sel=cls_sel,
                                           queue=queue,
                                           frame=frame)
            if count >= 5:
                avg_fps = (count - 4) / (time.time() - start_time)
                cv2.putText(frame["img"],
                            '{:s} {:.2f}/s'.format('fps', avg_fps), (40, 40),
                            cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 128, 255),
                            lineType=cv2.LINE_AA)
        queue.put(frame)
        print(queue.qsize())
def camera(queue, width, height, fps, args):
    global running
    logger = logging.getLogger(__name__)
    merge_cfg_from_file(args.cfg)
    cfg.TEST.WEIGHTS = args.weights
    cfg.NUM_GPUS = 1
    cam = args.camera
    if '.' not in cam:
        cam = int(cam)
    assert_and_infer_cfg(cache_urls=False)
    model = infer_engine.initialize_model_from_cfg()
    # dummy_coco_dataset = dummy_datasets.get_coco_dataset()
    start_time = 0
    count = 0
    # class_names = [
    #     '__background__', 'person', 'bicycle', 'car', 'motorcycle',
    #     'airplane', 'bus', 'train', 'truck']
    # class_names = [
    #     '__background__', u'人', u'自行车', u'小汽车', u'摩托车', 'airplane',
    #     u'公共汽车', 'train', u'卡车']
    # color_list = [[0, 0, 0], [255, 0, 0], [0, 255, 0], [0, 0, 255],
    #               [255, 255, 0], [0, 255, 255], [255, 255, 0],
    #               [255, 0, 255], [255, 255, 255]]
    # class_names = [
    #     '__background__', u'人', u'自行车', u'车', u'摩托车', 'airplane',
    #     u'车', 'train', u'车']
    # color_list = [[0, 0, 0], [255, 0, 0], [0, 255, 0], [0, 0, 255],
    #               [255, 255, 0], [0, 0, 255], [0, 0, 255], [255, 0, 255],
    #               [0, 0, 255]]
    # Single Chinese display label meaning "vehicle".
    class_names = ['__background__', u'车']
    color_list = [[0, 0, 0], [0, 255, 0]]
    # cls_sel = [1, 2, 3, 4, 6, 8]
    cls_sel = [1]
    # cls_thresh = [1, 0.5, 0.6, 0.8, 0.6, 0.9, 0.5, 0.9, 0.5]
    cls_thresh = [1, 0.01]
    if count == 0:
        logger.info(
            ' \ Note: inference on the first image will be slower than the '
            'rest (caches and auto-tuning need to warm up)')
    capture = cv2.VideoCapture(cam)
    capture.set(cv2.CAP_PROP_FRAME_WIDTH, width)
    capture.set(cv2.CAP_PROP_FRAME_HEIGHT, height)
    capture.set(cv2.CAP_PROP_FPS, fps)
    size = (float(capture.get(cv2.CAP_PROP_FRAME_WIDTH)),
            float(capture.get(cv2.CAP_PROP_FRAME_HEIGHT)))
    print(size)
    fps = capture.get(cv2.CAP_PROP_FPS)
    fps = 25
    print("%f fps" % fps)
    print(type(cam))
    # Record output video.
    # fourcc = cv2.VideoWriter_fourcc(b'X', b'V', b'I', b'D')
    # fourcc = capture.get(cv2.CAP_PROP_FOURCC)
    fourcc = 1196444237.0
    print(fourcc)
    videoWriter = cv2.VideoWriter(
        '/home/long/objectdetection/vpa_01010002855000000_out.avi',
        int(fourcc), 3, (int(size[0]) // 2, int(size[1]) // 2))
    count2 = 0
    et = 0
    # fisherROI = [193, 173, 342, 110, 538, 97, 747, 148, 797, 173, 813, 290,
    #              796, 388, 670, 425, 482, 439, 270, 413, 177, 360]
    while True:
        frame = {}
        ret, im = capture.read()
        # if type(cam) == str:
        #     im = cv2.resize(im, None, None, fx=width / size[0],
        #                     fy=height / size[1],
        #                     interpolation=cv2.INTER_LINEAR)
        ori_im = copy.deepcopy(im)
        frame["img"] = im
        # _, contours, hierarchy = cv2.findContours(
        #     cv2.cvtColor(im, cv2.COLOR_RGB2GRAY), cv2.RETR_LIST,
        #     cv2.CHAIN_APPROX_SIMPLE)
        # cv2.drawContours(im, [fisherROI], -1, (0, 255, 0), 4)
        # Detect one image.
        if not running:
            if type(cam) == str:
                time.sleep(1 / fps)
            count = 0
            queue.put(frame)
            # count = count + 1
            # if count % 1000:
            #     queue.put(frame)
        else:
            count = count + 1
            # if count == 2:
            #     start_time = time.time()
            # count = count + 1
            if count % 10 == 1:
                st = time.time()
                with c2_utils.NamedCudaScope(0):
                    cls_boxes, _, _ = infer_engine.im_detect_all(
                        model, im, None, timers=None)
                print('one image detection without visualization cost %f fps'
                      % (1 / (time.time() - st)))
                demo_vis_one_imageboxes_opencv(im, cls_boxes,
                                               thresh=cls_thresh,
                                               show_box=True,
                                               show_class=True,
                                               class_names=class_names,
                                               color_list=color_list,
                                               cls_sel=cls_sel,
                                               frame=frame)
                # if count2 >= 1:
                #     et = et + time.time() - st
                #     avg_fps = count2 / et
                #     cv2.putText(frame["img"],
                #                 '{:s} {:.2f}/s'.format('fps', avg_fps),
                #                 (40, 40), cv2.FONT_HERSHEY_SIMPLEX, 1,
                #                 (0, 128, 255), lineType=cv2.LINE_AA)
                # count2 = count2 + 1
            # with c2_utils.NamedCudaScope(0):
            #     cls_boxes, _, _ = infer_engine.im_detect_all(
            #         model, im, None, timers=None)
            # demo_vis_one_imageboxes_opencv(im, cls_boxes, thresh=cls_thresh,
            #                                show_box=True, show_class=True,
            #                                class_names=class_names,
            #                                color_list=color_list,
            #                                cls_sel=cls_sel, frame=frame)
            # if count >= 2:
            #     avg_fps = (count - 1) / (time.time() - start_time)
            #     cv2.putText(frame["img"],
            #                 '{:s} {:.2f}/s'.format('fps', avg_fps),
            #                 (40, 40), cv2.FONT_HERSHEY_SIMPLEX, 1,
            #                 (0, 128, 255), lineType=cv2.LINE_AA)
            # img = cv2.resize(frame["img"], (960, 540))
            # videoWriter.write(img)  # write frame to video
            queue.put(frame)
def main(args):
    logger = logging.getLogger(__name__)
    merge_cfg_from_file(args.cfg)
    cfg.TEST.WEIGHTS = args.weights
    cfg.NUM_GPUS = 1
    assert_and_infer_cfg()
    model = infer_engine.initialize_model_from_cfg()
    dummy_coco_dataset = dummy_datasets.get_coco_dataset()
    if os.path.isdir(args.im_or_folder):
        im_list = glob.iglob(args.im_or_folder + '/*.' + args.image_ext)
    else:
        im_list = [args.im_or_folder]
    train_cfg = open(args.cfg, 'r').read()
    num_classes = yaml.load(train_cfg)['MODEL']['NUM_CLASSES']
    infer_cfg = open('/detectron/tools/infer_list.yaml', 'r').read()
    infer_list = yaml.load(infer_cfg)['thresholds']
    if (num_classes == len(dummy_coco_dataset.classes) and
            len(dummy_coco_dataset.classes) == len(infer_list) + 1):
        if not os.path.exists(args.output_dir):
            os.makedirs(args.output_dir)
        with open(
                os.path.join(args.output_dir, "output_%s.csv" %
                             dummy_coco_dataset.classes[1]), "w") as csvfile:
            writer = csv.writer(csvfile)
            writer.writerow(["pic_name", "xmin", "ymin", "xmax", "ymax",
                             "class", "score"])
            for i, im_name in enumerate(im_list):
                logger.info('No.{} pic ({}) starts prediction'.format(
                    i, im_name))
                im = cv2.imread(im_name)
                timers = defaultdict(Timer)
                t = time.time()
                with c2_utils.NamedCudaScope(0):
                    cls_boxes, cls_segms, cls_keyps = \
                        infer_engine.im_detect_all(
                            model, im, None, timers=timers)
                logger.info('Inference time: {:.3f}s'.format(time.time() - t))
                for k, v in timers.items():
                    logger.info(' | {}: {:.3f}s'.format(k, v.average_time))
                if i == 0:
                    logger.info(
                        ' \ Note: inference on the first image will be '
                        'slower than the rest (caches and auto-tuning need '
                        'to warm up)')
                if isinstance(cls_boxes, list):
                    boxes, segms, keypoints, classes = \
                        vis_utils.convert_from_cls_format(
                            cls_boxes, cls_segms, cls_keyps)
                if boxes is None or boxes.shape[0] == 0 or max(
                        boxes[:, 4]) < min(infer_list.values()):
                    continue
                areas = (boxes[:, 2] - boxes[:, 0]) * \
                        (boxes[:, 3] - boxes[:, 1])
                sorted_inds = np.argsort(-areas)
                for j in sorted_inds:
                    bbox = boxes[j, :4]
                    score = boxes[j, -1]
                    class_text = (
                        dummy_coco_dataset.classes[classes[j]]
                        if dummy_coco_dataset is not None
                        else 'id{:d}'.format(classes[j]))
                    current_thresh = infer_list[class_text]
                    if score < current_thresh:
                        continue
                    writer.writerow([
                        os.path.basename(im_name).split('.')[0], bbox[0],
                        bbox[1], bbox[2], bbox[3], class_text, score
                    ])
    else:
        logger.info('Wrong number of categories')
def main(args):
    logger = logging.getLogger(__name__)
    merge_cfg_from_file(args.cfg)
    cfg.NUM_GPUS = 1
    args.weights = cache_url(args.weights, cfg.DOWNLOAD_CACHE)
    assert_and_infer_cfg()
    model = infer_engine.initialize_model_from_cfg(args.weights)
    dummy_coco_dataset = dummy_datasets.get_coco_dataset()
    if os.path.isfile(args.video_name):
        print("video_name", args.video_name)
    else:
        print("video does not exist")
    cap = cv2.VideoCapture(args.video_name)
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break
        frame = cv2.resize(frame, dsize=(1280, 720))
        timers = defaultdict(Timer)
        t1 = time.time()
        with c2_utils.NamedCudaScope(0):
            cls_boxes, cls_segms, cls_keyps = infer_engine.im_detect_all(
                model, frame, None, timers=timers)
        logger.info('Inference time: {:.3f}s'.format(time.time() - t1))
        for k, v in timers.items():
            logger.info(' | {}: {:.3f}s'.format(k, v.average_time))
        frame = vis_utils.vis_one_image_opencv(frame, cls_boxes,
                                               segms=cls_segms,
                                               keypoints=cls_keyps,
                                               thresh=0.8,
                                               kp_thresh=2,
                                               show_box=True,
                                               dataset=dummy_coco_dataset,
                                               show_class=True)
        t2 = time.time()
        durr = float(t2 - t1)
        fps = 1.0 / durr
        cv2.putText(frame, "fps:%.3f" % fps, (20, 20), 4, 0.5, (0, 255, 0),
                    1, cv2.LINE_AA)
        cv2.imshow('Detection', frame)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
    cap.release()
    cv2.destroyAllWindows()
def detection(args):
    logger = logging.getLogger(__name__)
    merge_cfg_from_file(args.cfg)
    cfg.TEST.WEIGHTS = args.weights
    cfg.NUM_GPUS = 1
    assert_and_infer_cfg()
    model = infer_engine.initialize_model_from_cfg()
    dummy_coco_dataset = dummy_datasets.get_coco_dataset()
    start_time = time.time()
    count = 0
    # class_names = [
    #     '__background__', 'person', 'bicycle', 'car', 'motorcycle',
    #     'airplane', 'bus', 'train', 'truck']
    # color_list = [[0, 0, 0], [255, 0, 0], [0, 255, 0], [0, 0, 255],
    #               [255, 255, 0], [0, 255, 255], [255, 255, 0],
    #               [255, 0, 255], [255, 255, 255]]
    # Chinese display labels: person, vehicle.
    class_names = [
        '__background__', u'人', 'bicycle', u'车', 'motorcycle', 'airplane',
        'car', 'train', 'car'
    ]
    color_list = [[0, 0, 0], [255, 0, 0], [0, 255, 0], [0, 0, 255],
                  [255, 255, 0], [0, 0, 255], [255, 255, 0], [255, 0, 255],
                  [0, 0, 255]]
    cls_sel = [1, 2, 3, 4, 6, 8]
    cls_thresh = [1, 0.6, 0.5, 0.8, 0.5, 0.9, 0.7, 0.9, 0.5]
    if count == 0:
        logger.info(
            ' \ Note: inference on the first image will be slower than the '
            'rest (caches and auto-tuning need to warm up)')
    cap = cv2.VideoCapture(0)
    cap.set(3, 800)
    cap.set(4, 600)
    size = (int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)),
            int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)))
    print(cv2.__version__)
    fourcc = cv2.VideoWriter_fourcc(b'X', b'V', b'I', b'D')
    videoWriter = cv2.VideoWriter('2.avi', fourcc, 10, size)
    # fps = cap.get(cv2.CAP_PROP_FPS)
    # print(fps)
    while True:
        # Get a frame.
        ret, im = cap.read()
        count = count + 1
        # im = cv2.resize(im, None, None, fx=im_scale, fy=im_scale,
        #                 interpolation=cv2.INTER_LINEAR)
        timers = defaultdict(Timer)
        # Detect one image.
        with c2_utils.NamedCudaScope(0):
            cls_boxes, _, _ = infer_engine.im_detect_all(
                model, im, None, timers=timers)
        # logger.info('Inference time: {:.3f}s'.format(time.time() - t))
        # for k, v in timers.items():
        #     logger.info(' | {}: {:.3f}s'.format(k, v.average_time))
        # cls_boxes_sel = cls_boxes[[cls_id for cls_ind, cls_id
        #                            in enumerate(cls_sel[0:])]]
        demo_vis_one_imageboxes_opencv(im, cls_boxes,
                                       thresh=cls_thresh,
                                       show_box=True,
                                       dataset=dummy_coco_dataset,
                                       show_class=True,
                                       class_names=class_names,
                                       color_list=color_list,
                                       cls_sel=cls_sel)
        avg_fps = count / (time.time() - start_time)
        cv2.putText(im, '{:s} {:.3f}/s'.format('fps', avg_fps), (40, 40),
                    cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 255),
                    lineType=cv2.LINE_AA)
        # Show a frame.
        return im
def test_net(weights_file, dataset_name, proposal_file, output_dir,
             ind_range=None, gpu_id=0):
    """Run inference on all images in a dataset or over an index range of
    images in a dataset using a single GPU.
    """
    roidb, dataset, start_ind, end_ind, total_num_images = \
        get_roidb_and_dataset(dataset_name, proposal_file, ind_range)
    model = initialize_model_from_cfg(weights_file, gpu_id=gpu_id)
    num_images = len(roidb)
    num_classes = cfg.MODEL.NUM_CLASSES
    all_scores = empty_results(num_images)
    timers = defaultdict(Timer)
    for i, entry in enumerate(roidb):
        # Just get the ground-truth boxes.
        box_proposals = entry['boxes'][entry['gt_classes'] > 0]
        if len(box_proposals) == 0:
            cls_scores_i = blob_utils.zeros((0, cfg.MODEL.NUM_CLASSES))
            extend_results(i, all_scores, cls_scores_i)
            continue
        im = cv2.imread(entry['image'])
        with c2_utils.NamedCudaScope(gpu_id):
            cls_scores_i = im_classify_bbox(model, im, box_proposals, timers)
        extend_results(i, all_scores, cls_scores_i)
        if i % 10 == 0:  # Reduce log file size
            ave_total_time = np.sum([t.average_time for t in timers.values()])
            eta_seconds = ave_total_time * (num_images - i - 1)
            eta = str(datetime.timedelta(seconds=int(eta_seconds)))
            det_time = timers['im_classify_bbox'].average_time
            misc_time = timers['misc_bbox'].average_time
            logger.info(('im_detect: range [{:d}, {:d}] of {:d}: '
                         '{:d}/{:d} {:.3f}s + {:.3f}s (eta: {})').format(
                             start_ind + 1, end_ind, total_num_images,
                             start_ind + i + 1, start_ind + num_images,
                             det_time, misc_time, eta))
        # if cfg.VIS:
        #     im_name = os.path.splitext(os.path.basename(entry['image']))[0]
        #     vis_utils.vis_one_image(
        #         im[:, :, ::-1],
        #         '{:d}_{:s}'.format(i, im_name),
        #         os.path.join(output_dir, 'vis'),
        #         cls_boxes_i,
        #         segms=cls_segms_i,
        #         keypoints=cls_keyps_i,
        #         thresh=cfg.VIS_TH,
        #         box_alpha=0.8,
        #         dataset=dataset,
        #         show_class=True
        #     )
    cfg_yaml = yaml.dump(cfg)
    if ind_range is not None:
        det_name = cfg.CFG_FILE + '_range_%s_%s.pkl' % tuple(ind_range)
    else:
        det_name = 'detections.pkl'
    det_file = os.path.join(output_dir, det_name)
    save_object(dict(all_scores=all_scores, cfg=cfg_yaml), det_file)
    logger.info('Wrote detections to: {}'.format(os.path.abspath(det_file)))
    return all_scores
def main(args):
    logger = logging.getLogger(__name__)
    merge_cfg_from_file(args.cfg)
    cfg.NUM_GPUS = 1
    # Get the weights path.
    args.weights = cache_url(args.weights, cfg.DOWNLOAD_CACHE)
    assert_and_infer_cfg(cache_urls=False)
    model = infer_engine.initialize_model_from_cfg(args.weights)
    dummy_coco_dataset = dummy_datasets.get_coco_dataset()
    if os.path.isdir(args.im_or_folder):
        # glob.iglob returns an iterator with the same elements as glob.glob.
        im_list = glob.iglob(args.im_or_folder + '/*.' + args.image_ext)
    else:
        im_list = [args.im_or_folder]
    for i, im_name in enumerate(im_list):
        out_name = os.path.join(
            args.output_dir, '{}'.format(os.path.basename(im_name) + '.pdf'))
        logger.info('Processing {} -> {}'.format(im_name, out_name))
        im = cv2.imread(im_name)
        timers = defaultdict(Timer)
        t = time.time()
        with c2_utils.NamedCudaScope(0):
            cls_boxes, cls_segms, cls_keyps = infer_engine.im_detect_all(
                model, im, None, timers=timers)
        logger.info('Inference time: {:.3f}s'.format(time.time() - t))
        for k, v in timers.items():
            logger.info(' | {}: {:.3f}s'.format(k, v.average_time))
        if i == 0:
            logger.info(
                ' \ Note: inference on the first image will be slower than '
                'the rest (caches and auto-tuning need to warm up)')
        # Visualize with OpenCV and show in a window; the commented-out
        # vis_one_image call below saves a PDF via matplotlib instead.
        proc_im = vis_utils.vis_one_image_opencv(im, cls_boxes,
                                                 dataset=dummy_coco_dataset,
                                                 thresh=0.7,
                                                 show_box=True,
                                                 show_class=True)
        cv2.imshow('img', proc_im)
        cv2.waitKey(0)
        # vis_utils.vis_one_image(
        #     im[:, :, ::-1],  # BGR -> RGB for visualization
        #     im_name,
        #     args.output_dir,
        #     cls_boxes,
        #     cls_segms,
        #     cls_keyps,
        #     dataset=dummy_coco_dataset,
        #     box_alpha=0.3,
        #     show_class=True,
        #     thresh=0.7,
        #     kp_thresh=2
        # )
    cv2.destroyWindow('img')
def main(args):
    datasetName = 'fashion_seg_val'  # 'furniture_val'
    logger = logging.getLogger(__name__)
    merge_cfg_from_file(args.cfg)
    cfg.NUM_GPUS = 2
    vis = True  # False
    shuffleList = False  # True
    args.weights = cache_url(args.weights, cfg.DOWNLOAD_CACHE)
    assert_and_infer_cfg(cache_urls=False)
    model = infer_engine.initialize_model_from_cfg(args.weights)
    if args.cls_thrsh_file is not None:
        class_thresholds = {
            l.split('\t')[0]: float(l.rstrip().split('\t')[1])
            for l in open(args.cls_thrsh_file, 'r').readlines()
        }
        print(class_thresholds)
    else:
        class_thresholds = None
    # dummy_coco_dataset = dummy_datasets.get_coco_dataset()
    dataset = JsonDataset(datasetName)
    if args.im_list is None:
        im_list = glob.glob(args.im_or_folder + '/*.' + args.image_ext)
        im_list = [osp.basename(n) for n in im_list]
    else:
        im_list = [
            l.rstrip() + '.jpg' for l in open(args.im_list, 'r').readlines()
        ]
    if shuffleList:
        from random import shuffle
        shuffle(im_list)
    checkMkdir(args.output_dir)
    # outTable = osp.join(args.output_dir,
    #                     'HF_CT_Measurement_Detected_Boxes.tsv')
    # with open(outTable, 'wb') as fout:
    for i, im_name in enumerate(im_list):
        output_name = os.path.basename(im_name) + '.png'
        outFileName = os.path.join(args.output_dir, output_name)
        if osp.exists(outFileName):
            print("{} exists! continue".format(outFileName))
            continue
        imgFileName = osp.join(args.im_or_folder, im_name)
        print(imgFileName)
        im = cv2.imread(imgFileName)
        timers = defaultdict(Timer)
        t = time.time()
        with c2_utils.NamedCudaScope(0):
            cls_boxes, cls_segms, cls_keyps = infer_engine.im_detect_all(
                model, im, None, timers=timers)
        logger.info('Inference time: {:.3f}s'.format(time.time() - t))
        # for k, v in timers.items():
        #     logger.info(' | {}: {:.3f}s'.format(k, v.average_time))
        if vis:
            vis_utils.vis_one_image(
                im[:, :, ::-1],  # BGR -> RGB for visualization
                im_name,
                args.output_dir,
                cls_boxes,
                cls_segms,
                cls_keyps,
                dataset=dataset,  # dummy_coco_dataset
                box_alpha=0.3,
                show_class=True,
                thresh=0.7,
                kp_thresh=2)
def main(args):
    # A dummy COCO dataset that includes only the 'classes' field.
    dummy_coco_dataset = dummy_datasets.get_coco_dataset()
    # Snapshot the initial Detectron config so it can be restored per model.
    cfg_orig = yaml.load(yaml.dump(cfg))
    print("video is:", args.video_name)
    cap = cv2.VideoCapture(args.video_name)
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break
        t1 = time.time()
        frame = cv2.resize(frame, dsize=(1280, 720))
        if args.rpn_pkl is not None:
            proposal_boxes, _proposal_scores = get_rpn_box_proposals(
                frame, args)
            workspace.ResetWorkspace()
        else:
            proposal_boxes = None
        cls_boxes, cls_segms, cls_keyps = None, None, None
        for i in range(0, len(args.models_to_run), 2):
            pkl = args.models_to_run[i]
            yml = args.models_to_run[i + 1]
            cfg.immutable(False)
            # Restore the initial global Detectron config.
            merge_cfg_from_cfg(cfg_orig)
            # Load a yaml config file and merge it into the global config.
            merge_cfg_from_file(yml)
            if len(pkl) > 0:
                weights_file = pkl
            else:
                weights_file = cfg.TEST.WEIGHTS
            # Number of GPUs to use.
            cfg.NUM_GPUS = 1
            assert_and_infer_cfg()
            # Initialize a model from the global cfg.
            model = model_engine.initialize_model_from_cfg(weights_file)
            with c2_utils.NamedCudaScope(0):
                # Run inference for all tasks.
                cls_boxes_, cls_segms_, cls_keyps_ = \
                    model_engine.im_detect_all(model, frame, proposal_boxes)
            cls_boxes = cls_boxes_ if cls_boxes_ is not None else cls_boxes
            cls_segms = cls_segms_ if cls_segms_ is not None else cls_segms
            cls_keyps = cls_keyps_ if cls_keyps_ is not None else cls_keyps
            workspace.ResetWorkspace()
        # Construct a numpy array with the detections visualized.
        frame = vis_utils.vis_one_image_opencv(frame, cls_boxes,
                                               segms=cls_segms,
                                               keypoints=cls_keyps,
                                               thresh=0.8,
                                               kp_thresh=2,
                                               show_box=True,
                                               dataset=dummy_coco_dataset,
                                               show_class=True)
        t2 = time.time()
        durr = float(t2 - t1)
        fps = 1.0 / durr
        cv2.putText(frame, "fps:%.3f" % fps, (20, 20), 4, 0.5, (0, 255, 0),
                    1, cv2.LINE_AA)
        cv2.imshow('Detection', frame)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
    cap.release()
    cv2.destroyAllWindows()
def main(args):
    logger = logging.getLogger(__name__)
    merge_cfg_from_file(args.cfg)
    cfg.TEST.WEIGHTS = args.weights
    cfg.NUM_GPUS = 1
    assert_and_infer_cfg()
    model = infer_engine.initialize_model_from_cfg()
    dummy_coco_dataset = dummy_datasets.get_coco_dataset()
    HOST = ''
    PORT = 9111
    s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    print('Socket created')
    s.bind((HOST, PORT))
    print('Socket bind complete')
    s.listen(10)
    print('Socket now listening')
    conn, addr = s.accept()
    data = b""
    payload_size = struct.calcsize("L")
    i = 0
    while True:
        i += 1
        out_name = 'picture_{}'.format(i)
        logger.info('Processing {}'.format(out_name))
        # Read the length prefix, then the pickled frame payload.
        while len(data) < payload_size:
            data += conn.recv(40960)
        packed_msg_size = data[:payload_size]
        data = data[payload_size:]
        msg_size = struct.unpack("L", packed_msg_size)[0]
        print(msg_size)
        while len(data) < msg_size:
            data += conn.recv(40960)
        frame_data = data[:msg_size]
        data = data[msg_size:]
        im = pickle.loads(frame_data)
        im = cv2.imdecode(im, 1)
        timers = defaultdict(Timer)
        t = time.time()
        with c2_utils.NamedCudaScope(0):
            cls_boxes, cls_segms, cls_keyps = infer_engine.im_detect_all(
                model, im, None, timers=timers)
        logger.info('Inference time: {:.3f}s'.format(time.time() - t))
        for k, v in timers.items():
            logger.info(' | {}: {:.3f}s'.format(k, v.average_time))
        if i == 1:
            logger.info(
                ' \ Note: inference on the first image will be slower than '
                'the rest (caches and auto-tuning need to warm up)')
        c_img = vis_utils.vis_live(
            im[:, :, ::-1],  # BGR -> RGB for visualization
            out_name,
            args.output_dir,
            cls_boxes,
            cls_segms,
            cls_keyps,
            dataset=dummy_coco_dataset,
            box_alpha=0.3,
            show_class=True,
            thresh=0.7,
            kp_thresh=2,
        )
        # Send the visualized frame back, falling back to the raw frame if
        # visualization failed.
        try:
            data_s = cv2.imencode('.jpg', c_img[:, :, ::-1],
                                  [int(cv2.IMWRITE_JPEG_QUALITY), 75])
            data_s = pickle.dumps(data_s, protocol=2)
            conn.sendall(struct.pack("L", len(data_s)) + data_s)
        except Exception:
            data_s = cv2.imencode('.jpg', im,
                                  [int(cv2.IMWRITE_JPEG_QUALITY), 75])
            data_s = pickle.dumps(data_s, protocol=2)
            conn.sendall(struct.pack("L", len(data_s)) + data_s)
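
# A minimal client-side sketch for the socket server above: it sends one
# length-prefixed, pickled, JPEG-encoded frame and reads back the processed
# frame using the same struct "L" framing. The function names send_frame and
# recv_frame are assumptions for illustration; only the wire format (length
# prefix + pickled payload) is taken from the server code above.
import pickle
import socket
import struct

import cv2

def send_frame(sock, im):
    # Mirror the server's expectations: JPEG-encode, pickle the encoded
    # buffer, then prepend the struct "L" length prefix.
    ok, buf = cv2.imencode('.jpg', im, [int(cv2.IMWRITE_JPEG_QUALITY), 75])
    payload = pickle.dumps(buf, protocol=2)
    sock.sendall(struct.pack("L", len(payload)) + payload)

def recv_frame(sock):
    payload_size = struct.calcsize("L")
    data = b""
    while len(data) < payload_size:
        data += sock.recv(40960)
    msg_size = struct.unpack("L", data[:payload_size])[0]
    data = data[payload_size:]
    while len(data) < msg_size:
        data += sock.recv(40960)
    # The server pickles the full cv2.imencode result, i.e. (ok, buffer).
    ok, buf = pickle.loads(data[:msg_size])
    return cv2.imdecode(buf, 1)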
def main(args):
    logger = logging.getLogger(__name__)
    merge_cfg_from_file(args.cfg)
    cfg.TEST.WEIGHTS = args.weights
    cfg.NUM_GPUS = 1
    assert_and_infer_cfg()
    model = infer_engine.initialize_model_from_cfg()
    dummy_coco_dataset = dummy_datasets.get_coco_dataset()
    if os.path.isdir(args.im_or_folder):
        im_list = glob.iglob(args.im_or_folder + '/*.' + args.image_ext)
    else:
        im_list = [args.im_or_folder]
    # Sort frames by number.
    im_list = list(im_list)
    im_list.sort()
    json_output = []
    for i, im_name in enumerate(im_list):
        out_name = os.path.join(
            args.output_dir, '{}'.format(os.path.basename(im_name) + '.pdf'))
        logger.info('Processing {} -> {}'.format(im_name, out_name))
        im = cv2.imread(im_name)
        timers = defaultdict(Timer)
        t = time.time()
        with c2_utils.NamedCudaScope(0):
            cls_boxes, cls_segms, cls_keyps = infer_engine.im_detect_all(
                model, im, None, timers=timers)
        logger.info('Inference time: {:.3f}s'.format(time.time() - t))
        for k, v in timers.items():
            logger.info(' | {}: {:.3f}s'.format(k, v.average_time))
        if i == 0:
            logger.info(
                ' \ Note: inference on the first image will be slower than '
                'the rest (caches and auto-tuning need to warm up)')
        boxes, segms, keypoints, classes = vis_utils.convert_from_cls_format(
            cls_boxes, cls_segms, cls_keyps)
        if boxes is None:
            boxes = []
        else:
            boxes = boxes.tolist()
        json_output.append({'frame': i, 'boxes': boxes})
        # Skip writing PDF output.
        # vis_utils.vis_one_image(
        #     im[:, :, ::-1],  # BGR -> RGB for visualization
        #     im_name,
        #     args.output_dir,
        #     cls_boxes,
        #     cls_segms,
        #     cls_keyps,
        #     dataset=dummy_coco_dataset,
        #     box_alpha=0.3,
        #     show_class=True,
        #     thresh=0.7,
        #     kp_thresh=2
        # )
    with open(args.output_dir + '/boxes.json', 'w') as outfile:
        json.dump(json_output, outfile, indent=4)
def predict_dataset(project,
                    out_dir="/tmp/predictions/",
                    visualize=False,
                    visualize_dataset="ade",
                    randomize=False):
    if visualize:
        vis_dir = os.path.join(out_dir, "vis")
        if visualize_dataset == "ade":
            dummy_dataset = dummy_datasets.get_ade_dataset()
        else:
            dummy_dataset = dummy_datasets.get_coco_dataset()
    config = projects.get_config(project)
    img_dir = config["images"]
    pkl_dir = os.path.join(out_dir, "pkl")
    im_list = [line.rstrip() for line in open(config["im_list"], 'r')]
    if randomize:
        # Shuffle the image list deterministically.
        random.seed(3)
        random.shuffle(im_list)
    for i, im_name in enumerate(im_list):
        img_path = os.path.join(img_dir, im_name)
        img_basename = os.path.splitext(im_name)[0]
        pkl_path = os.path.join(pkl_dir, img_basename + '.pkl')
        if os.path.exists(pkl_path):
            print("Already done")
            continue
        logger.info('Processing {} -> {}'.format(im_name, pkl_path))
        logger.info('{}/{}'.format(i, len(im_list)))
        # Predict.
        im = cv2.imread(img_path)
        timers = defaultdict(Timer)
        t = time.time()
        with c2_utils.NamedCudaScope(0):
            cls_boxes, cls_segms, cls_keyps = infer_engine.im_detect_all(
                model, im, None, timers=timers)
        logger.info('Inference time: {:.3f}s'.format(time.time() - t))
        for k, v in timers.items():
            logger.info(' | {}: {:.3f}s'.format(k, v.average_time))
        if i == 0:
            logger.info(
                ' \ Note: inference on the first image will be slower than '
                'the rest (caches and auto-tuning need to warm up)')
        # Save the prediction pickle.
        pkl_obj = (cls_boxes, cls_segms, cls_keyps)
        if not os.path.isdir(os.path.dirname(pkl_path)):
            os.makedirs(os.path.dirname(pkl_path))
        pickle.dump(pkl_obj, open(pkl_path, "wb"))
        if visualize:
            vis_path = os.path.join(vis_dir, img_basename + '.png')
            if not os.path.isdir(os.path.dirname(vis_path)):
                os.makedirs(os.path.dirname(vis_path))
            vis_image = vis_utils.vis_one_image_opencv(im[:, :, ::-1],
                                                       cls_boxes,
                                                       cls_segms,
                                                       cls_keyps,
                                                       thresh=0,
                                                       kp_thresh=2,
                                                       dataset=dummy_dataset,
                                                       show_box=True,
                                                       show_class=True)
            cv2.imwrite(vis_path, vis_image)
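
# A minimal sketch of reading back one of the prediction pickles written by
# predict_dataset() above; the path argument is a placeholder. The tuple
# layout (cls_boxes, cls_segms, cls_keyps) matches what is dumped above.
import pickle

def load_prediction(pkl_path):
    with open(pkl_path, "rb") as f:
        cls_boxes, cls_segms, cls_keyps = pickle.load(f)
    return cls_boxes, cls_segms, cls_keyps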
def main2(args):
    logger = logging.getLogger(__name__)
    merge_cfg_from_file(args.cfg)
    cfg.TEST.WEIGHTS = args.weights
    cfg.NUM_GPUS = 1
    assert_and_infer_cfg()
    model = infer_engine.initialize_model_from_cfg()
    dummy_coco_dataset = dummy_datasets.get_coco_dataset()
    if os.path.isdir(args.im_or_folder):
        im_list = glob.iglob(args.im_or_folder + '/*.' + args.image_ext)
    else:
        im_list = [args.im_or_folder]
    final_json = {'images': []}
    for i, im_name in enumerate(im_list):
        out_name = os.path.join(
            args.output_dir, '{}'.format(os.path.basename(im_name) + '.pdf'))
        logger.info('Processing {} -> {}'.format(im_name, out_name))
        im = cv2.imread(im_name)
        timers = defaultdict(Timer)
        t = time.time()
        with c2_utils.NamedCudaScope(0):
            cls_boxes, cls_segms, cls_keyps = infer_engine.im_detect_all(
                model, im, None, timers=timers)
        logger.info('Inference time: {:.3f}s'.format(time.time() - t))
        for k, v in timers.items():
            logger.info(' | {}: {:.3f}s'.format(k, v.average_time))
        if i == 0:
            logger.info(
                ' \ Note: inference on the first image will be slower than '
                'the rest (caches and auto-tuning need to warm up)')
        results = get_result_json(cls_boxes, cls_segms, cls_keyps,
                                  thresh=args.save_thresh,
                                  dataset=dummy_coco_dataset)
        results['path'] = im_name
        # im.shape is (height, width, channels).
        results['width'] = im.shape[1]
        results['height'] = im.shape[0]
        final_json['images'].append(results)
        vis_utils.vis_one_image(
            im[:, :, ::-1],  # BGR -> RGB for visualization
            im_name,
            args.output_dir,
            cls_boxes,
            cls_segms,
            cls_keyps,
            dataset=dummy_coco_dataset,
            box_alpha=0.3,
            show_class=True,
            thresh=args.save_thresh,
            kp_thresh=2)
    with open('%s/results.json' % args.output_dir, 'w') as outfile:
        json.dump(final_json, outfile)
def main(args):
    logger = logging.getLogger(__name__)
    merge_cfg_from_file(args.cfg)
    cfg.TEST.WEIGHTS = args.weights
    cfg.NUM_GPUS = 1
    assert_and_infer_cfg()
    model = infer_engine.initialize_model_from_cfg()
    dummy_coco_dataset = dummy_datasets.get_coco_dataset()
    count = 0
    # class_names = [
    #     '__background__', 'person', 'bicycle', 'car', 'motorcycle',
    #     'airplane', 'bus', 'train', 'truck']
    # color_list = [[0, 0, 0], [255, 0, 0], [0, 255, 0], [0, 0, 255],
    #               [255, 255, 0], [0, 255, 255], [255, 255, 0],
    #               [255, 0, 255], [255, 255, 255]]
    # Chinese display labels: person, bicycle, vehicle, motorcycle,
    # (airplane), vehicle, (train), vehicle.
    class_names = [
        '__background__', u'人', u'自行车', u'车', u'摩托车', 'airplane',
        u'车', 'train', u'车'
    ]
    color_list = [[0, 0, 0], [255, 0, 0], [0, 255, 0], [0, 0, 255],
                  [255, 255, 0], [0, 0, 255], [255, 255, 0], [255, 0, 255],
                  [0, 0, 255]]
    cls_sel = [1, 2, 3, 4, 6, 8]
    cls_thresh = [1, 0.8, 0.5, 0.9, 0.5, 0.9, 0.8, 0.9, 0.8]
    if count == 0:
        logger.info(
            ' \ Note: inference on the first image will be slower than the '
            'rest (caches and auto-tuning need to warm up)')
    # cap = cv2.VideoCapture(0)
    cap = cv2.VideoCapture('biaoding.avi')
    cap.set(3, 800)
    cap.set(4, 600)
    size = (int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)),
            int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)))
    print(size)
    print(cv2.__version__)
    fourcc = cv2.VideoWriter_fourcc(b'X', b'V', b'I', b'D')
    videoWriter = cv2.VideoWriter('objectDetection.avi', fourcc, 10, size)
    start_time = 0
    # fps = cap.get(cv2.CAP_PROP_FPS)
    # print(fps)
    while True:
        # Get a frame.
        ret, im = cap.read()
        count = count + 1
        if count == 5:
            start_time = time.time()
        # im = cv2.resize(im, None, None, fx=1000 / 800, fy=800 / 600,
        #                 interpolation=cv2.INTER_LINEAR)
        print(im.shape)
        timers = defaultdict(Timer)
        # Detect one image.
        with c2_utils.NamedCudaScope(0):
            cls_boxes, _, _ = infer_engine.im_detect_all(
                model, im, None, timers=timers)
        # logger.info('Inference time: {:.3f}s'.format(time.time() - t))
        # for k, v in timers.items():
        #     logger.info(' | {}: {:.3f}s'.format(k, v.average_time))
        # cls_boxes_sel = cls_boxes[[cls_id for cls_ind, cls_id
        #                            in enumerate(cls_sel[0:])]]
        demo_vis_one_imageboxes_opencv(im, cls_boxes,
                                       thresh=cls_thresh,
                                       show_box=True,
                                       dataset=dummy_coco_dataset,
                                       show_class=True,
                                       class_names=class_names,
                                       color_list=color_list,
                                       cls_sel=cls_sel,
                                       count=count,
                                       start_time=start_time)
        # Show a frame.
        if cv2.waitKey(1) & 0xFF == ord('q') or not ret:
            break
        videoWriter.write(im)  # write frame to video
        # cv2.imshow("detection", im)
    cap.release()
    videoWriter.release()
    cv2.destroyAllWindows()
def main(args):
    logger = logging.getLogger(__name__)
    merge_cfg_from_file(args.cfg)
    cfg.TEST.WEIGHTS = args.weights
    cfg.NUM_GPUS = 1
    assert_and_infer_cfg()
    model = infer_engine.initialize_model_from_cfg()
    dummy_coco_dataset = dummy_datasets.get_coco_dataset()
    image_dir = args.image_dir
    output_dir = args.output_dir
    json_dir = args.json_dir
    image_ext = args.image_ext
    inner_dirs = [d for d in listdir(image_dir) if isdir(join(image_dir, d))]
    for inner_dir in inner_dirs:
        if not os.path.isdir(join(output_dir, inner_dir)):
            os.mkdir(join(output_dir, inner_dir))
        if not os.path.isdir(join(json_dir, inner_dir)):
            os.mkdir(join(json_dir, inner_dir))
        files = [
            f for f in listdir(join(image_dir, inner_dir))
            if isfile(join(join(image_dir, inner_dir), f)) and
            f.endswith(image_ext)
        ]
        for f in files:
            image_file = join(join(image_dir, inner_dir), f)
            out_file = join(join(output_dir, inner_dir),
                            f.replace(".%s" % image_ext, ".pdf"))
            json_file = join(join(json_dir, inner_dir),
                             f.replace(".%s" % image_ext, ".json"))
            logger.info('Processing {} -> {}'.format(image_file, out_file))
            im = cv2.imread(image_file)
            timers = defaultdict(Timer)
            t = time.time()
            with c2_utils.NamedCudaScope(0):
                cls_boxes, cls_segms, cls_keyps = infer_engine.im_detect_all(
                    model, im, None, timers=timers)
            logger.info('Inference time: {:.3f}s'.format(time.time() - t))
            for k, v in timers.items():
                logger.info(' | {}: {:.3f}s'.format(k, v.average_time))
            results = get_result_json(cls_boxes, cls_segms, cls_keyps,
                                      thresh=args.save_thresh,
                                      dataset=dummy_coco_dataset)
            results['path'] = image_file
            # im.shape is (height, width, channels).
            results['width'] = im.shape[1]
            results['height'] = im.shape[0]
            vis_utils.vis_one_image(
                im[:, :, ::-1],  # BGR -> RGB for visualization
                image_file,
                join(output_dir, inner_dir),
                cls_boxes,
                cls_segms,
                cls_keyps,
                dataset=dummy_coco_dataset,
                box_alpha=0.3,
                show_class=True,
                thresh=args.save_thresh,
                kp_thresh=2)
            with open(json_file, 'w') as outfile:
                json.dump(results, outfile)
def main(args):
    cfg_file = r'/home/twang/Documents/detectron/configs/12_2017_baselines/e2e_mask_rcnn_R-101-FPN_2x.yaml'
    weights_file = r'/home/twang/Documents/detectron/model-weights/mask_rcnn_R-101-FPN_2x_model_final.pkl'
    video_dir = args.video_dir
    print("video_dir", video_dir)
    video_name = os.path.basename(video_dir)
    video_name = os.path.splitext(video_name)[0]
    print("video_name", video_name)
    directory_box = os.path.join(
        os.path.join(r"/home/twang/Documents/HK-person", video_name), 'box')
    print("directory_box", directory_box)
    os.makedirs(directory_box)
    directory_mask = os.path.join(
        os.path.join(r"/home/twang/Documents/HK-person", video_name), 'mask')
    print("directory_mask", directory_mask)
    os.makedirs(directory_mask)
    merge_cfg_from_file(cfg_file)
    cfg.NUM_GPUS = 1
    weights = cache_url(weights_file, cfg.DOWNLOAD_CACHE)
    assert_and_infer_cfg()
    model = infer_engine.initialize_model_from_cfg(weights)
    dummy_coco_dataset = dummy_datasets.get_coco_dataset()
    cap = cv2.VideoCapture(video_dir)
    count = 0
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break
        frame = cv2.resize(frame, dsize=(1280, 720))
        total_frame = cap.get(cv2.CAP_PROP_FRAME_COUNT)
        current_frame = int(cap.get(cv2.CAP_PROP_POS_FRAMES))
        Frame_step = 5
        if current_frame + Frame_step < total_frame:
            cap.set(cv2.CAP_PROP_POS_FRAMES, current_frame + Frame_step)
        timers = defaultdict(Timer)
        with c2_utils.NamedCudaScope(0):
            cls_boxes, cls_segms, cls_keyps = infer_engine.im_detect_all(
                model, frame, None, timers=timers)
        thresh = 0.9
        crop_box = True
        dataset = dummy_coco_dataset
        frame_for_box_crop = frame.copy()
        frame_for_mask = frame.copy()
        # Crop out each detection (box and mask) above the threshold.
        if isinstance(cls_boxes, list):
            boxes, segms, keypoints, classes = convert_from_cls_format(
                cls_boxes, cls_segms, cls_keyps)
        if boxes is None or boxes.shape[0] == 0 or max(boxes[:, 4]) < thresh:
            continue
        if segms is not None and len(segms) > 0:
            masks = mask_util.decode(segms)
            color_list = colormap()
            mask_color_id = 0
        # Display in largest to smallest order to reduce occlusion.
        areas = (boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1])
        sorted_inds = np.argsort(-areas)
        for i in sorted_inds:
            bbox = boxes[i, :4]
            score = boxes[i, -1]
            if score < thresh:
                continue
            # Crop each box.
            if crop_box:
                # frame = vis_bbox(frame, (bbox[0], bbox[1],
                #                          bbox[2] - bbox[0],
                #                          bbox[3] - bbox[1]))
                (x1, y1, w, h) = (int(bbox[0]), int(bbox[1]),
                                  int(bbox[2] - bbox[0]),
                                  int(bbox[3] - bbox[1]))
                x2 = x1 + w
                y2 = y1 + h
                cropped = frame_for_box_crop[y1:y2, x1:x2]
                cv2.imwrite(
                    "%s/person_Frame%i_%i.png" %
                    (directory_box, current_frame, i), cropped)
            # Crop each mask.
            if segms is not None and len(segms) > i:
                color_mask = color_list[mask_color_id % len(color_list), 0:3]
                mask_color_id += 1
                # frame = vis_mask(frame, masks[..., i], color_mask)
                (x1, y1, w, h) = (int(bbox[0]), int(bbox[1]),
                                  int(bbox[2] - bbox[0]),
                                  int(bbox[3] - bbox[1]))
                x2 = x1 + w
                y2 = y1 + h
                cropped_mask = masks[..., i][y1:y2, x1:x2]
                cropped_img = frame_for_mask[y1:y2, x1:x2]
                cropped_img = vis_mask(cropped_img, cropped_mask, color_mask)
                cv2.imwrite(
                    "%s/person_Mask_Frame%i_%i.png" %
                    (directory_mask, current_frame, i), cropped_img)
        count += 1
        print("done:%i" % count)
    cap.release()
    cv2.destroyAllWindows()
def predict(self, im):
    data_list = []
    with c2_utils.NamedCudaScope(self.gpu_id):
        cls_boxes, cls_segms, cls_keyps = infer_engine.im_detect_all(
            self.model, im, None, None)
    # Get boxes and classes.
    if isinstance(cls_boxes, list):
        boxes, segms, keypoints, classes = self.convert_from_cls_format(
            cls_boxes, cls_segms, cls_keyps)
    if boxes is None or boxes.shape[0] == 0 or max(
            boxes[:, 4]) < self.score_thresh:
        return data_list
    # Sort detections by box area, largest first.
    areas = (boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1])
    sorted_inds = np.argsort(-areas)
    # Debug visualization without NMS between classes:
    # im1 = cv2.cvtColor(im, cv2.COLOR_RGB2BGR)
    # result1 = im1.copy()
    # for i in sorted_inds:
    #     bbox = boxes[i, :4]
    #     score = boxes[i, -1]
    #     if score < self.score_thresh:
    #         continue
    #     class_str = self.get_class_string(classes[i], score,
    #                                       self.dummy_coco_dataset)
    #     cv2.rectangle(result1, (int(bbox[0]), int(bbox[1])),
    #                   (int(bbox[2]), int(bbox[3])), (255, 255, 0), 1)
    #     font = cv2.FONT_HERSHEY_SIMPLEX
    #     ((txt_w, txt_h), _) = cv2.getTextSize(class_str, font, 0.35, 1)
    #     txt_tl = int(bbox[0]), int(bbox[1]) - int(0.3 * txt_h)
    #     cv2.putText(result1, class_str, txt_tl, font, 0.35,
    #                 (218, 227, 218), lineType=cv2.LINE_AA)
    #     txt_tl = int(bbox[0]) + txt_w, int(bbox[1]) - int(0.3 * txt_h)
    #     cv2.putText(result1, '%.2f' % score, txt_tl, font, 0.35,
    #                 (218, 227, 218), lineType=cv2.LINE_AA)
    # cv2.imwrite("test1.jpg", result1)
    # NMS between classes.
    # im2 = cv2.cvtColor(im, cv2.COLOR_RGB2BGR)
    # result2 = im2.copy()
    if len(sorted_inds) > 0:
        # Threshold is 0.9; the larger the threshold, the fewer boxes are
        # suppressed.
        nmsIndex = self.nms_between_classes(boxes, self.class_nms_thresh)
        for i in range(len(nmsIndex)):
            bbox = boxes[nmsIndex[i], :4]
            score = boxes[nmsIndex[i], -1]
            if score < self.score_thresh:
                continue
            # Get the class string.
            class_str = self.get_class_string(classes[nmsIndex[i]], score,
                                              self.dummy_coco_dataset)
            # Per-class score thresholds.
            if self.per_class_thresh:
                per_class_thresholds = {
                    'autotruck': self.autotruck_score_thresh,
                    'forklift': self.forklift_score_thresh,
                    'digger': self.digger_score_thresh,
                    'car': self.car_score_thresh,
                    'bus': self.bus_score_thresh,
                    'tanker': self.tanker_score_thresh,
                    'person': self.person_score_thresh,
                    'minitruck': self.minitruck_score_thresh,
                    'minibus': self.minibus_score_thresh,
                }
                if score < per_class_thresholds.get(class_str,
                                                    self.score_thresh):
                    continue
            single_data = {
                "cls": class_str,
                "score": float('%.2f' % score),
                "bbox": {
                    "xmin": int(bbox[0]),
                    "ymin": int(bbox[1]),
                    "xmax": int(bbox[2]),
                    "ymax": int(bbox[3])
                }
            }
            data_list.append(single_data)
            # Debug visualization of the kept detections:
            # cv2.rectangle(result2, (int(bbox[0]), int(bbox[1])),
            #               (int(bbox[2]), int(bbox[3])), (255, 255, 0), 1)
            # font = cv2.FONT_HERSHEY_SIMPLEX
            # ((txt_w, txt_h), _) = cv2.getTextSize(class_str, font, 0.55, 1)
            # txt_tl = int(bbox[0]), int(bbox[1]) - int(0.3 * txt_h)
            # cv2.putText(result2, class_str, txt_tl, font, 0.55,
            #             (218, 227, 218), lineType=cv2.LINE_AA)
            # txt_tl = int(bbox[0]) + txt_w, int(bbox[1]) - int(0.3 * txt_h)
            # cv2.putText(result2, '%.2f' % score, txt_tl, font, 0.35,
            #             (218, 227, 218), lineType=cv2.LINE_AA)
        # cv2.imwrite("test2.jpg", result2)
    # Construction of data_list is complete.
    return data_list
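
# The helper nms_between_classes() is referenced above but not defined in
# this file. Below is a minimal sketch of what it might look like, assuming
# plain class-agnostic greedy NMS over [x1, y1, x2, y2, score] rows; the
# actual implementation may differ.
import numpy as np

def nms_between_classes(boxes, nms_thresh):
    # boxes: N x 5 array of [x1, y1, x2, y2, score]; returns kept indices.
    x1, y1, x2, y2 = boxes[:, 0], boxes[:, 1], boxes[:, 2], boxes[:, 3]
    scores = boxes[:, 4]
    areas = (x2 - x1 + 1) * (y2 - y1 + 1)
    order = scores.argsort()[::-1]
    keep = []
    while order.size > 0:
        i = order[0]
        keep.append(i)
        # Intersection of the top-scoring box with the remaining boxes.
        xx1 = np.maximum(x1[i], x1[order[1:]])
        yy1 = np.maximum(y1[i], y1[order[1:]])
        xx2 = np.minimum(x2[i], x2[order[1:]])
        yy2 = np.minimum(y2[i], y2[order[1:]])
        w = np.maximum(0.0, xx2 - xx1 + 1)
        h = np.maximum(0.0, yy2 - yy1 + 1)
        inter = w * h
        iou = inter / (areas[i] + areas[order[1:]] - inter)
        # Keep boxes whose overlap with the kept box is below the threshold,
        # regardless of their class.
        order = order[np.where(iou <= nms_thresh)[0] + 1]
    return keep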
def main():
    cfg_file = r'/home/twang/Documents/detectron/configs/12_2017_baselines/e2e_mask_rcnn_R-101-FPN_2x.yaml'
    weights_file = r'/home/twang/Documents/detectron/model-weights/mask_rcnn_R-101-FPN_2x_model_final.pkl'
    merge_cfg_from_file(cfg_file)
    cfg.NUM_GPUS = 1
    weights = cache_url(weights_file, cfg.DOWNLOAD_CACHE)
    assert_and_infer_cfg()
    model = infer_engine.initialize_model_from_cfg(weights)
    dummy_coco_dataset = dummy_datasets.get_coco_dataset()
    video_dir = '/media/network_shared_disk/WangTao/test_video/KLA_airport/Entrance_Peak_Hour.avi'
    cap = cv2.VideoCapture(video_dir)
    fourcc = cv2.VideoWriter_fourcc(*'MJPG')
    video_output = cv2.VideoWriter("out.mp4", fourcc, 5, (1280, 720))
    while cap.isOpened():
        t1 = time.time()
        ret, frame = cap.read()
        if not ret:
            break
        frame = cv2.resize(frame, dsize=(1280, 720))
        timers = defaultdict(Timer)
        with c2_utils.NamedCudaScope(0):
            cls_boxes, cls_segms, cls_keyps = infer_engine.im_detect_all(
                model, frame, None, timers=timers)
        thresh = 0.7
        show_box = True
        show_class = True
        crop_person = True
        dataset = dummy_coco_dataset
        frame_for_person_crop = frame.copy()
        frame_for_mask = frame.copy()
        # Draw the detections on the frame.
        if isinstance(cls_boxes, list):
            boxes, segms, keypoints, classes = convert_from_cls_format(
                cls_boxes, cls_segms, cls_keyps)
        if boxes is None or boxes.shape[0] == 0 or max(boxes[:, 4]) < thresh:
            continue
        if segms is not None and len(segms) > 0:
            masks = mask_util.decode(segms)
            color_list = colormap()
            mask_color_id = 0
        # Display in largest to smallest order to reduce occlusion.
        areas = (boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1])
        sorted_inds = np.argsort(-areas)
        for i in sorted_inds:
            bbox = boxes[i, :4]
            score = boxes[i, -1]
            if score < thresh:
                continue
            # Show the class (person, backpack, handbag, suitcase only).
            class_default = ['person', 'backpack', 'handbag', 'suitcase']
            if show_class:
                class_str, class_text = get_class_string(
                    classes[i], score, dataset)
                if class_text in class_default:
                    frame = vis_class(frame, (bbox[0], bbox[1] - 2),
                                      class_str)
            # Show the bounding box.
            if show_box:
                frame = vis_bbox(frame, (bbox[0], bbox[1],
                                         bbox[2] - bbox[0],
                                         bbox[3] - bbox[1]))
            # Show the mask.
            if segms is not None and len(segms) > i:
                color_mask = color_list[mask_color_id % len(color_list), 0:3]
                mask_color_id += 1
                frame_for_mask = vis_mask(frame_for_mask, masks[..., i],
                                          color_mask)
        t2 = time.time()
        durr = float(t2 - t1)
        fps = 1.0 / durr
        # cv2.putText(frame, "fps:%.3f" % fps, (20, 20), 4, 0.5,
        #             (0, 255, 0), 1, cv2.LINE_AA)
        cv2.imshow('Detection using box', frame)
        cv2.imshow('Detection using mask', frame_for_mask)
        video_output.write(frame)
        # video_mask.write(frame_for_mask)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
    cap.release()
    video_output.release()
    cv2.destroyAllWindows()
def get_detections_from_im(cfg, model, im, image_id, featmap_blob_name,
                           feat_blob_name, MIN_BOXES, MAX_BOXES,
                           conf_thresh=0.2, bboxes=None):
    assert conf_thresh >= 0.
    with c2_utils.NamedCudaScope(0):
        scores, cls_boxes, im_scale = infer_engine.im_detect_bbox(
            model, im, cfg.TEST.SCALE, cfg.TEST.MAX_SIZE, boxes=bboxes)
        num_rpn = scores.shape[0]
        region_feat = workspace.FetchBlob(feat_blob_name)
        max_conf = np.zeros((num_rpn, ), dtype=np.float32)
        max_cls = np.zeros((num_rpn, ), dtype=np.int32)
        max_box = np.zeros((num_rpn, 4), dtype=np.float32)
        # For each RPN proposal, keep the highest-scoring class that survives
        # per-class NMS.
        for cls_ind in range(1, cfg.MODEL.NUM_CLASSES):
            cls_scores = scores[:, cls_ind]
            dets = np.hstack(
                (cls_boxes[:, (cls_ind * 4):(cls_ind * 4 + 4)],
                 cls_scores[:, np.newaxis])).astype(np.float32)
            keep = np.array(nms(dets, cfg.TEST.NMS))
            inds_update = np.where(cls_scores[keep] > max_conf[keep])
            kinds = keep[inds_update]
            max_conf[kinds] = cls_scores[kinds]
            max_cls[kinds] = cls_ind
            max_box[kinds] = dets[kinds][:, :4]
        keep_boxes = np.where(max_conf > conf_thresh)[0]
        if len(keep_boxes) < MIN_BOXES:
            keep_boxes = np.argsort(max_conf)[::-1][:MIN_BOXES]
        elif len(keep_boxes) > MAX_BOXES:
            keep_boxes = np.argsort(max_conf)[::-1][:MAX_BOXES]
        objects = max_cls[keep_boxes]
        obj_prob = max_conf[keep_boxes]
        obj_boxes = max_box[keep_boxes, :]
        cls_prob = scores[keep_boxes, :]
    # print('{} ({}x{}): {} boxes, box size {}, feature size {}, '
    #       'class size {}'.format(image_id, np.size(im, 0), np.size(im, 1),
    #                              len(keep_boxes),
    #                              cls_boxes[keep_boxes].shape,
    #                              box_features[keep_boxes].shape,
    #                              objects.shape))
    # print(cls_boxes[keep_boxes][:10, :], objects[:10], obj_prob[:10])
    assert np.sum(objects >= cfg.MODEL.NUM_CLASSES) == 0
    # assert np.min(obj_prob[:10]) >= 0.2
    # if np.min(obj_prob) < 0.2:
    #     print('confidence score too low!', np.min(obj_prob[:10]))
    # if np.max(cls_boxes[keep_boxes]) > max(np.size(im, 0), np.size(im, 1)):
    #     print('box is offscreen!', np.max(cls_boxes[keep_boxes]),
    #           np.size(im, 0), np.size(im, 1))
    return {
        "image_id": image_id,
        "image_h": np.size(im, 0),
        "image_w": np.size(im, 1),
        'num_boxes': len(keep_boxes),
        'boxes': obj_boxes,
        'region_feat': region_feat[keep_boxes, :],
        'object': objects,
        'obj_prob': obj_prob,
        'cls_prob': cls_prob
    }
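
# A minimal sketch of calling get_detections_from_im() to extract bottom-up-
# attention-style region features for one image. The blob names and the box
# limits below are placeholder assumptions; the correct feature blob depends
# on the network definition in the config.
import cv2

def extract_region_features(cfg, model, image_path, image_id):
    im = cv2.imread(image_path)
    det = get_detections_from_im(cfg, model, im, image_id,
                                 featmap_blob_name='gpu_0/res5_2_sum',
                                 feat_blob_name='gpu_0/fc7',
                                 MIN_BOXES=10, MAX_BOXES=100,
                                 conf_thresh=0.2)
    # det['region_feat'] is a (num_boxes, feat_dim) array aligned row-for-row
    # with det['boxes'].
    return det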
for i, image in enumerate(images):
    im_name, ext = os.path.splitext(image)
    # print(image)
    im_path = os.path.join(video_path, image)
    im = cv2.imread(im_path)
    if im is None:
        print("**Problem detected with: ", dir_name, " Skipping ...")
        skip_vids.append(dir_name)
        print("Total videos skipped: ", len(skip_vids))
        skipVid = True
        break
    timers = defaultdict(Timer)
    t = time.time()
    with c2_utils.NamedCudaScope(0):
        ans_scores, ans_boxes, cls_boxes, im_scales, fc7_feats, im_info = \
            infer_engine.im_detect_all(model, im, None, timers=timers)
    im_dict[im_name] = {
        "scores": ans_scores,
        "boxes": ans_boxes,
        "cls_boxes": cls_boxes,
        "im_scales": im_scales,
        "fc7_feats": fc7_feats,
        "im_info": im_info
    }
if skipVid:
    skipVid = False
def camera(width, height, fps, args):
    global running
    workspace.GlobalInit(['caffe2', '--caffe2_log_level=0'])
    utils.logging.setup_logging(__name__)
    logger = logging.getLogger(__name__)
    merge_cfg_from_file(args.cfg)
    cfg.TEST.WEIGHTS = args.weights
    cfg.NUM_GPUS = 1
    cam = args.camera
    if '.' not in cam:
        cam = int(cam)
    assert_and_infer_cfg(cache_urls=False)
    model = infer_engine.initialize_model_from_cfg()
    # dummy_coco_dataset = dummy_datasets.get_coco_dataset()
    start_time = 0
    count = 0
    # class_names = [
    #     '__background__', 'person', 'bicycle', 'car', 'motorcycle',
    #     'airplane', 'bus', 'train', 'truck']
    # class_names = [
    #     '__background__', u'人', u'自行车', u'小汽车', u'摩托车', 'airplane',
    #     u'公共汽车', 'train', u'卡车']
    # color_list = [[0, 0, 0], [255, 0, 0], [0, 255, 0], [0, 0, 255],
    #               [255, 255, 0], [0, 255, 255], [255, 255, 0],
    #               [255, 0, 255], [255, 255, 255]]
    # class_names = [
    #     '__background__', u'人', u'自行车', u'车', u'摩托车', 'airplane',
    #     u'车', 'train', u'车']
    # color_list = [[0, 0, 0], [255, 0, 0], [0, 255, 0], [0, 0, 255],
    #               [255, 255, 0], [0, 0, 255], [0, 0, 255], [255, 0, 255],
    #               [0, 0, 255]]
    # Single Chinese display label meaning "vehicle".
    class_names = ['__background__', u'车']
    color_list = [[0, 0, 0], [0, 255, 0]]
    # cls_sel = [1, 2, 3, 4, 6, 8]
    cls_sel = [1]
    # cls_thresh = [1, 0.5, 0.6, 0.8, 0.6, 0.9, 0.5, 0.9, 0.5]
    cls_thresh = [1, 0.5]
    if count == 0:
        logger.info(
            ' \ Note: inference on the first image will be slower than the '
            'rest (caches and auto-tuning need to warm up)')
    # capture = cv2.VideoCapture(cam)
    capture = cv2.VideoCapture(cam, cv2.CAP_FFMPEG)
    capture.set(cv2.CAP_PROP_FRAME_WIDTH, width)
    capture.set(cv2.CAP_PROP_FRAME_HEIGHT, height)
    capture.set(cv2.CAP_PROP_FPS, fps)
    size = (float(capture.get(cv2.CAP_PROP_FRAME_WIDTH)),
            float(capture.get(cv2.CAP_PROP_FRAME_HEIGHT)))
    print(size)
    fps = capture.get(cv2.CAP_PROP_FPS)
    fps = 25
    print("%f fps" % fps)
    print(type(cam))
    fourcc = 1196444237.0
    print(fourcc)
    timers = defaultdict(Timer)
    while True:
        readtime = time.time()
        ret, im = capture.read()
        readtime2 = time.time() - readtime
        print("read time %.2f ms" % (readtime2 * 1000))
        # Detect one image (batch size 1).
        count = count + 1
        fbatch = [im]
        with c2_utils.NamedCudaScope(0):
            cls_boxes, _, _ = infer_engine.im_detect_all_batch(
                model, fbatch, None, timers=timers)
        for k, v in timers.items():
            logger.info(' | {}: {:.3f}s'.format(k, v.average_time))
        if count == 1:
            logger.info(
                ' \ Note: inference on the first image will be slower than '
                'the rest (caches and auto-tuning need to warm up)')
def camera(queue, width, height, fps, args):
    global running
    logger = logging.getLogger(__name__)
    merge_cfg_from_file(args.cfg)
    cfg.TEST.WEIGHTS = args.weights
    cfg.NUM_GPUS = 1
    cam = args.camera
    if '.' not in cam:
        cam = int(cam)
    assert_and_infer_cfg(cache_urls=False)
    model = infer_engine.initialize_model_from_cfg()
    # dummy_coco_dataset = dummy_datasets.get_coco_dataset()
    start_time = 0
    count = 0
    # class_names = [
    #     '__background__', 'person', 'bicycle', 'car', 'motorcycle',
    #     'airplane', 'bus', 'train', 'truck']
    # Chinese display labels: person, bicycle, car, motorcycle, (airplane),
    # bus, (train), truck.
    class_names = [
        '__background__', u'人', u'自行车', u'小汽车', u'摩托车', 'airplane',
        u'公共汽车', 'train', u'卡车'
    ]
    # color_list = [[0, 0, 0], [255, 0, 0], [0, 255, 0], [0, 0, 255],
    #               [255, 255, 0], [0, 255, 255], [255, 255, 0],
    #               [255, 0, 255], [255, 255, 255]]
    # class_names = [
    #     '__background__', u'人', u'自行车', u'车', u'摩托车', 'airplane',
    #     u'车', 'train', u'车']
    color_list = [[0, 0, 0], [255, 0, 0], [0, 255, 0], [0, 0, 255],
                  [255, 255, 0], [0, 0, 255], [0, 0, 255], [255, 0, 255],
                  [0, 0, 255]]
    cls_sel = [1, 2, 3, 4, 6, 8]
    cls_thresh = [1, 0.5, 0.6, 0.8, 0.6, 0.9, 0.5, 0.9, 0.5]
    if count == 0:
        logger.info(
            ' \ Note: inference on the first image will be slower than the '
            'rest (caches and auto-tuning need to warm up)')
    capture = cv2.VideoCapture(cam)
    capture.set(cv2.CAP_PROP_FRAME_WIDTH, width)
    capture.set(cv2.CAP_PROP_FRAME_HEIGHT, height)
    capture.set(cv2.CAP_PROP_FPS, fps)
    size = (float(capture.get(cv2.CAP_PROP_FRAME_WIDTH)),
            float(capture.get(cv2.CAP_PROP_FRAME_HEIGHT)))
    print(size)
    fps = capture.get(cv2.CAP_PROP_FPS)
    print("%f fps" % fps)
    print(type(cam))
    # Record output video.
    fourcc = cv2.VideoWriter_fourcc(b'X', b'V', b'I', b'D')
    videoWriter = cv2.VideoWriter('ObjectDetection.avi', fourcc, 10,
                                  (int(size[0]), int(size[1])))
    t1 = 0
    dett = 0
    vt = 0
    while True:
        frame = {}
        readtime = time.time()
        ret, im = capture.read()
        print("read frame time %.2f ms" % ((time.time() - readtime) * 1000))
        # if type(cam) == str:
        #     im = cv2.resize(im, None, None, fx=width / size[0],
        #                     fy=height / size[1],
        #                     interpolation=cv2.INTER_LINEAR)
        # ori_im = copy.deepcopy(im)
        frame["img"] = im
        # Detect one image.
        if not running:
            # if type(cam) == str:
            #     time.sleep(1 / fps)
            count = 0
        else:
            if count == 2:
                start_time = time.time()
            count = count + 1
            print('remain time %.2f ms' %
                  ((time.time() - t1 - dett - vt) * 1000))
            t1 = time.time()
            timers = defaultdict(Timer)
            with c2_utils.NamedCudaScope(0):
                cls_boxes, _, _ = infer_engine.im_detect_all(
                    model, im, None, timers=timers)
            dett = time.time() - t1
            for k, v in timers.items():
                logger.info(' | {}: {:.3f}s'.format(k, v.average_time))
            demo_vis_one_imageboxes_opencv(im, cls_boxes,
                                           thresh=cls_thresh,
                                           show_box=True,
                                           show_class=True,
                                           class_names=class_names,
                                           color_list=color_list,
                                           cls_sel=cls_sel,
                                           frame=frame)
            vt = time.time() - t1 - dett
            print("det time %.2f ms, vis time %.2f ms" % (dett * 1000,
                                                          vt * 1000))
            if count >= 2:
                avg_fps = (count - 1) / (time.time() - start_time)
                cv2.putText(frame["img"],
                            '{:s} {:.2f}/s'.format('fps', avg_fps), (40, 40),
                            cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 128, 255),
                            lineType=cv2.LINE_AA)
            videoWriter.write(frame["img"])  # write frame to video
            # new_im = frame["img"]
            # combine = cv2.hconcat([ori_im, new_im])
            # frame["img"] = combine
        queue.put(frame)
        videoWriter.write(frame["img"])  # write frame to video
def process_stream(capture, queue, videoWriter, model, cam, fps, fisherROI,
                   class_names, color_list, cls_sel, cls_thresh):
    """Frame loop (originally tangled into main() below, where its inputs
    were undefined): every 8th frame is detected and annotated; all frames
    are recorded and queued for display."""
    global running
    count = 0
    while True:
        frame = {}
        ret, im = capture.read()
        if not ret:
            break
        frame["img"] = im
        # outline the region of interest
        cv2.drawContours(im, [fisherROI], -1, (0, 255, 0), 4)
        # detect one image
        if not running:
            if type(cam) == str:
                time.sleep(1.0 / fps)  # pace playback of file/stream input
            count = 0
            queue.put(frame)
        else:
            count += 1
            if count % 8 == 1:  # detect every 8th frame to keep up with the stream
                st = time.time()
                with c2_utils.NamedCudaScope(0):
                    cls_boxes, _, _ = infer_engine.im_detect_all(
                        model, im, None, timers=None)
                print('one image detection without visualization: %f fps'
                      % (1 / (time.time() - st)))
                demo_vis_one_imageboxes_opencv(
                    im, cls_boxes, thresh=cls_thresh, show_box=True,
                    show_class=True, class_names=class_names,
                    color_list=color_list, cls_sel=cls_sel, frame=frame)
            img = cv2.resize(frame["img"], (960, 540))
            videoWriter.write(img)  # write frame to video
            queue.put(frame)


def main(args):
    logger = logging.getLogger(__name__)
    merge_cfg_from_file(args.cfg)
    cfg.TEST.WEIGHTS = args.weights
    cfg.NUM_GPUS = 1
    assert_and_infer_cfg(cache_urls=False)
    model = infer_engine.initialize_model_from_cfg()
    dummy_coco_dataset = dummy_datasets.get_coco_dataset()
    if os.path.isdir(args.im_or_folder):
        im_list = glob.iglob(args.im_or_folder + '/*.' + args.image_ext)
    else:
        im_list = [args.im_or_folder]
    for i, im_name in enumerate(im_list):
        out_name = os.path.join(
            args.output_dir, '{}'.format(os.path.basename(im_name) + '.pdf'))
        logger.info('Processing {} -> {}'.format(im_name, out_name))
        im = cv2.imread(im_name)
        timers = defaultdict(Timer)
        t = time.time()
        with c2_utils.NamedCudaScope(0):
            cls_boxes, cls_segms, cls_keyps = infer_engine.im_detect_all(
                model, im, None, timers=timers)
        logger.info('Inference time: {:.3f}s'.format(time.time() - t))
        for k, v in timers.items():
            logger.info(' | {}: {:.3f}s'.format(k, v.average_time))
        if i == 0:
            logger.info(
                'Note: inference on the first image will be slower than the '
                'rest (caches and auto-tuning need to warm up)')
        t1 = time.time()
        vis_utils.vis_one_image(
            im[:, :, ::-1],  # BGR -> RGB for visualization
            im_name,
            args.output_dir,
            cls_boxes,
            cls_segms,
            cls_keyps,
            dataset=dummy_coco_dataset,
            box_alpha=0.3,
            show_class=True,
            thresh=0.7,
            kp_thresh=2)
        print("vis time %.2f ms" % ((time.time() - t1) * 1000))
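# `fisherROI` is passed into process_stream() above but never constructed in
# these snippets. For cv2.drawContours it must be an integer polygon of
# (x, y) pixel coordinates; the rectangle below is purely illustrative.
fisherROI = np.array([[100, 100], [860, 100], [860, 440], [100, 440]],
                     dtype=np.int32)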
def main(args):
    datasetName = 'furniture_val'
    logger = logging.getLogger(__name__)
    merge_cfg_from_file(args.cfg)
    cfg.NUM_GPUS = 1
    vis = True  # set to False to skip rendering per-image output
    args.weights = cache_url(args.weights, cfg.DOWNLOAD_CACHE)
    assert_and_infer_cfg(cache_urls=False)
    model = infer_engine.initialize_model_from_cfg(args.weights)
    if args.cls_thrsh_file is not None:
        # one "<class>\t<threshold>" pair per line
        with open(args.cls_thrsh_file, 'r') as f:
            class_thresholds = {
                l.split('\t')[0]: float(l.rstrip().split('\t')[1])
                for l in f}
        print(class_thresholds)
    else:
        class_thresholds = None
    dummy_coco_dataset = dummy_datasets.get_coco_dataset()
    dataset = JsonDataset(datasetName)
    print(args.im_or_folder)
    if os.path.isdir(args.im_or_folder):
        im_list = glob.iglob(args.im_or_folder + '/*.' + args.image_ext)
    else:
        im_list = [args.im_or_folder]
    checkMkdir(args.output_dir)
    outTable = osp.join(args.output_dir, 'HF_CT_Measurement_Detected_Boxes.tsv')
    with open(outTable, 'wb') as fout:
        for i, im_name in enumerate(im_list):
            out_name = os.path.join(
                args.output_dir,
                '{}'.format(os.path.basename(im_name) + '.pdf'))
            im = cv2.imread(im_name)
            timers = defaultdict(Timer)
            t = time.time()
            with c2_utils.NamedCudaScope(0):
                cls_boxes, cls_segms, cls_keyps = infer_engine.im_detect_all(
                    model, im, None, timers=timers)
            # per-image progress/timing logs intentionally disabled here
            if i == 0:
                logger.info(
                    'Note: inference on the first image will be slower than '
                    'the rest (caches and auto-tuning need to warm up)')
            if vis:
                vis_utils.vis_one_image(
                    im[:, :, ::-1],  # BGR -> RGB for visualization
                    im_name,
                    args.output_dir,
                    cls_boxes,
                    cls_segms,
                    cls_keyps,
                    dataset=dummy_coco_dataset,
                    box_alpha=0.3,
                    show_class=True,
                    thresh=0.7,
                    kp_thresh=2,
                    ext=args.output_ext,
                    out_when_no_box=args.out_when_no_box)
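# The per-class threshold file parsed above is tab-separated, one class per
# line. An illustrative example (the class names are assumptions, not taken
# from the original dataset), fields separated by a single tab:
#
#   chair   0.65
#   table   0.70
#   sofa    0.55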
def main(args):
    logger = logging.getLogger(__name__)
    merge_cfg_from_file(args.cfg)
    cfg.TEST.WEIGHTS = args.weights
    cfg.NUM_GPUS = 1
    assert_and_infer_cfg()
    model = infer_engine.initialize_model_from_cfg()
    dummy_coco_dataset = dummy_datasets.get_coco_dataset()
    if os.path.isdir(args.im_or_folder):
        # one sub-directory of images per video; mirror that layout in the output
        for name in os.listdir(args.im_or_folder):  # find directory
            print(name)
            out_subdir = os.path.join(args.output_dir, name)
            if not os.path.isdir(out_subdir):
                os.makedirs(out_subdir)
            for im_base in os.listdir(os.path.join(args.im_or_folder,
                                                   name)):  # find image
                im_name = os.path.join(args.im_or_folder, name, im_base)
                timers = defaultdict(Timer)
                try:
                    out_name = os.path.join(out_subdir,
                                            '{}'.format(im_base + '.pdf'))
                    logger.info('Processing {} -> {}'.format(im_name, out_name))
                    im = cv2.imread(im_name)
                    t = time.time()
                    with c2_utils.NamedCudaScope(0):
                        cls_boxes, cls_segms, cls_keyps = \
                            infer_engine.im_detect_all(
                                model, im, None, timers=timers)
                    logger.info('Inference time: {:.3f}s'.format(
                        time.time() - t))
                except Exception as e:
                    print(e)
                    continue
                for k, v in timers.items():
                    logger.info(' | {}: {:.3f}s'.format(k, v.average_time))
                vis_utils.vis_one_image(
                    im[:, :, ::-1],  # BGR -> RGB for visualization
                    im_name,
                    out_subdir,
                    cls_boxes,
                    cls_segms,
                    cls_keyps,
                    dataset=dummy_coco_dataset,
                    box_alpha=0.3,
                    show_class=True,
                    thresh=0.7,
                    kp_thresh=2)
    else:
        im_list = [args.im_or_folder]
        for i, im_name in enumerate(im_list):
            out_name = os.path.join(
                args.output_dir,
                '{}'.format(os.path.basename(im_name) + '.pdf'))
            logger.info('Processing {} -> {}'.format(im_name, out_name))
            im = cv2.imread(im_name)
            timers = defaultdict(Timer)
            t = time.time()
            with c2_utils.NamedCudaScope(0):
                cls_boxes, cls_segms, cls_keyps = infer_engine.im_detect_all(
                    model, im, None, timers=timers)
            logger.info('Inference time: {:.3f}s'.format(time.time() - t))
            for k, v in timers.items():
                logger.info(' | {}: {:.3f}s'.format(k, v.average_time))
            if i == 0:
                logger.info(
                    'Note: inference on the first image will be slower than '
                    'the rest (caches and auto-tuning need to warm up)')
            vis_utils.vis_one_image(
                im[:, :, ::-1],  # BGR -> RGB for visualization
                im_name,
                args.output_dir,
                cls_boxes,
                cls_segms,
                cls_keyps,
                dataset=dummy_coco_dataset,
                box_alpha=0.3,
                show_class=True,
                thresh=0.7,
                kp_thresh=2)
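# Expected input layout for the directory branch above (names illustrative):
#
#   <args.im_or_folder>/
#       video_A/000001.jpg, 000002.jpg, ...
#       video_B/...
#
# which produces <args.output_dir>/video_A/000001.jpg.pdf, and so on.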
def main(args):
    if os.path.isdir(args.im_or_folder):
        im_list = glob.glob(
            os.path.join(args.im_or_folder,
                         args.image_prefix + '*.' + args.image_ext))
    else:
        assert False, "Must be a folder of frames extracted from a video"
    im_list.sort()
    if os.path.isfile(args.output_csv_file):
        print('CSV file already present: {}'.format(args.output_csv_file))
        return
    logger = logging.getLogger(__name__)
    merge_cfg_from_file(args.cfg)
    cfg.TEST.WEIGHTS = args.weights
    cfg.NUM_GPUS = 1
    assert_and_infer_cfg()
    model = infer_engine.initialize_model_from_cfg()
    dummy_coco_dataset = dummy_datasets.get_coco_dataset()
    video_name = os.path.basename(args.im_or_folder)
    if not os.path.isdir(args.output_dir):
        os.makedirs(args.output_dir)
    det_bbox_df = pd.DataFrame(columns=[
        u'video_name', u'image_name', u'box_ymin', u'box_xmin', u'box_ymax',
        u'box_xmax', u'box_score', u'box_class', u'img_width', u'img_height'
    ])
    video_name_list = []
    image_name_list = []
    box_ymin_list = []
    box_xmin_list = []
    box_ymax_list = []
    box_xmax_list = []
    box_score_list = []
    class_list = []
    img_width_list = []
    img_height_list = []
    logger.info('Processing {} -> {}'.format(args.im_or_folder,
                                             args.output_dir))
    for im_path in tqdm(im_list,
                        desc='{:25}'.format(
                            os.path.basename(args.im_or_folder))):
        im = cv2.imread(im_path)
        im_h, im_w, _ = im.shape
        im_name = os.path.basename(im_path)
        timers = defaultdict(Timer)
        with c2_utils.NamedCudaScope(0):
            cls_boxes, cls_segms, cls_keyps = infer_engine.im_detect_all(
                model, im, None, timers=timers)
        # Keep only the person class (index 1) for the CSV and visualization
        person_boxes = cls_boxes[1]
        viz_boxes = [[] for _ in range(len(cls_boxes))]
        viz_boxes[1] = person_boxes
        for bbox in person_boxes:
            xmin, ymin, xmax, ymax, score = bbox
            video_name_list.append(video_name)
            image_name_list.append(im_name)
            box_xmin_list.append(xmin / im_w)  # coordinates normalized by image size
            box_ymin_list.append(ymin / im_h)
            box_xmax_list.append(xmax / im_w)
            box_ymax_list.append(ymax / im_h)
            box_score_list.append(score)
            class_list.append('person')
            img_width_list.append(im_w)
            img_height_list.append(im_h)
        vis_utils.vis_one_image(
            im[:, :, ::-1],  # BGR -> RGB for visualization
            im_name,
            args.output_dir,
            viz_boxes,
            segms=None,
            keypoints=None,
            dataset=dummy_coco_dataset,
            box_alpha=0.3,
            show_class=True,
            thresh=0.7,
            kp_thresh=2,
            ext='jpg')
    det_bbox_df['video_name'] = video_name_list
    det_bbox_df['image_name'] = image_name_list
    det_bbox_df['box_ymin'] = box_ymin_list
    det_bbox_df['box_xmin'] = box_xmin_list
    det_bbox_df['box_ymax'] = box_ymax_list
    det_bbox_df['box_xmax'] = box_xmax_list
    det_bbox_df['box_score'] = box_score_list
    det_bbox_df['box_class'] = class_list
    det_bbox_df['img_width'] = img_width_list
    det_bbox_df['img_height'] = img_height_list
    det_bbox_df.to_csv(args.output_csv_file)
    print('Wrote results to {}'.format(args.output_csv_file))
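# Usage sketch (not from the original script): reload the CSV written above
# and filter confident person boxes. The column names match the DataFrame
# built in main(); the 0.9 threshold is an arbitrary example.
def load_person_boxes(csv_file, min_score=0.9):
    df = pd.read_csv(csv_file, index_col=0)
    return df[(df['box_class'] == 'person') & (df['box_score'] >= min_score)]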
def main(args):
    logger = logging.getLogger(__name__)
    merge_cfg_from_file(args.cfg)
    cfg.NUM_GPUS = 1
    args.weights = cache_url(args.weights, cfg.DOWNLOAD_CACHE)
    assert_and_infer_cfg(cache_urls=False)
    model = infer_engine.initialize_model_from_cfg(args.weights)
    dummy_coco_dataset = dummy_datasets.get_coco_dataset()
    if os.path.isdir(args.im_or_folder):
        im_list = glob.iglob(args.im_or_folder + '/*.' + args.image_ext)
    else:
        im_list = [args.im_or_folder]
    os.makedirs(args.output_dir, exist_ok=True)
    for i, im_name in enumerate(im_list):
        base_name = os.path.splitext(os.path.basename(im_name))[0]
        out_image = os.path.join(args.output_dir,
                                 '{}'.format(base_name + '.png'))
        out_data = os.path.join(args.output_dir,
                                '{}'.format(base_name + '.pickle'))
        if os.path.isfile(out_image) and os.path.isfile(out_data):
            # logger.info('Already processed {}, skipping'.format(im_name))
            continue
        else:
            logger.info('Processing {} -> {}'.format(im_name, out_image))
        im = cv2.imread(im_name)
        timers = defaultdict(Timer)
        t = time.time()
        with c2_utils.NamedCudaScope(0):
            cls_boxes, cls_segms, cls_keyps = infer_engine.im_detect_all(
                model, im, None, timers=timers)
        logger.info('Inference time: {:.3f}s'.format(time.time() - t))
        for k, v in timers.items():
            logger.info(' | {}: {:.3f}s'.format(k, v.average_time))
        if i == 0:
            logger.info(
                'Note: inference on the first image will be slower than the '
                'rest (caches and auto-tuning need to warm up)')
        if not os.path.isfile(out_data):
            with open(out_data, 'wb') as f:
                pickle.dump(
                    {
                        'boxes': cls_boxes,
                        'segmentations': cls_segms,
                        'keypoints': cls_keyps
                    }, f)
        if not os.path.isfile(out_image):
            logger.info('Visualizing %s', out_image)
            vis_utils.vis_one_image(
                im[:, :, ::-1],  # BGR -> RGB for visualization
                base_name,
                args.output_dir,
                cls_boxes,
                cls_segms,
                cls_keyps,
                dataset=dummy_coco_dataset,
                box_alpha=0.3,
                show_class=True,
                thresh=0.7,
                kp_thresh=2,
                dpi=300,
                ext='png')
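# Usage sketch (an assumption, not in the original): reload one saved result.
# The dict keys match what main() pickles above.
def load_result(pickle_path):
    with open(pickle_path, 'rb') as f:
        data = pickle.load(f)
    return data['boxes'], data['segmentations'], data['keypoints']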