def save_bbox(args, cfg, im_file, im, dataset_dict, boxes, scores):
    """Select the highest-confidence boxes of one image and save them as ``.npz``.

    For every proposal, the box belonging to its highest-scoring class is
    kept (class column 0 is skipped -- presumably the background class).
    NMS is then run once per class over those boxes, each surviving box
    remembers the best score it achieved, and the ``MAX_BOXES`` boxes with
    the highest such score are written to
    ``<args.output_dir>/<text of im_file before its first '.'>.npz``.

    Args:
        args: object providing ``output_dir``.
        cfg: config providing ``MODEL.BUA.EXTRACTOR.MAX_BOXES``.
        im_file: image file name used to derive the output file name.
        im: image array; only ``np.size(im, 0)`` and ``np.size(im, 1)``
            are recorded (as ``image_h`` / ``image_w``).
        dataset_dict: dict with the network input under ``'image'`` and the
            resize factor under ``'im_scale'``.
        boxes: batch of class-specific box tensors; only element 0 is used.
        scores: batch of class-score tensors; only element 0 is used.
    """
    max_boxes = cfg.MODEL.BUA.EXTRACTOR.MAX_BOXES

    scores = scores[0].cpu()
    num_classes = scores.shape[1]

    # Clip every class-specific box to the image extent expressed in
    # un-scaled coordinates (network input size divided by im_scale).
    im_scale = dataset_dict['im_scale']
    blob = dataset_dict['image']
    clipped = BUABoxes(boxes[0].reshape(-1, 4))
    clipped.clip((blob.shape[1] / im_scale, blob.shape[2] / im_scale))
    per_class = clipped.tensor.view(-1, num_classes, 4).cpu()  # R x C x 4

    # Pick, for every proposal at once, the box of its best class
    # (column 0 excluded, hence the +1 offset).
    best_cls = torch.argmax(scores[:, 1:], dim=1) + 1
    rows = torch.arange(per_class.shape[0])
    # Cast to the default dtype: the selection used to be written into a
    # torch.zeros buffer, which has that dtype.
    cls_boxes = per_class[rows, best_cls].to(torch.get_default_dtype())

    # Per-class NMS; a box keeps the highest score among the classes for
    # which it survives suppression.
    max_conf = torch.zeros(scores.shape[0], device=scores.device)
    for cls_ind in range(1, num_classes):
        cls_scores = scores[:, cls_ind]
        keep = nms(cls_boxes, cls_scores, 0.3)
        kept_scores = cls_scores[keep]
        best_so_far = max_conf[keep]
        max_conf[keep] = torch.where(kept_scores > best_so_far,
                                     kept_scores,
                                     best_so_far)

    keep_boxes = torch.argsort(max_conf, descending=True)[:max_boxes]
    image_bboxes = cls_boxes[keep_boxes]

    output_file = os.path.join(args.output_dir, im_file.split('.')[0])
    np.savez_compressed(output_file,
                        bbox=image_bboxes,
                        num_bbox=len(keep_boxes),
                        image_h=np.size(im, 0),
                        image_w=np.size(im, 1))
def save_roi_features(args, cfg, im_file, im, dataset_dict, boxes, scores, features_pooled, attr_scores=None):
    """Save the pooled features, boxes and labels of one image's kept regions.

    Boxes are divided by ``dataset_dict['im_scale']``. NMS is run once per
    class column (column 0 is skipped); every surviving box remembers its
    best score and the class that produced it. Boxes whose best score is at
    least ``CONF_THRESH`` are kept; when fewer than ``MIN_BOXES`` qualify the
    ``MIN_BOXES`` best-scoring boxes are taken instead, and when more than
    ``MAX_BOXES`` qualify only the ``MAX_BOXES`` best are taken.

    The result is written with ``np.savez_compressed`` to
    ``<args.output_dir>/<text of im_file before its first '.'>`` under the
    keys ``x``, ``bbox``, ``num_bbox``, ``image_h``, ``image_w`` and ``info``.

    Args:
        args: object providing ``output_dir``.
        cfg: config providing ``MODEL.BUA.EXTRACTOR.{MIN_BOXES, MAX_BOXES,
            CONF_THRESH}``.
        im_file: image file name used for the output name and ``image_id``.
        im: image array; only its first two dimension sizes are recorded.
        dataset_dict: dict providing ``'im_scale'``.
        boxes: batch of box objects exposing ``.tensor``; element 0 is used.
        scores: batch of class-score tensors; element 0 is used.
        features_pooled: batch of per-box feature tensors; element 0 is used.
        attr_scores: optional batch of attribute-score tensors; when given,
            the arg-max attribute and its score are added to ``info``.
    """
    extractor_cfg = cfg.MODEL.BUA.EXTRACTOR
    min_boxes = extractor_cfg.MIN_BOXES
    max_boxes = extractor_cfg.MAX_BOXES
    conf_thresh = extractor_cfg.CONF_THRESH

    dets = boxes[0].tensor.cpu() / dataset_dict['im_scale']
    cls_prob = scores[0].cpu()
    feats = features_pooled[0].cpu()
    num_rois, num_classes = cls_prob.shape[0], cls_prob.shape[1]

    max_conf = torch.zeros(num_rois, device=cls_prob.device)
    max_obj = torch.zeros(num_rois, dtype=torch.long, device=cls_prob.device)
    for cls_ind in range(1, num_classes):
        cls_scores = cls_prob[:, cls_ind]
        keep = nms(dets, cls_scores, 0.3)
        kept_scores = cls_scores[keep]
        # Evaluate the comparison once, before either tracker is updated.
        improved = kept_scores > max_conf[keep]
        max_obj[keep] = torch.where(improved,
                                    torch.tensor(cls_ind, dtype=torch.long),
                                    max_obj[keep])
        max_conf[keep] = torch.where(improved, kept_scores, max_conf[keep])

    keep_boxes = torch.nonzero(max_conf >= conf_thresh).flatten()
    num_confident = len(keep_boxes)
    if num_confident < min_boxes:
        keep_boxes = torch.argsort(max_conf, descending=True)[:min_boxes]
    elif num_confident > max_boxes:
        keep_boxes = torch.argsort(max_conf, descending=True)[:max_boxes]

    image_id = im_file.split('.')[0]
    image_h = np.size(im, 0)
    image_w = np.size(im, 1)

    info = {
        'image_id': image_id,
        'image_h': image_h,
        'image_w': image_w,
        'num_boxes': len(keep_boxes),
        'objects_id': max_obj[keep_boxes].cpu().numpy(),
        'objects_conf': max_conf[keep_boxes].cpu().numpy(),
    }
    if attr_scores is not None:
        kept_attr = attr_scores[0].cpu()[keep_boxes].numpy()
        info['attrs_id'] = np.argmax(kept_attr, axis=1)
        info['attrs_conf'] = np.max(kept_attr, axis=1)

    output_file = os.path.join(args.output_dir, image_id)
    np.savez_compressed(output_file,
                        x=feats[keep_boxes],
                        bbox=dets[keep_boxes],
                        num_bbox=len(keep_boxes),
                        image_h=image_h,
                        image_w=image_w,
                        info=info)
def extractor_postprocess(boxes, scores, features_pooled, input_per_image, extractor):
    """
    Select the regions of one image to keep and return their features and boxes.

    Boxes are first divided by ``input_per_image["im_scale"]``. NMS is then
    run once per class column of ``scores`` (column 0 is skipped), and every
    surviving box remembers the highest score it obtained. Boxes whose best
    score is at least ``extractor.CONF_THRESH`` are selected. If fewer than
    ``extractor.MIN_BOXES`` qualify, the ``MIN_BOXES`` best-scoring boxes are
    selected instead; if more than ``extractor.MAX_BOXES`` qualify, only the
    ``MAX_BOXES`` best-scoring ones are.

    Args:
        boxes: box tensor for one image, one row per region.
        scores: class-score tensor for the same regions, one column per class.
        features_pooled: per-region feature tensor, indexed like ``boxes``.
        input_per_image (dict): must provide ``"im_scale"``.
        extractor: object providing ``MIN_BOXES``, ``MAX_BOXES`` and
            ``CONF_THRESH``.

    Returns:
        tuple: ``(image_feat, image_bboxes)`` -- the rows of
        ``features_pooled`` and of the rescaled boxes for the selected regions.
    """
    min_boxes, max_boxes = extractor.MIN_BOXES, extractor.MAX_BOXES
    conf_thresh = extractor.CONF_THRESH

    dets = boxes / input_per_image["im_scale"]
    num_rois, num_classes = scores.shape[0], scores.shape[1]

    # Best score each box reaches among the classes for which it survives NMS.
    max_conf = torch.zeros(num_rois, device=scores.device)
    for cls_ind in range(1, num_classes):
        cls_scores = scores[:, cls_ind]
        keep = nms(dets, cls_scores, 0.3)
        kept_scores = cls_scores[keep]
        best_so_far = max_conf[keep]
        max_conf[keep] = torch.where(kept_scores > best_so_far,
                                     kept_scores,
                                     best_so_far)

    keep_boxes = torch.nonzero(max_conf >= conf_thresh).flatten()
    num_confident = len(keep_boxes)
    if num_confident < min_boxes:
        keep_boxes = torch.argsort(max_conf, descending=True)[:min_boxes]
    elif num_confident > max_boxes:
        keep_boxes = torch.argsort(max_conf, descending=True)[:max_boxes]

    return features_pooled[keep_boxes], dets[keep_boxes]
def predict(self, image):
    """Return the predicted class name of every confident region in ``image``.

    The image is converted to an array and its last axis is reversed
    (the original comment calls this "opencv format"), then run through
    ``self._model``. NMS is run once per class column (column 0 skipped);
    boxes whose best surviving score is at least ``self._threshold`` are
    kept, bounded below by ``self._min_boxes`` and above by
    ``self._max_boxes``. Each kept box is labelled with the arg-max over
    its class scores, ignoring column 0.

    Args:
        image: anything ``np.array`` turns into a 3-dimensional array.

    Returns:
        list: one entry of ``self._classes`` per kept box. This is a
        best-effort call: on any exception the error is printed and an
        empty list is returned.
    """
    try:
        # convert image to opencv format
        x = np.array(image)
        x = x[:, :, ::-1].copy()

        dataset_dict = get_image_blob(x, self._cfg.MODEL.PIXEL_MEAN)

        with torch.set_grad_enabled(False):
            # Only boxes and class scores are needed to produce labels; the
            # pooled features and attribute scores are deliberately ignored.
            boxes, scores, _, _ = self._model([dataset_dict])

        dets = boxes[0].tensor.cpu() / dataset_dict['im_scale']
        scores = scores[0].cpu()

        # Best score each box reaches among the classes for which it
        # survives NMS.
        max_conf = torch.zeros(scores.shape[0], device=scores.device)
        for cls_ind in range(1, scores.shape[1]):
            cls_scores = scores[:, cls_ind]
            keep = nms(dets, cls_scores, 0.3)
            kept_scores = cls_scores[keep]
            best_so_far = max_conf[keep]
            max_conf[keep] = torch.where(kept_scores > best_so_far,
                                         kept_scores,
                                         best_so_far)

        keep_boxes = torch.nonzero(max_conf >= self._threshold).flatten()
        if len(keep_boxes) < self._min_boxes:
            keep_boxes = torch.argsort(max_conf,
                                       descending=True)[:self._min_boxes]
        elif len(keep_boxes) > self._max_boxes:
            keep_boxes = torch.argsort(max_conf,
                                       descending=True)[:self._max_boxes]

        # Column 0 is excluded from the arg-max, hence the +1 when mapping
        # back into self._classes.
        objects = np.argmax(scores[keep_boxes].numpy()[:, 1:], axis=1)
        return [self._classes[obj + 1] for obj in objects]
    except Exception as e:
        print(e, flush=True)
        return []
def main():
    """Extract region features for every image in a directory into ``.npz`` files.

    Command-line options: ``--config-file``, ``--mode``, ``--out-dir``,
    ``--image-dir``, ``--resume`` and trailing config overrides (``opts``).

    For each readable image the model is run without gradients, NMS is run
    once per class column (column 0 skipped), and boxes whose best surviving
    score is at least 0.2 are kept, bounded to between 10 and 100 boxes.
    Features, boxes and an ``info`` dict are saved to
    ``<out-dir>/<text of the file name before its first '.'>.npz``.

    Directory entries that ``cv2.imread`` cannot decode are reported and
    skipped, and the output directory is created when missing.
    """
    parser = argparse.ArgumentParser(
        description="PyTorch Object Detection2 Inference")
    parser.add_argument(
        "--config-file",
        default="configs/bua-caffe/extract-bua-caffe-r101.yaml",
        metavar="FILE",
        help="path to config file",
    )
    parser.add_argument("--mode", default="caffe", type=str,
                        help="bua_caffe, ...")
    parser.add_argument('--out-dir', dest='output_dir',
                        help='output directory for features',
                        default="features")
    parser.add_argument('--image-dir', dest='image_dir',
                        help='directory with images',
                        default="image")
    parser.add_argument(
        "--resume",
        action="store_true",
        help="whether to attempt to resume from the checkpoint directory",
    )
    parser.add_argument(
        "opts",
        help="Modify config options using the command-line",
        default=None,
        nargs=argparse.REMAINDER,
    )

    args = parser.parse_args()
    cfg = setup(args)

    MIN_BOXES = 10
    MAX_BOXES = 100
    CONF_THRESH = 0.2

    model = DefaultTrainer.build_model(cfg)
    DetectionCheckpointer(model, save_dir=cfg.OUTPUT_DIR).resume_or_load(
        cfg.MODEL.WEIGHTS, resume=args.resume)

    # Extract features.
    imglist = os.listdir(args.image_dir)
    num_images = len(imglist)
    print('Number of images: {}.'.format(num_images))
    model.eval()

    # np.savez_compressed does not create missing parent directories.
    os.makedirs(args.output_dir, exist_ok=True)

    for im_file in tqdm.tqdm(imglist):
        im_path = os.path.join(args.image_dir, im_file)
        im = cv2.imread(im_path)
        if im is None:
            # cv2.imread signals an unreadable / non-image file with None;
            # skip it instead of crashing halfway through the directory.
            print('Skipping unreadable image: {}'.format(im_path))
            continue

        dataset_dict = get_image_blob(im)

        with torch.set_grad_enabled(False):
            if cfg.MODEL.BUA.ATTRIBUTE_ON:
                boxes, scores, features_pooled, attr_scores = model(
                    [dataset_dict])
            else:
                boxes, scores, features_pooled = model([dataset_dict])

        dets = boxes[0].tensor.cpu() / dataset_dict['im_scale']
        scores = scores[0].cpu()
        feats = features_pooled[0].cpu()

        # Best score each box reaches among the classes for which it
        # survives NMS.
        max_conf = torch.zeros(scores.shape[0], device=scores.device)
        for cls_ind in range(1, scores.shape[1]):
            cls_scores = scores[:, cls_ind]
            keep = nms(dets, cls_scores, 0.3)
            kept_scores = cls_scores[keep]
            best_so_far = max_conf[keep]
            max_conf[keep] = torch.where(kept_scores > best_so_far,
                                         kept_scores,
                                         best_so_far)

        keep_boxes = torch.nonzero(max_conf >= CONF_THRESH).flatten()
        if len(keep_boxes) < MIN_BOXES:
            keep_boxes = torch.argsort(max_conf, descending=True)[:MIN_BOXES]
        elif len(keep_boxes) > MAX_BOXES:
            keep_boxes = torch.argsort(max_conf, descending=True)[:MAX_BOXES]

        image_feat = feats[keep_boxes]
        image_bboxes = dets[keep_boxes]
        kept_scores_np = scores[keep_boxes].numpy()

        image_id = im_file.split('.')[0]
        info = {
            'image_id': image_id,
            'image_h': np.size(im, 0),
            'image_w': np.size(im, 1),
            'num_boxes': len(keep_boxes),
            'objects_id': np.argmax(kept_scores_np, axis=1),
            'objects_conf': np.max(kept_scores_np, axis=1),
        }
        if cfg.MODEL.BUA.ATTRIBUTE_ON:
            kept_attr_np = attr_scores[0].cpu()[keep_boxes].numpy()
            info['attrs_id'] = np.argmax(kept_attr_np, axis=1)
            info['attrs_conf'] = np.max(kept_attr_np, axis=1)

        output_file = os.path.join(args.output_dir, image_id)
        np.savez_compressed(output_file,
                            x=image_feat,
                            bbox=image_bboxes,
                            num_bbox=len(keep_boxes),
                            image_h=np.size(im, 0),
                            image_w=np.size(im, 1),
                            info=info)
def extract_feat(image_path, min_boxes=10, max_boxes=100, conf_thresh=0.2):
    """Run the detector on one image file and return its kept region features.

    Reads the module-level ``cfg`` and ``net_img``. The image is loaded with
    ``cv2.imread``, its shape is printed, and the model is run without
    gradients. NMS is run once per class column (column 0 skipped); boxes
    whose best surviving score is at least ``conf_thresh`` are kept, bounded
    below by ``min_boxes`` and above by ``max_boxes``.

    Args:
        image_path: path of the image to load.
        min_boxes: minimum number of boxes to return (default 10).
        max_boxes: maximum number of boxes to return (default 100).
        conf_thresh: score a box must reach to be kept (default 0.2).

    Returns:
        tuple: ``(image_feat, image_bboxes, (height, width))`` -- the pooled
        features and rescaled boxes of the kept regions, and the first two
        entries of the loaded image's shape.
    """
    im = cv2.imread(image_path)
    print('image shape:', im.shape)
    dataset_dict = get_image_blob(im)

    with torch.set_grad_enabled(False):
        # The model returns an extra attribute-score output when attributes
        # are enabled; it is not needed for the returned values.
        if cfg.MODEL.BUA.ATTRIBUTE_ON:
            boxes, scores, features_pooled, _ = net_img([dataset_dict])
        else:
            boxes, scores, features_pooled = net_img([dataset_dict])

    dets = boxes[0].tensor.cpu() / dataset_dict['im_scale']
    scores = scores[0].cpu()
    feats = features_pooled[0].cpu()

    # Best score each box reaches among the classes for which it survives NMS.
    max_conf = torch.zeros(scores.shape[0], device=scores.device)
    for cls_ind in range(1, scores.shape[1]):
        cls_scores = scores[:, cls_ind]
        keep = nms(dets, cls_scores, 0.3)
        kept_scores = cls_scores[keep]
        best_so_far = max_conf[keep]
        max_conf[keep] = torch.where(kept_scores > best_so_far,
                                     kept_scores,
                                     best_so_far)

    keep_boxes = torch.nonzero(max_conf >= conf_thresh).flatten()
    if len(keep_boxes) < min_boxes:
        keep_boxes = torch.argsort(max_conf, descending=True)[:min_boxes]
    elif len(keep_boxes) > max_boxes:
        keep_boxes = torch.argsort(max_conf, descending=True)[:max_boxes]

    return feats[keep_boxes], dets[keep_boxes], im.shape[:2]
# Demo fragment: run the detector on one image and pick the regions to show.
# Relies on names defined outside this fragment: im_file, cfg, model,
# CONF_THRESH, MIN_BOXES, MAX_BOXES.

# Load the image and build the model input from it.
im = cv2.imread(im_file)
dataset_dict = get_image_blob(im, cfg.MODEL.PIXEL_MEAN)

# Inference only: run the model with gradient tracking disabled.
with torch.set_grad_enabled(False):
    boxes, scores, features_pooled, attr_scores = model([dataset_dict])

# Take the first entry of every output and move it to the CPU. Dividing by
# im_scale presumably maps boxes back to original-image coordinates --
# confirm against get_image_blob.
dets = boxes[0].tensor.cpu() / dataset_dict['im_scale']
scores = scores[0].cpu()
feats = features_pooled[0].cpu()
attr_scores = attr_scores[0].cpu()

# For every box, track the best score it reaches among the classes for which
# it survives NMS. Class column 0 is skipped (presumably background), and 0.3
# is presumably the NMS overlap threshold -- confirm against nms.
max_conf = torch.zeros((scores.shape[0])).to(scores.device)
for cls_ind in range(1, scores.shape[1]):
    cls_scores = scores[:, cls_ind]
    keep = nms(dets, cls_scores, 0.3)
    max_conf[keep] = torch.where(cls_scores[keep] > max_conf[keep], cls_scores[keep], max_conf[keep])

# Keep boxes that reach CONF_THRESH; fall back to the MIN_BOXES best boxes
# when too few qualify, and cap at the MAX_BOXES best when too many do.
keep_boxes = torch.nonzero(max_conf >= CONF_THRESH).flatten()
if len(keep_boxes) < MIN_BOXES:
    keep_boxes = torch.argsort(max_conf, descending=True)[:MIN_BOXES]
elif len(keep_boxes) > MAX_BOXES:
    keep_boxes = torch.argsort(max_conf, descending=True)[:MAX_BOXES]

# Convert BGR -> RGB for matplotlib and draw the image without axes.
im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
plt.axis('off')
plt.imshow(im)

# Kept boxes as a NumPy array, and for each the arg-max class over columns
# 1.. (so the values are offset by one relative to the score columns).
boxes = dets[keep_boxes].numpy()
objects = np.argmax(scores[keep_boxes].numpy()[:, 1:], axis=1)
def main():
    """Extract region features for one image or a directory into HDF5 files.

    Command-line options: ``--config-file``, ``--image-dir`` or ``--image``,
    ``--mode``, ``--out-dir``, ``--out-name`` and trailing config overrides
    (``opts``).

    For each readable image the model is run without gradients, NMS is run
    once per class column (column 0 skipped), and boxes whose best surviving
    score is at least ``CONF_THRESH`` are kept, bounded by ``MIN_BOXES`` and
    ``MAX_BOXES`` (all read from ``cfg.MODEL.BUA.EXTRACTOR``). Outputs, all in
    ``--out-dir`` and keyed by image base name:

    * ``<out-name>_fc.h5``  -- mean of the kept region features,
    * ``<out-name>_att.h5`` -- the kept region features,
    * ``<out-name>_box.h5`` -- the kept boxes,
    * ``<out-name>_info.pkl`` -- a dict of per-image metadata (object and,
      when available, attribute names with their scores).

    The metadata pickle is written in a ``finally`` clause, so whatever was
    processed before an error or interrupt is still saved.
    """
    parser = argparse.ArgumentParser(
        description="PyTorch Object Detection2 Inference")
    parser.add_argument(
        "--config-file",
        default="configs/bua-caffe/extract-bua-caffe-r101-fix36.yaml",
        metavar="FILE",
        help="path to config file",
    )

    # --image-dir or --image
    parser.add_argument('--image-dir',
                        dest='image_dir',
                        help='directory with images',
                        default="datasets/demos")
    parser.add_argument(
        '--image', dest='image',
        help='image')  # e.g. datasets/demos/COCO_val2014_000000060623.jpg
    parser.add_argument("--mode",
                        default="caffe",
                        type=str,
                        help="bua_caffe, ...")
    parser.add_argument('--out-dir',
                        dest='output_dir',
                        help='output directory for features',
                        default="features")
    parser.add_argument('--out-name',
                        dest='output_name',
                        help='output file name for features',
                        default="demos")
    parser.add_argument(
        "opts",
        help="Modify config options using the command-line",
        default=None,
        nargs=argparse.REMAINDER,
    )

    args = parser.parse_args()
    cfg = setup(args)

    model = DefaultTrainer.build_model(cfg)
    DetectionCheckpointer(model, save_dir=cfg.OUTPUT_DIR).resume_or_load(
        cfg.MODEL.WEIGHTS, resume=True)
    model.eval()

    # Extract features.
    if args.image:
        imglist = [args.image]
    else:
        imglist = os.listdir(args.image_dir)
        imglist = [os.path.join(args.image_dir, fn) for fn in imglist]
    num_images = len(imglist)
    print('Number of images: {}.'.format(num_images))
    imglist.sort()

    MIN_BOXES = cfg.MODEL.BUA.EXTRACTOR.MIN_BOXES
    MAX_BOXES = cfg.MODEL.BUA.EXTRACTOR.MAX_BOXES
    CONF_THRESH = cfg.MODEL.BUA.EXTRACTOR.CONF_THRESH

    # Each vocabulary line may list comma-separated synonyms; only the first
    # one (lower-cased, stripped) is used as the display name.
    with open('evaluation/objects_vocab.txt') as f:
        classes = np.array(
            [line.split(',')[0].lower().strip() for line in f])
    with open('evaluation/attributes_vocab.txt') as f:
        attributes = np.array(
            [line.split(',')[0].lower().strip() for line in f])

    os.makedirs(args.output_dir, exist_ok=True)

    fc_path = os.path.join(args.output_dir, '%s_fc.h5' % args.output_name)
    att_path = os.path.join(args.output_dir, '%s_att.h5' % args.output_name)
    box_path = os.path.join(args.output_dir, '%s_box.h5' % args.output_name)
    info_path = os.path.join(args.output_dir,
                             '%s_info.pkl' % args.output_name)

    # NOTE(review): the HDF5 files are opened in append mode; create_dataset
    # presumably fails when img_nm already exists from an earlier run --
    # confirm the intended re-run behaviour.
    with h5py.File(fc_path, 'a') as file_fc, \
            h5py.File(att_path, 'a') as file_att, \
            h5py.File(box_path, 'a') as file_box:
        informations = {}
        try:
            for im_file in tqdm.tqdm(imglist):
                img_nm = os.path.basename(im_file)
                im = cv2.imread(im_file)
                if im is None:
                    print(im_file, "is illegal!")
                    continue
                dataset_dict = get_image_blob(im, cfg.MODEL.PIXEL_MEAN)

                # extract roi features
                attr_scores = None
                with torch.set_grad_enabled(False):
                    if cfg.MODEL.BUA.ATTRIBUTE_ON:
                        boxes, scores, features_pooled, attr_scores = model(
                            [dataset_dict])
                    else:
                        boxes, scores, features_pooled = model(
                            [dataset_dict])

                dets = boxes[0].tensor.cpu() / dataset_dict['im_scale']
                scores = scores[0].cpu()
                feats = features_pooled[0].cpu()

                # Best score each box reaches among the classes for which
                # it survives NMS.
                max_conf = torch.zeros(scores.shape[0],
                                       device=scores.device)
                for cls_ind in range(1, scores.shape[1]):
                    cls_scores = scores[:, cls_ind]
                    keep = nms(dets, cls_scores, 0.3)
                    kept_scores = cls_scores[keep]
                    best_so_far = max_conf[keep]
                    max_conf[keep] = torch.where(kept_scores > best_so_far,
                                                 kept_scores,
                                                 best_so_far)

                keep_boxes = torch.nonzero(max_conf >= CONF_THRESH).flatten()
                if len(keep_boxes) < MIN_BOXES:
                    keep_boxes = torch.argsort(
                        max_conf, descending=True)[:MIN_BOXES]
                elif len(keep_boxes) > MAX_BOXES:
                    keep_boxes = torch.argsort(
                        max_conf, descending=True)[:MAX_BOXES]

                image_feat = feats[keep_boxes].numpy()
                image_bboxes = dets[keep_boxes].numpy()

                # Column 0 is excluded, so arg-max indices line up with the
                # vocabulary arrays directly.
                fg_scores = scores[keep_boxes].numpy()[:, 1:]
                info = {
                    'image_name': img_nm,
                    'image_h': np.size(im, 0),
                    'image_w': np.size(im, 1),
                    'num_boxes': len(keep_boxes),
                    'objects': classes[np.argmax(fg_scores, axis=1)],
                    'objects_conf': np.max(fg_scores, axis=1),
                }
                if attr_scores is not None:
                    fg_attr = attr_scores[0].cpu()[keep_boxes].numpy()[:, 1:]
                    info['attrs'] = attributes[np.argmax(fg_attr, axis=1)]
                    info['attrs_conf'] = np.max(fg_attr, axis=1)

                file_fc.create_dataset(img_nm, data=image_feat.mean(0))
                file_att.create_dataset(img_nm, data=image_feat)
                file_box.create_dataset(img_nm, data=image_bboxes)
                informations[img_nm] = info
        finally:
            # The surrounding `with` closes the HDF5 files; the pickle is
            # written through its own context manager so its handle is
            # closed (and flushed) deterministically as well.
            with open(info_path, 'wb') as info_file:
                pickle.dump(informations, info_file)
            print(
                '--------------------------------------------------------------------'
            )