def main():
    parser = argparse.ArgumentParser(
        description="PyTorch Object Detection Training")
    parser.add_argument(
        "--config-file",
        default="",
        metavar="FILE",
        help="path to config file",
        type=str,
    )
    args = parser.parse_args()

    cfg.merge_from_file(args.config_file)
    cfg.merge_from_list(["MODEL.DEVICE", "cpu"])

    coco_demo = COCODemo(
        cfg,
        min_image_size=800,
        confidence_threshold=0.7,
    )

    dataDir = '/depudata1/coco'
    dataType = 'val2017'
    annFile = '{}/annotations/instances_{}.json'.format(dataDir, dataType)
    coco = COCO(annFile)

    # display COCO categories and supercategories
    cats = coco.loadCats(coco.getCatIds())
    nms = [cat['name'] for cat in cats]
    print('COCO categories: \n{}\n'.format(' '.join(nms)))

    nms = set([cat['supercategory'] for cat in cats])
    print('COCO supercategories: \n{}'.format(' '.join(nms)))

    catIds = coco.getCatIds(catNms=['person'])
    imgIds = coco.getImgIds(catIds=catIds)
    # imgIds = coco.getImgIds(imgIds = [341681])
    curr_img_id = imgIds[np.random.randint(0, len(imgIds))]
    img = coco.loadImgs(curr_img_id)[0]

    image = io.imread(img['coco_url'])
    # image = load("http://farm3.staticflickr.com/2469/3915380994_2e611b1779_z.jpg")
    plt.imshow(image)

    predictions = coco_demo.run_on_opencv_image(image)
    plt.imshow(predictions)
    str1 = '%s.jpg' % curr_img_id
    plt.savefig(str1)
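# A minimal sketch (not part of the original script) that shows the raw COCO image and
# the COCODemo overlay side by side instead of drawing both into the same axes; it
# assumes the same `image`, `predictions`, and `curr_img_id` variables as above and
# that matplotlib is imported as `plt`.
def show_side_by_side(image, predictions, curr_img_id):
    fig, (ax_left, ax_right) = plt.subplots(1, 2, figsize=(12, 6))
    ax_left.imshow(image)
    ax_left.set_title('input')
    ax_left.axis('off')
    ax_right.imshow(predictions)
    ax_right.set_title('detections')
    ax_right.axis('off')
    fig.savefig('%s_side_by_side.jpg' % curr_img_id)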
class MaskRCNN:
    def __init__(self, confidence_threshold, area_threshold, classes=None):
        # update the config options with the config file
        cfg.merge_from_file(config_file)
        # manual override some options
        cfg.merge_from_list(["MODEL.DEVICE", "cpu"])

        self.area_threshold = area_threshold
        self.demo = COCODemo(cfg, confidence_threshold=confidence_threshold)

        # Load classes and determine indices of desired object classes
        coco_classes = open('detection/classes.txt').read().strip().split('\n')
        if not classes:
            classes = coco_classes
        self.classes = [coco_classes.index(name) for name in classes]

    def detect(self, image):
        # Retain only predictions with confidence above confidence threshold
        predictions = self.demo.compute_prediction(image)
        predictions = self.demo.select_top_predictions(predictions)
        masks = predictions.get_field("mask").numpy()
        labels = predictions.get_field("labels").numpy()
        scores = predictions.get_field("scores").numpy()
        masks = np.squeeze(masks, 1)

        # Retain only desired object classes
        matches = [i for i, class_id in enumerate(labels) if class_id in self.classes]
        scores = scores[matches]
        masks = masks[matches, :, :]

        # Retain only masks below area threshold
        total_area = image.shape[0] * image.shape[1]
        areas = [np.count_nonzero(mask) for mask in masks]
        matches = [i for i, mask_area in enumerate(areas)
                   if (mask_area / total_area) <= self.area_threshold]
        n_rejects = scores.shape[0] - len(matches)
        if n_rejects > 0:
            logging.getLogger('progress').info(
                'Rejected ' + str(n_rejects) + ' due to area threshold')
        scores = scores[matches]
        masks = masks[matches, :, :]
        # keep areas aligned with the filtered scores/masks before logging
        areas = [areas[i] for i in matches]

        for score, area in zip(scores, areas):
            logging.getLogger('info').info(
                str(score) + '\t' + str(area) + '\t' + str(area / total_area))

        masks = np.clip(masks * 255., 0, 255).astype(np.uint8)
        return scores, masks
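# A minimal usage sketch under the same assumptions as the class above: a module-level
# `config_file` pointing at a Mask R-CNN config, `detection/classes.txt` on disk with the
# listed class names, and `cv2`/`numpy` imported. The image path is hypothetical.
detector = MaskRCNN(confidence_threshold=0.7, area_threshold=0.5,
                    classes=['person', 'car'])
image = cv2.imread('example.jpg')          # BGR image, as COCODemo expects
scores, masks = detector.detect(image)
for score, mask in zip(scores, masks):
    print(score, np.count_nonzero(mask))   # confidence and mask area in pixels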
def main():
    # this makes our figures bigger
    pylab.rcParams['figure.figsize'] = 20, 12

    config_file = '../configs/caffe2/e2e_mask_rcnn_R_50_FPN_1x_caffe2.yaml'
    # config_file = '../configs/e2e_mask_rcnn_fbnet.yaml'

    # update the config options with the config file
    cfg.merge_from_file(config_file)
    # manual override some options
    cfg.merge_from_list(["MODEL.DEVICE", "cpu"])

    coco_demo = COCODemo(cfg,
                         min_image_size=800,
                         confidence_threshold=0.3,
                         load_weight='../Trained Model/new/model_0027500.pth')

    # from http://cocodataset.org/#explore?id=345434
    # image = load("https://www.abc.net.au/news/image/9857250-3x2-940x627.jpg")
    # imshow(image)

    # compute predictions
    # image = Image.open('./0008.png').convert('RGB')
    # '/mnt/DATA/Download/Dataset/t-less_v2/test_primesense/01/rgb/0004.png'
    image = imread(
        '/mnt/DATA/Download/Dataset/t-less_v2/train_primesense/30/rgb/0001.png'
    )  # imread('./Test.jpg')
    # convert RGB to BGR, as expected by COCODemo
    image = np.array(image)[:, :, [2, 1, 0]]
    predictions = coco_demo.run_on_opencv_image(image)
    imshow(predictions)
def main():
    cfg.merge_from_file(
        "../configs/caffe2/e2e_mask_rcnn_R_50_FPN_1x_caffe2.yaml")
    # cfg.merge_from_list(None)
    cfg.freeze()

    coco_demo = COCODemo(
        cfg,
        confidence_threshold=0.7,
        show_mask_heatmaps=False,
        masks_per_dim=2,
        min_image_size=224,
    )

    img = cv2.imread("test.jpg")
    composite = coco_demo.run_on_opencv_image(img)
    print(composite is None)
    cv2.imwrite("./result.jpg", composite)
class CaptionDataset(Dataset):
    def __init__(self, input_file, use_maskrcnn_benchmark=True, transform=None):
        h = h5py.File(input_file, 'r')
        self.imgs = h['images']
        self.captions = h['captions']
        self.captions_per_img = h.attrs['captions_per_image']
        self.coco_demo = COCODemo(cfg)
        assert self.captions.shape[0] // self.imgs.shape[0] == self.captions_per_img
        if transform is not None:
            # if custom transform rules are defined, use them
            self.transform = transform
        elif use_maskrcnn_benchmark:
            # if we use maskrcnn_benchmark as our encoder, we need to follow
            # the corresponding image pre-processing procedure
            self.transform = self.coco_demo.build_transform()
        else:
            self.transform = trn.Compose([trn.Resize(255), trn.ToTensor()])
        assert self.imgs.shape[0] * self.captions_per_img == self.captions.shape[0]

    def __getitem__(self, item):
        img = self.imgs[item // self.captions_per_img]
        img = self.transform(img)
        caption = self.captions[item]
        caption = torch.from_numpy(caption).long()
        data = {'image': img, 'caption': caption}
        return data

    def __len__(self):
        return self.captions.shape[0]
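# A minimal usage sketch, assuming an HDF5 file laid out as the dataset above expects
# (an 'images' dataset, a 'captions' dataset, and a 'captions_per_image' attribute);
# the file name is hypothetical.
from torch.utils.data import DataLoader

dataset = CaptionDataset('coco_captions.h5', use_maskrcnn_benchmark=True)
# num_workers=0 because the HDF5 handle opened in __init__ is not fork-safe
loader = DataLoader(dataset, batch_size=8, shuffle=True, num_workers=0)
for batch in loader:
    images, captions = batch['image'], batch['caption']
    break  # one batch is enough for a smoke test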
def create_nuscenes_dataset(version, output_path, mode='train'):
    """ For each video, we store a sequence of data with the following information:
        - image (H, W, 3) jpg
        - instance segmentation (H, W) np.array<np.uint8> with values from
          [0, MAX_INSTANCES-1]. Note that the instances are not aligned with
          position/velocity, i.e. id 1 in the instance segmentation corresponds to
          element 0 in position/velocity. When including more classes, we can store
          a 4D tensor (N_CLASSES, H, W)
        - position (MAX_INSTANCES, 3) np.array
        - velocity (MAX_INSTANCES, 3) np.array
    """
    # Yaw angle different on camera: https://github.com/nutonomy/nuscenes-devkit/issues/21

    # Load Mask R-CNN
    # update the config options with the config file
    cfg.merge_from_file(MASK_RCNN_CONFIG_FILE)
    # manual override some options
    # cfg.merge_from_list(['MODEL.DEVICE', 'cpu'])
    mask_rcnn = COCODemo(
        cfg,
        confidence_threshold=0.8,
    )

    # Load NuScenes
    nusc = NuScenes(version=version, dataroot=NUSCENES_ROOT, verbose=True)
    scene_splits = create_splits_scenes()

    print('Begin iterating over Nuscenes')
    print('-' * 30)
    # Loop over dataset
    for scene in nusc.scene:
        # Ensure the scene belongs to the split
        if scene['name'] not in scene_splits[mode]:
            continue

        scene_path = os.path.join(output_path, mode, scene['name'])
        print('scene_path: {}'.format(scene_path))
        os.makedirs(scene_path, exist_ok=True)

        t = 0
        sample_token = scene['first_sample_token']
        while sample_token:
            print('Image {}'.format(t))
            sample = nusc.get('sample', sample_token)
            data = match_instance_seg_and_bbox(nusc, mask_rcnn, sample)

            if data is not None:
                data['image'].save(os.path.join(scene_path, '{:04d}_image_tmp.jpg'.format(t)))
                np.save(os.path.join(scene_path, '{:04d}_instance_seg_tmp.npy'.format(t)), data['instance_seg'])
                np.save(os.path.join(scene_path, '{:04d}_position_tmp.npy'.format(t)), data['position'])
                np.save(os.path.join(scene_path, '{:04d}_velocity_tmp.npy'.format(t)), data['velocity'])
                np.save(os.path.join(scene_path, '{:04d}_orientation_tmp.npy'.format(t)), data['orientation'])
                np.save(os.path.join(scene_path, '{:04d}_size_tmp.npy'.format(t)), data['size'])
                np.save(os.path.join(scene_path, '{:04d}_token_tmp.npy'.format(t)), data['token'])
                np.save(os.path.join(scene_path, '{:04d}_intrinsics_tmp.npy'.format(t)), data['intrinsics'])
                np.save(os.path.join(scene_path, '{:04d}_sample_token_tmp.npy'.format(t)), np.array([sample_token]))

            sample_token = sample['next']
            t += 1

        link_instance_ids(nusc, scene_path)
        print('------------------\n')

    print('Computing depth maps')
    print('-' * 30)
    # Compute depth map here.
    generate_depth(output_path, mode)
    print('Dataset saved.')
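# A minimal invocation sketch, assuming the module-level constants used above
# (MASK_RCNN_CONFIG_FILE, NUSCENES_ROOT) are configured; the output path is
# hypothetical and 'v1.0-mini' is the small public NuScenes split.
if __name__ == '__main__':
    create_nuscenes_dataset(version='v1.0-mini',
                            output_path='./nuscenes_processed',
                            mode='train')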
        0.4156557023525238, 0.4763634502887726, 0.4724511504173279,
        0.4915047585964203, 0.5006274580955505, 0.5124194622039795,
        0.47004589438438416, 0.5374764204025269, 0.5876904129981995,
        0.49395060539245605, 0.5102297067642212, 0.46571290493011475,
        0.5164387822151184, 0.540651798248291, 0.5323763489723206,
        0.5048757195472717, 0.5302401781082153, 0.48333442211151123,
        0.5109739303588867, 0.4077408015727997, 0.5764586925506592,
        0.5109297037124634, 0.4685552418231964, 0.5148998498916626,
        0.4224434792995453, 0.4998510777950287
    ]

    demo_im_names = os.listdir(args.images_dir)

    # prepare object that handles inference plus adds predictions on top of image
    coco_demo = COCODemo(cfg,
                         confidence_thresholds_for_classes=thresholds_for_classes,
                         min_image_size=args.min_image_size)

    count = 0
    start = time.time()
    for im_name in demo_im_names:
        img = cv2.imread(os.path.join(args.images_dir, im_name))
        count += 1
        if img is None:
            continue
        start_time = time.time()
        composite = coco_demo.run_on_opencv_image(img)
        # print(composite)
        time_pytorch = (time.time() - start) / count
        print("Pytorch average inference time: {:.2f}s".format(time_pytorch))
import torch
import numpy as np
from tqdm import tqdm

from maskrcnn_benchmark.config import cfg
from maskrcnn_benchmark.data.datasets.evaluation import evaluate
from ..utils.comm import is_main_process, get_world_size
from ..utils.comm import all_gather
from ..utils.comm import synchronize
from ..utils.timer import Timer, get_time_str
from .bbox_aug import im_detect_bbox_aug
from demo.predictor import COCODemo

coco_demo = COCODemo(
    cfg,
    min_image_size=800,
    confidence_threshold=0.7,
)


def compute_on_dataset(model, data_loader, device, timer=None):
    model.eval()
    results_dict = {}
    cpu_device = torch.device("cpu")
    for _, batch in enumerate(tqdm(data_loader)):
        images, targets, image_ids = batch
        with torch.no_grad():
            if timer:
                timer.tic()
            if cfg.TEST.BBOX_AUG.ENABLED:
                output = im_detect_bbox_aug(model, images, device)
import time

# set image dpi
plt.rcParams['figure.dpi'] = 300

# define the paths of the image files
dataset_dir = './datasets/kiktech/skyeye_data'
annotations_dir = '{}/annotations'.format(dataset_dir)
test_images_dir = '{}/train'.format(dataset_dir)
test_ann_file = '{}/kiktech_train.json'.format(annotations_dir)
coco = COCO(test_ann_file)

config_file = './configs/retinanet/retinanet_R-50-FPN_1x_quick.yaml'
cfg.merge_from_file(config_file)
coco_demo = COCODemo(cfg,
                     # TODO: add confidence threshold
                     )
device = torch.device(cfg.MODEL.DEVICE)


def build_transforms(cfg):
    """ copied from predictor """
    normalize_transform = T.Normalize(mean=cfg.INPUT.PIXEL_MEAN,
                                      std=cfg.INPUT.PIXEL_STD)
    min_size = cfg.INPUT.MIN_SIZE_TEST
    max_size = cfg.INPUT.MAX_SIZE_TEST
    transform = T.Compose([
        T.ToPILImage(),
        T.ToTensor(),
def main():
    parser = argparse.ArgumentParser(description="PyTorch Object Detection")
    parser.add_argument(
        "--config-file",
        default="/home/mrvain/Workspace/test/FCOS_PLUS/configs/fcos/fcos_R_50_FPN_1x.yaml",
        metavar="FILE",
        help="path to config file",
    )
    parser.add_argument(
        "--weights",
        default="/home/mrvain/Workspace/test/fcos_r50.pth",
        metavar="FILE",
        help="path to the trained model",
    )
    parser.add_argument(
        "--images-dir",
        default="/home/mrvain/Workspace/test/images",
        metavar="DIR",
        help="path to demo images directory",
    )
    parser.add_argument(
        "--min-image-size",
        type=int,
        default=800,
        help="smallest size of the image to feed to the model",
    )
    parser.add_argument(
        "opts",
        help="modify model config options using the command-line",
        default=None,
        nargs=argparse.REMAINDER,
    )
    args = parser.parse_args()

    cfg.merge_from_file(args.config_file)
    cfg.merge_from_list(args.opts)
    cfg.merge_from_list(["MODEL.DEVICE", "cpu"])
    cfg.MODEL.WEIGHT = args.weights
    cfg.freeze()

    # The following per-class thresholds are computed by maximizing
    # per-class f-measure in their precision-recall curve.
    # Please see compute_thresholds_for_classes() in coco_eval.py for details.
    thresholds_for_classes = [
        0.23860901594161987, 0.24108672142028809, 0.2470853328704834,
        0.2316885143518448, 0.2708061933517456, 0.23173952102661133,
        0.31990334391593933, 0.21302376687526703, 0.20151866972446442,
        0.20928964018821716, 0.3793887197971344, 0.2715213894844055,
        0.2836397588253021, 0.26449233293533325, 0.1728038638830185,
        0.314998596906662, 0.28575003147125244, 0.28987520933151245,
        0.2727000117301941, 0.23306897282600403, 0.265937477350235,
        0.32663893699645996, 0.27102580666542053, 0.29177549481391907,
        0.2043062448501587, 0.24331751465797424, 0.20752687752246857,
        0.22951272130012512, 0.22753854095935822, 0.2159966081380844,
        0.1993938684463501, 0.23676514625549316, 0.20982342958450317,
        0.18315598368644714, 0.2489681988954544, 0.24793922901153564,
        0.287187397480011, 0.23045086860656738, 0.2462811917066574,
        0.21191294491291046, 0.22845126688480377, 0.24365000426769257,
        0.22687821090221405, 0.18365581333637238, 0.2035856395959854,
        0.23478077352046967, 0.18431290984153748, 0.18184082210063934,
        0.2708037495613098, 0.2268175482749939, 0.19970566034317017,
        0.21832780539989471, 0.21120598912239075, 0.270445853471756,
        0.189377561211586, 0.2101106345653534, 0.2112293541431427,
        0.23484709858894348, 0.22701986134052277, 0.20732736587524414,
        0.1953316181898117, 0.3237660229206085, 0.3078872859477997,
        0.2881140112876892, 0.38746657967567444, 0.20038367807865143,
        0.28123822808265686, 0.2588447630405426, 0.2796839773654938,
        0.266757994890213, 0.3266656696796417, 0.25759157538414,
        0.2578003704547882, 0.17009201645851135, 0.29051828384399414,
        0.24002137780189514, 0.22378061711788177, 0.26134759187698364,
        0.1730124056339264, 0.1857597529888153
    ]

    image_filenames = os.listdir(args.images_dir)

    model = COCODemo(
        cfg,
        confidence_thresholds_for_classes=thresholds_for_classes,
        min_image_size=args.min_image_size
    )

    for image_filename in image_filenames:
        img = cv2.imread(os.path.join(args.images_dir, image_filename))
        if img is None:
            continue
        start_time = time.time()
        composite = model.run_on_opencv_image(img)
        print("{}\ttime: {:.2f}s".format(image_filename, time.time() - start_time))
        cv2.imshow(image_filename, composite)
        print("Press any key ...")
        cv2.waitKey()
    cv2.destroyAllWindows()
from maskrcnn_benchmark.config import cfg
from demo.predictor import COCODemo

# Commented out IPython magic to ensure Python compatibility.
# %cd /content/drive/MyDrive/maskrcnn-benchmark

config_file = "/content/drive/MyDrive/maskrcnn-benchmark/configs/myconfig/e2e_mask_rcnn_R_50_FPN_1x.yaml"

# update the config options with the config file
cfg.merge_from_file(config_file)
# manual override some options
cfg.merge_from_list(["MODEL.DEVICE", "cpu"])

my_demo = COCODemo(
    cfg,
    min_image_size=800,
    confidence_threshold=0.7,
)


def load(url):
    """
    Given a url of an image, downloads it and returns it as a
    BGR numpy array (the format expected by COCODemo)
    """
    response = requests.get(url)
    pil_image = Image.open(BytesIO(response.content)).convert("RGB")
    # convert to BGR format
    image = np.array(pil_image)[:, :, [2, 1, 0]]
    return image


def imshow(img):
def main():
    parser = argparse.ArgumentParser(
        description="PyTorch Object Detection Webcam Demo")
    parser.add_argument(
        "--config-file",
        default="configs/embed_mask/embed_mask_R50_1x.yaml",
        metavar="FILE",
        help="path to config file",
    )
    parser.add_argument(
        "--weights",
        default="models/embed_mask_R50_1x.pth",
        metavar="FILE",
        help="path to the trained model",
    )
    parser.add_argument(
        "--images-dir",
        default="demo/images",
        metavar="DIR",
        help="path to demo images directory",
    )
    parser.add_argument(
        "--out-dir",
        default="demo/output",
        metavar="DIR",
        help="path to demo output directory",
    )
    parser.add_argument(
        "--min-image-size",
        type=int,
        default=800,
        help="Smallest size of the image to feed to the model. "
             "Model was trained with 800, which gives best results",
    )
    parser.add_argument(
        "opts",
        help="Modify model config options using the command-line",
        default=None,
        nargs=argparse.REMAINDER,
    )
    args = parser.parse_args()

    # load config from file and command-line arguments
    cfg.merge_from_file(args.config_file)
    cfg.merge_from_list(args.opts)
    cfg.MODEL.WEIGHT = args.weights
    cfg.freeze()

    # The following per-class thresholds are computed by maximizing
    # per-class f-measure in their precision-recall curve.
    # Please see compute_thresholds_for_classes() in coco_eval.py for details.
    thresholds_for_classes = [
        0.24721044301986694, 0.2316334992647171, 0.23782534897327423,
        0.2447730302810669, 0.26833730936050415, 0.2909756898880005,
        0.22202278673648834, 0.23603129386901855, 0.19448654353618622,
        0.2009030282497406, 0.2205723077058792, 0.4426179826259613,
        0.2812938094139099, 0.23200270533561707, 0.22222928702831268,
        0.34396135807037354, 0.29865574836730957, 0.2620207965373993,
        0.23538640141487122, 0.21343813836574554, 0.23408174514770508,
        0.3619556427001953, 0.25181055068969727, 0.2753196656703949,
        0.20989173650741577, 0.256824254989624, 0.24953776597976685,
        0.2482326775789261, 0.23516853153705597, 0.3231242001056671,
        0.1875445693731308, 0.22903329133987427, 0.220603808760643,
        0.1938045769929886, 0.2102973908185959, 0.30885136127471924,
        0.21589471399784088, 0.2611836791038513, 0.27154257893562317,
        0.2536311149597168, 0.21989859640598297, 0.2741137146949768,
        0.24886088073253632, 0.20183633267879486, 0.17529579997062683,
        0.2467200607061386, 0.2103690654039383, 0.23187917470932007,
        0.28766655921936035, 0.21596665680408478, 0.24378667771816254,
        0.2806374728679657, 0.23764009773731232, 0.2884339392185211,
        0.19776469469070435, 0.29654744267463684, 0.23793953657150269,
        0.2753768265247345, 0.24718035757541656, 0.2166261523962021,
        0.22458019852638245, 0.36707887053489685, 0.29586368799209595,
        0.24396133422851562, 0.3916597068309784, 0.2478819191455841,
        0.3140171468257904, 0.23574240505695343, 0.30935078859329224,
        0.2633970379829407, 0.22616524994373322, 0.22482863068580627,
        0.25680482387542725, 0.184458926320076, 0.31002628803253174,
        0.2936173677444458, 0.2688758671283722, 0.2438362091779709,
        0.17232654988765717, 0.1869594156742096
    ]

    demo_im_names = os.listdir(args.images_dir)

    # prepare object that handles inference plus adds predictions on top of image
    coco_demo = COCODemo(
        cfg,
        confidence_thresholds_for_classes=thresholds_for_classes,
        min_image_size=args.min_image_size)

    for im_name in demo_im_names:
        img = cv2.imread(os.path.join(args.images_dir, im_name))
        if img is None:
            continue
        start_time = time.time()
        composite = coco_demo.run_on_opencv_image(img)
        print("{}\tinference time: {:.2f}s".format(im_name, time.time() - start_time))
        cv2.imwrite(os.path.join(args.out_dir, im_name), composite)
    print("Press any key to exit ...")
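# A minimal sketch (not part of the original demo) of keeping the per-class thresholds
# in a JSON file instead of hard-coding them; the file name is hypothetical and the file
# is assumed to contain a plain list of 80 floats in COCO class order.
import json

with open('thresholds_for_classes.json') as f:
    thresholds_for_classes = json.load(f)
assert len(thresholds_for_classes) == 80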
def main():
    parser = argparse.ArgumentParser(
        description="Multi Object Tracking Video Demo")
    parser.add_argument(
        "--video-file",
        metavar="FILE",
        help="path to video file",
    )
    parser.add_argument(
        "--config-file",
        default="../configs/caffe2/e2e_mask_rcnn_R_50_FPN_1x_caffe2.yaml",
        metavar="FILE",
        help="path to config file",
    )
    parser.add_argument(
        "--confidence-threshold",
        type=float,
        default=0.7,
        help="Minimum score for the prediction to be shown",
    )
    parser.add_argument(
        "--min-image-size",
        type=int,
        default=224,
        help="Smallest size of the image to feed to the model. "
             "Model was trained with 800, which gives best results",
    )
    parser.add_argument(
        "--show-mask-heatmaps",
        dest="show_mask_heatmaps",
        help="Show a heatmap probability for the top masks-per-dim masks",
        action="store_true",
    )
    parser.add_argument(
        "--masks-per-dim",
        type=int,
        default=2,
        help="Number of heatmaps per dimension to show",
    )
    parser.add_argument(
        "opts",
        help="Modify model config options using the command-line",
        default=None,
        nargs=argparse.REMAINDER,
    )
    args = parser.parse_args()

    # load config from file and command-line arguments
    cfg.merge_from_file(args.config_file)
    cfg.merge_from_list(args.opts)
    cfg.freeze()

    # prepare object that handles inference plus adds predictions on top of image
    coco_demo = COCODemo(
        cfg,
        confidence_threshold=args.confidence_threshold,
        show_mask_heatmaps=args.show_mask_heatmaps,
        masks_per_dim=args.masks_per_dim,
        min_image_size=args.min_image_size,
    )

    cam = cv2.VideoCapture(args.video_file)
    if cam.isOpened():
        # get vcap properties
        width = cam.get(cv2.CAP_PROP_FRAME_WIDTH)    # float
        height = cam.get(cv2.CAP_PROP_FRAME_HEIGHT)  # float
        fps = cam.get(cv2.CAP_PROP_FPS)
        num_frame = cam.get(cv2.CAP_PROP_FRAME_COUNT)
        print("VIDEO INFO:")
        print("Width %f, Height %f, FPS %f, Frame_number %f\n" %
              (width, height, fps, num_frame))

        current_frame = 0
        success = True
        while success:
            success, img = cam.read()
            if success:
                start_time = time.time()
                visualize, prediction, tracking = coco_demo.run_on_opencv_image(
                    img, current_frame)
                print("%d / %d, processing time: %.2fs" %
                      (current_frame, num_frame, time.time() - start_time))
                coco_demo.saveResults(
                    str(current_frame).zfill(6), visualize, prediction, tracking)
                current_frame += 1
def main():
    parser = argparse.ArgumentParser(
        description="PyTorch Object Detection Webcam Demo")
    parser.add_argument(
        "--config-file",
        default="../configs/caffe2/e2e_mask_rcnn_R_50_FPN_1x_caffe2.yaml",
        metavar="FILE",
        help="path to config file",
    )
    parser.add_argument(
        "--confidence-threshold",
        type=float,
        default=0.7,
        help="Minimum score for the prediction to be shown",
    )
    parser.add_argument(
        "--min-image-size",
        type=int,
        default=224,
        help="Smallest size of the image to feed to the model. "
             "Model was trained with 800, which gives best results",
    )
    parser.add_argument(
        "--show-mask-heatmaps",
        dest="show_mask_heatmaps",
        help="Show a heatmap probability for the top masks-per-dim masks",
        action="store_true",
    )
    parser.add_argument(
        "--masks-per-dim",
        type=int,
        default=2,
        help="Number of heatmaps per dimension to show",
    )
    parser.add_argument(
        "opts",
        help="Modify model config options using the command-line",
        default=None,
        nargs=argparse.REMAINDER,
    )
    args = parser.parse_args()

    # load config from file and command-line arguments
    cfg.merge_from_file(args.config_file)
    cfg.merge_from_list(args.opts)
    cfg.freeze()

    # prepare object that handles inference plus adds predictions on top of image
    coco_demo = COCODemo(
        cfg,
        confidence_threshold=args.confidence_threshold,
        show_mask_heatmaps=args.show_mask_heatmaps,
        masks_per_dim=args.masks_per_dim,
        min_image_size=args.min_image_size,
    )

    cam = cv2.VideoCapture(0)
    while True:
        start_time = time.time()
        ret_val, img = cam.read()
        composite = coco_demo.run_on_opencv_image(img)
        print("Time: {:.2f} s / img".format(time.time() - start_time))
        cv2.imshow("COCO detections", composite)
        if cv2.waitKey(1) == 27:
            break  # esc to quit
    cv2.destroyAllWindows()
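# A minimal sketch (not part of the original demo) for recording the annotated webcam
# stream to disk with OpenCV instead of only displaying it; it assumes a `coco_demo`
# built as above, and the output path, codec, frame rate, and frame count are assumptions.
fourcc = cv2.VideoWriter_fourcc(*'mp4v')
cam = cv2.VideoCapture(0)
writer = None
for _ in range(300):  # record roughly 300 annotated frames
    ret_val, img = cam.read()
    if not ret_val:
        break
    composite = coco_demo.run_on_opencv_image(img)
    if writer is None:
        # create the writer lazily, once the output frame size is known
        h, w = composite.shape[:2]
        writer = cv2.VideoWriter('detections.mp4', fourcc, 20.0, (w, h))
    writer.write(composite)
if writer is not None:
    writer.release()
cam.release()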
def main():
    parser = argparse.ArgumentParser(
        description="PyTorch Object Detection Webcam Demo")
    parser.add_argument(
        "--config-file",
        default="configs/embed_mask/embed_mask_R50_1x.yaml",
        metavar="FILE",
        help="path to config file",
    )
    parser.add_argument(
        "--weights",
        default="models/embed_mask_R50_1x.pth",
        metavar="FILE",
        help="path to the trained model",
    )
    parser.add_argument(
        "--images-dir",
        default="demo/images",
        metavar="DIR",
        help="path to demo images directory",
    )
    parser.add_argument(
        "--out-dir",
        default="demo/output",
        metavar="DIR",
        help="path to demo output directory",
    )
    parser.add_argument(
        "--min-image-size",
        type=int,
        default=800,
        help="Smallest size of the image to feed to the model. "
             "Model was trained with 800, which gives best results",
    )
    parser.add_argument(
        "opts",
        help="Modify model config options using the command-line",
        default=None,
        nargs=argparse.REMAINDER,
    )
    args = parser.parse_args()

    # load config from file and command-line arguments
    cfg.merge_from_file(args.config_file)
    cfg.merge_from_list(args.opts)
    cfg.MODEL.WEIGHT = args.weights
    cfg.freeze()

    # The following per-class thresholds are computed by maximizing
    # per-class f-measure in their precision-recall curve.
    # Please see compute_thresholds_for_classes() in coco_eval.py for details.
    thresholds_for_classes = [
        0.24445024132728577, 0.2556260824203491, 0.2336651235818863,
        0.26643890142440796, 0.22829005122184753, 0.27605465054512024,
        0.29680299758911133, 0.24539557099342346, 0.22566702961921692,
        0.21125544607639313, 0.3632965385913849, 0.42116600275039673,
        0.29700127243995667, 0.2278410643339157, 0.2317150980234146,
        0.30244436860084534, 0.32276564836502075, 0.25707629323005676,
        0.24852260947227478, 0.24491029977798462, 0.2518414556980133,
        0.35320255160331726, 0.2866332232952118, 0.2207552194595337,
        0.2568267285823822, 0.24461865425109863, 0.20570527017116547,
        0.2656995356082916, 0.21232444047927856, 0.2799481451511383,
        0.18180416524410248, 0.2654014825820923, 0.262266606092453,
        0.19924932718276978, 0.22213412821292877, 0.3075449764728546,
        0.2290934920310974, 0.2963321805000305, 0.23535756766796112,
        0.2430417388677597, 0.22808006405830383, 0.2716907560825348,
        0.21096138656139374, 0.18565504252910614, 0.17213594913482666,
        0.2755044996738434, 0.22538238763809204, 0.22792285680770874,
        0.24877801537513733, 0.23092558979988098, 0.23993775248527527,
        0.21917308866977692, 0.2535002529621124, 0.30203622579574585,
        0.19476301968097687, 0.24782243371009827, 0.22699865698814392,
        0.25022363662719727, 0.23006463050842285, 0.22317998111248016,
        0.20648975670337677, 0.28253015875816345, 0.35304051637649536,
        0.2882220447063446, 0.2875506281852722, 0.21613512933254242,
        0.308322936296463, 0.29409125447273254, 0.3021804690361023,
        0.273112416267395, 0.23458659648895264, 0.2998719811439514,
        0.2715963125228882, 0.1898047924041748, 0.32565683126449585,
        0.25560101866722107, 0.265905499458313, 0.3087238669395447,
        0.2053961306810379, 0.20331673324108124
    ]

    demo_im_names = os.listdir(args.images_dir)

    # prepare object that handles inference plus adds predictions on top of image
    coco_demo = COCODemo(
        cfg,
        confidence_thresholds_for_classes=thresholds_for_classes,
        min_image_size=args.min_image_size
    )

    for im_name in demo_im_names:
        img = cv2.imread(os.path.join(args.images_dir, im_name))
        if img is None:
            continue
        start_time = time.time()
        composite = coco_demo.run_on_opencv_image(img)
        print("{}\tinference time: {:.2f}s".format(im_name, time.time() - start_time))
        cv2.imwrite(os.path.join(args.out_dir, im_name), composite)
    print("Press any key to exit ...")
def main():
    parser = argparse.ArgumentParser(
        description="PyTorch Object Detection Webcam Demo")
    parser.add_argument(
        "--config-file",
        default="/home/w/workspace/DetectionHub/configs/global_wheat.yaml",
        metavar="FILE",
        help="path to config file",
    )
    parser.add_argument(
        "--confidence-threshold",
        type=float,
        default=0.7,
        help="Minimum score for the prediction to be shown",
    )
    parser.add_argument(
        "--min-image-size",
        type=int,
        default=224,
        help="Smallest size of the image to feed to the model. "
             "Model was trained with 800, which gives best results",
    )
    parser.add_argument(
        "--show-mask-heatmaps",
        dest="show_mask_heatmaps",
        help="Show a heatmap probability for the top masks-per-dim masks",
        action="store_true",
    )
    parser.add_argument(
        "--masks-per-dim",
        type=int,
        default=2,
        help="Number of heatmaps per dimension to show",
    )
    parser.add_argument(
        "opts",
        help="Modify model config options using the command-line",
        default=None,
        nargs=argparse.REMAINDER,
    )
    args = parser.parse_args()

    # load config from file and command-line arguments
    cfg.merge_from_file(args.config_file)
    cfg.merge_from_list(args.opts)
    cfg.freeze()

    # prepare object that handles inference plus adds predictions on top of image
    coco_demo = COCODemo(
        cfg,
        confidence_threshold=0.5,
        show_mask_heatmaps=args.show_mask_heatmaps,
        masks_per_dim=args.masks_per_dim,
        min_image_size=args.min_image_size,
    )

    result_dict = {}
    image_dir = '/media/w/Data/globel-wheat-detection/test'
    image_list = os.listdir(image_dir)
    for i in image_list:
        image = cv2.imread(os.path.join(image_dir, i))
        start_time = time.time()
        image = cv2.resize(image, (600, 600), interpolation=cv2.INTER_CUBIC)
        composite, predictions = coco_demo.run_on_opencv_image(image)

        result = []
        for bbox, score in zip(predictions.bbox, predictions.get_field("scores")):
            bbox = bbox.cpu().numpy()
            score = score.cpu().numpy()
            bbox = resize2_1024(bbox)
            result.append(str(score))
            for b in bbox:
                result.append(str(b))
        result_dict[i.split('.')[0]] = ' '.join(result)
        print("Time: {:.2f} s / img".format(time.time() - start_time))
        # cv2.imshow("COCO detections", composite)
        # if cv2.waitKey(0) == 27:
        #     break  # esc to quit

    with open("test.csv", "w") as csvfile:
        writer = csv.writer(csvfile)
        # write the column names first
        writer.writerow(["image_id", "PredictionString"])
        for key in result_dict.keys():
            writer.writerow([key, result_dict[key]])

    cv2.destroyAllWindows()
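# `resize2_1024` is referenced above but not defined in this snippet; a plausible
# sketch, assuming it maps box coordinates predicted on the 600x600 resized image back
# to the original 1024x1024 test images. Both sizes are assumptions.
def resize2_1024(bbox, src_size=600, dst_size=1024):
    # scale (x1, y1, x2, y2) coordinates from the resized image to the original image
    scale = dst_size / src_size
    return [coord * scale for coord in bbox]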
def main():
    parser = argparse.ArgumentParser(description="MaskRCNN Demo")
    parser.add_argument(
        "--image_dir",
        default="",
        type=str,
        help="path to images directory",
    )
    parser.add_argument(
        "--config-file",
        default="../configs/caffe2/e2e_mask_rcnn_R_50_FPN_1x_caffe2.yaml",
        metavar="FILE",
        help="path to config file",
    )
    parser.add_argument(
        "--confidence-threshold",
        type=float,
        default=0.7,
        help="Minimum score for the prediction to be shown",
    )
    parser.add_argument(
        "--min-image-size",
        type=int,
        default=224,
        help="Smallest size of the image to feed to the model. "
             "Model was trained with 800, which gives best results",
    )
    parser.add_argument(
        "--show-mask-heatmaps",
        dest="show_mask_heatmaps",
        help="Show a heatmap probability for the top masks-per-dim masks",
        action="store_true",
    )
    parser.add_argument(
        "--masks-per-dim",
        type=int,
        default=2,
        help="Number of heatmaps per dimension to show",
    )
    parser.add_argument(
        "opts",
        help="Modify model config options using the command-line",
        default=None,
        nargs=argparse.REMAINDER,
    )
    args = parser.parse_args()

    # load config from file and command-line arguments
    cfg.merge_from_file(args.config_file)
    cfg.merge_from_list(args.opts)
    cfg.freeze()

    # prepare object that handles inference plus adds predictions on top of image
    coco_demo = COCODemo(
        cfg,
        confidence_threshold=args.confidence_threshold,
        show_mask_heatmaps=args.show_mask_heatmaps,
        masks_per_dim=args.masks_per_dim,
        min_image_size=args.min_image_size,
    )

    filenames = os.listdir(args.image_dir)
    for filename in filenames:
        img = cv2.imread(os.path.join(args.image_dir, filename))
        if img is None:
            continue
        visualize, prediction, tracking = coco_demo.run_on_opencv_image(img)
        coco_demo.saveResults(filename, visualize, prediction, tracking)