Example #1
def main(name_scope, gpu_dev, num_images, args):
    t = args.t
    model = initialize_model_from_cfg()
    num_classes = cfg.MODEL.NUM_CLASSES
    all_boxes, all_segms, all_keyps = empty_results(num_classes, num_images)
    temp_frame_folder = osp.join(args.out_path, args.vid_name + '_frames/', str(t))
    imgs = sorted(glob.glob(temp_frame_folder + '/*.jpg'))  # glob order is arbitrary; sort for a stable frame order
    for i in range(len(imgs)):
        if i % 100 == 0:
            print('Processing Detection for Frame %d' % (i + 1))
        im_ = cv2.imread(imgs[i])
        assert im_ is not None
        im_ = np.expand_dims(im_, 0)
        with core.NameScope(name_scope):
            with core.DeviceScope(gpu_dev):
                cls_boxes_i, cls_segms_i, cls_keyps_i = im_detect_all(
                    model, im_, None)  # TODO: parallelize detection (see the sketch after this function)

        extend_results(i, all_boxes, cls_boxes_i)
        if cls_segms_i is not None:
            extend_results(i, all_segms, cls_segms_i)
        if cls_keyps_i is not None:
            extend_results(i, all_keyps, cls_keyps_i)

    det_name = args.vid_name + '_' + str(args.t) + '_detections.pkl'
    det_file = osp.join(args.out_path, det_name)
    robust_pickle_dump(dict(all_keyps=all_keyps), det_file)  # note: only the keypoint results are saved here
    shutil.rmtree(osp.join(args.out_path, args.vid_name + '_frames'))
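
# The TODO above asks for parallelized detection. A minimal sketch of sharding
# frame paths across worker processes, assuming each frame can be processed
# independently; detect_shard is hypothetical and stands in for per-worker
# model setup plus im_detect_all over one slice of frames.
import multiprocessing as mp

def detect_shard(frame_paths):
    # Hypothetical worker: would build its own model here and run
    # im_detect_all on each path; this placeholder just echoes the paths.
    return frame_paths

def parallel_detect(frame_paths, num_workers=4):
    if not frame_paths:
        return []
    # Contiguous chunks keep frame order trivial to reassemble afterwards.
    chunk = (len(frame_paths) + num_workers - 1) // num_workers
    shards = [frame_paths[k:k + chunk] for k in range(0, len(frame_paths), chunk)]
    with mp.Pool(len(shards)) as pool:
        per_shard = pool.map(detect_shard, shards)
    return [r for shard in per_shard for r in shard]
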
def localize_obj_in_image(path_to_image, maskRCNN, timers):
    '''
    Localize foreground objects in the image of 'path_to_image'.

    You may replace this function with a call to your localization method
    '''

    img = imread(path_to_image)
    base = basename(path_to_image)
    name = splitext(base)[0]

    h, w = img.shape[0:2]
    bbox = []
    cls_boxes, cls_segms, cls_keyps = im_detect_all(maskRCNN,
                                                    img,
                                                    timers=timers)

    # randomly generate up to 5 bounding boxes for each image
    # with size between [50, 100)
    '''
    for k in range(np.random.randint(5)):

        label = classes[np.random.randint(11)]
        score = np.random.rand(1)[0]

        bb_w = np.random.randint(50, 100, 1)[0]  # width
        bb_h = np.random.randint(50, 100, 1)[0]  # height

        bb_x1 = np.random.randint(0, w-100, 1)[0]
        bb_y1 = np.random.randint(0, h-100, 1)[0]

        bb_x2 = bb_x1 + bb_w - 1
        bb_y2 = bb_y1 + bb_h - 1

        bbox.append([name, label, score, bb_x1, bb_y1, bb_x2, bb_y2])
    '''
    # NOTE: the im_detect_all results above are never copied into `bbox`, so
    # this returns an empty list unless the commented block is enabled (a
    # sketch of the conversion follows this function).
    return bbox
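
# A sketch of the missing conversion, assuming the usual Detectron convention
# that cls_boxes[j] is an (N, 5) array of [x1, y1, x2, y2, score] for class j,
# and that `classes` maps a class index to its label (both assumptions here).
import numpy as np

def boxes_to_rows(name, cls_boxes, classes, score_thresh=0.5):
    rows = []
    for j, dets in enumerate(cls_boxes):
        if j == 0 or len(dets) == 0:  # index 0 is background by convention
            continue
        for x1, y1, x2, y2, score in np.asarray(dets):
            if score >= score_thresh:
                rows.append([name, classes[j], float(score),
                             float(x1), float(y1), float(x2), float(y2)])
    return rows
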
Example #3
def test_cls_net(output_dir, ind_range=None, gpu_id=0):
    """Run inference on all images in a dataset or over an index range of images
    in a dataset using a single GPU.
    """
    assert cfg.TEST.WEIGHTS != '', \
        'TEST.WEIGHTS must be set to the model file to test'
    assert not cfg.MODEL.RPN_ONLY, \
        'Use rpn_generate to generate proposals from RPN-only models'
    assert cfg.TEST.DATASET != '', \
        'TEST.DATASET must be set to the dataset name to test'

    roidb, dataset, start_ind, end_ind, total_num_images = get_roidb_and_dataset(
        ind_range, gt_cls=True)
    model = initialize_model_from_cfg(gpu_id=gpu_id)
    num_images = len(roidb)
    num_classes = cfg.MODEL.NUM_CLASSES
    all_cls = []
    all_labs = []
    timers = defaultdict(Timer)
    for i, entry in enumerate(roidb):

        im = cv2.imread(entry['image'])
        with c2_utils.NamedCudaScope(gpu_id):
            cls_scores, _, _ = im_detect_all(model, im, None, timers)

        all_cls.append(np.argmax(cls_scores))
        all_labs.append(entry["gt_classes"][0])

        if i % 10 == 0:  # Reduce log file size
            ave_total_time = np.sum([t.average_time for t in timers.values()])
            eta_seconds = ave_total_time * (num_images - i - 1)
            eta = str(datetime.timedelta(seconds=int(eta_seconds)))
            cls_time = timers['classify_im'].average_time
            logger.info(
                ('im_detect: range [{:d}, {:d}] of {:d}: '
                 '{:d}/{:d} {:.3f}s  (eta: {})').format(
                     start_ind + 1, end_ind, total_num_images,
                     start_ind + i + 1, start_ind + num_images, cls_time, eta))
    correct_pred = np.equal(np.array(all_cls), np.array(all_labs))
    acc = np.mean(correct_pred.astype(np.float32))
    return acc
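
# The logging blocks in these examples all estimate the ETA the same way: sum
# the per-stage average times tracked by the Timer objects, then multiply by
# the number of images still to process. A self-contained sketch of that
# arithmetic:
import datetime

def eta_string(avg_times, num_images, i):
    # avg_times: per-stage average seconds for one image
    ave_total_time = sum(avg_times)
    eta_seconds = ave_total_time * (num_images - i - 1)
    return str(datetime.timedelta(seconds=int(eta_seconds)))

# Three stages averaging 0.2s, 0.05s and 0.01s, 1000 images, at image 100:
print(eta_string([0.2, 0.05, 0.01], 1000, 100))  # -> '0:03:53'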
Example #4
def test_net_Car3D(
        args,
        dataset_name,
        proposal_file,
        output_dir,
        ind_range=None,
        gpu_id=0):
    """Run inference on all images in a dataset or over an index range of images
    in a dataset using a single GPU.
    """
    assert not cfg.MODEL.RPN_ONLY, 'Use rpn_generate to generate proposals from RPN-only models'
    dataset = JsonDataset(dataset_name, args.dataset_dir)
    timers = defaultdict(Timer)

    roidb, dataset, start_ind, end_ind, total_num_images = get_roidb_and_dataset(dataset, proposal_file, ind_range, args)
    num_images = len(roidb)
    image_ids = []
    if cfg.MODEL.TRANS_HEAD_ON:
        json_dir = os.path.join(output_dir, 'json_'+args.list_flag+'_trans')
    else:
        json_dir = os.path.join(output_dir, 'json_'+args.list_flag)

    json_dir += '_iou_' + str(args.iou_ignore_threshold)
    if not cfg.TEST.BBOX_AUG.ENABLED:
        json_dir += '_BBOX_AUG_single_scale'
    else:
        json_dir += '_BBOX_AUG_multiple_scale'

    if not cfg.TEST.CAR_CLS_AUG.ENABLED:
        json_dir += '_CAR_CLS_AUG_single_scale'
    else:
        json_dir += '_CAR_CLS_AUG_multiple_scale'

    if cfg.TEST.GEOMETRIC_TRANS:
        json_dir += '_GEOMETRIC_TRANS'

    if cfg.TEST.CAR_CLS_AUG.H_FLIP and cfg.TEST.CAR_CLS_AUG.SCALE_H_FLIP:
        json_dir += '_hflipped'

    for i, entry in enumerate(roidb):
        image_ids.append(entry['image'])
    args.image_ids = image_ids

    all_boxes = [[[] for _ in range(num_images)] for _ in range(cfg.MODEL.NUM_CLASSES)]
    if ind_range is not None:
        if cfg.TEST.SOFT_NMS.ENABLED:
            det_name = 'detection_range_%s_%s_soft_nms' % tuple(ind_range)
        else:
            det_name = 'detection_range_(%d_%d)_nms_%.1f' % (ind_range[0], ind_range[1], cfg.TEST.NMS)
        if cfg.TEST.BBOX_AUG.ENABLED:
            det_name += '_multiple_scale'
        det_name += '.pkl'
    else:
        det_name = 'detections.pkl'
    det_file = os.path.join(output_dir, det_name)

    file_complete_flag = [not os.path.exists(os.path.join(json_dir, entry['image'].split('/')[-1][:-4] + '.json')) for entry in roidb]
    # If we don't have the complete json file, we will load the model and execute the following:
    if np.sum(file_complete_flag) or not os.path.exists(det_file):
        model = initialize_model_from_cfg(args, gpu_id=gpu_id)
        for i in tqdm(range(len(roidb))):
            entry = roidb[i]
            if cfg.TEST.PRECOMPUTED_PROPOSALS:
                # The roidb may contain ground-truth rois (for example, if the roidb
                # comes from the training or val split). We only want to evaluate
                # detection on the *non*-ground-truth rois. We select only the rois
                # that have the gt_classes field set to 0, which means there's no
                # ground truth.
                box_proposals = entry['boxes'][entry['gt_classes'] == 0]
                if len(box_proposals) == 0:
                    continue
            else:
                # Faster R-CNN type models generate proposals on-the-fly with an
                # in-network RPN; 1-stage models don't require proposals.
                box_proposals = None

            im = cv2.imread(entry['image'])
            ignored_mask_img = os.path.join(('/').join(entry['image'].split('/')[:-2]), 'ignore_mask', entry['image'].split('/')[-1])
            ignored_mask = cv2.imread(ignored_mask_img, cv2.IMREAD_GRAYSCALE)
            ignored_mask_binary = np.zeros(ignored_mask.shape)
            ignored_mask_binary[ignored_mask > 250] = 1
            if cfg.MODEL.NON_LOCAL_TEST and not cfg.TEST.BBOX_AUG.ENABLED:
                cls_boxes_i, cls_segms_i, _, car_cls_i, euler_angle_i, trans_pred_i, f_div_C = im_detect_all(model, im, box_proposals, timers, dataset)
            else:
                cls_boxes_i, cls_segms_i, _, car_cls_i, euler_angle_i, trans_pred_i = im_detect_all(model, im, box_proposals, timers, dataset)
            extend_results(i, all_boxes, cls_boxes_i)

            # Draw the attention grid overlaid on the image (debug code, disabled)
            if False:
                f_div_C_plot = f_div_C.copy()
                grid_size = 32  # This is the res5 output space
                fig = plt.figure()
                ax1 = fig.add_subplot(1, 2, 1)
                ax2 = fig.add_subplot(1, 2, 2)
                ax1.imshow(cv2.cvtColor(im, cv2.COLOR_BGR2RGB))
                ax1.grid(which='minor')

                # We choose the point here:
                # x, y = int(1757/grid_size), int(1040/grid_size)   # val 164
                x, y = int(1830/grid_size), int(1855/grid_size)

                # draw a patch here
                rect = patches.Rectangle((x*grid_size-grid_size, y*grid_size-grid_size), grid_size*3, grid_size*3,
                                         linewidth=1, edgecolor='m', facecolor='m')
                ax1.add_patch(rect)
                #att_point_map = f_div_C_plot[106*x+y, :]
                att_point_map = f_div_C_plot[106*y+x, :]
                att_point_map = np.reshape(att_point_map, (85, 106))
                ax2.imshow(att_point_map, cmap='jet')

                # draw arrows for the 10 strongest attention responses
                # (use k, not i, to avoid shadowing the image index)
                for k in range(10):
                    x_max, y_max = np.unravel_index(att_point_map.argmax(), att_point_map.shape)
                    v = att_point_map[x_max, y_max]
                    att_point_map[x_max, y_max] = 0
                    ax1.arrow(x*grid_size, y*grid_size, (y_max-x)*grid_size, (x_max-y)*grid_size,
                              fc="r", ec="r", head_width=(10-k)*grid_size/2, head_length=grid_size)

            if i % 10 == 0:  # Reduce log file size
                ave_total_time = np.sum([t.average_time for t in timers.values()])
                eta_seconds = ave_total_time * (num_images - i - 1)
                eta = str(datetime.timedelta(seconds=int(eta_seconds)))
                det_time = timers['im_detect_bbox'].average_time
                triple_head_time = timers['triple_head'].average_time
                misc_time = (
                    timers['misc_bbox'].average_time +
                    timers['misc_mask'].average_time
                )
                logger.info(
                    (
                        'im_detect: range [{:d}, {:d}] of {:d}: '
                        '{:d}/{:d} det-time: {:.3f}s + triple-head-time: {:.3f}s + misc_time: {:.3f}s (eta: {})'
                    ).format(
                        start_ind + 1, end_ind, total_num_images, start_ind + i + 1,
                        start_ind + num_images, det_time, triple_head_time, misc_time, eta
                    )
                )

            im_name = os.path.splitext(os.path.basename(entry['image']))[0]
            vis_utils.write_pose_to_json(
                im_name=im_name,
                output_dir=json_dir,
                boxes=cls_boxes_i,
                car_cls_prob=car_cls_i,
                euler_angle=euler_angle_i,
                trans_pred=trans_pred_i,
                segms=cls_segms_i,
                dataset=dataset.Car3D,
                thresh=cfg.TEST.SCORE_THRESH_FOR_TRUTH_DETECTION,
                ignored_mask_binary=ignored_mask_binary.astype('uint8'),
                iou_ignore_threshold=args.iou_ignore_threshold
            )

            if cfg.VIS:
                vis_utils.vis_one_image_eccv2018_car_3d(
                    im[:, :, ::-1],
                    '{:d}_{:s}'.format(i, im_name),
                    os.path.join(output_dir, 'vis_'+args.list_flag),
                    boxes=cls_boxes_i,
                    car_cls_prob=car_cls_i,
                    euler_angle=euler_angle_i,
                    trans_pred=trans_pred_i,
                    car_models=dataset.Car3D.car_models,
                    intrinsic=dataset.Car3D.get_intrinsic_mat(),
                    segms=cls_segms_i,
                    keypoints=None,
                    thresh=0.9,
                    box_alpha=0.8,
                    dataset=dataset.Car3D)

        save_object(dict(all_boxes=all_boxes), det_file)

    # The following evaluates the detection results from the Faster R-CNN head.
    # If we have already computed the boxes:
    if os.path.exists(det_file):
        obj = load_object(det_file)
        all_boxes = obj['all_boxes']

    # This is a hack: merge per-range detection files (disabled; a sketch
    # follows this function). Note that det_files[0] is added twice below.
    if False:

        import glob
        det_files = sorted(glob.glob(args.output_dir+'/detection_range_*.pkl'))
        det_files = [det_files[4], det_files[1], det_files[2], det_files[3]]
        obj = load_object(det_files[0])
        all_boxes = obj['all_boxes']
        for df in det_files:
            obj = load_object(df)
            boxes = obj['all_boxes']
            for i in range(len(boxes)):
                all_boxes[i] = all_boxes[i] + boxes[i]
        save_object(dict(all_boxes=all_boxes), det_file)

    results = task_evaluation.evaluate_boxes(dataset, all_boxes, output_dir, args)

    # The following evaluates the mAP of car poses
    args.test_dir = json_dir
    args.gt_dir = args.dataset_dir + 'car_poses'
    args.res_file = os.path.join(output_dir, 'json_'+args.list_flag+'_res.txt')
    args.simType = None
    det_3d_metric = Detect3DEval(args)
    det_3d_metric.evaluate()
    det_3d_metric.accumulate()
    det_3d_metric.summarize()
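
# The disabled "hack" above merges per-range detection files. The invariant
# that makes this work is that each per-class list is ordered by image index,
# so concatenating the per-class lists in range order rebuilds the full
# all_boxes. A sketch with plain pickle (save_object/load_object are assumed
# to be pickle-based, as in Detectron); unlike the block above, it does not
# add det_files[0] twice.
import pickle

def merge_range_detections(det_files):
    merged = None
    for path in det_files:  # must be sorted by range start index
        with open(path, 'rb') as f:
            boxes = pickle.load(f)['all_boxes']
        if merged is None:
            merged = [list(b) for b in boxes]
        else:
            for j in range(len(boxes)):
                merged[j] = merged[j] + list(boxes[j])
    return merged
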
Example #5
def main():
    """main function"""

    if not torch.cuda.is_available():
        sys.exit("Need a CUDA device to run the code.")

    args = parse_args()
    print('Called with args:')
    print(args)

    assert args.image_dir or args.images
    assert bool(args.image_dir) ^ bool(args.images)

    if args.dataset.startswith("coco"):
        dataset = datasets.get_coco_dataset()
        cfg.MODEL.NUM_CLASSES = len(dataset.classes)
    elif args.dataset.startswith("keypoints_coco"):
        dataset = datasets.get_coco_dataset()
        cfg.MODEL.NUM_CLASSES = 2
    else:
        raise ValueError('Unexpected dataset name: {}'.format(args.dataset))

    print('load cfg from file: {}'.format(args.cfg_file))
    cfg_from_file(args.cfg_file)

    if args.set_cfgs is not None:
        cfg_from_list(args.set_cfgs)

    assert bool(args.load_ckpt) ^ bool(args.load_detectron), \
        'Exactly one of --load_ckpt and --load_detectron should be specified.'
    cfg.MODEL.LOAD_IMAGENET_PRETRAINED_WEIGHTS = False  # Don't need to load imagenet pretrained weights
    assert_and_infer_cfg()

    maskRCNN = Generalized_RCNN()

    if args.cuda:
        maskRCNN.cuda()

    if args.load_ckpt:
        load_name = args.load_ckpt
        print("loading checkpoint %s" % (load_name))
        checkpoint = torch.load(load_name,
                                map_location=lambda storage, loc: storage)
        net_utils.load_ckpt(maskRCNN, checkpoint['model'])

    if args.load_detectron:
        print("loading detectron weights %s" % args.load_detectron)
        load_detectron_weight(maskRCNN, args.load_detectron)

    video_path = args.video
    video_save_dir = args.video_save_dir
    if not os.path.exists(video_save_dir):
        os.mkdir(video_save_dir)

    maskRCNN = mynn.DataParallel(maskRCNN,
                                 cpu_keywords=['im_info', 'roidb'],
                                 minibatch=True,
                                 device_ids=[0])  # only support single GPU
    maskRCNN.eval()

    capture = cv2.VideoCapture(video_path)
    frame_count = 0
    batch_size = 1

    frame_list = []
    while True:
        ret, frame = capture.read()
        # Bail out when the video file ends
        if not ret:
            break
        # Save each frame of the video to a list
        frame_count += 1
        frame_list.append(frame)
        if len(frame_list) == batch_size:
            for i, frame in enumerate(frame_list):
                timers = defaultdict(Timer)
                cls_boxes, cls_segms, cls_keyps = im_detect_all(maskRCNN,
                                                                frame,
                                                                timers=timers)

                name = '{0}'.format(frame_count + i - batch_size)
                print('\n' + name)

                vis_utils.vis_one_image(
                    frame[:, :, ::-1],  # BGR -> RGB for visualization
                    name,
                    video_save_dir,
                    cls_boxes,
                    cls_segms,
                    cls_keyps,
                    dataset=dataset,
                    box_alpha=0.3,
                    show_class=True,
                    thresh=0.8,
                    kp_thresh=2,
                    ext='jpg')

            # Clear the frames array to start the next batch
            frame_list = []

    images = list(glob.iglob(os.path.join(video_save_dir, '*.jpg')))
    # Sort the images by integer index
    images = sorted(images, key=lambda x: float(os.path.split(x)[1][:-3]))

    outvid = os.path.join(video_save_dir, "out.mp4")
    make_video(outvid, images, fps=30)

    extract_audio = 'ffmpeg -i %s -vn -acodec copy %s/out.aac' % (
        video_path, video_save_dir)
    subprocess.call(extract_audio, shell=True)

    merge_audio_video = "ffmpeg -i %s/out.aac -i %s/out.mp4 -codec copy -shortest %s/final.mp4" % (
        video_save_dir, video_save_dir, video_save_dir)
    subprocess.call(merge_audio_video, shell=True)
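
# The two ffmpeg calls above interpolate paths into a shell string, which
# breaks on paths containing spaces. A sketch of the same commands using
# argument lists, avoiding the shell entirely:
import os
import subprocess

def extract_and_merge(video_path, video_save_dir):
    aac = os.path.join(video_save_dir, 'out.aac')
    mp4 = os.path.join(video_save_dir, 'out.mp4')
    final = os.path.join(video_save_dir, 'final.mp4')
    subprocess.call(['ffmpeg', '-i', video_path, '-vn', '-acodec', 'copy', aac])
    subprocess.call(['ffmpeg', '-i', aac, '-i', mp4,
                     '-codec', 'copy', '-shortest', final])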
Example #6
def test_net(
        args,
        dataset_name,
        proposal_file,
        output_dir,
        ind_range=None,
        gpu_id=0):
    """Run inference on all images in a dataset or over an index range of images
    in a dataset using a single GPU.
    """
    assert not cfg.MODEL.RPN_ONLY, 'Use rpn_generate to generate proposals from RPN-only models'
    dataset = JsonDataset(dataset_name, args.dataset_dir)
    timers = defaultdict(Timer)
    if ind_range is not None:
        if cfg.TEST.SOFT_NMS.ENABLED:
            det_name = 'detection_range_%s_%s_soft_nms.pkl' % tuple(ind_range)
        else:
            det_name = 'detection_range_(%d_%d)_nms_%.1f.pkl' % (ind_range[0], ind_range[1], cfg.TEST.NMS)
    else:
        det_name = 'detections.pkl'
    det_file = os.path.join(output_dir, det_name)
    roidb, dataset, start_ind, end_ind, total_num_images = get_roidb_and_dataset(dataset, proposal_file, ind_range, args)
    num_images = len(roidb)
    image_ids = []
    num_classes = cfg.MODEL.NUM_CLASSES
    all_boxes, all_segms, all_keyps = empty_results(num_classes, num_images)

    for i, entry in enumerate(roidb):
        image_ids.append(entry['image'])
    args.image_ids = image_ids

    # If we have already computed the boxes
    if os.path.exists(det_file):
        obj = load_object(det_file)
        all_boxes, all_segms, all_keyps = obj['all_boxes'], obj['all_segms'], obj['all_keyps']

    else:
        model = initialize_model_from_cfg(args, gpu_id=gpu_id)
        for i, entry in enumerate(roidb):
            if cfg.TEST.PRECOMPUTED_PROPOSALS:
                # The roidb may contain ground-truth rois (for example, if the roidb
                # comes from the training or val split). We only want to evaluate
                # detection on the *non*-ground-truth rois. We select only the rois
                # that have the gt_classes field set to 0, which means there's no
                # ground truth.
                box_proposals = entry['boxes'][entry['gt_classes'] == 0]
                if len(box_proposals) == 0:
                    continue
            else:
                # Faster R-CNN type models generate proposals on-the-fly with an
                # in-network RPN; 1-stage models don't require proposals.
                box_proposals = None

            im = cv2.imread(entry['image'])
            cls_boxes_i, cls_segms_i, cls_keyps_i, car_cls_i, euler_angle_i, trans_pred_i = im_detect_all(model, im, box_proposals, timers, dataset)
            extend_results(i, all_boxes, cls_boxes_i)
            if cls_segms_i is not None:
                extend_results(i, all_segms, cls_segms_i)
            if cls_keyps_i is not None:
                extend_results(i, all_keyps, cls_keyps_i)

            if i % 10 == 0:  # Reduce log file size
                ave_total_time = np.sum([t.average_time for t in timers.values()])
                eta_seconds = ave_total_time * (num_images - i - 1)
                eta = str(datetime.timedelta(seconds=int(eta_seconds)))
                det_time = (
                    timers['im_detect_bbox'].average_time +
                    timers['im_detect_mask'].average_time +
                    timers['im_detect_keypoints'].average_time
                )
                misc_time = (
                    timers['misc_bbox'].average_time +
                    timers['misc_mask'].average_time +
                    timers['misc_keypoints'].average_time
                )
                logger.info(
                    (
                        'im_detect: range [{:d}, {:d}] of {:d}: '
                        '{:d}/{:d} {:.3f}s + {:.3f}s (eta: {})'
                    ).format(
                        start_ind + 1, end_ind, total_num_images, start_ind + i + 1,
                        start_ind + num_images, det_time, misc_time, eta
                    )
                )

            if cfg.VIS:
                im_name = os.path.splitext(os.path.basename(entry['image']))[0]
                vis_utils.vis_one_image_eccv2018_car_3d(
                    im[:, :, ::-1],
                    '{:d}_{:s}'.format(i, im_name),
                    os.path.join(output_dir, 'vis'),
                    boxes=cls_boxes_i,
                    car_cls_prob=car_cls_i,
                    euler_angle=euler_angle_i,
                    trans_pred=trans_pred_i,
                    car_models=dataset.Car3D.car_models,
                    intrinsic=dataset.Car3D.get_intrinsic_mat(),
                    segms=cls_segms_i,
                    keypoints=cls_keyps_i,
                    thresh=0.9,
                    box_alpha=0.8,
                    dataset=dataset.Car3D)
        cfg_yaml = yaml.dump(cfg)
        save_object(
            dict(
                all_boxes=all_boxes,
                all_segms=all_segms,
                all_keyps=all_keyps,
                cfg=cfg_yaml
            ), det_file
        )
        logger.info('Wrote detections to: {}'.format(os.path.abspath(det_file)))

    results = task_evaluation.evaluate_all(dataset, all_boxes, all_segms, all_keyps, output_dir, args)
    return results
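
# empty_results and extend_results, used throughout these examples, keep a
# class-major structure: all_boxes[j][i] holds the detections of class j in
# image i. A minimal sketch of that structure (the real helpers also return
# parallel structures for segmentations and keypoints):
def empty_results_sketch(num_classes, num_images):
    return [[[] for _ in range(num_images)] for _ in range(num_classes)]

def extend_results_sketch(index, all_res, im_res):
    # Scatter one image's per-class results into the global structure.
    for j in range(1, len(im_res)):  # class 0 is background
        all_res[j][index] = im_res[j]

all_boxes_demo = empty_results_sketch(num_classes=3, num_images=2)
extend_results_sketch(0, all_boxes_demo, [[], [[0, 0, 10, 10, 0.9]], []])
print(all_boxes_demo[1][0])  # -> [[0, 0, 10, 10, 0.9]]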
Example #7
def main():
    """main function"""

    if not torch.cuda.is_available():
        sys.exit("Need a CUDA device to run the code.")

    args = parse_args()
    print('Called with args:')
    print(args)

    assert args.image_dir or args.images
    assert bool(args.image_dir) ^ bool(args.images)

    if args.dataset.startswith("coco"):
        dataset = datasets.get_coco_dataset()
        cfg.MODEL.NUM_CLASSES = len(dataset.classes)
    elif args.dataset.startswith("keypoints_coco"):
        dataset = datasets.get_coco_dataset()
        cfg.MODEL.NUM_CLASSES = 2
    else:
        raise ValueError('Unexpected dataset name: {}'.format(args.dataset))

    print('load cfg from file: {}'.format(args.cfg_file))
    cfg_from_file(args.cfg_file)

    if args.set_cfgs is not None:
        cfg_from_list(args.set_cfgs)

    assert bool(args.load_ckpt) ^ bool(args.load_detectron), \
        'Exactly one of --load_ckpt and --load_detectron should be specified.'
    cfg.MODEL.LOAD_IMAGENET_PRETRAINED_WEIGHTS = False  # Don't need to load imagenet pretrained weights
    assert_and_infer_cfg()

    maskRCNN = Generalized_RCNN()

    if args.cuda:
        maskRCNN.cuda()

    if args.load_ckpt:
        load_name = args.load_ckpt
        print("loading checkpoint %s" % (load_name))
        checkpoint = torch.load(load_name,
                                map_location=lambda storage, loc: storage)
        net_utils.load_ckpt(maskRCNN, checkpoint['model'])

    if args.load_detectron:
        print("loading detectron weights %s" % args.load_detectron)
        load_detectron_weight(maskRCNN, args.load_detectron)

    maskRCNN = mynn.DataParallel(maskRCNN,
                                 cpu_keywords=['im_info', 'roidb'],
                                 minibatch=True,
                                 device_ids=[0])  # only support single GPU

    maskRCNN.eval()
    if args.image_dir:
        imglist = misc_utils.get_imagelist_from_dir(args.image_dir)
    else:
        imglist = args.images
    num_images = len(imglist)
    if not os.path.exists(args.output_dir):
        os.makedirs(args.output_dir)

    for i in range(num_images):
        print('img', i)
        im = cv2.imread(imglist[i])
        assert im is not None

        timers = defaultdict(Timer)

        cls_boxes, cls_segms, cls_keyps = im_detect_all(maskRCNN,
                                                        im,
                                                        timers=timers)

        im_name, _ = os.path.splitext(os.path.basename(imglist[i]))
        vis_utils.vis_one_image(
            im[:, :, ::-1],  # BGR -> RGB for visualization
            im_name,
            args.output_dir,
            cls_boxes,
            cls_segms,
            cls_keyps,
            dataset=dataset,
            box_alpha=0.3,
            show_class=True,
            thresh=0.7,
            kp_thresh=2)

    if args.merge_pdfs and num_images > 1:
        merge_out_path = '{}/results.pdf'.format(args.output_dir)
        if os.path.exists(merge_out_path):
            os.remove(merge_out_path)
        command = "pdfunite {}/*.pdf {}".format(args.output_dir,
                                                merge_out_path)
        subprocess.call(command, shell=True)
Example #8
from __future__ import absolute_import
Example #9
def test_net(args,
             dataset_name,
             proposal_file,
             output_dir,
             ind_range=None,
             gpu_id=0):
    """Run inference on all images in a dataset or over an index range of images
    in a dataset using a single GPU.
    """
    #assert not cfg.MODEL.RPN_ONLY, \
    #    'Use rpn_generate to generate proposals from RPN-only models'

    roidb, dataset, start_ind, end_ind, total_num_images = get_roidb_and_dataset(
        dataset_name, proposal_file, ind_range)
    model = initialize_model_from_cfg(args, gpu_id=gpu_id)
    num_images = len(roidb)
    num_classes = cfg.MODEL.NUM_CLASSES
    all_boxes, all_segms, all_keyps = empty_results(num_classes, num_images)
    timers = defaultdict(Timer)
    for i, entry in enumerate(roidb):
        if cfg.TEST.PRECOMPUTED_PROPOSALS:
            # The roidb may contain ground-truth rois (for example, if the roidb
            # comes from the training or val split). We only want to evaluate
            # detection on the *non*-ground-truth rois. We select only the rois
            # that have the gt_classes field set to 0, which means there's no
            # ground truth.
            box_proposals = entry['boxes'][entry['gt_classes'] == 0]
            if len(box_proposals) == 0:
                continue
        else:
            # Faster R-CNN type models generate proposals on-the-fly with an
            # in-network RPN; 1-stage models don't require proposals.
            box_proposals = None

        im = io.imread(entry['image'])
        im_name = entry['image'].split('/')[-1][:-4]
        if cfg.TEST.DATASETS == ('nuclei_det_seg_train', ):
            track = entry['image'].split('/')[-2]
            det_file = os.path.join(output_dir, track + '_' + im_name + '.pkl')
        else:
            det_file = os.path.join(output_dir, im_name + '.pkl')
        cls_boxes_i, cls_segms_i, cls_keyps_i = im_detect_all(
            model, im, box_proposals, timers)
        save_object(
            dict(
                all_boxes=cls_boxes_i,
                all_segms=cls_segms_i,
                #all_keyps=all_keyps,
                #cfg=cfg_yaml
            ),
            det_file)
        logger.info('Wrote detections to: {}'.format(
            os.path.abspath(det_file)))

        if i % 10 == 0:  # Reduce log file size
            ave_total_time = np.sum([t.average_time for t in timers.values()])
            eta_seconds = ave_total_time * (num_images - i - 1)
            eta = str(datetime.timedelta(seconds=int(eta_seconds)))
            det_time = (timers['im_detect_bbox'].average_time +
                        timers['im_detect_mask'].average_time +
                        timers['im_detect_keypoints'].average_time)
            misc_time = (timers['misc_bbox'].average_time +
                         timers['misc_mask'].average_time +
                         timers['misc_keypoints'].average_time)
            logger.info(('im_detect: range [{:d}, {:d}] of {:d}: '
                         '{:d}/{:d} {:.3f}s + {:.3f}s (eta: {})').format(
                             start_ind + 1, end_ind, total_num_images,
                             start_ind + i + 1, start_ind + num_images,
                             det_time, misc_time, eta))

    # det_file = os.path.join(output_dir, det_name)
    '''save_object(
        dict(
            all_boxes=all_boxes,
            all_segms=all_segms,
            all_keyps=all_keyps,
            cfg=cfg_yaml
        ), det_file
    )'''
    #logger.info('Wrote detections to: {}'.format(os.path.abspath(det_file)))
    return all_boxes, all_segms, all_keyps
Example #10
def main():
    """main function"""

    if not torch.cuda.is_available():
        sys.exit("Need a CUDA device to run the code.")

    args = parse_args()
    print('Called with args:')
    print(args)

    assert args.image_dir or args.images
    assert bool(args.image_dir) ^ bool(args.images)

    if args.dataset.startswith("coco"):
        dataset = datasets.get_coco_dataset()
        cfg.MODEL.NUM_CLASSES = len(dataset.classes)
    elif args.dataset.startswith("keypoints_coco"):
        dataset = datasets.get_coco_dataset()
        cfg.MODEL.NUM_CLASSES = 2
    else:
        raise ValueError('Unexpected dataset name: {}'.format(args.dataset))

    print('load cfg from file: {}'.format(args.cfg_file))
    cfg_from_file(args.cfg_file)

    if args.set_cfgs is not None:
        cfg_from_list(args.set_cfgs)

    assert bool(args.load_ckpt) ^ bool(args.load_detectron), \
        'Exactly one of --load_ckpt and --load_detectron should be specified.'
    cfg.MODEL.LOAD_IMAGENET_PRETRAINED_WEIGHTS = False  # Don't need to load imagenet pretrained weights
    assert_and_infer_cfg()

    maskRCNN = Generalized_RCNN()

    if args.cuda:
        maskRCNN.cuda()

    if args.load_ckpt:
        load_name = args.load_ckpt
        print("loading checkpoint %s" % (load_name))
        checkpoint = torch.load(load_name, map_location=lambda storage, loc: storage)
        net_utils.load_ckpt(maskRCNN, checkpoint['model'])

    if args.load_detectron:
        print("loading detectron weights %s" % args.load_detectron)
        load_detectron_weight(maskRCNN, args.load_detectron)

    maskRCNN = mynn.DataParallel(maskRCNN, cpu_keywords=['im_info', 'roidb'],
                                 minibatch=True, device_ids=[0])  # only support single GPU

    maskRCNN.eval()
    if args.image_dir:
        imglist = misc_utils.get_imagelist_from_dir(args.image_dir)
    else:
        imglist = args.images
    num_images = len(imglist)
    if not os.path.exists(args.output_dir):
        os.makedirs(args.output_dir)

    for i in range(num_images):
        print('img', i)
        im = cv2.imread(imglist[i])
        assert im is not None

        timers = defaultdict(Timer)

        cls_boxes, cls_segms, cls_keyps = im_detect_all(maskRCNN, im, timers=timers)

        im_name, _ = os.path.splitext(os.path.basename(imglist[i]))
        vis_utils.vis_one_image(
            im[:, :, ::-1],  # BGR -> RGB for visualization
            im_name,
            args.output_dir,
            cls_boxes,
            cls_segms,
            cls_keyps,
            dataset=dataset,
            box_alpha=0.3,
            show_class=True,
            thresh=0.7,
            kp_thresh=2
        )

    if args.merge_pdfs and num_images > 1:
        merge_out_path = '{}/results.pdf'.format(args.output_dir)
        if os.path.exists(merge_out_path):
            os.remove(merge_out_path)
        command = "pdfunite {}/*.pdf {}".format(args.output_dir,
                                                merge_out_path)
        subprocess.call(command, shell=True)
Example #11
def main():
    """main function"""

    if not torch.cuda.is_available():
        sys.exit("Need a CUDA device to run the code.")

    args = parse_args()
    print('Called with args:')
    print(args)

    assert args.image_dir or args.images
    assert bool(args.image_dir) ^ bool(args.images)

    if args.dataset.startswith("coco"):
        dataset = datasets.get_coco_dataset()
        cfg.MODEL.NUM_CLASSES = len(dataset.classes)
    elif args.dataset.startswith("keypoints_coco"):
        dataset = datasets.get_coco_dataset()
        cfg.MODEL.NUM_CLASSES = 2
    elif args.dataset.startswith("keypoints_carfusion"):
        dataset = datasets.get_coco_dataset()
        cfg.MODEL.NUM_CLASSES = 2
    else:
        raise ValueError('Unexpected dataset name: {}'.format(args.dataset))

    assert bool(args.load_ckpt_car) ^ bool(args.load_detectron), \
        'Exactly one of --load_ckpt_car and --load_detectron should be specified.'

    print('load cfg from file: {}'.format(args.cfg_file_person))
    cfg_from_file(args.cfg_file_person)

    if args.set_cfgs is not None:
        cfg_from_list(args.set_cfgs)

    cfg.RESNETS.IMAGENET_PRETRAINED = False  # Don't need to load imagenet pretrained weights
    assert_and_infer_cfg()

    maskRCNN_person = Generalized_RCNN()

    #print('load cfg from file: {}'.format(args.cfg_file_person))
    #cfg_from_file(args.cfg_file_person)
    #assert_and_infer_cfg()
    #maskRCNN_person = Generalized_RCNN()
    save_image = args.visualize  # always defined; avoids a NameError later when --visualize is absent

    if args.cuda:
        maskRCNN_person.cuda()

    if args.load_ckpt_person:
        load_name = args.load_ckpt_person
        print("loading checkpoint for person %s" % (load_name))
        checkpoint_person = torch.load(
            load_name, map_location=lambda storage, loc: storage)
        net_utils.load_ckpt(maskRCNN_person, checkpoint_person['model'])

    if args.load_detectron:
        print("loading detectron weights %s" % args.load_detectron)
        load_detectron_weight(maskRCNN_person, args.load_detectron)  # person model; maskRCNN_car is not defined yet

    maskRCNN_person = mynn.DataParallel(
        maskRCNN_person,
        cpu_keywords=['im_info', 'roidb'],
        minibatch=True,
        device_ids=[0])  # only support single GPU

    maskRCNN_person.eval()

    print('load cfg from file: {}'.format(args.cfg_file_car))
    cfg_from_file(args.cfg_file_car)

    if args.set_cfgs is not None:
        cfg_from_list(args.set_cfgs)

    #cfg.RESNETS.IMAGENET_PRETRAINED = False  # Don't need to load imagenet pretrained weights
    #assert_and_infer_cfg()

    maskRCNN_car = Generalized_RCNN()

    if args.cuda:
        maskRCNN_car.cuda()

    if args.load_ckpt_car:
        load_name = args.load_ckpt_car
        print("loading checkpoint for car %s" % (load_name))
        checkpoint_car = torch.load(load_name,
                                    map_location=lambda storage, loc: storage)
        net_utils.load_ckpt(maskRCNN_car, checkpoint_car['model'])

    maskRCNN_car = mynn.DataParallel(maskRCNN_car,
                                     cpu_keywords=['im_info', 'roidb'],
                                     minibatch=True,
                                     device_ids=[0])  # only support single GPU
    maskRCNN_car.eval()

    if args.image_dir:
        imglist = misc_utils.get_imagelist_from_dir(args.image_dir)
    else:
        imglist = args.images
    num_images = len(imglist)
    if not os.path.exists(args.output_dir):
        os.makedirs(args.output_dir)
    #imglist.sort(key=lambda f: int(filter(str.isdigit, f)))
    try:
        imglist = sorted(
            imglist, key=lambda x: int(os.path.splitext(x)[0].split('/')[-1]))
    except ValueError:
        print('images could not be sorted numerically')
    for i in range(num_images):
        print('img', i, ' out of ', num_images, ' filename: ',
              imglist[i].split('/')[-1], ' in camera',
              imglist[i].split('/')[-2])
        im = cv2.imread(imglist[i])
        if im is None:  # skip unreadable images
            continue
        timers = defaultdict(Timer)
        im_name, _ = os.path.splitext(os.path.basename(imglist[i]))
        #print(im_name)

        output_name = os.path.basename(im_name) + '.txt'
        output_file = os.path.join(args.output_dir, '{}'.format(output_name))

        text_file = open(output_file, "w")
        cfg_from_file(args.cfg_file_car)
        cls_boxes_car, cls_segms_car, cls_keyps_car, features_car = im_detect_all(
            maskRCNN_car, im, timers=timers)
        if len(cls_boxes_car[1]) > 0:
            features_car = features_car.data.cpu().numpy()

        #print(loop,loop2)

        #print(distance_matrix)
        #plt.figure()
        #plot_confusion_matrix(distance_matrix, classes=[0,1,2,3,4,5,6],
        #title='Confusion matrix, without normalization')
        #fig = plt.figure()
        #ax = fig.add_subplot(1,1,1)
        #ax.set_aspect('equal')
        #plt.imshow(distance_matrix, interpolation='nearest', cmap=plt.cm.ocean)
        #plt.colorbar()
        #plt.show()
        #fig.savefig('1.png')

        count = 0
        filename = 'finalized_model.txt'
        loaded_model = pickle.load(open(filename, 'rb'))  # reloaded for every image; loading once outside the loop would be faster
        for ind, bb in enumerate(cls_boxes_car[1]):
            string = str(count)
            keyps = [k for klist in cls_keyps_car for k in klist]

            bb_new = [bb[0], bb[1], bb[2] - bb[0], bb[3] - bb[1]]
            features = features_car[ind, :, :, :].flatten()
            pca_feature = []
            pca_feature.append(np.transpose(features.astype(float)))  # np.float is removed in recent NumPy
            #print(pca_feature)
            features = loaded_model.transform(pca_feature)
            features = features[0]  #loaded_model.transform(pca_feature)
            if bb[4] < 0.5:
                continue
            # NOTE: unlike the person loop below, the bounding box values are not written for cars:
            #for bb_ind,val in enumerate(bb_new):
            #     string = string + ',' + str(val)
            for kp_ind, kp in enumerate(keyps[ind][0]):
                string = string + ',' + str(kp) + ',' + str(
                    keyps[ind][1][kp_ind]) + ',' + str(
                        int(keyps[ind][2][kp_ind]))
            for feature_ind, feature in enumerate(features):
                string = string + ',' + str(feature)
            string = string + ',car'
            text_file.write(string)
            text_file.write('\n')
            #print(string)
            count = count + 1
        cfg_from_file(args.cfg_file_person)
        cls_boxes_person, cls_segms_person, cls_keyps_person, features_person = im_detect_all(
            maskRCNN_person, im, timers=timers)
        if len(cls_boxes_person[1]) > 0:
            features_person = features_person.data.cpu().numpy()
        for ind, bb in enumerate(cls_boxes_person[1]):
            string = str(count)
            keyps = [k for klist in cls_keyps_person for k in klist]

            bb_new = [bb[0], bb[1], bb[2] - bb[0], bb[3] - bb[1]]
            features = features_person[ind, :, :, :].flatten()
            pca_feature = []
            pca_feature.append(np.transpose(features.astype(float)))
            #print(pca_feature)
            features = loaded_model.transform(pca_feature)
            features = features[0]  #loaded_model.transform(pca_feature)

            #features = loaded_model.transform(np.transpose(features.astype(np.float)))
            #            print(features)
            if bb[4] < 0.5:
                continue
            for bb_ind, val in enumerate(bb_new):
                string = string + ',' + str(val)
            for kp_ind, kp in enumerate(keyps[ind][0]):
                string = string + ',' + str(kp) + ',' + str(
                    keyps[ind][1][kp_ind]) + ',' + str(
                        int(keyps[ind][2][kp_ind]))
            for feature_ind, feature in enumerate(features):
                string = string + ',' + str(feature)
            string = string + ',person'
            text_file.write(string)
            text_file.write('\n')
            #print(string)
            count = count + 1

        text_file.close()

        if save_image:
            im_name, _ = os.path.splitext(os.path.basename(imglist[i]))
            image_car = vis_utils.vis_one_image_car(
                im[:, :, ::-1],  # BGR -> RGB for visualization
                im_name,
                args.output_dir,
                cls_boxes_car,
                cls_segms_car,
                cls_keyps_car,
                dataset=dataset,
                box_alpha=0.3,
                show_class=True,
                thresh=0.5,
                kp_thresh=0.1)
            output_name = os.path.basename(im_name) + '.png'
            im = cv2.imread(
                os.path.join(args.output_dir, '{}'.format(output_name)))
            if im is None:
                continue
            continue  # NOTE: this skips the person visualization below
            vis_utils.vis_one_image(
                im[:, :, ::-1],  # BGR -> RGB for visualization
                im_name,
                args.output_dir,
                cls_boxes_person,
                cls_segms_person,
                cls_keyps_person,
                dataset=dataset,
                box_alpha=0.3,
                show_class=True,
                thresh=0.5,
                kp_thresh=10)

    if args.merge_pdfs and num_images > 1 and save_image == True:
        merge_out_path = '{}/results.pdf'.format(args.output_dir)
        if os.path.exists(merge_out_path):
            os.remove(merge_out_path)
        command = "pdfunite {}/*.pdf {}".format(args.output_dir,
                                                merge_out_path)
        subprocess.call(command, shell=True)
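
# The loop above unpickles 'finalized_model.txt' once per image and calls
# loaded_model.transform on a flattened feature vector wrapped in a list.
# That pattern matches a scikit-learn transformer such as PCA (an assumption;
# the file's contents are not shown). A sketch of the round-trip, which also
# shows why the features are wrapped in a list (transform expects 2-D input):
import pickle

import numpy as np
from sklearn.decomposition import PCA

pca = PCA(n_components=8).fit(np.random.rand(100, 512))
with open('finalized_model.pkl', 'wb') as f:  # hypothetical path
    pickle.dump(pca, f)

with open('finalized_model.pkl', 'rb') as f:
    loaded_model = pickle.load(f)
features = np.random.rand(512)
reduced = loaded_model.transform([features])[0]
print(reduced.shape)  # -> (8,)
# Loading the model once, outside the per-image loop, avoids repeated disk reads.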
Example #12
def test_net(args,
             dataset_name,
             proposal_file,
             output_dir,
             ind_range=None,
             gpu_id=0,
             active_model=None,
             step=None):
    """Run inference on all images in a dataset or over an index range of images
    in a dataset using a single GPU.
    """
    assert not cfg.MODEL.RPN_ONLY, \
        'Use rpn_generate to generate proposals from RPN-only models'

    roidb, dataset, start_ind, end_ind, total_num_images = get_roidb_and_dataset(
        dataset_name, proposal_file, ind_range)
    if active_model is None:
        model = initialize_model_from_cfg(args, gpu_id=gpu_id)
    else:
        model = active_model

    if 'train' in dataset_name:
        mode = 'train'
    elif 'val' in dataset_name:
        mode = 'val'
    elif 'test' in dataset_name:
        mode = 'test'
    else:
        raise ValueError('dataset_name should contain train, val, or test: {}'.format(dataset_name))

    num_images = len(roidb)
    num_classes = cfg.MODEL.NUM_CLASSES
    # num_images = 5
    all_boxes, all_segms, all_keyps, all_hois, all_keyps_vcoco = empty_results(
        num_classes, num_images)
    timers = defaultdict(Timer)
    all_losses = defaultdict(list)
    for i, entry in enumerate(roidb):

        if cfg.TEST.PRECOMPUTED_PROPOSALS:
            # The roidb may contain ground-truth rois (for example, if the roidb
            # comes from the training or val split). We only want to evaluate
            # detection on the *non*-ground-truth rois. We select only the rois
            # that have the gt_classes field set to 0, which means there's no
            # ground truth.
            box_proposals = entry['boxes'][entry['gt_classes'] == 0]
            if len(box_proposals) == 0:
                continue
        else:
            # Faster R-CNN type models generate proposals on-the-fly with an
            # in-network RPN; 1-stage models don't require proposals.
            box_proposals = None
        # h, w, c
        im = cv2.imread(entry['image'])

        if not cfg.VCOCO.USE_PRECOMP_BOX:
            cls_boxes_i, cls_segms_i, cls_keyps_i, hoi_res_i, vcoco_cls_keyps_i, loss_i = \
                im_detect_all(model, im, box_proposals, timers, entry)
        else:
            cls_boxes_i, cls_segms_i, cls_keyps_i, hoi_res_i, vcoco_cls_keyps_i, loss_i = \
                im_detect_all_precomp_box(model, im, timers, entry, mode, dataset.json_category_id_to_contiguous_id)

        extend_results(i, all_boxes, cls_boxes_i)
        if cls_segms_i is not None:
            extend_results(i, all_segms, cls_segms_i)
        if cls_keyps_i is not None:
            extend_results(i, all_keyps, cls_keyps_i)
        if hoi_res_i is not None:
            all_hois[entry['id']] = hoi_res_i
        if vcoco_cls_keyps_i is not None:
            extend_results(i, all_keyps_vcoco, vcoco_cls_keyps_i)

        if loss_i['interaction_action_loss'] is not None:
            for k, v in loss_i.items():
                all_losses[k].append(v)

    cfg_yaml = yaml.dump(cfg)
    if ind_range is not None:
        det_name = 'detection_range_%s_%s.pkl' % tuple(ind_range)
    else:
        if step is None:
            det_name = 'detections.pkl'
        else:
            det_name = 'detections_step{}.pkl'.format(step)
    det_file = os.path.join(output_dir, det_name)
    save_object(
        dict(all_boxes=all_boxes,
             all_segms=all_segms,
             all_keyps=all_keyps,
             all_hois=all_hois,
             all_keyps_vcoco=all_keyps_vcoco,
             all_losses=all_losses,
             cfg=cfg_yaml), det_file)
    logger.info('Wrote detections to: {}'.format(os.path.abspath(det_file)))
    return all_boxes, all_segms, all_keyps, all_hois, all_keyps_vcoco, all_losses

def main():
    """main function"""

    if not torch.cuda.is_available():
        sys.exit("Need a CUDA device to run the code.")

    args = parse_args()
    print('Called with args:')
    print(args)

    assert args.image_dir or args.images
    assert bool(args.image_dir) ^ bool(args.images)

    prefix_path = args.output_dir + '_results'

    if os.path.exists(prefix_path):
        shutil.rmtree(prefix_path)
    os.mkdir(prefix_path)

    if args.dataset.startswith("coco"):
        dataset = datasets.get_coco_dataset()
        cfg.MODEL.NUM_CLASSES = len(dataset.classes)
    elif args.dataset.startswith("keypoints_coco"):
        dataset = datasets.get_coco_dataset()
        cfg.MODEL.NUM_CLASSES = 2
    else:
        raise ValueError('Unexpected dataset name: {}'.format(args.dataset))

    print('load cfg from file: {}'.format(args.cfg_file))
    cfg_from_file(args.cfg_file)

    if args.set_cfgs is not None:
        cfg_from_list(args.set_cfgs)

    assert bool(args.load_ckpt) ^ bool(args.load_detectron), \
        'Exactly one of --load_ckpt and --load_detectron should be specified.'
    cfg.MODEL.LOAD_IMAGENET_PRETRAINED_WEIGHTS = False  # Don't need to load imagenet pretrained weights
    assert_and_infer_cfg()

    maskRCNN = Generalized_RCNN()

    if args.cuda:
        maskRCNN.cuda()

    if args.load_ckpt:
        load_name = args.load_ckpt
        print("loading checkpoint %s" % (load_name))
        checkpoint = torch.load(load_name,
                                map_location=lambda storage, loc: storage)
        net_utils.load_ckpt(maskRCNN, checkpoint['model'])

    if args.load_detectron:
        print("loading detectron weights %s" % args.load_detectron)
        load_detectron_weight(maskRCNN, args.load_detectron)

    maskRCNN = mynn.DataParallel(maskRCNN,
                                 cpu_keywords=['im_info', 'roidb'],
                                 minibatch=True,
                                 device_ids=[0])  # only support single GPU

    maskRCNN.eval()
    if args.image_dir:
        imglist = misc_utils.get_imagelist_from_dir(args.image_dir)
    else:
        imglist = args.images
    num_images = len(imglist)

    for i in tqdm(range(num_images)):
        im = cv2.imread(imglist[i])
        assert im is not None

        timers = defaultdict(Timer)

        cls_boxes, cls_segms, cls_keyps = im_detect_all(maskRCNN,
                                                        im,
                                                        timers=timers)

        im_name, _ = os.path.splitext(os.path.basename(imglist[i]))

        boxes, _, _, classes = convert_from_cls_format(cls_boxes, cls_segms,
                                                       cls_keyps)
        if not classes:
            continue
        voc_boxes = np.zeros_like(boxes)
        voc_boxes[:, 0:1] = boxes[:, 4:5]
        voc_boxes[:, 1:3] = boxes[:, 0:2] + 1
        voc_boxes[:, 3:5] = boxes[:, 2:4] + 1

        for instance_idx, cls_idx in enumerate(classes):
            cls_name = dataset.classes[cls_idx]
            if cls_name == 'motorcycle':
                cls_name = 'motorbike'
            with open(os.path.join(prefix_path, cls_name + ".txt"), "a+") as f:
                f.write("%s " % im_name)
                for item in voc_boxes[instance_idx]:
                    f.write("%f " % item)
                f.write("\n")
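
# The conversion above reorders Detectron's [x1, y1, x2, y2, score] rows into
# [score, x1, y1, x2, y2] and shifts the coordinates to the 1-based pixels
# that the VOC format expects. A worked example on a dummy row:
import numpy as np

boxes_demo = np.array([[10.0, 20.0, 50.0, 80.0, 0.95]])  # [x1, y1, x2, y2, score]
voc_demo = np.zeros_like(boxes_demo)
voc_demo[:, 0:1] = boxes_demo[:, 4:5]      # score first
voc_demo[:, 1:3] = boxes_demo[:, 0:2] + 1  # x1, y1 -> 1-based
voc_demo[:, 3:5] = boxes_demo[:, 2:4] + 1  # x2, y2 -> 1-based
print(voc_demo)  # -> [[ 0.95 11.   21.   51.   81.  ]]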
def main():
    """main function"""

    os.environ['CUDA_VISIBLE_DEVICES'] = '1'
    if not torch.cuda.is_available():
        sys.exit("Need a CUDA device to run the code.")

    args = parse_args()
    print('Called with args:')
    print(args)

    assert args.image_dir or args.images
    assert bool(args.image_dir) ^ bool(args.images)

    if args.dataset.startswith("coco"):
        dataset = datasets.get_coco_dataset()
        cfg.MODEL.NUM_CLASSES = len(dataset.classes)
    elif args.dataset.startswith("keypoints_coco"):
        dataset = datasets.get_coco_dataset()
        cfg.MODEL.NUM_CLASSES = 2
    else:
        raise ValueError('Unexpected dataset name: {}'.format(args.dataset))

    print('load cfg from file: {}'.format(args.cfg_file))
    cfg_from_file(args.cfg_file)

    if args.set_cfgs is not None:
        cfg_from_list(args.set_cfgs)

    assert bool(args.load_ckpt) ^ bool(args.load_detectron), \
        'Exactly one of --load_ckpt and --load_detectron should be specified.'
    cfg.MODEL.LOAD_IMAGENET_PRETRAINED_WEIGHTS = False  # Don't need to load imagenet pretrained weights
    assert_and_infer_cfg()

    maskRCNN = Generalized_RCNN()

    if args.cuda:
        maskRCNN.cuda()

    if args.load_ckpt:
        load_name = args.load_ckpt
        print("loading checkpoint %s" % (load_name))
        checkpoint = torch.load(load_name,
                                map_location=lambda storage, loc: storage)
        net_utils.load_ckpt(maskRCNN, checkpoint['model'])

    if args.load_detectron:
        print("loading detectron weights %s" % args.load_detectron)
        load_detectron_weight(maskRCNN, args.load_detectron)

    maskRCNN = mynn.DataParallel(maskRCNN,
                                 cpu_keywords=['im_info', 'roidb'],
                                 minibatch=True,
                                 device_ids=[0])  # only support single GPU

    maskRCNN.eval()

    # Count the total number of model parameters ('zonghe' means 'total')
    num_params = sum(p.numel() for p in maskRCNN.parameters())
    print('total parameters: ' + str(num_params))

    if args.image_dir:
        imglist = misc_utils.get_imagelist_from_dir(args.image_dir)
    else:
        imglist = args.images
    num_images = len(imglist)
    if not os.path.exists(args.output_dir):
        os.makedirs(args.output_dir)

    for i in range(num_images):
        print('img', i)
        im = cv2.imread(imglist[i])
        assert im is not None

        timers = defaultdict(Timer)
        start = time.time()
        cls_boxes, cls_segms, cls_keyps = im_detect_all(maskRCNN,
                                                        im,
                                                        timers=timers)
        class_result_boxes = []
        for index, class_boxes in enumerate(cls_boxes):
            if len(class_boxes) != 0:
                class_boxes = class_boxes.tolist()
                results_oneclass = threeD_detect(imglist[i], class_boxes,
                                                 index)
                class_result_boxes.append(results_oneclass)
        save_image = im
        color_class = {
            'Car': [0, 255, 255],
            'Cyclist': [255, 0, 0],
            'Pedestrian': [0, 0, 255]
        }
        for result_boxes in class_result_boxes:
            for box in result_boxes:
                cv2.rectangle(save_image, (box[0], box[1]), (box[2], box[3]),
                              color_class[box[-1]], 2)
                height = round(box[-2][0], 2)
                width = round(box[-2][1], 2)
                length = round(box[-2][2], 2)
                threeD_info = str(height) + ' ' + str(width) + ' ' + str(length)
                cv2.putText(save_image, threeD_info, (box[0], box[1] - 20),
                            cv2.FONT_HERSHEY_COMPLEX, 1, (255, 0, 0), 2)
        # Write the annotated image once, after all boxes are drawn
        # (it was previously re-written inside the inner loop).
        _, imagename = os.path.split(imglist[i])
        imagename2 = imagename.split('.')[0]
        cv2.imwrite('../output1/%s.png' % imagename2, save_image)

        end = time.time()
        print(end - start)
        im_name, _ = os.path.splitext(os.path.basename(imglist[i]))
        vis_utils.vis_one_image(
            im[:, :, ::-1],  # BGR -> RGB for visualization
            im_name,
            args.output_dir,
            cls_boxes,
            cls_segms,
            cls_keyps,
            dataset=dataset,
            box_alpha=0.3,
            show_class=True,
            thresh=0.7,
            kp_thresh=2)

    if args.merge_pdfs and num_images > 1:
        merge_out_path = '{}/results.pdf'.format(args.output_dir)
        if os.path.exists(merge_out_path):
            os.remove(merge_out_path)
        command = "pdfunite {}/*.pdf {}".format(args.output_dir,
                                                merge_out_path)
        subprocess.call(command, shell=True)
Example #15
def main():
    """main function"""

    if not torch.cuda.is_available():
        sys.exit("Need a CUDA device to run the code.")

    args = parse_args()
    print('Called with args:')
    print(args)
    print('load cfg from file: {}'.format(args.cfg_file))
    cfg_from_file(args.cfg_file)

    if args.set_cfgs is not None:
        cfg_from_list(args.set_cfgs)

    #Do not use RPN.
    #cfg.MODEL.FASTER_RCNN = False

    dataset = datasets.get_coco_dataset()
    cfg.MODEL.NUM_CLASSES = len(dataset.classes)

    assert bool(args.load_ckpt) ^ bool(args.load_detectron), \
        'Exactly one of --load_ckpt and --load_detectron should be specified.'
    cfg.MODEL.LOAD_IMAGENET_PRETRAINED_WEIGHTS = False  # Don't need to load imagenet pretrained weights
    assert_and_infer_cfg()
    train_db = davis_db.DAVIS_imdb(split='val')
    for seq_idx in range(train_db.get_num_sequence()):
        train_db.set_to_sequence(seq_idx)
        seq_name = train_db.get_current_seq_name()
        save_dir = osp.join(args.output_dir, seq_name)
        if not os.path.exists(save_dir):
            os.makedirs(save_dir)
        else:
            merge_out_path = '{}/results.pdf'.format(save_dir)
            if osp.exists(merge_out_path):
                continue
        for idx in range(train_db.get_current_seq_length()):
            im_name = '%02d.pdf' % (idx)
            print(osp.join(save_dir, im_name))
            if osp.exists(osp.join(save_dir, im_name)):
                continue
            im = train_db.get_image_cv2(idx)
            boxes = train_db.get_bboxes(idx)

            # Convert [x, y, w, h] boxes to [x1, y1, x2, y2]
            # (a vectorized sketch follows this example)
            new_boxes = []
            for bbox in boxes:
                new_box = []
                new_box.extend(bbox)
                new_box[2] = new_box[0] + new_box[2]
                new_box[3] = new_box[1] + new_box[3]
                new_boxes.append(new_box)

            boxes = np.array(new_boxes, np.float32)
            print(boxes.shape)
            if boxes.shape[0] > 0:
                device = torch.device(
                    "cuda:0" if torch.cuda.is_available() else "cpu")
                #boxes = torch.tensor(boxes,device = device)

                # NOTE: the model is rebuilt and its weights reloaded for every
                # frame here; hoisting this out of the loop would be much faster.
                maskRCNN_predictor_with_boxes = Generalized_RCNN_Predictor_with_Boxes()

                if args.cuda:
                    maskRCNN_predictor_with_boxes.cuda()

                if args.load_ckpt:
                    load_name = args.load_ckpt
                    print("loading checkpoint %s" % (load_name))
                    checkpoint = torch.load(
                        load_name, map_location=lambda storage, loc: storage)
                    net_utils.load_ckpt(maskRCNN_predictor_with_boxes,
                                        checkpoint['model'])

                if args.load_detectron:
                    print("loading detectron weights %s" % args.load_detectron)
                    load_detectron_weight(maskRCNN_predictor_with_boxes,
                                          args.load_detectron)

                maskRCNN_predictor_with_boxes = mynn.DataParallel(
                    maskRCNN_predictor_with_boxes,
                    cpu_keywords=['im_info', 'roidb'],
                    minibatch=True,
                    device_ids=[0])  # only support single GPU

                maskRCNN_predictor_with_boxes.eval()

                assert im is not None
                timers = defaultdict(Timer)

                cls_boxes, cls_segms, cls_keyps = im_detect_all(
                    maskRCNN_predictor_with_boxes,
                    im,
                    timers=timers,
                    box_proposals=boxes)
            else:
                cls_boxes = None
                cls_segms = None
                cls_keyps = None
            im_name = '%02d' % (idx)

            vis_utils.vis_one_image(
                im[:, :, ::-1],  # BGR -> RGB for visualization
                im_name,
                save_dir,
                cls_boxes,
                cls_segms,
                cls_keyps,
                dataset=dataset,
                box_alpha=0.3,
                show_class=True,
                thresh=0.01,
                kp_thresh=2)
        if args.merge_pdfs:
            merge_out_path = '{}/results.pdf'.format(save_dir)
            if os.path.exists(merge_out_path):
                os.remove(merge_out_path)
            command = "pdfunite {}/*.pdf {}".format(save_dir, merge_out_path)
            subprocess.call(command, shell=True)
Example #16
def main():
    """main function"""

    if not torch.cuda.is_available():
        sys.exit("Need a CUDA device to run the code.")

    args = parse_args()
    print('Called with args:')
    print(args)

    assert args.image_dir or args.images
    assert bool(args.image_dir) ^ bool(args.images)

    if args.dataset.startswith("coco"):
        dataset = datasets.get_coco_dataset()
        cfg.MODEL.NUM_CLASSES = len(dataset.classes)
    elif args.dataset.startswith("keypoints_coco"):
        dataset = datasets.get_coco_dataset()
        cfg.MODEL.NUM_CLASSES = 2
    elif args.dataset.startswith("gangjin"):
        dataset = datasets.get_gangjin_dataset()
        cfg.MODEL.NUM_CLASSES = len(dataset.classes)
    else:
        raise ValueError('Unexpected dataset name: {}'.format(args.dataset))

    print('load cfg from file: {}'.format(args.cfg_file))
    cfg_from_file(args.cfg_file)

    if args.set_cfgs is not None:
        cfg_from_list(args.set_cfgs)

    assert bool(args.load_ckpt) ^ bool(args.load_detectron), \
        'Exactly one of --load_ckpt and --load_detectron should be specified.'
    cfg.MODEL.LOAD_IMAGENET_PRETRAINED_WEIGHTS = False  # Don't need to load imagenet pretrained weights
    assert_and_infer_cfg()

    maskRCNN = Generalized_RCNN()

    if args.cuda:
        maskRCNN.cuda()

    if args.load_ckpt:
        load_name = args.load_ckpt
        print("loading checkpoint %s" % (load_name))
        checkpoint = torch.load(load_name,
                                map_location=lambda storage, loc: storage)
        net_utils.load_ckpt(maskRCNN, checkpoint['model'])

    if args.load_detectron:
        print("loading detectron weights %s" % args.load_detectron)
        load_detectron_weight(maskRCNN, args.load_detectron)

    maskRCNN = mynn.DataParallel(maskRCNN,
                                 cpu_keywords=['im_info', 'roidb'],
                                 minibatch=True,
                                 device_ids=[0])  # only support single GPU

    maskRCNN.eval()
    if args.image_dir:
        imglist = misc_utils.get_imagelist_from_dir(args.image_dir)
    else:
        imglist = args.images
    num_images = len(imglist)
    if not os.path.exists(args.output_dir):
        os.makedirs(args.output_dir)

    img_ids = []
    rects = []

    for i in range(num_images):
        print('img', i)
        im = cv2.imread(imglist[i])
        assert im is not None

        timers = defaultdict(Timer)

        cls_boxes, cls_segms, cls_keyps = im_detect_all(maskRCNN,
                                                        im,
                                                        timers=timers)

        boxes, segms, keypoints, classes = vis_utils.convert_from_cls_format(
            cls_boxes, cls_segms, cls_keyps)
        if boxes is not None:
            for j in range(len(boxes)):
                # print(boxes[j][-1])
                if float(boxes[j][-1]) < 0.99:  # score threshold
                    continue
                xmin = float(boxes[j, 0])
                xmax = float(boxes[j, 2])
                ymin = float(boxes[j, 1])
                ymax = float(boxes[j, 3])
                img_ids.append(os.path.basename(imglist[i]))
                rects.append(
                    str(xmin) + " " + str(ymin) + " " + str(xmax) + " " +
                    str(ymax))

        # im_name, _ = os.path.splitext(os.path.basename(imglist[i]))
        # vis_utils.vis_one_image(
        #     im[:, :, ::-1],  # BGR -> RGB for visualization
        #     im_name,
        #     args.output_dir,
        #     cls_boxes,
        #     cls_segms,
        #     cls_keyps,
        #     dataset=dataset,
        #     box_alpha=0.3,
        #     show_class=False,
        #     thresh=0.99,
        #     kp_thresh=2,
        #     ext="jpg"
        # )

    result_dict = {"ID": img_ids, "rects": rects}
    import pandas as pd
    result = pd.DataFrame.from_dict(result_dict)

    result.to_csv('submit/submit1.csv', header=None, index=False)
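For reference, convert_from_cls_format (used above) flattens per-class detections into flat arrays. A sketch of the box/class part under the usual Detectron convention, where cls_boxes[c] is an N x 5 array of [x1, y1, x2, y2, score] (flatten_cls_boxes is a hypothetical stand-in, not the library function):

import numpy as np

def flatten_cls_boxes(cls_boxes):
    """Flatten per-class boxes into one array plus a parallel class list."""
    boxes, classes = [], []
    for c in range(1, len(cls_boxes)):  # class 0 is background
        boxes.append(cls_boxes[c])
        classes.extend([c] * len(cls_boxes[c]))
    return (np.vstack(boxes) if boxes else None), classes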
Example #17
def main():
    """main function"""

    if not torch.cuda.is_available():
        sys.exit("Need a CUDA device to run the code.")

    args = parse_args()
    print('Called with args:')
    print(args)

    assert args.image_dir or args.images
    assert bool(args.image_dir) ^ bool(args.images)

    prefix_path = args.output_dir

    os.makedirs(prefix_path, exist_ok=True)

    if args.dataset.startswith("coco"):
        dataset = datasets.get_coco_dataset()
        cfg.MODEL.NUM_CLASSES = len(dataset.classes)
    elif args.dataset.startswith("keypoints_coco"):
        dataset = datasets.get_coco_dataset()
        cfg.MODEL.NUM_CLASSES = 2
    else:
        raise ValueError('Unexpected dataset name: {}'.format(args.dataset))

    print('load cfg from file: {}'.format(args.cfg_file))
    cfg_from_file(args.cfg_file)

    if args.set_cfgs is not None:
        cfg_from_list(args.set_cfgs)

    assert bool(args.load_ckpt) ^ bool(args.load_detectron), \
        'Exactly one of --load_ckpt and --load_detectron should be specified.'
    cfg.MODEL.LOAD_IMAGENET_PRETRAINED_WEIGHTS = False  # Don't need to load imagenet pretrained weights
    assert_and_infer_cfg()

    maskRCNN = Generalized_RCNN()

    if args.cuda:
        maskRCNN.cuda()

    if args.load_ckpt:
        load_name = args.load_ckpt
        print("loading checkpoint %s" % (load_name))
        checkpoint = torch.load(load_name,
                                map_location=lambda storage, loc: storage)
        net_utils.load_ckpt(maskRCNN, checkpoint['model'])

    if args.load_detectron:
        print("loading detectron weights %s" % args.load_detectron)
        load_detectron_weight(maskRCNN, args.load_detectron)

    maskRCNN = mynn.DataParallel(maskRCNN,
                                 cpu_keywords=['im_info', 'roidb'],
                                 minibatch=True,
                                 device_ids=[0])  # only support single GPU

    maskRCNN.eval()
    if args.image_dir:
        imglist = misc_utils.get_imagelist_from_dir(args.image_dir)
    else:
        imglist = args.images
    num_images = len(imglist)

    written_results = []

    # Validate the input resolution and compute scale factors to 1280x720.
    demo_im = cv2.imread(imglist[0])
    h, w, _ = np.shape(demo_im)
    assert h == args.height
    assert w == args.width
    h_scale = 720 / args.height
    w_scale = 1280 / args.width

    for i in tqdm(range(num_images)):
        im = cv2.imread(imglist[i])
        assert im is not None

        timers = defaultdict(Timer)

        cls_boxes, cls_segms, cls_keyps = im_detect_all(maskRCNN,
                                                        im,
                                                        timers=timers)

        im_name, _ = os.path.splitext(os.path.basename(imglist[i]))

        # boxes: [[x1, y1, x2, y2, score], ...]; classes: per-box class indices
        boxes, _, _, classes = convert_from_cls_format(cls_boxes, cls_segms,
                                                       cls_keyps)

        if boxes is None:
            continue
        # Scale boxes to the 1280x720 reference resolution.
        boxes[:, 0] = boxes[:, 0] * w_scale
        boxes[:, 2] = boxes[:, 2] * w_scale
        boxes[:, 1] = boxes[:, 1] * h_scale
        boxes[:, 3] = boxes[:, 3] * h_scale

        if not classes:
            continue

        for instance_idx, cls_idx in enumerate(classes):
            # Map COCO class names onto the BDD category names.
            cls_name = dataset.classes[cls_idx]
            if cls_name == 'motorcycle':
                cls_name = 'motor'
            elif cls_name == 'stop sign':
                cls_name = 'traffic sign'
            elif cls_name == 'bicycle':
                cls_name = 'bike'
            if cls_name not in bdd_category:
                continue

            written_results.append({
                "name": imglist[i].split('/')[-1],
                "timestamp": 1000,
                "category": cls_name,
                "bbox": boxes[instance_idx, :4],
                "score": boxes[instance_idx, -1]
            })

    with open(os.path.join(prefix_path, args.name + '.json'),
              'w') as outputfile:
        json.dump(written_results, outputfile, cls=MyEncoder)
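The json.dump call above needs a custom encoder because the bbox and score values are NumPy types, which the stdlib encoder rejects. MyEncoder is defined elsewhere in the original source; a minimal sketch of such an encoder:

import json
import numpy as np

class NumpyEncoder(json.JSONEncoder):
    """Serialize NumPy scalars and arrays as plain Python numbers and lists."""
    def default(self, obj):
        if isinstance(obj, np.integer):
            return int(obj)
        if isinstance(obj, np.floating):
            return float(obj)
        if isinstance(obj, np.ndarray):
            return obj.tolist()
        return super(NumpyEncoder, self).default(obj)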
Example #18
def test_net(
        args,
        dataset_name,
        proposal_file,
        output_dir,
        ind_range=None,
        gpu_id=0):
    """Run inference on all images in a dataset or over an index range of images
    in a dataset using a single GPU.
    """
    assert not cfg.MODEL.RPN_ONLY, \
        'Use rpn_generate to generate proposals from RPN-only models'

    roidb, dataset, start_ind, end_ind, total_num_images = get_roidb_and_dataset(
        dataset_name, proposal_file, ind_range
    )
    model = initialize_model_from_cfg(args, gpu_id=gpu_id)
    num_images = len(roidb)
    num_classes = cfg.MODEL.NUM_CLASSES
    all_boxes, all_segms, all_keyps = empty_results(num_classes, num_images)
    timers = defaultdict(Timer)
    for i, entry in enumerate(roidb):
        if cfg.TEST.PRECOMPUTED_PROPOSALS:
            # The roidb may contain ground-truth rois (for example, if the roidb
            # comes from the training or val split). We only want to evaluate
            # detection on the *non*-ground-truth rois. We select only the rois
            # that have the gt_classes field set to 0, which means there's no
            # ground truth.
            box_proposals = entry['boxes'][entry['gt_classes'] == 0]
            if len(box_proposals) == 0:
                continue
        else:
            # Faster R-CNN type models generate proposals on-the-fly with an
            # in-network RPN; 1-stage models don't require proposals.
            box_proposals = None

        im = cv2.imread(entry['image'])
        cls_boxes_i, cls_segms_i, cls_keyps_i = im_detect_all(model, im, box_proposals, timers)

        extend_results(i, all_boxes, cls_boxes_i)
        if cls_segms_i is not None:
            extend_results(i, all_segms, cls_segms_i)
        if cls_keyps_i is not None:
            extend_results(i, all_keyps, cls_keyps_i)

        if i % 10 == 0:  # Reduce log file size
            ave_total_time = np.sum([t.average_time for t in timers.values()])
            eta_seconds = ave_total_time * (num_images - i - 1)
            eta = str(datetime.timedelta(seconds=int(eta_seconds)))
            det_time = (
                timers['im_detect_bbox'].average_time +
                timers['im_detect_mask'].average_time +
                timers['im_detect_keypoints'].average_time
            )
            misc_time = (
                timers['misc_bbox'].average_time +
                timers['misc_mask'].average_time +
                timers['misc_keypoints'].average_time
            )
            logger.info(
                (
                    'im_detect: range [{:d}, {:d}] of {:d}: '
                    '{:d}/{:d} {:.3f}s + {:.3f}s (eta: {})'
                ).format(
                    start_ind + 1, end_ind, total_num_images, start_ind + i + 1,
                    start_ind + num_images, det_time, misc_time, eta
                )
            )

        if cfg.VIS:
            im_name = os.path.splitext(os.path.basename(entry['image']))[0]
            vis_utils.vis_one_image(
                im[:, :, ::-1],
                '{:d}_{:s}'.format(i, im_name),
                os.path.join(output_dir, 'vis'),
                cls_boxes_i,
                segms=cls_segms_i,
                keypoints=cls_keyps_i,
                thresh=cfg.VIS_TH,
                box_alpha=0.8,
                dataset=dataset,
                show_class=True
            )

    cfg_yaml = yaml.dump(cfg)
    if ind_range is not None:
        det_name = 'detection_range_%s_%s.pkl' % tuple(ind_range)
    else:
        det_name = 'detections.pkl'
    det_file = os.path.join(output_dir, det_name)
    save_object(
        dict(
            all_boxes=all_boxes,
            all_segms=all_segms,
            all_keyps=all_keyps,
            cfg=cfg_yaml
        ), det_file
    )
    logger.info('Wrote detections to: {}'.format(os.path.abspath(det_file)))
    return all_boxes, all_segms, all_keyps
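For reference, the result containers filled by these test_net variants follow the Detectron convention: all_boxes[cls][im] holds an N x 5 array of [x1, y1, x2, y2, score] for one image. A minimal sketch of the two helpers under that assumption (the real implementations live in the framework's test engine):

def empty_results(num_classes, num_images):
    """One empty slot per (class, image) pair for boxes, segms and keypoints."""
    all_boxes = [[[] for _ in range(num_images)] for _ in range(num_classes)]
    all_segms = [[[] for _ in range(num_images)] for _ in range(num_classes)]
    all_keyps = [[[] for _ in range(num_images)] for _ in range(num_classes)]
    return all_boxes, all_segms, all_keyps

def extend_results(index, all_res, im_res):
    """Copy one image's per-class results into the global containers."""
    for cls_idx in range(1, len(im_res)):  # skip background class 0
        all_res[cls_idx][index] = im_res[cls_idx]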
Example #19
def test_net(args,
             dataset_name,
             proposal_file,
             output_dir,
             ind_range=None,
             gpu_id=0):
    """Run inference on all images in a dataset or over an index range of images
    in a dataset using a single GPU.
    """
    assert not cfg.MODEL.RPN_ONLY, \
        'Use rpn_generate to generate proposals from RPN-only models'

    full_roidb, dataset, start_ind, end_ind, total_num_images, total_num_cls, support_dict = get_roidb_and_dataset(
        dataset_name, proposal_file, ind_range)
    model = initialize_model_from_cfg(args, gpu_id=gpu_id)

    base_real_index = full_roidb[start_ind]['real_index']
    roidb = full_roidb[start_ind:end_ind]

    index_ls = []
    for item in roidb:
        index_ls.append(item['real_index'])
    num_annotations = len(roidb)
    num_images = len(list(set(index_ls)))
    num_classes = total_num_cls  # not cfg.MODEL.NUM_CLASSES
    all_boxes, all_segms, all_keyps = empty_results(num_classes, num_images)
    print('num images in this range:', num_images)

    timers = defaultdict(Timer)

    for i, entry in enumerate(roidb):
        if cfg.TEST.PRECOMPUTED_PROPOSALS:
            # The roidb may contain ground-truth rois (for example, if the roidb
            # comes from the training or val split). We only want to evaluate
            # detection on the *non*-ground-truth rois. We select only the rois
            # that have the gt_classes field set to 0, which means there's no
            # ground truth.
            box_proposals = entry['boxes'][entry['gt_classes'] == 0]
            if len(box_proposals) == 0:
                continue
        else:
            # Faster R-CNN type models generate proposals on-the-fly with an
            # in-network RPN; 1-stage models don't require proposals.
            box_proposals = None

        # Get the support set for this query's class.
        index = entry['index']
        assert len(set(entry['gt_classes'])) == 1, \
            'each query entry is expected to contain a single class'
        query_cls = list(set(entry['gt_classes']))[0]
        query_img = entry['image']

        all_cls = support_dict[query_cls]['all_cls']

        # Assemble a 5-way 5-shot support set of 320x320 crops.
        support_way = 5
        support_shot = 5
        support_data_all = np.zeros((support_way * support_shot, 3, 320, 320),
                                    dtype=np.float32)
        support_box_all = np.zeros((support_way * support_shot, 4),
                                   dtype=np.float32)
        support_cls_ls = []

        # Assumes len(all_cls) == support_way, so the arrays are fully filled.
        for cls_id, cls in enumerate(all_cls):
            begin = cls_id * support_shot
            end = (cls_id + 1) * support_shot
            support_data_all[begin:end] = support_dict[cls]['img']
            support_box_all[begin:end] = support_dict[cls]['box']
            support_cls_ls.append(cls)

        save_path = './vis'
        im = cv2.imread(entry['image'])
        cls_boxes_i, cls_segms_i, cls_keyps_i = im_detect_all(
            model, im, support_data_all, support_box_all, support_cls_ls,
            support_shot, save_path, box_proposals, timers)

        # Index results relative to the start of this range.
        real_index = entry['real_index'] - base_real_index
        cls_boxes_i = cls_boxes_i[1]  # entry 1 holds [x1, y1, x2, y2, score, cls] rows
        for cls in support_cls_ls:
            extend_support_results(
                real_index, all_boxes,
                cls_boxes_i[cls_boxes_i[:, 5] == cls][:, :5], cls)

        if cls_segms_i is not None:
            extend_results(i, all_segms, cls_segms_i)
        if cls_keyps_i is not None:
            extend_results(i, all_keyps, cls_keyps_i)

        if i % 10 == 0:  # Reduce log file size
            ave_total_time = np.sum([t.average_time for t in timers.values()])
            eta_seconds = ave_total_time * (num_annotations - i - 1)
            eta = str(datetime.timedelta(seconds=int(eta_seconds)))
            det_time = (timers['im_detect_bbox'].average_time +
                        timers['im_detect_mask'].average_time +
                        timers['im_detect_keypoints'].average_time)
            misc_time = (timers['misc_bbox'].average_time +
                         timers['misc_mask'].average_time +
                         timers['misc_keypoints'].average_time)
            logger.info(('im_detect: range [{:d}, {:d}] of {:d}: '
                         '{:d}/{:d} {:.3f}s + {:.3f}s (eta: {})').format(
                             start_ind + 1, end_ind, total_num_images,
                             start_ind + i + 1, start_ind + num_annotations,
                             det_time, misc_time, eta))

        if cfg.VIS:
            im_name = os.path.splitext(os.path.basename(entry['image']))[0]
            vis_utils.vis_one_image(im[:, :, ::-1],
                                    '{:d}_{:s}'.format(i, im_name),
                                    os.path.join(output_dir, 'vis'),
                                    cls_boxes_i,
                                    segms=cls_segms_i,
                                    keypoints=cls_keyps_i,
                                    thresh=cfg.VIS_TH,
                                    box_alpha=0.8,
                                    dataset=dataset,
                                    show_class=True)

    cfg_yaml = yaml.dump(cfg)
    if ind_range is not None:
        det_name = 'detection_range_%s_%s.pkl' % tuple(ind_range)
    else:
        det_name = 'detections.pkl'
    det_file = os.path.join(output_dir, det_name)
    save_object(
        dict(all_boxes=all_boxes,
             all_segms=all_segms,
             all_keyps=all_keyps,
             cfg=cfg_yaml), det_file)
    logger.info('Wrote detections to: {}'.format(os.path.abspath(det_file)))
    return all_boxes, all_segms, all_keyps
def main():
    """main function"""

    if not torch.cuda.is_available():
        sys.exit("Need a CUDA device to run the code.")

    args = parse_args()
    print('Called with args:')
    print(args)

    assert args.image_dir or args.images
    assert bool(args.image_dir) ^ bool(args.images)

    if args.dataset == "pascal_parts_heads":
        dataset = datasets.get_head_dataset()
        cfg.MODEL.NUM_CLASSES = 2
    elif args.dataset == "scuthead_a":
        dataset = datasets.get_head_dataset()
        cfg.MODEL.NUM_CLASSES = 2
    else:
        raise ValueError('Unexpected dataset name: {}'.format(args.dataset))

    print('load cfg from file: {}'.format(args.cfg_file))
    cfg_from_file(args.cfg_file)

    if args.set_cfgs is not None:
        cfg_from_list(args.set_cfgs)

    assert bool(args.load_ckpt) ^ bool(args.load_detectron), \
        'Exactly one of --load_ckpt and --load_detectron should be specified.'
    cfg.MODEL.LOAD_IMAGENET_PRETRAINED_WEIGHTS = False  # Don't need to load imagenet pretrained weights
    assert_and_infer_cfg()

    maskRCNN = Generalized_RCNN()

    if args.cuda:
        maskRCNN.cuda()

    if args.load_ckpt:
        load_name = args.load_ckpt
        print("loading checkpoint %s" % (load_name))
        checkpoint = torch.load(load_name,
                                map_location=lambda storage, loc: storage)
        net_utils.load_ckpt(maskRCNN, checkpoint['model'])

    if args.load_detectron:
        print("loading detectron weights %s" % args.load_detectron)
        load_detectron_weight(maskRCNN, args.load_detectron)

    maskRCNN = mynn.DataParallel(maskRCNN,
                                 cpu_keywords=['im_info', 'roidb'],
                                 minibatch=True,
                                 device_ids=[0])  # only support single GPU

    maskRCNN.eval()
    if args.image_dir:
        imglist = misc_utils.get_imagelist_from_dir(args.image_dir)
    else:
        imglist = args.images
    num_images = len(imglist)
    if not os.path.exists(args.output_dir):
        os.makedirs(args.output_dir)

    for i in range(num_images):

        im = cv2.imread(imglist[i])
        assert im is not None

        timers = defaultdict(Timer)

        cls_boxes, cls_segms, cls_keyps = im_detect_all(maskRCNN,
                                                        im,
                                                        timers=timers)

        im_name, _ = os.path.splitext(os.path.basename(imglist[i]))
        outputfile = os.path.join(args.output_dir, im_name)

        # Class 1 is the single foreground class ('head') in this 2-class model.
        head_boxes = cls_boxes[1]
        print('img :', i, '   num_heads :', len(head_boxes), ' img_path :',
              imglist[i])
        np.save(outputfile, head_boxes)
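np.save above writes one .npy file per image holding an N x 5 array of [x1, y1, x2, y2, score]. Reading a file back and thresholding it might look like this (the path is hypothetical):

import numpy as np

head_boxes = np.load('output/frame_000.npy')    # hypothetical saved file
confident = head_boxes[head_boxes[:, 4] > 0.5]  # keep scores above 0.5
print('kept %d of %d head boxes' % (len(confident), len(head_boxes)))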
Example #21
def test_net(args,
             dataset_name,
             proposal_file,
             output_dir,
             ind_range=None,
             gpu_id=0):
    """Run inference on all images in a dataset or over an index range of images
    in a dataset using a single GPU.
    """
    assert not cfg.MODEL.RPN_ONLY, \
        'Use rpn_generate to generate proposals from RPN-only models'

    roidb, dataset, start_ind, end_ind, total_num_images = get_roidb_and_dataset(
        dataset_name, proposal_file, ind_range)
    model = initialize_model_from_cfg(args, gpu_id=gpu_id)
    num_images = len(roidb)
    num_classes = cfg.MODEL.NUM_CLASSES
    all_boxes, all_segms, all_keyps = empty_results(num_classes, num_images)
    timers = defaultdict(Timer)
    if cfg.LESION.USE_POSITION:
        true_pos = 0

    if cfg.TEST.OFFLINE_MAP and (cfg.DATA_SOURCE == 'mammo'
                                 or cfg.DATA_SOURCE == 'lesion'):
        if osp.exists(os.path.join(output_dir, 'ground-truth')):
            shutil.rmtree(os.path.join(output_dir, 'ground-truth'))
        os.makedirs(os.path.join(output_dir, 'ground-truth'))
        if osp.exists(os.path.join(output_dir, 'predicted')):
            shutil.rmtree(os.path.join(output_dir, 'predicted'))
        os.makedirs(os.path.join(output_dir, 'predicted'))

    for i, entry in enumerate(roidb):
        if cfg.TEST.PRECOMPUTED_PROPOSALS:
            # The roidb may contain ground-truth rois (for example, if the roidb
            # comes from the training or val split). We only want to evaluate
            # detection on the *non*-ground-truth rois. We select only the rois
            # that have the gt_classes field set to 0, which means there's no
            # ground truth.
            box_proposals = entry['boxes'][entry['gt_classes'] == 0]
            if len(box_proposals) == 0:
                continue
        else:
            # Faster R-CNN type models generate proposals on-the-fly with an
            # in-network RPN; 1-stage models don't require proposals.
            box_proposals = None
        if cfg.DATA_SOURCE == 'coco':
            if cfg.LESION.LESION_ENABLED:
                im = load_multislice_16bit_png(roidb[i])
            else:
                im = cv2.imread(roidb[i]['image'])
        elif cfg.DATA_SOURCE == 'mammo':
            im, shape = get_a_img(roidb[i])

        if (cfg.MODEL.LR_VIEW_ON or cfg.MODEL.GIF_ON
                or cfg.MODEL.LRASY_MAHA_ON) and cfg.DATA_SOURCE == 'mammo':
            other_im = get_b_img(entry, im)
            cls_boxes_i, cls_segms_i, cls_keyps_i = im_detect_all(
                model, [im, other_im], box_proposals, timers)
        else:
            if cfg.LESION.USE_POSITION:
                cls_boxes_i, cls_segms_i, cls_keyps_i, return_dict = im_detect_all(
                    model, im, box_proposals, timers)
                # Bin the normalized z-position into three coarse regions.
                bins = np.array((0.58, 0.72, 1))
                bin_pred = np.argmax(
                    return_dict['pos_cls_pred'][0].data.cpu().numpy())
                bin_gt = np.digitize(entry['z_position'], bins)
                if bin_gt == bin_pred:
                    true_pos += 1
            else:
                cls_boxes_i, cls_segms_i, cls_keyps_i = im_detect_all(
                    model, im, box_proposals, timers)

        if cfg.TEST.OFFLINE_MAP and cfg.DATA_SOURCE == 'mammo':
            gt_bboxes = get_vis_gt_bboxes(entry)
            im_name = os.path.splitext(os.path.basename(entry['image']))[0]
            with open(
                    os.path.join(output_dir, 'ground-truth', im_name + '.txt'),
                    'w') as w:
                for gt_bbox in gt_bboxes[0]:
                    w.write('mass %d %d %d %d\n' %
                            (gt_bbox[0], gt_bbox[1], gt_bbox[2], gt_bbox[3]))
                for gt_bbox in gt_bboxes[1]:
                    w.write('mass %d %d %d %d difficult\n' %
                            (gt_bbox[0], gt_bbox[1], gt_bbox[2], gt_bbox[3]))
            with open(os.path.join(output_dir, 'predicted', im_name + '.txt'),
                      'w') as w:
                for idx in range(cls_boxes_i[1].shape[0]):
                    w.write('mass %.5f %d %d %d %d\n' %
                            (cls_boxes_i[1][idx][4], cls_boxes_i[1][idx][0],
                             cls_boxes_i[1][idx][1], cls_boxes_i[1][idx][2],
                             cls_boxes_i[1][idx][3]))

        if cfg.VIS:
            im_name = os.path.splitext(os.path.basename(entry['image']))[0]
            if cfg.TEST.VIS_TEST_ONLY:
                vis_utils.vis_one_image(im[:, :, ::-1].astype('uint8'),
                                        '{:d}_{:s}'.format(i, im_name),
                                        os.path.join(output_dir, 'vis'),
                                        cls_boxes_i,
                                        segms=cls_segms_i,
                                        keypoints=cls_keyps_i,
                                        thresh=cfg.VIS_TH,
                                        box_alpha=0.8,
                                        dataset=dataset,
                                        show_class=True)
            else:
                if cfg.DATA_SOURCE == 'coco':
                    other_im_show = im[:, :, ::-1].astype('uint8')
                    if cfg.TEST.VIS_SINGLE_SLICE:
                        other_im_show = cv2.merge([
                            other_im_show[:, :, 1], other_im_show[:, :, 1],
                            other_im_show[:, :, 1]
                        ])
                    gt_boxes_show = [entry['boxes'].tolist(), []]
                    gt_classes = [entry['gt_classes'].tolist(), []]
                elif cfg.DATA_SOURCE == 'mammo':
                    if (cfg.MODEL.LR_VIEW_ON or cfg.MODEL.GIF_ON
                            or cfg.MODEL.LRASY_MAHA_ON):
                        other_im_show = other_im[:, :, ::-1].astype('uint8')
                    else:
                        other_im_show = im[:, :, ::-1].astype('uint8')
                    gt_boxes_show = get_vis_gt_bboxes(entry)
                    gt_classes = None
                im_show = im[:, :, ::-1].astype('uint8')
                if cfg.TEST.VIS_SINGLE_SLICE:
                    im_show = cv2.merge(
                        [im_show[:, :, 1], im_show[:, :, 1], im_show[:, :, 1]])
                vis_gt_utils.vis_one_image(im_show,
                                           other_im_show,
                                           '{:d}_{:s}'.format(i, im_name),
                                           os.path.join(output_dir, 'vis'),
                                           cls_boxes_i,
                                           gt_boxes_show,
                                           segms=cls_segms_i,
                                           thresh=cfg.VIS_TH,
                                           box_alpha=0.8,
                                           dataset=dataset,
                                           show_class=True,
                                           gt_classes=gt_classes)

        if cfg.DATA_SOURCE == 'mammo':
            cls_boxes_i = unalign_boxes(
                entry, shape, cls_boxes_i)  # cls_boxes_i[c]:array n x 5

        extend_results(i, all_boxes, cls_boxes_i)
        if cls_segms_i is not None:
            if cfg.DATA_SOURCE == 'mammo':
                cls_segms_i = unalign_segms(entry, shape, cls_segms_i)
            extend_results(i, all_segms, cls_segms_i)
        if cls_keyps_i is not None:
            extend_results(i, all_keyps, cls_keyps_i)

        if i % 500 == 0:  # Reduce log file size
            ave_total_time = np.sum([t.average_time for t in timers.values()])
            eta_seconds = ave_total_time * (num_images - i - 1)
            eta = str(datetime.timedelta(seconds=int(eta_seconds)))
            det_time = (timers['im_detect_bbox'].average_time +
                        timers['im_detect_mask'].average_time +
                        timers['im_detect_keypoints'].average_time)
            misc_time = (timers['misc_bbox'].average_time +
                         timers['misc_mask'].average_time +
                         timers['misc_keypoints'].average_time)
            logger.info(('im_detect: range [{:d}, {:d}] of {:d}: '
                         '{:d}/{:d} {:.3f}s + {:.3f}s (eta: {})').format(
                             start_ind + 1, end_ind, total_num_images,
                             start_ind + i + 1, start_ind + num_images,
                             det_time, misc_time, eta))
    if cfg.TEST.OFFLINE_MAP and cfg.DATA_SOURCE == 'mammo':
        os.system(
            "python ./lib/datasets/map_evaluator.py --output_dir=%s -na -np" %
            output_dir)

    cfg_yaml = yaml.dump(cfg)
    if ind_range is not None:
        det_name = 'detection_range_%s_%s.pkl' % tuple(ind_range)
    else:
        det_name = 'detections.pkl'
    det_file = os.path.join(output_dir, det_name)
    save_object(
        dict(all_boxes=all_boxes,
             all_segms=all_segms,
             all_keyps=all_keyps,
             cfg=cfg_yaml), det_file)
    logger.info('Wrote detections to: {}'.format(os.path.abspath(det_file)))
    if cfg.LESION.USE_POSITION:
        print('position bin accuracy: %d / %d = %.4f' %
              (true_pos, num_images, float(true_pos) / num_images))
    return all_boxes, all_segms, all_keyps
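The USE_POSITION branch above scores the 3-way position classifier by binning the normalized z-position with np.digitize and comparing the bin against the argmax prediction. A small illustration of that binning with the same bin edges:

import numpy as np

bins = np.array((0.58, 0.72, 1))
for z in (0.3, 0.6, 0.9):
    # np.digitize: 0 for z < 0.58, 1 for 0.58 <= z < 0.72, 2 for 0.72 <= z < 1
    print('z=%.2f -> bin %d' % (z, np.digitize(z, bins)))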
def test_net(args,
             dataset_name,
             proposal_file,
             output_dir,
             ind_range=None,
             gpu_id=0):
    """Run inference on all images in a dataset or over an index range of images
    in a dataset using a single GPU.
    """
    assert not cfg.MODEL.RPN_ONLY, \
        'Use rpn_generate to generate proposals from RPN-only models'

    roidb, dataset, start_ind, end_ind, total_num_images = get_roidb_and_dataset(
        dataset_name, proposal_file, ind_range)
    model = initialize_model_from_cfg(args, gpu_id=gpu_id)
    num_images = len(roidb)
    num_classes = cfg.MODEL.NUM_CLASSES
    all_boxes, all_segms, all_keyps = empty_results(num_classes, num_images)
    timers = defaultdict(Timer)
    for i, entry in enumerate(roidb):
        if cfg.TEST.PRECOMPUTED_PROPOSALS:
            # The roidb may contain ground-truth rois (for example, if the roidb
            # comes from the training or val split). We only want to evaluate
            # detection on the *non*-ground-truth rois. We select only the rois
            # that have the gt_classes field set to 0, which means there's no
            # ground truth.
            box_proposals = entry['boxes'][entry['gt_classes'] == 0]
            if len(box_proposals) == 0:
                continue
        else:
            # Faster R-CNN type models generate proposals on-the-fly with an
            # in-network RPN; 1-stage models don't require proposals.
            box_proposals = None

        timers['im_load'].tic()
        # Shape (h, w, 3 * DATA_LOADER.NUM_INPUTS)
        im = entry['dataset'].load_image(entry)
        im = pack_sequence(im)
        timers['im_load'].toc()
        cls_boxes_i, cls_segms_i, cls_keyps_i = im_detect_all(
            model, im, box_proposals, timers)

        extend_results(i, all_boxes, cls_boxes_i)
        if cls_segms_i is not None:
            extend_results(i, all_segms, cls_segms_i)
        if cls_keyps_i is not None:
            extend_results(i, all_keyps, cls_keyps_i)

        if i % 10 == 0:  # Reduce log file size
            ave_total_time = np.sum([t.average_time for t in timers.values()])
            eta_seconds = ave_total_time * (num_images - i - 1)
            eta = str(datetime.timedelta(seconds=int(eta_seconds)))
            det_time = (timers['im_detect_bbox'].average_time +
                        timers['im_detect_mask'].average_time +
                        timers['im_detect_keypoints'].average_time)
            misc_time = (timers['misc_bbox'].average_time +
                         timers['misc_mask'].average_time +
                         timers['misc_keypoints'].average_time)
            logger.info(('im_detect: range [{:d}, {:d}] of {:d}: '
                         '{:d}/{:d} {:.3f}s + {:.3f}s (eta: {})').format(
                             start_ind + 1, end_ind, total_num_images,
                             start_ind + i + 1, start_ind + num_images,
                             det_time, misc_time, eta))

        if cfg.VIS:
            # Flatten the relative image path into a unique file name.
            im_name = os.path.splitext(entry['image'])[0].replace(
                os.path.sep, '_')
            # Use the first input (in case DATA_LOADER.NUM_INPUTS > 1)
            im_vis = im[:, :, :3]
            vis_utils.vis_one_image(im_vis[:, :, ::-1],
                                    '{:d}_{:s}'.format(i, im_name),
                                    os.path.join(output_dir, 'vis'),
                                    cls_boxes_i,
                                    segms=cls_segms_i,
                                    keypoints=cls_keyps_i,
                                    thresh=cfg.VIS_TH,
                                    box_alpha=0.8,
                                    dataset=dataset,
                                    show_class=True)

    cfg_yaml = yaml.dump(cfg)
    if ind_range is not None:
        det_name = 'detection_range_%s_%s.pkl' % tuple(ind_range)
    else:
        det_name = 'detections.pkl'
    det_file = os.path.join(output_dir, det_name)

    if hasattr(args, 'objectness_eval') and args.objectness_eval:
        all_boxes, all_segms, all_keyps = collapse_categories(
            all_boxes, all_segms, all_keyps)

    save_object(
        dict(all_boxes=all_boxes,
             all_segms=all_segms,
             all_keyps=all_keyps,
             cfg=cfg_yaml), det_file)
    logger.info('Wrote detections to: {}'.format(os.path.abspath(det_file)))
    return all_boxes, all_segms, all_keyps
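This variant feeds a multi-input image whose inputs are stacked along the channel axis (hence im[:, :, :3] for visualization). pack_sequence is not shown in the snippet; a sketch of what it is assumed to do when load_image returns a list of H x W x 3 arrays:

import numpy as np

def pack_sequence(ims):
    """Concatenate a list of H x W x 3 images into one H x W x (3*N) array."""
    if isinstance(ims, (list, tuple)):
        return np.concatenate(ims, axis=2)
    return ims  # already a single array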
Example #23
def test_net(ind_range=None):
    """Run inference on all images in a dataset or over an index range of images
    in a dataset using a single GPU.
    """
    assert cfg.TEST.WEIGHTS != '', \
        'TEST.WEIGHTS must be set to the model file to test'
    assert not cfg.MODEL.RPN_ONLY, \
        'Use rpn_generate to generate proposals from RPN-only models'
    assert cfg.TEST.DATASET != '', \
        'TEST.DATASET must be set to the dataset name to test'

    output_dir = get_output_dir(training=False)
    roidb, dataset, start_ind, end_ind, total_num_images = get_roidb_and_dataset(
        ind_range
    )
    model = initialize_model_from_cfg()
    num_images = len(roidb)
    num_classes = cfg.MODEL.NUM_CLASSES
    all_boxes, all_segms, all_keyps, \
    all_refined_segms, all_refined_keyps = \
        empty_results(num_classes, num_images)

    timers = defaultdict(Timer)
    for i, entry in enumerate(roidb):
        if cfg.MODEL.FASTER_RCNN:
            # Faster R-CNN type models generate proposals on-the-fly with an
            # in-network RPN
            box_proposals = None
        else:
            # The roidb may contain ground-truth rois (for example, if the roidb
            # comes from the training or val split). We only want to evaluate
            # detection on the *non*-ground-truth rois. We select only the rois
            # that have the gt_classes field set to 0, which means there's no
            # ground truth.
            box_proposals = entry['boxes'][entry['gt_classes'] == 0]
            if len(box_proposals) == 0:
                continue

        im = cv2.imread(entry['image'])
        with c2_utils.NamedCudaScope(0):
            cls_boxes_i, cls_segms_i, cls_keyps_i, \
            cls_refined_segms_i, cls_refined_keyps_i = \
                im_detect_all(model, im, box_proposals, timers)

        extend_results(i, all_boxes, cls_boxes_i)
        if cls_segms_i is not None:
            extend_results(i, all_segms, cls_segms_i)
        if cls_keyps_i is not None:
            extend_results(i, all_keyps, cls_keyps_i)
        if cls_refined_segms_i is not None:
            extend_results(i, all_refined_segms, cls_refined_segms_i)
        if cls_refined_keyps_i is not None:
            extend_results(i, all_refined_keyps, cls_refined_keyps_i)

        if i % 10 == 0:  # Reduce log file size
            ave_total_time = np.sum([t.average_time for t in timers.values()])
            eta_seconds = ave_total_time * (num_images - i - 1)
            eta = str(datetime.timedelta(seconds=int(eta_seconds)))
            det_time = (
                timers['im_detect_bbox'].average_time +
                timers['im_detect_mask'].average_time +
                timers['im_detect_keypoints'].average_time +
                timers['im_detect_refined_mask'].average_time +
                timers['im_detect_refined_keypoints'].average_time
            )
            misc_time = (
                timers['misc_bbox'].average_time +
                timers['misc_mask'].average_time +
                timers['misc_keypoints'].average_time +
                timers['misc_refined_mask'].average_time +
                timers['misc_refined_keypoints'].average_time
            )
            logger.info(
                (
                    'im_detect: range [{:d}, {:d}] of {:d}: '
                    '{:d}/{:d} {:.3f}s + {:.3f}s (eta: {})'
                ).format(
                    start_ind + 1, end_ind, total_num_images, start_ind + i + 1,
                    start_ind + num_images, det_time, misc_time, eta
                )
            )

        if cfg.VIS:
            im_name = os.path.splitext(os.path.basename(entry['image']))[0]
            # visualize local result
            vis_utils.vis_one_image(
                im[:, :, ::-1],
                '{:d}_{:s}'.format(i, im_name),
                os.path.join(output_dir, 'vis_local'),
                cls_boxes_i,
                segms=cls_segms_i,
                keypoints=cls_keyps_i,
                thresh=cfg.VIS_TH,
                box_alpha=0.8,
                dataset=dataset,
                show_class=True
            )
            # visualize refined result
            vis_utils.vis_one_image(
                im[:, :, ::-1],
                '{:d}_{:s}'.format(i, im_name),
                os.path.join(output_dir, 'vis_refined'),
                cls_boxes_i,
                segms=cls_refined_segms_i,
                keypoints=cls_refined_keyps_i,
                thresh=cfg.VIS_TH,
                box_alpha=0.8,
                dataset=dataset,
                show_class=True
            )

    cfg_yaml = yaml.dump(cfg)
    if ind_range is not None:
        det_name = 'detection_range_%s_%s.pkl' % tuple(ind_range)
    else:
        det_name = 'detections.pkl'
    det_file = os.path.join(output_dir, det_name)
    save_object(
        dict(
            all_boxes=all_boxes,
            all_segms=all_segms,
            all_keyps=all_keyps,
            all_refined_segms=all_refined_segms,
            all_refined_keyps=all_refined_keyps,
            cfg=cfg_yaml
        ), det_file
    )
    logger.info('Wrote detections to: {}'.format(os.path.abspath(det_file)))
    return all_boxes, all_segms, all_keyps, all_refined_segms, all_refined_keyps
Example #24
def test_net(
        args,
        dataset_name,
        proposal_file,
        output_dir,
        ind_range=None,
        gpu_id=0):
    """Run inference on all images in a dataset or over an index range of images
    in a dataset using a single GPU.
    """
    assert not cfg.MODEL.RPN_ONLY, \
        'Use rpn_generate to generate proposals from RPN-only models'

    roidb, dataset, start_ind, end_ind, total_num_images = get_roidb_and_dataset(
        dataset_name, proposal_file, ind_range
    )
    model = initialize_model_from_cfg(args, roidb = roidb, gpu_id=gpu_id)
    num_images = len(roidb)
    num_classes = cfg.MODEL.NUM_CLASSES
    all_boxes, all_segms, all_keyps = empty_results(num_classes, num_images)
    timers = defaultdict(Timer)

    if cfg.TEST.TAGGING:
        all_scores = []

    for i, entry in enumerate(roidb):
        if cfg.TEST.PRECOMPUTED_PROPOSALS:
            # The roidb may contain ground-truth rois (for example, if the roidb
            # comes from the training or val split). We only want to evaluate
            # detection on the *non*-ground-truth rois. We select only the rois
            # that have the gt_classes field set to 0, which means there's no
            # ground truth.
            box_proposals = entry['boxes'][entry['gt_classes'] == 0]
            if len(box_proposals) == 0:
                continue
        elif cfg.TEST.USE_GT_PROPOSALS:
            box_proposals = entry['boxes'][entry['gt_classes'] > 0]
        else:
            # Faster R-CNN type models generate proposals on-the-fly with an
            # in-network RPN; 1-stage models don't require proposals.
            box_proposals = None

        im = cv2.imread(entry['image'])
        if cfg.TEST.TAGGING:
            cls_boxes_i, cls_segms_i, cls_keyps_i, scores = im_detect_all(model, im, entry, box_proposals, timers)
            all_scores.append(scores)
        else:
            cls_boxes_i, cls_segms_i, cls_keyps_i = im_detect_all(model, im, entry, box_proposals, timers)

        extend_results(i, all_boxes, cls_boxes_i)
        if cls_segms_i is not None:
            extend_results(i, all_segms, cls_segms_i)
        if cls_keyps_i is not None:
            extend_results(i, all_keyps, cls_keyps_i)

        if i % 10 == 0:  # Reduce log file size
            ave_total_time = np.sum([t.average_time for t in timers.values()])
            eta_seconds = ave_total_time * (num_images - i - 1)
            eta = str(datetime.timedelta(seconds=int(eta_seconds)))
            det_time = (
                timers['im_detect_bbox'].average_time +
                timers['im_detect_mask'].average_time +
                timers['im_detect_keypoints'].average_time
            )
            misc_time = (
                timers['misc_bbox'].average_time +
                timers['misc_mask'].average_time +
                timers['misc_keypoints'].average_time
            )
            logger.info(
                (
                    'im_detect: range [{:d}, {:d}] of {:d}: '
                    '{:d}/{:d} {:.3f}s + {:.3f}s (eta: {})'
                ).format(
                    start_ind + 1, end_ind, total_num_images, start_ind + i + 1,
                    start_ind + num_images, det_time, misc_time, eta
                )
            )

        if cfg.VIS:
            im_name = os.path.splitext(os.path.basename(entry['image']))[0]
            vis_utils.vis_one_image(
                im[:, :, ::-1],
                '{:d}_{:s}'.format(i, im_name),
                os.path.join(output_dir, 'vis'),
                cls_boxes_i,
                segms=cls_segms_i,
                keypoints=cls_keyps_i,
                thresh=cfg.VIS_TH,
                box_alpha=0.8,
                dataset=dataset,
                show_class=True
            )

    # Evaluate relation classification accuracy
    if cfg.TEST.EVALUATE_REL_ACC:
        from sklearn.metrics import precision_recall_curve
        scores_collect = np.vstack(model.module.Rel_Inf.rel_scores_collect)
        gt_collect = np.hstack(model.module.Rel_Inf.rel_gt_collect)
        recalls = []
        for i in range(scores_collect.shape[-1]):
            filt = gt_collect == i
            print(i, accuracy(torch.from_numpy(scores_collect[filt]),
                              torch.from_numpy(gt_collect[filt]).long(), (1, 2)))
            # Mean recall over the one-vs-rest precision-recall curve.
            recalls.append(precision_recall_curve(
                gt_collect == i, scores_collect[:, i])[1].mean())
            print(i, recalls[-1])
        print(np.array(recalls).mean())

    cfg_yaml = yaml.dump(cfg)
    if ind_range is not None:
        det_name = 'detection_range_%s_%s.pkl' % tuple(ind_range)
    else:
        det_name = 'detections.pkl'
    det_file = os.path.join(output_dir, det_name)
    save_object(
        dict(
            all_boxes=all_boxes,
            all_segms=all_segms,
            all_keyps=all_keyps,
            cfg=cfg_yaml,
            im_filenames=[entry['image'] for entry in roidb],
            classes=dataset.classes
        ), det_file
    )
    logger.info('Wrote detections to: {}'.format(os.path.abspath(det_file)))

    if cfg.TEST.TAGGING:
        # Save results
        tagging_name = 'tagging.pkl' if ind_range is None else 'tagging_range_%s_%s.pkl' % tuple(ind_range)
        tagging_file = os.path.join(output_dir, tagging_name)
        
        save_object(
            dict(
                all_scores=all_scores,
                gt_classes=[r['gt_classes'] for r in roidb]
            ), tagging_file
        )
        logger.info('Wrote tagging results to: {}'.format(tagging_file))

        # Evaluate tagging
        all_scores = np.vstack(all_scores)
        gt_classes = np.hstack([r['gt_classes'] for r in roidb])
        # Per-box image index, aligned with gt_classes.
        img_id = np.hstack([
            np.ones(len(roidb[i]['gt_classes'])) * i for i in range(len(roidb))
        ])

        tagging_eval = {}
        print('Compute AUSUC')
        tagging_eval['ausuc'] = Compute_AUSUC(dataset, all_scores, gt_classes, cfg.TEST.CLASS_SPLIT['source'], cfg.TEST.CLASS_SPLIT['target'])
        print('Generalized on all')
        tagging_eval['all'] = evaluate(dataset, all_scores, gt_classes)
        tagging_eval['all'].update(mean_img_eval(all_scores, gt_classes, img_id))
        # Generalized on source
        source_filter = np.isin(gt_classes, cfg.TEST.CLASS_SPLIT['source'])
        _all_scores, _gt_classes = all_scores[source_filter], gt_classes[source_filter]
        _img_id = img_id[source_filter]
        if source_filter.any(): # Only when there are source ground truth
            print('Generalized on source')
            tagging_eval['gen_source'] = evaluate(dataset, _all_scores, _gt_classes)
            tagging_eval['gen_source'].update(\
                mean_img_eval(_all_scores, _gt_classes, _img_id))
            # not generalized on source
            inf_scores = np.zeros(all_scores.shape[1])
            inf_scores[cfg.TEST.CLASS_SPLIT['target']] = float('-inf')
            _all_scores, _gt_classes = all_scores[source_filter]+inf_scores, gt_classes[source_filter]
            
            print('Ungeneralized on source')
            tagging_eval['ungen_source'] = evaluate(dataset, _all_scores, _gt_classes)
            tagging_eval['ungen_source'].update(\
                mean_img_eval(_all_scores, _gt_classes, _img_id))
        # The above is showing how target labels are confusing source boxes.
        # Generalized on target
        target_filter = np.isin(gt_classes, cfg.TEST.CLASS_SPLIT['target'])
        _all_scores, _gt_classes = all_scores[target_filter], gt_classes[target_filter]
        _img_id = img_id[target_filter]
        if target_filter.any(): # Only when there are target ground truth
            print('Generalized on target')
            tagging_eval['gen_target'] = evaluate(dataset, _all_scores, _gt_classes)
            tagging_eval['gen_target'].update(\
                mean_img_eval(_all_scores, _gt_classes, _img_id))
            # Not generalized on target
            inf_scores = np.zeros(all_scores.shape[1])
            inf_scores[cfg.TEST.CLASS_SPLIT['source']] = float('-inf')
            _all_scores, _gt_classes = all_scores[target_filter] + inf_scores, gt_classes[target_filter]

            # NOTE: this reassignment of img_id is unused below; mean_img_eval
            # receives the per-target _img_id computed above.
            img_id = np.hstack([
                np.ones(np.isin(roidb[i]['gt_classes'],
                                cfg.TEST.CLASS_SPLIT['source']).sum())
                for i in range(len(roidb))
            ])
                
            print('Ungeneralized on target')
            tagging_eval['ungen_target'] = evaluate(dataset, _all_scores, _gt_classes)
            tagging_eval['ungen_target'].update(\
                mean_img_eval(_all_scores, _gt_classes, _img_id))
        
        tagging_eval_name = 'tagging_eval.pkl' if ind_range is None else 'tagging_eval_range_%s_%s.pkl' % tuple(ind_range)
        tagging_eval_file = os.path.join(output_dir, tagging_eval_name)
        save_object(tagging_eval, tagging_eval_file)
        logger.info('Wrote tagging eval results to: {}'.format(tagging_eval_file))

    # # Pad None for standard coco evaluation
    # if ind_range is not None:
    #     for i in range(len(all_boxes)):
    #         all_boxes[i] = [None] * start_ind + all_boxes[i] + [None] * (total_num_images - end_ind)

    return all_boxes, all_segms, all_keyps
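The "ungeneralized" evaluations above restrict predictions to a class subset by adding a score vector that is -inf on the excluded classes, so argmax can never select them. A small illustration of the masking trick:

import numpy as np

scores = np.array([[0.2, 0.5, 0.3],
                   [0.6, 0.1, 0.3]])
excluded = [1]                  # classes to mask out
mask = np.zeros(scores.shape[1])
mask[excluded] = float('-inf')
print(np.argmax(scores + mask, axis=1))  # [2 0]: class 1 can no longer win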
Example #25
def test_net(
        args,
        dataset_name,
        proposal_file,
        output_dir,
        ind_range=None,
        gpu_id=0):
    """Run inference on all images in a dataset or over an index range of images
    in a dataset using a single GPU.
    """

    roidb, dataset, start_ind, end_ind, total_num_images = get_roidb_and_dataset(
        dataset_name, proposal_file, ind_range
    )
    model = initialize_model_from_cfg(args, gpu_id=gpu_id)
    num_images = len(roidb)
    num_classes = cfg.MODEL.NUM_CLASSES
    all_boxes = {}
    timers = defaultdict(Timer)
    for i, entry in enumerate(roidb):
        if cfg.TEST.PRECOMPUTED_PROPOSALS:
            # Unlike the variants above, this one keeps *all* precomputed rois
            # (including any ground-truth boxes) as proposals.
            box_proposals = entry['boxes']
            if len(box_proposals) == 0:
                continue
        else:
            # Faster R-CNN type models generate proposals on-the-fly with an
            # in-network RPN; 1-stage models don't require proposals.
            box_proposals = None

        im = cv2.imread(entry['image'])
        cls_boxes_i = im_detect_all(model, im, box_proposals, timers)

        all_boxes[entry['image']] = cls_boxes_i

        if i % 10 == 0:  # Reduce log file size
            ave_total_time = np.sum([t.average_time for t in timers.values()])
            eta_seconds = ave_total_time * (num_images - i - 1)
            eta = str(datetime.timedelta(seconds=int(eta_seconds)))
            det_time = (
                timers['im_detect_bbox'].average_time
            )
            logger.info(
                (
                    'im_detect: range [{:d}, {:d}] of {:d}: '
                    '{:d}/{:d} {:.3f}s (eta: {})'
                ).format(
                    start_ind + 1, end_ind, total_num_images, start_ind + i + 1,
                    start_ind + num_images, det_time, eta
                )
            )

    cfg_yaml = yaml.dump(cfg)
    if 'train' in dataset_name:
        if ind_range is not None:
            det_name = 'discovery_range_%s_%s.pkl' % tuple(ind_range)
        else:
            det_name = 'discovery.pkl'
    else:
        if ind_range is not None:
            det_name = 'detection_range_%s_%s.pkl' % tuple(ind_range)
        else:
            det_name = 'detections.pkl'
    det_file = os.path.join(output_dir, det_name)
    save_object(
        dict(
            all_boxes=all_boxes,
            cfg=cfg_yaml
        ), det_file
    )
    logger.info('Wrote detections to: {}'.format(os.path.abspath(det_file)))
    return all_boxes
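save_object, used by all of these test_net variants to write the detection pickles, is the framework's serialization helper. A minimal sketch of its assumed behavior (pickle to the given path at the highest protocol):

import pickle

def save_object(obj, file_name):
    """Pickle obj to file_name using the highest available protocol."""
    with open(file_name, 'wb') as f:
        pickle.dump(obj, f, pickle.HIGHEST_PROTOCOL)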
Example #26
def test_net(ind_range=None):
    assert cfg.TEST.WEIGHTS != '', \
        'TEST.WEIGHTS must be set to the model file to test'
    assert not cfg.MODEL.RPN_ONLY, \
        'Use rpn_generate to generate proposals from RPN-only models'
    assert cfg.TEST.DATASET != '', \
        'TEST.DATASET must be set to the dataset name to test'

    output_dir = get_output_dir(training=False)
    roidb, dataset, start_ind, end_ind, total_num_images = \
        get_roidb_and_dataset(ind_range)
    model = initialize_model_from_cfg()
    num_images = len(roidb)
    num_classes = cfg.MODEL.NUM_CLASSES
    all_boxes, all_segms, all_keyps = empty_results(num_classes, num_images)
    timers = defaultdict(Timer)
    gpu_dev = core.DeviceOption(caffe2_pb2.CUDA, cfg.ROOT_GPU_ID)
    name_scope = 'gpu_{}'.format(cfg.ROOT_GPU_ID)
    for i, entry in enumerate(roidb):
        if cfg.MODEL.FASTER_RCNN:
            box_proposals = None
        else:
            # The roidb may contain ground-truth rois (for example, if the roidb
            # comes from the training or val split). We only want to evaluate
            # detection on the *non*-ground-truth rois. We select only the rois
            # that have the gt_classes field set to 0, which means there's no
            # ground truth.
            box_proposals = entry['boxes'][entry['gt_classes'] == 0]
            if len(box_proposals) == 0:
                continue

        im = image_utils.read_image_video(entry)
        with core.NameScope(name_scope):
            with core.DeviceScope(gpu_dev):
                cls_boxes_i, cls_segms_i, cls_keyps_i = im_detect_all(
                    model, im, box_proposals, timers)

        extend_results(i, all_boxes, cls_boxes_i)
        if cls_segms_i is not None:
            extend_results(i, all_segms, cls_segms_i)
        if cls_keyps_i is not None:
            extend_results(i, all_keyps, cls_keyps_i)

        if i % 10 == 0:  # Reduce log file size
            ave_total_time = np.sum([t.average_time for t in timers.values()])
            eta_seconds = ave_total_time * (num_images - i - 1)
            eta = str(datetime.timedelta(seconds=int(eta_seconds)))
            det_time = (timers['im_detect_bbox'].average_time +
                        timers['im_detect_mask'].average_time +
                        timers['im_detect_keypoints'].average_time)
            misc_time = (timers['misc_bbox'].average_time +
                         timers['misc_mask'].average_time +
                         timers['misc_keypoints'].average_time)
            logger.info(('im_detect: range [{:d}, {:d}] of {:d}: '
                         '{:d}/{:d} {:.3f}s + {:.3f}s (eta: {})').format(
                             start_ind + 1, end_ind, total_num_images,
                             start_ind + i + 1, start_ind + num_images,
                             det_time, misc_time, eta))

        if cfg.VIS:
            im_name = os.path.splitext(os.path.basename(entry['image']))[0]
            vis_utils.vis_one_image(im[:, :, ::-1],
                                    '{:d}_{:s}'.format(i, im_name),
                                    os.path.join(output_dir, 'vis'),
                                    cls_boxes_i,
                                    segms=cls_segms_i,
                                    keypoints=cls_keyps_i,
                                    thresh=cfg.VIS_THR,
                                    box_alpha=0.8,
                                    dataset=dataset,
                                    show_class=True)

    cfg_yaml = yaml.dump(cfg)
    if ind_range is not None:
        det_name = 'detection_range_%s_%s.pkl' % tuple(ind_range)
    else:
        det_name = 'detections.pkl'
    det_file = os.path.join(output_dir, det_name)
    robust_pickle_dump(
        dict(all_boxes=all_boxes,
             all_segms=all_segms,
             all_keyps=all_keyps,
             cfg=cfg_yaml), det_file)
    logger.info('Wrote detections to: {}'.format(os.path.abspath(det_file)))
    return all_boxes, all_segms, all_keyps
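
The accumulation pattern above relies on `empty_results`/`extend_results` indexing results as all_boxes[class][image]. The following is an illustrative sketch of that contract, not the library's actual code:

def empty_results_sketch(num_classes, num_images):
    # One empty slot per (class, image) pair; index 0 (background) stays unused.
    return [[[] for _ in range(num_images)] for _ in range(num_classes)]

def extend_results_sketch(index, all_res, im_res):
    # Write image `index`'s per-class detections into the global structure.
    for cls_idx in range(1, len(im_res)):
        all_res[cls_idx][index] = im_res[cls_idx]
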
示例#27
0
def main():
    """main function"""

    if not torch.cuda.is_available():
        sys.exit("Need a CUDA device to run the code.")

    args = parse_args()
    print('Called with args:')
    print(args)

    assert args.image_dir or args.images
    assert bool(args.image_dir) ^ bool(args.images)

    if args.dataset.startswith("coco"):
        dataset = datasets.get_coco_dataset()
        cfg.MODEL.NUM_CLASSES = len(dataset.classes)
    elif args.dataset == "miotcd":
        dataset = datasets.get_miotcd_dataset()
        cfg.MODEL.NUM_CLASSES = 12
    elif args.dataset.startswith("keypoints_coco"):
        dataset = datasets.get_coco_dataset()
        cfg.MODEL.NUM_CLASSES = 2
    elif args.dataset.startswith("bogota"):
        dataset = datasets.get_bogota_dataset()
        cfg.MODEL.NUM_CLASSES = 12
    else:
        raise ValueError('Unexpected dataset name: {}'.format(args.dataset))

    print('load cfg from file: {}'.format(args.cfg_file))
    cfg_from_file(args.cfg_file)

    if args.set_cfgs is not None:
        cfg_from_list(args.set_cfgs)

    assert bool(args.load_ckpt) ^ bool(args.load_detectron), \
        'Exactly one of --load_ckpt and --load_detectron should be specified.'
    cfg.MODEL.LOAD_IMAGENET_PRETRAINED_WEIGHTS = False  # Don't need to load imagenet pretrained weights
    assert_and_infer_cfg()

    maskRCNN = Generalized_RCNN()

    if args.cuda:
        maskRCNN.cuda()

    if args.load_ckpt:
        load_name = args.load_ckpt
        print("loading checkpoint %s" % (load_name))
        checkpoint = torch.load(load_name, map_location=lambda storage, loc: storage)
        net_utils.load_ckpt(maskRCNN, checkpoint['model'])

    if args.load_detectron:
        print("loading detectron weights %s" % args.load_detectron)
        load_detectron_weight(maskRCNN, args.load_detectron)

    maskRCNN = mynn.DataParallel(maskRCNN, cpu_keywords=['im_info', 'roidb'],
                                 minibatch=True, device_ids=[0])  # only support single GPU

    maskRCNN.eval()
    if args.image_dir:
        imglist = misc_utils.get_imagelist_from_dir(args.image_dir)
    else:
        imglist = args.images
    num_images = len(imglist)
    if not os.path.exists(args.output_dir):
        os.makedirs(args.output_dir)
    dataset_result = {}
    for i in xrange(num_images):
        print('img', i)
        im = cv2.imread(imglist[i])
        assert im is not None

        try:
            timers = defaultdict(Timer)

            cls_boxes, cls_segms, cls_keyps = im_detect_all(maskRCNN, im, timers=timers)
            boxes_, segms_, keyps_, classes_ = convert_from_cls_format(cls_boxes, cls_segms, cls_keyps)
            im_name, _ = os.path.splitext(os.path.basename(imglist[i]))
            dataset_result[im_name] = localize_obj_in_image(im_name, boxes_, classes_)

        except Exception:
            import pdb
            pdb.set_trace()
    np.save('dictionary_answer.npy', dataset_result)
    tmp = args.image_dir.split('/')
    txt = tmp[-2] + '_' + tmp[-1] + '.csv'
    save_localization_result(dataset_result, txt)
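
For reference, `convert_from_cls_format` flattens the per-class outputs of `im_detect_all` into a single detection array plus a parallel class list. A condensed sketch of the box-and-class part, assuming the Detectron.pytorch convention of (N, 5) rows [x1, y1, x2, y2, score] per class:

import numpy as np

def convert_boxes_sketch(cls_boxes):
    # Stack non-empty per-class arrays; build a parallel list of class ids.
    box_list = [b for b in cls_boxes if len(b) > 0]
    boxes = np.concatenate(box_list) if box_list else np.zeros((0, 5))
    classes = []
    for j in range(len(cls_boxes)):
        classes += [j] * len(cls_boxes[j])
    return boxes, classes
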
示例#28
0
def test_net(
		args,
		dataset_name,
		proposal_file,
		output_dir,
		ind_range = None,
		gpu_id = 0):
	"""Run inference on all images in a dataset or over an index range of images
	in a dataset using a single GPU.
	"""
	assert not cfg.MODEL.RPN_ONLY, \
		'Use rpn_generate to generate proposals from RPN-only models'
	
	roidb, dataset, start_ind, end_ind, total_num_images = get_roidb_and_dataset(
		dataset_name, proposal_file, ind_range
	)
	# Ground-truth information is obtained here
	model = initialize_model_from_cfg(args, gpu_id = gpu_id)
	num_images = len(roidb)
	num_classes = cfg.MODEL.NUM_CLASSES
	all_boxes, all_segms, all_keyps = empty_results(num_classes, num_images)
	timers = defaultdict(Timer)
	
	dict_all = {}
	
	if cfg.TEST.IOU_OUT or cfg.FAST_RCNN.FAST_HEAD2_DEBUG:
		with open("/nfs/project/libo_i/IOU.pytorch/data/cache/coco_2017_val_gt_roidb.pkl", 'rb') as fp:
			cached_roidb = pickle.load(fp)
		assert len(roidb) == len(cached_roidb)
	
	for i, entry in enumerate(roidb):
		if cfg.TEST.PRECOMPUTED_PROPOSALS:
			# The roidb may contain ground-truth rois (for example, if the roidb
			# comes from the training or val split). We only want to evaluate
			# detection on the *non*-ground-truth rois. We select only the rois
			# that have the gt_classes field set to 0, which means there's no
			# ground truth.
			box_proposals = entry['boxes'][entry['gt_classes'] == 0]
			if len(box_proposals) == 0:
				continue
		else:
			# Faster R-CNN type models generate proposals on-the-fly with an
			# in-network RPN; 1-stage models don't require proposals.
			box_proposals = None
		
		im = cv2.imread(entry['image'])
		im_name = entry['image'].split('/')[-1][:-4]
		
		cls_boxes_i, cls_segms_i, cls_keyps_i, dict_all[im_name] = im_detect_all(model, im, box_proposals, timers,
		                                                                         im_name_tag = im_name)
		if cfg.FAST_RCNN.FAST_HEAD2_DEBUG:
			gt_i = cached_roidb[i]['boxes']
			shift_gt_iou = predbox_roi_iou(np.array(dict_all[im_name]['stage1_out'], dtype = np.float32),
			                               np.array(gt_i, dtype = "float32"))
			
			dict_all[im_name]['final_iou'] = shift_gt_iou.tolist()
			dict_all[im_name]['shift_iou'] = dict_all[im_name]['shift_iou'].tolist()
			
			if cfg.FAST_RCNN.FAST_HEAD2_DEBUG_VIS and i < 100:
				
				if cfg.FAST_RCNN.IOU_NMS or cfg.FAST_RCNN.SCORE_NMS:
					with open("/nfs/project/libo_i/IOU.pytorch/IOU_Validation/cls_tracker.json", 'r') as f:
						cls_tracker = json.load(f)
				
				# Draw stage1 pred_boxes onto im and gt
				dpi = 200
				fig = plt.figure(frameon = False)
				fig.set_size_inches(im.shape[1] / dpi, im.shape[0] / dpi)
				ax = plt.Axes(fig, [0., 0., 1., 1.])
				ax.axis('off')
				fig.add_axes(ax)
				ax.imshow(im[:, :, ::-1])
				# Draw ground-truth boxes on im
				for item in gt_i:
					ax.add_patch(
						plt.Rectangle((item[0], item[1]),
						              item[2] - item[0],
						              item[3] - item[1],
						              fill = False, edgecolor = 'r',
						              linewidth = 0.3, alpha = 1))
				
				# Draw proposals on im
				length = len(dict_all[im_name]['boxes'])
				for ind in range(length):
					# stage1_item = dict_all[im_name]['stage1_pred_boxes'][ind]
					stage1_item = dict_all[im_name]['boxes'][ind]
					score_item = dict_all[im_name]['score'][ind]
					score_item = round(score_item, 2)
					ax.add_patch(
						plt.Rectangle((stage1_item[0], stage1_item[1]),
						              stage1_item[2] - stage1_item[0],
						              stage1_item[3] - stage1_item[1],
						              fill = False, edgecolor = 'g',
						              linewidth = 0.5, alpha = 1))
					ax.text(
						stage1_item[0], stage1_item[1] - 2,
						str(score_item),
						fontsize = 4,
						family = 'serif',
						bbox = dict(facecolor = 'g', alpha = 1, pad = 0, edgecolor = 'none'), color = 'white')
				
				length = len(dict_all[im_name]['stage1_out'])
				for ind in range(length):
					# stage1_item = dict_all[im_name]['stage1_pred_boxes'][ind]
					stage1_item = dict_all[im_name]['stage1_out'][ind]
					ax.add_patch(
						plt.Rectangle((stage1_item[0], stage1_item[1]),
						              stage1_item[2] - stage1_item[0],
						              stage1_item[3] - stage1_item[1],
						              fill = False, edgecolor = 'orange',
						              linewidth = 0.1, alpha = 0.6))
				
				fig.savefig("/nfs/project/libo_i/IOU.pytorch/2stage_iminfo/{}.png".format(im_name), dpi = dpi)
				plt.close('all')
			
			dict_all[im_name].pop('stage1_out')
			dict_all[im_name].pop('stage2_out')
			dict_all[im_name].pop('stage2_score')
			dict_all[im_name].pop('score')
			dict_all[im_name].pop('boxes')
		
		if cfg.TEST.IOU_OUT:
			gt_i = cached_roidb[i]['boxes']
			
			# NMS
			keep = np.array(dict_all[im_name]['keep'])
			
			dict_all[im_name]['shift_iou'] = np.array(
				dict_all[im_name]['shift_iou'], dtype = np.float32)[keep].tolist()
			dict_all[im_name]['rois_score'] = np.array(
				dict_all[im_name]['rois_score'], dtype = np.float32)[keep].tolist()
			dict_all[im_name]['rois'] = np.array(
				dict_all[im_name]['rois'], dtype = np.float32)[keep].tolist()
			pred_boxes_scores = dict_all[im_name]['pred_boxes_scores']
			
			# Thresh filter
			iou_thrsh_keep = np.where(np.array(dict_all[im_name]['shift_iou'], dtype = np.float32) >= 0.1)[0]
			score_thrsh_keep = np.where(np.array(dict_all[im_name]['rois_score'], dtype = np.float32) >= 0.8)[0]
			
			iou_rois = np.array(dict_all[im_name]['rois'], dtype = np.float32)[iou_thrsh_keep]
			score_rois = np.array(dict_all[im_name]['rois'], dtype = np.float32)[score_thrsh_keep]
			
			roi_to_final = predbox_roi_iou(np.array(dict_all[im_name]['rois'], dtype = np.float32),
			                               np.array(gt_i, dtype = "float32"))
			iou_final_to_rois = predbox_roi_iou(np.array(gt_i, dtype = "float32"), iou_rois)
			score_final_to_rois = predbox_roi_iou(np.array(gt_i, dtype = "float32"), score_rois)
			
			dict_all[im_name]['final_iou'] = roi_to_final.tolist()
			dict_all[im_name]['iou_final_vertical'] = iou_final_to_rois.tolist()
			dict_all[im_name]['score_final_vertical'] = score_final_to_rois.tolist()
			
			if cfg.TEST.IOU_OUT_VIS:
				# Plot the image for a visual sanity check
				dpi = 200
				fig = plt.figure(frameon = False)
				fig.set_size_inches(im.shape[1] / dpi, im.shape[0] / dpi)
				ax = plt.Axes(fig, [0., 0., 1., 1.])
				ax.axis('off')
				fig.add_axes(ax)
				ax.imshow(im[:, :, ::-1])
				# Draw ground-truth boxes on im
				for item in gt_i:
					ax.add_patch(
						plt.Rectangle((item[0], item[1]),
						              item[2] - item[0],
						              item[3] - item[1],
						              fill = False, edgecolor = 'g',
						              linewidth = 0.6, alpha = 1))
				
				# Draw proposals on im
				cnt = 0
				for ind, item in enumerate(dict_all[im_name]['rois']):
					iou_value = dict_all[im_name]['shift_iou'][ind]
					if iou_value > 0.8:
						cnt += 1
						ax.add_patch(
							plt.Rectangle((item[0], item[1]),
							              item[2] - item[0],
							              item[3] - item[1],
							              fill = False, edgecolor = 'orange',
							              linewidth = 0.5, alpha = 1))
						ax.text(
							item[0], item[1] - 2,
							str(round(iou_value, 2)),
							fontsize = 4,
							family = 'serif',
							bbox = dict(
								facecolor = 'g', alpha = 1, pad = 0, edgecolor = 'none'),
							color = 'white')
				
				for ind, item in enumerate(dict_all[im_name]['pred_boxes']):
					score_value = dict_all[im_name]['pred_boxes_scores'][ind]
					if score_value > 0.5:
						cnt += 1
						ax.add_patch(
							plt.Rectangle((item[0], item[1]),
							              item[2] - item[0],
							              item[3] - item[1],
							              fill = False, edgecolor = 'red',
							              linewidth = 0.5, alpha = 1))
						ax.text(
							item[0], item[1] - 2,
							str(round(score_value, 2)),
							fontsize = 4,
							family = 'serif',
							bbox = dict(
								facecolor = 'red', alpha = 1, pad = 0, edgecolor = 'none'),
							color = 'white')
				
				print("Here is {} proposals above 0.5 in im {}".format(cnt, im_name))
				fig.savefig("/nfs/project/libo_i/IOU.pytorch/im_out/{}.png".format(im_name), dpi = dpi)
				plt.close('all')
			
			dict_all[im_name].pop('rois')
			dict_all[im_name].pop('pred_boxes')
			dict_all[im_name].pop('keep')
		
		if i == 100:
			method = "IOU_Exp"
			if cfg.FAST_RCNN.IOU_NMS:
				method = "FPN_IOU_NMS"
			elif cfg.FAST_RCNN.SCORE_NMS:
				method = "FPN_SCORE_NMS"
			with open("/nfs/project/libo_i/IOU.pytorch/IOU_Validation/{}.json".format(method), 'w') as f:
				f.write(json.dumps(dict_all))
				print("In {} round, saved dict_all ".format(i))
		extend_results(i, all_boxes, cls_boxes_i)
		if cls_segms_i is not None:
			extend_results(i, all_segms, cls_segms_i)
		if cls_keyps_i is not None:
			extend_results(i, all_keyps, cls_keyps_i)
		
		if i % 10 == 0:  # Reduce log file size
			ave_total_time = np.sum([t.average_time for t in timers.values()])
			eta_seconds = ave_total_time * (num_images - i - 1)
			eta = str(datetime.timedelta(seconds = int(eta_seconds)))
			det_time = (
					timers['im_detect_bbox'].average_time +
					timers['im_detect_mask'].average_time +
					timers['im_detect_keypoints'].average_time
			)
			misc_time = (
					timers['misc_bbox'].average_time +
					timers['misc_mask'].average_time +
					timers['misc_keypoints'].average_time
			)
			logger.info(
				(
					'im_detect: range [{:d}, {:d}] of {:d}: '
					'{:d}/{:d} {:.3f}s + {:.3f}s (eta: {})'
				).format(
					start_ind + 1, end_ind, total_num_images, start_ind + i + 1,
					start_ind + num_images, det_time, misc_time, eta
				)
			)
		
		if cfg.VIS:
			im_name = os.path.splitext(os.path.basename(entry['image']))[0]
			vis_utils.vis_one_image(
				im[:, :, ::-1],
				'{:d}_{:s}'.format(i, im_name),
				os.path.join(output_dir, 'vis'),
				cls_boxes_i,
				segms = cls_segms_i,
				keypoints = cls_keyps_i,
				thresh = cfg.VIS_TH,
				box_alpha = 0.8,
				dataset = dataset,
				show_class = True
			)
	
	cfg_yaml = yaml.dump(cfg)
	if ind_range is not None:
		det_name = 'detection_range_%s_%s.pkl' % tuple(ind_range)
	else:
		det_name = 'detections.pkl'
	det_file = os.path.join(output_dir, det_name)
	save_object(
		dict(
			all_boxes = all_boxes,
			all_segms = all_segms,
			all_keyps = all_keyps,
			cfg = cfg_yaml
		), det_file
	)
	logger.info('Wrote detections to: {}'.format(os.path.abspath(det_file)))
	return all_boxes, all_segms, all_keyps
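
`predbox_roi_iou` is used above to score boxes against ground truth. A hedged NumPy sketch, assuming it returns, for each box in the first set, the maximum IoU against any box in the second set (boxes as [x1, y1, x2, y2]):

import numpy as np

def predbox_roi_iou_sketch(boxes_a, boxes_b):
    # Pairwise intersection via broadcasting: shape (A, B).
    x1 = np.maximum(boxes_a[:, None, 0], boxes_b[None, :, 0])
    y1 = np.maximum(boxes_a[:, None, 1], boxes_b[None, :, 1])
    x2 = np.minimum(boxes_a[:, None, 2], boxes_b[None, :, 2])
    y2 = np.minimum(boxes_a[:, None, 3], boxes_b[None, :, 3])
    inter = np.clip(x2 - x1, 0, None) * np.clip(y2 - y1, 0, None)
    area_a = (boxes_a[:, 2] - boxes_a[:, 0]) * (boxes_a[:, 3] - boxes_a[:, 1])
    area_b = (boxes_b[:, 2] - boxes_b[:, 0]) * (boxes_b[:, 3] - boxes_b[:, 1])
    union = area_a[:, None] + area_b[None, :] - inter
    iou = inter / np.maximum(union, 1e-10)
    # Best overlap per box in the first set.
    return iou.max(axis=1)
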
示例#29
0
def test_net(
    weights_file,
    dataset_name,
    proposal_file,
    output_dir,
    ind_range=None,
    gpu_id=0
):
    """Run inference on all images in a dataset or over an index range of images
    in a dataset using a single GPU.
    """
    assert not cfg.MODEL.RPN_ONLY, \
        'Use rpn_generate to generate proposals from RPN-only models'

    roidb, dataset, start_ind, end_ind, total_num_images = get_roidb_and_dataset(
        dataset_name, proposal_file, ind_range
    )
    model = initialize_model_from_cfg(weights_file, gpu_id=gpu_id)
    num_images = len(roidb)
    num_classes = cfg.MODEL.NUM_CLASSES
    all_boxes, all_segms, all_keyps = empty_results(num_classes, num_images)
    timers = defaultdict(Timer)

    anchor_blobs = _create_anchors()
    with c2_utils.NamedCpuScope(gpu_id):
        for k, v in anchor_blobs.items():
            workspace.FeedBlob(core.ScopedName(k), v)

    for i, entry in enumerate(roidb):
        if cfg.TEST.PRECOMPUTED_PROPOSALS:
            # The roidb may contain ground-truth rois (for example, if the roidb
            # comes from the training or val split). We only want to evaluate
            # detection on the *non*-ground-truth rois. We select only the rois
            # that have the gt_classes field set to 0, which means there's no
            # ground truth.
            box_proposals = entry['boxes'][entry['gt_classes'] == 0]
            if len(box_proposals) == 0:
                continue
        else:
            # Faster R-CNN type models generate proposals on-the-fly with an
            # in-network RPN; 1-stage models don't require proposals.
            box_proposals = None

        im = cv2.imread(entry['image'])
        with c2_utils.NamedCudaScope(gpu_id):
            cls_boxes_i, cls_segms_i, cls_keyps_i = im_detect_all(
                model, im, box_proposals, timers
            )

        extend_results(i, all_boxes, cls_boxes_i)
        if cls_segms_i is not None:
            extend_results(i, all_segms, cls_segms_i)
        if cls_keyps_i is not None:
            extend_results(i, all_keyps, cls_keyps_i)

        if i % 10 == 0:  # Reduce log file size
            ave_total_time = np.sum([t.average_time for t in timers.values()])
            eta_seconds = ave_total_time * (num_images - i - 1)
            eta = str(datetime.timedelta(seconds=int(eta_seconds)))
            det_time = (
                timers['im_detect_bbox'].average_time +
                timers['im_detect_mask'].average_time +
                timers['im_detect_keypoints'].average_time
            )
            misc_time = (
                timers['misc_bbox'].average_time +
                timers['misc_mask'].average_time +
                timers['misc_keypoints'].average_time
            )
            logger.info(
                (
                    'im_detect: range [{:d}, {:d}] of {:d}: '
                    '{:d}/{:d} {:.3f}s + {:.3f}s (eta: {})'
                ).format(
                    start_ind + 1, end_ind, total_num_images, start_ind + i + 1,
                    start_ind + num_images, det_time, misc_time, eta
                )
            )

        if cfg.VIS:
            im_name = os.path.splitext(os.path.basename(entry['image']))[0]
            vis_utils.vis_one_image(
                im[:, :, ::-1],
                '{:d}_{:s}'.format(i, im_name),
                os.path.join(output_dir, 'vis'),
                cls_boxes_i,
                segms=cls_segms_i,
                keypoints=cls_keyps_i,
                thresh=cfg.VIS_TH,
                box_alpha=0.8,
                dataset=dataset,
                show_class=True
            )

    cfg_yaml = yaml.dump(cfg)
    if ind_range is not None:
        det_name = cfg.CFG_FILE + '_range_%s_%s.pkl' % tuple(ind_range)
    else:
        det_name = 'detections.pkl'
    det_file = os.path.join(output_dir, det_name)
    save_object(
        dict(
            all_boxes=all_boxes,
            all_segms=all_segms,
            all_keyps=all_keyps,
            cfg=cfg_yaml
        ), det_file
    )
    logger.info('Wrote detections to: {}'.format(os.path.abspath(det_file)))
    return all_boxes, all_segms, all_keyps
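
The anchor feeding above uses the standard Caffe2 workspace round-trip: blobs are fed as NumPy arrays under a scoped name and fetched back by the same name. A minimal standalone illustration (the blob name here is made up):

import numpy as np
from caffe2.python import workspace

anchors = np.random.rand(9, 4).astype(np.float32)  # placeholder anchor array
workspace.FeedBlob('gpu_0/anchors_fpn3', anchors)
fetched = workspace.FetchBlob('gpu_0/anchors_fpn3')
assert np.array_equal(anchors, fetched)
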
示例#30
0
for seq_idx in range(db.get_num_sequence()):
    db.set_to_sequence(seq_idx)
    seq_name = db.get_current_seq_name()
    cur_output_dir = osp.join(args.output_dir, seq_name)
    if args.no_overwrite and osp.exists(
            osp.join(cur_output_dir, 'results.pdf')):
        continue
    if not osp.isdir(cur_output_dir):
        os.makedirs(cur_output_dir)
        assert osp.isdir(cur_output_dir)
    for idx in range(db.get_current_seq_length()):
        im = db.get_image_cv2(idx)
        assert im is not None
        timers = defaultdict(Timer)
        cls_boxes, cls_segms, cls_keyps = im_detect_all(maskRCNN,
                                                        im,
                                                        timers=timers)
        im_name = '%03d-%03d' % (seq_idx, idx)
        print(osp.join(seq_name, im_name))
        vis_utils.vis_one_image(
            im[:, :, ::-1],  # BGR -> RGB for visualization
            im_name,
            cur_output_dir,
            cls_boxes,
            cls_segms,
            cls_keyps,
            dataset=dataset,
            box_alpha=0.3,
            show_class=True,
            thresh=0.7,
            kp_thresh=2)
示例#31
0
def main():
    """main function"""

    if not torch.cuda.is_available():
        sys.exit("Need a CUDA device to run the code.")

    args = parse_args()
    print('Called with args:')
    print(args)

    assert args.image_dir or args.images
    assert bool(args.image_dir) ^ bool(args.images)

    dataset = datasets.get_hospital_dataset()
    cfg.MODEL.NUM_CLASSES = 20  # with bg
    num_class = cfg.MODEL.NUM_CLASSES
    sents = dataset.sents
    th_cls = dataset.th_cls
    cls2eng = dataset.cls2eng
    eng2type = dataset.eng2type

    print('load cfg from file: {}'.format(args.cfg_file))
    cfg_from_file(args.cfg_file)

    if args.set_cfgs is not None:
        cfg_from_list(args.set_cfgs)

    assert bool(args.load_ckpt) ^ bool(args.load_detectron), \
        'Exactly one of --load_ckpt and --load_detectron should be specified.'
    cfg.MODEL.LOAD_IMAGENET_PRETRAINED_WEIGHTS = False  # Don't need to load imagenet pretrained weights
    assert_and_infer_cfg()

    maskRCNN = Generalized_RCNN()

    if args.cuda:
        maskRCNN.cuda()

    if args.load_ckpt:
        load_name = args.load_ckpt
        print("loading checkpoint %s" % (load_name))
        checkpoint = torch.load(load_name,
                                map_location=lambda storage, loc: storage)
        net_utils.load_ckpt(maskRCNN, checkpoint['model'])

    if args.load_detectron:
        print("loading detectron weights %s" % args.load_detectron)
        load_detectron_weight(maskRCNN, args.load_detectron)

    maskRCNN = mynn.DataParallel(maskRCNN,
                                 cpu_keywords=['im_info', 'roidb'],
                                 minibatch=True,
                                 device_ids=[0])  # only support single GPU

    maskRCNN.eval()
    if args.image_dir:
        imglist = misc_utils.get_imagelist_from_dir(args.image_dir)
    else:
        imglist = args.images
    num_images = len(imglist)
    if not os.path.exists(args.output_dir):
        os.makedirs(args.output_dir)

    for i in xrange(num_images):  # for each image
        print('img', i)
        im = cv2.imread(imglist[i])
        assert im is not None

        # segmentation
        # d = segment(im)
        # pdb.set_trace()

        timers = defaultdict(Timer)

        # detection
        cls_boxes, cls_segms, cls_keyps = im_detect_all(maskRCNN,
                                                        im,
                                                        timers=timers)

        # first we collect boxes from all classes
        dets_total = np.empty([0, 6], dtype=np.float32)
        for cls in range(1, num_class):  # for each cls
            dets = cls_boxes[cls]
            if dets.shape[0] == 0:
                continue
            dets_extend = np.pad(
                dets,
                ((0, 0),
                 (0, 1)),  # pad no rows, and one extra column on the right
                mode='constant',
                constant_values=cls)  # the appended column holds the class id
            dets_total = np.vstack((dets_total, dets_extend))

        # then use a loose NMS so that each region keeps only one symptom
        keep = box_utils.nms(dets_total, 0.7)
        nms_dets = dets_total[keep, :]

        # iterate through the remaining boxes
        report, healthy = '', True
        have_sym_of_cls = [False for _ in range(num_class)]

        n = nms_dets.shape[0]
        final_results = []  # return to the web
        for idx in range(n):  # for each region
            th, cls = nms_dets[idx, -2], int(nms_dets[idx, -1])
            if th > th_cls[cls]:  # diagnosed to have the sym
                report += sents[cls][1]
                have_sym_of_cls[cls] = True
                healthy = False

                ename = cls2eng[int(cls)]
                _type = eng2type[ename]
                final_results.append({
                    'name': ename,
                    'type': _type,
                    'box': list(nms_dets[idx, 0:4])
                })

        for cls in range(1, num_class):  # for each cls
            if not have_sym_of_cls[cls]:  # no symptom of this cls was found
                report += sents[cls][0]

        if healthy:
            report = sents[0][0]
        print(report)

        pdb.set_trace()

        # healthy = True  # flag indicating healthy or not
        # for cls in range(1, num_class):  # for each cls
        #     dets = cls_boxes[cls]
        #     if dets.shape[0] == 0:
        #         report += sents[cls][0]
        #         continue
        #     n = dets.shape[0]
        #     flag = False  # indicates if the sym exists
        #     for k in range(n):  # for each region
        #         if dets[k, -1] > th_cls[cls]:  # above threshold for this cls, means have this cls of symptom
        #             report += sents[cls][1]
        #             flag = True
        #             healthy = False
        #     if not flag:  # don't have this symptom
        #         report += sents[cls][0]
        #
        # if healthy:  # use the report for healthy people
        #     report = sents[0][0]

        im_name, _ = os.path.splitext(os.path.basename(imglist[i]))
        # vis_utils.vis_one_image(
        #     im[:, :, ::-1],  # BGR -> RGB for visualization
        #     im_name,
        #     args.output_dir,
        #     cls_boxes,
        #     cls_segms,
        #     cls_keyps,
        #     dataset=dataset,
        #     box_alpha=0.3,
        #     show_class=True,
        #     thresh=0.05,
        #     kp_thresh=2
        # )

    if args.merge_pdfs and num_images > 1:
        merge_out_path = '{}/results.pdf'.format(args.output_dir)
        if os.path.exists(merge_out_path):
            os.remove(merge_out_path)
        command = "pdfunite {}/*.pdf {}".format(args.output_dir,
                                                merge_out_path)
        subprocess.call(command, shell=True)
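
The cross-class NMS trick above appends the class id as an extra column so a single NMS pass can suppress overlapping boxes across classes. A hedged sketch of the greedy NMS that `box_utils.nms` is assumed to implement (rows start with [x1, y1, x2, y2, score]):

import numpy as np

def nms_sketch(dets, thresh):
    x1, y1, x2, y2 = dets[:, 0], dets[:, 1], dets[:, 2], dets[:, 3]
    scores = dets[:, 4]
    areas = (x2 - x1 + 1) * (y2 - y1 + 1)
    order = scores.argsort()[::-1]  # highest score first
    keep = []
    while order.size > 0:
        i = order[0]
        keep.append(i)
        # IoU of the kept box against all remaining candidates.
        xx1 = np.maximum(x1[i], x1[order[1:]])
        yy1 = np.maximum(y1[i], y1[order[1:]])
        xx2 = np.minimum(x2[i], x2[order[1:]])
        yy2 = np.minimum(y2[i], y2[order[1:]])
        inter = np.maximum(0.0, xx2 - xx1 + 1) * np.maximum(0.0, yy2 - yy1 + 1)
        iou = inter / (areas[i] + areas[order[1:]] - inter)
        # Drop candidates that overlap the kept box too much.
        order = order[np.where(iou <= thresh)[0] + 1]
    return keep
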
示例#32
0
def test_net(  # child func
        args,
        dataset_name,
        proposal_file,
        output_dir,
        ind_range=None,
        gpu_id=0):
    """Run inference on all images in a dataset or over an index range of images
    in a dataset using a single GPU.
    """
    assert not cfg.MODEL.RPN_ONLY, \
        'Use rpn_generate to generate proposals from RPN-only models'

    roidb, dataset, start_ind, end_ind, total_num_images = get_roidb_and_dataset(
        dataset_name, proposal_file, ind_range)
    model = initialize_model_from_cfg(args, gpu_id=gpu_id)
    num_images = len(roidb)
    num_classes = cfg.MODEL.NUM_CLASSES
    all_boxes, all_segms, all_keyps = empty_results(num_classes, num_images)

    res_dict = {}

    timers = defaultdict(Timer)
    for i, entry in enumerate(roidb):
        print(i)
        tmp = {'boxes': entry['gt_boxes'], 'clses': entry['cls_list']}
        res_dict[entry['eva_id']] = tmp

        if cfg.TEST.PRECOMPUTED_PROPOSALS:
            # The roidb may contain ground-truth rois (for example, if the roidb
            # comes from the training or val split). We only want to evaluate
            # detection on the *non*-ground-truth rois. We select only the rois
            # that have the gt_classes field set to 0, which means there's no ground truth.
            box_proposals = entry['boxes'][entry['gt_classes'] == 0]
            if len(box_proposals) == 0:
                continue
        else:
            # Faster R-CNN type models generate proposals on-the-fly with an
            # in-network RPN; 1-stage models don't require proposals.
            box_proposals = None

        im = cv2.imread(entry['image'])

        if cfg.HISTO_EQUAL:  # LJY
            gray = cv2.equalizeHist(im[:, :, 0])
            im = cv2.cvtColor(gray, cv2.COLOR_GRAY2RGB)

        cls_boxes_i, cls_segms_i, cls_keyps_i = im_detect_all(
            model, im, box_proposals, timers)

        extend_results(i, all_boxes, cls_boxes_i)
        if cls_segms_i is not None:
            extend_results(i, all_segms, cls_segms_i)
        if cls_keyps_i is not None:
            extend_results(i, all_keyps, cls_keyps_i)

        if i % 10 == 0:  # Reduce log file size
            ave_total_time = np.sum([t.average_time for t in timers.values()])
            eta_seconds = ave_total_time * (num_images - i - 1)
            eta = str(datetime.timedelta(seconds=int(eta_seconds)))
            det_time = (timers['im_detect_bbox'].average_time +
                        timers['im_detect_mask'].average_time +
                        timers['im_detect_keypoints'].average_time)
            misc_time = (timers['misc_bbox'].average_time +
                         timers['misc_mask'].average_time +
                         timers['misc_keypoints'].average_time)
            logger.info(('im_detect: range [{:d}, {:d}] of {:d}: '
                         '{:d}/{:d} {:.3f}s + {:.3f}s (eta: {})').format(
                             start_ind + 1, end_ind, total_num_images,
                             start_ind + i + 1, start_ind + num_images,
                             det_time, misc_time, eta))
        # if cfg.VIS:
        if False:
            # im_name = os.path.splitext(os.path.basename(entry['image']))[0]
            im_name = str(entry['eva_id'])
            vis_utils.vis_one_image_orig(im[:, :, ::-1],
                                         '{:d}_{:s}'.format(i, im_name),
                                         os.path.join(output_dir, 'vis'),
                                         cls_boxes_i,
                                         segms=cls_segms_i,
                                         keypoints=cls_keyps_i,
                                         thresh=0.5,
                                         box_alpha=0.8,
                                         dataset=dataset,
                                         show_class=True)

    # LJY: save detection file !!!
    cfg_yaml = yaml.dump(cfg)
    if ind_range is not None:
        det_name = 'detection_range_%s_%s.pkl' % tuple(ind_range)
    else:
        det_name = 'detections.pkl'

    det_file = os.path.join(output_dir, det_name)
    save_object(
        dict(all_boxes=all_boxes,
             all_segms=all_segms,
             all_keyps=all_keyps,
             cfg=cfg_yaml), det_file)

    with open(os.path.join(output_dir, 'res_dict.json'), 'w') as f:
        json.dump(res_dict, f, ensure_ascii=False)

    logger.info('Wrote detections to: {}'.format(os.path.abspath(det_file)))
    return all_boxes, all_segms, all_keyps
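
The HISTO_EQUAL branch above equalizes one channel's histogram and replicates it back to three channels. A small standalone sketch of that preprocessing, assuming a grayscale-like input where all channels carry the same data:

import cv2
import numpy as np

im = (np.random.rand(32, 32, 3) * 255).astype(np.uint8)  # dummy image
gray = cv2.equalizeHist(im[:, :, 0])  # equalize a single 8-bit channel
im_eq = cv2.cvtColor(gray, cv2.COLOR_GRAY2RGB)  # back to 3 channels
assert im_eq.shape == im.shape
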