Example #1
class YOLACT_MODEL():

    def __init__(self, opts):
        # Concatenate the two split weight files into one checkpoint
        # if not os.path.isfile('weights/yolact_resnet50_54_800000.pth'):    
        #     script = "cat weights/a* > weights/yolact_resnet50_54_800000.pth"
        #     call(script, shell=True)

        set_cfg('yolact_resnet50_config')
        cudnn.benchmark = True
        cudnn.fastest = True
        torch.set_default_tensor_type('torch.cuda.FloatTensor')
        self.net = Yolact()
        self.net.load_weights(opts['checkpoint'])
        print("done.")

        self.net.eval()                        
        self.net = self.net.cuda()

        self.net.detect.use_fast_nms = True
        cfg.mask_proto_debug = False
        self.color_cache = defaultdict(lambda: {})
        self.threshold = opts['threshold']
        
    # Run instance segmentation on an image and return the rendered result.
    def detect(self, img):
        numpy_image = np.array(img)
        print('starting inference...')
        frame = torch.from_numpy(numpy_image).cuda().float()
        batch = FastBaseTransform()(frame.unsqueeze(0))
        preds = self.net(batch)
        print("done.")
        output_image = self.display(preds, frame, None, None,
                                     undo_transform=False, score_threshold=self.threshold)
        return output_image

    def display(self, dets_out, img, h, w, undo_transform=True, class_color=False, mask_alpha=0.45, top_k = 100, score_threshold = 0.3):
        img_gpu = img / 255.0
        h, w, _ = img.shape
        
        with timer.env('Postprocess'):
            t = postprocess(dets_out, w, h, visualize_lincomb = False,
                                            crop_masks        = True,
                                            score_threshold   = score_threshold)
            torch.cuda.synchronize()

        with timer.env('Copy'):
            masks = None
            if cfg.eval_mask_branch:
                # Masks are drawn on the GPU, so don't copy
                masks = t[3][:top_k]

        if masks is not None and masks.shape[0] > 0:
            # Keep only the region covered by the highest-scoring mask
            img_gpu = img_gpu * masks[0]
            
        # Then draw the stuff that needs to be done on the cpu
        # Note, make sure this is a uint8 tensor or opencv will not anti alias text for whatever reason
        img_numpy = (img_gpu * 255).byte().cpu().numpy()
               
        return img_numpy        
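
A minimal usage sketch for the class above, assuming cv2 and the YOLACT utilities imported by the snippet are available; the checkpoint path and input image name are illustrative, not taken from the example:

from PIL import Image

opts = {
    'checkpoint': 'weights/yolact_resnet50_54_800000.pth',  # hypothetical path
    'threshold': 0.3,
}
model = YOLACT_MODEL(opts)

img = Image.open('example.jpg')   # hypothetical input image
masked = model.detect(img)        # uint8 numpy array with the top mask applied
cv2.imwrite('masked.png', masked)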
Example #2
def init_model(transform):
    args = parse_args()

    if args.config is not None:
        print(args.config)
        set_cfg(args.config)
        cfg.mask_proto_debug = False

    if args.trained_model == 'interrupt':
        args.trained_model = SavePath.get_interrupt('weights/')
    elif args.trained_model == 'latest':
        args.trained_model = SavePath.get_latest('weights/', cfg.name)

    if args.config is None:
        model_path = SavePath.from_str(args.trained_model)
        # TODO: Bad practice? Probably want to do a name lookup instead.
        args.config = model_path.model_name + '_config'
        print('Config not specified. Parsed %s from the file name.\n' %
              args.config)
        set_cfg(args.config)

    if args.detect:
        cfg.eval_mask_branch = False

    if args.dataset is not None:
        set_dataset(args.dataset)

    with torch.no_grad():
        if args.cuda:
            cudnn.fastest = True
            torch.set_default_tensor_type('torch.cuda.FloatTensor')
        else:
            torch.set_default_tensor_type('torch.FloatTensor')

        print('Loading model...', end='')
        net = Yolact()
        net.load_weights(args.trained_model)
        net.eval()
        print(' Done.')
        if args.cuda:
            net = net.cuda()
            # The DataParallel wrappers below assume CUDA (video-evaluation path)
            net = CustomDataParallel(net).cuda()
            transform = torch.nn.DataParallel(FastBaseTransform()).cuda()

    return net, args
Example #3
def main(args):

  rospy.init_node('yolact_ros')
  rospack = rospkg.RosPack()
  yolact_path = rospack.get_path('yolact_ros')
  
  model_path_str = yolact_path + "/scripts/yolact/weights/yolact_base_54_800000.pth"
  model_path = SavePath.from_str(model_path_str)
  set_cfg(model_path.model_name + '_config')

  with torch.no_grad():
      results_path_str = yolact_path + "/scripts/yolact/results"
      if not os.path.exists(results_path_str):
          os.makedirs(results_path_str)

      cudnn.benchmark = True
      cudnn.fastest = True
      torch.set_default_tensor_type('torch.cuda.FloatTensor')   

      print('Loading model...', end='')
      net = Yolact()
      net.load_weights(model_path_str)
      net.eval()
      print(' Done.')

      net = net.cuda()
      net.detect.use_fast_nms = True
      cfg.mask_proto_debug = False

  ic = image_converter(net)
  

  try:
    rospy.spin()
  except KeyboardInterrupt:
    print("Shutting down")
  cv2.destroyAllWindows()
Example #4
    if args.resume and not args.display:
        with open(args.ap_data_file, 'rb') as f:
            ap_data = pickle.load(f)
        calc_map(ap_data)
        exit()

    dataset = None

    print('Loading model...', end='')
    net = Yolact()
    net.load_weights(args.trained_model)
    net.eval()
    print(' Done.')

    if args.cuda:
        net = net.cuda()

    net.detect.use_fast_nms = args.fast_nms
    net.detect.use_cross_class_nms = args.cross_class_nms
    cfg.mask_proto_debug = args.mask_proto_debug



scan = Scan(rgb_paths=rgb_paths, depth_paths=depth_paths, pose_paths=pose_paths,
            cam_intr=cam_intr, mesh_plot=mesh_plot, scannet_data=scannet_data, mask_net=net,
            args=args, root_path=root_path, use_gpu=use_gpu)

app = QApplication(sys.argv)

vis = ScannetVis(scan=scan,
                 rgb_names=rgb_names,
Example #5
class MattingService:
    def __init__(self,
                 model_path="./weights/yolact_im700_54_800000.pth",
                 use_cuda=False):
        print('Loading model...', end='')
        self.use_cuda = use_cuda
        self.trained_model = model_path
        self.net = Yolact()
        self.net.load_weights(self.trained_model)
        self.net.eval()

        if self.use_cuda:
            self.net = self.net.cuda()

        self.net.detect.use_fast_nms = True
        self.net.detect.use_cross_class_nms = False
        cfg.mask_proto_debug = False

        print(' Done.')

    def process(self, image, top_k=1, score_threshold=0.6):
        # TODO Currently we do not support Fast Mask Re-scoring in evalimage, evalimages, and evalvideo
        with torch.no_grad():
            if image is not None:
                if ':' in image:
                    inp, _image_name = image.split(':')
                    self._infer_image(self.net, inp, _image_name, top_k,
                                      score_threshold)
                else:
                    _image_name = image.split('/')[-1].split('.')[0] + '.png'
                    out = os.path.join('results/', _image_name)
                    self._infer_image(self.net, image, out, top_k,
                                      score_threshold)
                return _image_name

    def _infer_image(self, net: Yolact, path, save_path, top_k,
                     score_threshold):
        if self.use_cuda:
            frame = torch.from_numpy(cv2.imread(path)).cuda().float()
        else:
            frame = torch.from_numpy(cv2.imread(path)).float()
        batch = FastBaseTransform()(frame.unsqueeze(0))
        preds = net(batch)

        img_numpy = self.post_process(preds,
                                      frame,
                                      None,
                                      None,
                                      top_k,
                                      score_threshold,
                                      undo_transform=False)

        if save_path is None:
            # Reorder BGRA -> RGBA so matplotlib displays the colors correctly
            img_numpy = img_numpy[:, :, (2, 1, 0, 3)]
            plt.subplot()
            plt.imshow(img_numpy)
            plt.title(path)
            plt.show()
        else:
            cv2.imwrite(save_path, img_numpy)

    @staticmethod
    def post_process(dets_out,
                     img,
                     h,
                     w,
                     top_k=1,
                     score_threshold=0.6,
                     undo_transform=True):
        """
        Note: If undo_transform=False then im_h and im_w are allowed to be None.
        """
        if undo_transform:
            img_numpy = undo_image_transformation(img, w, h)
            img_gpu = torch.Tensor(img_numpy).cuda()
        else:
            img_gpu = img / 255.0
            h, w, _ = img.shape

        with timer.env('Postprocess'):
            save = cfg.rescore_bbox
            cfg.rescore_bbox = True
            t = postprocess(dets_out,
                            w,
                            h,
                            visualize_lincomb=False,
                            crop_masks=False,
                            score_threshold=score_threshold)
            cfg.rescore_bbox = save

        with timer.env('Copy'):
            idx = t[1].argsort(0, descending=True)[:top_k]

            if cfg.eval_mask_branch:
                # Masks are drawn on the GPU, so don't copy
                masks = t[3][idx]
            classes, scores, boxes = [x[idx].cpu().numpy() for x in t[:3]]

        num_dets_to_consider = min(top_k, classes.shape[0])
        for j in range(num_dets_to_consider):
            if scores[j] < score_threshold:
                num_dets_to_consider = j
                break

        # Convert the image to a uint8 RGBA array; the highest-scoring mask
        # will be written into the alpha channel below.
        final_res = (img_gpu * 255).byte().cpu().numpy()
        final_res = cv2.cvtColor(final_res, cv2.COLOR_RGB2RGBA)

        if num_dets_to_consider == 0:
            return final_res

        # After this, masks is of size [num_dets_to_consider, h, w, 1]
        masks = masks[:num_dets_to_consider, :, :, None]

        # Take the highest-scoring mask and scale it to 0-255
        _mask = (masks * 255).byte().cpu().numpy()[0]

        # Then assign the mask to the last channel of the image
        final_res[:, :, 3] = _mask.squeeze()

        return final_res
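
A usage sketch for MattingService, assuming the default weights file exists and that 'photo.jpg' is an illustrative input; process() writes a PNG under results/ and returns the output file name:

service = MattingService(use_cuda=torch.cuda.is_available())

os.makedirs('results', exist_ok=True)   # _infer_image saves into results/
out_name = service.process('photo.jpg', top_k=1, score_threshold=0.6)
print('saved results/' + out_name)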
Example #6
if __name__ == '__main__':

    # Dataset and labels
    valid_dataset = COCODetection(image_path='./data/coco/images/val2017/',
                                  info_file='./data/coco/annotations/instances_val2017.json',
                                  transform=BaseTransform(),
                                  has_gt=True
                                  )
    prep_coco_cats()

    # Model
    print('Loading model...', end='')
    model = Yolact()
    model.load_weights(args.trained_model)
    model.eval()
    model = model.cuda() if args.cuda else model.cpu()
    print(' Done.')

    # Core entry point
    with torch.no_grad():
        if not os.path.exists('results'):
            os.makedirs('results')

        if args.cuda:
            torch.backends.cudnn.fastest = True
            torch.set_default_tensor_type('torch.cuda.FloatTensor')
        else:
            torch.set_default_tensor_type('torch.FloatTensor')

        # if args.resume and not args.display:
        #     with open(args.ap_data_file, 'rb') as f:
Example #7
class YOLACT_MODEL():
    def __init__(self, opts):
        # Concatenate the two split weight files into one checkpoint
        # if not os.path.isfile('weights/yolact_resnet50_54_800000.pth'):
        #     script = "cat weights/a* > weights/yolact_resnet50_54_800000.pth"
        #     call(script, shell=True)

        set_cfg('yolact_resnet50_config')
        cudnn.benchmark = True
        cudnn.fastest = True
        torch.set_default_tensor_type('torch.cuda.FloatTensor')
        self.net = Yolact()
        self.net.load_weights(opts['checkpoint'])
        print("done.")

        self.net.eval()
        self.net = self.net.cuda()

        self.net.detect.use_fast_nms = True
        cfg.mask_proto_debug = False
        self.color_cache = defaultdict(lambda: {})
        self.threshold = opts['threshold']
        self.mode = opts['mode']

    # Run instance segmentation on an image and return the rendered result.
    def detect(self, img):
        numpy_image = np.array(img)
        print('starting inference...')
        frame = torch.from_numpy(numpy_image).cuda().float()
        batch = FastBaseTransform()(frame.unsqueeze(0))
        preds = self.net(batch)
        print("done.")
        return self.display(preds,
                            frame,
                            None,
                            None,
                            undo_transform=False,
                            score_threshold=self.threshold)

    def display(self,
                dets_out,
                img,
                h,
                w,
                undo_transform=True,
                class_color=False,
                mask_alpha=0.45,
                top_k=100,
                score_threshold=0.3):
        img_gpu = img / 255.0
        h, w, _ = img.shape

        with timer.env('Postprocess'):
            t = postprocess(dets_out,
                            w,
                            h,
                            visualize_lincomb=False,
                            crop_masks=True,
                            score_threshold=score_threshold)
            torch.cuda.synchronize()

        with timer.env('Copy'):
            if cfg.eval_mask_branch:
                # Masks are drawn on the GPU, so don't copy
                masks = t[3][:top_k]
            classes, scores, boxes = [
                x[:top_k].detach().cpu().numpy() for x in t[:3]
            ]

        num_dets_to_consider = min(top_k, classes.shape[0])
        for j in range(num_dets_to_consider):
            if scores[j] < score_threshold:
                num_dets_to_consider = j
                break

        if num_dets_to_consider == 0:
            # No detections found so just output the original image
            return (img_gpu * 255).byte().detach().cpu().numpy()

        # Quick and dirty lambda for selecting the color for a particular index
        # Also keeps track of a per-gpu color cache for maximum speed
        def get_color(j, on_gpu=None):
            color_idx = (classes[j] * 5 if class_color else j *
                         5) % len(COLORS)

            if on_gpu is not None and color_idx in self.color_cache[on_gpu]:
                return self.color_cache[on_gpu][color_idx]
            else:
                color = COLORS[color_idx]
                if not undo_transform:
                    # The image might come in as RGB or BGR, depending on the source
                    color = (color[2], color[1], color[0])
                if on_gpu is not None:
                    color = torch.Tensor(color).to(on_gpu).float() / 255.
                    self.color_cache[on_gpu][color_idx] = color
                return color

        show_mask = True
        show_box = True

        if self.mode == "mask_only":
            show_box = False

        if self.mode == "box_only":
            show_mask = False

        print("mode :", self.mode)
        print("show_mask :", show_mask)
        print("show_box :", show_box)

        # First, draw the masks on the GPU where we can do it really fast
        # Beware: very fast but possibly unintelligible mask-drawing code ahead
        # I wish I had access to OpenGL or Vulkan but alas, I guess Pytorch tensor operations will have to suffice
        if show_mask and cfg.eval_mask_branch:
            # After this, mask is of size [num_dets, h, w, 1]
            masks = masks[:num_dets_to_consider, :, :, None]

            # Prepare the RGB images for each mask given their color (size [num_dets, h, w, 1])
            colors = torch.cat([
                get_color(j, on_gpu=img_gpu.device.index).view(1, 1, 1, 3)
                for j in range(num_dets_to_consider)
            ],
                               dim=0)
            masks_color = masks.repeat(1, 1, 1, 3) * colors * mask_alpha

            # This is 1 everywhere except where the mask is, where it is 1 - mask_alpha
            inv_alph_masks = masks * (-mask_alpha) + 1

            # I did the math for this on pen and paper. This whole block should be equivalent to:
            #    for j in range(num_dets_to_consider):
            #        img_gpu = img_gpu * inv_alph_masks[j] + masks_color[j]
            masks_color_summand = masks_color[0]
            if num_dets_to_consider > 1:
                inv_alph_cumul = inv_alph_masks[:(num_dets_to_consider -
                                                  1)].cumprod(dim=0)
                masks_color_cumul = masks_color[1:] * inv_alph_cumul
                masks_color_summand += masks_color_cumul.sum(dim=0)

            img_gpu = img_gpu * inv_alph_masks.prod(
                dim=0) + masks_color_summand

        # Then draw the stuff that needs to be done on the cpu
        # Note, make sure this is a uint8 tensor or opencv will not anti alias text for whatever reason
        img_numpy = (img_gpu * 255).byte().cpu().numpy()

        if show_box:
            for j in reversed(range(num_dets_to_consider)):
                x1, y1, x2, y2 = boxes[j, :]
                color = get_color(j)
                score = scores[j]

                cv2.rectangle(img_numpy, (x1, y1), (x2, y2), color, 1)

                _class = cfg.dataset.class_names[classes[j]]
                text_str = '%s: %.2f' % (_class, score)

                font_face = cv2.FONT_HERSHEY_DUPLEX
                font_scale = 0.6
                font_thickness = 1

                text_w, text_h = cv2.getTextSize(text_str, font_face,
                                                 font_scale,
                                                 font_thickness)[0]

                text_pt = (x1, y1 - 3)
                text_color = [255, 255, 255]

                cv2.rectangle(img_numpy, (x1, y1),
                              (x1 + text_w, y1 - text_h - 4), color, -1)
                cv2.putText(img_numpy, text_str, text_pt, font_face,
                            font_scale, text_color, font_thickness,
                            cv2.LINE_AA)

        return (img_numpy, boxes, scores)
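
A usage sketch for this variant, assuming the same YOLACT imports; checkpoint path and image name are illustrative. display() returns an (image, boxes, scores) tuple when there are detections and only the image otherwise:

opts = {
    'checkpoint': 'weights/yolact_resnet50_54_800000.pth',  # hypothetical path
    'threshold': 0.3,
    'mode': 'mask_only',   # or 'box_only'; any other value draws both
}
model = YOLACT_MODEL(opts)

result = model.detect(cv2.imread('example.jpg'))   # hypothetical input image
if isinstance(result, tuple):
    rendered, boxes, scores = result
else:
    rendered = result   # no detections: only the image is returned
cv2.imwrite('detections.png', rendered)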
Example #8
def detect():
    img_path = '/home/user/dataset/pear/train/JPEGImages'
    save_path = '/home/user/pear_output'
    weight_path = '/home/user/caoliwei/yolact/weights/20200901/yolact_darknet53_1176_20000.pth'

    set_cfg('pear_config')

    with torch.no_grad():
        torch.cuda.set_device(0)

        ######
        # If the input image size is constant, this makes things faster (hence why we can use it in a video setting).
        # cudnn.benchmark = True
        # cudnn.fastest = True
        torch.set_default_tensor_type('torch.cuda.FloatTensor')
        ######

        net = Yolact()
        net.load_weights(weight_path)
        net.eval()
        net = net.cuda()
        print('model loaded...')

        net.detect.cross_class_nms = True
        net.detect.use_fast_nms = True
        cfg.mask_proto_debug = False

        if not os.path.exists(save_path):
            os.mkdir(save_path)

        img_names = [
            name for name in os.listdir(img_path)
            if name.endswith('.jpg') or name.endswith('.png')
        ]
        #for img_name in tqdm(img_names):
        for img_name in img_names:
            img = cv2.imread(os.path.join(img_path, img_name))
            img = torch.from_numpy(img).cuda().float()
            img = FastBaseTransform()(img.unsqueeze(0))
            start = time.time()
            preds = net(img)
            print('clw: image_name: %s, inference time use %.3fs' %
                  (img_name,
                   time.time() - start))  # inference time use 0.023s, 550x550

            # start = time.time()
            h, w = img.shape[2:]
            result = postprocess(
                preds, w, h, crop_masks=True,
                score_threshold=0.3)  # classes, scores, boxes, masks, sorted by score
            # top_k = 10
            # classes, scores, boxes, masks = [x[:top_k].cpu().numpy() for x in result]  # clw note TODO: is keeping only the top_k necessary?
            # print('clw: postprocess time use %.3fs' % (time.time() - start))  # 0.001s

            ### Walk through result[0] in order and find the first entry with class 0 (the pear) to get its mask
            # start = time.time()
            bFindPear = False
            for i, cls_id in enumerate(result[0]):
                if cls_id == 0 and not bFindPear:
                    pear_mask = result[3][i].cpu().numpy()
                    bFindPear = True

            if not bFindPear:
                # No pear detected in this image; skip it
                continue

            # Extract the outline from the pear mask
            pear_outline = get_outline_from_mask(pear_mask, w, h)
            # print('pear_mask.sum:', pear_mask.sum())     # 124250.0
            # print('pear_outline.sum:', pear_outline.sum())  # 34335.0
            # print('clw: outline extract time use %.3fs' % (time.time() - start))  # 0.001s
            roundness = compute_roundness(pear_outline)
            ###

            result.append(roundness)
Example #9
class YolactInterface(object):
    def __init__(self, model_pth, output_num=5):
        self.output_num = output_num
        with torch.no_grad():
            set_cfg("yolact_base_config")
            torch.cuda.set_device(0)
            cudnn.benchmark = True
            cudnn.fastest = True
            torch.set_default_tensor_type('torch.cuda.FloatTensor')
            self.net = Yolact()
            self.net.load_weights(model_pth)
            self.net.eval()
            self.net = self.net.cuda()
        print("load model complete")

    def run_once(self, src):
        self.net.detect.cross_class_nms = True
        self.net.detect.use_fast_nms = True
        cfg.mask_proto_debug = False
        with torch.no_grad():
            frame = torch.Tensor(src).cuda().float()
            batch = FastBaseTransform()(frame.unsqueeze(0))
            time_start = time.perf_counter()
            preds = self.net(batch)
            time_elapsed = time.perf_counter() - time_start
            h, w, _ = src.shape
            t = postprocess(
                preds,
                w,
                h,
                visualize_lincomb=False,
                crop_masks=True,
                score_threshold=0.)  # TODO: give a suitable threshold
            torch.cuda.synchronize()
            classes, scores, bboxes, masks = [
                x[:self.output_num].cpu().numpy() for x in t
            ]  # TODO: Only 5 objects for test
            print(time_elapsed)
        instances = self.build_up_result(masks.shape[0], classes, bboxes,
                                         masks, scores)
        return {"instances": instances}

    def build_up_result(self, num, classes, bboxes, masks, scores):
        instances = []
        for i in range(num):
            bbox = [
                bboxes[i, 0], bboxes[i, 1], bboxes[i, 2] - bboxes[i, 0],
                bboxes[i, 3] - bboxes[i, 1]
            ]
            # Round to the nearest 10th to avoid huge file sizes, as COCO suggests
            bbox = [round(float(x) * 10) / 10 for x in bbox]
            # encode segmentation with RLE
            rle = pycocotools.mask.encode(
                np.asfortranarray(masks[i, :, :].astype(
                    np.uint8)))  # rle binary encoding
            rle['counts'] = rle['counts'].decode(
                'ascii')  # json.dump doesn't like bytes strings
            # create one instance json
            instances.append({
                'category_id': int(classes[i]),  # TODO: origin: get_coco_cat(int(category_id))
                'bbox': {
                    "b": bbox
                },
                "segmentation": rle,
                'score': float(scores[i])
            })

        return instances
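
A usage sketch for YolactInterface, assuming an OpenCV BGR image and a local checkpoint; both paths are illustrative. run_once() returns COCO-style instances (RLE-encoded masks) that can be dumped to JSON:

import json

yolact = YolactInterface('weights/yolact_base_54_800000.pth', output_num=5)  # hypothetical path
src = cv2.imread('example.jpg')                                              # hypothetical image
result = yolact.run_once(src)

with open('instances.json', 'w') as f:
    json.dump(result['instances'], f)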
Example #10
class YolactWorker(qc.QObject):
    # Emits the list of classes, scores, and bboxes of detected objects.
    # bboxes are in (top-left, w, h) format.
    # The event is passed for synchronizing display of the image in the video widget
    # with the bounding boxes.
    sigProcessed = qc.pyqtSignal(np.ndarray, int)
    sigInitialized = qc.pyqtSignal()
    sigError = qc.pyqtSignal(YolactException)

    def __init__(self):
        super(YolactWorker, self).__init__()
        self.mutex = qc.QMutex()
        self._image = None
        self._pos = 0
        self.top_k = 10
        self.cuda = torch.cuda.is_available()
        self.net = None
        self.score_threshold = 0.15
        self.overlap_thresh = 1.0
        self.config = yconfig.cfg
        self.weights_file = ''
        self.config_file = ''
        self.video_file = None

    def setWaitCond(self, waitCond: threading.Event) -> None:
        _ = qc.QMutexLocker(self.mutex)
        self._waitCond = waitCond

    @qc.pyqtSlot(bool)
    def enableCuda(self, on):
        settings.setValue('yolact/cuda', on)
        self.cuda = on

    @qc.pyqtSlot(int)
    def setTopK(self, value):
        _ = qc.QMutexLocker(self.mutex)
        self.top_k = value

    @qc.pyqtSlot(int)
    def setBatchSize(self, value):
        _ = qc.QMutexLocker(self.mutex)
        self.batch_size = int(value)

    @qc.pyqtSlot(float)
    def setScoreThresh(self, value):
        _ = qc.QMutexLocker(self.mutex)
        self.score_threshold = value

    @qc.pyqtSlot(float)
    def setOverlapThresh(self, value):
        """Merge objects if their bboxes overlap more than this."""
        _ = qc.QMutexLocker(self.mutex)
        self.overlap_thresh = value

    @qc.pyqtSlot(str)
    def setConfig(self, filename):
        if filename == '':
            return
        self.config_file = filename
        with open(filename, 'r') as cfg_file:
            config = yaml.safe_load(cfg_file)
            for key, value in config.items():
                logging.debug('%r \n%r %r', key, type(value), value)
                self.config.__setattr__(key, value)
            if 'mask_proto_debug' not in config:
                self.config.mask_proto_debug = False
        logging.debug(yaml.dump(self.config))

    @qc.pyqtSlot(str)
    def setWeights(self, filename: str) -> None:
        if filename == '':
            raise YolactException('Empty filename for network weights')
        self.weights_file = filename
        tic = time.perf_counter_ns()
        with torch.no_grad():
            if self.cuda:
                cudnn.fastest = True
                torch.set_default_tensor_type('torch.cuda.FloatTensor')
            else:
                torch.set_default_tensor_type('torch.FloatTensor')
            self.net = Yolact()
            self.net.load_weights(self.weights_file, self.cuda)
            self.net.eval()
            if self.cuda:
                self.net = self.net.cuda()
        toc = time.perf_counter_ns()
        logging.debug('Time to load weights %f s', 1e-9 * (toc - tic))
        self.sigInitialized.emit()

    @qc.pyqtSlot(np.ndarray, int)
    def process(self, image: np.ndarray, pos: int):
        """:returns (classes, scores, boxes)

        where `boxes` is an array of bounding boxes of detected objects in
        (xleft, ytop, width, height) format.

        `classes` is the class ids of the corresponding objects.

        `scores` are the computed class scores corresponding to the detected objects.
        Roughly, a high score indicates strong belief that the object belongs to
        the identified class.
        """
        _ts = time.perf_counter()
        logging.debug(f'Received frame {pos}')
        if self.net is None:
            self.sigError.emit(YolactException('Network not initialized'))
            return
        # Partly follows yolact eval.py
        tic = time.perf_counter_ns()
        _ = qc.QMutexLocker(self.mutex)
        with torch.no_grad():
            if self.cuda:
                image = torch.from_numpy(image).cuda().float()
            else:
                image = torch.from_numpy(image).float()
            batch = FastBaseTransform()(image.unsqueeze(0))
            preds = self.net(batch)
            image_gpu = image / 255.0
            h, w, _ = image.shape
            save = self.config.rescore_bbox
            self.config.rescore_bbox = True
            classes, scores, boxes, masks = oututils.postprocess(
                preds,
                w,
                h,
                visualize_lincomb=False,
                crop_masks=True,
                score_threshold=self.score_threshold)
            idx = scores.argsort(0, descending=True)[:self.top_k]
            # if self.config.eval_mask_branch:
            #     masks = masks[idx]
            classes, scores, boxes = [
                x[idx].cpu().numpy() for x in (classes, scores, boxes)
            ]
            # This is probably not required, `postprocess` uses
            # `score_thresh` already
            num_dets_to_consider = min(self.top_k, classes.shape[0])
            for j in range(num_dets_to_consider):
                if scores[j] < self.score_threshold:
                    num_dets_to_consider = j
                    break
            # logging.debug('Bounding boxes: %r', boxes)
            # Convert from top-left bottom-right format to
            # top-left, width, height format
            if len(boxes) == 0:
                self.sigProcessed.emit(boxes, pos)
                return
            boxes[:, 2:] = boxes[:, 2:] - boxes[:, :2]
            boxes = np.asanyarray(boxes, dtype=np.int_)
            if self.overlap_thresh < 1:
                dist_matrix = pairwise_distance(new_bboxes=boxes,
                                                bboxes=boxes,
                                                boxtype=OutlineStyle.bbox,
                                                metric=DistanceMetric.ios)
                bad_idx = [jj for ii in range(dist_matrix.shape[0] - 1) \
                             for jj in range(ii+1, dist_matrix.shape[1]) \
                              if dist_matrix[ii, jj] < 1 - self.overlap_thresh]
                good_idx = list(set(range(boxes.shape[0])) - set(bad_idx))
                boxes = boxes[good_idx].copy()

            toc = time.perf_counter_ns()
            logging.debug('Time to process single _image: %f s',
                          1e-9 * (toc - tic))
            self.sigProcessed.emit(boxes, pos)
            logging.debug(f'Emitted bboxes for frame {pos}: {boxes}')
        _dt = time.perf_counter() - _ts
        logging.debug(
            f'{__name__}.{self.__class__.__name__}.process: Runtime: {_dt}s')
Example #11
class DOTMask():

    def __init__(self, nn, input_device):
        """
        Initialisation function
        """
    
        print('Loading model...')
        self.nn = nn
        if self.nn == 'yolact':
            print("Selected NN: Yolact")
            # Yolact imports
            sys.path.append('../nn/yolact/')
            from yolact import Yolact
            from data import cfg, set_cfg, set_dataset
            import torch
            import torch.backends.cudnn as cudnn 

            set_cfg("yolact_resnet50_config")
            #set_cfg("yolact_resnet50_config")
            cfg.eval_mask_branch = True
            cfg.mask_proto_debug = False
            cfg.rescore_bbox = True
            self.net = Yolact()
            self.net.load_weights("../weights/yolact_resnet50_54_800000.pth")
            #self.net.load_weights("../weights/yolact_resnet50_54_800000.pth")
            self.net.eval()
            cudnn.fastest = True
            torch.set_default_tensor_type('torch.cuda.FloatTensor')
            self.net = self.net.cuda()

        elif self.nn == 'yolact++':
            print("Selected NN: Yolact++")
            # Yolact imports
            sys.path.append('../nn/yolact/')
            from yolact import Yolact
            from data import cfg, set_cfg, set_dataset
            import torch
            import torch.backends.cudnn as cudnn 

            set_cfg("yolact_plus_resnet50_config")
            #set_cfg("yolact_resnet50_config")
            cfg.eval_mask_branch = True
            cfg.mask_proto_debug = False
            cfg.rescore_bbox = True
            self.net = Yolact()
            self.net.load_weights("../weights/yolact_plus_resnet50_54_800000.pth")
            #self.net.load_weights("../weights/yolact_resnet50_54_800000.pth")
            self.net.eval()
            cudnn.fastest = True
            torch.set_default_tensor_type('torch.cuda.FloatTensor')
            self.net = self.net.cuda()

        elif self.nn == 'yolact_edge':
            print("Selected NN: Yolact_edge")
            # Yolact_edge imports
            sys.path.append('../nn/yolact_edge')
            from yolact import Yolact
            from data import cfg, set_cfg, set_dataset
            import torch
            import torch.backends.cudnn as cudnn

            set_cfg("yolact_edge_resnet50_config")
            cfg.eval_mask_branch = True
            cfg.mask_proto_debug = False
            cfg.rescore_bbox = True
            self.net = Yolact()
            self.net.load_weights("../weights/yolact_edge_resnet50_54_800000.pth")
            self.net.eval()
            cudnn.fastest = True
            torch.set_default_tensor_type('torch.cuda.FloatTensor')
            self.net = self.net.cuda()

        elif self.nn == 'mrcnn':
            print("Selected NN: Mask-RCNN")
             # Keras
            import keras
            from keras.models import Model
            from keras import backend as K
            K.common.set_image_dim_ordering('tf')

            # Mask-RCNN
            sys.path.append('../nn/Mask_RCNN/')
            from mrcnn import config
            from mrcnn import utils 
            from mrcnn import model as modellib
            from inference_config import InferenceConfig

            self.config = InferenceConfig()
            self.model = modellib.MaskRCNN(
                mode="inference", 
                model_dir="../weights/",#"../nn/Mask_RCNN/mrcnn/", 
                config=self.config)

            # Load weights trained on MS-COCO
            self.model.load_weights("../weights/mask_rcnn_coco.h5", by_name=True)
        
        else:
            print("no nn defined")

        self.bridge = CvBridge()

        self._max_inactive_frames = 10 # Maximum nb of frames before destruction
        self.next_object_id = 0 # ID for next object
        self.objects_dict = {} # Detected objects dictionary
        self.var_init = 0
        self.cam_pos_qat = np.array([[0.,0.,0.],[0.,0.,0.,1.]])
        self.cam_pos = np.array([[0.,0.,0.],[0.,0.,0.]])
        
        self.dilatation = 1
        self.score_threshold = 0.1
        self.max_number_observation = 5
        self.human_threshold = 0.01
        self.object_threshold = 0.3
        self.iou_threshold = 0.9
        self.selected_classes = [0, 56, 67]
        self.masked_id = []

        #if input_device == 'xtion':
        #    self.human_threshold = 0.1
        #    self.iou_threshold = 0.3

        self.depth_image_pub = rospy.Publisher(
            "/camera/depth_registered/masked_image_raw", 
            Image,queue_size=1)

        self.dynamic_depth_image_pub = rospy.Publisher(
            "/camera/depth_registered/dynamic_masked_image_raw", 
            Image,queue_size=1)

        self.frame = []
        self.depth_frame = []
        self.msg_header = std_msgs.msg.Header()
        self.depth_msg_header = std_msgs.msg.Header()

        # Class names COCO dataset
        self.class_names = [
            'person', 'bicycle', 'car', 'motorcycle',
            'airplane', 'bus', 'train', 'truck', 'boat',
            'traffic light', 'fire hydrant', 'stop sign', 'parking meter', 'bench', 
            'bird', 'cat', 'dog', 'horse', 'sheep', 
            'cow', 'elephant', 'bear', 'zebra', 'giraffe', 
            'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 
            'frisbee', 'skis', 'snowboard', 'sports ball', 'kite', 
            'baseball bat', 'baseball glove', 'skateboard', 'surfboard', 'tennis racket',
            'bottle', 'wine glass', 'cup', 'fork', 'knife', 
            'spoon', 'bowl', 'banana', 'apple', 'sandwich', 
            'orange', 'broccoli', 'carrot', 'hot dog', 'pizza',
            'donut', 'cake', 'chair', 'couch', 'potted plant', 
            'bed', 'dining table', 'toilet', 'tv', 'laptop',
            'mouse', 'remote', 'keyboard', 'cell phone', 'microwave', 
            'oven', 'toaster', 'sink', 'refrigerator', 'book',
            'clock', 'vase', 'scissors', 'teddy bear', 'hair drier', 
            'toothbrush']
        
    def get_active(self, val):
        for key in self.objects_dict:
            if self.objects_dict[key]["maskID"] == val:
                return self.objects_dict[key]["activeObject"]
        return "Key not exist"

    def class_selection(self, masks_in, class_ids):
        """
        Function for Mask class selection (Selected classes : 1,40,41,42,57)
        """
        if len(masks_in.shape) > 1:
            masks=copy.deepcopy(masks_in)
            x = np.zeros([class_ids.shape[0], masks.shape[1], masks.shape[2]])
            for l in range(masks.shape[0]):
                if (class_ids[l] == 0 or class_ids[l] == 39 or 
                    class_ids[l] == 56):
                    x[l, :, :] = masks[l, :, :]
                else:
                    x[l, :, :] = 0
            return x
        else:
            x = np.zeros([1, 480, 640])
            return x

    def static_masks_selection(self, masks_in, class_ids):
        """
        Function for static Mask class selection
        """
        if len(masks_in.shape) > 1:
            masks=copy.deepcopy(masks_in)
            x = np.zeros([masks.shape[0], masks.shape[1], masks.shape[2]])
            for i in self.objects_dict:
                if not np.in1d(i, self.masked_id):
                    if self.objects_dict[i]["activeObject"] == 1 and self.objects_dict[i]["maskID"] < masks.shape[0] and (class_ids[self.objects_dict[i]["maskID"]] == 0 or class_ids[self.objects_dict[i]["maskID"]] == 39 or 
                        class_ids[self.objects_dict[i]["maskID"]] == 56):
                        x[self.objects_dict[i]["maskID"], :, :] = masks[self.objects_dict[i]["maskID"], :, :]
                        
                    elif self.objects_dict[i]["activeObject"] == 0 and self.objects_dict[i]["maskID"] < masks.shape[0]:
                        x[self.objects_dict[i]["maskID"], :, :] = 0
                    else:
                        pass
                    self.masked_id.append(i)
            return x
        else:
            x = np.zeros([1, 480, 640])
            return x

    def read_objects_pose(self):

        for i in self.objects_dict:
            
            if self.objects_dict[i]["classID"]==0:
                object_type = "Person"
            elif self.objects_dict[i]["classID"]==39:
                object_type = "Bottle"
            elif self.objects_dict[i]["classID"]==56:
                object_type = "Chair"
            else:
                object_type = "Nan"

            try:
                (self.objects_dict[i]["worldPose"],rot) = listener.lookupTransform('/map',object_type+'_'+str(i), rospy.Time(0))
            except (tf.LookupException, tf.ConnectivityException, tf.ExtrapolationException):
                continue
                        
    def handle_objects_pose(self):
        for i in self.objects_dict:
            if self.objects_dict[i]["classID"]==0 or self.objects_dict[i]["classID"]==39 or self.objects_dict[i]["classID"]==56:
                if self.objects_dict[i]["classID"]==0:
                    object_type = "Person"
                elif self.objects_dict[i]["classID"]==39:
                    object_type = "Bottle"
                elif self.objects_dict[i]["classID"]==56:
                    object_type = "Chair"
                else:
                    object_type = "Nan"
                
                br = tf.TransformBroadcaster()
                e_pose = self.objects_dict[i]["estimatedPose"]
                br.sendTransform((e_pose[0], e_pose[1], e_pose[2]), 
                                tf.transformations.quaternion_from_euler(0,0,0),
                                rospy.Time.now(),
                                object_type+'_'+str(i),
                                '/map')

    def iou_centered_centroid(self, rois_old, rois_new, mask_old, mask_new):
        # intersection_over_union applied on centered centroid 
        img_v = mask_old.shape[0]
        img_h = mask_old.shape[1]

        pad_x_old = int((img_v-(rois_old[3]-rois_old[1]))/2)
        pad_y_old = int((img_h-(rois_old[2]-rois_old[0]))/2)
        pad_x_new = int((img_v-(rois_new[3]-rois_new[1]))/2)
        pad_y_new = int((img_h-(rois_new[2]-rois_new[0]))/2)

        cropped_mask_old = mask_old[rois_old[1]:rois_old[3], rois_old[0]:rois_old[2]]
        cropped_mask_new = mask_new[rois_new[1]:rois_new[3], rois_new[0]:rois_new[2]]

        centered_mask_old = add_padding(cropped_mask_old, pad_y_old, pad_x_old, pad_y_old, pad_x_old)
        centered_mask_new = add_padding(cropped_mask_new, pad_y_new, pad_x_new, pad_y_new, pad_x_new)

        centered_mask_old_croped = centered_mask_old[1:478, 1:638]
        centered_mask_new_croped = centered_mask_new[1:478, 1:638]

        intersection = np.logical_and(centered_mask_old_croped, centered_mask_new_croped)
        union = np.logical_or(centered_mask_old_croped, centered_mask_new_croped)
        iou = np.sum(intersection) / np.sum(union)
        return iou

    def apply_depth_image_masking(self, image_in, masks):
        """Apply the given mask to the image.
        """
        
        image = copy.deepcopy(image_in)
        image_static = copy.deepcopy(image_in)
        for i in range(masks.shape[0]):
            is_active = self.get_active(i)
            mask = masks[i, :, :]
            mask = ndimage.binary_dilation(mask, iterations=self.dilatation)
            if is_active == 1:
                image[:, :] = np.where(mask == 1,
                                    0,
                                    image[:, :])
                image_static[:, :] = np.where(mask == 1,
                                    0,
                                    image[:, :])
            else:
                image[:, :] = np.where(mask == 1,
                                    0,
                                    image[:, :])

            
        return image_static, image

    def mask_dilatation(self, masks):

        timebefore = time.time()
        mask=copy.deepcopy(masks)
        for i in range(mask.shape[0]):
            mask[i] = ndimage.binary_dilation(mask[i], iterations=self.dilatation)

        print("Numpy dilation time : ", - (timebefore - time.time()))
        return mask

    def mask_dilatation_cv(self, masks):

        timebefore = time.time()
        mask=copy.deepcopy(masks)
        kernel = np.ones((3,3))
        for i in range(mask.shape[0]):
            mask[i] = cv2.dilate(mask[i],kernel, iterations=self.dilatation)
        

        print("cv2 dilation time : ", - (timebefore - time.time()))
        return mask

    def get_masking_depth(self, image, mask):
        """Apply the given mask to the image.
        """
        x = np.zeros([image.shape[0], image.shape[1]])
        y = np.zeros(mask.shape[0])

        for i in range(mask.shape[0]):
            x[:, :] = np.where(mask[i,:,:] != 1,
                                0,
                                image[:, :])

            x[:, :] = np.where( np.isnan(x[:,:]),
                                0,
                                x[:, :])

            if sum(sum((x[:, :]!=0))) == 0:
                y[i] = 0
            else:
                y[i] = (x[:, :].sum()/sum(sum((x[:, :]!=0))))
        
        return y

    def add_object(self, centroid, dimensions, mask_id, class_id, mask_old, rois_old):
        dt = 0.25

        try:
            (transc, rotc) = listener.lookupTransform('/map', self.tf_camera, rospy.Time(0))
        except (tf.LookupException, tf.ConnectivityException, tf.ExtrapolationException):
            transc = np.array([0.,0.,0.])
            rotc = np.array([0.,0.,0.,1.])

        euler = tf.transformations.euler_from_quaternion(rotc)
        rot = tf.transformations.euler_matrix(euler[0],euler[1],euler[2])

        h_mat = rot
        h_mat[0:3,3:] = np.array([transc]).T
        b = h_mat.dot(np.array([[centroid[0],centroid[1],centroid[2],1]]).T)[0:3,:]
        
        y = np.array([b[0,0], b[1,0], b[2,0]])

        x = [y[0], y[1], y[2], 0, 0, 0]

        P = np.eye(len(x))

        F = np.array([[ 1,  0,  0, dt,  0,  0],
                      [ 0,  1,  0,  0, dt,  0],
                      [ 0,  0,  1,  0,  0, dt],
                      [ 0,  0,  0,  1,  0,  0],
                      [ 0,  0,  0,  0,  1,  0],
                      [ 0,  0,  0,  0,  0,  1]])

        H = np.array([[ 0.001,  0,  0,  0,  0,  0],
                      [ 0,  0.001,  0,  0,  0,  0],
                      [ 0,  0,  0.001,  0,  0,  0]])

        if class_id == 1:
            ax = 0.68
            ay = 0.68
            az = 0.68
        else:
            ax = 1
            ay = 1
            az = 1

        Q = np.array([[((dt**4)/4)*(ax**2),  0.0,  0.0,  ((dt**4)/4)*(ax**3),  0.0,  0.0],
                      [0.0,  ((dt**4)/4)*(ay**2),  0.0,  0.0, ((dt**4)/4)*(ay**3),   0.0],
                      [0.0,  0.0,  ((dt**4)/4)*(az**2),  0.0,   0.0, ((dt**4)/4)*(az**3)],
                      [((dt**4)/4)*(ax**3),  0.0,  0.0,  (dt**2)*(ax**2),  0.0,  0.0],
                      [0.0,  ((dt**4)/4)*(ay**3),  0.0,  0.0,  (dt**2)*(ax**2),  0.0],
                      [0.0,  0.0,  ((dt**4)/4)*(az**3),  0.0,  0.0, (dt**2)*(ax**2)]])             

        R = np.array([[ 0.8,  0,  0],
                      [ 0,  0.8,  0],
                      [ 0,  0,  1.2]])

        self.objects_dict.update({self.next_object_id : {
            "kalmanFilter" : extendedKalmanFilter(x, P, F, H, Q, R),
            "centroid" : centroid,
            "dimension" : dimensions,
            "classID" : class_id,
            "roisOld" : rois_old,
            "maskID" : mask_id,
            "maskOld" : mask_old,
            "worldPose" : [0,0,0],
            "estimatedVelocity" : [0,0,0],
            "estimatedPose" : [0,0,0],
            "inactiveNbFrame" : 0,
            "activeObject" : 0}})
        
        self.next_object_id = self.next_object_id+1
        
    def delete_object(self, object_id):
        del self.objects_dict[object_id]

    def mask_to_centroid(self, rois, mask_depth):
        current_centroids = {}
        current_dimensions = {}
        for i in range(len(rois)):    
            # 3D centroids from depth frame
            
            if args.input == 'tum':
                fx = 525.0  # focal length x
                fy = 525.0  # focal length y
                cx = 319.5  # optical center x
                cy = 239.5  # optical center y
            elif args.input == 'xtion':    
                # Asus xtion sensor 
                fx = 525
                fy = 525
                cx = 319.5
                cy = 239.5
            elif args.input == 'zed':
                # Zed sensor left img vga
                fx = 350.113
                fy = 350.113
                cx = 336.811
                cy = 190.357
            else:
                print("No valid input")
            
            # Translation from depth pixel to local point
            if mask_depth[i] == -1:
                z = 0
            else :
                z = mask_depth[i]
            
            y = (((rois[i,3]+rois[i,1])/2) - cy) * z / fy
            x = (((rois[i,2]+rois[i,0])/2) - cx) * z / fx

            # Translation from point to world coord
            current_centroids.update({i:[x, y, z]})
            current_dimensions.update({i:[rois[i,3]-rois[i,1], rois[i,2]-rois[i,0]]})
        return current_centroids, current_dimensions
        
    def live_analysis(self):
        """
        Function for live stream video masking
        """
        
        bar = [
                " Waiting for frame [=     ]              ",
                " Waiting for frame [ =    ]              ",
                " Waiting for frame [  =   ]              ",
                " Waiting for frame [   =  ]              ",
                " Waiting for frame [    = ]              ",
                " Waiting for frame [     =]              ",
                " Waiting for frame [    = ]              ",
                " Waiting for frame [   =  ]              ",
                " Waiting for frame [  =   ]              ",
                " Waiting for frame [ =    ]              ",
            ]
        idx = 0
        while not rospy.is_shutdown():
            start_time = time.time()
            self.masked_id = []
            current_frame = self.frame
            current_depth_frame = self.depth_frame

            if len(current_frame) == 0 or len(current_depth_frame) == 0:

                print(bar[idx % len(bar)], end= "\r")
                idx = idx +1
                time.sleep(0.1)
            
            else:
                
                nn_start_time = time.time()
                
                if self.nn == 'yolact' or self.nn == 'yolact++' or self.nn == 'yolact_edge':
                    frame = torch.from_numpy(current_frame).cuda().float()
                    batch = FastBaseTransform()(frame.unsqueeze(0))
                    if self.nn == 'yolact_edge':
                        extras = {"backbone": "full", "interrupt":False, "keep_statistics":False, "moving_statistics":None}
                        preds = self.net(batch.cuda(), extras=extras)
                        preds = preds["pred_outs"]
                    else:
                        preds = self.net(batch.cuda())
                        
                    nn_pred_time = time.time()
                    h, w, _ = frame.shape
                    b = {}
                    r = {}
                    b['class_ids'], b['scores'], b['rois'], b['masks'] = postprocess(preds, w, h, score_threshold=self.score_threshold)

                    r['class_ids'] = copy.deepcopy(b['class_ids'].cpu().data.numpy())
                    r['scores'] = copy.deepcopy(b['scores'].cpu().data.numpy())
                    r['rois'] = copy.deepcopy(b['rois'].cpu().data.numpy())
                    r['masks'] = copy.deepcopy(b['masks'].cpu().data.numpy())    
               
                elif self.nn == 'mrcnn':
                    results = self.model.detect([current_frame],verbose=1)
                    r = results[0]
                    r['masks'] = np.swapaxes(r['masks'],0,2)
                    r['masks'] = np.swapaxes(r['masks'],1,2)

                    for i in range(r['rois'].shape[0]):
                        buff = r['rois'][i]
                        r['rois'][i] = [buff[1],buff[0],buff[3],buff[2]]
                    r['class_ids'] = r['class_ids'] - 1
                
                ''' Deprecated, did not enhance speed
                j=0
                for i in range(len(r['class_ids'])):
                    if not np.in1d(r['class_ids'][j], self.selected_classes):
                        r['class_ids'] = np.delete(r['class_ids'], j)
                        r['scores']= np.delete(r['scores'], j)
                        r['rois']= np.delete(r['rois'], j,axis=0)
                        r['masks']= np.delete(r['masks'], j, axis=0)
                    else:
                        j=j+1
                '''
                self.number_observation = min(self.max_number_observation, r['class_ids'].shape[0])
                for j in range(self.number_observation):
                    if r['scores'][j] < self.score_threshold:
                        self.number_observation = j
                        break

                r['class_ids'] = r['class_ids'][:self.number_observation]
                r['scores'] = r['scores'][:self.number_observation]
                r['rois'] = r['rois'][:self.number_observation]
                r['masks'] = r['masks'][:self.number_observation]

                nn_time = time.time()

                mask_depth = self.get_masking_depth(current_depth_frame, r['masks'])
                
                # Read object tf pose
                self.read_objects_pose()
                
                # Read camera tf pose
                try:
                    (transc, rotc) = listener.lookupTransform(self.tf_camera,'/map', rospy.Time(0))
                except (tf.LookupException, tf.ConnectivityException, tf.ExtrapolationException):
                    transc = np.array([0.,0.,0.])
                    rotc = np.array([0.,0.,0.,1.])

                euler = tf.transformations.euler_from_quaternion(rotc)
                rot = tf.transformations.euler_matrix(euler[0],euler[1],euler[2])
        
                h_mat = rot
                h_mat[0:3,3:] = np.array([transc]).T

                objects_to_delete = []

                # Main filter update and prediction step
                if len(r['rois']) == 0:
                    for i in self.objects_dict:
                        self.objects_dict[i]["inactiveNbFrame"] = self.objects_dict[i]["inactiveNbFrame"] + 1

                        if self.objects_dict[i]["inactiveNbFrame"] > self._max_inactive_frames:                            
                            objects_to_delete.append(i)
                    
                    for i in objects_to_delete:
                        self.delete_object(i)
                        
                else : 
                    current_centroids, current_dimensions = self.mask_to_centroid(r['rois'],mask_depth)

                    if not self.objects_dict:
                        if not len(current_centroids)==0:
                            for i in range(len(current_centroids)):
                                self.add_object(current_centroids[i], current_dimensions[i], i, r['class_ids'][i], r['masks'][i], r['rois'][i])

                            for i in self.objects_dict:
                                self.objects_dict[i]["kalmanFilter"].prediction()
                                self.objects_dict[i]["kalmanFilter"].update(self.objects_dict[i]["centroid"], h_mat)
                                self.objects_dict[i]["estimatedPose"] = self.objects_dict[i]["kalmanFilter"].x[0:3]
                                self.objects_dict[i]["estimatedVelocity"] = self.objects_dict[i]["kalmanFilter"].x[3:6]
                    else:
                        objects_pose = np.zeros((len(self.objects_dict),3))
                        objects_ids = np.zeros((len(self.objects_dict)))
                        index = 0
                        for i in self.objects_dict:
                            objects_pose[index,] = self.objects_dict[i]["centroid"]
                            objects_ids[index] = i
                            index = index + 1

                        centroids_pose = np.zeros((len(current_centroids),3))
                        for i in range(len(current_centroids)):
                            centroids_pose[i,] = current_centroids[i]
                        
                        eucledian_dist_pairwise = np.array(cdist(objects_pose, centroids_pose)).flatten()
                        index_sorted = np.argsort(eucledian_dist_pairwise)

                        used_objects = []
                        used_centroids = []
                        
                        for index in range(len(eucledian_dist_pairwise)):
                            object_id = int(index_sorted[index] / len(centroids_pose))
                            centroid_id = index_sorted[index] % len(centroids_pose)

                            if not np.in1d(object_id, used_objects) and not np.in1d(centroid_id, used_centroids):# and (eucledian_dist_pairwise[index]<0.5):
                                if self.objects_dict[objects_ids[object_id]]["classID"] == r['class_ids'][centroid_id]:
                                    timebefore = time.time()
                                    used_objects.append(object_id)
                                    used_centroids.append(centroid_id)

                                    self.objects_dict[objects_ids[object_id]]["kalmanFilter"].prediction()
                                    self.objects_dict[objects_ids[object_id]]["kalmanFilter"].update(current_centroids[centroid_id], h_mat)
                                    self.objects_dict[objects_ids[object_id]]["estimatedPose"] = self.objects_dict[objects_ids[object_id]]["kalmanFilter"].x[0:3]
                                    self.objects_dict[objects_ids[object_id]]["estimatedVelocity"] = self.objects_dict[objects_ids[object_id]]["kalmanFilter"].x[3:6]

                                    if self.objects_dict[objects_ids[object_id]]["classID"] == 0:
                                        max_threshold = self.human_threshold
                                    else:
                                        max_threshold = self.object_threshold
                                    
                                    if abs(self.objects_dict[objects_ids[object_id]]["estimatedVelocity"][0])>max_threshold or abs(self.objects_dict[objects_ids[object_id]]["estimatedVelocity"][1])>max_threshold or abs(self.objects_dict[objects_ids[object_id]]["estimatedVelocity"][2])>max_threshold:
                                        self.objects_dict[objects_ids[object_id]]["activeObject"] = 1
                                    else:
                                        self.objects_dict[objects_ids[object_id]]["activeObject"] = 0

                                    if self.objects_dict[objects_ids[object_id]]["classID"] == 0 and self.objects_dict[objects_ids[object_id]]["activeObject"] == 0:
                                        
                                        iou = self.iou_centered_centroid(self.objects_dict[objects_ids[object_id]]["roisOld"], r['rois'][centroid_id], self.objects_dict[objects_ids[object_id]]["maskOld"],r['masks'][centroid_id])         
                                        if iou<self.iou_threshold:
                                            self.objects_dict[objects_ids[object_id]]["activeObject"] = 1
                                        else:
                                            pass  # overlap is still high, so keep the person marked as inactive
                                    
                                    self.objects_dict[objects_ids[object_id]]["centroid"] = centroids_pose[centroid_id]
                                    self.objects_dict[objects_ids[object_id]]["dimensions"] = current_dimensions[centroid_id]
                                    self.objects_dict[objects_ids[object_id]]["inactiveNbFrame"] = 0
                                    self.objects_dict[objects_ids[object_id]]["maskID"] = centroid_id
                                    self.objects_dict[objects_ids[object_id]]["maskOld"] = r['masks'][centroid_id]
                                    self.objects_dict[objects_ids[object_id]]["roisOld"] = r['rois'][centroid_id]
                        
                        # Fewer detections than tracked objects: age out the objects that were not
                        # matched, delete them after _max_inactive_frames, and otherwise run a
                        # prediction-only Kalman step for them.
                        if len(centroids_pose) < len(objects_pose):
                            for index in range(len(eucledian_dist_pairwise)):
                                # The flattened distance matrix has one row per object and one column
                                # per centroid, so the row (object) index is recovered by dividing by
                                # the number of centroids.
                                object_id = int(index_sorted[index] / len(centroids_pose))
                                if not np.in1d(object_id, used_objects):
                                    self.objects_dict[objects_ids[object_id]]["inactiveNbFrame"] += 1
                                    self.objects_dict[objects_ids[object_id]]["activeObject"] = 0
                                    if self.objects_dict[objects_ids[object_id]]["inactiveNbFrame"] >= self._max_inactive_frames:
                                        self.delete_object(objects_ids[object_id])
                                        used_objects.append(object_id)
                                    else:
                                        self.objects_dict[objects_ids[object_id]]["kalmanFilter"].prediction()
                                        self.objects_dict[objects_ids[object_id]]["estimatedPose"] = self.objects_dict[objects_ids[object_id]]["kalmanFilter"].x_[0:3]
                                        self.objects_dict[objects_ids[object_id]]["estimatedVelocity"] = self.objects_dict[objects_ids[object_id]]["kalmanFilter"].x_[3:6]

                        # More detections than tracked objects: register each unmatched centroid as a
                        # new object and initialise its Kalman filter.
                        elif len(centroids_pose) > len(objects_pose):
                            buff_id = self.next_object_id
                            for index in range(len(eucledian_dist_pairwise)):
                                centroid_id = index_sorted[index] % len(centroids_pose)
                                if not np.in1d(centroid_id, used_centroids):
                                    self.add_object(current_centroids[centroid_id], current_dimensions[centroid_id], centroid_id, r['class_ids'][centroid_id], r['masks'][centroid_id], r['rois'][centroid_id])
                                    self.objects_dict[buff_id]["kalmanFilter"].prediction()
                                    self.objects_dict[buff_id]["kalmanFilter"].update(current_centroids[centroid_id], h_mat)
                                    self.objects_dict[buff_id]["estimatedPose"] = self.objects_dict[buff_id]["kalmanFilter"].x[0:3]
                                    self.objects_dict[buff_id]["estimatedVelocity"] = self.objects_dict[buff_id]["kalmanFilter"].x[3:6]
                                    buff_id = buff_id + 1
                               
                kalman_time = time.time()
                # Write objects filter pose to tf
                self.handle_objects_pose()

                # Apply the predicted instance masks to the current depth frame (a dynamic-objects
                # variant and an all-detections variant) before publishing both below.
                result_dynamic_depth_image, result_depth_image = self.apply_depth_image_masking(current_depth_frame, r['masks'])
                
                DDITS = self.bridge.cv2_to_imgmsg(result_dynamic_depth_image, '32FC1')
                DDITS.header = self.depth_msg_header
                self.dynamic_depth_image_pub.publish(DDITS)

                DITS = self.bridge.cv2_to_imgmsg(result_depth_image, '32FC1')
                DITS.header = self.depth_msg_header
                self.depth_image_pub.publish(DITS)
                
                print_time = time.time()

                #print(" NN pred time: ", format(nn_pred_time - nn_start_time, '.3f'),", NN post time: ", format(nn_time - nn_pred_time, '.3f'),", NN time: ", format(nn_time - start_time, '.3f'), ", Kalman time: ", format(kalman_time - nn_time, '.3f'),
                #", Print time: ", format(print_time - kalman_time, '.3f'), ", Total time: ", format(time.time() - start_time, '.3f'),
                #", FPS :", format(1/(time.time() - start_time), '.2f'), end="\r")

    def image_callback(self, msg):

        self.msg_header = msg.header
        self.frame = self.bridge.imgmsg_to_cv2(msg, "bgr8")

    def depth_image_callback(self, msg):

        self.depth_msg_header = msg.header
        # 32FC1 for Asus Xtion
        # 8UC1 for Kinect
        self.depth_frame = self.bridge.imgmsg_to_cv2(msg, "32FC1")
class YolactEdgeEngine:
    def __init__(self):
        parse_args(self)
        self.args.config = 'yolact_edge_mobilenetv2_config'
        set_cfg(self.args.config)
        self.args.trained_model = '/home/ht/catkin_ws/src/instance_segmentation/scripts/weights/yolact_edge_mobilenetv2_124_10000.pth'
        self.args.top_k = 10
        self.args.score_threshold = 0.3
        self.args.trt_batch_size = 3
        self.args.disable_tensorrt = False
        self.args.use_fp16_tensorrt = False
        self.args.use_tensorrt_safe_mode = True
        self.args.cuda = True
        self.args.fast_nms = True
        self.args.display_masks = True
        self.args.display_bboxes = True
        self.args.display_text = True
        self.args.display_scores = True
        self.args.display_linecomb = False
        self.args.fast_eval = False
        self.args.deterministic = False
        self.args.no_crop = False
        self.args.crop = True
        self.args.calib_images = '/home/ht/catkin_ws/src/instance_segmentation/scripts/data/coco/calib_images'

        setup_logger(logging_level=logging.INFO)
        self.logger = logging.getLogger('yolact.eval')

        self.color_cache = defaultdict(lambda: {})

        with torch.no_grad():
            cudnn.benchmark = True
            cudnn.fastest = True
            torch.set_default_tensor_type('torch.cuda.FloatTensor')

            self.logger.info('Loading model...')
            self.net = Yolact(training=False)
            if self.args.trained_model is not None:
                self.net.load_weights(self.args.trained_model, args=self.args)
            else:
                self.logger.warning('No weights loaded!')
            self.net.eval()
            self.logger.info('Model loaded.')
            # Convert the network to TensorRT engines according to the flags set above
            # (the calib_images directory is presumably used for INT8 calibration).
            convert_to_tensorrt(self.net,
                                cfg,
                                self.args,
                                transform=BaseTransform())

    def evaluate(self, train_mode=False, train_cfg=None):
        with torch.no_grad():
            self.net = self.net.cuda()
            self.net.detect.use_fast_nms = self.args.fast_nms
            cfg.mask_proto_debug = self.args.mask_proto_debug
            inp, out = self.args.images.split(':')
            self.evalimages(inp, out)

    def evalimages(self, input_folder: str, output_folder: str):
        if not os.path.exists(output_folder):
            os.mkdir(output_folder)

        print()
        for p in Path(input_folder).glob('*'):
            path = str(p)
            name = os.path.basename(path)
            name = '.'.join(name.split('.')[:-1]) + '.jpg'
            out_path = os.path.join(output_folder, name)

            img = cv2.imread(path)
            img_out = self.evalimage(img, out_path)
            #print(path + ' -> ' + out_path)
        print('Done.')

    def detect(self, img_in, return_imgs=False):
        with torch.no_grad():
            self.net = self.net.cuda()
            self.net.detect.use_fast_nms = self.args.fast_nms
            cfg.mask_proto_debug = self.args.mask_proto_debug
            #return self.evalimage(img_in[0])
            return self.evalbatch(img_in, return_imgs)

    def evalbatch(self, imgs, return_imgs=False):
        frame = torch.from_numpy(np.array(imgs)).cuda().float()
        batch = FastBaseTransform()(frame)

        if cfg.flow.warp_mode != 'none':
            assert False, 'Evaluating the image with a video-based model.'

        # Extra flags passed to YolactEdge's forward pass: run the full backbone on every
        # frame (no flow-warped partial computation) and keep no temporal statistics.
        extras = {
            "backbone": "full",
            "interrupt": False,
            "keep_statistics": False,
            "moving_statistics": None
        }

        #start_time = time.time()
        preds = self.net(batch, extras=extras)["pred_outs"]
        #end_time = time.time()
        #print('%.3f s' % (end_time-start_time))

        imgs_out = []
        allres = []
        for i, img in enumerate(imgs):
            if return_imgs:
                img_out, res = self.prep_display(preds,
                                                 frame[i],
                                                 None,
                                                 None,
                                                 undo_transform=False,
                                                 batch_idx=i,
                                                 create_mask=True,
                                                 return_imgs=return_imgs)
                imgs_out.append(img_out)
                allres.append(res)
            else:
                res = self.prep_display(preds,
                                        frame[i],
                                        None,
                                        None,
                                        undo_transform=False,
                                        batch_idx=i,
                                        create_mask=True,
                                        return_imgs=return_imgs)
                allres.append(res)
        if return_imgs:
            return imgs_out, allres
        else:
            return allres

    def evalimage(self, img, save_path=None):
        frame = torch.from_numpy(img).cuda().float()
        batch = FastBaseTransform()(frame.unsqueeze(0))

        if cfg.flow.warp_mode != 'none':
            assert False, 'Evaluating the image with a video-based model.'

        extras = {
            "backbone": "full",
            "interrupt": False,
            "keep_statistics": False,
            "moving_statistics": None
        }

        preds = self.net(batch, extras=extras)["pred_outs"]

        return self.prep_display(preds,
                                 frame,
                                 None,
                                 None,
                                 undo_transform=False,
                                 create_mask=True)
        #if save_path:
        #    cv2.imwrite(save_path, img_numpy)
        #return img_numpy, mask

    def prep_display(self,
                     dets_out,
                     img,
                     h,
                     w,
                     undo_transform=True,
                     class_color=False,
                     mask_alpha=0.45,
                     batch_idx=0,
                     create_mask=False,
                     return_imgs=False):
        if undo_transform:
            img_numpy = undo_image_transformation(img, w, h)
            img_gpu = torch.Tensor(img_numpy).cuda()
        else:
            img_gpu = img / 255.0
            h, w, _ = img.shape
            #print(h, " ", w)

        with timer.env('Postprocess'):
            t = postprocess(dets_out,
                            w,
                            h,
                            batch_idx,
                            visualize_lincomb=self.args.display_linecomb,
                            crop_masks=self.args.crop,
                            score_threshold=self.args.score_threshold)
            torch.cuda.synchronize()

        with timer.env('Copy'):
            if cfg.eval_mask_branch:
                masks = t[3][:self.args.top_k]
            classes, scores, boxes = [
                x[:self.args.top_k].cpu().numpy() for x in t[:3]
            ]

        num_dets_to_consider = min(self.args.top_k, classes.shape[0])
        for j in range(num_dets_to_consider):
            if scores[j] < self.args.score_threshold:
                num_dets_to_consider = j
                break

        # Keep only the detections whose class is 'car' or 'truck'
        idx_fil = []
        for i in range(num_dets_to_consider):
            if cfg.dataset.class_names[classes[i]] in ('car', 'truck'):
                idx_fil.append(i)
        num_dets_to_consider = len(idx_fil)

        if num_dets_to_consider == 0:
            # no detection found so just output original image
            if not create_mask:
                return (img_gpu * 255).byte().cpu().numpy()
            elif return_imgs:
                return (img_gpu * 255).byte().cpu().numpy(), ImageResult(
                    None, None, None, np.zeros((h, w, 1), dtype='uint8'), 0)
            else:
                return ImageResult(None, None, None,
                                   np.zeros((h, w, 1), dtype='uint8'), 0)

        # Quick and dirty lambda for selecting the color for a particular index
        # Also keeps track of a per-gpu color cache for maximum speed
        def get_color(j, on_gpu=None):
            color_idx = (classes[j] * 5 if class_color else j *
                         5) % len(COLORS)

            if on_gpu is not None and color_idx in self.color_cache[on_gpu]:
                return self.color_cache[on_gpu][color_idx]
            else:
                color = COLORS[color_idx]
                if not undo_transform:
                    # The image might come in as RGB or BGR, depending on the transform used
                    color = (color[2], color[1], color[0])
                if on_gpu is not None:
                    color = torch.Tensor(color).to(on_gpu).float() / 255.
                    self.color_cache[on_gpu][color_idx] = color
                return color

        if self.args.display_masks and cfg.eval_mask_branch:
            # After this, masks is of size [num_dets, h, w, 1]
            #masks = masks[:num_dets_to_consider, :, :, None]
            #classes = classes[:num_dets_to_consider]
            #scores = scores[:num_dets_to_consider]
            #boxes = boxes[:num_dets_to_consider, :]

            masks = masks[idx_fil, :, :, None]
            classes = classes[idx_fil]
            scores = scores[idx_fil]
            boxes = boxes[idx_fil, :]

            if create_mask:
                mask_img = np.zeros((h, w, 1), dtype='uint8')
                for j in range(num_dets_to_consider):
                    mask_img += 10 * (j + 1) * masks[j].cpu().numpy().astype(
                        np.uint8)
                if not return_imgs:
                    return ImageResult(classes, scores, boxes, mask_img,
                                       num_dets_to_consider)

            # Prepare the RGB image for each mask given its color (of size [num_dets, h, w, 1])
            colors = torch.cat([
                get_color(j, on_gpu=img_gpu.device.index).view(1, 1, 1, 3)
                for j in range(num_dets_to_consider)
            ],
                               dim=0)
            masks_color = masks.repeat(1, 1, 1, 3) * colors * mask_alpha

            # this is 1 everywhere except for 1-mask_alpha where the mask is
            inv_alph_masks = masks * (-mask_alpha) + 1

            # I did the math for this on pen and paper. This whole block should be equivalent to:
            #    for j in range(num_dets_to_consider):
            #        img_gpu = img_gpu * inv_alph_masks[j] + masks_color[j]
            masks_color_summand = masks_color[0]
            if num_dets_to_consider > 1:
                inv_alph_cumul = inv_alph_masks[:(num_dets_to_consider -
                                                  1)].cumprod(dim=0)
                masks_color_cumul = masks_color[1:] * inv_alph_cumul
                masks_color_summand += masks_color_cumul.sum(dim=0)

            img_gpu = img_gpu * inv_alph_masks.prod(
                dim=0) + masks_color_summand

        # Then draw the stuff that needs to be done on the CPU.
        # Note: make sure this is a uint8 tensor or OpenCV will not anti-alias text for whatever reason.
        img_numpy = (img_gpu * 255).byte().cpu().numpy()

        if self.args.display_text or self.args.display_bboxes:
            for j in reversed(range(num_dets_to_consider)):
                x1, y1, x2, y2 = boxes[j, :]
                color = get_color(j)
                score = scores[j]

                if self.args.display_bboxes:
                    cv2.rectangle(img_numpy, (x1, y1), (x2, y2), color, 1)

                if self.args.display_text:
                    _class = cfg.dataset.class_names[classes[j]]
                    text_str = '%s: %.2f' % (
                        _class, score) if self.args.display_scores else _class
                    text_pt = (x1, y1 - 3)
                    text_color = [255, 255, 255]

                    font_face = cv2.FONT_HERSHEY_DUPLEX
                    font_scale = 0.6
                    font_thickness = 1

                    cv2.putText(img_numpy, text_str, text_pt, font_face,
                                font_scale, text_color, font_thickness,
                                cv2.LINE_AA)
        return img_numpy, ImageResult(classes, scores, boxes, mask_img,
                                      num_dets_to_consider)
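
# Usage sketch (not part of the original snippet): a minimal, hedged example of how the
# engine above might be driven in a standalone script. The image file names are
# placeholders, and three images are used only because trt_batch_size is set to 3 above.
if __name__ == '__main__':
    example_imgs = [cv2.imread(p) for p in ['img0.jpg', 'img1.jpg', 'img2.jpg']]  # hypothetical files
    engine = YolactEdgeEngine()
    rendered, results = engine.detect(example_imgs, return_imgs=True)
    for k, img_out in enumerate(rendered):
        cv2.imwrite('detections_%d.jpg' % k, img_out)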
Example #13
0
def evalvideo(net: Yolact, path: str):
    # If the path is a digit, parse it as a webcam index
    is_webcam = path.isdigit()

    if is_webcam:
        vid = cv2.VideoCapture(int(path))
    else:
        vid = cv2.VideoCapture(path)

    if not vid.isOpened():
        print('Could not open video "%s"' % path)
        exit(-1)

    net = CustomDataParallel(net)
    net = net.cuda()
    transform = torch.nn.DataParallel(FastBaseTransform())
    transform = transform.cuda()
    frame_times = MovingAverage(100)
    fps = 0
    # Target time per displayed frame, derived from the source video's FPS
    frame_time_target = 1 / vid.get(cv2.CAP_PROP_FPS)
    running = True

    def cleanup_and_exit():
        print()
        pool.terminate()
        vid.release()
        cv2.destroyAllWindows()
        exit()

    def get_next_frame(vid):
        return [vid.read()[1] for _ in range(args.video_multiframe)]

    def transform_frame(frames):
        with torch.no_grad():
            frames = [
                torch.from_numpy(frame).cuda().float() for frame in frames
            ]
            return frames, transform(torch.stack(frames, 0))

    def eval_network(inp):
        with torch.no_grad():
            frames, imgs = inp
            return frames, net(imgs)

    def prep_frame(inp):
        with torch.no_grad():
            frame, preds = inp
            return prep_display(preds,
                                frame,
                                None,
                                None,
                                undo_transform=False,
                                class_color=True)

    frame_buffer = Queue()
    video_fps = 0

    # All this timing code is to make sure the video plays back at a steady, accurate FPS
    def play_video():
        nonlocal frame_buffer, running, video_fps, is_webcam

        video_frame_times = MovingAverage(100)
        frame_time_stabilizer = frame_time_target
        last_time = None
        stabilizer_step = 0.0005

        while running:
            frame_time_start = time.time()

            if not frame_buffer.empty():
                next_time = time.time()
                if last_time is not None:
                    video_frame_times.add(next_time - last_time)
                    video_fps = 1 / video_frame_times.get_avg()
                cv2.imshow(path, frame_buffer.get())
                last_time = next_time

            if cv2.waitKey(1) == 27:  # Press Escape to close
                running = False

            buffer_size = frame_buffer.qsize()
            if buffer_size < args.video_multiframe:
                frame_time_stabilizer += stabilizer_step
            elif buffer_size > args.video_multiframe:
                frame_time_stabilizer -= stabilizer_step
                if frame_time_stabilizer < 0:
                    frame_time_stabilizer = 0

            new_target = frame_time_stabilizer if is_webcam else max(
                frame_time_stabilizer, frame_time_target)

            next_frame_target = max(
                2 * new_target - video_frame_times.get_avg(), 0)
            target_time = frame_time_start + next_frame_target - 0.001  # Let's just subtract a millisecond to be safe
            # This gives more accurate timing than if sleeping the whole amount at once
            while time.time() < target_time:
                time.sleep(0.001)

    # Pull out frame i (moved onto the device of its predictions) together with its predictions
    extract_frame = lambda x, i: (
        x[0][i] if x[1][i] is None else x[0][i].to(x[1][i]['box'].device),
        [x[1][i]])

    # Prime the network on the first frame because I do some thread unsafe things otherwise
    print('Initializing model... ', end='')
    eval_network(transform_frame(get_next_frame(vid)))
    print('Done.')

    # For each frame the sequence of functions it needs to go through to be processed (in reversed order)
    sequence = [prep_frame, eval_network, transform_frame]
    pool = ThreadPool(processes=len(sequence) + args.video_multiframe + 2)
    pool.apply_async(play_video)

    active_frames = []

    print()
    while vid.isOpened() and running:
        start_time = time.time()

        # Start loading the next frames from the disk
        next_frames = pool.apply_async(get_next_frame, args=(vid, ))

        # For each frame in our active processing queue, dispatch a job
        # for that frame using the current function in the sequence
        for frame in active_frames:
            frame['value'] = pool.apply_async(sequence[frame['idx']],
                                              args=(frame['value'], ))

        # For each frame whose job was the last in the sequence (i.e. for all final outputs)
        for frame in active_frames:
            if frame['idx'] == 0:
                frame_buffer.put(frame['value'].get())

        # Remove the finished frames from the processing queue
        active_frames = [x for x in active_frames if x['idx'] > 0]

        # Finish evaluating every frame in the processing queue and advance its position in the sequence
        for frame in list(reversed(active_frames)):
            frame['value'] = frame['value'].get()
            frame['idx'] -= 1

            if frame['idx'] == 0:
                # Split this up into individual threads for prep_frame since it doesn't support batch size
                active_frames += [{
                    'value': extract_frame(frame['value'], i),
                    'idx': 0
                } for i in range(1, args.video_multiframe)]
                frame['value'] = extract_frame(frame['value'], 0)

        # Finish loading in the next frames and add them to the processing queue
        active_frames.append({
            'value': next_frames.get(),
            'idx': len(sequence) - 1
        })

        # Compute FPS
        frame_times.add(time.time() - start_time)
        fps = args.video_multiframe / frame_times.get_avg()

        print(
            '\rProcessing FPS: %.2f | Video Playback FPS: %.2f | Frames in Buffer: %d    '
            % (fps, video_fps, frame_buffer.qsize()),
            end='')

    cleanup_and_exit()
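
# Usage sketch (hedged, not from the original source): evalvideo() expects to run inside
# YOLACT's eval.py, where the global `args` (e.g. args.video_multiframe) and prep_display()
# are defined. With that context in place, it could be driven roughly like this:
#     net = Yolact()
#     net.load_weights('weights/yolact_base_54_800000.pth')  # path is an assumption
#     net.eval()
#     evalvideo(net, '0')              # webcam index 0
#     evalvideo(net, 'my_video.mp4')   # or a video file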
Example #14
0
def main(argv=None):
    """
    Parses the given parameters (or sys.argv if None) and starts prediction mode.

    :param argv: the command-line parameters to parse (list of strings)
    :type argv: list
    """

    parser = argparse.ArgumentParser(description='YOLACT Prediction')
    parser.add_argument('--model',
                        required=True,
                        type=str,
                        help='The trained model to use (.pth file).')
    parser.add_argument('--config',
                        default="external_config",
                        help='The name of the configuration to use.')
    parser.add_argument(
        '--top_k',
        default=5,
        type=int,
        help='Further restrict the number of predictions (e.g. objects) to parse'
    )
    parser.add_argument(
        '--score_threshold',
        default=0,
        type=float,
        help=
        'Detections with a score under this threshold will not be considered.')
    parser.add_argument(
        '--fast_nms',
        action="store_false",
        help='Whether to use a faster, but not entirely correct version of NMS.'
    )
    parser.add_argument('--cross_class_nms',
                        action="store_true",
                        help='Whether compute NMS cross-class or per-class.')
    parser.add_argument(
        '--prediction_in',
        default=None,
        type=str,
        required=True,
        help='The directory in which to look for images for processing.')
    parser.add_argument('--prediction_out',
                        default=None,
                        type=str,
                        required=True,
                        help='The directory to store the results in.')
    parser.add_argument(
        '--prediction_tmp',
        default=None,
        type=str,
        required=False,
        help=
        'The directory to store the results in first, before moving them to the actual output directory.'
    )
    parser.add_argument(
        '--continuous',
        action="store_true",
        help=
        'Whether to continuously poll the input directory or exit once all initial images have been processed.'
    )
    parser.add_argument(
        '--delete_input',
        action="store_true",
        help=
        'Whether to delete the input images rather than moving them to the output directory.'
    )
    parser.add_argument(
        '--output_polygons',
        action='store_true',
        help=
        'Whether masks are predicted and polygons should be output in the ROIs CSV files',
        required=False,
        default=False)
    parser.add_argument(
        '--fit_bbox_to_polygon',
        action='store_true',
        help=
        'When outputting polygons whether to fit the bounding box to the polygon',
        required=False,
        default=False)
    parser.add_argument(
        '--bbox_as_fallback',
        default=-1.0,
        type=float,
        help=
        'When outputting polygons the bbox can be used as fallback polygon. This happens if the ratio '
        +
        'between the surrounding bbox of the polygon and the bbox is smaller than the specified value. '
        + 'Turned off if < 0.',
        required=False)
    parser.add_argument(
        '--mask_threshold',
        type=float,
        help='The threshold (0-1) to use for determining the contour of a mask',
        required=False,
        default=0.1)
    parser.add_argument(
        '--mask_nth',
        type=int,
        help='To speed polygon detection up, use every nth row and column only',
        required=False,
        default=1)
    parser.add_argument(
        '--output_minrect',
        action='store_true',
        help=
        'When outputting polygons whether to store the minimal rectangle around the objects in the CSV files as well',
        required=False,
        default=False)
    parser.add_argument(
        '--view_margin',
        default=2,
        type=int,
        required=False,
        help=
        'The number of pixels to use as margin around the masks when determining the polygon'
    )
    parser.add_argument(
        '--fully_connected',
        default='high',
        choices=['high', 'low'],
        required=False,
        help=
        'When determining polygons, whether regions of high or low values should be fully-connected at isthmuses'
    )
    parser.add_argument(
        '--output_width_height',
        action='store_true',
        help=
        "Whether to output x/y/w/h instead of x0/y0/x1/y1 in the ROI CSV files",
        required=False,
        default=False)
    parser.add_argument(
        '--scale',
        type=float,
        help=
        'The scale factor to apply to the image (0-1) before processing. Output will be in original dimension space.',
        required=False,
        default=1.0)
    parser.add_argument(
        '--debayer',
        default="",
        type=str,
        help='The OpenCV debayering method to use, e.g. "COLOR_BAYER_BG2BGR"',
        required=False)
    parser.add_argument(
        '--output_mask_image',
        action='store_true',
        default=False,
        help=
        "Whether to output a mask image (PNG) when predictions generate masks (independent of outputting polygons)",
        required=False)
    parsed = parser.parse_args(args=argv)

    if parsed.fit_bbox_to_polygon and (parsed.bbox_as_fallback >= 0):
        raise Exception(
            "Options --fit_bbox_to_polygon and --bbox_as_fallback cannot be used together!"
        )
    if (parsed.debayer is not None
        ) and not (parsed.debayer
                   == "") and not parsed.debayer.startswith("COLOR_BAYER_"):
        raise Exception(
            "Expected debayering type to start with COLOR_BAYER_, instead got: "
            + str(parsed.debayer))

    with torch.no_grad():
        # initializing cudnn
        print('Initializing cudnn', end='')
        cudnn.fastest = True
        torch.set_default_tensor_type('torch.cuda.FloatTensor')
        print(' Done.')

        # load configuration and model
        print('Loading config %s' % parsed.config, end='')
        set_cfg(parsed.config)
        cfg.mask_proto_debug = False
        print(' Done.')

        print('Loading model: %s' % parsed.model, end='')
        net = Yolact()
        net.load_weights(parsed.model)
        net.eval()
        net = net.cuda()
        net.detect.use_fast_nms = parsed.fast_nms
        net.detect.use_cross_class_nms = parsed.cross_class_nms
        print(' Done.')

        predict(model=net,
                input_dir=parsed.prediction_in,
                output_dir=parsed.prediction_out,
                tmp_dir=parsed.prediction_tmp,
                top_k=parsed.top_k,
                score_threshold=parsed.score_threshold,
                delete_input=parsed.delete_input,
                output_polygons=parsed.output_polygons,
                mask_threshold=parsed.mask_threshold,
                mask_nth=parsed.mask_nth,
                output_minrect=parsed.output_minrect,
                view_margin=parsed.view_margin,
                fully_connected=parsed.fully_connected,
                fit_bbox_to_polygon=parsed.fit_bbox_to_polygon,
                output_width_height=parsed.output_width_height,
                bbox_as_fallback=parsed.bbox_as_fallback,
                scale=parsed.scale,
                debayer=parsed.debayer,
                continuous=parsed.continuous,
                output_mask_image=parsed.output_mask_image)
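
# Usage sketch (hedged, not part of the original snippet): the prediction loop above can be
# started from the command line or programmatically by passing an argv list; the model,
# config and directory names below are placeholders.
#     main(['--model', 'weights/yolact_base_54_800000.pth',
#           '--config', 'yolact_base_config',
#           '--prediction_in', 'incoming/', '--prediction_out', 'done/',
#           '--score_threshold', '0.3', '--output_polygons'])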
Example #15
0
class pear_detector(object):
    #def __init__(self, weight_path = '/home/user/caoliwei/yolact/weights/20200901/yolact_darknet53_1176_20000.pth'):
    def __init__(
            self,
            weight_path='C:/Users/user/yolact_notes/weights/yolact_darknet53_249_2000.pth',
            save_path='C:/Users/user/yolact_notes/pear_output'):
        set_cfg('pear_config')
        self.save_path = save_path
        self.weight_path = weight_path
        self.net = Yolact()
        self.net.load_weights(self.weight_path)
        self.net.eval()
        self.net = self.net.cuda()
        print('model loaded...')

        self.net.detect.cross_class_nms = True
        self.net.detect.use_fast_nms = True

    def detect(self, img):
        try:
            print('')
            print(
                '======================== clw: detect of python nn start !! ================================'
            )
            print('img.shape:', img.shape)

            with torch.no_grad():
                torch.cuda.set_device(0)

                ######
                # If the input image size is constant, this makes things faster (hence why we can use it in a video setting).
                # cudnn.benchmark = True
                # cudnn.fastest = True
                torch.set_default_tensor_type('torch.cuda.FloatTensor')
                ######

                cfg.mask_proto_debug = False

                # if not os.path.exists(self.save_path):
                #     os.mkdir(self.save_path)

                #img = img[:, :, ::-1].copy()
                # clw note: during training the image is loaded with cv2.imread() and its BGR data is
                # handled by BackboneTransform; at test time FastBaseTransform also processes BGR, so
                # the ::-1 channel flip should not be needed here.
                img = img.copy()
                img = torch.from_numpy(img).cuda().float()
                img = FastBaseTransform()(img.unsqueeze(0))
                start = time.time()
                preds = self.net(img)

                # start = time.time()
                h, w = img.shape[2:]
                result = postprocess(
                    preds, w, h, crop_masks=True, score_threshold=0.3
                )  # classes, scores, boxes, masks, sorted by score
                # top_k = 10
                # classes, scores, boxes, masks = [x[:top_k].cpu().numpy() for x in result]  # clw note TODO: is it necessary to keep only the top_k?
                # print('clw: postprocess time use %.3fs' % (time.time() - start))  # 0.001s
                print('clw: inference time use %.3fs, item nums in result:%d' %
                      (time.time() - start, len(
                          result[0])))  # inference time use 0.023s, 550x550

                ### Walk through result[0] in order, find the first entry whose class id is 0 (the pear) and take the corresponding mask
                # start = time.time()
                bFindPear = False
                for i, cls_id in enumerate(result[0]):
                    if cls_id == 0 and not bFindPear:
                        pear_mask = result[3][i].cpu().numpy()
                        bFindPear = True

                # Extract the outline from the pear mask
                pear_outline = get_outline_from_mask(pear_mask, w, h)
                # print('pear_mask.sum:', pear_mask.sum())     # 124250.0
                # print('pear_outline.sum:', pear_outline.sum())  # 34335.0
                # print('clw: outline extract time use %.3fs' % (time.time() - start))  # 0.001s
                roundness = compute_roundness(pear_outline)
                ###

                result.append(roundness)

        except Exception:
            traceback.print_exc()

        print(
            '======================== clw: detect of python nn end !! ================================'
        )
        print('')

        return result
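
# Usage sketch (hedged, not part of the original snippet): the detector expects a BGR image
# as loaded by cv2.imread; the weight and image paths below are placeholders.
#     detector = pear_detector(weight_path='weights/yolact_darknet53_249_2000.pth',
#                              save_path='pear_output')
#     result = detector.detect(cv2.imread('pear.jpg'))
#     # result holds the postprocess output (classes, scores, boxes, masks) with the roundness appended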
Example #16
0
class RunYolact(object):
    """
    Wrapper class for running YOLACT.
    source: https://github.com/dbolya/yolact/issues/9
    """
    def __init__(self,
                 trained_model: str,
                 save_json=True,
                 output_dir=None,
                 output_name="detection",
                 output_num=5):
        """
        YOLACT initialization. Parameters:
            - save_json         whether to save the results as a JSON file
            - output_dir        when save_json is True, the directory the JSON file is written to
            - output_name       file name of the saved JSON file
            - output_num        # ? apparently the number of detections to output
        """
        # step 0: initialize member variables
        self.save_json = save_json
        # NOTE: had not seen this usage before, good to know
        self.detections = None
        self.output_num = output_num
        # step 1: if JSON output was requested, create the Detections object mentioned above
        if self.save_json and output_dir is not None:
            self.detections = Detections(output_dir, output_name)
        # step 2: initialize the YOLACT network
        with torch.no_grad():
            set_cfg("yolact_base_config")
            torch.cuda.set_device(1)
            cudnn.benchmark = True
            cudnn.fastest = True
            torch.set_default_tensor_type('torch.cuda.FloatTensor')
            self.net = Yolact()
            # TODO: the weights path here needs to be adjusted
            # self.net.load_weights('./weights/yolact_base_54_800000.pth')
            self.net.load_weights(trained_model)
            self.net.eval()
            self.net = self.net.cuda()
        print("load model complete")

    def run_once(self, src, image_name):
        """
        Run prediction on a single image. Parameters:
            - src           # ? the image to run prediction on
            - image_name    image name # ? presumably just the image file name
        """
        # step 0: preparation
        self.net.detect.cross_class_nms = True
        self.net.detect.use_fast_nms = True
        cfg.mask_proto_debug = False
        # step 1: prediction
        with torch.no_grad():
            frame = torch.Tensor(src).cuda().float()
            batch = FastBaseTransform()(frame.unsqueeze(0))
            time_start = time.perf_counter()  # time.clock() was removed in Python 3.8
            preds = self.net(batch)
            time_elapsed = time.perf_counter() - time_start
            h, w, _ = src.shape
            # NOTICE: no minimum score threshold is set here
            t = postprocess(
                preds,
                w,
                h,
                visualize_lincomb=False,
                crop_masks=True,
                score_threshold=0.)  # TODO: give a suitable threshold
            torch.cuda.synchronize()
            classes, scores, boxes, masks = [
                x[:self.output_num].cpu().numpy() for x in t
            ]  # TODO: Only 5 objects for test
            print(time_elapsed)
            # Add every predicted instance to the Detections object
            for i in range(masks.shape[0]):
                self.detections.add_instance(image_name, i, classes[i],
                                             boxes[i, :], masks[i, :, :],
                                             scores[i])
        # step 2: dump all prediction results
        self.detections.dump_all()
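
# Usage sketch (hedged, not part of the original snippet): the wrapper is meant to be fed
# images one by one; the weight path, output directory and image name below are placeholders.
#     runner = RunYolact(trained_model='weights/yolact_base_54_800000.pth',
#                        save_json=True, output_dir='detections_out')
#     img = cv2.imread('test.jpg')
#     runner.run_once(img, 'test.jpg')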