Exemplo n.º 1
0
def main():
    """Start the ``yolact_ros`` node: configure YOLACT, load the network, spin.

    Reads the module-level ``args`` populated by ``parse_args()`` and mutates
    the module-level ``cfg``. When ``args.resume`` is set without
    ``args.display``, the pickled AP data is evaluated with ``calc_map`` and
    the process exits before any model is loaded.
    """
    parse_args()

    rospy.init_node('yolact_ros', anonymous=True)

    if args.config is None:
        # No config given: derive its name from the weights file name.
        parsed_weights = SavePath.from_str(args.trained_model)
        # TODO: Bad practice? Probably want to do a name lookup instead.
        args.config = parsed_weights.model_name + '_config'
        print('Config not specified. Parsed %s from the file name.\n' %
              args.config)
    # At this point args.config is set, either by the user or derived above.
    set_cfg(args.config)

    if args.detect:
        cfg.eval_mask_branch = False

    if args.dataset is not None:
        set_dataset(args.dataset)

    with torch.no_grad():
        if not os.path.exists('results'):
            os.makedirs('results')

        # Pick the default tensor type; the cudnn flags are only touched
        # when CUDA was requested.
        default_tensor_type = 'torch.FloatTensor'
        if args.cuda:
            cudnn.benchmark = True
            cudnn.fastest = True
            default_tensor_type = 'torch.cuda.FloatTensor'
        torch.set_default_tensor_type(default_tensor_type)

        if args.resume and not args.display:
            # Only re-evaluate previously stored AP data, then quit.
            with open(args.ap_data_file, 'rb') as ap_file:
                stored_ap_data = pickle.load(ap_file)
            calc_map(stored_ap_data)
            exit()

        print('Loading model...', end='')
        net = Yolact()
        net.load_weights(args.trained_model)
        net.eval()
        print(' Done.')

        if args.cuda:
            net = net.cuda()

        net.detect.use_fast_nms = True
        cfg.mask_proto_debug = False

        # Held in a local so the object stays referenced while the node spins.
        detector = DetectImg(net)

    try:
        rospy.spin()
    except KeyboardInterrupt:
        print("Shutting down")
    cv2.destroyAllWindows()
Exemplo n.º 2
0
        if args.resume and not args.display:
            with open(args.ap_data_file, 'rb') as f:
                ap_data = pickle.load(f)
            calc_map(ap_data)
            exit()

        if args.image is None and args.video is None and args.images is None:
            dataset = COCODetection(cfg.dataset.valid_images,
                                    cfg.dataset.valid_info,
                                    transform=BaseTransform(),
                                    has_gt=cfg.dataset.has_gt)
            prep_coco_cats()
        else:
            dataset = None

        print('Loading model...', end='')
        net = Yolact()
        net.load_weights(args.trained_model)
        net.eval()
        print(' Done.')

        if args.cuda:
            net = net.cuda()

        net.detect.use_fast_nms = args.fast_nms
        cfg.mask_proto_debug = args.mask_proto_debug

        detect_ = detect()
        detect_.evalvideo(net, args.video)
Exemplo n.º 3
0
# print(opt.config)

# Pose estimation network: build it, move it to the GPU, load the weights
# from `opt.model`, and switch it to evaluation mode.
estimator = PoseNet(num_points=num_points, num_obj=num_obj)
estimator.cuda()
estimator.load_state_dict(torch.load(opt.model))
estimator.eval()

# Pose refinement network: same sequence, with weights from
# `opt.refine_model`.
refiner = PoseRefineNet(num_points=num_points, num_obj=num_obj)
refiner.cuda()
refiner.load_state_dict(torch.load(opt.refine_model))
refiner.eval()

# YOLACT network: weights are loaded and eval mode is set first, then the
# model is moved to the GPU.
yolact = Yolact()
yolact.load_weights(opt.trained_model)
yolact.eval()
yolact.cuda()

# From here on the default tensor type is the CUDA float tensor.
# NOTE(review): this is set after all three models are built — presumably
# intentional so construction/loading uses the previous default; confirm.
torch.set_default_tensor_type('torch.cuda.FloatTensor')
# NMS behaviour of the detection layer is taken from the parsed options.
yolact.detect.use_fast_nms = opt.fast_nms
yolact.detect.use_cross_class_nms = opt.cross_class_nms

# evalimage(net, args.image)

import matplotlib.pyplot as plt


def prep_display(dets_out,
                 img,
                 h,
                 w,
                 undo_transform=True,
Exemplo n.º 4
0
class Yolact_ROS(object):
    """Wrap a YOLACT network for per-frame instance segmentation.

    The constructor configures YOLACT, loads the weights and stores the
    network in ``self.net``. ``prediction`` runs one image through the
    network and returns numpy arrays with masks, class labels, scores and
    bounding boxes; ``display`` optionally shows an OpenCV overlay.
    """

    # Colour tensors keyed by device, then by colour index. Kept on the class
    # so entries survive between calls (a per-call cache can never hit).
    _color_cache = {}

    def __init__(self, model_path, with_cuda, yolact_config, fast_nms,
                 threshold, display_cv, top_k):
        """Configure YOLACT and load the network.

        Args:
            model_path: Path of the weights file given to ``load_weights``.
            with_cuda: If true, enable the cudnn flags, use CUDA float
                tensors by default and move the network to the GPU.
            yolact_config: Config name passed to ``set_cfg``.
            fast_nms: Value stored in ``net.detect.use_fast_nms``.
            threshold: Minimum score for a detection to be reported.
            display_cv: If true, ``prediction`` also calls ``display``.
            top_k: Maximum number of detections kept per image.
        """
        self.top_k = top_k
        self.threshold = threshold
        self.display_cv = display_cv
        print("loading Yolact ...")

        with torch.no_grad():
            set_cfg(yolact_config)
            print("Configuration: ", yolact_config)

            if with_cuda:
                cudnn.benchmark = True
                cudnn.fastest = True
                torch.set_default_tensor_type('torch.cuda.FloatTensor')
            else:
                torch.set_default_tensor_type('torch.FloatTensor')

            print("use cuda: ", with_cuda)

            self.net = Yolact()
            self.net.load_weights(model_path)
            print("Model: ", model_path)
            self.net.eval()

            if with_cuda:
                self.net = self.net.cuda()

            self.net.detect.use_fast_nms = fast_nms
            print("use fast nms: ", fast_nms)
        print("Yolact loaded")

    def prediction(self, img):
        """Run the network on one image and return the detections.

        Args:
            img: Image array; its ``shape`` is unpacked as ``(h, w, _)``.

        Returns:
            A tuple ``(masks_msg, class_label_msg, scores_msg, bboxes_msg)``
            of numpy arrays: ``uint8`` masks, ``S20`` byte-string labels,
            float scores and integer boxes with four values per detection.
        """
        # NOTE(review): other YOLACT callers set ``detect.use_cross_class_nms``;
        # this attribute name may be a typo for that flag — confirm before
        # changing, since it would alter the detection results.
        self.net.detect.cross_class_nms = True
        cfg.mask_proto_debug = False

        with torch.no_grad():
            frame = torch.Tensor(img).cuda().float()
            batch = FastBaseTransform()(frame.unsqueeze(0))
            # time.clock() was removed in Python 3.8; perf_counter() is the
            # documented replacement for measuring elapsed time.
            time_start = time.perf_counter()
            preds = self.net(batch)
            h, w, _ = img.shape
            t = postprocess(preds,
                            w,
                            h,
                            visualize_lincomb=False,
                            crop_masks=True,
                            score_threshold=self.threshold)
            torch.cuda.synchronize()
            # t[:3] are classes, scores and boxes; t[3] holds the masks.
            masks = t[3][:self.top_k]
            classes, scores, bboxes = [
                x[:self.top_k].cpu().numpy() for x in t[:3]
            ]
            time_elapsed = time.perf_counter() - time_start
            num_dets_to_consider = min(self.top_k, classes.shape[0])

            # Cut the list at the first detection below the threshold.
            for i in range(num_dets_to_consider):
                if scores[i] < self.threshold:
                    num_dets_to_consider = i
                    break

            if num_dets_to_consider >= 1:
                # Add a trailing axis so masks broadcast against colours.
                masks = masks[:num_dets_to_consider, :, :, None]

            masks_msg = masks.cpu().detach().numpy()
            masks_msg = masks_msg.astype(np.uint8)
            scores_msg = np.zeros(num_dets_to_consider)
            class_label_msg = np.empty(num_dets_to_consider, dtype="S20")
            bboxes_msg = np.zeros([num_dets_to_consider, 4], dtype=int)
            for i in reversed(range(num_dets_to_consider)):
                scores_msg[i] = scores[i]
                class_label_msg[i] = cfg.dataset.class_names[classes[i]]
                bboxes_msg[i] = bboxes[i]
                print(class_label_msg[i].decode(), "%.2f" % (scores_msg[i]))

            os.system('cls' if os.name == 'nt' else 'clear')
            print("%.2f" % (1 / time_elapsed), "hz")

            if self.display_cv:
                self.display(frame, masks, classes, scores, bboxes,
                             num_dets_to_consider)

            return masks_msg, class_label_msg, scores_msg, bboxes_msg

    def display(self,
                img,
                masks,
                pred_classes,
                scores,
                bboxes,
                num_dets_to_consider,
                mask_alpha=0.75):
        """Blend the masks into the image, draw boxes and labels, show it.

        Args:
            img: Image tensor with values in the 0-255 range.
            masks: Mask tensor, one entry per detection, with a trailing
                axis of size one.
            pred_classes: Class index of each detection.
            scores: Score of each detection.
            bboxes: Array indexed as ``bboxes[i, :] -> x1, y1, x2, y2``.
            num_dets_to_consider: Number of leading detections to draw.
            mask_alpha: Opacity used when blending the mask colours.

        Returns:
            The unmodified image as a numpy array when there is nothing to
            draw; otherwise ``None`` after showing the "yolact" window.
        """
        img_gpu = img / 255.0
        if num_dets_to_consider == 0:
            return (img_gpu * 255).byte().cpu().numpy()

        use_class_color = True
        colors = torch.cat([
            self.get_color(
                i, pred_classes, use_class_color,
                on_gpu=img_gpu.device.index).view(1, 1, 1, 3)
            for i in range(num_dets_to_consider)
        ],
                           dim=0)
        masks_color = masks.repeat(1, 1, 1, 3) * colors * mask_alpha
        # 1 - alpha inside a mask, 1 outside: how much of what lies
        # underneath remains visible.
        inv_alph_masks = masks * (-mask_alpha) + 1
        masks_color_summand = masks_color[0]

        if num_dets_to_consider > 1:
            # Each later mask is attenuated by all the masks before it.
            inv_alph_cumul = inv_alph_masks[:(num_dets_to_consider -
                                              1)].cumprod(dim=0)
            masks_color_cumul = masks_color[1:] * inv_alph_cumul
            masks_color_summand += masks_color_cumul.sum(dim=0)

        img_gpu = img_gpu * inv_alph_masks.prod(dim=0) + masks_color_summand
        img_numpy = (img_gpu * 255).byte().cpu().numpy()

        font_face = cv2.FONT_HERSHEY_DUPLEX
        font_scale = 0.6
        font_thickness = 1
        text_color = [255, 255, 255]

        for i in reversed(range(num_dets_to_consider)):
            x1, y1, x2, y2 = bboxes[i, :]
            color = self.get_color(i, pred_classes, use_class_color)
            score = scores[i]
            cv2.rectangle(img_numpy, (x1, y1), (x2, y2), color, 1)
            _class = cfg.dataset.class_names[pred_classes[i]]
            text_str = '%s: %.2f' % (_class, score)
            text_w, text_h = cv2.getTextSize(text_str, font_face, font_scale,
                                             font_thickness)[0]
            text_pt = (x1, y1 - 3)
            # Filled background box behind the label text.
            cv2.rectangle(img_numpy, (x1, y1), (x1 + text_w, y1 - text_h - 4),
                          color, -1)
            cv2.putText(img_numpy, text_str, text_pt, font_face, font_scale,
                        text_color, font_thickness, cv2.LINE_AA)

        cv2.imshow("yolact", img_numpy)
        cv2.waitKey(1)

    def get_color(self, i, pred_classes, class_color, on_gpu=None):
        """Return the colour for detection ``i``.

        Args:
            i: Index of the detection.
            pred_classes: Class index per detection.
            class_color: If true the colour depends on the class, otherwise
                on the detection index.
            on_gpu: Device passed to ``Tensor.to``; ``None`` requests the
                plain ``COLORS`` entry instead of a tensor.

        Returns:
            The ``COLORS`` entry when ``on_gpu`` is ``None``; otherwise a
            float tensor on ``on_gpu`` scaled to the 0-1 range. Tensors are
            cached per device and colour index.
        """
        color_idx = (pred_classes[i] * 5 if class_color else i *
                     5) % len(COLORS)

        if on_gpu is None:
            return COLORS[color_idx]

        device_cache = self._color_cache.setdefault(on_gpu, {})
        if color_idx not in device_cache:
            device_cache[color_idx] = (
                torch.Tensor(COLORS[color_idx]).to(on_gpu).float() / 255.)
        return device_cache[color_idx]
class Real_time_yolact():
    """Load a YOLACT network once and extract a filled mask per image."""

    def __init__(self, cuda=True, detect=False):
        """Configure YOLACT and load the weights into ``self.net``.

        Args:
            cuda: If true, set ``cudnn.fastest``, use CUDA float tensors by
                default and move the network to the GPU.
            detect: If true, disable ``cfg.eval_mask_branch``.
        """
        self.trained_model = 'yolact/weights/yolact_im400_53_7000.pth'
        self.config = 'yolact_base_config'

        if self.config is not None:
            yolact_module.set_cfg(self.config)

        # Resolve the special names to a real weights path. The result is
        # stored on the instance so load_weights() below receives the path
        # rather than the literal 'interrupt' / 'latest'.
        if self.trained_model == 'interrupt':
            self.trained_model = yolact_module.SavePath.get_interrupt(
                'weights/')
        elif self.trained_model == 'latest':
            self.trained_model = yolact_module.SavePath.get_latest(
                'weights/', cfg.name)

        if self.config is None:
            # Read the instance attribute: it is always bound, unlike a
            # local that is only assigned in the branches above.
            model_path = yolact_module.SavePath.from_str(self.trained_model)
            # TODO: Bad practice? Probably want to do a name lookup instead.
            config = model_path.model_name + '_config'
            print('Config not specified. Parsed %s from the file name.\n' %
                  config)
            yolact_module.set_cfg(config)

        if detect:
            cfg.eval_mask_branch = False

        with torch.no_grad():

            if cuda:
                cudnn.fastest = True
                torch.set_default_tensor_type('torch.cuda.FloatTensor')
            else:
                torch.set_default_tensor_type('torch.FloatTensor')

            self.net = Yolact()
            self.net.load_weights(self.trained_model)
            self.net.eval()

            if cuda:
                self.net = self.net.cuda()

            self.net.detect.use_fast_nms = True
            self.net.detect.use_cross_class_nms = False
            cfg.mask_proto_debug = False

    def segmentation(self, img):
        """Return a mask with the largest contour of the first detection.

        Args:
            img: Image as a numpy array; its ``shape`` is unpacked as
                ``(h, w, _)``.

        Returns:
            A 2-D float numpy array. It is all zeros when no mask or no
            contour is found; otherwise the largest contour of the first
            mask is filled with 255.
        """
        with torch.no_grad():
            h, w, _ = img.shape
            frame = torch.from_numpy(img).cuda().float()
            batch = FastBaseTransform()(frame.unsqueeze(0))
            preds = self.net(batch)
            # NOTE(review): positional arguments 5 and 0.5 look like a top-k
            # and a score threshold of this project's prep_display — confirm
            # against its definition.
            _, _, _, masks = yolact_module.prep_display(
                5,
                preds,
                frame,
                0.5,
                h,
                w,
                undo_transform=True,
                class_color=False,
                mask_alpha=0.45,
                fps_str='')

            if not len(masks):
                return np.zeros((img.shape[0], img.shape[1]))
            mask = masks[0].cpu().numpy()

            h, w = mask.shape
            filled_mask = np.zeros([h, w])

            contours = yolact_module.cv_contours(np.uint8(mask))
            if len(contours) == 0:
                # Nothing to fill; avoids asking OpenCV to draw index -1.
                return filled_mask

            # Ascending by area, so the last entry is the biggest contour.
            contours = sorted(contours, key=cv2.contourArea)
            cv2.drawContours(filled_mask, contours, len(contours) - 1, 255,
                             thickness=-1)  # Fills the biggest contour

            return filled_mask

    def process(self, image_1, image_2):
        """Return the ``uint8`` segmentation mask of ``image_2``.

        Args:
            image_1: Not used by this method.
            image_2: Image passed to ``segmentation``.

        Returns:
            The result of ``segmentation`` converted with ``np.uint8``.
        """
        # Get segmentation masks as numpy arrays
        mask_2 = self.segmentation(img=image_2)
        mask_2 = np.uint8(mask_2)

        return mask_2