예제 #1
0
    def prep_display(self,
                     dets_out,
                     img,
                     h,
                     w,
                     undo_transform=True,
                     class_color=False,
                     mask_alpha=0.45):
        """
        Note: If undo_transform=False then im_h and im_w are allowed to be None.
        """
        if undo_transform:
            img_numpy = undo_image_transformation(img, w, h)
            img_gpu = torch.Tensor(img_numpy).cuda()
        else:
            img_gpu = img / 255.0
            h, w, _ = img.shape

        with timer.env('Postprocess'):
            t = postprocess(dets_out,
                            w,
                            h,
                            visualize_lincomb=args.display_lincomb,
                            crop_masks=args.crop,
                            score_threshold=args.score_threshold)
            torch.cuda.synchronize()

        with timer.env('Copy'):
            if cfg.eval_mask_branch:
                # Masks are drawn on the GPU, so don't copy
                masks = t[3][:args.top_k]
            classes, scores, boxes = [
                x[:args.top_k].cpu().numpy() for x in t[:3]
            ]

        num_dets_to_consider = min(args.top_k, classes.shape[0])
        for j in range(num_dets_to_consider):
            if scores[j] < args.score_threshold:
                num_dets_to_consider = j
                break

        if num_dets_to_consider == 0:
            # No detections found so just output the original image
            return (img_gpu * 255).byte().cpu().numpy()

        # Quick and dirty lambda for selecting the color for a particular index
        # Also keeps track of a per-gpu color cache for maximum speed
        def get_color(j, on_gpu=None):
            global color_cache
            color_idx = (classes[j] * 5 if class_color else j *
                         5) % len(COLORS)

            if on_gpu is not None and color_idx in color_cache[on_gpu]:
                return color_cache[on_gpu][color_idx]
            else:
                color = COLORS[color_idx]
                if not undo_transform:
                    # The image might come in as RGB or BRG, depending
                    color = (color[2], color[1], color[0])
                if on_gpu is not None:
                    color = torch.Tensor(color).to(on_gpu).float() / 255.
                    color_cache[on_gpu][color_idx] = color
                return color

        # First, draw the masks on the GPU where we can do it really fast
        # Beware: very fast but possibly unintelligible mask-drawing code ahead
        # I wish I had access to OpenGL or Vulkan but alas, I guess Pytorch tensor operations will have to suffice
        if args.display_masks and cfg.eval_mask_branch:
            # After this, mask is of size [num_dets, h, w, 1]
            masks = masks[:num_dets_to_consider, :, :, None]

            # Prepare the RGB images for each mask given their color (size [num_dets, h, w, 1])
            colors = torch.cat([
                get_color(j, on_gpu=img_gpu.device.index).view(1, 1, 1, 3)
                for j in range(num_dets_to_consider)
            ],
                               dim=0)
            masks_color = masks.repeat(1, 1, 1, 3) * colors * mask_alpha

            # This is 1 everywhere except for 1-mask_alpha where the mask is
            inv_alph_masks = masks * (-mask_alpha) + 1

            # I did the math for this on pen and paper. This whole block should be equivalent to:
            #    for j in range(num_dets_to_consider):
            #        img_gpu = img_gpu * inv_alph_masks[j] + masks_color[j]
            masks_color_summand = masks_color[0]
            if num_dets_to_consider > 1:
                inv_alph_cumul = inv_alph_masks[:(num_dets_to_consider -
                                                  1)].cumprod(dim=0)
                masks_color_cumul = masks_color[1:] * inv_alph_cumul
                masks_color_summand += masks_color_cumul.sum(dim=0)

            img_gpu = img_gpu * inv_alph_masks.prod(
                dim=0) + masks_color_summand

        # Then draw the stuff that needs to be done on the cpu
        # Note, make sure this is a uint8 tensor or opencv will not anti alias text for whatever reason
        img_numpy = (img_gpu * 255).byte().cpu().numpy()

        if args.display_text or args.display_bboxes:
            str_ = ""
            for j in reversed(range(num_dets_to_consider)):
                x1, y1, x2, y2 = boxes[j, :]
                color = get_color(j)
                score = scores[j]

                if args.display_bboxes:
                    cv2.rectangle(img_numpy, (x1, y1), (x2, y2), color, 1)

                if args.display_text:
                    _class = cfg.dataset.class_names[classes[j]]
                    text_str = '%s: %.2f' % (
                        _class, score) if args.display_scores else _class

                    font_face = cv2.FONT_HERSHEY_DUPLEX
                    font_scale = 0.6
                    font_thickness = 1

                    text_w, text_h = cv2.getTextSize(text_str, font_face,
                                                     font_scale,
                                                     font_thickness)[0]

                    text_pt = (x1, y1 - 3)
                    text_color = [255, 255, 255]

                    cv2.rectangle(img_numpy, (x1, y1),
                                  (x1 + text_w, y1 - text_h - 4), color, -1)
                    cv2.putText(img_numpy, text_str, text_pt, font_face,
                                font_scale, text_color, font_thickness,
                                cv2.LINE_AA)

                    #pub = rospy.Publisher('chatter',String,queue_size=10)
                    #rate = rospy.Rate(50) #10hz
                    #str_ += text_str
            #rospy.loginfo(str_)
            #pub.publish(str_)
            #rate.sleep()

        return img_numpy
예제 #2
0
def prep_display(
    dets_out,
    img,
    h,
    w,
    cfg: YolactConfig,
    undo_transform=True,
    class_color=False,
    mask_alpha=0.45,
    fps_str="",
    display_lincomb=False,
):
    """
    Note: If undo_transform=False then im_h and im_w are allowed to be None.
    """
    if undo_transform:
        img_numpy = undo_image_transformation(img, w, h)
        img_gpu = torch.Tensor(img_numpy).cuda()
    else:
        img_gpu = img / 255.0
        h, w, _ = img.shape

    with timer.env("Postprocess"):
        save = cfg.rescore_bbox
        cfg.rescore_bbox = True
        t = postprocess(dets_out, w, h)
        cfg.rescore_bbox = save

    with timer.env("Copy"):
        idx = t[1].argsort(0, descending=True)  # [:args.top_k]

        if cfg.eval_mask_branch:
            # Masks are drawn on the GPU, so don't copy
            masks = t[3][idx]
        classes, scores, boxes = [x[idx].cpu().numpy() for x in t[:3]]

    num_dets_to_consider = classes.shape[0]

    # Quick and dirty lambda for selecting the color for a particular index
    # Also keeps track of a per-gpu color cache for maximum speed
    def get_color(j, on_gpu=None):
        global color_cache
        color_idx = (classes[j] * 5 if class_color else j * 5) % len(COLORS)

        if on_gpu is not None and color_idx in color_cache[on_gpu]:
            return color_cache[on_gpu][color_idx]
        else:
            color = COLORS[color_idx]
            if not undo_transform:
                # The image might come in as RGB or BRG, depending
                color = (color[2], color[1], color[0])
            if on_gpu is not None:
                color = torch.Tensor(color).to(on_gpu).float() / 255.0
                color_cache[on_gpu][color_idx] = color
            return color

    # First, draw the masks on the GPU where we can do it really fast
    # Beware: very fast but possibly unintelligible mask-drawing code ahead
    # I wish I had access to OpenGL or Vulkan but alas, I guess Pytorch tensor operations will have to suffice
    if args.display_masks and cfg.eval_mask_branch and num_dets_to_consider > 0:
        # After this, mask is of size [num_dets, h, w, 1]
        masks = masks[:num_dets_to_consider, :, :, None]

        # Prepare the RGB images for each mask given their color (size [num_dets, h, w, 1])
        colors = torch.cat(
            [
                get_color(j, on_gpu=img_gpu.device.index).view(1, 1, 1, 3)
                for j in range(num_dets_to_consider)
            ],
            dim=0,
        )
        masks_color = masks.repeat(1, 1, 1, 3) * colors * mask_alpha

        # This is 1 everywhere except for 1-mask_alpha where the mask is
        inv_alph_masks = masks * (-mask_alpha) + 1

        # I did the math for this on pen and paper. This whole block should be equivalent to:
        #    for j in range(num_dets_to_consider):
        #        img_gpu = img_gpu * inv_alph_masks[j] + masks_color[j]
        masks_color_summand = masks_color[0]
        if num_dets_to_consider > 1:
            inv_alph_cumul = inv_alph_masks[: (num_dets_to_consider - 1)].cumprod(dim=0)
            masks_color_cumul = masks_color[1:] * inv_alph_cumul
            masks_color_summand += masks_color_cumul.sum(dim=0)

        img_gpu = img_gpu * inv_alph_masks.prod(dim=0) + masks_color_summand

    if args.display_fps:
        # Draw the box for the fps on the GPU
        font_face = cv2.FONT_HERSHEY_DUPLEX
        font_scale = 0.6
        font_thickness = 1

        text_w, text_h = cv2.getTextSize(
            fps_str, font_face, font_scale, font_thickness
        )[0]

        img_gpu[0 : text_h + 8, 0 : text_w + 8] *= 0.6  # 1 - Box alpha

    # Then draw the stuff that needs to be done on the cpu
    # Note, make sure this is a uint8 tensor or opencv will not anti alias text for whatever reason
    img_numpy = (img_gpu * 255).byte().cpu().numpy()

    if args.display_fps:
        # Draw the text on the CPU
        text_pt = (4, text_h + 2)
        text_color = [255, 255, 255]

        cv2.putText(
            img_numpy,
            fps_str,
            text_pt,
            font_face,
            font_scale,
            text_color,
            font_thickness,
            cv2.LINE_AA,
        )

    if num_dets_to_consider == 0:
        return img_numpy

    if args.display_text or args.display_bboxes:
        for j in reversed(range(num_dets_to_consider)):
            x1, y1, x2, y2 = boxes[j, :]
            color = get_color(j)
            score = scores[j]

            if args.display_bboxes:
                cv2.rectangle(img_numpy, (x1, y1), (x2, y2), color, 1)

            if args.display_text:
                _class = cfg.dataset.class_names[classes[j]]
                text_str = (
                    "%s: %.2f" % (_class, score) if args.display_scores else _class
                )

                font_face = cv2.FONT_HERSHEY_DUPLEX
                font_scale = 0.6
                font_thickness = 1

                text_w, text_h = cv2.getTextSize(
                    text_str, font_face, font_scale, font_thickness
                )[0]

                text_pt = (x1, y1 - 3)
                text_color = [255, 255, 255]

                cv2.rectangle(
                    img_numpy, (x1, y1), (x1 + text_w, y1 - text_h - 4), color, -1
                )
                cv2.putText(
                    img_numpy,
                    text_str,
                    text_pt,
                    font_face,
                    font_scale,
                    text_color,
                    font_thickness,
                    cv2.LINE_AA,
                )

    return img_numpy
예제 #3
0
    def prep_display(self,
                     dets_out,
                     img,
                     h,
                     w,
                     undo_transform=True,
                     class_color=False,
                     mask_alpha=0.45,
                     image_header=Header()):
        """
        Note: If undo_transform=False then im_h and im_w are allowed to be None.
        """
        with torch.no_grad():
            detections = Detections()

            if undo_transform:
                img_numpy = undo_image_transformation(img, w, h)
                img_gpu = torch.Tensor(img_numpy).cuda()
            else:
                img_gpu = img / 255.0
                h, w, _ = img.shape

            with timer.env('Postprocess'):
                t = postprocess(dets_out,
                                w,
                                h,
                                visualize_lincomb=args.display_lincomb,
                                crop_masks=args.crop,
                                score_threshold=args.score_threshold)
                torch.cuda.synchronize()

            with timer.env('Copy'):
                if cfg.eval_mask_branch:
                    # Masks are drawn on the GPU, so don't copy
                    masks = t[3][:args.top_k]
                classes, scores, boxes = [
                    x[:args.top_k].cpu().numpy() for x in t[:3]
                ]

            num_dets_to_consider = min(args.top_k, classes.shape[0])
            for j in range(num_dets_to_consider):
                if scores[j] < args.score_threshold:
                    num_dets_to_consider = j
                    break

            if num_dets_to_consider == 0:
                # No detections found so just output the original image
                return (img_gpu * 255).byte().cpu().numpy()

            # Quick and dirty lambda for selecting the color for a particular index
            # Also keeps track of a per-gpu color cache for maximum speed
            def get_color(j, on_gpu=None):
                global color_cache
                color_idx = (classes[j] * 5 if class_color else j *
                             5) % len(COLORS)

                if on_gpu is not None and color_idx in color_cache[on_gpu]:
                    return color_cache[on_gpu][color_idx]
                else:
                    color = COLORS[color_idx]
                    if not undo_transform:
                        # The image might come in as RGB or BRG, depending
                        color = (color[2], color[1], color[0])
                    if on_gpu is not None:
                        color = torch.Tensor(color).to(on_gpu).float() / 255.
                        color_cache[on_gpu][color_idx] = color
                    return color

            # First, draw the masks on the GPU where we can do it really fast
            # Beware: very fast but possibly unintelligible mask-drawing code ahead
            # I wish I had access to OpenGL or Vulkan but alas, I guess Pytorch tensor operations will have to suffice
            if args.display_masks and cfg.eval_mask_branch:
                # After this, mask is of size [num_dets, h, w, 1]
                masks = masks[:num_dets_to_consider, :, :, None]

                # Prepare the RGB images for each mask given their color (size [num_dets, h, w, 1])
                colors = torch.cat([
                    get_color(j, on_gpu=img_gpu.device.index).view(1, 1, 1, 3)
                    for j in range(num_dets_to_consider)
                ],
                                   dim=0)
                masks_color = masks.repeat(1, 1, 1, 3) * colors * mask_alpha

                # This is 1 everywhere except for 1-mask_alpha where the mask is
                inv_alph_masks = masks * (-mask_alpha) + 1

                # I did the math for this on pen and paper. This whole block should be equivalent to:
                #    for j in range(num_dets_to_consider):
                #        img_gpu = img_gpu * inv_alph_masks[j] + masks_color[j]
                masks_color_summand = masks_color[0]
                if num_dets_to_consider > 1:
                    inv_alph_cumul = inv_alph_masks[:(num_dets_to_consider -
                                                      1)].cumprod(dim=0)
                    masks_color_cumul = masks_color[1:] * inv_alph_cumul
                    masks_color_summand += masks_color_cumul.sum(dim=0)

                img_gpu = img_gpu * inv_alph_masks.prod(
                    dim=0) + masks_color_summand

            # Then draw the stuff that needs to be done on the cpu
            # Note, make sure this is a uint8 tensor or opencv will not anti alias text for whatever reason
            img_numpy = (img_gpu * 255).byte().cpu().numpy()

            print("Num dets: ", num_dets_to_consider)
            if args.display_text or args.display_bboxes:
                for j in reversed(range(num_dets_to_consider)):
                    x1, y1, x2, y2 = boxes[j, :]
                    color = get_color(j)
                    score = scores[j]

                    if args.display_bboxes:
                        cv2.rectangle(img_numpy, (x1, y1), (x2, y2), color, 2)

                    if args.display_text:
                        _class = cfg.dataset.class_names[classes[j]]
                        text_str = '%s: %.2f' % (
                            _class, score) if args.display_scores else _class

                        font_face = cv2.FONT_HERSHEY_DUPLEX
                        font_scale = 0.6
                        font_thickness = 1

                        text_w, text_h = cv2.getTextSize(
                            text_str, font_face, font_scale, font_thickness)[0]

                        text_pt = (x1, y1 - 10)
                        text_color = [255, 255, 255]

                        cv2.rectangle(img_numpy, (x1, y1),
                                      (x1 + text_w, y1 - text_h - 4), color,
                                      -1)
                        cv2.putText(img_numpy, text_str, text_pt, font_face,
                                    font_scale, text_color, font_thickness,
                                    cv2.LINE_AA)

                    det = Detection()
                    det.box.x1 = x1
                    det.box.y1 = y1
                    det.box.x2 = x2
                    det.box.y2 = y2
                    det.class_name = _class
                    det.score = score
                    mask_shape = np.shape(masks[j])
                    #print("Shape: ", mask_shape)
                    #mask_bb = np.squeeze(masks[j].cpu().numpy(), axis=2)[y1:y2,x1:x2] # Crop
                    mask_bb = np.squeeze(
                        masks[j].cpu().numpy(),
                        axis=2)[:, :]  # Every mask (1280 * 720)
                    #print("Box: x1:", x1,", x2: ",x2,", y1: ",y1,", y2: ",y2)
                    #print("Mask in box shape: ", np.shape(mask_bb))
                    mask_rs = np.reshape(mask_bb, -1)
                    #print("New shape: ", np.shape(mask_rs))
                    #print("Mask:\n",mask_bb)
                    det.mask.height = y2 - y1
                    det.mask.width = x2 - x1
                    det.mask.mask = np.array(mask_rs, dtype=bool)
                    detections.detections.append(det)
                detections.header.stamp = image_header.stamp
                detections.header.frame_id = image_header.frame_id

            self.detections_pub.publish(detections)
            self.get_orientation_from_mask(num_dets_to_consider, img_numpy,
                                           detections, masks)

            try:
                self.image_pub.publish(
                    self.bridge.cv2_to_imgmsg(img_numpy, "bgr8"))
            except CvBridgeError as e:
                print(e)
def prep_display(dets_out, img, h, w, undo_transform=True, class_color=False, mask_alpha=0.45, fps_str=''):
    """
    Note: If undo_transform=False then im_h and im_w are allowed to be None.
    """
    if undo_transform:
        img_numpy = undo_image_transformation(img, w, h)
        img_gpu = torch.Tensor(img_numpy).cuda()
    else:
        img_gpu = img / 255.0
        h, w, _ = img.shape
    
    with timer.env('Postprocess'):
        save = cfg.rescore_bbox
        cfg.rescore_bbox = True
        t = postprocess(dets_out, w, h, visualize_lincomb = args.display_lincomb,
                                        crop_masks        = args.crop,
                                        score_threshold   = args.score_threshold)
        cfg.rescore_bbox = save

    with timer.env('Copy'):
        idx = t[1].argsort(0, descending=True)[:args.top_k]
        
        if cfg.eval_mask_branch:
            # Masks are drawn on the GPU, so don't copy
            masks = t[3][idx]
        classes, scores, boxes = [x[idx].cpu().numpy() for x in t[:3]]
        
        if args.only_person:
            for i, _class in enumerate(classes):
                if _class != 0:
                    scores[i] = -1

    num_dets_to_consider = min(args.top_k, classes.shape[0])
    for j in range(num_dets_to_consider):
        if scores[j] < args.score_threshold:
            num_dets_to_consider = j
            break

    # Quick and dirty lambda for selecting the color for a particular index
    # Also keeps track of a per-gpu color cache for maximum speed
    def get_color(j, on_gpu=None):
        global color_cache
        color_idx = (classes[j] * 5 if class_color else j * 5) % len(COLORS)
        
        if on_gpu is not None and color_idx in color_cache[on_gpu]:
            return color_cache[on_gpu][color_idx]
        else:
            color = COLORS[color_idx]
            if not undo_transform:
                # The image might come in as RGB or BRG, depending
                color = (color[2], color[1], color[0])
            if on_gpu is not None:
                color = torch.Tensor(color).to(on_gpu).float() / 255.
                color_cache[on_gpu][color_idx] = color
            return color

    # First, draw the masks on the GPU where we can do it really fast
    # Beware: very fast but possibly unintelligible mask-drawing code ahead
    # I wish I had access to OpenGL or Vulkan but alas, I guess Pytorch tensor operations will have to suffice
    if (args.display_masks or args.identify_people) and cfg.eval_mask_branch and num_dets_to_consider > 0:
        # After this, mask is of size [num_dets, h, w, 1]
        masks = masks[:num_dets_to_consider, :, :, None]
        
        # Prepare the RGB images for each mask given their color (size [num_dets, h, w, 1])
        colors = torch.cat([get_color(j, on_gpu=img_gpu.device.index).view(1, 1, 1, 3) for j in range(num_dets_to_consider)], dim=0)
        masks_color = masks.repeat(1, 1, 1, 3) * colors * mask_alpha

        # This is 1 everywhere except for 1-mask_alpha where the mask is
        inv_alph_masks = masks * (-mask_alpha) + 1
        
        # I did the math for this on pen and paper. This whole block should be equivalent to:
        #    for j in range(num_dets_to_consider):
        #        img_gpu = img_gpu * inv_alph_masks[j] + masks_color[j]
        masks_color_summand = masks_color[0]
        if num_dets_to_consider > 1:
            inv_alph_cumul = inv_alph_masks[:(num_dets_to_consider-1)].cumprod(dim=0)
            masks_color_cumul = masks_color[1:] * inv_alph_cumul
            masks_color_summand += masks_color_cumul.sum(dim=0)
        
        
        if args.identify_people:
            # Key = original detection index. Value = person index.
            det_to_person_index = {}
            prep_silh_images = np.empty((0, 299, 299, 3))
            for i in range(num_dets_to_consider):
                _class = cfg.dataset.class_names[classes[i]]
                
                if _class == "person":
                    x1, y1, x2, y2 = boxes[i, :]
        
                    silh_image = (img_gpu * masks[i] * 255)[y1:(y2+1), x1:(x2+1), [2, 1, 0]]
                    numpy_silh_image = silh_image.byte().cpu().numpy()
        
                    prep_silh_image, _ = data.dataset.preprocess(numpy_silh_image, None, 299)
                    prep_silh_images = np.vstack((prep_silh_images, np.expand_dims(prep_silh_image, axis=0)))
                    
                    det_to_person_index[i] = prep_silh_images.shape[0] - 1
        
                    # cv2.imshow("mask", numpy_silh_image)
                    # while cv2.waitKey(1) != ord("q"):
                    #     pass
        
            # data.dataset.show_batch(prep_silh_images, [0, 1, 2], ["prova1", "prova2", "prova3"])
            # pickle.dump(prep_silh_images, open("prep_silh_images.pkl", "wb"))
        
            raw_person_preds = person_classifier.predict(prep_silh_images)
            person_preds = np.argmax(raw_person_preds, axis=1)
            person_scores = np.max(raw_person_preds, axis=1)
            print(person_preds, person_scores)
        
        img_gpu = img_gpu * inv_alph_masks.prod(dim=0) + masks_color_summand
    
    if args.display_fps:
            # Draw the box for the fps on the GPU
        font_face = cv2.FONT_HERSHEY_DUPLEX
        font_scale = 0.6
        font_thickness = 1

        text_w, text_h = cv2.getTextSize(fps_str, font_face, font_scale, font_thickness)[0]

        img_gpu[0:text_h+8, 0:text_w+8] *= 0.6 # 1 - Box alpha


    # Then draw the stuff that needs to be done on the cpu
    # Note, make sure this is a uint8 tensor or opencv will not anti alias text for whatever reason
    img_numpy = (img_gpu * 255).byte().cpu().numpy()

    if args.display_fps:
        # Draw the text on the CPU
        text_pt = (4, text_h + 2)
        text_color = [255, 255, 255]

        cv2.putText(img_numpy, fps_str, text_pt, font_face, font_scale, text_color, font_thickness, cv2.LINE_AA)
    
    if num_dets_to_consider == 0:
        return img_numpy

    if args.display_text or args.display_bboxes:
        if args.identify_people:
            with open("data/casia_gait/DatasetB_split_reduced/demo_class_names.txt", "r") as person_classes_file:
                person_classes = person_classes_file.read().splitlines()
            
        for j in reversed(range(num_dets_to_consider)):
            x1, y1, x2, y2 = boxes[j, :]
            color = get_color(j)
            score = scores[j]

            if args.display_bboxes:
                cv2.rectangle(img_numpy, (x1, y1), (x2, y2), color, 1)

            if args.display_text:
                _class = cfg.dataset.class_names[classes[j]]
                
                if args.identify_people and (j in det_to_person_index):
                    person_index = det_to_person_index[j]
                    person_pred = person_preds[person_index]
                    
                    _class = person_classes[person_pred]
                    
                    score = person_scores[person_index]
                    
                text_str = '%s: %.2f' % (_class, score) if args.display_scores else _class

                font_face = cv2.FONT_HERSHEY_DUPLEX
                font_scale = 0.6
                font_thickness = 1

                text_w, text_h = cv2.getTextSize(text_str, font_face, font_scale, font_thickness)[0]

                text_pt = (x1, y1 - 3)
                text_color = [255, 255, 255]

                cv2.rectangle(img_numpy, (x1, y1), (x1 + text_w, y1 - text_h - 4), color, -1)
                cv2.putText(img_numpy, text_str, text_pt, font_face, font_scale, text_color, font_thickness, cv2.LINE_AA)
            
    
    return img_numpy