def predict_batch(predictor: DefaultPredictor, im_list: List[ndarray]) -> List:
    with torch.no_grad():  # https://github.com/sphinx-doc/sphinx/issues/4258
        inputs_list = []
        for original_image in im_list:
            # Apply pre-processing to image.
            if predictor.input_format == "RGB":
                # whether the model expects BGR inputs or RGB
                original_image = original_image[:, :, ::-1]
            height, width = original_image.shape[:2]
            # Do not apply the predictor's default augmentation (a resize).
            # image = predictor.aug.get_transform(original_image).apply_image(original_image)
            image = original_image
            image = torch.as_tensor(image.astype("float32").transpose(2, 0, 1))
            inputs = {"image": image, "height": height, "width": width}
            inputs_list.append(inputs)
        predictions = predictor.model(inputs_list)
        return predictions
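# A minimal usage sketch, not part of the original snippet: it assumes a
# DefaultPredictor built from a model-zoo config and a list of BGR frames read
# with OpenCV (the file paths are hypothetical). Because the resize
# augmentation is skipped above, frames reach the model at native resolution.
import cv2
from detectron2 import model_zoo
from detectron2.config import get_cfg
from detectron2.engine import DefaultPredictor

cfg = get_cfg()
cfg.merge_from_file(model_zoo.get_config_file("COCO-Detection/faster_rcnn_R_50_FPN_3x.yaml"))
cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url("COCO-Detection/faster_rcnn_R_50_FPN_3x.yaml")
predictor = DefaultPredictor(cfg)

frames = [cv2.imread(p) for p in ["frame0.jpg", "frame1.jpg"]]  # hypothetical paths
preds = predict_batch(predictor, frames)
print(preds[0]["instances"].pred_boxes)  # detections for the first frame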
Example #2
def load_model(model_location, threshold, N_classes):
	'''
	Read persisted weights and construct model
	'''

	cfg = get_cfg()
	cfg.merge_from_file(model_zoo.get_config_file(MODEL_CONFIG_FILE))
	
	cfg.OUTPUT_DIR = model_location
	cfg.MODEL.WEIGHTS = os.path.join(cfg.OUTPUT_DIR, "model_final.pth")
	cfg.MODEL.ROI_HEADS.NUM_CLASSES = N_classes
	cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = threshold

	predictor = DefaultPredictor(cfg)

	if torch.cuda.is_available():
		# DefaultPredictor already builds the model on cfg.MODEL.DEVICE (default
		# "cuda"), so this block only matters when that was set to "cpu".
		predictor.model = predictor.model.to('cuda')
		predictor.cfg.MODEL.DEVICE = 'cuda'

	return predictor
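# Hedged usage sketch: MODEL_CONFIG_FILE is assumed to name a model-zoo config
# (the value below is a guess), and "./output" a directory holding the
# model_final.pth produced by a prior training run.
MODEL_CONFIG_FILE = "COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml"  # assumed
predictor = load_model("./output", threshold=0.7, N_classes=3)
outputs = predictor(cv2.imread("test.jpg"))  # hypothetical test image
print(outputs["instances"].pred_classes)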
Example #3
class PlayerDetectorDetectron2:
    def __init__(self, leftSideCorners, rightSideCorners, coordMapper,
                 origResolution, outResolution, segnumx, segnumy,
                 nmsThreshold):
        # Coordinate Mapper class
        self.coordMapper = coordMapper

        self.nmsThreshold = nmsThreshold

        # Info about the frames.
        # Width is double the original, since the two camera frames were hconcat-ed side by side.
        self.origResolution = (origResolution[0] * 2, origResolution[1])  # (width, height)
        self.outResolution = (outResolution[0] * 2, outResolution[1])  # (width, height)
        self.trans_value = float(self.outResolution[0]) / self.origResolution[0]

        # Create the detectron2 model and predictor
        model_name = "COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml"
        self.cfg = get_cfg()
        # add project-specific config (e.g., TensorMask) here if you're not running a model in detectron2's core library
        self.cfg.merge_from_file(model_zoo.get_config_file(model_name))
        self.cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.5  # set threshold for this model
        # Find a model from detectron2's model zoo. You can use the https://dl.fbaipublicfiles... url as well
        self.cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url(model_name)
        self.predictor = DefaultPredictor(self.cfg)

        # Number of cells in the x (width) and y (height) directions;
        # along x there will be segnumx cells on both the left and the right side
        self.segnumx = segnumx
        self.segnumy = segnumy

        # Side lengths of the grid cells
        self.cell_width = (self.outResolution[0] // 2) // self.segnumx
        self.cell_height = self.outResolution[1] // self.segnumy

        # Build the grid
        # Grid = (xTL, yTL, xBR, yBR)
        self.gridList = self._createGridCells()
        print('grids ->', len(self.gridList))

        # Adjust the score as a function of distance from the camera (keyed by yTL)
        self.cameraDistWeight = reversed(
            np.unique(np.asarray(self.gridList)[:, 1]))
        self.cameraDistWeight = {
            yCord: (100 - idx) / 100
            for idx, yCord in enumerate(self.cameraDistWeight)
        }

        self.fieldPolygon = self._getFieldBoundary(leftSideCorners,
                                                   rightSideCorners)

    def _getFieldBoundary(self, left_side_corner_pixels,
                          right_side_corner_pixels_added_half_field):
        # The margin exists because a player running at the outer sideline needs some
        # overhead: if their feet are on the line, their head (and even their waist)
        # would not fit inside the polygon otherwise. ~ Marci
        merged_arr = [[223, 457], [1261, 474], [1914, 522], [2560, 863],
                      [3305, 437], [3937, 435], [5119, 468], [5119, 546],
                      [2560, 1328], [2560, 1439], [2486, 1439], [223, 553]]

        merged_arr = np.array(merged_arr) * (float(
            self.outResolution[0]) / self.origResolution[0])  # outResolution

        return np.array(merged_arr, dtype=np.int32)

    def _getTeamColor(self, full_frame, bbox):
        x, y, w, h = bbox
        img = full_frame[y:y + h, x:x + w]

        ## convert to hsv
        hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)

        # Official HSV ranges of the two teams' shirt colors
        # yellow
        yellow = cv2.inRange(hsv, (20, 100, 100), (30, 255, 255))

        # red
        # Red needs two ranges because the red hue wraps around both ends of the HSV hue cylinder
        red1 = cv2.inRange(hsv, (0, 100, 100), (10, 255, 255))
        red2 = cv2.inRange(hsv, (160, 100, 100), (179, 255, 255))
        red = red1 | red2

        h, w, _ = hsv.shape
        sum_masked = h * w
        yellow_percentage = cv2.countNonZero(yellow) * 100 / sum_masked
        red_percentage = cv2.countNonZero(red) * 100 / sum_masked

        if yellow_percentage > 5 and red_percentage > 5:
            return "multiple players from different teams"
        elif yellow_percentage > 5 or red_percentage > 5:
            return "yellow" if yellow_percentage > red_percentage else "red"
        else:
            return "other"

    def preprocess(self, image):
        '''
        Deletes everything outside the field region.
        '''
        # Create a mask for the field polygon
        null_frame = np.zeros([image.shape[0], image.shape[1]], dtype=np.uint8)
        cv2.fillPoly(null_frame, [self.fieldPolygon], 255)
        # Keep only the pixels inside the field polygon
        frame = cv2.bitwise_and(image, image, mask=null_frame)
        return frame

    def _predictMultipleImages(self, images: list):
        """
        Args:
            images (List[np.ndarray]): original images of shape (H, W, C) (in BGR order).
        Returns:
            predictions (list): one prediction dict per input image.
        """
        # Disable gradient calculation
        with torch.no_grad():
            # Apply pre-processing to every image
            if self.predictor.input_format == "RGB":
                # whether the model expects BGR inputs or RGB
                images = [img[:, :, ::-1] for img in images]
            # create inputs for the Model
            inputs = [{
                "image": img,
                "height": img.shape[0],
                "width": img.shape[1]
            } for img in images]
            for imgDict in inputs:
                imgDict['image'] = self.predictor.aug.get_transform(
                    imgDict['image']).apply_image(imgDict['image'])
                imgDict['image'] = torch.as_tensor(
                    imgDict['image'].astype("float32").transpose(2, 0, 1))

            predictions = self.predictor.model(inputs)
            return predictions

    def _cutImageToGrids(self, image):
        # Grid = (xTL, yTL, xBR, yBR)
        return [
            image[yTL:yBR, xTL:xBR] for xTL, yTL, xBR, yBR in self.gridList
        ]

    def _createGridCells(self):
        # Grid = (xTL, yTL, xBR, yBR)
        #self.gridList = [[222, 454, 2153, 787], [465, 633, 4825, 1439], [3117, 427, 5119, 800]]
        #self.gridList = [[220, 453, 1466, 666], [1408, 433, 3739, 750], [3691, 431, 5119, 738], [421, 600, 4569, 1439]]
        #self.gridList = [[221, 470, 2029, 655], [356, 575, 2560, 1439], [2560, 502, 5117, 1330], [3224, 434, 4613, 559]]
        grids = [[221, 470, 2029, 655], [356, 575, 2560, 1439],
                 [2560, 502, 5117, 1330], [3224, 434, 4613, 559]]
        grids = np.array(grids) * (float(self.outResolution[0]) /
                                   self.origResolution[0])  # outResolution
        return grids.astype(int)

    def _filterHalfMan(self, instances):
        ioaMx = pairwise_ioa(instances.boxes_before,
                             instances.boxes_before).numpy()
        np.fill_diagonal(ioaMx, 0)
        smallBoxIdx = np.max(ioaMx, axis=0)
        smallBoxIdx = np.where(smallBoxIdx > 0.6)[0]

        # The logic: if a small bbox is a subset of another bbox AND it lies on a cell
        # boundary, then it is certainly a half person. This is checked as follows: if
        # the small bbox's center falls inside at least 2 cells, it indicates a half
        # person, since the cells were laid out so that a whole person fits in their overlap.
        print(smallBoxIdx)
        idxToDrop = []
        for i in smallBoxIdx:
            xTL, yTL, xBR, yBR = instances.boxes_before.tensor[i].numpy()
            xCenter, yCenter = (xTL + xBR) / 2, (yTL + yBR) / 2
            numOfCells = np.sum((self.gridList[:, 0] <= xCenter)
                                & (self.gridList[:, 1] <= yCenter)
                                & (self.gridList[:, 2] >= xCenter)
                                & (self.gridList[:, 3] >= yCenter))
            if numOfCells > 1:
                idxToDrop.append(i)
        keepMask = [idx not in idxToDrop for idx in range(len(instances))]

        return instances[keepMask]

    def _detectAndMap(self, image):
        '''
        image: the frame on which we want to detect the players
        returns: (finalInstances, preprocessed frame)
        '''
        st = time.time()
        # 0. Preprocess image
        frame = self.preprocess(image)

        # 1. Cut the image into the small grid cells
        l_cells = self._cutImageToGrids(frame)

        # 2. Feed these cell images to the batched predictor
        l_preds = self._predictMultipleImages(l_cells)

        # 2.1 List of the instances found in each cell
        l_preds = [x['instances'].to('cpu') for x in l_preds]

        # 2.2 Map boxes back onto the input image, then scale them up to the 5K image
        # 2.3 Adjust the score as a function of distance from the camera (yTL)
        for inst, cell in zip(l_preds, self.gridList):
            inst.remove(
                'pred_masks'
            )  # pred_masks is unused; TODO: might be handy for TeamColor
            inst.pred_boxes.tensor[:, 0:4] += torch.Tensor(
                [cell[0], cell[1], cell[0], cell[1]])
            inst.boxes_before = inst.pred_boxes.clone(
            )  # where the boxes are on the original image, before scaling
            inst.pred_boxes.tensor = inst.pred_boxes.tensor.divide(
                self.trans_value)
            inst.scores *= self.cameraDistWeight[cell[1]]

        # 2.4 Instances referenced to the whole image
        finalInstances = Instances(
            image_size=self.origResolution[::-1])  # (1440, 5120)
        finalInstances.pred_boxes = Boxes.cat([x.pred_boxes for x in l_preds])
        finalInstances.boxes_before = Boxes.cat(
            [x.boxes_before for x in l_preds])
        finalInstances.scores = torch.cat([x.scores for x in l_preds])
        finalInstances.pred_classes = torch.cat(
            [x.pred_classes for x in l_preds])

        # 3. Keep only the person detections
        _person_class_ID = 0
        finalInstances = finalInstances[finalInstances.pred_classes ==
                                        _person_class_ID]

        # 4. Use NMS to eliminate overlapping detections
        iouIdx = torchvision.ops.nms(finalInstances.pred_boxes.tensor,
                                     finalInstances.scores, self.nmsThreshold)
        finalInstances = finalInstances[iouIdx]

        # 5. Filter out half-person detections
        finalInstances = self._filterHalfMan(finalInstances)

        return finalInstances, frame

    def detectPlayersOnFrame(self, frame):

        # 1. Detect the players on the frame
        allInstances, frame = self._detectAndMap(frame)

        # By this point, the coordinates in pred_boxes must be in the 1440x5120 space,
        # because that is how the class is implemented
        # 2. Compute their real-world coordinates
        worldcoords_xy = self.coordMapper.image2xy([
            ((box[0] + box[2]) / 2, box[3])
            for box in allInstances.pred_boxes.tensor
        ])

        # 3. Filter out players in the middle (those image2xy could not map)
        maskWorldCoord = [x is not None for x in worldcoords_xy]
        allInstances = allInstances[maskWorldCoord]
        worldcoords_xy = [x for x in worldcoords_xy if x is not None]

        # 4. Build the list containing the detections
        list_result = []
        for bigBox, smallBox, score, worldXY in zip(
                allInstances.pred_boxes.tensor.numpy(),
                allInstances.boxes_before.tensor.numpy(),
                allInstances.scores.numpy(), worldcoords_xy):
            (xTL, yTL), (xBR, yBR) = np.floor(
                smallBox[0:2]).astype(int), np.ceil(smallBox[2:4]).astype(int)
            clipped_img = frame[yTL:yBR, xTL:xBR]
            list_result.append({
                'worldXY': worldXY,
                'box': smallBox,
                'bigBox': bigBox,
                'score': score,
                'image': clipped_img
            })
        return list_result
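# Hedged construction sketch, not from the original repo. The corner arguments
# are unused placeholders (the field polygon above is hard-coded), and
# _DummyMapper is a hypothetical stand-in for the real coordMapper, which only
# needs to expose image2xy().
import numpy as np

class _DummyMapper:
    def image2xy(self, points):
        # Pretend every image point maps to a valid world coordinate.
        return [(float(x), float(y)) for x, y in points]

detector = PlayerDetectorDetectron2(
    leftSideCorners=None, rightSideCorners=None,  # unused placeholders
    coordMapper=_DummyMapper(),
    origResolution=(2560, 1440),  # per-camera (width, height); doubled internally
    outResolution=(2560, 1440),
    segnumx=2, segnumy=2,
    nmsThreshold=0.5)
stitched_frame = np.zeros((1440, 5120, 3), dtype=np.uint8)  # blank hconcat-ed frame
for det in detector.detectPlayersOnFrame(stitched_frame):
    print(det['worldXY'], det['score'])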
Example #4
class PlaneRCNN_Branch:
    def __init__(self, cfg, cpu_device="cpu"):
        self.predictor = DefaultPredictor(cfg)
        self._cpu_device = cpu_device
        self._K_inv_dot_xy_1 = torch.FloatTensor(
            self.get_K_inv_dot_xy_1()).to("cuda")
        self._camera_on = cfg.MODEL.CAMERA_ON
        self._embedding_on = cfg.MODEL.EMBEDDING_ON
        self.img_format = cfg.INPUT.FORMAT

    def inference(
        self,
        img_file1,
        img_file2,
    ):
        """
        input: im0, im1 path.
        """
        im0 = utils.read_image(img_file1, format=self.img_format)
        im1 = utils.read_image(img_file2, format=self.img_format)
        # Equivalent
        # im0 = cv2.imread(img_file1)
        # im1 = cv2.imread(img_file2)

        im0 = cv2.resize(im0, (640, 480))
        im1 = cv2.resize(im1, (640, 480))

        im0 = torch.as_tensor(im0.transpose(2, 0, 1).astype("float32"))
        im1 = torch.as_tensor(im1.transpose(2, 0, 1).astype("float32"))
        with torch.no_grad():
            pred = self.predictor.model([{
                "0": {
                    "image": im0
                },
                "1": {
                    "image": im1
                }
            }])[0]
        return pred

    def process(self, output):
        prediction = {"0": {}, "1": {}}
        tmp_instances = {"0": {}, "1": {}}
        for i in range(2):
            if "instances" in output[str(i)]:
                instances = output[str(i)]["instances"].to(self._cpu_device)
                prediction[str(i)]["instances"] = instances_to_coco_json(
                    instances, "demo")
                prediction[str(i)]["pred_plane"] = output[str(
                    i)]["instances"].pred_plane.to(self._cpu_device)
                tmp_instances[str(i)]["embeddingbox"] = {
                    "pred_boxes": instances.pred_boxes,
                    "scores": instances.scores,
                }
            if "proposals" in output[str(i)]:
                prediction[str(i)]["proposals"] = output[str(
                    i)]["proposals"].to(self._cpu_device)
            if output["depth"][str(i)] is not None:
                prediction[str(i)]["pred_depth"] = output["depth"][str(i)].to(
                    self._cpu_device)
                xyz = self.depth2XYZ(output["depth"][str(i)])
                prediction[str(i)] = self.override_depth(
                    xyz, prediction[str(i)])
        if self._embedding_on:
            if "pred_aff" in output:
                tmp_instances["pred_aff"] = output["pred_aff"].to(
                    self._cpu_device)
            if "geo_aff" in output:
                tmp_instances["geo_aff"] = output["geo_aff"].to(
                    self._cpu_device)
            if "emb_aff" in output:
                tmp_instances["emb_aff"] = output["emb_aff"].to(
                    self._cpu_device)
            prediction["corrs"] = tmp_instances
        if self._camera_on:
            camera_dict = {
                "logits": {
                    "tran": output["camera"]["tran"].to(self._cpu_device),
                    "rot": output["camera"]["rot"].to(self._cpu_device),
                },
                "logits_sms": {
                    "tran":
                    softmax(output["camera"]["tran"].to(self._cpu_device)),
                    "rot":
                    softmax(output["camera"]["rot"].to(self._cpu_device)),
                },
            }
            prediction["camera"] = camera_dict
        return prediction

    def depth2XYZ(self, depth):
        """
        Convert depth to point clouds
        X - width
        Y - depth
        Z - height
        """
        XYZ = self._K_inv_dot_xy_1 * depth
        return XYZ

    @staticmethod
    def get_K_inv_dot_xy_1(h=480, w=640):
        focal_length = 517.97
        offset_x = 320
        offset_y = 240

        K = [[focal_length, 0, offset_x], [0, focal_length, offset_y],
             [0, 0, 1]]

        K_inv = np.linalg.inv(np.array(K))

        K_inv_dot_xy_1 = np.zeros((3, h, w))

        for y in range(h):
            for x in range(w):
                yy = float(y) / h * 480
                xx = float(x) / w * 640

                ray = np.dot(K_inv, np.array([xx, yy, 1]).reshape(3, 1))
                K_inv_dot_xy_1[:, y, x] = ray[:, 0]
        return K_inv_dot_xy_1.reshape(3, h, w)
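    # Hedged sanity check, illustration only: each column of the result is the
    # camera ray K^{-1} [x, y, 1]^T for pixel (x, y); multiplying it by the
    # depth at that pixel yields the 3D point, e.g.
    #   rays = PlaneRCNN_Branch.get_K_inv_dot_xy_1()  # shape (3, 480, 640)
    #   point = rays[:, 240, 320] * 2.0  # pixel at the principal point with
    #                                    # depth 2.0 -> roughly (0, 0, 2.0)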

    @staticmethod
    def override_depth(xyz, instance):
        pred_masks = [p["segmentation"] for p in instance["instances"]]
        override_list = []
        for mask, plane in zip(pred_masks, instance["pred_plane"]):
            bimask = mask_util.decode(mask)
            if bimask.sum() == 0:
                override_list.append(plane)
                continue
            xyz_tmp = xyz[:, torch.BoolTensor(bimask)]
            offset = np.linalg.norm(plane)
            normal = plane / max(offset, 1e-8)
            offset_new = (normal @ xyz_tmp.cpu().numpy()).mean()
            override_list.append(normal * offset_new)
        if len(override_list) > 0:
            instance["pred_plane"] = torch.stack(override_list)
        return instance
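# Hedged end-to-end sketch: it assumes a config that already defines
# MODEL.CAMERA_ON, MODEL.EMBEDDING_ON, and the plane-RCNN model (typically
# added by the project's own config helper); the config and image paths below
# are placeholders.
from detectron2.config import get_cfg

cfg = get_cfg()
cfg.merge_from_file("configs/planercnn_config.yaml")  # hypothetical config file
branch = PlaneRCNN_Branch(cfg)
raw_output = branch.inference("view0.jpg", "view1.jpg")  # hypothetical image pair
prediction = branch.process(raw_output)
print(prediction["0"].keys(), prediction["1"].keys())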
Example #5
    register_saver_hook(model, "roi_heads.box_head")
    register_saver_hook(model, "roi_heads.box_predictor")
    register_saver_hook(model, "roi_heads.mask_pooler")
    register_saver_hook(model, "roi_heads.mask_head")

    if 'predict and visualization':  # always-true string literal, used only to label this block
        for path in args.input:
            raw_image = read_image(path, format="BGR")
            with torch.no_grad():  # https://github.com/sphinx-doc/sphinx/issues/4258
                height, width = raw_image.shape[:2]
                img = predictor.aug.get_transform(raw_image).apply_image(raw_image)
                # here: image shape should be [C, H, W], and BGR format
                img = torch.as_tensor(img.astype("float32").transpose(2, 0, 1))

                inputs = {"image": img, "height": height, "width": width}
                pred_all = predictor.model([inputs])
                # {'instances': detectron2.structures.instances.Instances}
                pred = pred_all[0]

            break  # only the first input image is processed

            # visualizer = Visualizer(raw_image[:, :, ::-1])
            # # noinspection DuplicatedCode
            # if "panoptic_seg" in pred:
            #     ...
            # else:
            #     if "sem_seg" in pred:
            #         pred_vis = visualizer.draw_sem_seg(
            #             pred["sem_seg"].argmax(dim=0).to(torch.device("cpu"))
            #         )
            #     if "instances" in pred:
Example #6
# another equivalent way to evaluate the model is to use `trainer.test`


# %%
from detectron2.data import DatasetMapper, build_detection_test_loader
data_loader = build_detection_test_loader(
    cfg,
    cfg.DATASETS.TEST[0],
    DatasetMapper(cfg, is_train=True)  # is_train=True keeps ground-truth annotations in the inputs
)
# %%
print(len(data_loader))
# %%
for idx, inputs in enumerate(data_loader):
    # print(inputs)
    # NOTE: DefaultPredictor puts the model in eval mode, so this call returns
    # predictions; switch to predictor.model.train() to get a loss dict instead.
    metrics_dict = predictor.model(inputs)
    print(metrics_dict)

    # for k, v in metrics_dict.items() :
    #     print(k,isinstance(v, torch.Tensor))
    #     print(v)
        
    
    
    # metrics_dict = {
    #         k: v.detach().cpu().item() if isinstance(v, torch.Tensor) else float(v)
    #         for k, v in metrics_dict.items()
    #     }
    # total_losses_reduced = sum(loss for loss in metrics_dict.values())
    # print(total_losses_reduced)
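# Hedged sketch of the loss-evaluation variant hinted at by the comments above:
# detectron2's GeneralizedRCNN returns a loss dict only in training mode, so
# the model is switched with .train() and restored afterwards. It reuses the
# `predictor` and `data_loader` objects from this example.
predictor.model.train()
with torch.no_grad():
    inputs = next(iter(data_loader))
    loss_dict = predictor.model(inputs)
    losses = {k: v.detach().cpu().item() for k, v in loss_dict.items()}
print(sum(losses.values()))  # total loss on one batch
predictor.model.eval()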
Example #7
img_fnames = os.listdir(data_path)
for fname in tqdm.tqdm(img_fnames):
    im = cv2.imread(os.path.join(data_path, fname))
    if im is None:
        print(f"load image failed, skipping {fname} ...")
        continue

    if predictor.input_format == "RGB":
        im = im[:, :, ::-1]
    h, w = im.shape[:2]
    image = predictor.aug.get_transform(im).apply_image(im)
    image = torch.as_tensor(image.astype("float32").transpose(2, 0, 1))

    inputs = {"image": image, "height": h, "width": w}
    outputs = predictor.model([inputs])[0]["instances"]

    images = predictor.model.preprocess_image([inputs])
    features = predictor.model.backbone(images.tensor)
    features = [features[k] for k in predictor.model.roi_heads.box_in_features]

    # Pool a whole-image box to obtain a global image feature
    img_features = predictor.model.roi_heads.box_pooler(
        features, [
            detectron2.structures.boxes.Boxes(
                torch.tensor([[0., 0., w, h]]).to(
                    outputs.pred_boxes.tensor.device))
        ])
    img_features = predictor.model.roi_heads.box_head(
        img_features).detach().cpu().numpy()
    # boxes