# Shared imports for the excerpts below. Project-local helpers (pad_to_size,
# unpad_from_size, tensor_from_rgb_image, decode, decode_landm, unnormalize,
# vis_annotations, process_predictions) are assumed to be imported from the
# repo's own modules.
import json
from pathlib import Path
from typing import Dict, List, Union

import cv2
import numpy as np
import torch
from torch import nn
from torch.nn import functional as F
from torchvision.ops import nms
from tqdm import tqdm


def test_pad_to_size(image, bboxes, keypoints, target_height, target_width):
    target_size = (target_height, target_width)

    padded_dict = pad_to_size(target_size, image, bboxes, keypoints)
    unpadded_dict = unpad_from_size(**padded_dict)

    assert np.array_equal(image, unpadded_dict["image"])
    assert np.array_equal(bboxes, unpadded_dict["bboxes"]), f"{bboxes} {unpadded_dict['bboxes']}"
    assert np.array_equal(keypoints, unpadded_dict["keypoints"])
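
def _pad_round_trip_sketch() -> None:
    """A minimal standalone sketch of the round trip the test above asserts
    (hypothetical helper, not part of the repo): pad_to_size returns the padded
    image together with the "pads" that unpad_from_size needs to invert it.
    The shape below is an arbitrary example."""
    image = np.zeros((100, 150, 3), dtype=np.uint8)
    padded = pad_to_size(target_size=(256, 256), image=image)
    restored = unpad_from_size(**padded)
    assert np.array_equal(image, restored["image"])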

def predict(dataloader, model, hparams, device):
    model.eval()

    if hparams["local_rank"] == 0:
        loader = tqdm(dataloader)
    else:
        loader = dataloader

    with torch.no_grad():
        for batch in loader:
            torched_images = batch["torched_image"]  # images that are rescaled and padded

            if hparams["fp16"]:
                torched_images = torched_images.half()

            image_paths = batch["image_path"]
            pads = batch["pads"]
            heights = batch["original_height"]
            widths = batch["original_width"]

            batch_size = torched_images.shape[0]

            predictions = model(torched_images.to(device))

            for batch_id in range(batch_size):
                file_id = Path(image_paths[batch_id]).stem
                folder_name = Path(image_paths[batch_id]).parent.name

                # Binarize the logits and scale to a uint8 mask.
                mask = (predictions[batch_id][0].cpu().numpy() > 0).astype(np.uint8) * 255
                # Use this sample's pads: `pads` is batched, one entry per image.
                mask = unpad_from_size(pads[batch_id], image=mask)["image"]
                mask = cv2.resize(
                    mask,
                    (widths[batch_id].item(), heights[batch_id].item()),
                    interpolation=cv2.INTER_NEAREST,
                )

                (hparams["output_mask_path"] / folder_name).mkdir(exist_ok=True, parents=True)
                cv2.imwrite(str(hparams["output_mask_path"] / folder_name / f"{file_id}.png"), mask)

def predict_jsons(
    self, image: np.ndarray, confidence_threshold: float = 0.7, nms_threshold: float = 0.4
) -> List[Dict[str, Union[List, float]]]:
    with torch.no_grad():
        original_height, original_width = image.shape[:2]

        scale_landmarks = torch.from_numpy(np.tile([self.max_size, self.max_size], 5)).to(self.device)
        scale_bboxes = torch.from_numpy(np.tile([self.max_size, self.max_size], 2)).to(self.device)

        transformed_image = self.transform(image=image)["image"]

        padded = pad_to_size(target_size=(self.max_size, self.max_size), image=transformed_image)
        pads = padded["pads"]

        torched_image = tensor_from_rgb_image(padded["image"]).to(self.device)

        loc, conf, land = self.model(torched_image.unsqueeze(0))

        conf = F.softmax(conf, dim=-1)

        annotations: List[Dict[str, Union[List, float]]] = []

        boxes = decode(loc.data[0], self.prior_box, self.variance)
        boxes *= scale_bboxes
        scores = conf[0][:, 1]

        landmarks = decode_landm(land.data[0], self.prior_box, self.variance)
        landmarks *= scale_landmarks

        # Ignore low scores.
        valid_index = torch.where(scores > confidence_threshold)[0]
        boxes = boxes[valid_index]
        landmarks = landmarks[valid_index]
        scores = scores[valid_index]

        # Sort from high to low.
        order = scores.argsort(descending=True)
        boxes = boxes[order]
        landmarks = landmarks[order]
        scores = scores[order]

        # Do NMS.
        keep = nms(boxes, scores, nms_threshold)
        boxes = boxes[keep, :].int()

        if boxes.shape[0] == 0:
            return [{"bbox": [], "score": -1, "landmarks": []}]

        landmarks = landmarks[keep]

        scores = scores[keep].cpu().numpy().astype(np.float64)
        boxes = boxes.cpu().numpy()
        landmarks = landmarks.cpu().numpy()
        landmarks = landmarks.reshape([-1, 2])

        unpadded = unpad_from_size(pads, bboxes=boxes, keypoints=landmarks)

        resize_coeff = max(original_height, original_width) / self.max_size

        boxes = (unpadded["bboxes"] * resize_coeff).astype(int)
        landmarks = (unpadded["keypoints"].reshape(-1, 10) * resize_coeff).astype(int)

        for box_id, bbox in enumerate(boxes):
            x_min, y_min, x_max, y_max = bbox

            x_min = np.clip(x_min, 0, original_width - 1)
            x_max = np.clip(x_max, x_min + 1, original_width - 1)
            if x_min >= x_max:
                continue

            y_min = np.clip(y_min, 0, original_height - 1)
            y_max = np.clip(y_max, y_min + 1, original_height - 1)
            if y_min >= y_max:
                continue

            annotations += [
                {
                    # Use the clipped coordinates; the raw bbox may extend past the image.
                    "bbox": [int(x_min), int(y_min), int(x_max), int(y_max)],
                    "score": scores[box_id],
                    "landmarks": landmarks[box_id].reshape(-1, 2).tolist(),
                }
            ]

        return annotations
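
# A minimal usage sketch for predict_jsons. The function name detect_faces and
# the detector wiring are hypothetical; any object exposing predict_jsons as
# defined above works.
def detect_faces(detector, image_path: str) -> List[Dict[str, Union[List, float]]]:
    image = cv2.cvtColor(cv2.imread(image_path), cv2.COLOR_BGR2RGB)  # model expects RGB
    # Each annotation: {"bbox": [x_min, y_min, x_max, y_max], "score": float,
    #                   "landmarks": [[x, y], ...]} in original-image coordinates.
    return detector.predict_jsons(image, confidence_threshold=0.7, nms_threshold=0.4)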

def predict(dataloader, model, hparams, device):
    model.eval()

    loader = tqdm(dataloader)

    with torch.no_grad():
        for batch in loader:
            torched_images = batch["torched_image"]  # images that are rescaled and padded

            if hparams["fp16"]:
                torched_images = torched_images.half()

            pads = batch["pads"]
            image_paths = batch["image_path"]
            image_heights = batch["image_height"]
            image_widths = batch["image_width"]

            batch_size = torched_images.shape[0]

            image_heights = image_heights.cpu().numpy()
            image_widths = image_widths.cpu().numpy()

            original_shapes = list(zip(image_heights, image_widths))

            prediction = model(torched_images.to(device))

            output_annotations = process_predictions(
                prediction=prediction,
                original_shapes=original_shapes,
                input_shape=torched_images.shape,
                pads=pads.cpu().numpy(),
                confidence_threshold=hparams["confidence_threshold"],
                nms_threshold=hparams["nms_threshold"],
                prior_box=hparams["prior_box"],
                variance=hparams["test_parameters"]["variance"],
            )

            for batch_id in range(batch_size):
                annotations = output_annotations[batch_id]
                if not annotations[0]["bbox"]:
                    continue

                file_name = Path(image_paths[batch_id]).name
                file_id = Path(image_paths[batch_id]).stem

                # Convert [x_min, y_min, x_max, y_max] corners to the COCO
                # [x, y, width, height] convention, e.g. (10, 20)-(110, 220)
                # becomes [10, 20, 100, 200].
                coco_annotations = []
                for annotation in annotations:
                    x_min, y_min, x_max, y_max = annotation["bbox"]
                    x_min = int(np.clip(x_min, 0, x_max - 1))
                    y_min = int(np.clip(y_min, 0, y_max - 1))

                    coco_annotations.append(
                        {
                            "bbox": [x_min, y_min, x_max - x_min, y_max - y_min],
                            "score": annotation["score"],
                        }
                    )

                predictions = {
                    "file_name": file_name,
                    "annotations": coco_annotations,
                    "file_path": str(image_paths[batch_id]),
                }

                with open(hparams["output_label_path"] / f"{file_id}.json", "w") as f:
                    json.dump(predictions, f, indent=2)

                if hparams["visualize"]:
                    normalized_image = np.transpose(torched_images[batch_id].cpu().numpy(), (1, 2, 0))
                    image = unnormalize(normalized_image)
                    unpadded = unpad_from_size(pads[batch_id].cpu().numpy(), image)

                    original_image_height = image_heights[batch_id].item()
                    original_image_width = image_widths[batch_id].item()

                    image = cv2.resize(
                        unpadded["image"].astype(np.uint8),
                        (original_image_width, original_image_height),
                    )

                    for annotation in annotations:
                        x_min, y_min, x_max, y_max = annotation["bbox"]
                        x_min = np.clip(x_min, 0, x_max - 1)
                        y_min = np.clip(y_min, 0, y_max - 1)

                        image = cv2.rectangle(
                            image, (x_min, y_min), (x_max, y_max), color=(255, 255, 255), thickness=2
                        )

                    # The image is RGB; OpenCV writes BGR, so swap channels once
                    # after all boxes are drawn.
                    cv2.imwrite(
                        str(hparams["output_vis_path"] / f"{file_id}.jpg"),
                        cv2.cvtColor(image, cv2.COLOR_RGB2BGR),
                    )

def process_predictions(
    prediction,
    original_shapes,
    input_shape,
    pads,
    confidence_threshold,
    nms_threshold,
    prior_box,
    variance,
    keep_top_k=None,
):
    loc, conf, land = prediction

    conf = F.softmax(conf, dim=-1)

    result: List[List[Dict[str, Union[List, float]]]] = []

    batch_size, _, image_height, image_width = input_shape

    scale_landmarks = torch.from_numpy(np.tile([image_width, image_height], 5)).to(loc.device)
    scale_bboxes = torch.from_numpy(np.tile([image_width, image_height], 2)).to(loc.device)

    for batch_id in range(batch_size):
        annotations: List[Dict[str, Union[List, float]]] = []

        boxes = decode(loc.data[batch_id], prior_box.to(loc.device), variance)
        boxes *= scale_bboxes
        scores = conf[batch_id][:, 1]

        landmarks = decode_landm(land.data[batch_id], prior_box.to(land.device), variance)
        landmarks *= scale_landmarks

        # Ignore low scores.
        valid_index = torch.where(scores > confidence_threshold)[0]
        boxes = boxes[valid_index]
        landmarks = landmarks[valid_index]
        scores = scores[valid_index]

        # Sort from high to low so any top-k truncation keeps the best
        # detections (the original `descending=False` sorted the wrong way).
        order = scores.argsort(descending=True)
        boxes = boxes[order]
        landmarks = landmarks[order]
        scores = scores[order]

        # keep_top_k is accepted to match the call sites below; it truncates
        # the score-sorted detections before NMS when provided.
        if keep_top_k is not None:
            boxes = boxes[:keep_top_k]
            landmarks = landmarks[:keep_top_k]
            scores = scores[:keep_top_k]

        # Do NMS.
        keep = nms(boxes, scores, nms_threshold)
        boxes = boxes[keep, :].int()

        if boxes.shape[0] == 0:
            result += [[{"bbox": [], "score": -1, "landmarks": []}]]
            continue

        landmarks = landmarks[keep]

        scores = scores[keep].cpu().numpy().astype(np.float64)
        boxes = boxes.cpu().numpy()
        landmarks = landmarks.cpu().numpy().reshape([-1, 2])

        if pads is None:
            pads_numpy = np.array([0, 0, 0, 0])
        else:
            pads_numpy = pads[batch_id]

        unpadded = unpad_from_size(pads_numpy, bboxes=boxes, keypoints=landmarks)

        resize_coeff = max(original_shapes[batch_id]) / max(image_height, image_width)

        boxes = (unpadded["bboxes"] * resize_coeff).astype(int)
        landmarks = (unpadded["keypoints"].reshape(-1, 10) * resize_coeff).astype(int)

        for crop_id, bbox in enumerate(boxes):
            annotations += [
                {
                    "bbox": bbox.tolist(),
                    "score": scores[crop_id],
                    "landmarks": landmarks[crop_id].reshape(-1, 2).tolist(),
                }
            ]

        result += [annotations]

    return result
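
# For reference, the SSD-style decoding that `decode` is assumed to implement
# (hedged: the exact form lives in the repo's box utilities). Given a prior box
# (p_cx, p_cy, p_w, p_h), a regression output (t_x, t_y, t_w, t_h), and
# variance = (v0, v1):
#
#     cx = p_cx + t_x * v0 * p_w
#     cy = p_cy + t_y * v0 * p_h
#     w  = p_w * exp(t_w * v1)
#     h  = p_h * exp(t_h * v1)
#
# The (cx, cy, w, h) box is then converted to (x_min, y_min, x_max, y_max)
# corners before being scaled by the network input size above; decode_landm
# applies the same center-offset formula to each of the five landmarks.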

def predict(dataloader, model, hparams, device):
    model.eval()

    if hparams["local_rank"] == 0:
        loader = tqdm(dataloader)
    else:
        loader = dataloader

    with torch.no_grad():
        for batch in loader:
            torched_images = batch["torched_image"]  # images that are rescaled and padded

            if hparams["fp16"]:
                torched_images = torched_images.half()

            pads = batch["pads"]
            image_paths = batch["image_path"]
            image_heights = batch["image_height"]
            image_widths = batch["image_width"]

            batch_size = torched_images.shape[0]

            image_heights = image_heights.cpu().numpy()
            image_widths = image_widths.cpu().numpy()

            original_shapes = list(zip(image_heights, image_widths))

            prediction = model(torched_images.to(device))

            output_annotations = process_predictions(
                prediction=prediction,
                original_shapes=original_shapes,
                input_shape=torched_images.shape,
                pads=pads.cpu().numpy(),
                confidence_threshold=hparams["confidence_threshold"],
                nms_threshold=hparams["nms_threshold"],
                prior_box=hparams["prior_box"],
                variance=hparams["test_parameters"]["variance"],
                keep_top_k=hparams["keep_top_k"],
            )

            for batch_id in range(batch_size):
                annotations = output_annotations[batch_id]
                if not annotations[0]["bbox"]:
                    continue

                folder_name = Path(image_paths[batch_id]).parent.name
                file_name = Path(image_paths[batch_id]).name
                file_id = Path(image_paths[batch_id]).stem

                predictions = {
                    "file_name": file_name,
                    "annotations": annotations,
                    "file_path": str(Path(folder_name) / file_name),
                }

                (hparams["output_label_path"] / folder_name).mkdir(exist_ok=True, parents=True)
                result_path = hparams["output_label_path"] / folder_name / f"{file_id}.json"

                with open(result_path, "w") as f:
                    json.dump(predictions, f, indent=2)

                if hparams["visualize"]:
                    normalized_image = np.transpose(torched_images[batch_id].cpu().numpy(), (1, 2, 0))
                    image = unnormalize(normalized_image)
                    unpadded = unpad_from_size(pads[batch_id].cpu().numpy(), image)

                    original_image_height = image_heights[batch_id].item()
                    original_image_width = image_widths[batch_id].item()

                    image = cv2.resize(
                        unpadded["image"].astype(np.uint8),
                        (original_image_width, original_image_height),
                    )

                    # One color per facial landmark (eyes, nose, mouth corners).
                    colors = [(255, 0, 0), (128, 255, 0), (255, 178, 102), (102, 128, 255), (0, 255, 255)]

                    for annotation in annotations:
                        landmarks = annotation["landmarks"]

                        for landmark_id, (x, y) in enumerate(landmarks):
                            image = cv2.circle(
                                image, (x, y), radius=3, color=colors[landmark_id], thickness=3
                            )

                        x_min, y_min, x_max, y_max = annotation["bbox"]
                        x_min = np.clip(x_min, 0, x_max - 1)
                        y_min = np.clip(y_min, 0, y_max - 1)

                        image = cv2.rectangle(
                            image, (x_min, y_min), (x_max, y_max), color=(0, 255, 0), thickness=2
                        )

                    (hparams["output_vis_path"] / folder_name).mkdir(exist_ok=True, parents=True)
                    result_path = hparams["output_vis_path"] / folder_name / f"{file_id}.jpg"
                    cv2.imwrite(str(result_path), cv2.cvtColor(image, cv2.COLOR_RGB2BGR))

def predict(
    dataloader: torch.utils.data.DataLoader, model: nn.Module, hparams: dict, device: torch.device
) -> None:
    model.eval()

    if hparams["local_rank"] == 0:
        loader = tqdm(dataloader)
    else:
        loader = dataloader

    with torch.no_grad():
        for batch in loader:
            torched_images = batch["torched_image"]  # images that are rescaled and padded

            if hparams["fp16"]:
                torched_images = torched_images.half()

            pads = batch["pads"]
            image_paths = batch["image_path"]
            image_heights = batch["image_height"]
            image_widths = batch["image_width"]

            batch_size = torched_images.shape[0]

            image_heights = image_heights.cpu().numpy()
            image_widths = image_widths.cpu().numpy()

            original_shapes = list(zip(image_heights, image_widths))

            prediction = model(torched_images.to(device))

            output_annotations = process_predictions(
                prediction=prediction,
                original_shapes=original_shapes,
                input_shape=torched_images.shape,
                pads=pads.cpu().numpy(),
                confidence_threshold=hparams["confidence_threshold"],
                nms_threshold=hparams["nms_threshold"],
                prior_box=hparams["prior_box"],
                variance=hparams["test_parameters"]["variance"],
                keep_top_k=hparams["keep_top_k"],
            )

            for batch_id in range(batch_size):
                annotations = output_annotations[batch_id]
                if not annotations[0]["bbox"]:
                    continue

                folder_name = Path(image_paths[batch_id]).parent.name
                file_name = Path(image_paths[batch_id]).name
                file_id = Path(image_paths[batch_id]).stem

                predictions = {
                    "file_name": file_name,
                    "annotations": annotations,
                    "file_path": str(Path(folder_name) / file_name),
                }

                (hparams["output_label_path"] / folder_name).mkdir(exist_ok=True, parents=True)
                result_path = hparams["output_label_path"] / folder_name / f"{file_id}.json"

                with result_path.open("w") as f:
                    json.dump(predictions, f, indent=2)

                if hparams["visualize"]:
                    normalized_image = np.transpose(torched_images[batch_id].cpu().numpy(), (1, 2, 0))
                    image = unnormalize(normalized_image)
                    unpadded = unpad_from_size(pads[batch_id].cpu().numpy(), image)

                    original_image_height = image_heights[batch_id].item()
                    original_image_width = image_widths[batch_id].item()

                    image = cv2.resize(
                        unpadded["image"].astype(np.uint8),
                        (original_image_width, original_image_height),
                    )

                    image = vis_annotations(image, annotations=annotations)  # type: ignore

                    (hparams["output_vis_path"] / folder_name).mkdir(exist_ok=True, parents=True)
                    result_path = hparams["output_vis_path"] / folder_name / f"{file_id}.jpg"
                    cv2.imwrite(str(result_path), cv2.cvtColor(image, cv2.COLOR_RGB2BGR))
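
# A hedged sketch of the hparams dictionary the predict functions above read
# from. The keys are taken from the accesses above; the values are placeholders
# for illustration, not the repo's actual defaults.
hparams_example = {
    "local_rank": 0,
    "fp16": False,
    "visualize": True,
    "confidence_threshold": 0.7,
    "nms_threshold": 0.4,
    "keep_top_k": 100,
    "prior_box": None,  # torch.Tensor of priors, built elsewhere in the repo
    "test_parameters": {"variance": [0.1, 0.2]},  # placeholder values
    "output_label_path": Path("predictions/labels"),
    "output_vis_path": Path("predictions/vis"),
    "output_mask_path": Path("predictions/masks"),
}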