def test_create_detection_mapping():
    """Test create_detection_mapping()."""
    # Credit to the voxel51 crew for a helpful test suite from which this test borrows:
    # https://github.com/voxel51/fiftyone/blob/a7c2b36a4f101330fa8edec35a9bdee841886f96/tests/unittests/view_tests.py#L59
    dataset = fo.Dataset(name="test_detection_mapping")
    dataset.add_sample(
        fo.Sample(
            filepath="filepath1.jpg",
            tags=["train"],
            test_dets=fo.Detections(
                detections=[
                    fo.Detection(
                        label="friend",
                        confidence=0.9,
                        bounding_box=[0, 0, 0.5, 0.5],
                    ),
                    fo.Detection(
                        label="stopper",
                        confidence=0.1,
                        bounding_box=[0, 0, 0.5, 0.5],
                    ),
                ]
            ),
            another_field=51,
        )
    )

    test_output = create_detection_mapping(
        "test_detection_mapping", label_field="test_dets", training_tag="train"
    )

    assert isinstance(test_output, str)
    assert (
        test_output
        == 'item {\n name: "friend"\n id: 1\n}\nitem {\n name: "stopper"\n id: 2\n}\n'
    )
def test__create_list_of_class_names():
    """Test _create_list_of_class_names()."""
    # Credit to the voxel51 crew for a helpful test suite from which this test borrows:
    # https://github.com/voxel51/fiftyone/blob/a7c2b36a4f101330fa8edec35a9bdee841886f96/tests/unittests/view_tests.py#L59
    dataset = fo.Dataset()
    dataset.add_sample(
        fo.Sample(
            filepath="filepath1.jpg",
            tags=["test"],
            test_dets=fo.Detections(
                detections=[
                    fo.Detection(
                        label="friend",
                        confidence=0.9,
                        bounding_box=[0, 0, 0.5, 0.5],
                    ),
                    fo.Detection(
                        label="stopper",
                        confidence=0.1,
                        bounding_box=[0, 0, 0.5, 0.5],
                    ),
                    fo.Detection(
                        label="big bro",
                        confidence=0.6,
                        bounding_box=[0, 0, 0.1, 0.5],
                    ),
                ]
            ),
            another_field=51,
        )
    )

    test_list = _create_list_of_class_names(dataset, label_field="test_dets")

    assert set(test_list) == {"friend", "stopper", "big bro"}
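# The two functions exercised above are defined elsewhere in the project. A
# minimal sketch of what they might look like, inferred from the expected
# outputs in the tests (the real implementations may differ):
# `_create_list_of_class_names` collects the distinct detection labels in a
# field, and `create_detection_mapping` renders them as a TF Object Detection
# label-map proto string with 1-based ids.
import fiftyone as fo


def _create_list_of_class_names(dataset, label_field):
    # Collect distinct detection labels, preserving first-seen order
    class_names = []
    for sample in dataset:
        if sample[label_field] is None:
            continue
        for det in sample[label_field].detections:
            if det.label not in class_names:
                class_names.append(det.label)
    return class_names


def create_detection_mapping(dataset_name, label_field, training_tag):
    # Render the classes found in the tagged samples as a label-map string
    dataset = fo.load_dataset(dataset_name)
    view = dataset.match_tags(training_tag)
    class_names = _create_list_of_class_names(view, label_field)
    return "".join(
        'item {\n name: "%s"\n id: %d\n}\n' % (name, idx + 1)
        for idx, name in enumerate(class_names)
    )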
def _make_detection_dataset(img, images_dir, num_samples=4, num_objects_per_sample=3):
    exts = [".jpg", ".png"]

    samples = []
    for idx in range(num_samples):
        filepath = os.path.join(images_dir, "%06d%s" % (idx, exts[idx % len(exts)]))
        etai.write(img, filepath)

        detections = []
        for _ in range(num_objects_per_sample):
            label = random.choice(["cat", "dog", "bird", "rabbit"])
            # Relative [top-left-x, top-left-y, width, height] box kept inside the image
            bounding_box = [
                0.8 * random.random(),
                0.8 * random.random(),
                0.2,
                0.2,
            ]
            detections.append(fo.Detection(label=label, bounding_box=bounding_box))

        samples.append(
            fo.Sample(
                filepath=filepath,
                ground_truth=fo.Detections(detections=detections),
            )
        )

    dataset = fo.Dataset()
    dataset.add_samples(samples)

    return dataset
def _create_synth_fiftyone_dataset(tmpdir):
    img_dir = Path(tmpdir / "fo_imgs")
    img_dir.mkdir()

    Image.new("RGB", (1920, 1080)).save(img_dir / "sample_one.png")
    Image.new("RGB", (1920, 1080)).save(img_dir / "sample_two.png")

    dataset = fo.Dataset.from_dir(
        img_dir,
        dataset_type=fo.types.ImageDirectory,
    )

    sample1 = dataset[str(img_dir / "sample_one.png")]
    sample2 = dataset[str(img_dir / "sample_two.png")]

    d1 = fo.Detection(label="person", bounding_box=[0.3, 0.4, 0.2, 0.2])
    d2 = fo.Detection(label="person", bounding_box=[0.05, 0.10, 0.28, 0.15])
    d3 = fo.Detection(label="person", bounding_box=[0.23, 0.14, 0.09, 0.18])
    d1["iscrowd"] = 1
    d2["iscrowd"] = 0
    d3["iscrowd"] = 0

    sample1["ground_truth"] = fo.Detections(detections=[d1])
    sample2["ground_truth"] = fo.Detections(detections=[d2, d3])
    sample1.save()
    sample2.save()
    return dataset
def create_fo_sample(image: Image.Image, labels, boxes):
    """Builds a FiftyOne sample from a PIL image and ground-truth boxes.

    Args
    ----
    image: PIL image loaded from disk (its ``filename`` is used as the filepath)
    labels: list of label names, one per box
    boxes: absolute pixel boxes in xyxy format
    """
    assert len(labels) == len(boxes)

    w, h = image.size
    detections = []
    for label, (x1, y1, x2, y2) in zip(labels, boxes):
        # FiftyOne expects relative [top-left-x, top-left-y, width, height];
        # this conversion assumes the xyxy boxes are absolute pixel coordinates
        rel_box = [x1 / w, y1 / h, (x2 - x1) / w, (y2 - y1) / h]
        detections.append(fo.Detection(label=label, bounding_box=rel_box))

    sample = fo.Sample(filepath=image.filename)
    sample["ground_truth"] = fo.Detections(detections=detections)
    return sample
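# A minimal usage sketch for create_fo_sample(); the image path, labels, and
# boxes below are hypothetical.
from PIL import Image as PILImage

img = PILImage.open("filepath1.jpg")
sample = create_fo_sample(
    img,
    labels=["cat", "dog"],
    boxes=[(10, 20, 110, 220), (300, 40, 420, 180)],  # absolute xyxy pixels
)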
def test_filter_detections(self):
    self.sample1["test_dets"] = fo.Detections(
        detections=[
            fo.Detection(
                label="friend",
                confidence=0.9,
                bounding_box=[0, 0, 0.5, 0.5],
            ),
            fo.Detection(
                label="friend",
                confidence=0.3,
                bounding_box=[0.25, 0, 0.5, 0.1],
            ),
            fo.Detection(
                label="stopper",
                confidence=0.1,
                bounding_box=[0, 0, 0.5, 0.5],
            ),
            fo.Detection(
                label="big bro",
                confidence=0.6,
                bounding_box=[0, 0, 0.1, 0.5],
            ),
        ]
    )
    self.sample1.save()

    self.sample2["test_dets"] = fo.Detections(
        detections=[
            fo.Detection(
                label="friend",
                confidence=0.99,
                bounding_box=[0, 0, 1, 1],
            ),
            fo.Detection(
                label="tricam",
                confidence=0.2,
                bounding_box=[0, 0, 0.5, 0.5],
            ),
            fo.Detection(
                label="hex",
                confidence=0.8,
                bounding_box=[0.35, 0, 0.2, 0.25],
            ),
        ]
    )
    self.sample2.save()

    view = self.dataset.filter_detections(
        "test_dets", (F("confidence") > 0.5) & (F("label") == "friend")
    )

    for sv in view:
        for det in sv.test_dets.detections:
            self.assertGreater(det.confidence, 0.5)
            self.assertEqual(det.label, "friend")
def add_predictions(dataset, sample_label, device, model):
    classes = dataset.default_classes
    with fo.ProgressBar() as pb:
        for sample in pb(dataset):
            # Load image
            image = Image.open(sample.filepath)
            image = F.to_tensor(image).to(device)
            c, h, w = image.shape

            preds = model([image])[0]

            # Non-max suppression: indices of the boxes to keep
            idx = nms(boxes=preds["boxes"], scores=preds["scores"], iou_threshold=0.1)
            boxes = preds["boxes"][idx]
            labels = preds["labels"][idx]
            scores = preds["scores"][idx]

            labels = labels.cpu().detach().numpy()
            scores = scores.cpu().detach().numpy()
            boxes = boxes.cpu().detach().numpy()

            detections = []
            for label, score, box in zip(labels, scores, boxes):
                # Convert to relative [top-left-x, top-left-y, width, height]
                # (FiftyOne format)
                x1, y1, x2, y2 = box
                rel_box = [
                    x1 / w,
                    y1 / h,
                    (x2 - x1) / w,
                    (y2 - y1) / h,
                ]
                detections.append(
                    fo.Detection(
                        label=classes[label],
                        bounding_box=rel_box,
                        confidence=score,
                    )
                )

            sample[sample_label] = fo.Detections(detections=detections)
            sample.save()
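# A minimal usage sketch for add_predictions(), assuming a COCO-pretrained
# torchvision Faster R-CNN whose label indices line up with the zoo dataset's
# default_classes; the max_samples value is arbitrary.
import fiftyone.zoo as foz
import torch
from torchvision.models.detection import fasterrcnn_resnet50_fpn

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
model = fasterrcnn_resnet50_fpn(pretrained=True).to(device).eval()

dataset = foz.load_zoo_dataset("coco-2017", split="validation", max_samples=10)
with torch.no_grad():
    add_predictions(dataset, "faster_rcnn", device, model)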
def train_with_hydra(cfg: DictConfig):

    cfg.inference.base_path = (
        cfg.inference.model_path_to_load.split("train/", 1)[0] + "inference"
    )
    print("INFERENCE RESULTS WILL BE SAVED IN {}".format(cfg.inference.base_path))

    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    # For inference, always set batch_size = 1
    cfg.inference.batch_size = 1

    createFolderForExplanation(cfg)

    # ----------
    # INSTANTIATE TEST TRANSFORM DATA
    # ----------
    # Dataclass for the custom image transform; see the dataset configuration in the .yaml
    @dataclass
    class ObjectDetectionInputTransform(InputTransform):
        def predict_per_sample_transform(self):
            return instantiate(cfg.dataset.test_transform, _convert_="all")

    # ----------
    # INSTANTIATE COCO DATA
    # ----------
    # Load the COCO-formatted dataset into FiftyOne
    fiftyone_predict_dataset = fo.Dataset.from_dir(
        dataset_type=fo.types.COCODetectionDataset,
        data_path=cfg.inference.path_folder_images,
        labels_path=cfg.inference.path_folder_annotations,
        include_id=True,
        label_field="ground_truth",
    )

    class_labels = fiftyone_predict_dataset.default_classes

    # Create a datamodule to load the data in Lightning
    datamodule = ObjectDetectionData.from_fiftyone(
        predict_dataset=fiftyone_predict_dataset,
        predict_transform=ObjectDetectionInputTransform,
        label_field="ground_truth_detections",
        batch_size=cfg.inference.batch_size,
    )

    # ----------
    # INSTANTIATE MODEL AND TRAINER
    # ----------
    model = instantiate(cfg.model.model)

    # if (cfg.inference.model_path_to_load is not None) and (cfg.inference.model_path_to_load != ""):
    #     print("LOAD PRETRAINED WEIGHTS")
    #     model = model.load_from_checkpoint(cfg.inference.model_path_to_load)

    # Instantiate the trainer
    trainer = instantiate(cfg.trainer.default)

    # ----------
    # RUN PREDICTION
    # ----------
    # All predictions for your dataset
    predictions = trainer.predict(model, datamodule=datamodule)

    # The model needs to be put on the GPU after trainer.predict() in order to
    # run the explanations on the GPU
    if torch.cuda.is_available():
        model = model.to(device)

    # Put the model in eval mode for explainability
    model.eval()

    # Over all predictions
    for i, sample in enumerate(fiftyone_predict_dataset):

        detections = []

        # The [0] index is because batch_size = 1.
        # Over all bboxes in one image:
        for bbox, score, label_index in zip(
            predictions[i][0][DataKeys.PREDS]["bboxes"],
            predictions[i][0][DataKeys.PREDS]["scores"],
            predictions[i][0][DataKeys.PREDS]["labels"],
        ):
            # Original image dimensions
            img_width_orig = sample.metadata["width"]
            img_height_orig = sample.metadata["height"]

            # Model input dimensions
            model_input_height, model_input_width = predictions[i][0][
                DataKeys.METADATA
            ]["size"]

            # Rescaled coordinates are in (x1, y1, x2, y2) format
            rescale_coordinate = scale_coords(
                (model_input_height, model_input_width),
                bbox,
                (img_height_orig, img_width_orig),
            )

            # Change from (x1, y1, x2, y2) -> (x1, y1, width, height);
            # COCO annotations are in this format
            rescale_coordinate[2] = rescale_coordinate[2] - rescale_coordinate[0]
            rescale_coordinate[3] = rescale_coordinate[3] - rescale_coordinate[1]

            # Coordinates in FiftyOne are normalized, as for COCO
            rescale_coordinate[[0, 2]] /= img_width_orig
            rescale_coordinate[[1, 3]] /= img_height_orig

            # Detection threshold
            if score > cfg.inference.threshold:
                detections.append(
                    fo.Detection(
                        label=class_labels[label_index],
                        bounding_box=rescale_coordinate.tolist(),
                        confidence=score,
                    )
                )

        # Save the new detections to the FiftyOne dataset
        sample["inference"] = fo.Detections(detections=detections)
        sample.save()

    results = fiftyone_predict_dataset.evaluate_detections(
        "inference",
        gt_field="ground_truth_detections",
        eval_key="objectDetection_eval",
        compute_mAP=True,
    )

    # Save the confusion matrix
    plot = results.plot_confusion_matrix(
        classes=class_labels, backend="matplotlib", figsize=(6, 6)
    )
    plot.savefig(cfg.inference.confusion_matrix.path_to_confusion_matrix_image)

    print(results.mAP())
    print(results.metrics())
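# `scale_coords` above comes from the training code and is not shown here.
# A plausible sketch of one common implementation (modeled on the YOLOv5
# utility of the same name), which undoes letterbox resizing; the real helper
# may differ.
import numpy as np


def scale_coords(model_shape, coords, orig_shape):
    # model_shape and orig_shape are (height, width); coords is an
    # (x1, y1, x2, y2) box at the model's input resolution
    gain = min(model_shape[0] / orig_shape[0], model_shape[1] / orig_shape[1])
    pad_x = (model_shape[1] - orig_shape[1] * gain) / 2
    pad_y = (model_shape[0] - orig_shape[0] * gain) / 2

    coords = np.asarray(coords, dtype=float)
    coords[[0, 2]] = (coords[[0, 2]] - pad_x) / gain  # undo x padding and scale
    coords[[1, 3]] = (coords[[1, 3]] - pad_y) / gain  # undo y padding and scale
    coords[[0, 2]] = coords[[0, 2]].clip(0, orig_shape[1])  # clip to image width
    coords[[1, 3]] = coords[[1, 3]].clip(0, orig_shape[0])  # clip to image height
    return coords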
def run_detection_model_tiled(
    dataset_name,
    training_name,
    prediction_field,
    sample_tag,
    tile_string,
    tile_overlap: int,
    iou_threshold: float,
):
    """Runs the detection model over the entire dataset using a tiling approach.

    Args:
        dataset_name: the name of the FiftyOne dataset to process
        training_name: the training run whose exported model to load
        prediction_field: the sample field in which to store the predictions
        sample_tag: only samples with this tag are processed
        tile_string: comma-separated tile sizes, e.g. "1024x1024,2048x2048"
        tile_overlap (int): the overlap between adjacent tiles, in pixels
        iou_threshold (float): the IoU threshold for non-max suppression
    """
    model_path = (
        "/tf/model-export/" + training_name + "/image_tensor_saved_model/saved_model"
    )

    # Minimum score for adding a prediction. This helps keep out bad
    # predictions, but it may need to be adjusted if your model is not that
    # good yet.
    min_score = 0.50
    input_tensor_size = 512

    logging.info("Loading model...")
    start_time = time.time()
    tf.keras.backend.clear_session()
    detect_fn = tf.saved_model.load(model_path)
    infer = detect_fn.signatures["serving_default"]
    print(infer.structured_outputs)
    print(infer)
    end_time = time.time()
    elapsed_time = end_time - start_time
    logging.info("Loading model took: " + str(elapsed_time) + "s")

    category_index = _load_label_map(training_name)

    dataset = fo.load_dataset(dataset_name)

    # Go through all of the samples in the dataset
    for sample in dataset.match_tags(sample_tag).select_fields("filepath"):
        start_time = time.time()
        img = load_img(sample.filepath)
        img_size = img.size
        img_width, img_height = img_size
        objects_by_label = dict()
        exportDetections = []
        predicted_objects = []

        tile_sizes = []
        for tile_size in tile_string.split(","):
            tile_size = tile_size.split("x")
            tile_sizes.append([int(tile_size[0]), int(tile_size[1])])

        # Collect all of the detections for each tile size
        for tile_size in tile_sizes:
            tile_width, tile_height = tile_size

            # For tiles that are smaller than the image size, calculate all of
            # the sub-images that are needed
            for tile_location in _tiles_location_gen(img_size, tile_size, tile_overlap):
                tile = img.crop(tile_location)
                old_size = tile.size  # old_size is in (width, height) format
                ratio = float(input_tensor_size) / max(old_size)
                if ratio > 1:
                    continue
                new_size = tuple(int(x * ratio) for x in old_size)
                # Image.ANTIALIAS was removed in Pillow 10; LANCZOS is the same filter
                im = tile.resize(new_size, Image.LANCZOS)

                # Create a new square image and paste the resized tile onto it
                new_im = Image.new("RGB", (input_tensor_size, input_tensor_size))
                new_im.paste(im, (0, 0))
                # Or center it: ((input_tensor_size - new_size[0]) // 2,
                #                (input_tensor_size - new_size[1]) // 2)

                img_array = img_to_array(new_im, dtype="uint8")
                img_batch = np.array([img_array])

                detections = detect_fn(img_batch)

                for i, detectScore in enumerate(detections["detection_scores"][0]):
                    if detectScore > min_score:
                        # TF boxes are [ymin, xmin, ymax, xmax], normalized to
                        # the padded input tensor
                        x1 = detections["detection_boxes"][0][i][1].numpy() * input_tensor_size
                        y1 = detections["detection_boxes"][0][i][0].numpy() * input_tensor_size
                        x2 = detections["detection_boxes"][0][i][3].numpy() * input_tensor_size
                        y2 = detections["detection_boxes"][0][i][2].numpy() * input_tensor_size
                        bbox = [x1, y1, x2, y2]

                        # Undo the resize, then shift back to full-image coordinates
                        scaled_bbox = [number / ratio for number in bbox]
                        repositioned_bbox = _reposition_bounding_box(
                            scaled_bbox, tile_location
                        )

                        confidence = detections["detection_scores"][0][i]
                        label = _find_class_name(
                            category_index, int(detections["detection_classes"][0][i])
                        )
                        objects_by_label.setdefault(label, []).append(
                            Object(label, confidence, repositioned_bbox)
                        )
                        predicted_objects.append(
                            Object(label, confidence, repositioned_bbox)
                        )

        # Alternative: per-label non-max suppression
        # for label, objects in objects_by_label.items():
        #     idxs = _non_max_suppression(objects, iou_threshold)
        #     for idx in idxs:
        #         x1 = objects[idx].bbox[0] / img_width
        #         y1 = objects[idx].bbox[1] / img_height
        #         x2 = objects[idx].bbox[2] / img_width
        #         y2 = objects[idx].bbox[3] / img_height
        #         w = x2 - x1
        #         h = y2 - y1
        #         bbox = [x1, y1, w, h]
        #         exportDetections.append(
        #             fo.Detection(
        #                 label=objects[idx].label,
        #                 bounding_box=bbox,
        #                 confidence=objects[idx].score,
        #             )
        #         )

        # Run non-max suppression over all predictions from all tiles and
        # convert the survivors to relative FiftyOne boxes
        objects = predicted_objects
        idxs = _non_max_suppression(objects, iou_threshold)
        for idx in idxs:
            x1 = objects[idx].bbox[0] / img_width
            y1 = objects[idx].bbox[1] / img_height
            x2 = objects[idx].bbox[2] / img_width
            y2 = objects[idx].bbox[3] / img_height
            w = x2 - x1
            h = y2 - y1
            bbox = [x1, y1, w, h]

            exportDetections.append(
                fo.Detection(
                    label=objects[idx].label,
                    bounding_box=bbox,
                    confidence=objects[idx].score,
                )
            )

        # Store detections in a field name of your choice
        sample[prediction_field] = fo.Detections(detections=exportDetections)
        sample.save()

        end_time = time.time()
        print(
            "{} - Processing {} took: {}s".format(
                len(exportDetections), sample.filepath, end_time - start_time
            )
        )
        for detect in exportDetections:
            print("\t - {} {}%".format(detect.label, detect.confidence))
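# The tiling helpers used above (`Object`, `_tiles_location_gen`,
# `_reposition_bounding_box`) are defined elsewhere in the project. A
# plausible sketch of them, assuming pixel-coordinate (x1, y1, x2, y2) boxes,
# is:
import collections

Object = collections.namedtuple("Object", ["label", "score", "bbox"])


def _tiles_location_gen(img_size, tile_size, overlap):
    # Yield (left, upper, right, lower) crop boxes covering the image,
    # stepping by the tile size minus the requested overlap
    img_width, img_height = img_size
    tile_width, tile_height = tile_size
    x_step = max(1, tile_width - overlap)
    y_step = max(1, tile_height - overlap)
    for upper in range(0, img_height, y_step):
        for left in range(0, img_width, x_step):
            right = min(left + tile_width, img_width)
            lower = min(upper + tile_height, img_height)
            yield (left, upper, right, lower)


def _reposition_bounding_box(bbox, tile_location):
    # Shift a tile-relative (x1, y1, x2, y2) box back into full-image coordinates
    left, upper, _, _ = tile_location
    return [bbox[0] + left, bbox[1] + upper, bbox[2] + left, bbox[3] + upper]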
def run_detection_model(dataset_name, training_name, prediction_field):
    model_path = (
        "/tf/model-export/" + training_name + "/image_tensor_saved_model/saved_model"
    )

    # Minimum score for adding a prediction. This helps keep out bad
    # predictions, but it may need to be adjusted if your model is not that
    # good yet.
    min_score = 0.5

    logging.info("Loading model...")
    start_time = time.time()
    tf.keras.backend.clear_session()
    detect_fn = tf.saved_model.load(model_path)
    infer = detect_fn.signatures["serving_default"]
    end_time = time.time()
    elapsed_time = end_time - start_time
    logging.info("Loading model took: " + str(elapsed_time) + "s")

    category_index = _load_label_map(training_name)

    dataset = fo.load_dataset(dataset_name)

    for sample in dataset.select_fields("filepath"):
        start_time = time.time()
        img = load_img(sample.filepath)
        img_array = img_to_array(img)
        input_tensor = np.expand_dims(img_array, 0)
        detections = detect_fn(input_tensor)

        exportDetections = []
        for i, detectScore in enumerate(detections["detection_scores"][0]):
            if detectScore > min_score:
                label = _find_class_name(
                    category_index, int(detections["detection_classes"][0][i])
                )
                confidence = detections["detection_scores"][0][i]
                print("\t- {}: {}".format(label, confidence))

                # TF Object Detection bounding boxes are [ymin, xmin, ymax, xmax].
                # For FiftyOne, bounding box coordinates should be relative
                # values in [0, 1], in the following format:
                # [top-left-x, top-left-y, width, height]
                x1 = detections["detection_boxes"][0][i][1]
                y1 = detections["detection_boxes"][0][i][0]
                x2 = detections["detection_boxes"][0][i][3]
                y2 = detections["detection_boxes"][0][i][2]
                w = x2 - x1
                h = y2 - y1
                bbox = [x1, y1, w, h]

                exportDetections.append(
                    fo.Detection(label=label, bounding_box=bbox, confidence=confidence)
                )

        # Store detections in a field name of your choice
        sample[prediction_field] = fo.Detections(detections=exportDetections)
        sample.save()

        end_time = time.time()
        print("Processing {} took: {}s".format(sample.filepath, end_time - start_time))
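# A minimal usage sketch; the dataset and training-run names below are
# hypothetical and must match an existing FiftyOne dataset and an exported
# model under /tf/model-export/.
run_detection_model(
    dataset_name="my_dataset",
    training_name="my_training_run",
    prediction_field="predictions",
)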
# (Inside the per-sample prediction loop; `instances`, `labels`, `scores`,
# `boxes`, `classes`, `w`, and `h` come from the preceding cells.)

# RLE-encode the predicted instance masks (pycocotools format)
rles = [
    mask_util.encode(np.asarray(mask, order="F"))
    for mask in instances.pred_masks
]
for rle in rles:
    rle["counts"] = rle["counts"].decode("utf-8")

# Convert detections to FiftyOne format
detections = []
for label, score, box, rle in zip(labels, scores, boxes, rles):
    # Convert to [top-left-x, top-left-y, width, height]
    # in relative coordinates in [0, 1] x [0, 1]
    x1, y1, x2, y2 = box
    rel_box = [x1 / w, y1 / h, (x2 - x1) / w, (y2 - y1) / h]

    detection = fo.Detection(
        label=classes[label],
        bounding_box=rel_box,
        confidence=score,
    )
    # Attach this instance's RLE mask as a custom attribute, one mask per detection
    detection["mask_rle"] = rle
    detections.append(detection)

# Save predictions to dataset
sample["mask_rcnn"] = fo.Detections(detections=detections)
sample.save()

print("Finished adding predictions")


# In[ ]:


session.view = predictions_view


# In[ ]:


# Only contains detections with confidence >= 0.15
def test_accuracy_yolov4(capsys):

    data_path = "/home/Develop/Dataset/Coco2017/validation/val2017"
    labels_path = "/home/Develop/Dataset/Coco2017/validation/valAnnotation/instances_val2017.json"

    # The type of the dataset being imported
    dataset_type = fo.types.COCODetectionDataset

    # COCO annotations use 90 class ids by default, while the detector outputs
    # only 80 classes. This map takes the model's 80 output indices directly
    # to the corresponding COCO class ids.
    CocoMap = [
        1, 2, 3, 4, 5, 6, 7, 8,
        9, 10, 11, 13, 14, 15, 16, 17,
        18, 19, 20, 21, 22, 23, 24, 25,
        27, 28, 31, 32, 33, 34, 35, 36,
        37, 38, 39, 40, 41, 42, 43, 44,
        46, 47, 48, 49, 50, 51, 52, 53,
        54, 55, 56, 57, 58, 59, 60, 61,
        62, 63, 64, 65, 67, 70, 72, 73,
        74, 75, 76, 77, 78, 79, 80, 81,
        82, 84, 85, 86, 87, 88, 89, 90,
    ]

    dataset = foz.load_zoo_dataset(
        "coco-2017",
        split="validation",
        dataset_name="evaluate-detections-tutorial",
    )
    dataset.persistent = True

    detectionDir = "/home/Develop/Dataset/Coco2017/validation/val2017Pred/"

    # dataset.delete_sample_field("faster_rcnn")

    # KEY: the output classes from the Ai4prod YOLO evaluation must be mapped
    # to COCO classes. This is because the COCO annotations have 90 classes,
    # while YOLOv4 predicts only 80; e.g., YOLOv4 output class 0 is COCO
    # class 1 (CocoMap[0] == 1).

    with capsys.disabled():

        classes = dataset.default_classes

        with fo.ProgressBar() as pb:
            for sample in pb(dataset):

                image = Image.open(sample.filepath)
                w, h = image.size

                head, tail = os.path.split(sample.filepath)
                filename, file_extension = os.path.splitext(tail)

                # One image can have multiple detections
                detections = []

                csvPath = detectionDir + filename + ".txt"

                if os.path.isfile(csvPath):
                    with open(csvPath, "r") as file:
                        reader = csv.reader(file)
                        for row in reader:
                            # Skip the header row
                            if row[0] == "x1":
                                continue

                            # Detections are absolute values with respect to the
                            # original image size: left_x, left_y, width, height
                            x1 = float(row[0])
                            y1 = float(row[1])
                            width = float(row[2])
                            height = float(row[3])

                            # Coordinate normalization
                            rel_box = [x1 / w, y1 / h, width / w, height / h]

                            score = float(row[4])
                            # FiftyOne requires the correct COCO class
                            label = classes[CocoMap[int(row[5])]]

                            detections.append(
                                fo.Detection(
                                    label=label,
                                    bounding_box=rel_box,
                                    confidence=score,
                                )
                            )
                else:
                    print("FILE NOT FOUND")

                # Save prediction data in the dataset
                sample["yolov4"] = fo.Detections(detections=detections)
                sample.save()

        print("EVALUATION")

        # eval_key: always use the same key to evaluate yolov4 predictions
        results = dataset.evaluate_detections(
            "yolov4",
            gt_field="ground_truth",
            eval_key="yolov4_eval",
            compute_mAP=True,
        )

        print(results.mAP())
        print(results.metrics())

        assert results.mAP() > 0.47