Пример #1
0
def select_multi_class_train_eval_dataset(dataset_name, prediction_field, train_size):
    """Re-tag samples into fixed-size multi-class train and eval splits.

    Existing ``multi_class_train`` / ``multi_class_eval`` tags are stripped
    first. Then, for every distinct ``norm_model.label`` value, the samples
    holding at least one ``auto_aug_predict_tiled`` detection are shuffled.
    When 200 or more such samples exist, the first 100 are tagged
    ``multi_class_train`` and all remaining ones ``multi_class_eval``;
    models with fewer samples are left untagged.

    Args:
        dataset_name: name of the FiftyOne dataset to load
        prediction_field: accepted but not used by this function
        train_size: accepted but not used by this function
    """
    dataset = fo.load_dataset(dataset_name)

    # Start from a clean slate: drop both split tags wherever they are set.
    cleanup_steps = (
        ("multi_class_train", "Removing existing multi_class_train tags"),
        ("multi_class_eval", "Removing existing multi_class_eval tags"),
    )
    for stale_tag, message in cleanup_steps:
        logging.info(message)
        for sample in dataset.match_tags(stale_tag):
            try:
                sample.tags = [tag for tag in sample.tags if tag != stale_tag]
                sample.save()
            except ValueError:
                pass

    has_detections = F("auto_aug_predict_tiled.detections").length() > 0
    for model_label in dataset.distinct("norm_model.label"):
        candidates = (
            dataset.filter_labels("norm_model", (F("label") == model_label))
            .match(has_detections)
            .shuffle()
        )
        total = len(candidates)
        print("{}: {}".format(model_label, total))
        if total < 200:
            continue

        for sample in candidates[:100]:
            sample.tags.append("multi_class_train")
            sample.save()
        for sample in candidates[100:]:
            sample.tags.append("multi_class_eval")
            sample.save()
Пример #2
0
def main(
    dataset_name,
    label_map_path,
    groundtruth_loc_field_name,
    groundtruth_img_labels_field_name,
    prediction_field_name,
    iou_threshold,
):
    """Evaluate a dataset, then copy TP and FP detections into new fields.

    Loads the named FiftyOne dataset, passes it to ``evaluate_dataset``
    together with the supplied field names and IoU threshold, and afterwards
    clones the detections whose ``eval`` attribute is ``"true_positive"`` /
    ``"false_positive"`` into ``<prediction_field_name>_TP`` and
    ``<prediction_field_name>_FP`` respectively.

    Args:
        dataset_name: name of the FiftyOne dataset to load
        label_map_path: forwarded to ``evaluate_dataset``
        groundtruth_loc_field_name: forwarded to ``evaluate_dataset``
        groundtruth_img_labels_field_name: forwarded to ``evaluate_dataset``
        prediction_field_name: field holding the predicted detections
        iou_threshold: forwarded to ``evaluate_dataset``
    """
    dataset = fo.load_dataset(dataset_name)

    evaluate_dataset(
        dataset=dataset,
        label_map_path=label_map_path,
        groundtruth_loc_field_name=groundtruth_loc_field_name,
        groundtruth_img_labels_field_name=groundtruth_img_labels_field_name,
        prediction_field_name=prediction_field_name,
        iou_threshold=iou_threshold,
    )

    # (progress message, value of the "eval" attribute, suffix of the new field)
    clone_plan = (
        ("Cloning True Positives to a new field...", "true_positive", "_TP"),
        ("Cloning False Positives to a new field...", "false_positive", "_FP"),
    )
    for message, eval_value, suffix in clone_plan:
        print(message)
        matching_view = dataset.filter_detections(
            prediction_field_name, F("eval") == eval_value)
        matching_view.clone_sample_field(
            prediction_field_name, prediction_field_name + suffix)
Пример #3
0
def test_accuracy_resnet50(capsys):
    """Check that stored ResNet-50 predictions reach > 0.74 accuracy.

    For every sample of the ``imagenet_validation`` dataset, the matching
    ``<image name>.txt`` file in the prediction directory is read as CSV; the
    first column of each row is a class index, which is stored on the sample
    as a ``resnet50`` classification. The predictions are then evaluated
    against the ``ground_truth`` field.
    """
    prediction_dir = "/home/Develop/Dataset/Imagenet/Validation-2012/prediction"

    dataset = fo.load_dataset("imagenet_validation")
    class_names = dataset.default_classes

    with capsys.disabled():
        with fo.ProgressBar() as progress:
            for sample in progress(dataset):
                image_name = os.path.basename(sample.filepath)
                stem = os.path.splitext(image_name)[0]
                prediction_path = prediction_dir + "/" + stem + ".txt"

                with open(prediction_path, "r") as prediction_file:
                    for row in csv.reader(prediction_file):
                        # Each row overwrites the field, so the last row wins.
                        sample["resnet50"] = fo.Classification(
                            label=class_names[int(row[0])], )
                        sample.save()

        results = dataset.evaluate_classifications(
            "resnet50",
            gt_field="ground_truth",
            eval_key="resnet50_eval",
        )
        print(results.metrics())
        assert results.metrics()["accuracy"] > 0.74
Пример #4
0
def normalize_model_values(dataset_name):
    """Attach a standardized plane model label to each sample.

    The plane model strings received from ADS-B broadcasts are not
    standardized: an A319, for instance, may show up as A319-112, A319-115
    or A319-132. Each sample's ``model_name`` label is looked up in
    ``plane_model_dict.json`` (raw string -> standardized model) and, when a
    match exists, the result is stored in the ``norm_model`` field. Samples
    without a match are only logged.

    Args:
        dataset_name - name of the voxel51 dataset to load

    Returns:
        dataset - the loaded voxel51 dataset object
    """
    # TODO: Need to add testing.

    dataset = fo.load_dataset(dataset_name)

    # The json file maps raw plane model strings to their standardized model
    with open("plane_model_dict.json", "r") as mapping_file:
        raw_to_standard = json.load(mapping_file)

    # Only samples that actually carry a model_name are visited
    for sample in dataset.exists("model_name"):
        raw_model = sample["model_name"].label
        standard_model = raw_to_standard.get(raw_model)
        if standard_model is None:
            logging.info("Match not found for: %s", raw_model)
            continue

        sample["norm_model"] = fo.Classification(label=standard_model)
        sample.save()

    return dataset
Пример #5
0
def load_fo_dataset(dir, name):
    """Return the FiftyOne dataset ``name``, importing it from ``dir`` if new.

    When no dataset called ``name`` exists yet, it is created from the COCO
    detection dataset stored in ``dir`` and marked persistent. Otherwise the
    existing dataset is loaded and ``dir`` is ignored.
    """
    if fo.dataset_exists(name):
        return fo.load_dataset(name)

    imported = fo.Dataset.from_dir(dir, fo.types.COCODetectionDataset, name)
    imported.persistent = True
    return imported
Пример #6
0
def cleanup_fo_dataset():
    """Yield once, then delete the ``_iv_test`` / ``_iv_test_1`` datasets.

    Everything after the ``yield`` is teardown: each of the two test dataset
    names that is currently listed by ``list_datasets()`` is loaded and
    deleted.
    """
    yield

    for stale_name in ("_iv_test", "_iv_test_1"):
        if stale_name not in list_datasets():
            continue
        load_dataset(stale_name).delete()
Пример #7
0
def add_normalized_model_to_plane_detection(dataset_name, prediction_field, output_field):
    """Copy predicted detections, relabelled with the normalized plane model.

    For every sample that has a ``norm_model`` value, the detections stored
    in ``prediction_field`` are copied, each copied detection's label is
    replaced by the sample's ``norm_model`` label, and the result is saved
    in ``output_field``. The original ``prediction_field`` is not modified.

    Samples whose ``prediction_field`` is empty (``None``) are skipped.

    Args:
        dataset_name: name of the FiftyOne dataset to load
        prediction_field: field holding the source detections
        output_field: field that receives the relabelled copy
    """
    dataset = fo.load_dataset(dataset_name)
    for sample in dataset.exists("norm_model"):
        predictions = sample[prediction_field]
        if predictions is None:
            # Nothing was predicted for this sample, so there is nothing to
            # relabel; calling .copy() on None would raise AttributeError.
            continue

        relabelled = predictions.copy()
        normalized_label = sample["norm_model"]["label"]

        for detection in relabelled["detections"]:
            detection["label"] = normalized_label

        sample[output_field] = relabelled
        sample.save()
Пример #8
0
def build_multi_class_train_eval_dataset(dataset_name):
    """Tag one aircraft per model for training and the others for eval.

    For every distinct ``norm_model.label`` the distinct ``icao24.label``
    values are collected. When a model has more than one aircraft, the first
    one is tagged ``multi_class_train`` and every other one
    ``multi_class_eval`` (via ``_tag_samples_by_icao24``), and the split is
    printed. Models with a single aircraft are skipped.

    Args:
        dataset_name: name of the FiftyOne dataset to load
    """
    dataset = fo.load_dataset(dataset_name)
    for model_label in dataset.distinct("norm_model.label"):
        per_model_view = dataset.filter_labels(
            "norm_model", (F("label") == model_label)).select_fields("icao24")
        aircraft_ids = per_model_view.distinct("icao24.label")
        if len(aircraft_ids) <= 1:
            continue

        train_id = aircraft_ids[0]
        eval_ids = aircraft_ids[1:]

        _tag_samples_by_icao24(dataset, train_id, "multi_class_train")
        for icao24 in eval_ids:
            _tag_samples_by_icao24(dataset, icao24, "multi_class_eval")

        print("{}: {}".format(model_label, len(aircraft_ids)))
        print("\tTrain:{}".format(train_id))
        print("\tEval:{}".format(eval_ids))
Пример #9
0
def evaluate_detection_model(dataset_name, prediction_field, evaluation_key,
                             ground_truth_field):
    """Evaluate detections on the ``multi_class_eval`` samples.

    Samples tagged ``multi_class_eval`` are evaluated with
    ``evaluate_detections`` (mAP enabled). A report for the 15 most common
    ground-truth classes is printed, TP/FP/FN totals and the mAP are logged,
    and PR-curve and confusion-matrix plots are written to
    ``/tf/dataset-export/<evaluation_key>_*.png``.

    Note that the TP/FP/FN totals are summed over the whole dataset, not
    only over the evaluated view.

    Args:
        dataset_name: name of the FiftyOne dataset to load
        prediction_field: field holding the predicted detections
        evaluation_key: key under which the evaluation results are stored
        ground_truth_field: field holding the ground-truth detections
    """
    dataset = fo.load_dataset(dataset_name)

    view = dataset.match_tags("multi_class_eval")

    # Set an empty detections field wherever there isn't one, saving each
    # touched sample once.
    for sample in view:
        backfilled = False
        for field_name in (ground_truth_field, prediction_field):
            if sample[field_name] is None:
                sample[field_name] = fo.Detections(detections=[])
                backfilled = True
        if backfilled:
            sample.save()

    results = view.evaluate_detections(prediction_field,
                                       gt_field=ground_truth_field,
                                       eval_key=evaluation_key,
                                       compute_mAP=True)

    # Get the 15 most common ground-truth classes in the view
    counts = view.count_values(
        "{}.detections.label".format(ground_truth_field))
    classes = sorted(counts, key=counts.get, reverse=True)[:15]

    # Print a classification report for those classes
    results.print_report(classes=classes)

    # Log some statistics about the total TP/FP/FN counts
    logging.info("TP: %d", dataset.sum(evaluation_key + "_tp"))
    logging.info("FP: %d", dataset.sum(evaluation_key + "_fp"))
    logging.info("FN: %d", dataset.sum(evaluation_key + "_fn"))

    logging.info("mAP: %s", results.mAP())

    plot = results.plot_pr_curves(classes=classes, backend="matplotlib")
    plot.savefig("/tf/dataset-export/" + evaluation_key + '_pr_curves.png')

    plot = results.plot_confusion_matrix(classes=classes, backend="matplotlib")
    plot.savefig("/tf/dataset-export/" + evaluation_key +
                 '_confusion_matrix.png')
Пример #10
0
def random_multi_class_train_eval_dataset(dataset_name):
    """Randomly split the samples of each aircraft model into train and eval.

    Existing ``multi_class_train`` / ``multi_class_eval`` tags are removed
    first. Then, per distinct ``norm_model.label``, the samples that have at
    least one ``multi_class_detections`` detection are shuffled; the first
    75% (rounded down) are tagged ``multi_class_train`` and all remaining
    samples ``multi_class_eval``. Finally, up to 250 samples without any
    ``multi_class_detections`` detection are added to the training set.

    Args:
        dataset_name: name of the FiftyOne dataset to load
    """
    dataset = fo.load_dataset(dataset_name)

    # Remove any existing split tags so the new split starts fresh.
    for stale_tag in ("multi_class_train", "multi_class_eval"):
        logging.info("Removing existing %s tags", stale_tag)
        for sample in dataset.match_tags(stale_tag):
            try:
                sample.tags = [tag for tag in sample.tags if tag != stale_tag]
                sample.save()
            except ValueError:
                # Keep going (best effort) but leave a trace instead of
                # silently ignoring the failure.
                logging.warning("Could not remove tag %s from a sample",
                                stale_tag)

    has_detections = F("multi_class_detections.detections").length() > 0
    for norm_model in dataset.distinct("norm_model.label"):
        view = (
            dataset.filter_labels("norm_model", (F("label") == norm_model))
            .match(has_detections)
            .shuffle()
        )

        total = len(view)
        train_count = math.floor(total * .75)
        # Everything past the training slice goes to eval, so derive the
        # count from the slice rather than from a separate floor(total*.25),
        # which under-reports whenever total is not a multiple of four.
        eval_count = total - train_count

        for sample in view[:train_count]:
            sample.tags.append("multi_class_train")
            sample.save()
        for sample in view[train_count:]:
            sample.tags.append("multi_class_eval")
            sample.save()

        print("{} Total: {} Train: {} Eval: {}".format(
            norm_model, total, train_count, eval_count))

    # Add up to 250 samples without any detections to the training set.
    empty_view = dataset.match(
        F("multi_class_detections.detections").length() == 0).take(250)
    for sample in empty_view:
        sample.tags.append("multi_class_train")
        sample.save()
Пример #11
0
def export_yolo_multi_class_dataset(dataset_name, label_field, tag, export_name):
    """Export the samples carrying ``tag`` as a YOLOv4 dataset and tar it.

    The export is written to ``/tf/dataset-export/<tag>_<export_name>/`` and
    then archived as ``/tf/dataset-export/<tag>_<export_name>.tar.gz``.

    Args:
        dataset_name: name of the FiftyOne dataset to load
        label_field: label field to export
        tag: sample tag selecting what to export; also prefixes the export name
        export_name: suffix of the export directory / archive name
    """
    export_title = tag + "_" + export_name
    export_dir = "/tf/dataset-export/" + export_title + "/"
    logging.info("Export samples tagged: %s to %s and labeling them using: %s",
                 tag, export_dir, label_field)
    dataset = fo.load_dataset(dataset_name)
    view = dataset.match_tags(tag).sort_by("norm_model.label")

    # The type of dataset to export
    # Any subclass of `fiftyone.types.Dataset` is supported
    dataset_type = fo.types.YOLOv4Dataset

    # Export the dataset!
    view.export(export_dir=export_dir, dataset_type=dataset_type, label_field=label_field)

    export_file = "/tf/dataset-export/" + export_title + ".tar.gz"

    # Pass the arguments as a list (no shell) so that spaces or shell
    # metacharacters in `tag` / `export_name` cannot break or alter the command.
    subprocess.run(["/bin/tar", "-zcvf", export_file, export_dir])
Пример #12
0
def delete_dataset(name):
    """Load the FiftyOne dataset called ``name`` and delete it.

    Per the original notes on this function: any in-memory reference to the
    deleted dataset keeps only ``Dataset.name`` and ``Dataset.deleted`` as
    valid attributes (other access raises :class:`DatasetError`), and
    in-memory samples are detached so that ``sample.in_dataset == False``.

    Args:
        name: the name of the dataset

    Raises:
        ValueError: if the dataset is not found
    """
    fo.load_dataset(name).delete()
Пример #13
0
def split_multi_class_train_eval_dataset(dataset_name):
    """Split aircraft into Training and Eval groups, model by model.

    Existing ``multi_class_train`` / ``multi_class_eval`` tags are removed
    first. For every normalized aircraft model with more than one distinct
    ``icao24`` value, the first 75% of those aircraft (rounded down) are
    tagged ``multi_class_train`` and the rest ``multi_class_eval`` through
    ``_tag_samples_by_icao24``. Models with a single aircraft are skipped.

    Args:
        dataset_name (): The name of the Voxel51 dataset to use
    """
    dataset = fo.load_dataset(dataset_name)

    # Remove any existing tags from the dataset to ensure a fresh start
    cleanup_steps = (
        ("multi_class_train", "Removing existing multi_class_train tags"),
        ("multi_class_eval", "Removing existing multi_class_eval tags"),
    )
    for stale_tag, message in cleanup_steps:
        logging.info(message)
        for sample in dataset.match_tags(stale_tag):
            try:
                sample.tags = [tag for tag in sample.tags if tag != stale_tag]
                sample.save()
            except ValueError:
                pass

    # Walk over all of the unique normalized aircraft models
    for model_label in dataset.distinct("norm_model.label"):
        per_model_view = (
            dataset.filter_labels("norm_model", (F("label") == model_label))
            .select_fields("icao24")
            .shuffle()
        )
        # NOTE(review): distinct() may return its values in sorted order
        # regardless of the shuffle above - confirm whether this split is
        # actually meant to be (and is) random.
        aircraft_ids = per_model_view.distinct("icao24.label")
        if len(aircraft_ids) <= 1:
            continue

        split_at = math.floor(len(aircraft_ids) * .75)
        train_aircraft = aircraft_ids[:split_at]
        eval_aircraft = aircraft_ids[split_at:]
        print("{} Total: {} Train: {} Eval: {}".format(
            model_label, len(aircraft_ids), len(train_aircraft), len(eval_aircraft)))

        # The first training aircraft is tagged with the helper's fourth
        # argument set to False, every later call passes True
        # (presumably a "limit/sub-sample" switch - verify in the helper).
        for position, icao24 in enumerate(train_aircraft):
            _tag_samples_by_icao24(dataset, icao24, "multi_class_train", position > 0)

        for icao24 in eval_aircraft:
            _tag_samples_by_icao24(dataset, icao24, "multi_class_eval", True)
Пример #14
0
    def load_sample(
        self, sample: Mapping[str, str]
    ) -> Mapping[str, Union[torch.Tensor, torch.Size]]:
        """Replace a sample's image path with the image, mask and metadata.

        The path stored under ``DefaultDataKeys.INPUT`` is used both to read
        the image from disk and to look up the matching FiftyOne sample, whose
        ``self.label_field`` mask becomes the target. ``sample`` is updated in
        place and returned.
        """
        fo_dataset = fo.load_dataset(self._fo_dataset_name)

        image_path = sample[DefaultDataKeys.INPUT]
        labelled_sample = fo_dataset[image_path]

        image: torch.Tensor = torchvision.io.read_image(image_path)  # CxHxW
        mask: torch.Tensor = torch.from_numpy(
            labelled_sample[self.label_field].mask)  # HxW

        metadata = {
            "filepath": image_path,
            "size": image.shape,
        }
        sample[DefaultDataKeys.INPUT] = image.float()
        sample[DefaultDataKeys.TARGET] = mask.float()
        sample[DefaultDataKeys.METADATA] = metadata
        return sample
Пример #15
0
def create_voxel51_dataset(dataset_name):
    """Return the voxel51 dataset ``dataset_name``, creating it if needed.

    Creation is attempted first and the new dataset is made persistent. If
    that raises ``ValueError`` (the dataset already exists), the existing
    dataset is loaded instead.

    Args:
        dataset_name: name of the voxel51 dataset to create or load

    Returns:
        dataset (voxel51 dataset object)
    """
    try:
        created = fo.Dataset(name=dataset_name)
        created.persistent = True
        logging.info("Created %s dataset", dataset_name)
        return created
    except ValueError:
        # The name is already taken, so fall back to the stored dataset
        existing = fo.load_dataset(name=dataset_name)
        logging.info("Dataset already exists. Loaded %s dataset", dataset_name)
        return existing
Пример #16
0
def merge_labelbox_dataset_with_voxel51(voxel51_dataset_name,
                                        labelbox_json_path,
                                        labelbox_id_field="labelbox_id"):
    """Merge the labels created via labelbox into the voxel51 dataset.

    The json referenced by labelbox_json_path must be manually downloaded
    from the labelbox website.

    Args:
        voxel51_dataset_name (str) - name of the voxel51 dataset to load
        labelbox_json_path (str) - path to the exported labelbox json file
        labelbox_id_field (str) - unique ID field required for merging

    Returns:
        None
    """
    foul.import_from_labelbox(
        fo.load_dataset(voxel51_dataset_name),
        labelbox_json_path,
        labelbox_id_field=labelbox_id_field,
    )
Пример #17
0
def test_accuracy_u2Squared(capsys):
    """Test the U2Squared F-score on the DUTS-TE dataset.

    For every sample of ``duts_te_validation`` the predicted mask with the
    same file name is read from the prediction directory, binarized at 127
    and stored as a ``u2squared`` segmentation. The segmentations are then
    evaluated against ``ground_truth`` and the F-score must exceed 0.80.
    """

    dataset = fo.load_dataset("duts_te_validation")

    DatasetPathPrediction = "/home/Develop/Dataset/SemanticSegmentation/DUTS-TE/DUTS-TE-Image-Pred-Mask/"
    with capsys.disabled():
        with fo.ProgressBar() as pb:
            # Iterate through the progress bar so it actually reports progress
            for sample in pb(dataset):

                maskPredPath = DatasetPathPrediction + os.path.basename(
                    sample.filepath)

                print(maskPredPath)

                maskPred = cv2.imread(maskPredPath, cv2.IMREAD_UNCHANGED)
                # cv2.imread returns None instead of raising when the file is
                # missing or unreadable; fail with a clear message here.
                assert maskPred is not None, (
                    "Could not read predicted mask: " + maskPredPath)
                _, maskPred = cv2.threshold(maskPred, 127, 255,
                                            cv2.THRESH_BINARY)

                sample["u2squared"] = fo.Segmentation(mask=maskPred)
                sample.save()

        results = dataset.evaluate_segmentations(
            "u2squared",
            gt_field="ground_truth",
            eval_key="eval_segmentation",
        )

        metrics = results.metrics()
        print(metrics)
        # 0.80 is the value reported in the paper as weighted F-measure for DUTS-TE Dataset
        assert (metrics["fscore"] > 0.80)
Пример #18
0
def create_fo_dataset(
        detections: List[Any],
        dataset_name: str,
        exist_ok: bool = False,
        persistent=False,
        field_name: str = "icevision_record",
        transformations=None,  # Undo postprocess via sample: icevision.models.inference.postprocess_bbox
        undo_bbox_tfms_fn: Callable[[Union[
            List[int], BBox]], List[int]] = None,  # Undo postprocess custom fn
) -> Dataset:
    """
    Convert predictions or records to samples and add them to a fo.Dataset.

    A record holds a preprocessed image of your data, whereas fiftyone
    expects a filepath to the image, so the pre-processing of the bounding
    boxes has to be undone. Two ways to do so are supported:

        - the postprocess_bbox() function from icevision.models.inference
          (driven by ``transformations``)

        - a custom ``undo_bbox_tfms_fn``

    The dataset is created when no dataset of that name exists. An existing
    dataset is only reused when ``exist_ok`` is set.

    Parameters
    ----------
    detections: An iterable of iv Predictions or records
    dataset_name: Name of dataset
    exist_ok: Whether the data may be added to an existing dataset (active opt-in)
    persistent: Whether a newly created dataset should be persistent
    field_name: The field name used for elements that are records

    transformations: list of model-pre-processing transforms
    undo_bbox_tfms_fn: Custom function that undoes transformations

    Returns
    -------
    fo.Dataset

    Raises
    ------
    ImportError: if fiftyone is not available
    ValueError: if the dataset already exists and ``exist_ok`` is false
    """

    if not fiftyone_available:
        raise ImportError("Fiftyone is not installed on you system.")

    # Resolve the target dataset: reuse only on explicit opt-in, else create
    name_taken = dataset_name in list_datasets()
    if name_taken and not exist_ok:
        raise ValueError(
            f"Dataset with name {dataset_name} already exist and exist_ok is {exist_ok}"
        )
    if name_taken:
        target_dataset = load_dataset(dataset_name)
    else:
        target_dataset = Dataset(name=dataset_name, persistent=persistent)

    def _to_sample(element):
        # Predictions and plain records go through different converters
        if isinstance(element, Prediction):
            return convert_prediction_to_fo_sample(
                element,
                transformations=transformations,
                undo_bbox_tfms_fn=undo_bbox_tfms_fn,
            )
        return convert_record_to_fo_sample(
            element,
            field_name=field_name,
            transformations=transformations,
            undo_bbox_tfms_fn=undo_bbox_tfms_fn,
        )

    target_dataset.add_samples([_to_sample(element) for element in detections])

    return target_dataset
Пример #19
0
def load_zoo_dataset(
    name,
    split=None,
    splits=None,
    dataset_dir=None,
    download_if_necessary=True,
    drop_existing_dataset=False,
):
    """Loads the dataset of the given name from the FiftyOne Dataset Zoo as
    a :class:`fiftyone.core.dataset.Dataset`.

    By default, the dataset is downloaded first if it does not already exist
    in the specified directory. The resulting FiftyOne dataset is named after
    the zoo dataset, with the requested splits appended (``name-split1-...``).
    If a dataset of that name already exists it is returned as-is, unless
    ``drop_existing_dataset`` is set, in which case it is deleted and rebuilt.

    Args:
        name: the name of the zoo dataset to load. Call
            :func:`list_zoo_datasets` to see the available datasets
        split (None): a split to load, if applicable. Typical values are
            ``("train", "validation", "test")``. If neither ``split`` nor
            ``splits`` are provided, all available splits are loaded. Consult
            the documentation for the :class:`ZooDataset` you specified to see
            the supported splits
        splits (None): a list of splits to load, if applicable. Typical values
            are ``("train", "validation", "test")``. If neither ``split`` nor
            ``splits`` are provided, all available splits are loaded. Consult
            the documentation for the :class:`ZooDataset` you specified to see
            the supported splits
        dataset_dir (None): the directory in which the dataset is stored or
            will be downloaded. By default,
            :func:`fiftyone.core.dataset.get_default_dataset_dir` is used
        download_if_necessary (True): whether to download the dataset if it is
            not found in the specified dataset directory
        drop_existing_dataset (False): whether to drop an existing dataset
            with the same name if it exists

    Returns:
        a :class:`fiftyone.core.dataset.Dataset`
    """
    splits = _parse_splits(split, splits)

    if not download_if_necessary:
        zoo_dataset, dataset_dir = _parse_dataset_details(name, dataset_dir)
        info = zoo_dataset.load_info(dataset_dir)
    else:
        info, dataset_dir = download_zoo_dataset(name,
                                                 splits=splits,
                                                 dataset_dir=dataset_dir)
        zoo_dataset = info.get_zoo_dataset()

    # The FiftyOne dataset name encodes which splits were requested
    dataset_name = zoo_dataset.name
    if splits is not None:
        dataset_name = dataset_name + "-" + "-".join(splits)

    if fo.dataset_exists(dataset_name):
        if drop_existing_dataset:
            fo.delete_dataset(dataset_name)
        else:
            logger.info(
                "Loading existing dataset '%s'. To reload from disk, first "
                "delete the existing dataset",
                dataset_name,
            )
            return fo.load_dataset(dataset_name)

    # No explicit request: fall back to every split the zoo dataset supports
    if splits is None and zoo_dataset.has_splits:
        splits = zoo_dataset.supported_splits

    dataset = fo.Dataset(dataset_name)
    dataset_type = info.get_dataset_type()

    if not splits:
        logger.info("Loading '%s'", zoo_dataset.name)
        dataset.add_dir(dataset_dir, dataset_type)
    else:
        for split_name in splits:
            split_dir = zoo_dataset.get_split_dir(dataset_dir, split_name)

            logger.info("Loading '%s' split '%s'", zoo_dataset.name, split_name)
            # Each sample is tagged with the split it came from
            dataset.add_dir(split_dir, dataset_type, tags=[split_name])

    if info.classes is not None:
        dataset.info["classes"] = info.classes
        dataset.save()

    return dataset
Пример #20
0
def run_detection_model(dataset_name, training_name, prediction_field):
    """Run the exported detection model over every sample of a dataset.

    The saved model is loaded from
    ``/tf/model-export/<training_name>/image_tensor_saved_model/saved_model``.
    Each sample's image is passed through it, and every detection scoring
    above 0.5 is stored (label, relative bounding box, confidence) in
    ``prediction_field`` of the sample.

    Args:
        dataset_name: name of the FiftyOne dataset to load
        training_name: name of the training run; selects the model directory
            and is passed to ``_load_label_map``
        prediction_field: sample field that receives the detections
    """
    model_path = (
        "/tf/model-export/" + training_name + "/image_tensor_saved_model/saved_model"
    )
    # Minimum score for adding a prediction. This helps keep out bad
    # predictions but may need adjusting if the model is not that good yet.
    min_score = 0.5

    logging.info("Loading model...")
    load_started = time.time()
    tf.keras.backend.clear_session()
    detect_fn = tf.saved_model.load(model_path)
    infer = detect_fn.signatures["serving_default"]  # looked up but not used below
    load_seconds = time.time() - load_started
    logging.info("Loading model took: " + str(load_seconds) + "s")

    category_index = _load_label_map(training_name)

    dataset = fo.load_dataset(dataset_name)

    for sample in dataset.select_fields("filepath"):

        sample_started = time.time()
        image_array = img_to_array(load_img(sample.filepath))
        detections = detect_fn(np.expand_dims(image_array, 0))
        kept_detections = []

        scores = detections["detection_scores"][0]
        for i, score in enumerate(scores):
            if not (score > min_score):
                continue

            label = _find_class_name(
                category_index, int(detections["detection_classes"][0][i]))
            confidence = scores[i]
            print("\t- {}: {}".format(label, confidence))

            # TF Obj Detect bounding boxes are: [ymin, xmin, ymax, xmax]

            # For Voxel 51 - Bounding box coordinates should be relative values
            # in [0, 1] in the following format:
            # [top-left-x, top-left-y, width, height]
            box = detections["detection_boxes"][0][i]
            left, top = box[1], box[0]
            right, bottom = box[3], box[2]
            bbox = [left, top, right - left, bottom - top]

            kept_detections.append(
                fo.Detection(label=label, bounding_box=bbox, confidence=confidence)
            )

        # Store detections in a field name of your choice
        sample[prediction_field] = fo.Detections(detections=kept_detections)
        sample.save()
        sample_finished = time.time()

        print("Processing {} took: {}s".format(
            sample.filepath, sample_finished - sample_started))
Пример #21
0
def dataset_test():
	"""Load (or create) a FiftyOne dataset and print basic facts about it.

	The ``if True`` / ``elif False`` chain is a set of manual toggles between
	alternative ways of obtaining a dataset; as written, only the first
	branch (the "quickstart" zoo dataset) runs. The dataset is then made
	persistent and its media type, persistence flag, size, summary and first
	ground-truth detection (if any) are printed.
	"""
	if True:
		# Reuse the stored "quickstart" dataset when present, else fetch it from the zoo.
		my_dataset_name = "quickstart"
		if my_dataset_name in fo.list_datasets():
			dataset = fo.load_dataset(my_dataset_name)
		else:
			dataset = foz.load_zoo_dataset(my_dataset_name)
	elif False:
		dataset = foz.load_zoo_dataset(
			"coco-2017",
			split="validation",
			dataset_name="evaluate-detections-tutorial",
		)
	elif False:
		dataset = foz.load_zoo_dataset(
			"open-images-v6",
			split="validation",
			max_samples=100,
			seed=51,
			shuffle=True,
		)
	elif True:
		print("Datasets = {}.".format(fo.list_datasets()))

		my_dataset_name = "my_dataset"
		try:
			# REF [site] >> https://voxel51.com/docs/fiftyone/user_guide/dataset_creation/datasets.html
			if True:
				dataset_dir_path = "/path/to/data"
				dataset = fo.Dataset.from_dir(
					dataset_dir=dataset_dir_path,
					dataset_type=fo.types.ImageClassificationDirectoryTree,
					name=my_dataset_name,
				)
			elif False:
				# The directory containing the source images.
				data_dir_path = "/path/to/images"
				# The path to the COCO labels JSON file.
				label_filepath = "/path/to/coco-labels.json"
				dataset = fo.Dataset.from_dir(
					dataset_type=fo.types.COCODetectionDataset,
					data_path=data_dir_path,
					labels_path=label_filepath,
				)
		except ValueError:
			# Creation failed (e.g. the name is already taken): load the stored dataset.
			dataset = fo.load_dataset(my_dataset_name)

		#fo.delete_dataset(my_dataset_name)
	elif False:
		if True:
			# Create a dataset from a list of images.
			dataset = fo.Dataset.from_images(
				["/path/to/image1.jpg", "/path/to/image2.jpg",]
			)
		elif False:
			# Create a dataset from a directory of images.
			dataset = fo.Dataset.from_images_dir("/path/to/images")
		elif False:
			# Create a dataset from a glob pattern of images.
			dataset = fo.Dataset.from_images_patt("/path/to/images/*.jpg")
	elif False:
		if True:
			# Create a dataset from a list of videos
			dataset = fo.Dataset.from_videos(
				["/path/to/video1.mp4", "/path/to/video2.mp4",]
			)
		elif False:
			# Create a dataset from a directory of videos.
			dataset = fo.Dataset.from_videos_dir("/path/to/videos")
		elif False:
			# Create a dataset from a glob pattern of videos.
			dataset = fo.Dataset.from_videos_patt("/path/to/videos/*.mp4")
	dataset.persistent = True

	print("Media type = {}.".format(dataset.media_type))
	# Read back the same attribute that was set above (`persistent`).
	print("Persistence = {}.".format(dataset.persistent))
	print("#examples = {}.".format(len(dataset)))
	#print("#examples = {}.".format(dataset.count()))

	# Print some information about the dataset.
	print(dataset)

	# Print a ground truth detection, if the first sample has any.
	sample = dataset.first()
	ground_truth = sample.ground_truth
	detections = getattr(ground_truth, "detections", None) if ground_truth else None
	if detections:
		print(detections[0])
Пример #22
0
def run_detection_model_tiled(
    dataset_name,
    training_name,
    prediction_field,
    sample_tag,
    tile_string,
    tile_overlap: int,
    iou_threshold: float,
):
    """Run the exported detection model over tagged samples using tiling.

    Every selected image is cut into tiles of each requested size. A tile is
    shrunk to fit the model input, run through the model, and the boxes that
    come back are mapped to full-image coordinates. The boxes collected from
    all tiles are then reduced with non-max suppression and written to the
    sample as a ``fo.Detections`` field.

    Args:
        dataset_name (str): name of the voxel51 dataset to load.
        training_name (str): name of the training run; used to locate the
            exported saved model under ``/tf/model-export/`` and passed to
            ``_load_label_map``.
        prediction_field (str): sample field that receives the detections.
        sample_tag (str): only samples carrying this tag are processed.
        tile_string (str): comma-separated tile sizes, each written as
            ``<width>x<height>`` (for example ``"1920x1080,768x768"``).
        tile_overlap (int): overlap value handed to ``_tiles_location_gen``.
        iou_threshold (float): threshold handed to ``_non_max_suppression``.

    Returns:
        None
    """
    model_path = (
        "/tf/model-export/" + training_name + "/image_tensor_saved_model/saved_model"
    )

    # Predictions scoring at or below this value are dropped. It keeps weak
    # predictions out, but may need lowering while the model is still immature.
    min_score = 0.50
    input_tensor_size = 512

    logging.info("Loading model...")
    start_time = time.time()
    tf.keras.backend.clear_session()
    detect_fn = tf.saved_model.load(model_path)
    infer = detect_fn.signatures["serving_default"]
    print(infer.structured_outputs)
    print(infer)
    end_time = time.time()
    elapsed_time = end_time - start_time
    logging.info("Loading model took: " + str(elapsed_time) + "s")

    category_index = _load_label_map(training_name)

    dataset = fo.load_dataset(dataset_name)

    # The tile sizes do not depend on the sample, so parse them only once.
    tile_sizes = []
    for tile_spec in tile_string.split(","):
        dims = tile_spec.split("x")
        tile_sizes.append([int(dims[0]), int(dims[1])])

    # Go through all of the tagged samples in the dataset
    for sample in dataset.match_tags(sample_tag).select_fields("filepath"):

        start_time = time.time()
        img = load_img(
            sample.filepath,
        )
        img_size = img.size
        img_width, img_height = img_size
        predicted_objects = []

        # Collect all of the detections for each tile size:
        for tile_size in tile_sizes:
            for tile_location in _tiles_location_gen(img_size, tile_size, tile_overlap):

                tile = img.crop(tile_location)

                old_size = tile.size  # (width, height)

                # A ratio above 1 means the tile's longest side is smaller
                # than the model input and would need upscaling; skip it.
                ratio = float(input_tensor_size) / max(old_size)
                if ratio > 1:
                    continue
                new_size = tuple(int(x * ratio) for x in old_size)

                # Image.ANTIALIAS was removed in Pillow 10; LANCZOS is the
                # same filter under its current name.
                im = tile.resize(new_size, Image.LANCZOS)

                # Paste the resized tile into the top-left corner of a square
                # canvas of the model input size; the remainder stays black.
                new_im = Image.new("RGB", (input_tensor_size, input_tensor_size))
                new_im.paste(im, (0, 0))

                img_array = img_to_array(new_im, dtype="uint8")
                img_batch = np.array([img_array])

                detections = detect_fn(img_batch)

                # Pull the first (only) batch entry out of TensorFlow once
                # instead of indexing the tensors element by element.
                scores = detections["detection_scores"][0].numpy()
                boxes = detections["detection_boxes"][0].numpy()
                classes = detections["detection_classes"][0].numpy()

                for i, score in enumerate(scores):
                    if score > min_score:
                        # Boxes are read as [ymin, xmin, ymax, xmax], scaled
                        # by the padded input size to get input pixels.
                        ymin, xmin, ymax, xmax = boxes[i]
                        bbox = [
                            xmin * input_tensor_size,
                            ymin * input_tensor_size,
                            xmax * input_tensor_size,
                            ymax * input_tensor_size,
                        ]

                        # Undo the shrink, then shift the box from tile
                        # coordinates to full-image coordinates.
                        scaled_bbox = [number / ratio for number in bbox]
                        repositioned_bbox = _reposition_bounding_box(
                            scaled_bbox, tile_location
                        )
                        # Plain float so the stored value is not a tensor.
                        confidence = float(score)
                        label = _find_class_name(category_index, int(classes[i]))
                        predicted_objects.append(
                            Object(label, confidence, repositioned_bbox)
                        )

        # Non-max suppression runs over all predictions together, regardless
        # of their label.
        exportDetections = []
        idxs = _non_max_suppression(predicted_objects, iou_threshold)
        for idx in idxs:
            kept = predicted_objects[idx]
            x1 = kept.bbox[0] / img_width
            y1 = kept.bbox[1] / img_height
            x2 = kept.bbox[2] / img_width
            y2 = kept.bbox[3] / img_height

            # Stored as [x, y, width, height] relative to the image size.
            bbox = [x1, y1, x2 - x1, y2 - y1]
            exportDetections.append(
                fo.Detection(
                    label=kept.label,
                    bounding_box=bbox,
                    confidence=kept.score,
                )
            )

        # Store detections in a field name of your choice
        sample[prediction_field] = fo.Detections(detections=exportDetections)
        sample.save()
        end_time = time.time()
        print("{} - Processing {} took: {}s".format(len(exportDetections),sample.filepath, end_time - start_time))
        for detect in exportDetections:
            print("\t - {} {}%".format(detect.label,detect.confidence))
Пример #23
0
    export_voxel51_dataset_to_tfrecords,
    get_num_classes_from_label_map,
    load_base_models_json,
    save_mapping_to_file,
    set_filenames,
    _create_list_of_class_names,
)

# from labelbox_utils import merge_labelbox_dataset_with_voxel51
from main import read_config

# pylint: disable=C0103, W0107

# Remove leftover datasets from earlier runs so the tests start from a
# repeatable state.
for _stale_dataset_name in ("test", "test_detection_mapping"):
    try:
        fo.load_dataset(_stale_dataset_name).delete()
    except ValueError:
        # Presumably raised when the dataset does not exist yet; in that
        # case there is nothing to delete.
        pass


def test_build_image_list():
    """Test build_image_list()."""
    output = build_image_list("test")
    assert output[0]["bearing"] == "194"
    assert output[0]["distance"] == "11882"
    assert output[0]["elevation"] == "50"
    assert output[0][
Пример #24
0
def add_faa_data_to_voxel51_dataset(
    voxel51_dataset_name, faa_master_dataset_path, faa_reference_dataset_path
):
    """Add FAA data to each entry in voxel51 dataset.

    The FAA data is first installed by running ``./install_faa_data.sh``.
    Each sample's ``icao24`` label is looked up in the master dataset to get
    its model code, and that code is looked up in the reference dataset.
    Samples whose plane ID or model code cannot be found are logged and left
    unchanged.

    Args:
        voxel51_dataset_name (str) - the voxel51 dataset name
        faa_master_dataset_path - path to FAA master dataset .txt
        faa_reference_dataset_path - path to FAA reference dataset .txt

    Returns:
        dataset (voxel51 dataset object)

    Raises:
        subprocess.CalledProcessError - if the install script exits non-zero
    """
    subprocess.run("./install_faa_data.sh", check=True)

    # import master dataset and strip white space from beacon column
    planes_master = pd.read_csv(faa_master_dataset_path, index_col="MODE S CODE HEX")
    planes_master.index = planes_master.index.str.strip()

    planes_reference = pd.read_csv(
        faa_reference_dataset_path, index_col="CODE", encoding="utf-8-sig"
    )

    dataset = fo.load_dataset(voxel51_dataset_name)

    for row in dataset:
        # render plane_id in uppercase letters for the master dataset lookup
        plane_icao24 = row["icao24"].label.upper()
        # find plane model code associated with the icao24 code, i.e. mode s code hex
        try:
            model_code = planes_master.loc[plane_icao24, "MFR MDL CODE"]
        except (IndexError, KeyError):
            logging.info(
                "Plane ID not found in master dataset. Plane ID: %s", plane_icao24
            )
            continue
        # find reference row with all relevant model data; a model code that
        # is missing from the reference dataset is skipped the same way as an
        # unknown plane ID instead of aborting the whole run
        try:
            plane_reference_row = planes_reference.loc[model_code]
        except KeyError:
            logging.info(
                "Model code not found in reference dataset. Model code: %s",
                model_code,
            )
            continue
        # extract all relevant data from plane_reference_row
        # convert all fields to string
        manufacturer = str(plane_reference_row["MFR"]).rstrip()
        model_name = str(plane_reference_row["MODEL"]).rstrip()
        aircraft_type = str(plane_reference_row["TYPE-ACFT"])
        engine_type = str(plane_reference_row["TYPE-ENG"])
        num_engines = str(plane_reference_row["NO-ENG"])
        num_seats = str(plane_reference_row["NO-SEATS"])
        aircraft_weight = str(plane_reference_row["AC-WEIGHT"])

        # store values in voxel51 dataset row
        row["model_code"] = fo.Classification(label=model_code)
        row["manufacturer"] = fo.Classification(label=manufacturer)
        row["model_name"] = fo.Classification(label=model_name)
        row["aircraft_type"] = fo.Classification(label=aircraft_type)
        row["engine_type"] = fo.Classification(label=engine_type)
        row["num_engines"] = fo.Classification(label=num_engines)
        row["num_seats"] = fo.Classification(label=num_seats)
        row["aircraft_weight"] = fo.Classification(label=aircraft_weight)

        row.save()

    return dataset
Пример #25
0
def upload_vox51_dataset_to_labelbox(
    labelbox_api_key,
    labelbox_dataset_name,
    labelbox_project_name,
    voxel51_dataset_name,
    upload_num_samples: int = 500,
    upload_tag="train",
    avoid_tag="eval",
    resume: bool = False,
    labelbox_id_field="labelbox_id",
):
    """Upload a voxel51 dataset to labelbox.

    Exactly one Labelbox project and one Labelbox dataset with the given
    names must exist; otherwise an error is logged and the process exits
    with status 1.

    Args:
        labelbox_api_key (str)
        labelbox_dataset_name (str)
        labelbox_project_name (str)
        voxel51_dataset_name (str)
        upload_num_samples (int) - number of images to randomly choose for upload
        upload_tag (str) - tag that is added to all of the samples selected for upload
        avoid_tag (str) - do not select samples with this tag
        resume (bool) - continue an upload to Labelbox if a prior one failed to complete
        labelbox_id_field (str) - unique ID required for upload of dataset
    Returns:
        None
    """
    # TODO: Some sort of problem related to labelbox ID
    logging.info("Uploading voxel51 dataset to Labelbox.")

    client = Client(labelbox_api_key)
    # must convert PaginatedCollection to list in order to count length
    projects = list(
        client.get_projects(where=Project.name == labelbox_project_name))

    # ensure there is only one labelbox project of specified name
    num_labelbox_projects = len(projects)
    if num_labelbox_projects != 1:
        logging.error(
            "Expected a single project named: %s but found %s projects",
            labelbox_project_name,
            num_labelbox_projects,
        )
        sys.exit(1)

    project = projects[0]

    # select proper labelbox dataset
    # must convert PaginatedCollection to list in order to count length
    labelbox_datasets = list(
        project.datasets(where=Dataset.name == labelbox_dataset_name))

    # ensure there is only one labelbox dataset of specified name
    num_labelbox_datasets = len(labelbox_datasets)
    if num_labelbox_datasets != 1:
        # logging fills its arguments with %-style placeholders; "{}" would
        # make the logging call itself fail to format the message
        logging.error(
            "Expected a single dataset named: %s but found %s datasets",
            labelbox_dataset_name,
            num_labelbox_datasets,
        )
        sys.exit(1)

    labelbox_dataset = labelbox_datasets[0]

    # load the voxel51 dataset that supplies the samples
    dataset = fo.load_dataset(voxel51_dataset_name)

    # continue a previous upload that failed
    if resume:
        # create a view with the previously selected samples
        # samples that were successfully uploaded will be skipped
        view = dataset.match_tags(upload_tag)
    else:
        # take random sample of images and upload to labelbox
        stage = fo.MatchTags(avoid_tag, bool=False)
        view = dataset.add_stage(stage).shuffle().take(upload_num_samples)

        # add upload_tag to all of the samples being sent to labelbox
        for sample in view:
            sample.tags.append(upload_tag)
            sample.save()

    foul.upload_media_to_labelbox(labelbox_dataset, view, labelbox_id_field)
Пример #26
0
    ret,mask= cv2.threshold(mask, 127,255,cv2.THRESH_BINARY)
    
    sample = fo.Sample(filepath=maskPath,
                    ground_truth=fo.Segmentation(mask=mask))
    
    samples.append(sample)

dataset.add_samples(samples)

# ADD PREDICTIONS
# To add predictions:
# 1) Iterate over every ground-truth sample in the dataset.
# 2) For each sample, store the predicted mask in the custom field "u2squared".


dataset = fo.load_dataset("duts_te_validation")

DatasetPathPrediction= "/home/Develop/Dataset/SemanticSegmentation/DUTS-TE/DUTS-TE-Image-Pred-Mask/"

with fo.ProgressBar() as pb:
    # The dataset has to be wrapped in pb(...) for the progress bar to advance.
    for sample in pb(dataset):

        # The predicted mask has the same file name as the sample's file.
        maskFileName = os.path.basename(sample.filepath)
        maskPredPath = os.path.join(DatasetPathPrediction, maskFileName)

        maskPred= cv2.imread(maskPredPath,cv2.IMREAD_UNCHANGED)
        # cv2.imread returns None instead of raising when the file is
        # missing or unreadable; fail with a message that names the file.
        if maskPred is None:
            raise FileNotFoundError(
                "Could not read predicted mask: " + maskPredPath
            )
        # Binarize the mask: values above 127 become 255, the rest 0.
        _, maskPred= cv2.threshold(maskPred, 127,255,cv2.THRESH_BINARY)

        sample["u2squared"]= fo.Segmentation(mask= maskPred)
        sample.save()