def produce(self,
            *,
            inputs: Inputs,
            timeout: float = None,
            iterations: int = None) -> CallResult[Outputs]:
        """
        Produce image detection predictions.

        Parameters
        ----------
            inputs  : D3M dataframe containing the d3m index, image file name,
                      and bounding box for each image.

        Returns
        -------
            outputs : A d3m dataframe container with the d3m index, bounding boxes as
                      a string (8 coordinate format), and confidence scores.
        """
        # Minimum confidence for a detection to be kept.
        # NOTE(review): the original declared a 0.05 score threshold (plus
        # unused iou_threshold / max_detections locals) but compared against a
        # hard-coded 0.5 in the loop; 0.5 is kept to preserve behavior.
        score_threshold = 0.5

        # Create object that stores backbone information
        backbone = models.backbone(self.hyperparams["backbone"])

        # Create the generator (only used here to recover the number of classes)
        train_generator = CSVGenerator(
            self.annotations,
            self.classes,
            self.base_dir,
            self.hyperparams["batch_size"],
            backbone.preprocess_image,
            shuffle_groups=False,
        )

        # Instantiate model
        model, training_model, prediction_model = self._create_models(
            backbone_retinanet=backbone.retinanet,
            num_classes=train_generator.num_classes(),
            lr=self.hyperparams["learning_rate"],
        )

        # Load model weights saved in fit. os.path.join (instead of string
        # concatenation) works whether or not weights_path ends with a slash.
        training_model.load_weights(
            os.path.join(self.hyperparams["weights_path"], "model_weights.h5"))

        # Convert training model to inference model
        inference_model = models.convert_model(training_model)

        # Generate image paths from the FileName columns' base URIs
        image_cols = inputs.metadata.get_columns_with_semantic_type(
            "https://metadata.datadrivendiscovery.org/types/FileName")
        self.base_dir = [
            inputs.metadata.query(
                (metadata_base.ALL_ELEMENTS,
                 t))["location_base_uris"][0].replace("file:///", "/")
            for t in image_cols
        ]
        # BUG FIX: the original comprehension rebound self.base_dir as its own
        # loop variable; use a local name so self.base_dir keeps the full list.
        self.image_paths = np.array([[
            os.path.join(base_dir, filename)
            for filename in inputs.iloc[:, col]
        ] for base_dir, col in zip(self.base_dir, image_cols)]).flatten()
        self.image_paths = pd.Series(self.image_paths)

        # Initialize output objects
        box_list = []
        score_list = []
        image_name_list = []

        # De-duplicate image paths while preserving order. The original used a
        # quadratic list.index() scan per element; dict.fromkeys is O(n).
        image_list = list(dict.fromkeys(self.image_paths.tolist()))

        start_time = time.time()
        logger.info("Starting testing...")

        # Predict bounding boxes and confidence scores for each image
        for image_path in image_list:
            image = read_image_bgr(image_path)

            # preprocess image for network
            image = preprocess_image(image)
            image, scale = resize_image(image)

            boxes, scores, labels = inference_model.predict_on_batch(
                tf.constant(np.expand_dims(image, axis=0), dtype=tf.float32))

            # correct for image scale
            boxes /= scale

            # NOTE(review): assumes detections arrive sorted by descending
            # score, so the first sub-threshold score ends the scan — confirm
            # against the model's output contract.
            for box, score in zip(boxes[0], scores[0]):
                if score < score_threshold:
                    break

                box_list.append(box.astype(int))
                score_list.append(score)
                # BUG FIX: the original appended `i * len(b)` (the path string
                # repeated four times), which broke the basename -> d3mIndex
                # lookup below. Record the path once per detection.
                image_name_list.append(image_path)

        logger.info(
            f"Testing complete. Testing took {time.time()-start_time} seconds."
        )

        # Convert predicted [x1, y1, x2, y2] boxes into comma-separated
        # 8-coordinate polygon strings (D3M format).
        boxes = np.array(box_list).tolist()
        boxes = [
            ",".join(
                map(str, [b[0], b[1], b[0], b[3], b[2], b[3], b[2], b[1]]))
            for b in boxes
        ]

        # Create mapping between image names and D3M index
        input_df = pd.DataFrame({
            "d3mIndex": inputs.d3mIndex,
            "image": [os.path.basename(path) for path in self.image_paths],
        })
        d3mIdx_image_mapping = input_df.set_index("image").T.to_dict("list")

        # Extract values for image name keys and get missing image predictions (if they exist)
        image_name_list = [os.path.basename(path) for path in image_name_list]
        d3mIdx = [d3mIdx_image_mapping.get(key) for key in image_name_list]
        empty_predictions_image_names = [
            k for k, v in d3mIdx_image_mapping.items() if v not in d3mIdx
        ]
        d3mIdx = [item for sublist in d3mIdx
                  for item in sublist]  # Flatten list of lists

        # Assemble in a Pandas DataFrame
        results = pd.DataFrame({
            "d3mIndex": d3mIdx,
            "bounding_box": boxes,
            "confidence": score_list
        })

        # D3M metrics evaluator needs at least one prediction per image. If RetinaNet does not return
        # predictions for an image, create a dummy empty prediction row to add to results_df for that
        # missing image.
        if len(empty_predictions_image_names) != 0:
            # Create data frame of empty predictions for each missing image and
            # concat with results, then sort by d3mIndex.
            empty_predictions_df = self._fill_empty_predictions(
                empty_predictions_image_names, d3mIdx_image_mapping)
            results_df = pd.concat([results, empty_predictions_df
                                    ]).sort_values("d3mIndex")
        else:
            results_df = results

        # Convert to DataFrame container
        results_df = d3m_DataFrame(results_df)

        # Assemble first output column ('d3mIndex')
        col_dict = dict(
            results_df.metadata.query((metadata_base.ALL_ELEMENTS, 0)))
        col_dict["structural_type"] = str  # was type("1"), i.e. str
        col_dict["name"] = "d3mIndex"
        col_dict["semantic_types"] = (
            "http://schema.org/Integer",
            "https://metadata.datadrivendiscovery.org/types/PrimaryKey",
        )
        results_df.metadata = results_df.metadata.update(
            (metadata_base.ALL_ELEMENTS, 0), col_dict)

        # Assemble second output column ('bounding_box')
        col_dict = dict(
            results_df.metadata.query((metadata_base.ALL_ELEMENTS, 1)))
        col_dict["structural_type"] = str
        col_dict["name"] = "bounding_box"
        col_dict["semantic_types"] = (
            "http://schema.org/Text",
            "https://metadata.datadrivendiscovery.org/types/PredictedTarget",
            "https://metadata.datadrivendiscovery.org/types/BoundingPolygon",
        )
        results_df.metadata = results_df.metadata.update(
            (metadata_base.ALL_ELEMENTS, 1), col_dict)

        # Assemble third output column ('confidence')
        col_dict = dict(
            results_df.metadata.query((metadata_base.ALL_ELEMENTS, 2)))
        col_dict["structural_type"] = str
        col_dict["name"] = "confidence"
        # NOTE(review): scores are floats; schema.org/Integer kept from the
        # original metadata — confirm the intended semantic type.
        col_dict["semantic_types"] = (
            "http://schema.org/Integer",
            "https://metadata.datadrivendiscovery.org/types/Score",
        )
        results_df.metadata = results_df.metadata.update(
            (metadata_base.ALL_ELEMENTS, 2), col_dict)

        return CallResult(results_df)
# ---- Example #2: scraped duplicate of produce() that reuses a pre-loaded training model ----
    def produce(self,
                *,
                inputs: Inputs,
                timeout: float = None,
                iterations: int = None) -> CallResult[Outputs]:
        """
        Produce image detection predictions.

        Parameters
        ----------
            inputs  : D3M dataframe containing the d3m index, image file name,
                      and bounding box for each image.

        Returns
        -------
            outputs : A d3m dataframe container with the d3m index, bounding boxes as
                      a string (8 coordinate format), and confidence scores.
        """
        # Minimum confidence for a detection to be kept.
        # NOTE(review): the original declared a 0.05 score threshold (plus
        # unused iou_threshold / max_detections locals) but compared against a
        # hard-coded 0.5 in the loop; 0.5 is kept to preserve behavior.
        score_threshold = 0.5

        # Convert training model to inference model
        inference_model = models.convert_model(self.training_model)

        # Generate image paths from the FileName columns' base URIs
        image_cols = inputs.metadata.get_columns_with_semantic_type(
            'https://metadata.datadrivendiscovery.org/types/FileName')
        self.base_dir = [
            inputs.metadata.query(
                (metadata_base.ALL_ELEMENTS,
                 t))['location_base_uris'][0].replace('file:///', '/')
            for t in image_cols
        ]
        # BUG FIX: the original comprehension rebound self.base_dir as its own
        # loop variable; use a local name so self.base_dir keeps the full list.
        self.image_paths = np.array([[
            os.path.join(base_dir, filename)
            for filename in inputs.iloc[:, col]
        ] for base_dir, col in zip(self.base_dir, image_cols)]).flatten()
        self.image_paths = pd.Series(self.image_paths)

        # Initialize output objects
        box_list = []
        score_list = []
        image_name_list = []

        # De-duplicate image paths while preserving order. The original used a
        # quadratic list.index() scan per element; dict.fromkeys is O(n).
        image_list = list(dict.fromkeys(self.image_paths.tolist()))

        start_time = time.time()
        print('Starting testing...', file=sys.__stdout__)

        # Predict bounding boxes and confidence scores for each image
        for image_path in image_list:
            image = read_image_bgr(image_path)

            # preprocess image for network
            image = preprocess_image(image)
            image, scale = resize_image(image)

            boxes, scores, labels = inference_model.predict_on_batch(
                tf.constant(np.expand_dims(image, axis=0), dtype=tf.float32))

            # correct for image scale
            boxes /= scale

            # NOTE(review): assumes detections arrive sorted by descending
            # score, so the first sub-threshold score ends the scan — confirm
            # against the model's output contract.
            for box, score in zip(boxes[0], scores[0]):
                if score < score_threshold:
                    break

                box_list.append(box.astype(int))
                score_list.append(score)
                # BUG FIX: the original appended `i * len(b)` (the path string
                # repeated four times), which broke the basename -> d3mIndex
                # lookup below. Record the path once per detection.
                image_name_list.append(image_path)

        print(
            f'Testing complete. Testing took {time.time()-start_time} seconds.',
            file=sys.__stdout__)

        # Convert predicted [x1, y1, x2, y2] boxes into comma-separated
        # 8-coordinate polygon strings (D3M format).
        boxes = np.array(box_list).tolist()
        boxes = [
            ",".join(
                map(str, [b[0], b[1], b[0], b[3], b[2], b[3], b[2], b[1]]))
            for b in boxes
        ]

        # Create mapping between image names and D3M index
        input_df = pd.DataFrame({
            'd3mIndex': inputs.d3mIndex,
            'image': [os.path.basename(path) for path in self.image_paths]
        })
        d3mIdx_image_mapping = input_df.set_index('image').T.to_dict('list')

        # Extract values for image name keys and get missing image predictions (if they exist)
        image_name_list = [os.path.basename(path) for path in image_name_list]
        d3mIdx = [d3mIdx_image_mapping.get(key) for key in image_name_list]
        empty_predictions_image_names = [
            k for k, v in d3mIdx_image_mapping.items() if v not in d3mIdx
        ]
        d3mIdx = [item for sublist in d3mIdx
                  for item in sublist]  # Flatten list of lists

        # Assemble in a Pandas DataFrame
        results = pd.DataFrame({
            'd3mIndex': d3mIdx,
            'bounding_box': boxes,
            'confidence': score_list
        })

        # D3M metrics evaluator needs at least one prediction per image. If RetinaNet does not return
        # predictions for an image, create a dummy empty prediction row to add to results_df for that
        # missing image.
        if len(empty_predictions_image_names) != 0:
            # Create data frame of empty predictions for each missing image and
            # concat with results, then sort by d3mIndex.
            empty_predictions_df = self._fill_empty_predictions(
                empty_predictions_image_names, d3mIdx_image_mapping)
            results_df = pd.concat([results, empty_predictions_df
                                    ]).sort_values('d3mIndex')
        else:
            results_df = results

        # Convert to DataFrame container
        results_df = d3m_DataFrame(results_df)

        # Assemble first output column ('d3mIndex')
        col_dict = dict(
            results_df.metadata.query((metadata_base.ALL_ELEMENTS, 0)))
        col_dict['structural_type'] = str  # was type("1"), i.e. str
        col_dict['name'] = 'd3mIndex'
        col_dict['semantic_types'] = (
            'http://schema.org/Integer',
            'https://metadata.datadrivendiscovery.org/types/PrimaryKey')
        results_df.metadata = results_df.metadata.update(
            (metadata_base.ALL_ELEMENTS, 0), col_dict)

        # Assemble second output column ('bounding_box')
        col_dict = dict(
            results_df.metadata.query((metadata_base.ALL_ELEMENTS, 1)))
        col_dict['structural_type'] = str
        col_dict['name'] = 'bounding_box'
        col_dict['semantic_types'] = (
            'http://schema.org/Text',
            'https://metadata.datadrivendiscovery.org/types/PredictedTarget',
            'https://metadata.datadrivendiscovery.org/types/BoundingPolygon')
        results_df.metadata = results_df.metadata.update(
            (metadata_base.ALL_ELEMENTS, 1), col_dict)

        # Assemble third output column ('confidence')
        col_dict = dict(
            results_df.metadata.query((metadata_base.ALL_ELEMENTS, 2)))
        col_dict['structural_type'] = str
        col_dict['name'] = 'confidence'
        # NOTE(review): scores are floats; schema.org/Integer kept from the
        # original metadata — confirm the intended semantic type.
        col_dict['semantic_types'] = (
            'http://schema.org/Integer',
            'https://metadata.datadrivendiscovery.org/types/Score')
        results_df.metadata = results_df.metadata.update(
            (metadata_base.ALL_ELEMENTS, 2), col_dict)

        return CallResult(results_df)
 def preprocess_image(self, inputs):
     """Prepare a single raw image for the network.

     Delegates to the module-level ``preprocess_image`` helper using
     caffe-style preprocessing, as expected by the backbone.
     """
     preprocessing_mode = 'caffe'
     return preprocess_image(inputs, mode=preprocessing_mode)