def run_inference_for_single_image(model, image):
    image = np.asarray(image)
    # The input needs to be a tensor, convert it using `tf.convert_to_tensor`.
    input_tensor = tf.convert_to_tensor(image)
    # The model expects a batch of images, so add an axis with `tf.newaxis`.
    input_tensor = input_tensor[tf.newaxis,...]
    
    # Run inference
    output_dict = model(input_tensor)
    
    # All outputs are batch tensors.
    # Convert to numpy arrays, and take index [0] to remove the batch dimension.
    # We're only interested in the first num_detections.
    num_detections = int(output_dict.pop('num_detections'))
    output_dict = {key:value[0, :num_detections].numpy() for key,value in output_dict.items()}
    output_dict['num_detections'] = num_detections

    # detection_classes should be ints.
    output_dict['detection_classes'] = output_dict['detection_classes'].astype(np.int64)
   
    # Handle models with masks:
    if 'detection_masks' in output_dict:
        # Reframe the bbox mask to the image size.
        detection_masks_reframed = utils_ops.reframe_box_masks_to_image_masks(
                output_dict['detection_masks'], output_dict['detection_boxes'],
                image.shape[0], image.shape[1])      
        detection_masks_reframed = tf.cast(detection_masks_reframed > 0.5,
                                       tf.uint8)
        output_dict['detection_masks_reframed'] = detection_masks_reframed.numpy()
    
    return output_dict
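A minimal usage sketch for the eager-mode helper above, assuming a TF2 SavedModel exported with the Object Detection API; the model directory and image path below are hypothetical placeholders:

import numpy as np
import tensorflow as tf
from PIL import Image

# Hypothetical paths; substitute your own exported SavedModel and test image.
model = tf.saved_model.load('exported_model/saved_model')
detect_fn = model.signatures['serving_default']
image_np = np.array(Image.open('test.jpg').convert('RGB'))

output_dict = run_inference_for_single_image(detect_fn, image_np)
print(output_dict['num_detections'], output_dict['detection_classes'][:5])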
Example #2
 def reframe_detection_mask(self, image):
     '''
     Convert (reframe) the segmentation masks of the image
     :param image:
     :return:
     '''
     if 'detection_masks' in self.tensor_dict:
         # The following processing is only for single image
         detection_boxes = tf.squeeze(self.tensor_dict['detection_boxes'],
                                      [0])
         detection_masks = tf.squeeze(self.tensor_dict['detection_masks'],
                                      [0])
         # Reframe is required to translate mask from box coordinates to image coordinates and fit the image size.
         real_num_detection = tf.cast(self.tensor_dict['num_detections'][0],
                                      tf.int32)
         detection_boxes = tf.slice(detection_boxes, [0, 0],
                                    [real_num_detection, -1])
         detection_masks = tf.slice(detection_masks, [0, 0, 0],
                                    [real_num_detection, -1, -1])
         detection_masks_reframed = utils_ops.reframe_box_masks_to_image_masks(
             detection_masks, detection_boxes, image.shape[0],
             image.shape[1])
         detection_masks_reframed = tf.cast(
             tf.greater(detection_masks_reframed, 0.5), tf.uint8)
         # Follow the convention by adding back the batch dimension
         self.tensor_dict['detection_masks'] = tf.expand_dims(
             detection_masks_reframed, 0)
def run_inference_for_single_image(image, graph, currConfig):
    #  with tf.device('gpu'):
    with graph.as_default():
        with tf.Session(config=currConfig) as sess:
            # Get handles to input and output tensors
            ops = tf.get_default_graph().get_operations()
            all_tensor_names = {
                output.name
                for op in ops for output in op.outputs
            }
            tensor_dict = {}
            for key in [
                    'num_detections', 'detection_boxes', 'detection_scores',
                    'detection_classes', 'detection_masks'
            ]:
                tensor_name = key + ':0'
                if tensor_name in all_tensor_names:
                    tensor_dict[key] = tf.get_default_graph(
                    ).get_tensor_by_name(tensor_name)
            if 'detection_masks' in tensor_dict:
                # The following processing is only for single image
                detection_boxes = tf.squeeze(tensor_dict['detection_boxes'],
                                             [0])
                detection_masks = tf.squeeze(tensor_dict['detection_masks'],
                                             [0])
                # Reframe is required to translate mask from box coordinates to image coordinates and fit the image size.
                real_num_detection = tf.cast(tensor_dict['num_detections'][0],
                                             tf.int32)
                detection_boxes = tf.slice(detection_boxes, [0, 0],
                                           [real_num_detection, -1])
                detection_masks = tf.slice(detection_masks, [0, 0, 0],
                                           [real_num_detection, -1, -1])
                detection_masks_reframed = utils_ops.reframe_box_masks_to_image_masks(
                    detection_masks, detection_boxes, image.shape[0],
                    image.shape[1])
                detection_masks_reframed = tf.cast(
                    tf.greater(detection_masks_reframed, 0.5), tf.uint8)
                # Follow the convention by adding back the batch dimension
                tensor_dict['detection_masks'] = tf.expand_dims(
                    detection_masks_reframed, 0)
            image_tensor = tf.get_default_graph().get_tensor_by_name(
                'image_tensor:0')

            # Run inference
            output_dict = sess.run(
                tensor_dict,
                feed_dict={image_tensor: np.expand_dims(image, 0)})

            # all outputs are float32 numpy arrays, so convert types as appropriate
            output_dict['num_detections'] = int(
                output_dict['num_detections'][0])
            output_dict['detection_classes'] = output_dict[
                'detection_classes'][0].astype(np.uint8)
            output_dict['detection_boxes'] = output_dict['detection_boxes'][0]
            output_dict['detection_scores'] = output_dict['detection_scores'][
                0]
            if 'detection_masks' in output_dict:
                output_dict['detection_masks'] = output_dict[
                    'detection_masks'][0]
    return output_dict
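A hedged sketch of driving this TF1 session-based variant, assuming TF 1.x (or tf.compat.v1) and a frozen inference graph exported by the Object Detection API; the graph path and test image are hypothetical:

import numpy as np
import tensorflow as tf
from PIL import Image

PATH_TO_FROZEN_GRAPH = 'exported_model/frozen_inference_graph.pb'  # hypothetical path

detection_graph = tf.Graph()
with detection_graph.as_default():
    od_graph_def = tf.GraphDef()
    with tf.gfile.GFile(PATH_TO_FROZEN_GRAPH, 'rb') as fid:
        od_graph_def.ParseFromString(fid.read())
        tf.import_graph_def(od_graph_def, name='')

config = tf.ConfigProto()
config.gpu_options.allow_growth = True  # grow GPU memory on demand instead of pre-allocating

image_np = np.array(Image.open('test.jpg').convert('RGB'))  # hypothetical test image
output_dict = run_inference_for_single_image(image_np, detection_graph, config)
print(output_dict['num_detections'])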
Example #4
def run_inference_for_single_image(image, graph):
    with graph.as_default():
        with tf.Session() as sess:
            # Get handles to the input and output tensors
            ops = tf.get_default_graph().get_operations()
            all_tensor_names = {
                output.name
                for op in ops for output in op.outputs
            }
            tensor_dict = {}
            for key in [
                    'num_detections', 'detection_boxes', 'detection_scores',
                    'detection_classes', 'detection_masks'
            ]:
                tensor_name = key + ':0'
                if tensor_name in all_tensor_names:
                    tensor_dict[key] = tf.get_default_graph(
                    ).get_tensor_by_name(tensor_name)
            if 'detection_masks' in tensor_dict:
                # The following processing is only for single image
                detection_boxes = tf.squeeze(tensor_dict['detection_boxes'], [0])
                detection_masks = tf.squeeze(tensor_dict['detection_masks'], [0])
                # Reframe is required to translate mask from box coordinates to image coordinates and fit the image size.
                real_num_detection = tf.cast(tensor_dict['num_detections'][0],
                                             tf.int32)
                detection_boxes = tf.slice(detection_boxes, [0, 0],
                                           [real_num_detection, -1])
                detection_masks = tf.slice(detection_masks, [0, 0, 0],
                                           [real_num_detection, -1, -1])
                detection_masks_reframed = utils_ops.reframe_box_masks_to_image_masks(
                    detection_masks, detection_boxes, image.shape[0], image.shape[1])
                detection_masks_reframed = tf.cast(
                    tf.greater(detection_masks_reframed, 0.5), tf.uint8)
                # Follow the convention by adding back the batch dimension
                tensor_dict['detection_masks'] = tf.expand_dims(
                    detection_masks_reframed, 0)
            image_tensor = tf.get_default_graph().get_tensor_by_name(
                'image_tensor:0')

            # Run inference
            output_dict = sess.run(
                tensor_dict,
                feed_dict={image_tensor: np.expand_dims(image, 0)})

            # all outputs are float32 numpy arrays, so convert types as appropriate
            output_dict['num_detections'] = int(output_dict['num_detections'][0])
            output_dict['detection_classes'] = output_dict['detection_classes'][
                0].astype(np.uint8)
            output_dict['detection_boxes'] = output_dict['detection_boxes'][0]
            output_dict['detection_scores'] = output_dict['detection_scores'][0]
            if 'detection_masks' in output_dict:
                output_dict['detection_masks'] = output_dict['detection_masks'][0]
    return output_dict
def run_inference_for_single_image(model, image):
    image = np.asarray(image)
    input_tensor = tf.convert_to_tensor(image)
    input_tensor = input_tensor[tf.newaxis, ...]

    output_dict = model(input_tensor)

    num_detections = int(output_dict.pop('num_detections'))
    output_dict = {
        key: value[0, :num_detections].numpy()
        for key, value in output_dict.items()
    }
    output_dict['num_detections'] = num_detections

    output_dict['detection_classes'] = output_dict['detection_classes'].astype(
        np.int64)

    if 'detection_masks' in output_dict:
        detection_masks_reframed = utils_ops.reframe_box_masks_to_image_masks(
            output_dict['detection_masks'], output_dict['detection_boxes'],
            image.shape[0], image.shape[1])
        detection_masks_reframed = tf.cast(detection_masks_reframed > 0.5,
                                           tf.uint8)
        output_dict[
            'detection_masks_reframed'] = detection_masks_reframed.numpy()

    return output_dict
def run_inference_for_single_image(image, graph):
    with graph.as_default():
        with tf.Session() as sess:
            ops = tf.get_default_graph().get_operations()
            all_tensor_names = {
                output.name
                for op in ops for output in op.outputs
            }
            tensor_dict = {}
            for key in [
                    'num_detections', 'detection_boxes', 'detection_scores',
                    'detection_classes', 'detection_masks'
            ]:
                tensor_name = key + ':0'
                if tensor_name in all_tensor_names:
                    tensor_dict[key] = tf.get_default_graph(
                    ).get_tensor_by_name(tensor_name)
            if 'detection_masks' in tensor_dict:
                detection_boxes = tf.squeeze(tensor_dict['detection_boxes'],
                                             [0])
                detection_masks = tf.squeeze(tensor_dict['detection_masks'],
                                             [0])
                real_num_detection = tf.cast(tensor_dict['num_detections'][0],
                                             tf.int32)
                detection_boxes = tf.slice(detection_boxes, [0, 0],
                                           [real_num_detection, -1])
                detection_masks = tf.slice(detection_masks, [0, 0, 0],
                                           [real_num_detection, -1, -1])
                detection_masks_reframed = utils_ops.reframe_box_masks_to_image_masks(
                    detection_masks, detection_boxes, image.shape[0],
                    image.shape[1])
                detection_masks_reframed = tf.cast(
                    tf.greater(detection_masks_reframed, 0.5), tf.uint8)
                tensor_dict['detection_masks'] = tf.expand_dims(
                    detection_masks_reframed, 0)
            image_tensor = tf.get_default_graph().get_tensor_by_name(
                'image_tensor:0')

            output_dict = sess.run(
                tensor_dict,
                feed_dict={image_tensor: np.expand_dims(image, 0)})

            output_dict['num_detections'] = int(
                output_dict['num_detections'][0])
            output_dict['detection_classes'] = output_dict[
                'detection_classes'][0].astype(np.uint8)
            output_dict['detection_boxes'] = output_dict['detection_boxes'][0]
            output_dict['detection_scores'] = output_dict['detection_scores'][
                0]
            if 'detection_masks' in output_dict:
                output_dict['detection_masks'] = output_dict[
                    'detection_masks'][0]
    return output_dict
Example #7
 def testZeroImageOnEmptyMask(self):
     box_masks = tf.constant([[[0, 0], [0, 0]]], dtype=tf.float32)
     boxes = tf.constant([[0.0, 0.0, 1.0, 1.0]], dtype=tf.float32)
     image_masks = ops.reframe_box_masks_to_image_masks(box_masks,
                                                        boxes,
                                                        image_height=4,
                                                        image_width=4)
     np_expected_image_masks = np.array(
         [[[0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0]]],
         dtype=np.float32)
     with self.test_session() as sess:
         np_image_masks = sess.run(image_masks)
         self.assertAllClose(np_image_masks, np_expected_image_masks)
Example #8
 def testMaskIsCenteredInImageWhenBoxIsCentered(self):
     box_masks = tf.constant([[[1, 1], [1, 1]]], dtype=tf.float32)
     boxes = tf.constant([[0.25, 0.25, 0.75, 0.75]], dtype=tf.float32)
     image_masks = ops.reframe_box_masks_to_image_masks(box_masks,
                                                        boxes,
                                                        image_height=4,
                                                        image_width=4)
     np_expected_image_masks = np.array(
         [[[0, 0, 0, 0], [0, 1, 1, 0], [0, 1, 1, 0], [0, 0, 0, 0]]],
         dtype=np.float32)
     with self.test_session() as sess:
         np_image_masks = sess.run(image_masks)
         self.assertAllClose(np_image_masks, np_expected_image_masks)
Example #9
 def testMaskOffCenterRemainsOffCenterInImage(self):
     box_masks = tf.constant([[[1, 0], [0, 1]]], dtype=tf.float32)
     boxes = tf.constant([[0.25, 0.5, 0.75, 1.0]], dtype=tf.float32)
     image_masks = ops.reframe_box_masks_to_image_masks(box_masks,
                                                        boxes,
                                                        image_height=4,
                                                        image_width=4)
     np_expected_image_masks = np.array(
         [[[0, 0, 0, 0], [0, 0, 0.6111111, 0.16666669],
           [0, 0, 0.3888889, 0.83333337], [0, 0, 0, 0]]],
         dtype=np.float32)
     with self.test_session() as sess:
         np_image_masks = sess.run(image_masks)
         self.assertAllClose(np_image_masks, np_expected_image_masks)
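The behaviour these tests encode can also be checked directly in eager mode; a small sketch, assuming TF 2.x and that the Object Detection API's ops module is importable as shown:

import tensorflow as tf
from object_detection.utils import ops

# A 2x2 all-ones box mask placed in the centered box [0.25, 0.25, 0.75, 0.75].
box_masks = tf.constant([[[1, 1], [1, 1]]], dtype=tf.float32)
boxes = tf.constant([[0.25, 0.25, 0.75, 0.75]], dtype=tf.float32)
image_masks = ops.reframe_box_masks_to_image_masks(
    box_masks, boxes, image_height=4, image_width=4)
print(image_masks.numpy())  # ones in the central 2x2 region, zeros elsewhere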
Example #10
def run_inference_for_single_image(model, image):
    image = np.asarray(image)
    input_tensor = tf.convert_to_tensor(image)
    input_tensor = input_tensor[tf.newaxis, ...]

    # output_dict is a dict with keys detection_classes, num_detections, detection_boxes (4 coordinates per box) and detection_scores, for up to 100 boxes
    output_dict = model(input_tensor)
    # print(1,output_dict)

    # num_detections gives the number of objects detected in the current frame
    num_detections = int(output_dict.pop('num_detections'))
    # print(2,num_detections)

    # output_dict is a dict with keys detection_classes, detection_boxes (4 coordinates per box) and detection_scores, for num_detections boxes
    output_dict = {
        key: value[0, :num_detections].numpy()
        for key, value in output_dict.items()
    }

    # add back num_detections, which was popped out earlier
    output_dict['num_detections'] = num_detections

    # convert all values in detection_classes to ints.
    output_dict['detection_classes'] = output_dict['detection_classes'].astype(
        np.int64)
    print(5, output_dict)

    # Handle models with masks:
    if 'detection_masks' in output_dict:
        # Reframe the bbox mask to the image size.
        detection_masks_reframed = utils_ops.reframe_box_masks_to_image_masks(
            output_dict['detection_masks'], output_dict['detection_boxes'],
            image.shape[0], image.shape[1])
        detection_masks_reframed = tf.cast(detection_masks_reframed > 0.5,
                                           tf.uint8)
        output_dict[
            'detection_masks_reframed'] = detection_masks_reframed.numpy()
        print(5, detection_masks_reframed)
    return output_dict
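A hedged sketch of feeding this helper from a video stream with OpenCV, assuming a TF2 SavedModel has already been loaded as detect_fn (the capture device index and model path are hypothetical):

import cv2
import tensorflow as tf

detect_fn = tf.saved_model.load('exported_model/saved_model')  # hypothetical model path
cap = cv2.VideoCapture(0)
while cap.isOpened():
    ret, frame = cap.read()
    if not ret:
        break
    # OpenCV delivers BGR frames; the detector expects RGB.
    frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    output_dict = run_inference_for_single_image(detect_fn, frame_rgb)
    print(output_dict['num_detections'])
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break
cap.release()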
Example #11
    def detect(self, image):
        """
        Detects objects in the given image.

        Args:
        image: (numpy array) input image

        Returns:
        output_dict (dictionary) Contains boxes, scores, masks etc.
        """
        with self._detection_graph.as_default():
            # Get handles to input and output tensors
            ops = tf.compat.v1.get_default_graph().get_operations()
            all_tensor_names = {
                output.name
                for op in ops for output in op.outputs
            }
            tensor_dict = {}
            for key in [
                    'num_detections', 'detection_boxes', 'detection_scores',
                    'detection_classes', 'detection_masks'
            ]:
                tensor_name = key + ':0'
                if tensor_name in all_tensor_names:
                    tensor_dict[key] = tf.compat.v1.get_default_graph(
                    ).get_tensor_by_name(tensor_name)
            if 'detection_masks' in tensor_dict:
                # The following processing is only for single image
                detection_boxes = tf.squeeze(tensor_dict['detection_boxes'],
                                             [0])
                detection_masks = tf.squeeze(tensor_dict['detection_masks'],
                                             [0])
                # Reframe is required to translate mask from box coordinates to image coordinates and fit the image size.
                real_num_detection = tf.cast(tensor_dict['num_detections'][0],
                                             tf.int32)
                detection_boxes = tf.slice(detection_boxes, [0, 0],
                                           [real_num_detection, -1])
                detection_masks = tf.slice(detection_masks, [0, 0, 0],
                                           [real_num_detection, -1, -1])
                detection_masks_reframed = utils_ops.reframe_box_masks_to_image_masks(
                    detection_masks, detection_boxes, image.shape[0],
                    image.shape[1])
                detection_masks_reframed = tf.cast(
                    tf.greater(detection_masks_reframed, 0.5), tf.uint8)
                # Follow the convention by adding back the batch dimension
                tensor_dict['detection_masks'] = tf.expand_dims(
                    detection_masks_reframed, 0)
            image_tensor = tf.compat.v1.get_default_graph().get_tensor_by_name(
                'image_tensor:0')

            start = time.time()

            # Run inference
            output_dict = self._sess.run(
                tensor_dict,
                feed_dict={image_tensor: np.expand_dims(image, 0)})

            end = time.time()

            #print end-start

            # all outputs are float32 numpy arrays, so convert types as appropriate
            output_dict['num_detections'] = int(
                output_dict['num_detections'][0])
            output_dict['detection_classes'] = output_dict[
                'detection_classes'][0].astype(np.uint8)
            output_dict['detection_boxes'] = output_dict['detection_boxes'][0]
            output_dict['detection_scores'] = output_dict['detection_scores'][
                0]
            if 'detection_masks' in output_dict:
                output_dict['detection_masks'] = output_dict[
                    'detection_masks'][0]
        return (output_dict, self.category_index)
Example #12
def _extract_prediction_tensors(model,
                                create_input_dict_fn,
                                ignore_groundtruth=False,
                                preprocess_input_options=None):
    """Restores the model in a tensorflow session.

    Args:
      model: model to perform predictions with.
      create_input_dict_fn: function to create input tensor dictionaries.
      ignore_groundtruth: whether groundtruth should be ignored.
      preprocess_input_options: a list of tuples, where each tuple contains a
        preprocess input function and a dictionary containing arguments and their
        values (see preprocessor_input.py).

    Returns:
      tensor_dict: A tensor dictionary with evaluations.
    """
    input_dict = create_input_dict_fn()
    prefetch_queue = prefetcher.prefetch(input_dict, capacity=500)
    input_dict = prefetch_queue.dequeue()
    images = tf.expand_dims(input_dict[fields.InputDataFields.image], 0)
    float_images = tf.to_float(images)
    input_dict[fields.InputDataFields.image] = float_images

    if preprocess_input_options:
        input_dict = preprocessor_input.preprocess(input_dict, preprocess_input_options)

    original_image = input_dict[fields.InputDataFields.image]
    preprocessed_image = model.preprocess(original_image)

    prediction_dict = model.predict(preprocessed_image)

    detections = model.postprocess(prediction_dict)

    original_image_shape = tf.shape(original_image)
    if model.is_rbbox:
        absolute_detection_boxlist = rbox_list_ops.to_absolute_coordinates(
            rbox_list.RBoxList(tf.squeeze(detections['detection_boxes'], axis=0)),
            original_image_shape[1], original_image_shape[2])
    else:
        absolute_detection_boxlist = box_list_ops.to_absolute_coordinates(
            box_list.BoxList(tf.squeeze(detections['detection_boxes'], axis=0)),
            original_image_shape[1], original_image_shape[2])
    label_id_offset = 1
    tensor_dict = {
        'original_image': original_image,
        'image_id': input_dict[fields.InputDataFields.source_id],
        'filename': input_dict[fields.InputDataFields.filename],
        'sensor': input_dict[fields.InputDataFields.sensor],
        'detection_boxes': absolute_detection_boxlist.get(),
        'detection_scores': tf.squeeze(detections['detection_scores'], axis=0),
        'detection_classes': (
            tf.squeeze(detections['detection_classes'], axis=0) +
            label_id_offset),
    }
    if 'detection_masks' in detections:
        detection_masks = tf.squeeze(detections['detection_masks'],
                                     axis=0)
        detection_boxes = tf.squeeze(detections['detection_boxes'],
                                     axis=0)
        # TODO: This should be done in model's postprocess function ideally.
        detection_masks_reframed = ops.reframe_box_masks_to_image_masks(
            detection_masks,
            detection_boxes,
            original_image_shape[1], original_image_shape[2])
        detection_masks_reframed = tf.to_float(tf.greater(detection_masks_reframed,
                                                          0.5))

        tensor_dict['detection_masks'] = detection_masks_reframed
    # load groundtruth fields into tensor_dict
    if not ignore_groundtruth:
        if model.is_rbbox:
            normalized_gt_boxlist = rbox_list.RBoxList(input_dict[fields.InputDataFields.groundtruth_rboxes])
            gt_boxlist = rbox_list_ops.scale(normalized_gt_boxlist,
                                             tf.shape(original_image)[1],
                                             tf.shape(original_image)[2])
        else:
            normalized_gt_boxlist = box_list.BoxList(input_dict[fields.InputDataFields.groundtruth_boxes])
            gt_boxlist = box_list_ops.scale(normalized_gt_boxlist,
                                            tf.shape(original_image)[1],
                                            tf.shape(original_image)[2])
        groundtruth_boxes = gt_boxlist.get()
        groundtruth_classes = input_dict[fields.InputDataFields.groundtruth_classes]
        tensor_dict['groundtruth_boxes'] = groundtruth_boxes
        tensor_dict['groundtruth_classes'] = groundtruth_classes
        tensor_dict['area'] = input_dict[fields.InputDataFields.groundtruth_area]
        tensor_dict['is_crowd'] = input_dict[fields.InputDataFields.groundtruth_is_crowd]
        tensor_dict['difficult'] = input_dict[fields.InputDataFields.groundtruth_difficult]
        if 'detection_masks' in tensor_dict:
            tensor_dict['groundtruth_instance_masks'] = input_dict[
                fields.InputDataFields.groundtruth_instance_masks]
    return tensor_dict
Example #13
                        ).get_tensor_by_name(tensor_name)
                if 'detection_masks' in tensor_dict:
                    # The following processing is only for single image
                    detection_boxes = tf.squeeze(
                        tensor_dict['detection_boxes'], [0])
                    detection_masks = tf.squeeze(
                        tensor_dict['detection_masks'], [0])
                    # Reframe is required to translate mask from box coordinates to image coordinates and fit the image size.
                    real_num_detection = tf.cast(
                        tensor_dict['num_detections'][0], tf.int32)
                    detection_boxes = tf.slice(detection_boxes, [0, 0],
                                               [real_num_detection, -1])
                    detection_masks = tf.slice(detection_masks, [0, 0, 0],
                                               [real_num_detection, -1, -1])
                    detection_masks_reframed = utils_ops.reframe_box_masks_to_image_masks(
                        detection_masks, detection_boxes, image.shape[1],
                        image.shape[2])
                    detection_masks_reframed = tf.cast(
                        tf.greater(detection_masks_reframed, 0.5), tf.uint8)
                    # Follow the convention by adding back the batch dimension
                    tensor_dict['detection_masks'] = tf.expand_dims(
                        detection_masks_reframed, 0)
                image_tensor = tf.get_default_graph().get_tensor_by_name(
                    'image_tensor:0')

                for j in range(numImages):

                    camera.capture(image_np, format='bgr')
                    print('exposure_speed={}'.format(camera.exposure_speed))
                    # At this point the image is available as stream.array
Example #14
def result_dict_for_single_example(image,
                                   key,
                                   detections,
                                   groundtruth=None,
                                   class_agnostic=False,
                                   scale_to_absolute=False):
    """Merges all detection and groundtruth information for a single example.

  Note that evaluation tools require classes that are 1-indexed, and so this
  function performs the offset. If `class_agnostic` is True, all output classes
  have label 1.

  Args:
    image: A single 4D image tensor of shape [1, H, W, C].
    key: A single string tensor identifying the image.
    detections: A dictionary of detections, returned from
      DetectionModel.postprocess().
    groundtruth: (Optional) Dictionary of groundtruth items, with fields:
      'groundtruth_boxes': [num_boxes, 4] float32 tensor of boxes, in
        normalized coordinates.
      'groundtruth_classes': [num_boxes] int64 tensor of 1-indexed classes.
      'groundtruth_area': [num_boxes] float32 tensor of bbox area. (Optional)
      'groundtruth_is_crowd': [num_boxes] int64 tensor. (Optional)
      'groundtruth_difficult': [num_boxes] int64 tensor. (Optional)
      'groundtruth_group_of': [num_boxes] int64 tensor. (Optional)
      'groundtruth_instance_masks': 3D int64 tensor of instance masks
        (Optional).
    class_agnostic: Boolean indicating whether the detections are class-agnostic
      (i.e. binary). Default False.
    scale_to_absolute: Boolean indicating whether boxes and keypoints should be
      scaled to absolute coordinates. Note that for IoU based evaluations, it
      does not matter whether boxes are expressed in absolute or relative
      coordinates. Default False.

  Returns:
    A dictionary with:
    'original_image': A [1, H, W, C] uint8 image tensor.
    'key': A string tensor with image identifier.
    'detection_boxes': [max_detections, 4] float32 tensor of boxes, in
      normalized or absolute coordinates, depending on the value of
      `scale_to_absolute`.
    'detection_scores': [max_detections] float32 tensor of scores.
    'detection_classes': [max_detections] int64 tensor of 1-indexed classes.
    'detection_masks': [max_detections, H, W] float32 tensor of binarized
      masks, reframed to full image masks.
    'groundtruth_boxes': [num_boxes, 4] float32 tensor of boxes, in
      normalized or absolute coordinates, depending on the value of
      `scale_to_absolute`. (Optional)
    'groundtruth_classes': [num_boxes] int64 tensor of 1-indexed classes.
      (Optional)
    'groundtruth_area': [num_boxes] float32 tensor of bbox area. (Optional)
    'groundtruth_is_crowd': [num_boxes] int64 tensor. (Optional)
    'groundtruth_difficult': [num_boxes] int64 tensor. (Optional)
    'groundtruth_group_of': [num_boxes] int64 tensor. (Optional)
    'groundtruth_instance_masks': 3D int64 tensor of instance masks
      (Optional).

  """
    label_id_offset = 1  # Applying label id offset (b/63711816)

    input_data_fields = fields.InputDataFields()
    output_dict = {
        input_data_fields.original_image: image,
        input_data_fields.key: key,
    }

    detection_fields = fields.DetectionResultFields
    detection_boxes = detections[detection_fields.detection_boxes][0]
    output_dict[detection_fields.detection_boxes] = detection_boxes
    image_shape = tf.shape(image)
    if scale_to_absolute:
        absolute_detection_boxlist = box_list_ops.to_absolute_coordinates(
            box_list.BoxList(detection_boxes), image_shape[1], image_shape[2])
        output_dict[detection_fields.detection_boxes] = (
            absolute_detection_boxlist.get())
    detection_scores = detections[detection_fields.detection_scores][0]
    output_dict[detection_fields.detection_scores] = detection_scores

    if class_agnostic:
        detection_classes = tf.ones_like(detection_scores, dtype=tf.int64)
    else:
        detection_classes = (
            tf.to_int64(detections[detection_fields.detection_classes][0]) +
            label_id_offset)
    output_dict[detection_fields.detection_classes] = detection_classes

    if detection_fields.detection_masks in detections:
        detection_masks = detections[detection_fields.detection_masks][0]
        # TODO: This should be done in model's postprocess
        # function ideally.
        num_detections = tf.to_int32(
            detections[detection_fields.num_detections][0])
        detection_boxes = tf.slice(detection_boxes,
                                   begin=[0, 0],
                                   size=[num_detections, -1])
        detection_masks = tf.slice(detection_masks,
                                   begin=[0, 0, 0],
                                   size=[num_detections, -1, -1])
        detection_masks_reframed = ops.reframe_box_masks_to_image_masks(
            detection_masks, detection_boxes, image_shape[1], image_shape[2])
        detection_masks_reframed = tf.cast(
            tf.greater(detection_masks_reframed, 0.5), tf.uint8)
        output_dict[
            detection_fields.detection_masks] = detection_masks_reframed
    if detection_fields.detection_keypoints in detections:
        detection_keypoints = detections[
            detection_fields.detection_keypoints][0]
        output_dict[detection_fields.detection_keypoints] = detection_keypoints
        if scale_to_absolute:
            absolute_detection_keypoints = keypoint_ops.scale(
                detection_keypoints, image_shape[1], image_shape[2])
            output_dict[detection_fields.detection_keypoints] = (
                absolute_detection_keypoints)

    if groundtruth:
        if input_data_fields.groundtruth_instance_masks in groundtruth:
            groundtruth[
                input_data_fields.groundtruth_instance_masks] = tf.cast(
                    groundtruth[input_data_fields.groundtruth_instance_masks],
                    tf.uint8)
        output_dict.update(groundtruth)
        if scale_to_absolute:
            groundtruth_boxes = groundtruth[
                input_data_fields.groundtruth_boxes]
            absolute_gt_boxlist = box_list_ops.to_absolute_coordinates(
                box_list.BoxList(groundtruth_boxes), image_shape[1],
                image_shape[2])
            output_dict[input_data_fields.groundtruth_boxes] = (
                absolute_gt_boxlist.get())
        # For class-agnostic models, groundtruth classes all become 1.
        if class_agnostic:
            groundtruth_classes = groundtruth[
                input_data_fields.groundtruth_classes]
            groundtruth_classes = tf.ones_like(groundtruth_classes,
                                               dtype=tf.int64)
            output_dict[
                input_data_fields.groundtruth_classes] = groundtruth_classes

    return output_dict
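A hedged, toy-sized sketch of calling this helper in TF 1.x graph mode; the detection tensors below are hypothetical stand-ins shaped like DetectionModel.postprocess() output for a batch of one:

import tensorflow as tf

image = tf.zeros([1, 4, 4, 3], dtype=tf.uint8)  # hypothetical input image tensor
key = tf.constant('image_0')
detections = {
    'detection_boxes': tf.constant([[[0.1, 0.1, 0.9, 0.9]]], dtype=tf.float32),
    'detection_scores': tf.constant([[0.9]], dtype=tf.float32),
    'detection_classes': tf.constant([[0.0]], dtype=tf.float32),
    'num_detections': tf.constant([1.0], dtype=tf.float32),
}

result = result_dict_for_single_example(image, key, detections,
                                        scale_to_absolute=True)
with tf.Session() as sess:
    # With scale_to_absolute=True the boxes come back in absolute pixel coordinates.
    print(sess.run(result['detection_boxes']))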
Example #15
def run_inference_for_single_video_ssd(graph, video):
    with graph.as_default():
        with tf.Session() as sess:
            # Get handles to input and output tensors
            ops = tf.get_default_graph().get_operations()
            all_tensor_names = {output.name for op in ops for output in op.outputs}
            tensor_dict = {}
            for key in [
                'num_detections', 'detection_boxes', 'detection_scores',
                'detection_classes', 'detection_masks'
            ]:
                tensor_name = key + ':0'
                if tensor_name in all_tensor_names:
                    tensor_dict[key] = tf.get_default_graph().get_tensor_by_name(
                        tensor_name)
            if 'detection_masks' in tensor_dict:
                # The following processing is only for single image
                detection_boxes = tf.squeeze(tensor_dict['detection_boxes'], [0])
                detection_masks = tf.squeeze(tensor_dict['detection_masks'], [0])
                # Reframe is required to translate mask from box coordinates to image coordinates and fit the image size.
                real_num_detection = tf.cast(tensor_dict['num_detections'][0], tf.int32)
                detection_boxes = tf.slice(detection_boxes, [0, 0], [real_num_detection, -1])
                detection_masks = tf.slice(detection_masks, [0, 0, 0], [real_num_detection, -1, -1])
                detection_masks_reframed = utils_ops.reframe_box_masks_to_image_masks(
                    detection_masks, detection_boxes, image.shape[1], image.shape[2])
                detection_masks_reframed = tf.cast(
                    tf.greater(detection_masks_reframed, 0.5), tf.uint8)
                # Follow the convention by adding back the batch dimension
                tensor_dict['detection_masks'] = tf.expand_dims(
                    detection_masks_reframed, 0)

            # INPUT TENSOR OF DETECTION GRAPH
            image_tensor = tf.get_default_graph().get_tensor_by_name('image_tensor:0')

            # Load Video with opencv
            INPUT_VIDEO = os.path.join(PATH_TO_VIDEOS_DIR, PATH_TO_TEST_VIDEOS_DIR,video)
            #####################################################################################
            # open video handle
            #####################################################################################
            cap = cv2.VideoCapture(INPUT_VIDEO)

            #####################################################################################
            # Prepare for saving the detected video
            #####################################################################################
            sz = (int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)),
                  int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)))
            fourcc = cv2.VideoWriter_fourcc(*'mpeg')
            vout = cv2.VideoWriter()
            vout.open(os.path.join(PATH_TO_VIDEOS_DIR, PATH_TO_RES_VIDEOS_DIR, video), fourcc, 20, sz, True)

            while cap.isOpened():
                ret, image = cap.read()

                if ret:
                    # shape = image.shape
                    # image_np = load_cv_image_into_numpy_array(image)
                    # Expand dimensions since the model expects images to have shape: [1, None, None, 3]
                    #pil_image = Image.fromarray(image.astype('uint8'), 'RGB')
                    #img_resized = letter_box_image(pil_image, int(416), int(416), 128)
                    #img_resized = img_resized.astype(np.float32)
                    image_np_expanded = np.expand_dims(image, axis=0)
                    # OBTAIN OUTPUT TENSOR
                    # Run inference
                    output_dict = sess.run(tensor_dict,
                                           feed_dict={image_tensor: image_np_expanded})

                    # all outputs are float32 numpy arrays, so convert types as appropriate
                    output_dict['num_detections'] = int(output_dict['num_detections'][0])
                    output_dict['detection_classes'] = output_dict[
                        'detection_classes'][0].astype(np.int64)
                    output_dict['detection_boxes'] = output_dict['detection_boxes'][0]
                    output_dict['detection_scores'] = output_dict['detection_scores'][0]
                    if 'detection_masks' in output_dict:
                        output_dict['detection_masks'] = output_dict['detection_masks'][0]


                    vis_util.visualize_boxes_and_labels_on_image_array(
                        image,
                        output_dict['detection_boxes'],
                        output_dict['detection_classes'],
                        output_dict['detection_scores'],
                        category_index,
                        instance_masks=output_dict.get('detection_masks'),
                        use_normalized_coordinates=True,
                        line_thickness=3)

                    # Save the video frame by frame
                    vout.write(image)

                    # show_image = plt.figure()
                    cv2.imshow("detection", image)
                    # show_image.show()
                    # image_np.show()
                    if cv2.waitKey(110) & 0xff == 27:
                        break

                else:
                    break

            vout.release()
            cap.release()
Example #16
def inference_for_single_image(image, graph):
    with graph.as_default():
        with tf.Session() as sess:
            # Get handles to the input and output tensors
            ops = tf.get_default_graph().get_operations()
            # Collect the names of every tensor in the graph
            all_tensor_names = {
                output.name
                for op in ops for output in op.outputs
            }
            tensor_dict = {}
            # for a single image, fetch the boxes, scores, classes, etc.
            for key in [
                    'num_detections', 'detection_boxes', 'detection_scores',
                    'detection_classes', 'detection_masks'
            ]:
                tensor_name = key + ':0'
                if tensor_name in all_tensor_names:
                    tensor_dict[key] = tf.get_default_graph(
                    ).get_tensor_by_name(tensor_name)
            if 'detection_masks' in tensor_dict:
                # the following processing is only for a single image
                detection_boxes = tf.squeeze(tensor_dict['detection_boxes'],
                                             [0])
                detection_masks = tf.squeeze(tensor_dict['detection_masks'],
                                             [0])
                # reframing is required to translate the mask from box coordinates to image coordinates
                real_num_detection = tf.cast(tensor_dict['num_detections'][0],
                                             tf.int32)
                detection_boxes = tf.slice(detection_boxes, [0, 0],
                                           [real_num_detection, -1])
                detection_masks = tf.slice(detection_masks, [0, 0, 0],
                                           [real_num_detection, -1, -1])

                #reframing of detection mask

                detection_masks_reframed = utils_ops.reframe_box_masks_to_image_masks(
                    detection_masks, detection_boxes, image.shape[1],
                    image.shape[2])
                detection_masks_reframed = tf.cast(
                    tf.greater(detection_masks_reframed, 0.5), tf.uint8)

                tensor_dict['detection_masks'] = tf.expand_dims(
                    detection_masks_reframed, 0)
            image_tensor = tf.get_default_graph().get_tensor_by_name(
                'image_tensor:0')

            output_dict = sess.run(tensor_dict,
                                   feed_dict={image_tensor: image})

            # all outputs are float32 numpy arrays, so convert types as appropriate

            output_dict['num_detections'] = int(
                output_dict['num_detections'][0])
            output_dict['detection_classes'] = output_dict[
                'detection_classes'][0].astype(np.uint8)
            output_dict['detection_boxes'] = output_dict['detection_boxes'][0]
            output_dict['detection_scores'] = output_dict['detection_scores'][
                0]

            if 'detection_masks' in output_dict:
                output_dict['detection_masks'] = output_dict[
                    'detection_masks'][0]

    return output_dict
    def get(self):
        # print("------------->>>>", IMAGE_PATH)
        parser = reqparse.RequestParser()
        parser.add_argument('image_path', type=str)
        args = parser.parse_args()
        image_path = args['image_path']
        image = Image.open(image_path)
        # print("image :: ", image, type(image), dir(image))
        # the array based representation of the image will be used later in order to prepare the
        # result image with boxes and labels on it.
        image_np = load_image_into_numpy_array(image)
        # print("image_np >> ", image_np, type(image_np), image_np.shape)
        # Expand dimensions since the model expects images to have shape: [1, None, None, 3]
        # image_np_expanded = np.expand_dims(image_np, axis=0)
        image_np_expanded = image_np
        # print("image_np_expanded >> ", image_np_expanded,
        #       type(image_np_expanded), image_np_expanded.shape)

        st_time = datetime.now()
        with detection_graph.as_default():
            with tf.Session(graph=detection_graph) as sess:
                print("detection begins......")
                # Get handles to input and output tensors
                ops = tf.get_default_graph().get_operations()
                all_tensor_names = {
                    output.name
                    for op in ops for output in op.outputs
                }
                tensor_dict = {}
                for key in [
                        'num_detections', 'detection_boxes',
                        'detection_scores', 'detection_classes',
                        'detection_masks'
                ]:
                    tensor_name = key + ':0'
                    if tensor_name in all_tensor_names:
                        tensor_dict[key] = tf.get_default_graph(
                        ).get_tensor_by_name(tensor_name)
                if 'detection_masks' in tensor_dict:
                    # The following processing is only for single image
                    detection_boxes = tf.squeeze(
                        tensor_dict['detection_boxes'], [0])
                    detection_masks = tf.squeeze(
                        tensor_dict['detection_masks'], [0])
                    # Reframe is required to translate mask from box coordinates to image coordinates and fit the image size.
                    real_num_detection = tf.cast(
                        tensor_dict['num_detections'][0], tf.int32)
                    detection_boxes = tf.slice(detection_boxes, [0, 0],
                                               [real_num_detection, -1])
                    detection_masks = tf.slice(detection_masks, [0, 0, 0],
                                               [real_num_detection, -1, -1])
                    # print(">>>", image_np_expanded.shape, image_np_expanded.shape[0], image_np_expanded.shape[1])
                    detection_masks_reframed = utils_ops.reframe_box_masks_to_image_masks(
                        detection_masks, detection_boxes,
                        image_np_expanded.shape[0], image_np_expanded.shape[1])
                    detection_masks_reframed = tf.cast(
                        tf.greater(detection_masks_reframed, 0.5), tf.uint8)
                    # Follow the convention by adding back the batch dimension
                    tensor_dict['detection_masks'] = tf.expand_dims(
                        detection_masks_reframed, 0)
                image_tensor = tf.get_default_graph().get_tensor_by_name(
                    'image_tensor:0')

                et_time = datetime.now()
                # duration =
                print("session loaded in ... ", et_time - st_time)
                # Run inference
                output_dict = sess.run(tensor_dict,
                                       feed_dict={
                                           image_tensor:
                                           np.expand_dims(
                                               image_np_expanded, 0)
                                       })
                ed_time = datetime.now()
                duration = ed_time - st_time
                print("detection done in ... ", duration)
                # all outputs are float32 numpy arrays, so convert types as appropriate
                output_dict['num_detections'] = int(
                    output_dict['num_detections'][0])
                output_dict['detection_classes'] = output_dict[
                    'detection_classes'][0].astype(np.uint8)
                output_dict['detection_boxes'] = output_dict[
                    'detection_boxes'][0]
                output_dict['detection_scores'] = \
                output_dict['detection_scores'][0]
                if 'detection_masks' in output_dict:
                    output_dict['detection_masks'] = \
                    output_dict['detection_masks'][0]
                # print("output_dict :: ", output_dict)
                image_np_op = vis_util.visualize_boxes_and_labels_on_image_array(
                    image_np,
                    output_dict['detection_boxes'],
                    output_dict['detection_classes'],
                    output_dict['detection_scores'],
                    category_index,
                    instance_masks=output_dict.get('detection_masks'),
                    use_normalized_coordinates=True,
                    line_thickness=8)

                # plt.figure(figsize=IMAGE_SIZE)
                # plt.imshow(image_np_op)

                cv2.imwrite('color_img.jpg', image_np_op)
                cv2.imshow('Color image', image_np_op)
                cv2.waitKey(0)
                cv2.destroyAllWindows()

                return output_dict
Example #18
    def frames():
        with picamera.PiCamera() as camera:
            #camera.vflip = True
            #camera.hflip = True
            resX = 768
            resY = 512
            image_np = np.empty((resY, resX, 3), dtype=np.uint8)
            camera.resolution = (resX, resY)

            with detection_graph.as_default():
                with tf.compat.v1.Session() as sess:
                    # Get handles to input and output tensors
                    ops = tf.compat.v1.get_default_graph().get_operations()
                    all_tensor_names = {
                        output.name
                        for op in ops for output in op.outputs
                    }
                    tensor_dict = {}
                    for key in [
                            'num_detections', 'detection_boxes',
                            'detection_scores', 'detection_classes',
                            'detection_masks'
                    ]:
                        tensor_name = key + ':0'
                        if tensor_name in all_tensor_names:
                            tensor_dict[key] = tf.compat.v1.get_default_graph(
                            ).get_tensor_by_name(tensor_name)
                    if 'detection_masks' in tensor_dict:
                        # The following processing is only for single image
                        detection_boxes = tf.squeeze(
                            tensor_dict['detection_boxes'], [0])
                        detection_masks = tf.squeeze(
                            tensor_dict['detection_masks'], [0])
                        # Reframe is required to translate mask from box coordinates to image coordinates and fit the image size.
                        real_num_detection = tf.cast(
                            tensor_dict['num_detections'][0], tf.int32)
                        detection_boxes = tf.slice(detection_boxes, [0, 0],
                                                   [real_num_detection, -1])
                        detection_masks = tf.slice(
                            detection_masks, [0, 0, 0],
                            [real_num_detection, -1, -1])
                        detection_masks_reframed = utils_ops.reframe_box_masks_to_image_masks(
                            detection_masks, detection_boxes, image.shape[1],
                            image.shape[2])
                        detection_masks_reframed = tf.cast(
                            tf.greater(detection_masks_reframed, 0.5),
                            tf.uint8)
                        # Follow the convention by adding back the batch dimension
                        tensor_dict['detection_masks'] = tf.expand_dims(
                            detection_masks_reframed, 0)
                    image_tensor = tf.compat.v1.get_default_graph(
                    ).get_tensor_by_name('image_tensor:0')

                    # let camera warm up
                    # time.sleep(2) camera startup during Tensorflow startup
                    ''' Works, but with latency
                stream = io.BytesIO()
                for _ in camera.capture_continuous(stream, 'jpeg',use_video_port=True):
                    # return current frame
                    stream.seek(0)
                    yield stream.read()
                    # reset stream for next frame
                    stream.seek(0)
                    stream.truncate()
                '''
                    ''' Works, but with latency
                while True:
                    camera.capture(image_np, format='bgr')
                    yield cv2.imencode('.jpg', image_np)[1].tobytes()
                '''

                    # 309 ms
                    stream = io.BytesIO()
                    for _ in camera.capture_continuous(stream,
                                                       'bgr',
                                                       use_video_port=True):
                        # return current frame
                        stream.seek(0)

                        image_np = np.frombuffer(stream.read(),
                                                 dtype=np.uint8,
                                                 count=resX * resY *
                                                 3).reshape((resY, resX, 3))

                        # Expand dimensions since the model expects images to have shape: [1, None, None, 3]
                        image_np_expanded = np.expand_dims(image_np, axis=0)
                        # Actual detection.
                        #output_dict = run_inference_for_single_image(tensor_dict, image_np_expanded, detection_graph)
                        output_dict = sess.run(
                            tensor_dict,
                            feed_dict={image_tensor: image_np_expanded})

                        # all outputs are float32 numpy arrays, so convert types as appropriate
                        output_dict['num_detections'] = int(
                            output_dict['num_detections'][0])
                        output_dict['detection_classes'] = output_dict[
                            'detection_classes'][0].astype(np.int64)
                        output_dict['detection_boxes'] = output_dict[
                            'detection_boxes'][0]
                        output_dict['detection_scores'] = output_dict[
                            'detection_scores'][0]
                        if 'detection_masks' in output_dict:
                            output_dict['detection_masks'] = output_dict[
                                'detection_masks'][0]

                        # Visualization of the results of a detection.
                        for i in range(output_dict['num_detections']):
                            if output_dict['detection_scores'][i] > 0.5:
                                pt1 = (
                                    int(output_dict['detection_boxes'][i][1] *
                                        image_np.shape[1]),
                                    int(output_dict['detection_boxes'][i][0] *
                                        image_np.shape[0]))
                                pt2 = (
                                    int(output_dict['detection_boxes'][i][3] *
                                        image_np.shape[1]),
                                    int(output_dict['detection_boxes'][i][2] *
                                        image_np.shape[0]))
                                cv2.rectangle(image_np, pt1, pt2, (0, 255, 0),
                                              3)
                                classTxt = '{} ({:.2f})'.format(
                                    category_index[output_dict[
                                        'detection_classes'][i]]['name'],
                                    output_dict['detection_scores'][i])
                                cv2.putText(image_np, classTxt, pt1,
                                            cv2.FONT_HERSHEY_SIMPLEX, 0.5,
                                            (255, 0, 0))

                        yield cv2.imencode('.jpg', image_np)[1].tobytes()
                        # reset stream for next frame
                        stream.seek(0)
                        stream.truncate()