def main(unused_argv):
    """Runs SavedModel inference on images and writes an HTML visualization.

    Loads the `id:name` CSV label map, restores the SavedModel found in
    `FLAGS.saved_model_dir`, runs it on every file matching
    `FLAGS.image_file_pattern`, draws the detections on each image and writes
    all annotated images, inlined as base64 JPEG data URIs, to
    `FLAGS.output_html`.

    Args:
      unused_argv: positional command-line arguments; ignored.

    Raises:
      ValueError: if `FLAGS.label_map_format` is not 'csv', if a label map row
        is not in `id:name` form, or if `FLAGS.use_normalized_coordinates` is
        False and the SavedModel signature has no `image_info` output.
    """
    del unused_argv

    # Load the label map.
    print(' - Loading the label map...')
    if FLAGS.label_map_format != 'csv':
        raise ValueError('Unsupported label map format: {}.'.format(
            FLAGS.label_map_format))
    label_map_dict = {}
    with tf.gfile.Open(FLAGS.label_map_file, 'r') as csv_file:
        for row in csv.reader(csv_file, delimiter=':'):
            if len(row) != 2:
                raise ValueError(
                    'Each row of the csv label map file must be in '
                    '`id:name` format.')
            id_index = int(row[0])
            label_map_dict[id_index] = {'id': id_index, 'name': row[1]}

    image_with_detections_list = []
    with tf.Session(graph=tf.Graph()) as sess:
        print(' - Loading saved model...')
        meta_graph_def = tf.saved_model.load(
            sess, [tf.saved_model.tag_constants.SERVING],
            FLAGS.saved_model_dir)
        signature = meta_graph_def.signature_def['serving_default']
        inputs = dict(signature.inputs)
        outputs = dict(signature.outputs)

        image_node = inputs['input'].name

        # Only fetch tensors that are actually consumed below.
        output_nodes = {
            key: outputs[key].name
            for key in ('num_detections', 'detection_boxes',
                        'detection_classes', 'detection_scores')
        }
        if 'detection_masks' in outputs:
            output_nodes['detection_masks'] = outputs['detection_masks'].name
        if not FLAGS.use_normalized_coordinates:
            if 'image_info' not in outputs:
                raise ValueError(
                    'If `use_normalized_coordinates` = False, `image_info`'
                    ' node must be included in the SavedModel.')
            output_nodes['image_info'] = outputs['image_info'].name

        image_files = tf.gfile.Glob(FLAGS.image_file_pattern)
        for i, image_file in enumerate(image_files):
            print(' - processing image %d...' % i)

            # Read through tf.gfile so every path returned by tf.gfile.Glob
            # (not only local ones) can be opened.
            with tf.gfile.GFile(image_file, 'rb') as f:
                image = Image.open(io.BytesIO(f.read()))
            image = image.convert('RGB')  # needed for images with 4 channels.
            width, height = image.size
            np_image = np.array(image, dtype=np.uint8)  # (height, width, 3)

            np_image_input = input_utils.normalize_image_np(np_image)
            np_image_input = np.float32(
                np_image_input.reshape(1, height, width, 3))

            output_results = sess.run(output_nodes,
                                      feed_dict={image_node: np_image_input})

            num_detections = int(output_results['num_detections'][0])
            np_boxes = output_results['detection_boxes'][0, :num_detections]
            if not FLAGS.use_normalized_coordinates:
                # NOTE(review): presumably row 1 of image_info holds the
                # resized (height, width), so this normalizes the boxes --
                # confirm against the exported model.
                np_image_info = output_results['image_info'][0]
                np_boxes = np_boxes / np.tile(np_image_info[1:2, :], (1, 2))
            # Scale [ymin, xmin, ymax, xmax] to pixels of the input image.
            np_boxes = np_boxes * np.array(
                [height, width, height, width], dtype=np_boxes.dtype)
            np_scores = output_results['detection_scores'][0, :num_detections]
            np_classes = output_results['detection_classes'][
                0, :num_detections].astype(np.int32)
            np_masks = None
            if 'detection_masks' in output_results:
                np_masks = output_results['detection_masks'][
                    0, :num_detections]
                np_masks = mask_utils.paste_instance_masks(
                    np_masks, box_utils.yxyx_to_xywh(np_boxes), height, width)

            image_with_detections = (
                visualization_utils.visualize_boxes_and_labels_on_image_array(
                    np_image,
                    np_boxes,
                    np_classes,
                    np_scores,
                    label_map_dict,
                    instance_masks=np_masks,
                    use_normalized_coordinates=False,
                    max_boxes_to_draw=FLAGS.max_boxes_to_draw,
                    min_score_thresh=FLAGS.min_score_threshold))
            image_with_detections_list.append(image_with_detections)

    # The session is no longer needed once all images have been processed.
    print(' - Saving the outputs...')
    image_strs = []
    for image_with_detections in image_with_detections_list:
        formatted_image = Image.fromarray(
            image_with_detections.astype(np.uint8))
        with io.BytesIO() as stream:
            formatted_image.save(stream, format='JPEG')
            data_uri = base64.b64encode(stream.getvalue()).decode('utf-8')
        image_strs.append(
            '<img src="data:image/jpeg;base64,{}" height="800">'.format(
                data_uri))
    html_str = '<html>' + ' '.join(image_strs) + '</html>'
    with tf.gfile.GFile(FLAGS.output_html, 'w') as f:
        f.write(html_str)
# Example #2
0
def main(unused_argv):
    """Runs checkpoint-based inference on images and saves the results.

    Builds the model selected by `FLAGS.model` (with optional config file and
    parameter overrides), restores `FLAGS.checkpoint_path`, and runs the model
    on every file matching `FLAGS.image_file_pattern`. Two outputs are
    written: an HTML page with the annotated images (`FLAGS.output_html`) and
    the raw per-image results saved with `np.save` (`FLAGS.output_file`).

    Args:
      unused_argv: positional command-line arguments; ignored.

    Raises:
      ValueError: if `FLAGS.label_map_format` is not 'csv' or a label map row
        is not in `id:name` form.
    """
    del unused_argv

    # Load the label map.
    print(' - Loading the label map...')
    if FLAGS.label_map_format != 'csv':
        raise ValueError('Unsupported label map format: {}.'.format(
            FLAGS.label_map_format))
    label_map_dict = {}
    with tf.gfile.Open(FLAGS.label_map_file, 'r') as csv_file:
        for row in csv.reader(csv_file, delimiter=':'):
            if len(row) != 2:
                raise ValueError(
                    'Each row of the csv label map file must be in '
                    '`id:name` format.')
            id_index = int(row[0])
            label_map_dict[id_index] = {'id': id_index, 'name': row[1]}

    params = config_factory.config_generator(FLAGS.model)
    if FLAGS.config_file:
        params = params_dict.override_params_dict(params,
                                                  FLAGS.config_file,
                                                  is_strict=True)
    params = params_dict.override_params_dict(params,
                                              FLAGS.params_override,
                                              is_strict=True)
    params.override(
        {
            'architecture': {
                'use_bfloat16': False,  # The inference runs on CPU/GPU.
            },
        },
        is_strict=True)
    params.validate()
    params.lock()

    model = model_factory.model_generator(params)

    res = []
    image_with_detections_list = []
    with tf.Graph().as_default():
        # The graph takes the encoded image bytes and decodes them itself.
        image_input = tf.placeholder(shape=(), dtype=tf.string)
        image = tf.io.decode_image(image_input, channels=3)
        image.set_shape([None, None, 3])

        image = input_utils.normalize_image(image)
        image_size = [FLAGS.image_size, FLAGS.image_size]
        image, image_info = input_utils.resize_and_crop_image(
            image,
            image_size,
            image_size,
            aug_scale_min=1.0,
            aug_scale_max=1.0)
        image.set_shape([image_size[0], image_size[1], 3])

        # batching.
        images = tf.reshape(image, [1, image_size[0], image_size[1], 3])
        images_info = tf.expand_dims(image_info, axis=0)

        # model inference
        outputs = model.build_outputs(images, {'image_info': images_info},
                                      mode=mode_keys.PREDICT)

        # NOTE(review): presumably row 2 of image_info holds the (y, x) resize
        # scale, so this maps boxes back to the original image -- confirm
        # against input_utils.resize_and_crop_image.
        outputs['detection_boxes'] = (
            outputs['detection_boxes'] /
            tf.tile(images_info[:, 2:3, :], [1, 1, 2]))

        predictions = outputs

        # Create a saver in order to load the pre-trained checkpoint.
        saver = tf.train.Saver()

        with tf.Session() as sess:
            print(' - Loading the checkpoint...')
            saver.restore(sess, FLAGS.checkpoint_path)

            image_files = tf.gfile.Glob(FLAGS.image_file_pattern)
            for i, image_file in enumerate(image_files):
                print(' - Processing image %d...' % i)

                with tf.gfile.GFile(image_file, 'rb') as f:
                    image_bytes = f.read()

                # Decode from the bytes already read instead of re-opening the
                # path, which also keeps non-local paths working.
                image = Image.open(io.BytesIO(image_bytes))
                image = image.convert(
                    'RGB')  # needed for images with 4 channels.
                width, height = image.size
                np_image = np.array(image, dtype=np.uint8)

                predictions_np = sess.run(predictions,
                                          feed_dict={image_input: image_bytes})

                num_detections = int(predictions_np['num_detections'][0])
                np_boxes = predictions_np['detection_boxes'][
                    0, :num_detections]
                np_scores = predictions_np['detection_scores'][
                    0, :num_detections]
                np_classes = predictions_np['detection_classes'][
                    0, :num_detections].astype(np.int32)
                np_attributes = predictions_np['detection_attributes'][
                    0, :num_detections, :]

                # Both stay None for models without a mask output.
                np_masks = None
                encoded_masks = None
                if 'detection_masks' in predictions_np:
                    instance_masks = predictions_np['detection_masks'][
                        0, :num_detections]
                    np_masks = mask_utils.paste_instance_masks(
                        instance_masks, box_utils.yxyx_to_xywh(np_boxes),
                        height, width)
                    encoded_masks = [
                        mask_api.encode(np.asfortranarray(np_mask))
                        for np_mask in np_masks
                    ]

                res.append({
                    'image_file': image_file,
                    'boxes': np_boxes,
                    'classes': np_classes,
                    'scores': np_scores,
                    'attributes': np_attributes,
                    'masks': encoded_masks,
                })

                image_with_detections = (
                    visualization_utils.
                    visualize_boxes_and_labels_on_image_array(
                        np_image,
                        np_boxes,
                        np_classes,
                        np_scores,
                        label_map_dict,
                        instance_masks=np_masks,
                        use_normalized_coordinates=False,
                        max_boxes_to_draw=FLAGS.max_boxes_to_draw,
                        min_score_thresh=FLAGS.min_score_threshold))
                image_with_detections_list.append(image_with_detections)

    print(' - Saving the outputs...')
    image_strs = []
    for image_with_detections in image_with_detections_list:
        formatted_image = Image.fromarray(
            image_with_detections.astype(np.uint8))
        with io.BytesIO() as stream:
            formatted_image.save(stream, format='JPEG')
            data_uri = base64.b64encode(stream.getvalue()).decode('utf-8')
        image_strs.append(
            '<img src="data:image/jpeg;base64,{}" height="800">'.format(
                data_uri))
    html_str = '<html>' + ' '.join(image_strs) + '</html>'
    with tf.gfile.GFile(FLAGS.output_html, 'w') as f:
        f.write(html_str)
    # `res` is a list of dicts, so it is stored as a pickled object array.
    np.save(FLAGS.output_file, res)
# Example #3
0
def saveOutputs(sess, predictions, image_input, image_file, *,
                label_map_file='dataset/fashionpedia_label_map.csv',
                output_html='output.html',
                output_npy='output.npy',
                max_boxes_to_draw=20,
                min_score_thresh=0.05):
    """Runs inference on one image and saves a visualization and raw results.

    Args:
      sess: session used to evaluate `predictions`.
      predictions: dict of tensors to fetch; must contain `num_detections`,
        `detection_boxes`, `detection_scores`, `detection_classes` and
        `detection_attributes`, and may contain `detection_masks`.
      image_input: placeholder that is fed the encoded image bytes.
      image_file: path of the image to process.
      label_map_file: CSV label map with one `id:name` entry per row.
      output_html: path of the HTML visualization to write.
      output_npy: path passed to `np.save` for the raw results. It should end
        in '.npy', because `np.save` appends that suffix when it is missing
        and the returned path would then not match the file on disk.
      max_boxes_to_draw: maximum number of boxes drawn on the image.
      min_score_thresh: minimum score for a box to be drawn.

    Returns:
      A dict `{"v": output_html, "d": output_npy}` with the two output paths.

    Raises:
      ValueError: if a label map row is not in `id:name` form.
    """
    print(' - Loading the label map...')
    label_map_dict = {}
    with tf.gfile.Open(label_map_file, 'r') as csv_file:
        for row in csv.reader(csv_file, delimiter=':'):
            if len(row) != 2:
                raise ValueError(
                    'Each row of the csv label map file must be in '
                    '`id:name` format.')
            id_index = int(row[0])
            label_map_dict[id_index] = {'id': id_index, 'name': row[1]}

    print(' - Processing image ...')
    with tf.gfile.GFile(image_file, 'rb') as f:
        image_bytes = f.read()

    # Decode from the bytes already read instead of opening the file twice.
    image = Image.open(io.BytesIO(image_bytes))
    image = image.convert('RGB')  # needed for images with 4 channels.
    width, height = image.size
    np_image = np.array(image, dtype=np.uint8)

    predictions_np = sess.run(predictions,
                              feed_dict={image_input: image_bytes})

    num_detections = int(predictions_np['num_detections'][0])
    np_boxes = predictions_np['detection_boxes'][0, :num_detections]
    np_scores = predictions_np['detection_scores'][0, :num_detections]
    np_classes = predictions_np['detection_classes'][
        0, :num_detections].astype(np.int32)
    np_attributes = predictions_np['detection_attributes'][
        0, :num_detections, :]

    # Masks are only used for drawing; they are not part of the saved results.
    np_masks = None
    if 'detection_masks' in predictions_np:
        instance_masks = predictions_np['detection_masks'][0, :num_detections]
        np_masks = mask_utils.paste_instance_masks(
            instance_masks, box_utils.yxyx_to_xywh(np_boxes), height, width)

    res = [{
        'image_file': image_file,
        'boxes': np_boxes,
        'classes': np_classes,
        'scores': np_scores,
        'attributes': np_attributes,
    }]
    print("Output generated")

    image_with_detections = (
        visualization_utils.visualize_boxes_and_labels_on_image_array(
            np_image,
            np_boxes,
            np_classes,
            np_scores,
            label_map_dict,
            instance_masks=np_masks,
            use_normalized_coordinates=False,
            max_boxes_to_draw=max_boxes_to_draw,
            min_score_thresh=min_score_thresh))

    print(' - Saving the outputs...')
    formatted_image = Image.fromarray(image_with_detections.astype(np.uint8))
    with io.BytesIO() as stream:
        formatted_image.save(stream, format='JPEG')
        data_uri = base64.b64encode(stream.getvalue()).decode('utf-8')
    html_str = (
        '<html>'
        '<img src="data:image/jpeg;base64,{}" height="800">'
        '</html>').format(data_uri)
    with tf.gfile.GFile(output_html, 'w') as f:
        f.write(html_str)
    # `res` is a list of dicts, so it is stored as a pickled object array.
    np.save(output_npy, res)
    return {"v": output_html, "d": output_npy}
# Example #4
0
def convert_predictions_to_coco_annotations(predictions):
    """Converts a batch of predictions to annotations in COCO format.

    Note that `predictions['detection_boxes']` (and
    `predictions['detection_outer_boxes']` when present) are converted from
    yxyx to xywh IN PLACE, so the same dictionary must not be passed twice.

    Args:
      predictions: a dictionary of lists of numpy arrays including the
        following fields. K below denotes the maximum number of instances per
        image.
        Required fields:
          - source_id: a list of numpy arrays of int or string of shape
              [batch_size].
          - num_detections: a list of numpy arrays of int of shape
              [batch_size].
          - detection_boxes: a list of numpy arrays of float of shape
              [batch_size, K, 4], where coordinates are in the original image
              space (not the scaled image space).
          - detection_classes: a list of numpy arrays of int of shape
              [batch_size, K].
          - detection_scores: a list of numpy arrays of float of shape
              [batch_size, K].
        Optional fields:
          - detection_masks: a list of numpy arrays of float of shape
              [batch_size, K, mask_height, mask_width]. When present,
              `image_info` is required as well; its entries [j, 0, 0] and
              [j, 0, 1] are passed to the mask pasting helper as the image
              size.
          - detection_outer_boxes: boxes used instead of `detection_boxes`
              when pasting the masks.

    Returns:
      coco_predictions: a list of annotation dicts with the keys `image_id`,
        `category_id`, `bbox`, `score`, `id` (1-based) and, when masks are
        available, `segmentation`. An empty list is returned when there are no
        batches.
    """
    coco_predictions = []
    has_masks = 'detection_masks' in predictions
    use_outer_box = 'detection_outer_boxes' in predictions

    for i in range(len(predictions['source_id'])):
        predictions['detection_boxes'][i] = box_utils.yxyx_to_xywh(
            predictions['detection_boxes'][i])
        if use_outer_box:
            predictions['detection_outer_boxes'][i] = box_utils.yxyx_to_xywh(
                predictions['detection_outer_boxes'][i])
            mask_boxes = predictions['detection_outer_boxes']
        else:
            mask_boxes = predictions['detection_boxes']

        # Sizes are read per batch so that a smaller final batch is handled.
        batch_size = predictions['source_id'][i].shape[0]
        max_num_detections = predictions['detection_classes'][i].shape[1]

        for j in range(batch_size):
            if has_masks:
                image_masks = generate_segmentation_from_masks(
                    predictions['detection_masks'][i][j],
                    mask_boxes[i][j],
                    int(predictions['image_info'][i][j, 0, 0]),
                    int(predictions['image_info'][i][j, 0, 1]),
                    is_image_mask=False)
                binary_masks = (image_masks > 0.0).astype(np.uint8)
                encoded_masks = [
                    mask_utils.encode(np.asfortranarray(binary_mask))
                    for binary_mask in binary_masks
                ]
            for k in range(max_num_detections):
                ann = {
                    'image_id': predictions['source_id'][i][j],
                    'category_id': predictions['detection_classes'][i][j, k],
                    'bbox': predictions['detection_boxes'][i][j, k],
                    'score': predictions['detection_scores'][i][j, k],
                }
                if has_masks:
                    ann['segmentation'] = encoded_masks[k]
                coco_predictions.append(ann)

    # COCO annotation ids are 1-based.
    for i, ann in enumerate(coco_predictions, start=1):
        ann['id'] = i

    return coco_predictions
def convert_predictions_to_coco_annotations(predictions, eval_image_sizes: dict = None, output_image_size: int = None,
                                            encode_mask_fn=None, score_threshold=0.05):
  """Converts a batch of predictions to annotations in COCO format.

  Only detections whose score is at least `score_threshold` are converted.
  Note that `predictions['detection_boxes']` (and
  `predictions['detection_outer_boxes']` when present) are converted from yxyx
  to xywh IN PLACE, so the same dictionary must not be passed twice.

  Args:
    predictions: a dictionary of lists of numpy arrays including the following
      fields. K below denotes the maximum number of instances per image.
      Required fields:
        - source_id: a list of numpy arrays of int or string of shape
            [batch_size].
        - num_detections: a list of numpy arrays of int of shape [batch_size].
        - detection_boxes: a list of numpy arrays of float of shape
            [batch_size, K, 4], where coordinates are in the original image
            space (not the scaled image space).
        - detection_classes: a list of numpy arrays of int of shape
            [batch_size, K].
        - detection_scores: a list of numpy arrays of float of shape
            [batch_size, K].
        - image_info: a list of numpy arrays; row `[j, 0]` is read as the
            original image size of image j.
      Optional fields:
        - detection_masks: a list of numpy arrays of float of shape
            [batch_size, K, mask_height, mask_width].
        - detection_outer_boxes: boxes used instead of `detection_boxes` when
            pasting the masks.
        - detection_attributes: per-detection values exported as
            `attribute_probabilities`.
    eval_image_sizes: optional mapping from image id to the image size that
      boxes and masks are rescaled to. Takes precedence over
      `output_image_size`.
    output_image_size: optional target size handed to `get_new_image_size`
      together with the original image size. Used only when
      `eval_image_sizes` is empty or None.
    encode_mask_fn: optional callable that encodes one binary mask; defaults
      to `mask_api.encode` applied to a Fortran-ordered copy of the mask.
    score_threshold: minimum detection score for a detection to be kept.

  Returns:
    coco_predictions: a list of annotation dicts with the keys `image_id`,
      `category_id`, `bbox`, `score`, `id` (1-based), plus `segmentation`,
      `mask_mean_score` and `mask_area_fraction` when masks are available and
      `attribute_probabilities` when attributes are available.
      `mask_mean_score` is NaN for a mask with no pixel above 0.5.
  """
  if encode_mask_fn is None:
    def encode_mask_fn(mask):
      return mask_api.encode(np.asfortranarray(mask))

  coco_predictions = []
  num_batches = len(predictions['source_id'])
  use_outer_box = 'detection_outer_boxes' in predictions
  has_masks = 'detection_masks' in predictions
  has_attributes = 'detection_attributes' in predictions

  for i in tqdm(range(num_batches), total=num_batches):
    predictions['detection_boxes'][i] = box_utils.yxyx_to_xywh(
        predictions['detection_boxes'][i])

    if use_outer_box:
      predictions['detection_outer_boxes'][i] = box_utils.yxyx_to_xywh(
          predictions['detection_outer_boxes'][i])
      mask_boxes = predictions['detection_outer_boxes']
    else:
      mask_boxes = predictions['detection_boxes']

    batch_size = predictions['source_id'][i].shape[0]
    for j in range(batch_size):
      bbox_indices = np.argwhere(
          predictions['detection_scores'][i][j] >= score_threshold).flatten()
      if bbox_indices.size == 0:
        # Nothing passes the threshold; skip the mask pasting entirely.
        continue

      image_id = predictions['source_id'][i][j]
      orig_image_size = predictions['image_info'][i][j, 0]

      if eval_image_sizes:
        eval_image_size = eval_image_sizes[image_id]
      elif output_image_size:
        eval_image_size = get_new_image_size(orig_image_size, output_image_size)
      else:
        eval_image_size = orig_image_size

      # A single scale (taken from the first dimension) is applied to boxes.
      eval_scale = orig_image_size[0] / eval_image_size[0]

      if has_masks:
        predicted_masks = predictions['detection_masks'][i][j, bbox_indices]
        image_masks = mask_utils.paste_instance_masks(
            predicted_masks,
            mask_boxes[i][j, bbox_indices].astype(np.float32) / eval_scale,
            int(eval_image_size[0]),
            int(eval_image_size[1]))
        binary_masks = (image_masks > 0.0).astype(np.uint8)
        encoded_masks = [encode_mask_fn(binary_mask) for binary_mask in binary_masks]

        mask_masks = (predicted_masks > 0.5).astype(np.float32)
        mask_areas = mask_masks.sum(axis=-1).sum(axis=-1)
        mask_area_fractions = (mask_areas / np.prod(predicted_masks.shape[1:])).tolist()
        # An empty mask gives 0/0 = NaN; the value is kept, only the NumPy
        # RuntimeWarning is silenced.
        with np.errstate(divide='ignore', invalid='ignore'):
          mask_mean_scores = (
              (predicted_masks * mask_masks).sum(axis=-1).sum(axis=-1) / mask_areas).tolist()

      for m, k in enumerate(bbox_indices):
        ann = {
          'image_id': int(image_id),
          'category_id': int(predictions['detection_classes'][i][j, k]),
          'bbox': (predictions['detection_boxes'][i][j, k].astype(np.float32) / eval_scale).tolist(),
          'score': float(predictions['detection_scores'][i][j, k]),
        }

        if has_masks:
          ann['segmentation'] = encoded_masks[m]
          ann['mask_mean_score'] = mask_mean_scores[m]
          ann['mask_area_fraction'] = mask_area_fractions[m]

        if has_attributes:
          ann['attribute_probabilities'] = predictions['detection_attributes'][i][j, k].tolist()

        coco_predictions.append(ann)

  # COCO annotation ids are 1-based.
  for i, ann in enumerate(coco_predictions, start=1):
    ann['id'] = i

  return coco_predictions
def convert_predictions_to_coco_annotations(
    prediction: Prediction,
    image_id: int,
    filename: str,
    score_threshold=0.05,
) -> list[COCOAnnotation]:
    """Convert the prediction for a single image into COCO annotations.

    This is made, modifying a function of the same name in
    /tf_tpu_models/official/detection/evaluation/coco_utils.py

    Detections scoring below ``score_threshold`` are dropped, and so are
    detections whose predicted mask has no pixel above 0.5 (their mean mask
    score is undefined). Boxes and masks are rescaled from the resized image
    back to the original image.

    Note that ``prediction["pred_detection_boxes"]`` is converted from yxyx
    to xywh IN PLACE, so the same ``prediction`` must not be passed twice.

    Parameters
    ----------
    prediction : Prediction
        Model outputs for one image. The keys read are
        ``pred_detection_boxes``, ``pred_detection_scores``,
        ``pred_detection_classes``, ``pred_detection_masks`` and
        ``pred_image_info``. Row 0 of ``pred_image_info`` is used as the
        original image shape and row 1 as the resized image shape.
    image_id : int
        Id stored in every returned annotation.
    filename : str
        File name stored in every returned annotation.
    score_threshold : float, optional
        Minimum detection score for a detection to be kept, by default 0.05

    Returns
    -------
    list[COCOAnnotation]
        One annotation per kept detection; empty when nothing is kept.
    """
    prediction["pred_detection_boxes"] = box_utils.yxyx_to_xywh(
        prediction["pred_detection_boxes"]
    )
    boxes_xywh = prediction["pred_detection_boxes"]

    bbox_indices = np.argwhere(
        prediction["pred_detection_scores"] >= score_threshold
    ).flatten()
    if bbox_indices.size == 0:
        # Nothing passes the threshold; skip the mask pasting entirely.
        return []

    orig_shape = prediction["pred_image_info"][0]
    resize_shape = prediction["pred_image_info"][1]

    # Original-to-resized ratio, measured along the longer original side.
    longer_axis = 0 if orig_shape[0] > orig_shape[1] else 1
    o2r = orig_shape[longer_axis] / resize_shape[longer_axis]

    predicted_masks = prediction["pred_detection_masks"][bbox_indices]
    image_masks = mask_utils.paste_instance_masks(
        predicted_masks,
        boxes_xywh[bbox_indices].astype(np.float32) * o2r,
        int(orig_shape[0]),
        int(orig_shape[1]),
    )
    binary_masks = (image_masks > 0.0).astype(np.uint8)

    mask_masks = (predicted_masks > 0.5).astype(np.float32)
    mask_areas = mask_masks.sum(axis=-1).sum(axis=-1)
    mask_area_fractions = (mask_areas / np.prod(predicted_masks.shape[1:])).tolist()
    # An empty mask gives 0/0 = NaN, which is filtered out below; only the
    # NumPy RuntimeWarning is silenced here.
    with np.errstate(divide="ignore", invalid="ignore"):
        mask_mean_scores = (
            (predicted_masks * mask_masks).sum(axis=-1).sum(axis=-1) / mask_areas
        ).tolist()

    anns: list[COCOAnnotation] = []
    for m, k in enumerate(bbox_indices):
        mask_mean_score = mask_mean_scores[m]
        # mask_mean_score is float("nan") when mask_area is 0.
        if math.isnan(mask_mean_score):
            continue
        anns.append(
            COCOAnnotation(
                image_id=image_id,
                filename=filename,
                category_id=int(prediction["pred_detection_classes"][k]),
                # Plain Python floats, because NumPy float32 values
                # can't be serialized as JSON.
                bbox=tuple(float(x) for x in boxes_xywh[k] * o2r),
                mask_area_fraction=float(mask_area_fractions[m]),
                score=float(prediction["pred_detection_scores"][k]),
                # Encode lazily so dropped detections cost nothing.
                segmentation=_encode_mask_fn(binary_masks[m]),
                mask_mean_score=mask_mean_score,
            )
        )

    return anns