Example #1
    def _load_kitti_annotation(self):
        def _get_obj_level(obj):
            height = float(obj[7]) - float(obj[5]) + 1
            truncation = float(obj[1])
            occlusion = float(obj[2])
            if height >= 40 and truncation <= 0.15 and occlusion <= 0:
                return 1
            elif height >= 25 and truncation <= 0.3 and occlusion <= 1:
                return 2
            elif height >= 25 and truncation <= 0.5 and occlusion <= 2:
                return 3
            else:
                return 4

        idx2annotation = {}
        for index in self._image_idx:
            filename = os.path.join(self._label_path, index + '.txt')
            with open(filename, 'r') as f:
                lines = f.readlines()
            bboxes = []

            for line in lines:
                obj = line.strip().split(' ')
                try:
                    cls = self._class_to_idx[obj[0].lower().strip()]
                except KeyError:
                    # skip classes that are not in the detector's class list
                    continue
                if self.mc.EXCLUDE_HARD_EXAMPLES and _get_obj_level(obj) > 3:
                    continue
                xmin = float(obj[4])
                ymin = float(obj[5])
                xmax = float(obj[6])
                ymax = float(obj[7])
                assert xmin >= 0.0 and xmin <= xmax, \
                    'Invalid bounding box x-coord xmin {} or xmax {} at {}.txt' \
                        .format(xmin, xmax, index)
                assert ymin >= 0.0 and ymin <= ymax, \
                    'Invalid bounding box y-coord ymin {} or ymax {} at {}.txt' \
                        .format(ymin, ymax, index)
                x, y, w, h = bbox_transform_inv([xmin, ymin, xmax, ymax])
                bboxes.append([x, y, w, h, cls])

            idx2annotation[index] = bboxes
        return idx2annotation
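
Note: every loader in this list assumes a bbox_transform_inv helper that turns a corner-format box [xmin, ymin, xmax, ymax] into a center-format box [center_x, center_y, width, height] (obj[4:8] are the KITTI 2D bbox fields; obj[1] and obj[2] are truncation and occlusion). A minimal sketch of the pair, assuming the inclusive-pixel (+1) convention that _get_obj_level uses; the real util module may differ in detail:

    def bbox_transform_inv(bbox):
        # [xmin, ymin, xmax, ymax] -> [center_x, center_y, width, height]
        xmin, ymin, xmax, ymax = bbox
        w = xmax - xmin + 1.0
        h = ymax - ymin + 1.0
        return [xmin + 0.5 * w, ymin + 0.5 * h, w, h]

    def bbox_transform(bbox):
        # [center_x, center_y, width, height] -> [xmin, ymin, xmax, ymax]
        cx, cy, w, h = bbox
        return [cx - 0.5 * w, cy - 0.5 * h,
                cx + 0.5 * w - 1.0, cy + 0.5 * h - 1.0]
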
Example #2
    def _load_pascal_annotation(self):
        idx2annotation = {}
        for index in self._image_idx:
            filename = os.path.join(self._data_path, 'Annotations',
                                    index + '.xml')
            tree = ET.parse(filename)
            objs = tree.findall('object')
            objs = [
                obj for obj in objs if int(obj.find('difficult').text) == 0
            ]
            bboxes = []
            for obj in objs:
                bbox = obj.find('bndbox')
                # Make pixel indexes 0-based
                xmin = float(bbox.find('xmin').text) - 1
                xmax = float(bbox.find('xmax').text) - 1
                ymin = float(bbox.find('ymin').text) - 1
                ymax = float(bbox.find('ymax').text) - 1
                assert xmin >= 0.0 and xmin <= xmax, \
                    'Invalid bounding box x-coord xmin {} or xmax {} at {}.xml' \
                        .format(xmin, xmax, index)
                assert ymin >= 0.0 and ymin <= ymax, \
                    'Invalid bounding box y-coord ymin {} or ymax {} at {}.xml' \
                        .format(ymin, ymax, index)
                x, y, w, h = bbox_transform_inv([xmin, ymin, xmax, ymax])
                cls = self._class_to_idx[obj.find('name').text.lower().strip()]
                bboxes.append([x, y, w, h, cls])

            idx2annotation[index] = bboxes

        return idx2annotation
Example #3
    def _load_coco_annotation(self):
        idx2annotation = {}
        for index in self._image_idx:
            filename = os.path.join(self._label_path, index + '.txt')
            with open(filename, 'r') as f:
                lines = f.readlines()
            bboxes = []
            for line in lines:
                obj = line.strip().split(' ')
                try:
                    cls = self._class_to_idx[obj[0].lower().strip()]
                except KeyError:
                    # skip classes that are not in the detector's class list
                    continue
                xmin = float(obj[1])
                ymin = float(obj[2])
                xmax = float(obj[3])
                ymax = float(obj[4])
                assert xmin >= 0.0 and xmin <= xmax, \
                    'Invalid bounding box x-coord xmin {} or xmax {} at {}.txt' \
                        .format(xmin, xmax, index)
                assert ymin >= 0.0 and ymin <= ymax, \
                    'Invalid bounding box y-coord ymin {} or ymax {} at {}.txt' \
                        .format(ymin, ymax, index)
                x, y, w, h = bbox_transform_inv([xmin, ymin, xmax, ymax])
                bboxes.append([x, y, w, h, cls])
            idx2annotation[index] = bboxes

        return idx2annotation
Example #4
File: vid.py  Project: goan15910/ConvDet
    def _load_vid_annotation(self):
        idx2annotation = {}
        for index in self._image_idx:
            filename = os.path.join(self._anno_path, index + '.xml')
            tree = ET.parse(filename)
            objs = tree.findall('object')
            bboxes = []
            for obj in objs:
                bbox = obj.find('bndbox')
                xmin = float(bbox.find('xmin').text)
                xmax = float(bbox.find('xmax').text)
                ymin = float(bbox.find('ymin').text)
                ymax = float(bbox.find('ymax').text)
                assert xmin >= 0.0 and xmin <= xmax, \
                    'Invalid bounding box x-coord xmin {} or xmax {} at {}.xml' \
                        .format(xmin, xmax, index)
                assert ymin >= 0.0 and ymin <= ymax, \
                    'Invalid bounding box y-coord ymin {} or ymax {} at {}.xml' \
                        .format(ymin, ymax, index)
                x, y, w, h = bbox_transform_inv([xmin, ymin, xmax, ymax])
                cls = self._raw_cname_to_idx[obj.find(
                    'name').text.lower().strip()]
                bboxes.append([x, y, w, h, cls])

            idx2annotation[index] = bboxes

        return idx2annotation
Example #5
    def _filter_ann(self, line_str, filter_cond=None):
        '''
        input:
            line_str: one annotation line as a string
            filter_cond: filter conditions, see _load_caltech_annotation
        return:
            [cent_x, cent_y, w, h, gt_class, 0], where gt_class is 0 for a
            true box, -1 for an ignored box, and -2 for a bad annotation
        '''
        strs = line_str.split(' ')
        str_len = len(strs)
        label = strs[0]
        x = float(strs[1])
        y = float(strs[2])
        w = float(strs[3])
        h = float(strs[4])
        occ_flag = float(strs[5])
        vis_x = float(strs[6])
        vis_y = float(strs[7])
        vis_w = float(strs[8])
        vis_h = float(strs[9])

        ignore_flag = 0
        if str_len > 10:
            ignore_flag = float(strs[10])
        if str_len > 11:  # orientation angle, currently unused
            orientation_angle = float(strs[11])

        # priority: error_anno>ignore>true
        err_flag = x < 0 or y < 0 or w <= 0 or h <= 0
        err_flag = err_flag or (label != filter_cond['lbls']
                                and label != filter_cond['ilbls'])

        if err_flag:
            return [0, 0, 0, 0, -2, 0]

        # Caltech images are 640x480; boxes extending past the border are ignored
        ignore_flag = ignore_flag or (x + w) > 640 or (
            y + h) > 480 or label == filter_cond['ilbls']
        # hRng bounds the box height
        ignore_flag = ignore_flag or (
            not filter_cond['hRng'][1] > h > filter_cond['hRng'][0])
        if occ_flag:
            vis_ratio = 1.0 * vis_w * vis_h / w / h
            ignore_flag = ignore_flag or (not filter_cond['vRng'][1] >
                                          vis_ratio > filter_cond['vRng'][0])

        x2 = x + w
        y2 = y + h
        if x2 > 640:
            x2 = 640
        if y2 > 480:
            y2 = 480

        x, y, w, h = bbox_transform_inv([x, y, x2, y2])

        if ignore_flag:
            return [x, y, w, h, -1, 0]

        return [x, y, w, h, 0, 0]
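
For context, a hypothetical call to _filter_ann, assuming a Caltech-style line (label x y w h occ vis_x vis_y vis_w vis_h ignore angle) and a filter_cond dict shaped like the one _load_caltech_annotation would pass in; all values below are illustrative, not from the source:

    filter_cond = {
        'lbls': 'person',    # label to keep
        'ilbls': 'people',   # label to ignore
        'hRng': [50, 1e5],   # box height range in pixels
        'vRng': [0.65, 1.0], # visible-area ratio range
    }
    line = 'person 100 80 40 100 1 100 80 40 70 0 0'
    box = loader._filter_ann(line, filter_cond)
    # -> [cx, cy, w, h, 0, 0]: a true (non-ignored) box
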
Example #6
  def _load_kitti_annotation(self):
    def _get_obj_level(obj):
      height = float(obj[7]) - float(obj[5]) + 1
      truncation = float(obj[1])
      occlusion = float(obj[2])
      if height >= 40 and truncation <= 0.15 and occlusion <= 0:
          return 1
      elif height >= 25 and truncation <= 0.3 and occlusion <= 1:
          return 2
      elif height >= 25 and truncation <= 0.5 and occlusion <= 2:
          return 3
      else:
          return 4

    idx2annotation = {}
    remove_list = []
    for index in self._image_idx:
      filename = os.path.join(self._label_path, index+'.txt')
      if index.endswith('.png') or not os.path.exists(filename):
        remove_list.append(index)
        continue
      with open(filename, 'r') as f:
        lines = f.readlines()
      bboxes = []
      for line in lines:
        obj = line.strip().split(' ')
        try:
          cls = self._class_to_idx[obj[0].lower().strip()]
        except KeyError:
          continue

        if self.mc.EXCLUDE_HARD_EXAMPLES and _get_obj_level(obj) > 3:
          continue
        xmin = float(obj[4])
        ymin = float(obj[5])
        xmax = float(obj[6])
        ymax = float(obj[7])
        if xmin < 0.0 or xmin > xmax or ymin < 0.0 or ymin > ymax:
          continue
        x, y, w, h = bbox_transform_inv([xmin, ymin, xmax, ymax])
        bboxes.append([x, y, w, h, cls])

      if len(bboxes) == 0:
        remove_list.append(index)
        continue
      idx2annotation[index] = bboxes
    
    self._image_idx = [index for index in self._image_idx if index not in remove_list]
    return idx2annotation
Example #7
    def _load_kitti_annotation(self):
        def _get_obj_level(obj):
            height = float(obj[7]) - float(obj[5]) + 1
            truncation = float(obj[1])
            occlusion = float(obj[2])
            if height >= 40 and truncation <= 0.15 and occlusion <= 0:
                return 1
            elif height >= 25 and truncation <= 0.3 and occlusion <= 1:
                return 2
            elif height >= 25 and truncation <= 0.5 and occlusion <= 2:
                return 3
            else:
                return 4

        idx2annotation = {}
        for index in self._image_idx:
            filename = os.path.join(self._label_path, index + '.txt')
            with open(filename, 'r') as f:
                lines = f.readlines()
            bboxes = []
            # Read the image once per index; the rescaling factors do not
            # change between objects of the same image.
            im = np.expand_dims(
                cv2.imread(self._image_path_at(index),
                           cv2.IMREAD_GRAYSCALE), -1)
            orig_h, orig_w = [float(v) for v in im.shape[:2]]
            self.x_scale = self.mc.IMAGE_WIDTH / orig_w
            self.y_scale = self.mc.IMAGE_HEIGHT / orig_h
            for line in lines:
                obj = line.strip().split(' ')
                try:
                    cls = self._class_to_idx[obj[0].lower().strip()]
                except KeyError:
                    continue

                if self.mc.EXCLUDE_HARD_EXAMPLES and _get_obj_level(obj) > 3:
                    continue
                xmin = float(obj[4])
                ymin = float(obj[5])
                xmax = float(obj[6])
                ymax = float(obj[7])
                assert xmin >= 0.0 and xmin <= xmax, \
                    'Invalid bounding box x-coord xmin {} or xmax {} at {}.txt' \
                        .format(xmin, xmax, index)
                assert ymin >= 0.0 and ymin <= ymax, \
                    'Invalid bounding box y-coord ymin {} or ymax {} at {}.txt' \
                        .format(ymin, ymax, index)
                x, y, w, h = bbox_transform_inv([xmin, ymin, xmax, ymax])
                bboxes.append([x, y, w, h, cls])

            idx2annotation[index] = bboxes

        return idx2annotation
Example #8
    def geteval_op_list(self):
        """get all tensorflow operations regarding this 
       model evaluation.
    """
        detection_boxes = tf.transpose(tf.stack(
            util.bbox_transform_inv([
                self.det_boxes["xmins"], self.det_boxes["ymins"],
                self.det_boxes["xmaxs"], self.det_boxes["ymaxs"]
            ])), (1, 2, 0),
                                       name='bbox')

        return [op for op in (
            self.prediction_boxes, self.score, self.cls_idx_per_img,
            self.filenames, self.widths, self.heights, self.viz_op,
            self.det_boxes, self.det_probs, self.det_class
        ) if op is not None]
Example #9
def _load_kitti_annotation(mc, filenames, class_to_idx):
  def _get_obj_level(obj):
    height = float(obj[7]) - float(obj[5]) + 1
    truncation = float(obj[1])
    occlusion = float(obj[2])
    if height >= 40 and truncation <= 0.15 and occlusion <= 0:
        return 1
    elif height >= 25 and truncation <= 0.3 and occlusion <= 1:
        return 2
    elif height >= 25 and truncation <= 0.5 and occlusion <= 2:
        return 3
    else:
        return 4
  label_path = os.path.join(mc.DATA_PATH, 'training', 'label_2')
  idx2annotation = {}
  for index in filenames:
    filename = os.path.join(label_path, index+'.txt')
    with open(filename, 'r') as f:
      lines = f.readlines()
    bboxes = []
    for line in lines:
      obj = line.strip().split(' ')
      try:
        cls = class_to_idx[obj[0].lower().strip()]
      except KeyError:
        continue

      if mc.EXCLUDE_HARD_EXAMPLES and _get_obj_level(obj) > 3:
        continue
      xmin = float(obj[4])
      ymin = float(obj[5])
      xmax = float(obj[6])
      ymax = float(obj[7])
      assert xmin >= 0.0 and xmin <= xmax, \
          'Invalid bounding box x-coord xmin {} or xmax {} at {}.txt' \
              .format(xmin, xmax, index)
      assert ymin >= 0.0 and ymin <= ymax, \
          'Invalid bounding box y-coord ymin {} or ymax {} at {}.txt' \
              .format(ymin, ymax, index)
      x, y, w, h = util.bbox_transform_inv([xmin, ymin, xmax, ymax])
      bboxes.append([x, y, w, h, cls])

    idx2annotation[index] = bboxes

  return idx2annotation
Example #10
    def drift(self, image, gt_boxes):
        mc = self.mc
        drift_prob = np.random.rand()
        if drift_prob > mc.DRIFT_PROB:
            return image, gt_boxes

        ori_height, ori_width, ori_channel = [int(v) for v in image.shape]

        gt_boxes[:, 0::2] *= ori_width
        gt_boxes[:, 1::2] *= ori_height

        gt_boxes = np.array([bbox_transform_inv(box) for box in gt_boxes])

        # Ensure that the ground-truth bounding boxes are not shifted out of the image
        max_drift_x = min(gt_boxes[:, 0] - gt_boxes[:, 2] / 2.0 + 1)
        max_drift_y = min(gt_boxes[:, 1] - gt_boxes[:, 3] / 2.0 + 1)
        assert max_drift_x >= 0 and max_drift_y >= 0, 'bbox out of image'
        dy = np.random.randint(-mc.DRIFT_Y, min(mc.DRIFT_Y + 1, max_drift_y))
        dx = np.random.randint(-mc.DRIFT_X, min(mc.DRIFT_X + 1, max_drift_x))

        # shift bbox
        gt_boxes[:, 0] = gt_boxes[:, 0] - dx
        gt_boxes[:, 1] = gt_boxes[:, 1] - dy
        orig_h, orig_w = ori_height, ori_width
        # distort image
        orig_h -= dy
        orig_w -= dx
        orig_x, dist_x = max(dx, 0), max(-dx, 0)
        orig_y, dist_y = max(dy, 0), max(-dy, 0)

        distorted_im = np.zeros(
            (int(orig_h), int(orig_w), 3)).astype(np.float32)
        distorted_im[dist_y:, dist_x:, :] = image[orig_y:, orig_x:, :]
        im = distorted_im

        gt_boxes = np.array([bbox_transform(box) for box in gt_boxes])

        height, width, channel = [int(v) for v in im.shape]
        gt_boxes[:, 0::2] /= width
        gt_boxes[:, 1::2] /= height

        return im, gt_boxes
Example #11
                def _define_bbox(pred_bbox_delta, ANCHOR_BOX):
                    delta_x, delta_y, delta_w, delta_h = tf.unstack(
                        pred_bbox_delta, axis=2)
                    anchor_x = ANCHOR_BOX[:, 0]
                    anchor_y = ANCHOR_BOX[:, 1]
                    anchor_w = ANCHOR_BOX[:, 2]
                    anchor_h = ANCHOR_BOX[:, 3]

                    box_center_x = tf.identity(anchor_x + delta_x * anchor_w,
                                               name='bbox_cx')
                    box_center_y = tf.identity(anchor_y + delta_y * anchor_h,
                                               name='bbox_cy')
                    box_width = tf.identity(
                        anchor_w * util.safe_exp(delta_w, mc.EXP_THRESH),
                        name='bbox_width')
                    box_height = tf.identity(
                        anchor_h * util.safe_exp(delta_h, mc.EXP_THRESH),
                        name='bbox_height')

                    self._activation_summary(delta_x, 'delta_x')
                    self._activation_summary(delta_y, 'delta_y')
                    self._activation_summary(delta_w, 'delta_w')
                    self._activation_summary(delta_h, 'delta_h')

                    self._activation_summary(box_center_x, 'bbox_cx')
                    self._activation_summary(box_center_y, 'bbox_cy')
                    self._activation_summary(box_width, 'bbox_width')
                    self._activation_summary(box_height, 'bbox_height')

                    with tf.variable_scope('trimming'):
                        xmins, ymins, xmaxs, ymaxs = util.bbox_transform([
                            box_center_x, box_center_y, box_width, box_height
                        ])

                        # The max x position is mc.IMAGE_WIDTH - 1 since we use zero-based
                        # pixels. Same for y.
                        xmins = tf.minimum(tf.maximum(0.0, xmins),
                                           mc.IMAGE_WIDTH - 1.0,
                                           name='bbox_xmin')
                        self._activation_summary(xmins, 'box_xmin')

                        ymins = tf.minimum(tf.maximum(0.0, ymins),
                                           mc.IMAGE_HEIGHT - 1.0,
                                           name='bbox_ymin')
                        self._activation_summary(ymins, 'box_ymin')

                        xmaxs = tf.maximum(tf.minimum(mc.IMAGE_WIDTH - 1.0,
                                                      xmaxs),
                                           0.0,
                                           name='bbox_xmax')
                        self._activation_summary(xmaxs, 'box_xmax')

                        ymaxs = tf.maximum(tf.minimum(mc.IMAGE_HEIGHT - 1.0,
                                                      ymaxs),
                                           0.0,
                                           name='bbox_ymax')
                        self._activation_summary(ymaxs, 'box_ymax')

                        det_boxes = tf.transpose(tf.stack(
                            util.bbox_transform_inv(
                                [xmins, ymins, xmaxs, ymaxs])), (1, 2, 0),
                                                 name='bbox')
                    return det_boxes
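
The stretching math above is the standard SqueezeDet-style anchor decode: cx = anchor_x + dx * anchor_w, cy = anchor_y + dy * anchor_h, and width/height scale the anchor by a thresholded exponential. A NumPy sketch under that assumption (safe_exp_np and decode_deltas are illustrative names):

    import numpy as np

    def safe_exp_np(v, thresh):
        # exp(v) below thresh, linear continuation above it, to avoid overflow
        return np.where(v > thresh,
                        np.exp(thresh) * (v - thresh + 1.0),
                        np.exp(np.minimum(v, thresh)))

    def decode_deltas(deltas, anchors, exp_thresh=1.0):
        # deltas, anchors: (num_anchors, 4) arrays of [cx, cy, w, h]
        cx = anchors[:, 0] + deltas[:, 0] * anchors[:, 2]
        cy = anchors[:, 1] + deltas[:, 1] * anchors[:, 3]
        w = anchors[:, 2] * safe_exp_np(deltas[:, 2], exp_thresh)
        h = anchors[:, 3] * safe_exp_np(deltas[:, 3], exp_thresh)
        return np.stack([cx, cy, w, h], axis=1)
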
Example #12
        if (IMAGE_TYPE == 'GROUND_TRUTH'):
            xmin = int(obj[2])
            ymin = int(obj[3])
            xmax = int(obj[4])
            ymax = int(obj[5])

        print(filename)
        assert xmin >= 0.0 and xmin <= xmax, \
            'Invalid bounding box x-coord xmin {} or xmax {} at {}.txt' \
                .format(xmin, xmax, index)

        assert ymin >= 0.0 and ymin <= ymax, \
            'Invalid bounding box y-coord ymin {} or ymax {} at {}.txt' \
                .format(ymin, ymax, index)

        x, y, w, h = bbox_transform_inv([xmin, ymin, xmax, ymax])
        bboxes.append([x, y, w, h])

        im = im.astype(np.float32, copy=False)
        im = cv2.resize(im, (1920, 1200))
        color = (0, 0, 255)
        cv2.rectangle(im, (xmin, ymin), (xmax, ymax), color, 1)

        file_name = index
        out_file_name = os.path.join("./data/out/test_example",
                                     'out_' + file_name + ".png")
        cv2.imwrite(out_file_name, im)
        print('Image detection output saved to {}'.format(out_file_name))

    idx2annotation[index] = bboxes
Example #13
    def _add_yolo_interpret_graph(self):
        """Interpret yolo output."""
        mc = self.mc

        with tf.variable_scope('interpret_output') as scope:
            # TODO(jeff): add summary
            N = mc.BATCH_SIZE
            H, W, B = mc.NET_OUT_SHAPE
            C = mc.CLASSES
            preds = tf.reshape(self.preds, (N, H, W, B, 5 + C))

            # confidence
            # confidence is channel 4 (channels 0-3 are the box, 5: the classes)
            self.pred_conf = tf.sigmoid(tf.reshape(preds[:, :, :, :, 4],
                                                   (N, H, W, B, 1)),
                                        name='conf')

            # bbox scale
            self.bbox_x = tf.reshape(tf.add(
                tf.sigmoid(preds[:, :, :, :, 0]),
                tf.reshape(tf.to_float(tf.range(0, W, 1)), (1, 1, W, 1))),
                                     (N, H, W, B, 1),
                                     name='bbox_x_ratio')
            self.bbox_y = tf.reshape(tf.add(
                tf.sigmoid(preds[:, :, :, :, 1]),
                tf.reshape(tf.to_float(tf.range(0, H, 1)), (1, H, 1, 1))),
                                     (N, H, W, B, 1),
                                     name='bbox_y_ratio')
            self.bbox_w = tf.reshape(tf.multiply(tf.exp(preds[:, :, :, :, 2]),
                                                 mc.ANCHOR_BOX[:, :, :, 0]),
                                     (N, H, W, B, 1),
                                     name='bbox_w_ratio')
            self.bbox_h = tf.reshape(tf.multiply(tf.exp(preds[:, :, :, :, 3]),
                                                 mc.ANCHOR_BOX[:, :, :, 1]),
                                     (N, H, W, B, 1),
                                     name='bbox_h_ratio')
            self.bbox = tf.stack(
                [self.bbox_x, self.bbox_y, self.bbox_w, self.bbox_h],
                axis=4,
                name='bbox_ratio')

            # bbox prediction
            w_scale = float(mc.IMAGE_WIDTH) / W
            h_scale = float(mc.IMAGE_HEIGHT) / H
            self.raw_boxes = tf.reshape(tf.stack([
                self.bbox_x * w_scale, self.bbox_y * h_scale,
                self.bbox_w * w_scale, self.bbox_h * h_scale
            ],
                                                 axis=4), (N, H * W * B, 4),
                                        name='raw_bbox')

            # trim bbox
            self.det_boxes = tf.py_func(lambda x: util.bbox_transform_inv(x), [
                self._trim_bbox(
                    tf.py_func(lambda x: util.bbox_transform(x),
                               [self.raw_boxes], tf.float32))
            ],
                                        tf.float32,
                                        name='det_boxes')

            # prob
            self.probs = tf.multiply(self._smooth_softmax(preds[:, :, :, :,
                                                                5:]),
                                     self.pred_conf,
                                     name='probs')

            # class prediction
            self.det_probs = tf.reshape(self.probs, (N, H * W * B, C),
                                        name='score')
            self.det_class = tf.reshape(tf.argmax(self.probs, 4),
                                        (N, H * W * B),
                                        name='class_idx')
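
The interpret graph above implements the usual YOLO decode: grid offsets are added to sigmoid-squashed x/y, and anchors are scaled by exp(w/h), all in grid units before the pixel rescale. A NumPy sketch of the same math for one batch element (decode_yolo is an illustrative name):

    import numpy as np

    def sigmoid(v):
        return 1.0 / (1.0 + np.exp(-v))

    def decode_yolo(preds, anchors_wh):
        # preds: (H, W, B, 4) raw tx, ty, tw, th; anchors_wh: (B, 2) grid units
        H, W, B, _ = preds.shape
        gx = np.arange(W).reshape(1, W, 1)
        gy = np.arange(H).reshape(H, 1, 1)
        bx = sigmoid(preds[..., 0]) + gx  # grid-relative center x
        by = sigmoid(preds[..., 1]) + gy  # grid-relative center y
        bw = np.exp(preds[..., 2]) * anchors_wh[:, 0]
        bh = np.exp(preds[..., 3]) * anchors_wh[:, 1]
        return np.stack([bx, by, bw, bh], axis=-1)
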
Example #14
    def _add_interpretation_graph(self):
        """Interpret NN output."""
        mc = self.mc

        with tf.variable_scope('interpret_output') as scope:
            preds = self.preds

            # probability
            num_class_probs = mc.ANCHOR_PER_GRID * mc.CLASSES
            self.pred_class_probs = tf.reshape(
                tf.nn.softmax(
                    tf.reshape(preds[:, :, :, :num_class_probs],
                               [-1, mc.CLASSES])),
                [mc.BATCH_SIZE, mc.ANCHORS, mc.CLASSES],
                name='pred_class_probs')

            # confidence
            num_confidence_scores = mc.ANCHOR_PER_GRID + num_class_probs
            self.pred_conf = tf.sigmoid(tf.reshape(
                preds[:, :, :, num_class_probs:num_confidence_scores],
                [mc.BATCH_SIZE, mc.ANCHORS]),
                                        name='pred_confidence_score')

            # bbox_delta
            self.pred_box_delta = tf.reshape(
                preds[:, :, :, num_confidence_scores:],
                [mc.BATCH_SIZE, mc.ANCHORS, self.num_mask_params],
                name='bbox_delta')

            # number of objects, used to normalize bbox and classification loss
            self.num_objects = tf.reduce_sum(self.input_mask,
                                             name='num_objects')

        with tf.variable_scope('bbox') as scope:
            with tf.variable_scope('stretching'):
                if self.mc.EIGHT_POINT_REGRESSION:
                    if mc.ENCODING_TYPE == 'normal':
                        delta_x, delta_y, delta_w, delta_h, \
                        delta_of1, delta_of2, delta_of3, delta_of4 = tf.unstack(
                            self.pred_box_delta, axis=2)
                    else:
                        delta_xmin, delta_ymin, delta_xmax, delta_ymax, \
                        delta_of1, delta_of2, delta_of3, delta_of4 = tf.unstack(
                            self.pred_box_delta, axis=2)
                else:
                    if mc.ENCODING_TYPE == 'normal':
                        delta_x, delta_y, delta_w, delta_h = tf.unstack(
                            self.pred_box_delta, axis=2)
                    else:
                        delta_xmin, delta_ymin, delta_xmax, delta_ymax = tf.unstack(
                            self.pred_box_delta, axis=2)

                anchor_x = mc.ANCHOR_BOX[:, 0]
                anchor_y = mc.ANCHOR_BOX[:, 1]
                anchor_w = mc.ANCHOR_BOX[:, 2]
                anchor_h = mc.ANCHOR_BOX[:, 3]

                if mc.ENCODING_TYPE == 'asymmetric_linear':
                    xmins_a, ymins_a, xmaxs_a, ymaxs_a = util.bbox_transform(
                        np.transpose(mc.ANCHOR_BOX))
                    xmins = tf.identity(xmins_a + delta_xmin * anchor_w,
                                        name='bbox_xmin_uncropped')
                    ymins = tf.identity(ymins_a + delta_ymin * anchor_h,
                                        name='bbox_ymin_uncropped')
                    xmaxs = tf.identity(xmaxs_a + delta_xmax * anchor_w,
                                        name='bbox_xmax_uncropped')
                    ymaxs = tf.identity(ymaxs_a + delta_ymax * anchor_h,
                                        name='bbox_ymax_uncropped')
                    box_center_x, box_center_y, box_width, box_height = util.bbox_transform_inv(
                        [xmins, ymins, xmaxs, ymaxs])
                    self._activation_summary(delta_xmin, 'delta_xmin')
                    self._activation_summary(delta_ymin, 'delta_ymin')
                    self._activation_summary(delta_xmax, 'delta_xmax')
                    self._activation_summary(delta_ymax, 'delta_ymax')
                elif mc.ENCODING_TYPE == 'asymmetric_log':
                    EPSILON = 0.5
                    xmins = tf.identity(
                        anchor_x -
                        (anchor_w *
                         (util.safe_exp(delta_xmin, mc.EXP_THRESH) - EPSILON)),
                        name='bbox_xmin_uncropped')
                    ymins = tf.identity(
                        anchor_y -
                        (anchor_h *
                         (util.safe_exp(delta_ymin, mc.EXP_THRESH) - EPSILON)),
                        name='bbox_ymin_uncropped')
                    xmaxs = tf.identity(
                        anchor_x +
                        (anchor_w *
                         (util.safe_exp(delta_xmax, mc.EXP_THRESH) - EPSILON)),
                        name='bbox_xmax_uncropped')
                    ymaxs = tf.identity(
                        anchor_y +
                        (anchor_h *
                         (util.safe_exp(delta_ymax, mc.EXP_THRESH) - EPSILON)),
                        name='bbox_ymax_uncropped')
                    box_center_x, box_center_y, box_width, box_height = util.bbox_transform_inv(
                        [xmins, ymins, xmaxs, ymaxs])
                    self._activation_summary(delta_xmin, 'delta_xmin')
                    self._activation_summary(delta_ymin, 'delta_ymin')
                    self._activation_summary(delta_xmax, 'delta_xmax')
                    self._activation_summary(delta_ymax, 'delta_ymax')
                else:
                    box_center_x = tf.identity(anchor_x + delta_x * anchor_w,
                                               name='bbox_cx')
                    box_center_y = tf.identity(anchor_y + delta_y * anchor_h,
                                               name='bbox_cy')
                    box_width = tf.identity(
                        anchor_w * util.safe_exp(delta_w, mc.EXP_THRESH),
                        name='bbox_width')
                    box_height = tf.identity(
                        anchor_h * util.safe_exp(delta_h, mc.EXP_THRESH),
                        name='bbox_height')
                    self._activation_summary(delta_x, 'delta_x')
                    self._activation_summary(delta_y, 'delta_y')
                    self._activation_summary(delta_w, 'delta_w')
                    self._activation_summary(delta_h, 'delta_h')

                self._activation_summary(box_center_x, 'bbox_cx')
                self._activation_summary(box_center_y, 'bbox_cy')
                self._activation_summary(box_width, 'bbox_width')
                self._activation_summary(box_height, 'bbox_height')

                if self.mc.EIGHT_POINT_REGRESSION:
                    EPSILON = 1e-8
                    anchor_diag = (mc.ANCHOR_BOX[:, 2]**2 +
                                   mc.ANCHOR_BOX[:, 3]**2)**(0.5)
                    box_of1 = tf.identity(
                        (anchor_diag * util.safe_exp(delta_of1, mc.EXP_THRESH))
                        - EPSILON,
                        name='bbox_of1')
                    box_of2 = tf.identity(
                        (anchor_diag * util.safe_exp(delta_of2, mc.EXP_THRESH))
                        - EPSILON,
                        name='bbox_of2')
                    box_of3 = tf.identity(
                        (anchor_diag * util.safe_exp(delta_of3, mc.EXP_THRESH))
                        - EPSILON,
                        name='bbox_of3')
                    box_of4 = tf.identity(
                        (anchor_diag * util.safe_exp(delta_of4, mc.EXP_THRESH))
                        - EPSILON,
                        name='bbox_of4')
                    self._activation_summary(delta_of1, 'delta_of1')
                    self._activation_summary(delta_of2, 'delta_of2')
                    self._activation_summary(delta_of3, 'delta_of3')
                    self._activation_summary(delta_of4, 'delta_of4')
                    self._activation_summary(box_of1, 'box_of1')
                    self._activation_summary(box_of2, 'box_of2')
                    self._activation_summary(box_of3, 'box_of3')
                    self._activation_summary(box_of4, 'box_of4')

            with tf.variable_scope('trimming'):
                if self.mc.EIGHT_POINT_REGRESSION:
                    xmins, ymins, xmaxs, ymaxs, box_of1, box_of2, box_of3, box_of4 = util.bbox_transform2(
                        [
                            box_center_x, box_center_y, box_width, box_height,
                            box_of1, box_of2, box_of3, box_of4
                        ])
                else:
                    if mc.ENCODING_TYPE == 'normal':
                        xmins, ymins, xmaxs, ymaxs = util.bbox_transform([
                            box_center_x, box_center_y, box_width, box_height
                        ])

                if self.mc.EIGHT_POINT_REGRESSION:
                    self.det_boxes_uncropped = tf.transpose(
                        tf.stack(
                            util.bbox_transform_inv2([
                                xmins, ymins, xmaxs, ymaxs, box_of1, box_of2,
                                box_of3, box_of4
                            ])), (1, 2, 0),
                        name='bbox_uncropped')
                else:
                    self.det_boxes_uncropped = tf.transpose(
                        tf.stack(
                            util.bbox_transform_inv(
                                [xmins, ymins, xmaxs, ymaxs])), (1, 2, 0),
                        name='bbox_uncropped')
                # The max x position is mc.IMAGE_WIDTH - 1 since we use zero-based
                # pixels. Same for y.
                xmins = tf.minimum(tf.maximum(0.0, xmins),
                                   mc.IMAGE_WIDTH - 1.0,
                                   name='bbox_xmin')
                self._activation_summary(xmins, 'box_xmin')

                ymins = tf.minimum(tf.maximum(0.0, ymins),
                                   mc.IMAGE_HEIGHT - 1.0,
                                   name='bbox_ymin')
                self._activation_summary(ymins, 'box_ymin')

                xmaxs = tf.maximum(tf.minimum(mc.IMAGE_WIDTH - 1.0, xmaxs),
                                   0.0,
                                   name='bbox_xmax')
                self._activation_summary(xmaxs, 'box_xmax')

                ymaxs = tf.maximum(tf.minimum(mc.IMAGE_HEIGHT - 1.0, ymaxs),
                                   0.0,
                                   name='bbox_ymax')
                self._activation_summary(ymaxs, 'box_ymax')

                if self.mc.EIGHT_POINT_REGRESSION:
                    self.det_boxes = tf.transpose(tf.stack(
                        util.bbox_transform_inv2([
                            xmins, ymins, xmaxs, ymaxs, box_of1, box_of2,
                            box_of3, box_of4
                        ])), (1, 2, 0),
                                                  name='bbox')
                else:
                    self.det_boxes = tf.transpose(tf.stack(
                        util.bbox_transform_inv([xmins, ymins, xmaxs, ymaxs])),
                                                  (1, 2, 0),
                                                  name='bbox')

        with tf.variable_scope('IOU'):

            def _tensor_iou(box1, box2):
                with tf.variable_scope('intersection'):
                    xmin = tf.maximum(box1[0], box2[0], name='xmin')
                    ymin = tf.maximum(box1[1], box2[1], name='ymin')
                    xmax = tf.minimum(box1[2], box2[2], name='xmax')
                    ymax = tf.minimum(box1[3], box2[3], name='ymax')

                    w = tf.maximum(0.0, xmax - xmin, name='inter_w')
                    h = tf.maximum(0.0, ymax - ymin, name='inter_h')
                    intersection = tf.multiply(w, h, name='intersection')

                with tf.variable_scope('union'):
                    w1 = tf.subtract(box1[2], box1[0], name='w1')
                    h1 = tf.subtract(box1[3], box1[1], name='h1')
                    w2 = tf.subtract(box2[2], box2[0], name='w2')
                    h2 = tf.subtract(box2[3], box2[1], name='h2')

                    union = w1 * h1 + w2 * h2 - intersection

                return intersection/(union+mc.EPSILON) \
                    * tf.reshape(self.input_mask, [mc.BATCH_SIZE, mc.ANCHORS])

            if self.mc.EIGHT_POINT_REGRESSION:
                tensor_det_boxes = util.bbox_transform2(
                    tf.unstack(self.det_boxes, axis=2))
                tensor_input_boxes = util.bbox_transform2(
                    tf.unstack(self.box_input, axis=2))
            else:
                tensor_det_boxes = util.bbox_transform(
                    tf.unstack(self.det_boxes, axis=2))
                tensor_input_boxes = util.bbox_transform(
                    tf.unstack(self.box_input, axis=2))

            self.ious = self.ious.assign(
                _tensor_iou(tensor_det_boxes, tensor_input_boxes))
            self._activation_summary(self.ious, 'conf_score')

        with tf.variable_scope('probability') as scope:
            self._activation_summary(self.pred_class_probs, 'class_probs')

            probs = tf.multiply(self.pred_class_probs,
                                tf.reshape(self.pred_conf,
                                           [mc.BATCH_SIZE, mc.ANCHORS, 1]),
                                name='final_class_prob')

            self._activation_summary(probs, 'final_class_prob')

            self.det_probs = tf.reduce_max(probs, 2, name='score')
            self.det_class = tf.argmax(probs, 2, name='class_idx')
Example #15
    def Get_feed_data(self):
        mc = self.mc
        batch_gt_boxes, batch_gt_labels, batch_image = self.read_batch_gt_data(
            shuffle=True)

        batch_gt_boxes = np.array(batch_gt_boxes)
        batch_gt_labels = np.array(batch_gt_labels)
        batch_image = np.array(batch_image)

        input_images = []

        gt_data = []

        for i in range(0, len(batch_gt_boxes)):
            im = batch_image[i]
            im -= mc.BGR_MEANS
            gt_bbox = np.array(batch_gt_boxes[i])
            gt_label = np.array(batch_gt_labels[i])

            assert len(gt_bbox) != 0, 'empty gt_bbox in batch'

            im, gt_bbox, anno_box_filter_idx = self.Preprocess(im, gt_bbox)
            assert len(anno_box_filter_idx) == len(gt_bbox)
            assert len(gt_bbox) != 0

            labels = [gt_label[idx] for idx in anno_box_filter_idx]
            gt_label = np.array(labels)

            orig_h, orig_w, _ = [float(v) for v in im.shape]

            # randomly mirror the image horizontally
            gt_bbox[:, 0::2] *= orig_w
            gt_bbox[:, 1::2] *= orig_h
            gt_bbox_center = np.array(
                [bbox_transform_inv(box) for box in gt_bbox])
            if np.random.randint(2):
                im = im[:, ::-1, :]
                gt_bbox_center[:, 0] = orig_w - 1 - gt_bbox_center[:, 0]
            gt_bbox = np.array([bbox_transform(box) for box in gt_bbox_center])
            gt_bbox[:, 0::2] /= orig_w
            gt_bbox[:, 1::2] /= orig_h


            im = cv2.resize(im, (mc.IMAGE_WIDTH, mc.IMAGE_HEIGHT))
            input_images.append(im)
            num = len(gt_bbox)

            for j in range(0, num):
                gt_data.append([
                    float(i),
                    float(gt_label[j]),
                    float(0),
                    float(gt_bbox[j][0]),
                    float(gt_bbox[j][1]),
                    float(gt_bbox[j][2]),
                    float(gt_bbox[j][3])
                ])

        gt_boxes, gt_labels = self.parse_gt_data(gt_data)

        all_match_indices, all_match_overlaps = self._match_bbox(
            mc.ANCHOR_BOX, gt_boxes)
        assert len(all_match_indices) != 0

        gt_boxes_dense, gt_labels_dense, input_mask = self._sparse_to_dense(
            gt_boxes, gt_labels, all_match_indices)
        assert len(gt_bbox) != 0

        return input_images, gt_boxes_dense, gt_labels_dense, input_mask, all_match_overlaps
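
The mirror step in Get_feed_data only flips the x-center; widths, heights, and y-coordinates are unchanged. The same augmentation as a standalone sketch on center-format pixel boxes (mirror_horizontal is an illustrative name):

    import numpy as np

    def mirror_horizontal(im, boxes_center, prob=0.5):
        # boxes_center: (N, 4) array of [cx, cy, w, h] in pixels
        if np.random.rand() < prob:
            im = im[:, ::-1, :]
            boxes_center = boxes_center.copy()
            boxes_center[:, 0] = im.shape[1] - 1 - boxes_center[:, 0]
        return im, boxes_center
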
Example #16
    def _add_interpretation_graph(self):
        """Interpret NN output."""
        mc = self.mc

        with tf.variable_scope('interpret_output') as scope:
            preds = self.preds

            # probability
            num_class_probs = mc.ANCHOR_PER_GRID * mc.CLASSES

            if mc.CLASSES == 1:
                self.pred_class_probs = tf.reshape(
                    tf.sigmoid(
                        tf.reshape(preds[:, :, :, :num_class_probs],
                                   [-1, mc.CLASSES])),
                    [mc.BATCH_SIZE, mc.ANCHORS, mc.CLASSES],
                    name='pred_class_probs')
            else:
                self.pred_class_probs = tf.reshape(
                    tf.nn.softmax(
                        tf.reshape(preds[:, :, :, :num_class_probs],
                                   [-1, mc.CLASSES])),
                    [mc.BATCH_SIZE, mc.ANCHORS, mc.CLASSES],
                    name='pred_class_probs')

            # bbox_delta
            self.pred_box_delta = tf.reshape(preds[:, :, :, num_class_probs:],
                                             [mc.BATCH_SIZE, mc.ANCHORS, 4],
                                             name='bbox_delta')

        with tf.variable_scope('bbox') as scope:
            with tf.variable_scope('stretching'):
                delta_x, delta_y, delta_w, delta_h = tf.unstack(
                    self.pred_box_delta, axis=2)

                anchor_x = mc.ANCHOR_BOX[:, 0]
                anchor_y = mc.ANCHOR_BOX[:, 1]
                anchor_w = mc.ANCHOR_BOX[:, 2]
                anchor_h = mc.ANCHOR_BOX[:, 3]

                box_center_x = tf.identity(anchor_x + delta_x * anchor_w,
                                           name='bbox_cx')
                box_center_y = tf.identity(anchor_y + delta_y * anchor_h,
                                           name='bbox_cy')
                box_width = tf.identity(anchor_w *
                                        util.safe_exp(delta_w, mc.EXP_THRESH),
                                        name='bbox_width')
                box_height = tf.identity(anchor_h *
                                         util.safe_exp(delta_h, mc.EXP_THRESH),
                                         name='bbox_height')

                self._activation_summary(delta_x, 'delta_x')
                self._activation_summary(delta_y, 'delta_y')
                self._activation_summary(delta_w, 'delta_w')
                self._activation_summary(delta_h, 'delta_h')

                self._activation_summary(box_center_x, 'bbox_cx')
                self._activation_summary(box_center_y, 'bbox_cy')
                self._activation_summary(box_width, 'bbox_width')
                self._activation_summary(box_height, 'bbox_height')

            with tf.variable_scope('trimming'):
                xmins, ymins, xmaxs, ymaxs = util.bbox_transform(
                    [box_center_x, box_center_y, box_width, box_height])

                # The max x position is mc.IMAGE_WIDTH - 1 since we use zero-based
                # pixels. Same for y.
                xmins = tf.minimum(tf.maximum(0.0, xmins),
                                   mc.IMAGE_WIDTH - 1.0,
                                   name='bbox_xmin')
                self._activation_summary(xmins, 'box_xmin')

                ymins = tf.minimum(tf.maximum(0.0, ymins),
                                   mc.IMAGE_HEIGHT - 1.0,
                                   name='bbox_ymin')
                self._activation_summary(ymins, 'box_ymin')

                xmaxs = tf.maximum(tf.minimum(mc.IMAGE_WIDTH - 1.0, xmaxs),
                                   0.0,
                                   name='bbox_xmax')
                self._activation_summary(xmaxs, 'box_xmax')

                ymaxs = tf.maximum(tf.minimum(mc.IMAGE_HEIGHT - 1.0, ymaxs),
                                   0.0,
                                   name='bbox_ymax')
                self._activation_summary(ymaxs, 'box_ymax')

                self.det_boxes = tf.transpose(tf.stack(
                    util.bbox_transform_inv([xmins, ymins, xmaxs, ymaxs])),
                                              (1, 2, 0),
                                              name='bbox')

        with tf.variable_scope('probability') as scope:
            self._activation_summary(self.pred_class_probs, 'class_probs')
            probs = self.pred_class_probs
            self.det_probs = tf.reduce_max(probs, 2, name='score')
            self.det_class = tf.argmax(probs, 2, name='class_idx')
Example #17
    def _add_interpretation_graph(self):
        """Interpret NN output."""
        mc = self.mc

        with tf.variable_scope('interpret_output') as scope:
            preds = self.preds

            # probability
            num_class_probs = mc.ANCHOR_PER_GRID * mc.CLASSES
            self.pred_class_probs = tf.reshape(
                tf.nn.softmax(
                    tf.reshape(preds[:, :, :, :num_class_probs],
                               [-1, mc.CLASSES])),
                [mc.BATCH_SIZE, mc.ANCHORS, mc.CLASSES],
                name='pred_class_probs')

            # confidence
            num_confidence_scores = mc.ANCHOR_PER_GRID + num_class_probs
            self.pred_conf = tf.sigmoid(tf.reshape(
                preds[:, :, :, num_class_probs:num_confidence_scores],
                [mc.BATCH_SIZE, mc.ANCHORS]),
                                        name='pred_confidence_score')

            # bbox_delta
            self.pred_box_delta = tf.reshape(preds[:, :, :,
                                                   num_confidence_scores:],
                                             [mc.BATCH_SIZE, mc.ANCHORS, 4],
                                             name='bbox_delta')

            # number of objects, used to normalize bbox and classification loss
            self.num_objects = tf.reduce_sum(self.input_mask,
                                             name='num_objects')

        with tf.variable_scope('bbox') as scope:
            with tf.variable_scope('stretching'):
                delta_x, delta_y, delta_w, delta_h = tf.unstack(
                    self.pred_box_delta, axis=2)

                anchor_x = mc.ANCHOR_BOX[:, 0]
                anchor_y = mc.ANCHOR_BOX[:, 1]
                anchor_w = mc.ANCHOR_BOX[:, 2]
                anchor_h = mc.ANCHOR_BOX[:, 3]

                box_center_x = tf.identity(anchor_x + delta_x * anchor_w,
                                           name='bbox_cx')
                box_center_y = tf.identity(anchor_y + delta_y * anchor_h,
                                           name='bbox_cy')
                box_width = tf.identity(anchor_w *
                                        util.safe_exp(delta_w, mc.EXP_THRESH),
                                        name='bbox_width')
                box_height = tf.identity(anchor_h *
                                         util.safe_exp(delta_h, mc.EXP_THRESH),
                                         name='bbox_height')

                self._activation_summary(delta_x, 'delta_x')
                self._activation_summary(delta_y, 'delta_y')
                self._activation_summary(delta_w, 'delta_w')
                self._activation_summary(delta_h, 'delta_h')

                self._activation_summary(box_center_x, 'bbox_cx')
                self._activation_summary(box_center_y, 'bbox_cy')
                self._activation_summary(box_width, 'bbox_width')
                self._activation_summary(box_height, 'bbox_height')

            with tf.variable_scope('trimming'):
                xmins, ymins, xmaxs, ymaxs = util.bbox_transform(
                    [box_center_x, box_center_y, box_width, box_height])

                # The max x position is mc.IMAGE_WIDTH - 1 since we use zero-based
                # pixels. Same for y.
                xmins = tf.minimum(tf.maximum(0.0, xmins),
                                   mc.IMAGE_WIDTH - 1.0,
                                   name='bbox_xmin')
                self._activation_summary(xmins, 'box_xmin')

                ymins = tf.minimum(tf.maximum(0.0, ymins),
                                   mc.IMAGE_HEIGHT - 1.0,
                                   name='bbox_ymin')
                self._activation_summary(ymins, 'box_ymin')

                xmaxs = tf.maximum(tf.minimum(mc.IMAGE_WIDTH - 1.0, xmaxs),
                                   0.0,
                                   name='bbox_xmax')
                self._activation_summary(xmaxs, 'box_xmax')

                ymaxs = tf.maximum(tf.minimum(mc.IMAGE_HEIGHT - 1.0, ymaxs),
                                   0.0,
                                   name='bbox_ymax')
                self._activation_summary(ymaxs, 'box_ymax')

                self.det_boxes = tf.transpose(tf.stack(
                    util.bbox_transform_inv([xmins, ymins, xmaxs, ymaxs])),
                                              (1, 2, 0),
                                              name='bbox')

        with tf.variable_scope('IOU'):

            def _tensor_iou(box1, box2):
                with tf.variable_scope('intersection'):
                    xmin = tf.maximum(box1[0], box2[0], name='xmin')
                    ymin = tf.maximum(box1[1], box2[1], name='ymin')
                    xmax = tf.minimum(box1[2], box2[2], name='xmax')
                    ymax = tf.minimum(box1[3], box2[3], name='ymax')

                    w = tf.maximum(0.0, xmax - xmin, name='inter_w')
                    h = tf.maximum(0.0, ymax - ymin, name='inter_h')
                    intersection = tf.multiply(w, h, name='intersection')

                with tf.variable_scope('union'):
                    w1 = tf.subtract(box1[2], box1[0], name='w1')
                    h1 = tf.subtract(box1[3], box1[1], name='h1')
                    w2 = tf.subtract(box2[2], box2[0], name='w2')
                    h2 = tf.subtract(box2[3], box2[1], name='h2')

                    union = w1 * h1 + w2 * h2 - intersection

                return intersection/(union+mc.EPSILON) \
                    * tf.reshape(self.input_mask, [mc.BATCH_SIZE, mc.ANCHORS])

            self.ious = self.ious.assign(
                _tensor_iou(
                    util.bbox_transform(tf.unstack(self.det_boxes, axis=2)),
                    util.bbox_transform(tf.unstack(self.box_input, axis=2))))
            self._activation_summary(self.ious, 'conf_score')

        with tf.variable_scope('probability') as scope:
            self._activation_summary(self.pred_class_probs, 'class_probs')

            probs = tf.multiply(self.pred_class_probs,
                                tf.reshape(self.pred_conf,
                                           [mc.BATCH_SIZE, mc.ANCHORS, 1]),
                                name='final_class_prob')

            self._activation_summary(probs, 'final_class_prob')

            self.det_probs = tf.reduce_max(probs, 2, name='score')
            self.det_class = tf.argmax(probs, 2, name='class_idx')
Example #18
  def _load_annotation(self):
    def _get_obj_level(obj):
      height = float(obj[7]) - float(obj[5]) + 1
      truncation = float(obj[1])
      occlusion = float(obj[2])
      if height >= 40 and truncation <= 0.15 and occlusion <= 0:
          return 1
      elif height >= 25 and truncation <= 0.3 and occlusion <= 1:
          return 2
      elif height >= 25 and truncation <= 0.5 and occlusion <= 2:
          return 3
      else:
          return 4

    idx2annotation = {}
    idx2annotation1 = {}
    idx2annotation2 = {}
    idx2annotation3 = {}

    for index in self._image_idx:
      filename = os.path.join(self._label_path, index+'.txt')
      with open(filename, 'r') as f:
        lines = f.readlines()
      bboxes = []
      landmarks = []
      poses = []
      ages = []

      for line in lines:
        obj = line.strip().split(' ')
        try:
          cls = self._class_to_idx[obj[0].lower().strip()]
          pose = self._pose_to_idx[obj[1].lower().strip()]

          if '01baby' in obj[2]:
            obj2 = obj[2].split('01baby')
            age = self._age_to_idx['01baby']
          else:
            obj2 = obj[2].split('02adult')
            age = self._age_to_idx['02adult']
        except (KeyError, IndexError):
          continue

        if self.mc.EXCLUDE_HARD_EXAMPLES and _get_obj_level(obj) > 3:
          continue

        xmin = float(obj2[1])  # xmin follows the age token packed into obj[2]
        ymin = float(obj[3])
        xmax = float(obj[4])
        ymax = float(obj[5])


        assert xmin >= 0.0 and xmin <= xmax, \
            'Invalid bounding box x-coord xmin {} or xmax {} at {}.txt' \
                .format(xmin, xmax, index)
        assert ymin >= 0.0 and ymin <= ymax, \
            'Invalid bounding box y-coord ymin {} or ymax {} at {}.txt' \
                .format(ymin, ymax, index)
        x, y, w, h = bbox_transform_inv([xmin, ymin, xmax, ymax])
        bboxes.append([x, y, w, h, cls])

        x0 = float(obj[6])
        x1 = float(obj[7])
        x2 = float(obj[8])
        x3 = float(obj[9])
        x4 = float(obj[10])
        y0 = float(obj[11])
        y1 = float(obj[12])
        y2 = float(obj[13])
        y3 = float(obj[14])
        y4 = float(obj[15])

        landmarks.append([x0, x1, x2, x3, x4, y0, y1, y2, y3, y4, cls])
        poses.append([pose])
        ages.append([age])


      idx2annotation[index] = bboxes
      idx2annotation1[index] = landmarks
      idx2annotation2[index] = poses
      idx2annotation3[index] = ages

    return idx2annotation, idx2annotation1, idx2annotation2, idx2annotation3
Example #19
    def analyze_detections(self, detection_file_dir, det_error_file):
        def _save_detection(f, idx, error_type, det, score):
            f.write(
                '{:s} {:s} {:.1f} {:.1f} {:.1f} {:.1f} {:s} {:.3f}\n'.format(
                    idx, error_type, det[0] - det[2] / 2.,
                    det[1] - det[3] / 2., det[0] + det[2] / 2.,
                    det[1] + det[3] / 2., self._classes[int(det[4])], score))

        # load detections
        self._det_rois = {}
        for idx in self._image_idx:
            det_file_name = os.path.join(detection_file_dir, idx + '.txt')
            with open(det_file_name) as f:
                lines = f.readlines()
            bboxes = []
            for line in lines:
                obj = line.strip().split(' ')
                cls = self._class_to_idx[obj[0].lower().strip()]
                xmin = float(obj[4])
                ymin = float(obj[5])
                xmax = float(obj[6])
                ymax = float(obj[7])
                score = float(obj[-1])

                x, y, w, h = bbox_transform_inv([xmin, ymin, xmax, ymax])
                bboxes.append([x, y, w, h, cls, score])
            bboxes.sort(key=lambda x: x[-1], reverse=True)
            self._det_rois[idx] = bboxes

        # do error analysis
        num_objs = 0.
        num_dets = 0.
        num_correct = 0.
        num_loc_error = 0.
        num_cls_error = 0.
        num_bg_error = 0.
        num_repeated_error = 0.
        num_detected_obj = 0.

        with open(det_error_file, 'w') as f:
            for idx in self._image_idx:
                gt_bboxes = np.array(self._rois[idx])
                num_objs += len(gt_bboxes)
                detected = [False] * len(gt_bboxes)

                det_bboxes = self._det_rois[idx]
                if len(gt_bboxes) < 1:
                    continue

                for i, det in enumerate(det_bboxes):
                    if i < len(gt_bboxes):
                        num_dets += 1
                    ious = batch_iou(gt_bboxes[:, :4], det[:4])
                    max_iou = np.max(ious)
                    gt_idx = np.argmax(ious)
                    if max_iou > 0.1:
                        if gt_bboxes[gt_idx, 4] == det[4]:
                            if max_iou >= 0.5:
                                if i < len(gt_bboxes):
                                    if not detected[gt_idx]:
                                        num_correct += 1
                                        detected[gt_idx] = True
                                    else:
                                        num_repeated_error += 1
                            else:
                                if i < len(gt_bboxes):
                                    num_loc_error += 1
                                    _save_detection(f, idx, 'loc', det, det[5])
                        else:
                            if i < len(gt_bboxes):
                                num_cls_error += 1
                                _save_detection(f, idx, 'cls', det, det[5])
                    else:
                        if i < len(gt_bboxes):
                            num_bg_error += 1
                            _save_detection(f, idx, 'bg', det, det[5])

                for i, gt in enumerate(gt_bboxes):
                    if not detected[i]:
                        _save_detection(f, idx, 'missed', gt, -1.0)
                num_detected_obj += sum(detected)

        print('Detection Analysis:')
        print('    Number of detections: {}'.format(num_dets))
        print('    Number of objects: {}'.format(num_objs))
        print('    Percentage of correct detections: {}'.format(num_correct /
                                                                num_dets))
        print('    Percentage of localization error: {}'.format(num_loc_error /
                                                                num_dets))
        print('    Percentage of classification error: {}'.format(
            num_cls_error / num_dets))
        print('    Percentage of background error: {}'.format(num_bg_error /
                                                              num_dets))
        print('    Percentage of repeated detections: {}'.format(
            num_repeated_error / num_dets))
        print('    Recall: {}'.format(num_detected_obj / num_objs))

        out = {}
        out['num of detections'] = num_dets
        out['num of objects'] = num_objs
        out['% correct detections'] = num_correct / num_dets
        out['% localization error'] = num_loc_error / num_dets
        out['% classification error'] = num_cls_error / num_dets
        out['% background error'] = num_bg_error / num_dets
        out['% repeated error'] = num_repeated_error / num_dets
        out['% recall'] = num_detected_obj / num_objs

        return out
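
analyze_detections relies on a batch_iou helper that scores one detection against all ground-truth boxes, with every box in [cx, cy, w, h] format. A minimal sketch under that assumption:

    import numpy as np

    def batch_iou(boxes, box):
        # boxes: (N, 4), box: (4,), both [cx, cy, w, h]
        lr = np.maximum(
            np.minimum(boxes[:, 0] + 0.5 * boxes[:, 2], box[0] + 0.5 * box[2]) -
            np.maximum(boxes[:, 0] - 0.5 * boxes[:, 2], box[0] - 0.5 * box[2]),
            0)
        tb = np.maximum(
            np.minimum(boxes[:, 1] + 0.5 * boxes[:, 3], box[1] + 0.5 * box[3]) -
            np.maximum(boxes[:, 1] - 0.5 * boxes[:, 3], box[1] - 0.5 * box[3]),
            0)
        inter = lr * tb
        union = boxes[:, 2] * boxes[:, 3] + box[2] * box[3] - inter
        return inter / union
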
Example #20
    def Get_feed_data(self):
        mc = self.mc
        batch_gt_boxes, batch_gt_labels, batch_image = self.read_batch_gt_data(
            shuffle=True)

        batch_gt_boxes = np.array(batch_gt_boxes)
        batch_gt_labels = np.array(batch_gt_labels)
        batch_image = np.array(batch_image)

        input_images = []

        gt_data = []
        for i in range(0, len(batch_gt_boxes)):
            im = batch_image[i]
            im -= mc.BGR_MEANS
            gt_bbox = np.array(batch_gt_boxes[i])
            gt_label = np.array(batch_gt_labels[i])

            im, gt_bbox, anno_box_filter_idx = self.Preprocess(im, gt_bbox)
            assert len(anno_box_filter_idx) == len(gt_bbox)

            labels = [gt_label[idx] for idx in anno_box_filter_idx]
            gt_label = np.array(labels)
            orig_h, orig_w, _ = [float(v) for v in im.shape]

            # randomly mirror the image horizontally
            gt_bbox[:, 0::2] *= orig_w
            gt_bbox[:, 1::2] *= orig_h
            gt_bbox_center = np.array(
                [bbox_transform_inv(box) for box in gt_bbox])
            if np.random.randint(2):
                im = im[:, ::-1, :]
                gt_bbox_center[:, 0] = orig_w - 1 - gt_bbox_center[:, 0]
            gt_bbox = np.array([bbox_transform(box) for box in gt_bbox_center])
            gt_bbox[:, 0::2] /= orig_w
            gt_bbox[:, 1::2] /= orig_h

            im = cv2.resize(im, (mc.IMAGE_WIDTH, mc.IMAGE_HEIGHT))
            input_images.append(im)

            num = len(gt_bbox)

            for j in range(0, num):
                gt_data.append([
                    i, gt_label[j], 0, gt_bbox[j][0], gt_bbox[j][1],
                    gt_bbox[j][2], gt_bbox[j][3]
                ])


        gt_boxes, gt_labels = self.parse_gt_data(gt_data)

        all_match_indices, all_match_overlaps = self._match_bbox(
            mc.ANCHOR_BOX, gt_boxes)
        gt_boxes_dense, gt_labels_dense, input_mask = self._sparse_to_dense(
            gt_boxes, gt_labels, all_match_indices)

        return input_images, gt_boxes_dense, gt_labels_dense, input_mask, all_match_overlaps