Example #1
    def __parse_annotation(self, annotation):
        """
        读取annotation中image_path对应的图片,并将该图片进行resize(不改变图片的高宽比)
        获取annotation中所有的bbox,并将这些bbox的坐标(xmin, ymin, xmax, ymax)进行纠正,
        使得纠正后bbox在resize后的图片中的相对位置与纠正前bbox在resize前的图片中的相对位置相同

        :param annotation: 图片地址和bbox的坐标、类别,
        如:image_path xmin,ymin,xmax,ymax,class_ind xmin,ymin,xmax,ymax,class_ind ...
        :return: image和bboxes
        bboxes的shape为(N, 5),其中N表示一站图中有N个bbox,5表示(xmin, ymin, xmax, ymax, class_ind)
        """
        line = annotation.split()
        image_path = line[0]
        image = np.array(cv2.imread(image_path))
        bboxes = np.array([list(map(int, box.split(','))) for box in line[1:]])

        # data augmentation (random flip / crop / translate)
        image, bboxes = data_aug.random_horizontal_flip(
            np.copy(image), np.copy(bboxes))
        image, bboxes = data_aug.random_crop(np.copy(image), np.copy(bboxes))
        image, bboxes = data_aug.random_translate(np.copy(image),
                                                  np.copy(bboxes))
        # resize to the training input size without changing the original aspect ratio
        image, bboxes = img_preprocess2(
            np.copy(image), np.copy(bboxes),
            (self.__train_input_size, self.__train_input_size), True)
        return image, bboxes
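
The augmentation helpers (data_aug.random_horizontal_flip, random_crop, random_translate) and img_preprocess2 are defined elsewhere in the repository and are not shown here. As a minimal sketch of the aspect-ratio-preserving ("letterbox") resize with bbox correction that the docstring describes, something like the following could be used; the name letterbox_resize, its signature, and the padding value 128 are illustrative assumptions, not the repository's actual img_preprocess2:

import cv2
import numpy as np

def letterbox_resize(image, bboxes, target_size, pad_value=128):
    # Resize `image` to target_size = (height, width) while keeping its aspect
    # ratio, pad the borders, and shift/scale the bbox coordinates to match.
    th, tw = target_size
    h, w = image.shape[:2]
    scale = min(tw / w, th / h)                  # uniform scale factor
    nw, nh = int(round(w * scale)), int(round(h * scale))

    resized = cv2.resize(image, (nw, nh))
    padded = np.full((th, tw, 3), pad_value, dtype=resized.dtype)
    dw, dh = (tw - nw) // 2, (th - nh) // 2      # symmetric padding offsets
    padded[dh:dh + nh, dw:dw + nw, :] = resized

    # keep the boxes aligned with the letterboxed image; the class_ind column
    # (index 4) is left untouched
    bboxes = bboxes.astype(np.float32)
    bboxes[:, [0, 2]] = bboxes[:, [0, 2]] * scale + dw
    bboxes[:, [1, 3]] = bboxes[:, [1, 3]] * scale + dh
    return padded, bboxes

Inside __parse_annotation this would play the role of the final img_preprocess2 call, e.g. image, bboxes = letterbox_resize(image, bboxes, (self.__train_input_size, self.__train_input_size)).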
Example #2
def process_data(line, use_aug):
    # annotation lines may arrive as bytes (e.g. from a dataset pipeline); decode to str
    if isinstance(line, bytes):
        line = line.decode()
    s = line.split()
    line_id = s[0]
    image_path = s[1]
    # print(image_path)
    if not os.path.exists(image_path):
        raise KeyError("%s does not exist ... " % image_path)
    image = np.array(cv2.imread(image_path))
    ori_w = s[2]  # original image width (kept as the string read from the annotation line)
    ori_h = s[3]  # original image height (string)
    labels = np.array(
        [list(map(lambda x: int(float(x)), box.split(','))) for box in s[4:]])
    # labels = np.array([list(map(lambda x: int(float(x)), box.split(','))) for box in s[1:]])

    if use_aug:
        image, labels = random_horizontal_flip(image, labels)
        image, labels = random_crop(image, labels)
        image, labels = random_translate(image, labels)

    image, labels = image_preporcess(np.copy(image),
                                     [cfg.input_image_h, cfg.input_image_w],
                                     np.copy(labels))

    # CenterNet-style training targets on the down-sampled output grid
    output_h = cfg.input_image_h // cfg.down_ratio
    output_w = cfg.input_image_w // cfg.down_ratio
    hm = np.zeros((output_h, output_w, cfg.num_classes), dtype=np.float32)  # per-class center heatmap
    wh = np.zeros((cfg.max_objs, 2), dtype=np.float32)  # box width/height targets
    reg = np.zeros((cfg.max_objs, 2), dtype=np.float32)  # sub-pixel center offsets
    ind = np.zeros((cfg.max_objs), dtype=np.float32)  # flattened center indices on the output grid
    reg_mask = np.zeros((cfg.max_objs), dtype=np.float32)  # 1 where a slot holds a real object

    for idx, label in enumerate(labels):
        # print("label", label)
        bbox = label[:4] / cfg.down_ratio  # scale the box to the output grid
        class_id = label[4]
        h, w = bbox[3] - bbox[1], bbox[2] - bbox[0]
        radius = gaussian_radius((math.ceil(h), math.ceil(w)))
        radius = max(0, int(radius))
        # object center on the output grid
        ct = np.array([(bbox[0] + bbox[2]) / 2, (bbox[1] + bbox[3]) / 2],
                      dtype=np.float32)
        ct_int = ct.astype(np.int32)
        # splat a gaussian peak onto this class's heatmap channel
        draw_umich_gaussian(hm[:, :, class_id], ct_int, radius)
        wh[idx] = 1. * w, 1. * h
        ind[idx] = ct_int[1] * output_w + ct_int[0]
        reg[idx] = ct - ct_int
        reg_mask[idx] = 1

    return image, hm, wh, reg, reg_mask, ind, ori_w, ori_h, line_id
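
gaussian_radius and draw_umich_gaussian are the standard CenterNet helpers. For reference, here is a sketch of the "umich" gaussian splat (together with its gaussian2D helper) along the lines of the public CenterNet implementation; it is reproduced for illustration, and the repository's own copy may differ in details:

import numpy as np

def gaussian2D(shape, sigma=1):
    # dense 2D gaussian of the given (diameter, diameter) shape
    m, n = [(ss - 1.) / 2. for ss in shape]
    y, x = np.ogrid[-m:m + 1, -n:n + 1]
    h = np.exp(-(x * x + y * y) / (2 * sigma * sigma))
    h[h < np.finfo(h.dtype).eps * h.max()] = 0
    return h

def draw_umich_gaussian(heatmap, center, radius, k=1):
    # splat a gaussian peak of the given radius onto `heatmap` at `center`,
    # taking the element-wise maximum so overlapping objects do not erase
    # each other
    diameter = 2 * radius + 1
    gaussian = gaussian2D((diameter, diameter), sigma=diameter / 6)
    x, y = int(center[0]), int(center[1])
    height, width = heatmap.shape[0:2]
    left, right = min(x, radius), min(width - x, radius + 1)
    top, bottom = min(y, radius), min(height - y, radius + 1)
    masked_heatmap = heatmap[y - top:y + bottom, x - left:x + right]
    masked_gaussian = gaussian[radius - top:radius + bottom,
                               radius - left:radius + right]
    if min(masked_gaussian.shape) > 0 and min(masked_heatmap.shape) > 0:
        np.maximum(masked_heatmap, masked_gaussian * k, out=masked_heatmap)
    return heatmap

Because hm[:, :, class_id] is a NumPy view into hm, the in-place np.maximum writes the gaussian peak directly into the heatmap returned by process_data.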