def __parse_annotation(self, annotation):
    """Load, augment and resize one annotated training image.

    The image referenced by the annotation is read from disk, the three
    random augmentations from ``data_aug`` are applied, and the result is
    resized to a square of side ``self.__train_input_size`` by
    ``img_preprocess2``. According to the original author's notes the
    resize keeps the aspect ratio and the box coordinates are corrected so
    that every box keeps the same relative position in the resized image.

    :param annotation: whitespace-separated record of the form
        ``image_path xmin,ymin,xmax,ymax,class_ind xmin,ymin,... ...``
    :return: ``(image, bboxes)``. Per the original notes ``bboxes`` has
        shape ``(N, 5)`` where N is the number of boxes in the image and
        the 5 columns are ``(xmin, ymin, xmax, ymax, class_ind)``.
    :raises KeyError: if the image cannot be read (same exception type the
        sibling ``process_data`` uses for a bad image path).
    """
    line = annotation.split()
    image_path = line[0]

    # cv2.imread signals failure by returning None instead of raising;
    # fail here with a clear message rather than deep inside augmentation.
    raw_image = cv2.imread(image_path)
    if raw_image is None:
        raise KeyError(
            "%s does not exist or is not a readable image ... " % image_path)
    image = np.array(raw_image)

    # map() is lazy on Python 3, so each box must be materialised as a list;
    # otherwise np.array() would hold map objects instead of integers.
    bboxes = np.array([list(map(int, box.split(','))) for box in line[1:]])

    # Data augmentation. Copies are passed so that an augmentation which
    # works in place never touches the arrays of the previous stage.
    image, bboxes = data_aug.random_horizontal_flip(
        np.copy(image), np.copy(bboxes))
    image, bboxes = data_aug.random_crop(np.copy(image), np.copy(bboxes))
    image, bboxes = data_aug.random_translate(np.copy(image), np.copy(bboxes))

    # Resize to the training input size without changing the aspect ratio.
    # NOTE(review): the meaning of the trailing ``True`` flag is defined by
    # img_preprocess2, which is not visible here - confirm before changing.
    image, bboxes = img_preprocess2(
        np.copy(image), np.copy(bboxes),
        (self.__train_input_size, self.__train_input_size), True)
    return image, bboxes
def process_data(line, use_aug):
    """Turn one annotation record into an image plus heatmap training targets.

    The record is whitespace-separated:
    ``line_id image_path ori_w ori_h x1,y1,x2,y2,cls x1,y1,x2,y2,cls ...``

    :param line: the annotation record, as ``str`` or as bytes (bytes are
        decoded with the default codec first).
    :param use_aug: when truthy, apply random horizontal flip, random crop
        and random translate before preprocessing.
    :return: tuple ``(image, hm, wh, reg, reg_mask, ind, ori_w, ori_h,
        line_id)`` where

        * ``image`` is the output of ``image_preporcess``;
        * ``hm`` is a float32 array of shape
          ``(input_h // down_ratio, input_w // down_ratio, num_classes)``
          on which ``draw_umich_gaussian`` is called once per box centre;
        * ``wh`` is ``(max_objs, 2)`` float32: box width and height in
          output-map units;
        * ``reg`` is ``(max_objs, 2)`` float32: fractional part of the box
          centre lost when truncating it to an integer cell;
        * ``reg_mask`` is ``(max_objs,)`` float32: 1 for filled slots;
        * ``ind`` is ``(max_objs,)`` float32: flattened index
          ``y * output_w + x`` of the centre cell;
        * ``ori_w`` / ``ori_h`` are the third and fourth tokens of the
          record, returned unchanged as strings;
        * ``line_id`` is the first token of the record.
    :raises KeyError: if the image path does not exist or the file cannot
        be decoded as an image.
    """
    # Records may arrive as bytes (bytes-like input is decoded to text).
    if isinstance(line, (bytes, bytearray)):
        line = line.decode()

    s = line.split()
    line_id = s[0]
    image_path = s[1]
    if not os.path.exists(image_path):
        raise KeyError("%s does not exist ... " % image_path)

    # cv2.imread returns None (no exception) for files it cannot decode.
    raw_image = cv2.imread(image_path)
    if raw_image is None:
        raise KeyError("%s is not a readable image ... " % image_path)
    image = np.array(raw_image)

    ori_w = s[2]
    ori_h = s[3]
    # Coordinates may be written as floats ("12.0"); go through float() so
    # int() does not reject them.
    labels = np.array(
        [[int(float(x)) for x in box.split(',')] for box in s[4:]])

    if use_aug:
        image, labels = random_horizontal_flip(image, labels)
        image, labels = random_crop(image, labels)
        image, labels = random_translate(image, labels)

    image, labels = image_preporcess(
        np.copy(image), [cfg.input_image_h, cfg.input_image_w],
        np.copy(labels))

    output_h = cfg.input_image_h // cfg.down_ratio
    output_w = cfg.input_image_w // cfg.down_ratio
    hm = np.zeros((output_h, output_w, cfg.num_classes), dtype=np.float32)
    wh = np.zeros((cfg.max_objs, 2), dtype=np.float32)
    reg = np.zeros((cfg.max_objs, 2), dtype=np.float32)
    ind = np.zeros((cfg.max_objs), dtype=np.float32)
    reg_mask = np.zeros((cfg.max_objs), dtype=np.float32)

    # The per-object buffers hold exactly cfg.max_objs rows; boxes beyond
    # that are dropped instead of overflowing the buffers with IndexError.
    for idx, label in enumerate(labels[:cfg.max_objs]):
        # Scale the box from input-image pixels to output-map cells.
        bbox = label[:4] / cfg.down_ratio
        class_id = label[4]
        h, w = bbox[3] - bbox[1], bbox[2] - bbox[0]
        radius = gaussian_radius((math.ceil(h), math.ceil(w)))
        radius = max(0, int(radius))

        ct = np.array(
            [(bbox[0] + bbox[2]) / 2, (bbox[1] + bbox[3]) / 2],
            dtype=np.float32)
        ct_int = ct.astype(np.int32)

        draw_umich_gaussian(hm[:, :, class_id], ct_int, radius)
        wh[idx] = 1. * w, 1. * h
        ind[idx] = ct_int[1] * output_w + ct_int[0]
        reg[idx] = ct - ct_int
        reg_mask[idx] = 1

    return image, hm, wh, reg, reg_mask, ind, ori_w, ori_h, line_id