def create_rnet_data(save_dir_name='R_net_dataset', crop_size=24, use_rnet=False):
    """Generate the cropped-image dataset used to train R-Net.

    Every annotated image is run through P-Net (and, when ``use_rnet`` is true,
    through R-Net as well). Each resulting candidate box is clipped to the
    image, cropped, resized to ``crop_size`` x ``crop_size`` and saved as a
    JPEG under ``<dataset_args.output_path>/<save_dir_name>/<image name>/``.
    One line per saved crop is appended to ``<save_dir_name>.txt`` in the same
    output directory::

        <crop path relative to the txt> <label> [4 box offsets] [landmark offsets]

    The label is chosen from the best IoU between the candidate and the
    ground-truth faces of the image:

    * ``IoU <= 0.3``          -> ``'n'`` (no offsets are written)
    * ``0.4 <= IoU <= 0.65``  -> ``'pf'``
    * ``IoU > 0.65``          -> ``'p'``
    * anything in between is dropped.

    For images that carry landmark annotations ``'pf'``/``'p'`` are replaced
    by ``'l'`` and the landmark offsets are appended.

    Args:
        save_dir_name: Name of the output sub-directory and of the annotation
            text file (``<save_dir_name>.txt``).
        crop_size: Side length, in pixels, of the saved square crops.
        use_rnet: When true, the P-Net candidates are additionally passed
            through R-Net before being cropped.
    """
    # Imported inside the function, as in the original code.
    from train import load_net
    import torch

    def get_name_from_path(img_path):
        """Return the file name of ``img_path`` without directory or extension."""
        return osp.splitext(osp.basename(img_path))[0]

    def make_dir(save_dir):
        """Create ``save_dir`` (and parents) if it does not exist yet."""
        if not osp.exists(save_dir):
            os.makedirs(save_dir)

    def crop_img(img_np, crop_box, crop_size):
        """Cut ``crop_box`` (x1, y1, x2, y2) out of ``img_np`` and resize it."""
        x1, y1, x2, y2 = crop_box
        patch = Image.fromarray(img_np[y1:y2, x1:x2, :])
        return patch.resize((crop_size, crop_size), resample=PIL.Image.BILINEAR)

    def limit_box(box, width, height):
        """Clip the first four entries of ``box`` to the image as ints."""
        return [
            min(max(0, int(box[i])), width if i % 2 == 0 else height)
            for i in range(4)
        ]

    def cal_offset(face, box):
        """Return the face corners relative to ``box``, in units of box size."""
        box_w = float(box[2] - box[0])
        box_h = float(box[3] - box[1])
        return [
            (face[0] - box[0]) / box_w,
            (face[1] - box[1]) / box_h,
            (face[2] - box[2]) / box_w,
            (face[3] - box[3]) / box_h,
        ]

    def cal_landmark_offset(box, ldmk):
        """Return landmarks relative to the box origin, in units of box size.

        ``ldmk`` is a flat sequence alternating x and y values; ``None``
        yields an empty list.
        """
        if ldmk is None:
            return []
        origin = (box[0], box[1])
        extent = (float(box[2] - box[0]), float(box[3] - box[1]))
        return [(v - origin[i % 2]) / extent[i % 2] for i, v in enumerate(ldmk)]

    def txt_to_write(path, label, offset, ldmk_offset):
        """Format one annotation line; every field is followed by a space."""
        fields = [path, label] + list(offset) + list(ldmk_offset)
        line = ''.join('{} '.format(field) for field in fields) + '\n'
        print(line)
        return line

    dataset_args = dataset_config()
    cpu = torch.device('cpu')
    pnet = load_net(dataset_args, net_name='pnet').to(cpu)
    # Load R-Net once, up front, instead of once per image.
    # NOTE(review): the original code passed an undefined ``args`` here;
    # ``dataset_args`` is used because P-Net is loaded with it -- confirm.
    rnet = load_net(dataset_args, net_name='rnet').to(cpu) if use_rnet else None

    # [img_num * [absolute_img_path, [faces_num * 4 (x1, y1, w, h)]]]
    cls_img_faces = create_pnet_data_txt_parser(
        txt_path=dataset_args.class_data_txt_path,
        img_dir=dataset_args.class_data_dir)
    # [absolute_img_path, [x1, x2, y1, y2],
    #  (x, y) of [left_eye, right_eye, nose, mouth_left, mouth_right]]
    ldmk_img_faces = landmark_dataset_txt_parser(
        txt_path=dataset_args.landmark_data_txt_path,
        img_dir=dataset_args.landmark_data_dir)
    img_faces = ldmk_img_faces + cls_img_faces

    output_path = osp.join(dataset_args.output_path, save_dir_name)
    make_dir(output_path)
    txt_path = osp.join(output_path, '{}.txt'.format(save_dir_name))
    txt_dir = osp.split(txt_path)[0]

    # ``with`` guarantees the annotation file is closed even if a detector or
    # an image operation raises half-way through the dataset.
    with open(txt_path, 'a') as txt:
        for img_face in tqdm(img_faces):
            img_path = img_face[0]
            save_dir = osp.join(output_path, get_name_from_path(img_path))
            make_dir(save_dir)

            faces = np.array(img_face[1])
            if faces.size == 0:
                # Without ground truth there is nothing to compute IoU against.
                continue
            if faces.ndim == 1:
                # Single landmark-style box [x1, x2, y1, y2] -> [[x1, y1, x2, y2]].
                faces = faces[[0, 2, 1, 3]][np.newaxis, :]
            else:
                # Rows of [x1, y1, w, h] -> [x1, y1, x2, y2].
                faces[:, 2] += faces[:, 0]
                faces[:, 3] += faces[:, 1]

            ldmk = None if len(img_face) < 3 else [int(i) for i in img_face[2]]
            img = load_img(img_path)
            width, height = img.size
            img_np = np.array(img)

            # NOTE(review): show_boxes=1 is kept from the original; it looks
            # like a debugging leftover for a batch job -- confirm.
            bounding_boxes = pnet_boxes(img, pnet, show_boxes=1)
            if rnet is not None and bounding_boxes is not None:
                bounding_boxes = rnet_boxes(img, rnet, bounding_boxes)
            if bounding_boxes is None:
                # The detector produced no candidates for this image.
                continue

            for box_id, raw_box in enumerate(bounding_boxes, start=1):
                box = limit_box(raw_box, width, height)
                if box[2] <= box[0] or box[3] <= box[1]:
                    # Clipping left an empty box: it cannot be cropped and
                    # would divide by zero in the offset computation.
                    continue

                iou = IoU(box, faces)
                # Compare the scalar best IoU, not the whole IoU array.
                iou_max = iou.max()
                closest_face = faces[iou.argmax()]

                if iou_max <= 0.3:
                    label, offset, ldmk_offset = 'n', [], []
                elif iou_max >= 0.4:
                    label = 'pf' if iou_max <= 0.65 else 'p'
                    if ldmk is not None:
                        label = 'l'
                    offset = cal_offset(closest_face, box)
                    ldmk_offset = cal_landmark_offset(box, ldmk)
                else:
                    # 0.3 < IoU < 0.4 is deliberately left unlabeled.
                    continue

                img_box = crop_img(img_np=img_np, crop_box=box, crop_size=crop_size)
                img_box_path = osp.join(
                    save_dir, '{}_{:.8f}.jpg'.format(box_id, iou_max))
                img_box.save(img_box_path, format='jpeg')
                txt.write(
                    txt_to_write(
                        osp.relpath(img_box_path, txt_dir),
                        label, offset, ldmk_offset))
def get_crop_img_label_offset_ldmk(self, img, faces, ldmk, index):
    """Produce one training sample (crop, label, box offsets, landmark offsets).

    Two sampling modes exist, selected by ``self.pnet``:

    * ``self.pnet is None``: a label is drawn from
      ``['n', 'np', 'pf', 'p']`` with probabilities ``self.ratio``, a face is
      picked at random and up to ten random square crops around it are tried
      until one has an IoU inside the range associated with the label.
    * otherwise: the image is run through P-Net (plus R-Net when
      ``self.rnet`` is set); every sufficiently wide candidate whose best IoU
      falls into a labeled range is appended to ``self.cache`` and the oldest
      cached sample is returned. While the cache is non-empty, cached samples
      are served first and ``self.img_faces[index]`` is re-appended to
      ``self.img_faces``.

    Labels ``'pf'``/``'p'`` become ``'l'`` when ``ldmk`` is given, and
    ``'np'`` is reported as ``'n'``.

    Args:
        img: Image exposing ``.size`` as ``(width, height)`` and convertible
            with ``np.array`` (assumed to be an RGB PIL image -- TODO confirm).
        faces: Ground-truth boxes, each ``[x1, y1, x2, y2]``.
        ldmk: Flat sequence alternating landmark x and y values, or ``None``.
        index: Position of the current entry in ``self.img_faces``.

    Returns:
        ``(crop_img, label, offset, ldmk_offset)``. In the P-Net mode all four
        are ``None`` when no sample could be produced. In the random mode a
        failed search returns ``crop_img=None`` with empty offset lists.
    """
    no_sample = (None, None, None, None)

    def get_crop_img(img_np, crop_box, crop_size):
        """Cut ``crop_box`` (x1, y1, x2, y2) out of ``img_np`` and resize it."""
        x1, y1, x2, y2 = [int(v) for v in crop_box]
        patch = Image.fromarray(img_np[y1:y2, x1:x2, :], mode='RGB')
        return patch.resize((crop_size, crop_size), resample=PIL.Image.BILINEAR)

    def get_real_label(label):
        """Map the sampling label onto the label stored with the sample."""
        return {
            'n': 'n',
            'np': 'n',
            'pf': 'pf' if ldmk is None else 'l',
            'p': 'p' if ldmk is None else 'l',
        }.get(label)

    def cal_offset(face, box):
        """Return the face corners relative to ``box``, in units of box size."""
        if box is None:
            return []
        box_w = float(box[2] - box[0])
        box_h = float(box[3] - box[1])
        return [
            (face[0] - box[0]) / box_w,
            (face[1] - box[1]) / box_h,
            (face[2] - box[2]) / box_w,
            (face[3] - box[3]) / box_h,
        ]

    def cal_landmark_offset(box, ldmk):
        """Return landmarks relative to the box origin, in units of box size."""
        if ldmk is None or box is None:
            return []
        origin = (box[0], box[1])
        extent = (float(box[2] - box[0]), float(box[3] - box[1]))
        return [(v - origin[i % 2]) / extent[i % 2] for i, v in enumerate(ldmk)]

    img_np = np.array(img)
    width, height = img.size

    if self.pnet is None:
        if len(faces) == 0:
            # No ground truth to sample around.
            return no_sample

        # negative, negative partial, partial face, positive
        label = random.choice(['n', 'np', 'pf', 'p'], p=self.ratio)
        iou_th = {'n': (0, 0.3), 'np': (0, 0.3),
                  'pf': (0.4, 0.65), 'p': (0.65, 1.0)}.get(label)
        # Relative jitter applied to crop size and position for each label.
        sigma = {'n': 1, 'np': 0.3, 'pf': 0.1, 'p': 0.02}.get(label)

        # Prefer a face larger than the crop size; after ten attempts the
        # last pick is used regardless.
        face, face_max_size = None, None
        for _ in range(10):
            face = faces[random.randint(len(faces))]
            face_max_size = max(face[2] - face[0], face[3] - face[1])
            if face_max_size > self.crop_size:
                break

        max_size = min(width, height)
        x1, y1 = face[0], face[1]
        crop_img = None
        # Stays None unless a candidate is accepted, so a failed search does
        # not report offsets that belong to a rejected box.
        crop_box = None
        for _ in range(10):
            size = (uniform(-1.0, 1.0) * sigma + 1) * face_max_size
            # Keep the crop at least crop_size and at most the image's
            # shorter side.
            size = min(max(self.crop_size, size), max_size)
            crop_x1 = (uniform(-1.0, 1.0) * sigma + 1) * x1
            crop_y1 = (uniform(-1.0, 1.0) * sigma + 1) * y1
            crop_x1 = min(max(0, crop_x1), width - size)
            crop_y1 = min(max(0, crop_y1), height - size)
            candidate = np.array([int(crop_x1), int(crop_y1),
                                  int(crop_x1 + size), int(crop_y1 + size)])
            iou = IoU(candidate, np.array([face])).max()
            # Skip candidates whose IoU is outside the range for this label.
            if iou < iou_th[0] or iou > iou_th[1]:
                continue
            crop_box = candidate
            crop_img = get_crop_img(img_np, crop_box, self.crop_size)
            break
        return (crop_img, get_real_label(label),
                cal_offset(face, crop_box), cal_landmark_offset(crop_box, ldmk))

    # Serve previously detected samples first.
    if len(self.cache) != 0:
        # Presumably re-queues the current image so it is not skipped while a
        # cached sample is returned in its place -- TODO confirm with caller.
        self.img_faces.append(self.img_faces[index])
        return self.cache.pop(0)

    if len(faces) == 0:
        # IoU against an empty ground-truth set is undefined.
        return no_sample

    # Ordered so that a boundary value (IoU == 0.65) is labeled once, as 'pf'.
    iou_th = (('n', (0, 0.3)), ('pf', (0.4, 0.65)), ('p', (0.65, 1.0)))

    from detector import pnet_boxes, rnet_boxes
    bounding_boxes = pnet_boxes(img, self.pnet, show_boxes=False)
    if bounding_boxes is None:
        return no_sample
    if self.rnet is not None:
        bounding_boxes_rnet = rnet_boxes(img, self.rnet, bounding_boxes,
                                         show_boxes=False)
        if bounding_boxes_rnet is not None and len(bounding_boxes_rnet) != 0:
            bounding_boxes = np.vstack((bounding_boxes, bounding_boxes_rnet))

    for raw_box in bounding_boxes:
        # Clip the candidate to the image and convert it to ints.
        box = [min(max(0, int(raw_box[i])), width if i % 2 == 0 else height)
               for i in range(4)]
        if box[2] - box[0] < self.crop_size:
            continue
        if box[3] <= box[1]:
            # Zero-height box: cannot be cropped and would divide by zero.
            continue

        iou = IoU(box, faces)
        iou_max = iou.max()
        closest_face = faces[iou.argmax()]

        for temp_label, (low, high) in iou_th:
            if iou_max < low or iou_max > high:
                continue
            self.cache.append((
                get_crop_img(img_np, box, self.crop_size),
                get_real_label(temp_label),
                cal_offset(closest_face, box),
                cal_landmark_offset(box, ldmk),
            ))
            break

    return no_sample if len(self.cache) == 0 else self.cache.pop(0)