def _lnet_sample(img, landmark, offset, face_w, scale):
    '''Assemble the 5-patch input for one (offset, scale) LNet sample.

    Around every landmark a square patch of side ``face_w * scale`` is cropped,
    centred on the landmark shifted by ``offset[i] * face_w``, resized to
    24x24 and written into 3 consecutive channels of the output.

    Returns the stacked patches as a ``uint8`` array transposed to
    (15, 24, 24), or ``None`` when ``crop_face`` yields no patch for one of
    the landmarks.
    '''
    side = face_w * scale
    data = np.zeros((24, 24, 15), dtype=np.uint8)
    for i in range(5):
        x, y = landmark[i]
        x_offset, y_offset = offset[i] * face_w
        x_center, y_center = x + x_offset, y + y_offset
        patch_bbox = [
            x_center - side / 2, y_center - side / 2,
            x_center + side / 2, y_center + side / 2,
        ]
        patch = crop_face(img, patch_bbox)
        if patch is None:
            # An incomplete sample is useless for training; let the caller
            # skip it.
            return None
        data[:, :, (3 * i):(3 * i + 3)] = cv2.resize(patch, (24, 24))
    return data.transpose((2, 0, 1))  # 15x24x24, uint8


def lnet_reader_func(q_in, q_out):
    '''Worker loop that turns annotated faces into LNet training samples.

    Items are pulled from ``q_in`` until it is empty. Each item is
    ``(img_path, bbox, landmark)``: ``bbox`` unpacks to ``x1, y1, x2, y2`` and
    ``landmark`` is reshaped to (5, 2). For every face,
    ``cfg.LNET_SAMPLE_PER_FACE`` random offsets are drawn uniformly from
    ``[-cfg.SAMPLE_RADIUS, cfg.SAMPLE_RADIUS)`` (in units of the bbox width),
    and for every scale in ``cfg.LNET_FACE_SCALES`` one sample is pushed to
    ``q_out`` as ``('data', [data, target])`` where

    * ``data`` is the (15, 24, 24) ``uint8`` patch stack, and
    * ``target`` is the flattened, negated offset divided by ``scale``, i.e.
      the shift back to the true landmark in units of the patch side.

    Images that cannot be read and samples whose patches cannot be cropped
    are skipped with a warning.

    NOTE(review): only the bbox width is used for sizing; the original code
    carried a disabled assertion that the bbox is square - confirm upstream.
    '''
    try:
        from queue import Empty  # Python 3
    except ImportError:
        from Queue import Empty  # Python 2

    counter = 0
    while not q_in.empty():
        # ``empty()`` followed by a blocking ``get()`` can hang forever when
        # another reader takes the last item in between; use a timed get and
        # re-check emptiness on timeout instead.
        try:
            item = q_in.get(timeout=1)
        except Empty:
            continue
        counter += 1
        if counter % 10000 == 0:
            logger.info('%s reads %d', mp.current_process().name, counter)

        img_path, bbox, landmark = item
        img = cv2.imread(img_path, cv2.IMREAD_COLOR)
        if img is None:
            logger.warning('read %s failed', img_path)
            continue

        x1, _y1, x2, _y2 = bbox
        w = x2 - x1
        landmark = landmark.reshape((5, 2))
        for _ in range(cfg.LNET_SAMPLE_PER_FACE):
            offset = np.random.rand(5, 2).astype(np.float32)
            offset = (2 * offset - 1) * cfg.SAMPLE_RADIUS
            for scale in cfg.LNET_FACE_SCALES:
                data = _lnet_sample(img, landmark, offset, w, scale)
                if data is None:
                    logger.warning('crop patch from %s failed, skip sample',
                                   img_path)
                    continue
                # offset is relative to the face width; dividing by scale
                # re-expresses it relative to the patch side (w * scale).
                target = offset.copy()
                target /= scale
                target = target.reshape(10)
                # The network regresses the move from the shifted centre back
                # to the landmark, hence the sign flip.
                target *= -1
                q_out.put(('data', [data, target]))
def _crop_with_target(img, bbox, gt_bbox, k):
    '''Crop ``bbox`` from ``img`` and pair it with its regression target.

    Returns ``(data, bbox, bbox_target)`` with
    ``bbox_target = (gt_bbox - bbox) / k``, or ``None`` when ``crop_face``
    yields no patch.
    '''
    data = crop_face(img, bbox)
    if data is None:
        return None
    return data, bbox, (gt_bbox - bbox) / k


def _face_region_candidates(gt_bbox, scale, stride):
    '''Slide square windows over a randomly padded region around one face.

    The window side is ``k = max(w, h) * scale`` and the step is
    ``k * stride``; the face region is padded on each side by a random amount
    in ``[k / 4, 3 * k / 4)``.

    Returns ``(candidates, k)`` where ``candidates`` is whatever
    ``sliding_windows`` produces for that region.
    '''
    x, y = gt_bbox[:2]
    w, h = gt_bbox[2] - gt_bbox[0], gt_bbox[3] - gt_bbox[1]
    k = max(w, h) * scale
    s = k * stride
    offset_x = (0.5 + np.random.rand()) * k / 2.
    offset_y = (0.5 + np.random.rand()) * k / 2.
    candidates = sliding_windows(x - offset_x, y - offset_y,
                                 w + 2 * offset_x, h + 2 * offset_y,
                                 k, k, s, s)
    return candidates, k


def _samples_around_face(img, gt_bbox, scales, stride, per_face, keep_mask,
                         with_target):
    '''Collect at most ``per_face`` cropped samples around one face.

    For every scale, candidate windows are generated, filtered with
    ``keep_mask(candidates, gt_bbox)`` (a boolean mask), shuffled, and the
    first ``per_face`` of them are cropped. Samples are
    ``(data, bbox, bbox_target)`` when ``with_target`` is true, otherwise
    ``(data, bbox)``. Windows that cannot be cropped are dropped.
    '''
    samples = []
    for scale in scales:
        candidates, k = _face_region_candidates(gt_bbox, scale, stride)
        chosen = candidates[keep_mask(candidates, gt_bbox), :]
        if len(chosen) > 0:
            np.random.shuffle(chosen)
        for bbox in chosen[:per_face]:
            if with_target:
                sample = _crop_with_target(img, bbox, gt_bbox, k)
            else:
                data = crop_face(img, bbox)
                sample = None if data is None else (data, bbox)
            if sample is not None:
                samples.append(sample)
    # Mix the scales before truncating so no single scale dominates.
    random.shuffle(samples)
    return samples[:per_face]


def _propose_with_detector(img, gt_bboxes, detector):
    '''Label the detector's output boxes by overlap with the ground truth.

    Every detected box is matched to the ground-truth box it overlaps most.
    Returns ``(negatives, positives, part)`` in the same format as
    ``proposal``; the regression target is normalised by the detected box
    width.
    '''
    assert isinstance(detector, JfdaDetector)
    bboxes = detector.detect(img, **cfg.DETECT_PARAMS)
    # keep bbox info, drop score, offset and landmark
    bboxes = bboxes[:, :4]
    ovs = bbox_overlaps(bboxes, gt_bboxes)
    ovs_max = ovs.max(axis=1)
    ovs_idx = ovs.argmax(axis=1)
    pos_idx = np.where(ovs_max > cfg.FACE_OVERLAP)[0]
    neg_idx = np.where(ovs_max < cfg.NONFACE_OVERLAP)[0]
    part_idx = np.where(np.logical_and(ovs_max > cfg.PARTFACE_OVERLAP,
                                       ovs_max <= cfg.FACE_OVERLAP))[0]

    def with_targets(indices):
        samples = []
        for idx in indices:
            bbox = bboxes[idx].reshape(4)
            sample = _crop_with_target(img, bbox, gt_bboxes[ovs_idx[idx]],
                                       bbox[2] - bbox[0])
            if sample is not None:
                samples.append(sample)
        return samples

    positives = with_targets(pos_idx)
    part = with_targets(part_idx)

    negatives = []
    np.random.shuffle(neg_idx)
    for idx in neg_idx[:cfg.NEG_DETECT_PER_IMAGE]:
        bbox = bboxes[idx].reshape(4)
        data = crop_face(img, bbox)
        if data is None:
            continue
        negatives.append((data, bbox))
    return negatives, positives, part


def proposal(img, gt_bboxes, detector=None):
    '''Propose negative, positive and part-face training crops for one image.

    With a ``detector`` (used for rNet and oNet data) the boxes produced by
    the previous networks are labelled by their overlap with ``gt_bboxes``.
    Without one (pNet data) windows are slid around every ground-truth face
    and additional negatives are cropped at random from the whole image.

    Args:
        img: image array; ``img.shape[:-1]`` is read as (height, width).
        gt_bboxes: numpy array of ground-truth boxes, one
            ``[x1, y1, x2, y2]`` row per face.
        detector: optional ``JfdaDetector`` used to generate the proposals.

    Returns:
        ``(negatives, positives, part)`` where
        negatives: [(data, bbox)]
        positives: [(data, bbox, bbox_target)]
        part: [(data, bbox, bbox_target)]
        and ``bbox_target = (gt_bbox - bbox) / k`` with ``k`` the proposal's
        window size.
    '''
    # ======================= proposal for rnet and onet ==============
    if detector is not None:
        return _propose_with_detector(img, gt_bboxes, detector)

    # ======================= proposal for pnet =======================
    height, width = img.shape[:-1]

    def overlap_with_face(candidates, gt_bbox):
        ovs = bbox_overlaps(candidates, gt_bbox.reshape((1, 4)))
        return ovs.reshape((1, len(candidates)))[0]

    def positive_mask(candidates, gt_bbox):
        return overlap_with_face(candidates, gt_bbox) > cfg.FACE_OVERLAP

    def part_mask(candidates, gt_bbox):
        ovs = overlap_with_face(candidates, gt_bbox)
        return np.logical_and(ovs > cfg.PARTFACE_OVERLAP,
                              ovs <= cfg.FACE_OVERLAP)

    def negative_mask(candidates, _gt_bbox):
        # A negative must stay clear of *every* face, not just the one the
        # windows were generated around.
        ovs = bbox_overlaps(candidates, gt_bboxes)
        return ovs.max(axis=1) < cfg.NONFACE_OVERLAP

    # The three passes are kept as separate loops (positives for all faces,
    # then part faces, then negatives) so random numbers are consumed in the
    # same order as before.
    positives = []
    for gt_bbox in gt_bboxes:
        positives.extend(_samples_around_face(
            img, gt_bbox, cfg.POS_PROPOSAL_SCALES, cfg.POS_PROPOSAL_STRIDE,
            cfg.POS_PER_FACE, positive_mask, True))

    part = []
    for gt_bbox in gt_bboxes:
        # Capped by PART_PER_FACE (the cap used to be POS_PER_FACE by mistake).
        part.extend(_samples_around_face(
            img, gt_bbox, cfg.PART_PROPOSAL_SCALES, cfg.PART_PROPOSAL_STRIDE,
            cfg.PART_PER_FACE, part_mask, True))

    negatives = []
    for gt_bbox in gt_bboxes:
        negatives.extend(_samples_around_face(
            img, gt_bbox, cfg.NEG_PROPOSAL_SCALES, cfg.NEG_PROPOSAL_STRIDE,
            cfg.NEG_PER_FACE, negative_mask, False))

    # ===== negatives from global image random crop =====
    max_num_from_fr = int(cfg.NEG_PER_IMAGE * cfg.NEG_FROM_FR_RATIO)
    if len(negatives) > max_num_from_fr:
        random.shuffle(negatives)
        negatives = negatives[:max_num_from_fr]

    bbox_neg = []
    range_x, range_y = width - cfg.NEG_MIN_SIZE, height - cfg.NEG_MIN_SIZE
    # np.random.randint rejects a non-positive bound, so images that are not
    # larger than NEG_MIN_SIZE simply contribute no global crops.
    if range_x > 0 and range_y > 0:
        for _ in range(0, cfg.NEG_PROPOSAL_RATIO * cfg.NEG_PER_IMAGE):
            x1, y1 = np.random.randint(range_x), np.random.randint(range_y)
            # ``high`` is exclusive, so the square always fits in the image.
            w = h = np.random.randint(low=cfg.NEG_MIN_SIZE,
                                      high=min(width - x1, height - y1))
            bbox_neg.append([x1, y1, x1 + w, y1 + h])
    if not bbox_neg:
        return negatives, positives, part

    bbox_neg = np.asarray(bbox_neg, dtype=gt_bboxes.dtype)
    ovs = bbox_overlaps(bbox_neg, gt_bboxes)
    bbox_neg = bbox_neg[ovs.max(axis=1) < cfg.NONFACE_OVERLAP]
    np.random.shuffle(bbox_neg)
    if not cfg.NEG_FORCE_BALANCE:
        remain = cfg.NEG_PER_IMAGE - len(negatives)
    else:
        # balance ratio from face region and global crop
        remain = len(negatives) * (
            1. - cfg.NEG_FROM_FR_RATIO) / cfg.NEG_FROM_FR_RATIO
    # A negative count would slice from the wrong end of the array.
    remain = max(int(remain), 0)
    for bbox in bbox_neg[:remain]:
        data = crop_face(img, bbox)
        if data is None:
            continue
        negatives.append((data, bbox))
    return negatives, positives, part
def detect(self, img, ths, min_size, factor, debug=False):
    '''Detect faces in ``img`` with the cascaded networks held by ``self``.

    The stages run in the order pnet -> rnet -> onet -> lnet. The cascade
    stops early when the next network is ``None`` or no candidate is left.

    Args:
        img: image array; ``img.shape[:-1]`` is read as (height, width).
        ths: score thresholds; ``ths[0]``, ``ths[1]`` and ``ths[2]`` are used
            by pnet, rnet and onet respectively.
        min_size: the first pyramid scale is ``12. / min_size``.
        factor: multiplicative step between successive pyramid scales.
        debug: only the literal ``True`` switches to the debug return value.

    Returns:
        ``bboxes``, one row per detection laid out as
        [bbox(4), score(1), offset(4), landmark(10)]. When ``debug`` is True,
        ``(bb, ts)`` instead: ``bb[i]`` is a copy of the boxes after stage
        ``i`` and ``ts[i]`` is ``timer.elapsed()`` for that stage (stages
        that did not run keep ``[]`` and ``0``).

    Raises:
        ValueError: if a pyramid has to be built and ``factor >= 1``, which
            would otherwise make the scale loop run forever.
    '''
    timer = Timer()
    ts = [0, 0, 0, 0]
    bb = [[], [], [], []]

    def finish(boxes):
        # Return value shared by every exit point of the cascade.
        if debug is True:
            return bb, ts
        return boxes

    def crop_batch(boxes, size):
        # Crop every box, resize to size x size and normalise to [-1, 1).
        batch = np.zeros((len(boxes), 3, size, size), dtype=np.float32)
        for i, box in enumerate(boxes):
            face = crop_face(img, box[:4])
            if face is None:
                # crop_face gave no patch; keep the zero-filled slot instead
                # of crashing in cv2.resize.
                continue
            batch[i] = cv2.resize(face, (size, size)).transpose((2, 0, 1))
        return (batch - 128) / 128

    def rescore(net, boxes, size, threshold):
        # Run ``net`` on the cropped boxes, keep those scoring above
        # ``threshold`` and overwrite their score/offset/landmark columns.
        n = len(boxes)
        prob, bbox_pred, landmark_pred = self._forward(
            net, crop_batch(boxes, size),
            ['prob', 'bbox_pred', 'landmark_pred'])
        prob = prob.reshape(n, 2)
        bbox_pred = bbox_pred.reshape(n, 4)
        landmark_pred = landmark_pred.reshape(n, 10)
        keep = prob[:, 1] > threshold
        boxes = boxes[keep]
        boxes[:, 4] = prob[keep, 1]
        boxes[:, 5:9] = bbox_pred[keep]
        boxes[:, 9:] = landmark_pred[keep]
        return boxes

    # stage-1
    timer.tic()
    base = 12. / min_size
    height, width = img.shape[:-1]
    side = min(width, height) * base
    if side > 12 and factor >= 1:
        raise ValueError(
            'factor must be < 1 to build the image pyramid, got %r' % (factor,))
    scales = []
    while side > 12:
        scales.append(base)
        base *= factor
        side *= factor
    if not self.pnet_single_forward or len(scales) <= 1:
        # One forward pass per scale; stack the results once at the end.
        per_scale = [np.zeros((0, 4 + 1 + 4 + 10), dtype=np.float32)]
        for scale in scales:
            w, h = int(math.ceil(scale * width)), int(
                math.ceil(scale * height))
            data = cv2.resize(img, (w, h))
            data = data.transpose((2, 0, 1)).astype(np.float32)
            data = (data - 128) / 128
            data = data.reshape((1, 3, h, w))
            prob, bbox_pred, landmark_pred = self._forward(
                self.pnet, data, ['prob', 'bbox_pred', 'landmark_pred'])
            _bboxes = self._gen_bbox(prob[0][1], bbox_pred[0],
                                     landmark_pred[0], scale, ths[0])
            keep = nms(_bboxes, 0.5)
            per_scale.append(_bboxes[keep])
        bboxes = np.vstack(per_scale)
    else:
        # convert to a single image
        data, pyramid_info = convert_image_pyramid(img, scales, interval=2)
        # forward pnet
        data = data.astype(np.float32)
        data = (data.transpose((2, 0, 1)) - 128) / 128
        data = data[np.newaxis, :, :, :]
        prob, bbox_pred, landmark_pred = self._forward(
            self.pnet, data, ['prob', 'bbox_pred', 'landmark_pred'])
        bboxes = self._gen_bbox(prob[0][1], bbox_pred[0], landmark_pred[0],
                                1, ths[0])
        # nms over every pyramid
        keep = nms(bboxes, 0.5)
        bboxes = bboxes[keep]
        # map to original image
        bboxes = get_original_bboxes(bboxes, pyramid_info)
    keep = nms(bboxes, 0.7)
    bboxes = bboxes[keep]
    bboxes = self._bbox_reg(bboxes)
    bboxes = self._make_square(bboxes)
    timer.toc()
    ts[0] = timer.elapsed()
    bb[0] = bboxes.copy()
    self._clear_network_buffer(self.pnet)

    # stage-2
    if self.rnet is None or len(bboxes) == 0:
        return finish(bboxes)
    timer.tic()
    bboxes = rescore(self.rnet, bboxes, 24, ths[1])
    keep = nms(bboxes, 0.7)
    bboxes = bboxes[keep]
    bboxes = self._bbox_reg(bboxes)
    bboxes = self._make_square(bboxes)
    timer.toc()
    ts[1] = timer.elapsed()
    bb[1] = bboxes.copy()
    self._clear_network_buffer(self.rnet)

    # stage-3
    if self.onet is None or len(bboxes) == 0:
        return finish(bboxes)
    timer.tic()
    bboxes = rescore(self.onet, bboxes, 48, ths[2])
    bboxes = self._locate_landmark(bboxes)
    bboxes = self._bbox_reg(bboxes)
    keep = nms(bboxes, 0.7, 'Min')
    bboxes = bboxes[keep]
    timer.toc()
    ts[2] = timer.elapsed()
    bb[2] = bboxes.copy()
    self._clear_network_buffer(self.onet)

    # stage-4
    if self.lnet is None or len(bboxes) == 0:
        return finish(bboxes)
    timer.tic()
    n = len(bboxes)
    data = np.zeros((n, 15, 24, 24), dtype=np.float32)
    w, h = bboxes[:, 2] - bboxes[:, 0], bboxes[:, 3] - bboxes[:, 1]
    # Each landmark patch is a square whose side is a quarter of the larger
    # bbox side.
    patch_side = np.maximum(w, h) * 0.25
    for i in range(n):
        landmark = bboxes[i, 9:].reshape((5, 2))
        for j in range(5):
            x, y = landmark[j]
            patch_bbox = [
                x - patch_side[i] / 2, y - patch_side[i] / 2,
                x + patch_side[i] / 2, y + patch_side[i] / 2
            ]
            patch = crop_face(img, patch_bbox)
            if patch is None:
                # No patch for this landmark; leave its channels zero-filled.
                continue
            patch = cv2.resize(patch, (24, 24))
            data[i, (3 * j):(3 * j + 3)] = patch.transpose((2, 0, 1))
    data = (data - 128) / 128
    offset = self._forward(self.lnet, data, ['landmark_offset'])[0]
    offset = offset.reshape(n, 10)
    # The network output is relative to the patch side; scale it back to
    # image coordinates before applying it to the landmarks.
    offset *= patch_side.reshape((-1, 1))
    bboxes[:, 9:] += offset
    timer.toc()
    ts[3] = timer.elapsed()
    bb[3] = bboxes.copy()
    self._clear_network_buffer(self.lnet)
    return finish(bboxes)