def onet_detector(image):
    """Run the O-Net symbol on one image and return all of its raw outputs.

    The ``model/onet`` checkpoint (epoch 9) is loaded and a new executor is
    bound on every call, so calling this in a tight loop is expensive.

    :param image: input image; the network is bound for a single
        3-channel 48x48 input, so the caller is expected to pass a 48x48
        image (NOTE(review): layout expected by ``transform`` is not
        visible here -- confirm).
    :return: list with one numpy array per network output, each at least
        2-D. This is the whole output list, not a single probability; the
        value the original code read as the class probability is
        ``out[0][0][1]``.
    """
    sym = O_Net('test')
    ctx = mx.cpu()
    args, auxs = load_param('model/onet', 9, convert=False, ctx=ctx)

    data_size = 48  # the executor is bound for 48x48 inputs
    data_shapes = {'data': (1, 3, data_size, data_size)}

    args['data'] = mx.nd.array(transform(image), ctx)
    executor = sym.simple_bind(ctx, grad_req='null', **data_shapes)
    executor.copy_params_from(args, auxs)
    executor.forward(is_train=False)  # inference only

    # np.vstack on a one-element list promotes each output to at least 2-D,
    # matching what the previous list-of-lists implementation produced.
    return [np.vstack([output.asnumpy()]) for output in executor.outputs]
def detect_onet(self, im, dets, mode='test'):
    """Get face candidates using onet.

    Parameters:
    ----------
        im: numpy array
            input image array
        dets: numpy array
            detection results of rnet; may be None or empty
        mode: str
            when equal to 'test' (the default) the calibrated boxes are
            additionally filtered with ``py_nms(boxes_c, 0.7, "Minimum")``
    Returns:
    -------
        boxes: numpy array
            detected boxes before calibration
        boxes_c: numpy array
            boxes after calibration
        Both are None when there is nothing to score or no candidate
        scores above ``self.thresh[2]``.
    """
    # Nothing to refine: bail out before touching the image or the network.
    if dets is None or len(dets) == 0:
        return None, None

    h, w, _ = im.shape
    dets = self.convert_to_square(dets)
    dets[:, 0:4] = np.round(dets[:, 0:4])

    [dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph] = self.pad(dets, w, h)
    num_boxes = dets.shape[0]

    cropped_ims = np.zeros((num_boxes, 3, 48, 48), dtype=np.float32)
    for i in range(num_boxes):
        # Boxes smaller than 2 pixels in either dimension are skipped; their
        # slot in the batch stays all-zero.
        if tmph[i] >= 2 and tmpw[i] >= 2:
            tmp = np.zeros((tmph[i], tmpw[i], 3), dtype=np.uint8)
            tmp[dy[i]:edy[i] + 1, dx[i]:edx[i] + 1, :] = \
                im[y[i]:ey[i] + 1, x[i]:ex[i] + 1, :]
            cropped_ims[i, :, :, :] = image_processing.transform(
                cv2.resize(tmp, (48, 48)))

    cls_scores, reg = self.onet_detector.predict(cropped_ims)
    cls_scores = cls_scores[:, 1].flatten()

    keep_inds = np.where(cls_scores > self.thresh[2])[0]
    if len(keep_inds) == 0:
        return None, None

    boxes = dets[keep_inds]
    boxes[:, 4] = cls_scores[keep_inds]  # column 4 is overwritten with the score
    reg = reg[keep_inds]

    boxes_c = self.calibrate_box(boxes, reg)
    if mode == 'test':
        keep = py_nms(boxes_c, 0.7, "Minimum")
        boxes_c = boxes_c[keep]
    return boxes, boxes_c
def get_minibatch_thread(imdb, num_classes, im_size, with_type, with_cls, with_bbox, with_landmark):
    """Load and preprocess the images in ``imdb`` and collect the requested labels.

    Parameters:
    ----------
        imdb: sequence of dict
            each record provides 'image' (file path) and 'flipped', plus
            'type_label' / 'label' / 'bbox_target' / 'landmark_target'
            when the corresponding ``with_*`` flag is set
        num_classes: unused here, kept for interface compatibility
        im_size: int
            every image must be exactly im_size x im_size
        with_type, with_cls, with_bbox, with_landmark: bool
            select which label lists are filled
    Returns:
    -------
        (processed_ims, cls_label, type_label, bbox_reg_target,
        landmark_reg_target) -- five lists; a label list stays empty when
        its flag is false.
    Raises:
    ------
        IOError: when an image file cannot be read
        AssertionError: when an image is not im_size x im_size
    """
    processed_ims = []
    cls_label = []
    type_label = []
    bbox_reg_target = []
    landmark_reg_target = []

    for record in imdb:
        filename = record['image']
        im = cv2.imread(filename)
        # cv2.imread signals failure by returning None rather than raising.
        if im is None:
            raise IOError('failed to read image: {}'.format(filename))
        h, w, _ = im.shape

        if with_type:
            type_label.append(record['type_label'])
        if with_cls:
            cls_label.append(record['label'])
        if with_bbox:
            bbox_reg_target.append(record['bbox_target'])
        if with_landmark:
            landmark_reg_target.append(record['landmark_target'])

        assert h == w == im_size, "image size wrong"
        if record['flipped']:
            im = im[:, ::-1, :]  # reverse axis 1 (horizontal mirror)

        processed_ims.append(image_processing.transform(im, True))

    return processed_ims, cls_label, type_label, bbox_reg_target, landmark_reg_target
def detect_onet(self, im, dets):
    """Get face candidates using onet.

    Parameters:
    ----------
        im: numpy array
            input image array
        dets: numpy array
            detection results of rnet; may be None or empty
    Returns:
    -------
        boxes: numpy array
            detected boxes before calibration
        boxes_c: numpy array
            boxes after calibration, filtered with
            ``py_nms(boxes_c, 0.7, "Minimum")``
        Both are None when there is nothing to score or no candidate
        scores above ``self.thresh[2]``.
    """
    if dets is None or len(dets) == 0:
        return None, None

    h, w, _ = im.shape
    dets = self.convert_to_square(dets)
    dets[:, 0:4] = np.round(dets[:, 0:4])

    [dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph] = self.pad(dets, w, h)
    num_boxes = dets.shape[0]

    cropped_ims = np.zeros((num_boxes, 3, 48, 48), dtype=np.float32)
    for i in range(num_boxes):
        # A non-positive crop size cannot be processed: np.zeros rejects
        # negative dimensions and cv2.resize rejects empty images. Such
        # boxes keep an all-zero slot in the batch instead of aborting the
        # whole detection pass.
        if tmph[i] <= 0 or tmpw[i] <= 0:
            continue
        tmp = np.zeros((tmph[i], tmpw[i], 3), dtype=np.uint8)
        tmp[dy[i]:edy[i] + 1, dx[i]:edx[i] + 1, :] = \
            im[y[i]:ey[i] + 1, x[i]:ex[i] + 1, :]
        cropped_ims[i, :, :, :] = image_processing.transform(
            cv2.resize(tmp, (48, 48)))

    cls_scores, reg = self.onet_detector.predict(cropped_ims)
    cls_scores = cls_scores[:, 1].flatten()

    keep_inds = np.where(cls_scores > self.thresh[2])[0]
    if len(keep_inds) == 0:
        return None, None

    boxes = dets[keep_inds]
    boxes[:, 4] = cls_scores[keep_inds]  # column 4 is overwritten with the score
    reg = reg[keep_inds]

    boxes_c = self.calibrate_box(boxes, reg)
    keep = py_nms(boxes_c, 0.7, "Minimum")
    boxes_c = boxes_c[keep]
    return boxes, boxes_c
def get_minibatch_thread(imdb, im_size):
    """Augment and preprocess every record of ``imdb``.

    Each record is passed to ``augment_for_one_image`` together with
    ``im_size``; the returned image is converted with
    ``image_processing.transform(im, True)``.

    Returns a pair of lists ``(processed_ims, landmark_reg_target)`` holding,
    per record, the transformed image and the landmark returned by the
    augmentation step.
    """
    tensors = []
    landmarks = []
    for record in imdb:
        augmented, landmark = augment_for_one_image(record, im_size)
        tensors.append(image_processing.transform(augmented, True))
        landmarks.append(landmark)
    return tensors, landmarks
def _data_augmentation(self, data, label):
    """
    Apply the augmentation pipeline to one sample and return (data, label).

    Steps, in order: optional random crop/pad (training only, when samplers
    are configured), optional horizontal mirror (training only), resize to
    ``self._data_shape`` with a randomly chosen interpolation, and finally
    ``transform`` with ``self._mean_pixels``.
    """
    training = self.is_train

    if training and self._rand_samplers:
        candidates = []
        for sampler in self._rand_samplers:
            candidates.extend(sampler.sample(label))

        if candidates:
            # pick one of the sampled crops at random
            chosen = candidates[int(np.random.uniform(0, 1) * len(candidates))]
            height, width = data.shape[0], data.shape[1]
            # crop coordinates are fractions of the image size
            box = chosen[0]
            xmin = int(box[0] * width)
            ymin = int(box[1] * height)
            xmax = int(box[2] * width)
            ymax = int(box[3] * height)

            if xmin < 0 or ymin < 0 or xmax > width or ymax > height:
                # padding mode: paste the image onto a canvas filled with 128
                canvas = np.full((ymax - ymin, xmax - xmin, 3), 128.)
                top, left = 0 - ymin, 0 - xmin
                canvas[top:top + height, left:left + width, :] = data
                data = canvas
            else:
                data = data[ymin:ymax, xmin:xmax, :]
            label = chosen[1]

    if training and self._rand_mirror:
        if np.random.uniform(0, 1) > 0.5:
            data = cv2.flip(data, 1)
            # only rows whose first column is > -1 are adjusted
            rows = np.where(label[:, 0] > -1)[0]
            # columns 1 and 3 swap roles under the mirror
            # (presumably normalized xmin/xmax -- confirm against the label format)
            label[rows, 1], label[rows, 3] = \
                1.0 - label[rows, 3], 1.0 - label[rows, 1]

    if training:
        interp_choices = [cv2.INTER_LINEAR, cv2.INTER_CUBIC, cv2.INTER_AREA,
                          cv2.INTER_NEAREST, cv2.INTER_LANCZOS4]
    else:
        interp_choices = [cv2.INTER_LINEAR]
    # the random draw happens even when there is only one choice
    pick = int(np.random.uniform(0, 1) * len(interp_choices))
    data = resize(data, self._data_shape, interp_choices[pick])
    data = transform(data, self._mean_pixels)
    return data, label
def _data_augmentation(self, data, label):
    """
    Augment a single (data, label) pair.

    During training this may replace the image by a random crop (or a padded
    crop when the sampled box leaves the image) and may mirror it
    horizontally, updating ``label`` accordingly. In every mode the image is
    then resized to ``self._data_shape`` and passed through ``transform``
    with ``self._mean_pixels``.
    """
    if self.is_train and self._rand_samplers:
        sampled = []
        for sampler in self._rand_samplers:
            sampled += sampler.sample(label)
        count = len(sampled)
        if count > 0:
            # choose one sampled crop uniformly at random
            selection = sampled[int(np.random.uniform(0, 1) * count)]
            img_h = data.shape[0]
            img_w = data.shape[1]
            rel_box = selection[0]
            left = int(rel_box[0] * img_w)
            top = int(rel_box[1] * img_h)
            right = int(rel_box[2] * img_w)
            bottom = int(rel_box[3] * img_h)
            inside = left >= 0 and top >= 0 and right <= img_w and bottom <= img_h
            if inside:
                data = data[top:bottom, left:right, :]
            else:
                # box extends beyond the image: embed the image in a
                # constant (128) canvas of the box size
                original = data
                data = np.full((bottom - top, right - left, 3), 128.)
                shift_x = 0 - left
                shift_y = 0 - top
                data[shift_y:shift_y + img_h, shift_x:shift_x + img_w, :] = original
            label = selection[1]

    if self.is_train and self._rand_mirror:
        flip = np.random.uniform(0, 1) > 0.5
        if flip:
            data = cv2.flip(data, 1)
            # rows with a first column of -1 or below are left untouched
            valid_rows = np.where(label[:, 0] > -1)[0]
            mirrored = 1.0 - label[valid_rows, 1]
            label[valid_rows, 1] = 1.0 - label[valid_rows, 3]
            label[valid_rows, 3] = mirrored

    methods = [cv2.INTER_LINEAR]
    if self.is_train:
        methods = [cv2.INTER_LINEAR, cv2.INTER_CUBIC, cv2.INTER_AREA,
                   cv2.INTER_NEAREST, cv2.INTER_LANCZOS4]
    # a random number is consumed here in both modes
    method = methods[int(np.random.uniform(0, 1) * len(methods))]
    resized = resize(data, self._data_shape, method)
    return transform(resized, self._mean_pixels), label
def get_minibatch_thread(imdb, num_classes, im_size):
    """Read, validate and preprocess every image listed in ``imdb``.

    Each record supplies 'image' (path), 'label' and 'flipped'. Images must
    be square with side ``im_size``; flipped records are reversed along
    axis 1 before ``image_processing.transform`` is applied.
    ``num_classes`` is not used.

    Returns ``(processed_ims, label)`` as two parallel lists.
    """
    tensors = []
    labels = []
    for record in imdb:
        im = cv2.imread(record['image'])
        height, width, _ = im.shape
        record_label = record['label']
        assert height == im_size and width == im_size, "image size wrong"
        if record['flipped']:
            im = im[:, ::-1, :]
        tensors.append(image_processing.transform(im))
        labels.append(record_label)
    return tensors, labels
def resize_image(self, img, scale):
    """
        Scale an image by ``scale`` and run it through
        ``image_processing.transform``.
    Parameters:
    ----------
        img: numpy array, height x width x channel
            input image (the original notes say channels are in BGR order)
        scale: float number
            factor applied to both height and width; results are truncated
            to int
    Returns:
    -------
        whatever ``image_processing.transform`` returns for the resized
        image (documented by the original author as 1 x channel x height x
        width)
    """
    src_height, src_width, _ = img.shape
    # cv2.resize takes the target size as (width, height)
    target_size = (int(src_width * scale), int(src_height * scale))
    scaled = cv2.resize(img, target_size, interpolation=cv2.INTER_LINEAR)
    return image_processing.transform(scaled)
def get_minibatch_thread(imdb, num_classes, im_size):
    """Read and preprocess the images of ``imdb`` with class and bbox labels.

    Each record supplies 'image' (path), 'label', 'bbox_target' and
    'flipped'. Images must be square with side ``im_size``; flipped records
    are reversed along axis 1 before ``image_processing.transform`` is
    applied. ``num_classes`` is not used.

    Returns ``(processed_ims, cls_label, bbox_reg_target)`` as three
    parallel lists.
    """
    tensors, class_labels, bbox_targets = [], [], []
    for record in imdb:
        im = cv2.imread(record['image'])
        height, width, _ = im.shape
        class_label = record['label']
        bbox_target = record['bbox_target']
        assert height == im_size and width == im_size, "image size wrong"
        if record['flipped']:
            im = im[:, ::-1, :]
        tensors.append(image_processing.transform(im))
        class_labels.append(class_label)
        bbox_targets.append(bbox_target)
    return tensors, class_labels, bbox_targets
def get_minibatch_thread(imdb, mode='rgb'):
    """Read and preprocess classification images listed in ``imdb``.

    Image paths are resolved relative to ``config.root + '/classify_data/'``.
    When ``mode`` is 'rgb' the channel axis is reversed after loading;
    records marked 'flipped' are additionally reversed along axis 1.

    Returns ``(processed_ims, cls_label)`` as two parallel lists.
    """
    tensors = []
    class_labels = []
    reverse_channels = mode == 'rgb'
    for record in imdb:
        path = config.root + '/classify_data/' + record['image']
        im = cv2.imread(path)
        # unpacking fails early if the image could not be loaded as a 3-D array
        _height, _width, _channels = im.shape
        class_labels.append(record['label'])
        if reverse_channels:
            im = im[:, :, ::-1]
        if record['flipped']:
            im = im[:, ::-1, :]
        tensors.append(image_processing.transform(im))
    return tensors, class_labels
def demo(self, img, classes=None, thresh=0.6, show_timer=False):
    """Run detection on ``img`` and draw the results onto it in place.

    Parameters:
    ----------
        img: numpy array
            image to run on; it is modified (boxes and text are drawn) and
            also returned
        classes: sequence of str, optional
            class names indexed by class id; the numeric id is shown when
            no name is available
        thresh: float
            only detections whose score is strictly greater are drawn
        show_timer: unused, kept for interface compatibility
    Returns:
    -------
        the annotated ``img``
    """
    data = resize(img, (self.data_shape, self.data_shape), cv2.INTER_LINEAR)
    data = transform(data, self.mean_pixels)
    data = data[np.newaxis, :]  # add a leading batch axis

    self.mod.forward(Batch([mx.nd.array(data)]))
    detection = self.mod.get_outputs()[0].asnumpy()
    det = detection[0, :, :]
    # keep only rows with a non-negative class id
    dets = det[np.where(det[:, 0] >= 0)[0]]

    height, width = img.shape[0], img.shape[1]
    for row in dets:
        cls_id = int(row[0])
        score = row[1]
        if not score > thresh:
            continue

        # one random colour per class id, remembered across calls
        if cls_id not in self.colors:
            self.colors[cls_id] = (256 * random.random(),
                                   256 * random.random(),
                                   256 * random.random())
        color = self.colors[cls_id]

        # columns 2..5 are scaled by the image size
        # (presumably normalized xmin, ymin, xmax, ymax -- confirm)
        xmin = int(row[2] * width)
        ymin = int(row[3] * height)
        xmax = int(row[4] * width)
        ymax = int(row[5] * height)
        cv2.rectangle(img, (xmin, ymin), (xmax, ymax), color, 4)

        class_name = str(cls_id)
        if classes and len(classes) > cls_id:
            class_name = classes[cls_id]
        cv2.putText(img, '{:s} {:.3f}'.format(class_name, score),
                    (xmin, ymin - 4), cv2.FONT_HERSHEY_SIMPLEX, 1, color, 4)
    return img
def get_minibatch(imdb, num_classes, im_size):
    """Build one training minibatch (data dict, label dict) from ``imdb``.

    ``im_size`` is the square image side (the original notes list 12, 24 or
    48). Each record supplies 'image' (path), 'label', 'bbox_target' and
    'flipped'. ``num_classes`` is not used.

    Returns ``(data, label)`` where ``data`` is ``{'data': im_array}`` and
    ``label`` is ``{'label': label_array, 'bbox_target': bbox_target_array}``.
    For ``im_size == 12`` the label array is reshaped to a column.
    """
    tensors, class_labels, bbox_targets = [], [], []
    for record in imdb:
        im = cv2.imread(record['image'])
        height, width, _ = im.shape
        class_label = record['label']
        bbox_target = record['bbox_target']
        assert height == im_size and width == im_size, "image size wrong"
        if record['flipped']:
            im = im[:, ::-1, :]
        tensors.append(image_processing.transform(im))
        class_labels.append(class_label)
        bbox_targets.append(bbox_target)

    # stack per-image results along the first axis
    im_array = np.vstack(tensors)
    label_array = np.array(class_labels)
    bbox_target_array = np.vstack(bbox_targets)

    if im_size == 12:
        label_array = label_array.reshape(-1, 1)

    return ({'data': im_array},
            {'label': label_array, 'bbox_target': bbox_target_array})
def test_landmark_net(crop_list, detect_len_list, original_detect, idx, img0, img_array):
    """Run L-Net on the first ``idx`` face crops and draw the results on ``img0``.

    Parameters:
    ----------
        crop_list: sequence
            per crop, element 0 and 1 are added to the landmark x and y
            coordinates (presumably the crop's top-left corner in ``img0``)
        detect_len_list: sequence of numbers
            per crop, the side length used to scale landmarks back from the
            48-pixel network input
        original_detect: sequence
            per crop, elements 0..3 are drawn as a rectangle on ``img0``
        idx: int
            number of crops to process
        img0: numpy array
            image that is drawn on (modified in place) and displayed
        img_array: sequence of images
            the crops fed to the network; the original notes say they are
            already 48x48
    Returns:
    -------
        None; the annotated image is shown with ``cv2.imshow`` and the
        function blocks in ``cv2.waitKey(0)``.
    """
    sym = L_Net('test')
    ctx = mx.cpu()
    args, auxs = load_param('model/lnet', 4390, convert=False, ctx=ctx)

    data_size = 48    # network input is 48x48
    imshow_size = 48  # size the normalized landmarks are scaled to
    data_shapes = {'data': (1, 3, data_size, data_size)}

    # The binding does not depend on the crop, so do it once and only refresh
    # the input array per iteration.
    executor = sym.simple_bind(ctx, grad_req='null', **data_shapes)

    disp_landmarks = []
    for idx_ in range(idx):
        img = img_array[idx_]
        args['data'] = mx.nd.array(transform(img), ctx)
        executor.copy_params_from(args, auxs)
        executor.forward(is_train=False)

        # Clamp the normalized predictions to [0, 1]. The previous
        # element-wise loop never ran: it iterated over len() of a 2-D
        # array, i.e. the batch size of 1.
        normalized = np.clip(executor.outputs[0].asnumpy(), 0, 1)
        landmarks = np.reshape(normalized * imshow_size, -1)

        # The flat vector is read as all x values followed by all y values.
        num_points = len(landmarks) // 2
        factor = detect_len_list[idx_] / 48.0
        disp_landmark = []
        for j in range(num_points):
            disp_landmark.append(
                int(landmarks[j] * factor + crop_list[idx_][0]))
            disp_landmark.append(
                int(landmarks[j + num_points] * factor + crop_list[idx_][1]))
        disp_landmarks.append(disp_landmark)

    for i in range(idx):
        points = disp_landmarks[i]
        for j in range(len(points) // 2):
            cv2.circle(img0, (int(points[j * 2]), int(points[j * 2 + 1])),
                       4, (0, 255, 0), -1)
        cv2.rectangle(img0,
                      (int(original_detect[i][0]), int(original_detect[i][1])),
                      (int(original_detect[i][2]), int(original_detect[i][3])),
                      (0, 255, 0), 4)

    cv2.imshow("landmarks_10_total", img0)
    cv2.waitKey(0)
import mxnet as mx sym = L106_Net112('test') pretrained='model/lnet106_112' epoch=4070 data_size=112 imshow_size=640 ctx = mx.cpu() args, auxs = load_param(pretrained, epoch, convert=True, ctx=ctx) #print(args) #print(auxs) data_shapes = {'data': (1, 3, data_size, data_size)} img=cv2.imread('./00_.jpg') img=cv2.resize(img,(data_size,data_size)) print(img.shape) newimg1 = transform(img,False) args['data'] = mx.nd.array(newimg1, ctx) executor = sym.simple_bind(ctx, grad_req='null', **dict(data_shapes))#mx.cpu(), x=(5,4), grad_req='null' executor.copy_params_from(args, auxs) out_list = [[] for _ in range(len(executor.outputs))] executor.forward(is_train=False) for o_list, o_nd in zip(out_list, executor.outputs): o_list.append(o_nd.asnumpy()) out = list() for o in out_list: out.append(np.vstack(o)) landmarks=out[0] for j in range(int(len(landmarks)/2)): if(landmarks[2 * j]>1): landmarks[2 * j] = 1 if (landmarks[2 * j] < 0):
def doingLandmark_onet(self, image, trackBox):
    """Score a tracked face crop with O-Net and locate its landmarks with L-Net.

    The ``model/lnet`` checkpoint (epoch 4390) is loaded and an executor is
    bound on every call.

    :param image: face crop as a numpy image; it is resized to 48x48 before
        being fed to both networks
    :param trackBox: box of the crop; elements 0 and 1 are added to the
        landmark x and y coordinates, and the whole box is passed to
        ``self.calibrate_box``
    :return: tuple ``(cls_pro, bbox_new, disp_landmark)`` -- the value
        ``result[0][0][1]`` of the O-Net output (documented by the original
        author as the face probability), the box returned by
        ``self.calibrate_box``, and a flat list
        ``[x0, y0, x1, y1, ...]`` of integer landmark coordinates
    """
    # landmarks are scaled back using the shorter side of the crop
    detect_length = min(image.shape[0], image.shape[1])

    ctx = mx.cpu()
    sym = L_Net('test')
    args, auxs = load_param('model/lnet', 4390, convert=False, ctx=ctx)

    data_size = 48    # network input is 48x48
    imshow_size = 48  # size the normalized landmarks are scaled to
    data_shapes = {'data': (1, 3, data_size, data_size)}

    img_resized = cv2.resize(image, (data_size, data_size))

    result = self.onet_detector(img_resized)
    cls_pro = result[0][0][1]
    reg_m = result[1][0]
    bbox_new = self.calibrate_box(trackBox, reg_m)

    args['data'] = mx.nd.array(transform(img_resized), ctx)
    executor = sym.simple_bind(ctx, grad_req='null', **data_shapes)
    executor.copy_params_from(args, auxs)
    executor.forward(is_train=False)  # inference only

    # Clamp the normalized predictions to [0, 1]. The previous element-wise
    # loop never ran: it iterated over len() of a 2-D array, i.e. the batch
    # size of 1.
    normalized = np.clip(executor.outputs[0].asnumpy(), 0, 1)
    landmarks = np.reshape(normalized * imshow_size, -1)

    # The flat vector is read as all x values followed by all y values.
    num_points = len(landmarks) // 2
    factor = float(detect_length) / 48.0
    disp_landmark = []
    for j in range(num_points):
        disp_landmark.append(int(landmarks[j] * factor + trackBox[0]))
        disp_landmark.append(
            int(landmarks[j + num_points] * factor + trackBox[1]))

    return cls_pro, bbox_new, disp_landmark