def create_mtcnn_net(p_model_path=None, r_model_path=None, o_model_path=None, use_cuda=True):
    """Build the MTCNN cascade from saved checkpoints.

    A stage is only constructed when its checkpoint path is given; a stage
    whose path is ``None`` is returned as ``None``.

    Args:
        p_model_path: Path to the P-Net state dict, or ``None`` to skip P-Net.
        r_model_path: Path to the R-Net state dict, or ``None`` to skip R-Net.
        o_model_path: Path to the O-Net state dict, or ``None`` to skip O-Net.
        use_cuda: When true, every loaded network is moved to the GPU.

    Returns:
        Tuple ``(pnet, rnet, onet)``. Every network that was loaded is in
        eval mode.
    """

    def _load_stage(net, model_path):
        # Load the checkpoint into `net`, place it and switch it to eval mode.
        if use_cuda:
            state_dict = torch.load(model_path)
        else:
            # Remap every storage to the CPU so that a checkpoint saved from
            # a GPU can still be deserialized on a CPU-only host.
            state_dict = torch.load(
                model_path, map_location=lambda storage, loc: storage)
        net.load_state_dict(state_dict)
        if use_cuda:
            net.cuda()
        net.eval()
        return net

    # The conditional expressions keep construction lazy: a network class is
    # only touched when its checkpoint path was actually supplied.
    pnet = (_load_stage(PNet(use_cuda=use_cuda), p_model_path)
            if p_model_path is not None else None)
    rnet = (_load_stage(RNet(use_cuda=use_cuda), r_model_path)
            if r_model_path is not None else None)
    onet = (_load_stage(ONet(use_cuda=use_cuda), o_model_path)
            if o_model_path is not None else None)
    return pnet, rnet, onet
def prepare(self):
    """Load the detection network and the five-point landmark O-Net.

    Checkpoint locations are read from ``self.cfg`` (keys
    ``'trained_model'`` and ``'Onet_weights'``). The detector ends up on
    ``self.device`` in eval mode as ``self.model``; the landmark network is
    stored as ``self.Onet``. cuDNN benchmark mode is switched on globally.
    """
    # NOTE(review): the meaning of the third positional argument (False) is
    # defined by load_model, which is not visible here.
    detector = load_model(self.net, self.cfg['trained_model'], False)
    detector.eval()
    cudnn.benchmark = True
    self.model = detector.to(self.device)
    print('Finished loading detect model!')

    landmark_net = load_onet(ONet(), self.cfg['Onet_weights'], self.device)
    self.Onet = landmark_net
    # self.Onet.eval()
    print('Finished loading Five Points model!')
def create_mtcnn_net(self, use_cuda=True):
    """Download the pretrained P-/R-/O-Net weights and build the cascade.

    The selected device is stored on ``self.device``: CUDA when ``use_cuda``
    is true and a GPU is available, otherwise the CPU.

    Args:
        use_cuda: Prefer the GPU when one is available.

    Returns:
        Tuple ``(pnet, rnet, onet)``; each network is on ``self.device`` and
        in eval mode.
    """
    self.device = torch.device(
        "cuda" if use_cuda and torch.cuda.is_available() else "cpu")

    def _pretrained(net, key):
        # map_location keeps the download loadable on the CPU fallback even
        # if the published tensors were serialized from a GPU.
        state_dict = model_zoo.load_url(
            model_urls[key], map_location=self.device)
        net.load_state_dict(state_dict)
        return net.to(self.device).eval()

    pnet = _pretrained(PNet(), 'pnet')
    onet = _pretrained(ONet(), 'onet')
    rnet = _pretrained(RNet(), 'rnet')
    return pnet, rnet, onet
def _load_mtcnn_stage(net, model_path, tag, use_cuda):
    """Load ``model_path`` into ``net``, place it on the device, set eval mode."""
    if use_cuda:
        print('{0}_model_path:{1}'.format(tag, model_path))
        net.load_state_dict(torch.load(model_path))
        net.cuda()
    else:
        # forcing all GPU tensors to be in CPU while loading
        net.load_state_dict(
            torch.load(model_path, map_location=lambda storage, loc: storage))
    net.eval()
    return net


def create_mtcnn_net(p_model_path=None, r_model_path=None, o_model_path=None, use_cuda=True):
    """Build the MTCNN cascade from saved checkpoints.

    A stage is only constructed when its checkpoint path is given; a stage
    whose path is ``None`` is returned as ``None``. With ``use_cuda`` set,
    the checkpoint path is printed and the network is moved to the GPU;
    otherwise the checkpoint is remapped onto the CPU while loading.

    Args:
        p_model_path: Path to the P-Net state dict, or ``None`` to skip P-Net.
        r_model_path: Path to the R-Net state dict, or ``None`` to skip R-Net.
        o_model_path: Path to the O-Net state dict, or ``None`` to skip O-Net.
        use_cuda: Load onto the GPU when true, onto the CPU otherwise.

    Returns:
        Tuple ``(pnet, rnet, onet)``. Every network that was loaded is in
        eval mode.
    """
    pnet, rnet, onet = None, None, None

    if p_model_path is not None:
        pnet = _load_mtcnn_stage(
            PNet(use_cuda=use_cuda), p_model_path, 'p', use_cuda)

    if r_model_path is not None:
        rnet = _load_mtcnn_stage(
            RNet(use_cuda=use_cuda), r_model_path, 'r', use_cuda)

    if o_model_path is not None:
        onet = _load_mtcnn_stage(
            ONet(use_cuda=use_cuda), o_model_path, 'o', use_cuda)

    return pnet, rnet, onet
def create_mtcnn_net(self, use_cuda=True):
    """Build freshly initialised P-/R-/O-Nets and print a summary of each.

    No pretrained weights are loaded (the ``load_state_dict`` calls are
    commented out). The selected device is stored on ``self.device``: CUDA
    when ``use_cuda`` is true and a GPU is available, otherwise the CPU.

    Args:
        use_cuda: Prefer the GPU when one is available.

    Returns:
        Tuple ``(pnet, rnet, onet)``; each network is on ``self.device`` and
        in eval mode.
    """
    self.device = torch.device(
        "cuda" if use_cuda and torch.cuda.is_available() else "cpu")

    # The summary pass runs on the GPU only when one is actually present, so
    # a CPU-only host no longer fails inside an unconditional `.cuda()`.
    # NOTE(review): assumes `summary` builds its probe input on CUDA exactly
    # when CUDA is available (torchsummary's default behaviour) -- confirm.
    probe_device = torch.device(
        "cuda" if torch.cuda.is_available() else "cpu")

    def _summarised(net, input_shape):
        # Print the layer summary, then park the net on the working device.
        summary(net.to(probe_device), input_shape)
        return net.to(self.device).eval()

    pnet = _summarised(PNet(), (3, 12, 12))
    # pnet.load_state_dict(model_zoo.load_url(model_urls['pnet']))

    rnet = _summarised(RNet(num_landmarks=config.NUM_LANDMARKS), (3, 24, 24))
    # rnet.load_state_dict(model_zoo.load_url(model_urls['rnet']))

    onet = _summarised(ONet(num_landmarks=config.NUM_LANDMARKS), (3, 48, 48))
    # onet.load_state_dict(model_zoo.load_url(model_urls['onet']))

    return pnet, rnet, onet
def create_mtcnn_net(self, use_cuda=True):
    """Download the pretrained P-/R-/O-Net weights and build the cascade.

    The selected device is stored on ``self.device``: CUDA when ``use_cuda``
    is true and a GPU is available, otherwise the CPU. The commented-out
    ``torch.load`` lines show how a local training checkpoint can be used in
    place of the downloaded weights.

    Args:
        use_cuda: Prefer the GPU when one is available.

    Returns:
        Tuple ``(pnet, rnet, onet)``; each network is on ``self.device`` and
        in eval mode.
    """
    self.device = torch.device(
        "cuda" if use_cuda and torch.cuda.is_available() else "cpu")

    # map_location keeps the downloads loadable on the CPU fallback even if
    # the published tensors were serialized from a GPU.
    pnet = PNet()
    #pnet.load_state_dict(torch.load(r'.\results\pnet\log_bs512_lr0.010_072402\check_point\model_050.pth'))
    pnet.load_state_dict(
        model_zoo.load_url(model_urls['pnet'], map_location=self.device))
    pnet.to(self.device).eval()

    onet = ONet()
    #onet.load_state_dict(torch.load(r'.\results\onet\log_bs512_lr0.010_072602\check_point\model_050.pth'))
    onet.load_state_dict(
        model_zoo.load_url(model_urls['onet'], map_location=self.device))
    onet.to(self.device).eval()

    rnet = RNet()
    #rnet.load_state_dict(torch.load(r'.\results\rnet\log_bs512_lr0.001_072502\check_point\model_050.pth'))
    rnet.load_state_dict(
        model_zoo.load_url(model_urls['rnet'], map_location=self.device))
    rnet.to(self.device).eval()

    return pnet, rnet, onet
def _build_scale_pyramid(width, height, min_face_size,
                         min_detection_size=12, factor=0.707):
    """Return the list of image scales at which P-Net is run."""
    # Scale the image so that the minimum size we can detect equals the
    # minimum face size we want to detect.
    m = min_detection_size / min_face_size
    min_length = min(height, width) * m

    scales = []
    factor_count = 0
    while min_length > min_detection_size:
        scales.append(m * factor ** factor_count)
        min_length *= factor  # factor defaults to 0.707, i.e. sqrt(0.5)
        factor_count += 1
    return scales


def detect_faces(image, min_face_size=20.0, thresholds=(0.6, 0.7, 0.8),
                 nms_thresholds=(0.7, 0.7, 0.7)):
    """Run the three-stage MTCNN cascade on one image.

    The three networks are constructed on every call and placed on the
    module-level ``device``.

    Arguments:
        image: an instance of PIL.Image.
        min_face_size: a float number.
        thresholds: a sequence of length 3, one score threshold per stage.
        nms_thresholds: a sequence of length 3, one NMS threshold per stage.

    Returns:
        ``(bounding_boxes, landmarks)``: two float numpy arrays with one row
        per face. Column 4 of ``bounding_boxes`` holds the O-Net score;
        ``landmarks`` has 10 columns (five x values, then five y values).
        Two empty lists are returned when no candidate survives.
    """
    with torch.no_grad():
        # LOAD MODELS
        pnet = PNet().to(device)
        rnet = RNet().to(device)
        onet = ONet().to(device)
        # This is pure inference, so every stage runs in eval mode.
        for stage_net in (pnet, rnet, onet):
            stage_net.eval()

        # BUILD AN IMAGE PYRAMID
        width, height = image.size
        scales = _build_scale_pyramid(width, height, min_face_size)

        # STAGE 1
        # run P-Net on different scales; a scale without hits yields None
        candidates = [
            run_first_stage(image, pnet, scale=s, threshold=thresholds[0])
            for s in scales
        ]
        # collect boxes (and offsets, and scores) from different scales
        candidates = [boxes for boxes in candidates if boxes is not None]
        if not candidates:
            # np.vstack raises on an empty list: no candidates, no faces.
            return [], []
        bounding_boxes = np.vstack(candidates)

        keep = nms(bounding_boxes[:, 0:5], nms_thresholds[0])
        bounding_boxes = bounding_boxes[keep]

        # use offsets predicted by pnet to transform bounding boxes
        bounding_boxes = calibrate_box(bounding_boxes[:, 0:5],
                                       bounding_boxes[:, 5:])
        # shape [n_boxes, 5]

        bounding_boxes = convert_to_square(bounding_boxes)
        bounding_boxes[:, 0:4] = np.round(bounding_boxes[:, 0:4])

        # STAGE 2
        img_boxes = get_image_boxes(bounding_boxes, image, size=24)
        img_boxes = torch.FloatTensor(img_boxes).to(device)
        output = rnet(img_boxes)
        offsets = output[0].cpu().numpy()  # shape [n_boxes, 4]
        probs = output[1].cpu().numpy()  # shape [n_boxes, 2]

        keep = np.where(probs[:, 1] > thresholds[1])[0]
        bounding_boxes = bounding_boxes[keep]
        bounding_boxes[:, 4] = probs[keep, 1].reshape((-1,))
        offsets = offsets[keep]

        keep = nms(bounding_boxes, nms_thresholds[1])
        bounding_boxes = bounding_boxes[keep]
        bounding_boxes = calibrate_box(bounding_boxes, offsets[keep])
        bounding_boxes = convert_to_square(bounding_boxes)
        bounding_boxes[:, 0:4] = np.round(bounding_boxes[:, 0:4])

        # STAGE 3
        img_boxes = get_image_boxes(bounding_boxes, image, size=48)
        if len(img_boxes) == 0:
            return [], []
        img_boxes = torch.FloatTensor(img_boxes).to(device)
        output = onet(img_boxes)
        landmarks = output[0].cpu().numpy()  # shape [n_boxes, 10]
        offsets = output[1].cpu().numpy()  # shape [n_boxes, 4]
        probs = output[2].cpu().numpy()  # shape [n_boxes, 2]

        keep = np.where(probs[:, 1] > thresholds[2])[0]
        bounding_boxes = bounding_boxes[keep]
        bounding_boxes[:, 4] = probs[keep, 1].reshape((-1,))
        offsets = offsets[keep]
        landmarks = landmarks[keep]

        # compute landmark points: O-Net predicts them relative to the box,
        # so scale by the box size and shift by its top-left corner
        box_w = bounding_boxes[:, 2] - bounding_boxes[:, 0] + 1.0
        box_h = bounding_boxes[:, 3] - bounding_boxes[:, 1] + 1.0
        xmin, ymin = bounding_boxes[:, 0], bounding_boxes[:, 1]
        landmarks[:, 0:5] = (np.expand_dims(xmin, 1)
                             + np.expand_dims(box_w, 1) * landmarks[:, 0:5])
        landmarks[:, 5:10] = (np.expand_dims(ymin, 1)
                              + np.expand_dims(box_h, 1) * landmarks[:, 5:10])

        bounding_boxes = calibrate_box(bounding_boxes, offsets)
        keep = nms(bounding_boxes, nms_thresholds[2], mode='min')
        bounding_boxes = bounding_boxes[keep]
        landmarks = landmarks[keep]

    return bounding_boxes, landmarks
transform) # img, t = dataset.__getitem__(2) # print(img.shape, t.shape) dataloader = data.DataLoader(dataset, batch_size=1, shuffle=True, num_workers=4) """ for img, landmark in dataloader: print(img.shape, landmark.shape) """ # optimizer = optim.Adam() device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu') cudnn.benchmark = True net = ONet() #net = net.to(device) def train_onet(model, dataloader, base_lr=1e-4, epoch=150): optimizer = optim.Adam(model.parameters(), lr=base_lr) for ep in range(epoch): for img, landmark in dataloader: img = img.to(device) landmark = landmark.to(device) optimizer.zero_grad() landmarks_pred = model(img) loss = landmark_loss(landmark, landmarks_pred) loss.backward() optimizer.step()
def train_onet(model_store_path, end_epoch, imdb, batch_size, frequent=50, base_lr=0.01, use_cuda=True):
    """Train O-Net and write a checkpoint after every epoch.

    Args:
        model_store_path: Directory for checkpoints; created when missing.
        end_epoch: Last epoch number; epochs run from 1 to ``end_epoch``.
        imdb: Training image database handed to ``TrainImageReader``.
        batch_size: Batch size handed to ``TrainImageReader``.
        frequent: A progress line is printed every ``frequent`` batches.
        base_lr: Learning rate handed to Adam.
        use_cuda: Move the network and every batch to the GPU when true.
    """
    if not os.path.exists(model_store_path):
        os.makedirs(model_store_path)

    criterion = LossFn()
    onet = ONet(is_train=True)
    onet.train()
    print(use_cuda)
    if use_cuda:
        onet.cuda()

    optimizer = torch.optim.Adam(onet.parameters(), lr=base_lr)
    reader = TrainImageReader(imdb, 48, batch_size, shuffle=True)

    for cur_epoch in range(1, end_epoch + 1):
        reader.reset()

        for step, (image, (gt_label, gt_bbox, gt_landmark)) in enumerate(reader):
            # Convert every image of the batch to a tensor and stack them.
            frames = [
                image_tools.convert_image_to_tensor(image[i, :, :, :])
                for i in range(image.shape[0])
            ]
            im_tensor = Variable(torch.stack(frames))
            gt_label = Variable(torch.from_numpy(gt_label).float())
            gt_bbox = Variable(torch.from_numpy(gt_bbox).float())
            gt_landmark = Variable(torch.from_numpy(gt_landmark).float())

            if use_cuda:
                im_tensor, gt_label, gt_bbox, gt_landmark = (
                    t.cuda()
                    for t in (im_tensor, gt_label, gt_bbox, gt_landmark)
                )

            cls_pred, box_offset_pred, landmark_offset_pred = onet(im_tensor)

            cls_loss = criterion.cls_loss(gt_label, cls_pred)
            box_offset_loss = criterion.box_loss(
                gt_label, gt_bbox, box_offset_pred)
            lm_loss = criterion.landmark_loss(
                gt_label, gt_landmark, landmark_offset_pred)

            # Weighted sum of the three task losses.
            all_loss = cls_loss * 0.8 + box_offset_loss * 0.6 + lm_loss * 1.5

            if step % frequent == 0:
                accuracy = compute_accuracy(cls_pred, gt_label)
                shown = tuple(
                    value.data.cpu().numpy()
                    for value in (accuracy, cls_loss, box_offset_loss,
                                  lm_loss, all_loss)
                )
                print(
                    "%s : Epoch: %d, Step: %d, accuracy: %s, det loss: %s, bbox loss: %s, landmark loss: %s, all_loss: %s, lr:%s "
                    % ((datetime.datetime.now(), cur_epoch, step)
                       + shown + (base_lr,)))

            optimizer.zero_grad()
            all_loss.backward()
            optimizer.step()

        # Per-epoch checkpoints: the weights alone, then the pickled module.
        weights_file = os.path.join(
            model_store_path, "onet_epoch_%d.pt" % cur_epoch)
        torch.save(onet.state_dict(), weights_file)
        module_file = os.path.join(
            model_store_path, "onet_epoch_model_%d.pkl" % cur_epoch)
        torch.save(onet, module_file)
# Report the spatial grid of the P-Net offset map (axes 2 and 3 of `offset`).
print(' <PNet> feature map grid num :{}, height :{}, width :{}'.format(
    offset.shape[2] * offset.shape[3], offset.shape[2], offset.shape[3]))
#------------------------------------------------------------------------------------ RNet
# Push a random batch of 56 crops (3 x 24 x 24) through R-Net and print the
# output shapes.
print('\n ----------------------------------------------------------')
input = torch.randn(56, 3, 24, 24)
m_RNet = RNet()
# print('\n',m_RNet)
with torch.no_grad():
    label, offset = m_RNet(input)
print('\n <RNet> output :')
print(' <RNet> label :', label.shape)
print(' <RNet> offset :', offset.shape)
#------------------------------------------------------------------------------------ ONet
# Same check for O-Net with a random batch of 23 crops (3 x 48 x 48).
# NOTE(review): O-Net is unpacked as exactly two outputs here -- confirm that
# this ONet does not also return landmarks.
print('\n ----------------------------------------------------------')
input = torch.randn(23, 3, 48, 48)
m_ONet = ONet()
# print('\n',m_ONet)
with torch.no_grad():
    label, offset = m_ONet(input)
print('\n <ONet> output :')
print(' <ONet> label :', label.shape)
print(' <ONet> offset :', offset.shape)