def test_camera():
    """Run the PNet -> RNet -> ONet cascade on the default webcam and display it.

    Detection is refreshed on every 10th frame; the most recent boxes and
    landmarks are drawn on the frames in between. Press ``q`` in the preview
    window to quit. The capture device and the preview window are released
    on every exit path, including when a detector raises.
    """
    video_capture = cv2.VideoCapture(0)
    pnet = PNet()
    rnet = RNet()
    onet = ONet()
    frame_counter = 0
    boxes, landmarks = None, None
    try:
        while True:
            ret, frame = video_capture.read()
            if not ret:
                # No frame could be grabbed (device missing or stream ended).
                # Looping again would spin forever without servicing the GUI.
                break
            if frame_counter % 10 == 0:
                # Start from "nothing detected" so stale landmarks from an
                # earlier frame are never paired with fresh boxes.
                boxes, landmarks = None, None
                candidates = pnet.detect(frame)
                if len(candidates) > 0:
                    candidates = rnet.detect(frame, candidates)
                if len(candidates) > 0:
                    boxes, landmarks = onet.detect(frame, candidates)
            if boxes is not None:
                draw(frame, boxes, landmarks)
            cv2.imshow('image', frame)
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
            frame_counter += 1
    finally:
        video_capture.release()
        cv2.destroyAllWindows()
def load(self):
    """Build the cascade sub-graphs in ``self.m_session`` and expose runners.

    ``self.m_model_path`` holds up to three model directories (P-, R-, O-Net,
    in that order). For every directory that is present and truthy, the
    matching network is built under its own variable scope, its weights are
    loaded from ``det1.npy`` / ``det2.npy`` / ``det3.npy``, and a callable is
    stored on ``self.pnet`` / ``self.rnet`` / ``self.onet`` that feeds an
    image batch to the network input and returns the fetched output tensors.
    """
    session = self.m_session
    model_dirs = self.m_model_path
    n_dirs = len(model_dirs)

    p_path = model_dirs[0]
    r_path = model_dirs[1] if n_dirs >= 2 else None
    # The third entry is only honoured when exactly three paths are given.
    o_path = model_dirs[2] if n_dirs == 3 else None

    def make_runner(fetches, input_name):
        # Bind the fetch list and input tensor name for one network.
        return lambda img: session.run(fetches, feed_dict={input_name: img})

    if p_path:
        with tf.variable_scope('pnet'):
            p_input = tf.placeholder(tf.float32, (None, None, None, 3), 'input')
            PNet({'data': p_input}).load(os.path.join(p_path, 'det1.npy'), session)
        self.pnet = make_runner(
            ('pnet/conv4-2/BiasAdd:0', 'pnet/prob1:0'), 'pnet/input:0')

    if r_path:
        with tf.variable_scope('rnet'):
            r_input = tf.placeholder(tf.float32, (None, 24, 24, 3), 'input')
            RNet({'data': r_input}).load(os.path.join(r_path, 'det2.npy'), session)
        self.rnet = make_runner(
            ('rnet/conv5-2/conv5-2:0', 'rnet/prob1:0'), 'rnet/input:0')

    if o_path:
        with tf.variable_scope('onet'):
            o_input = tf.placeholder(tf.float32, (None, 48, 48, 3), 'input')
            ONet({'data': o_input}).load(os.path.join(o_path, 'det3.npy'), session)
        self.onet = make_runner(
            ('onet/conv6-2/conv6-2:0', 'onet/conv6-3/conv6-3:0', 'onet/prob1:0'),
            'onet/input:0')
def __init__(self, min_face=20, thresh=[0.6, 0.7, 0.7], scale=0.79, stride=2, cellsize=12, use_cuda=True):
    """Create the three cascade networks, load their weights and set eval mode.

    Params:
        min_face:  {int}   stored as ``self.min_face``
        thresh:    {list[float]} three thresholds, stored as a private copy
        scale:     {float} stored as ``self.scale``
        stride:    {int}   stored as ``self.stride``
        cellsize:  {int}   stored as ``self.cellsize``
        use_cuda:  {bool}  move the networks to the GPU when one is available
    """
    self.min_face = min_face
    # Copy: the default list object is shared by every call, so storing it
    # directly would let one instance's edits leak into all the others.
    self.thresh = list(thresh)
    self.scale = scale
    self.stride = stride
    self.cellsize = cellsize

    self.pnet = PNet()
    self.rnet = RNet()
    self.onet = ONet()
    for net in (self.pnet, self.rnet, self.onet):
        self._load_state(net)

    if cuda.is_available() and use_cuda:
        for net in (self.pnet, self.rnet, self.onet):
            net.cuda()

    for net in (self.pnet, self.rnet, self.onet):
        net.eval()

    self.use_cuda = use_cuda
def export_to_pb():
    """Instantiate PNet, RNet and ONet, then export each one via ``export_to_pb``."""
    # The tuple is built first, so all three networks exist before any export
    # starts, matching the original construct-then-export order.
    for net in (PNet(), RNet(), ONet()):
        net.export_to_pb()
def test_img(img_path='C:\\Users\\lenovo\\Desktop\\0_Parade_Parade_0_693.jpg'):
    """Run the full cascade on a single image file and display the result.

    Params:
        img_path: {str} image to load; defaults to the path that was
                  previously hard-coded.
    Raises:
        FileNotFoundError: when OpenCV cannot read ``img_path``.
    """
    pnet = PNet()
    rnet = RNet()
    onet = ONet()

    img = cv2.imread(img_path)
    if img is None:
        # cv2.imread signals failure by returning None instead of raising,
        # which would otherwise surface as an obscure error in the detector.
        raise FileNotFoundError("cannot read image: {}".format(img_path))

    boxes = pnet.detect(img)
    boxes = rnet.detect(img, boxes)
    boxes, landmarks = onet.detect(img, boxes)
    draw(img, boxes, landmarks)

    cv2.imshow('image', img)
    cv2.waitKey()
    cv2.destroyAllWindows()
def __init__(self, min_face_size=20.0, thresholds=[0.6, 0.7, 0.8], nms_thresholds=[0.7, 0.7, 0.7], device=None):
    """Pick the compute device and build the three cascade networks on it.

    Params:
        min_face_size:  {float} stored as ``self.min_face_size``
        thresholds:     {list[float]} per-stage thresholds (stored as a copy)
        nms_thresholds: {list[float]} per-stage NMS thresholds (stored as a copy)
        device:         'gpu'/'cuda' to request the GPU (falls back to the CPU
                        when CUDA is unavailable), 'cpu'/'none' to force the
                        CPU, anything else to auto-select.
    """
    # Select the device
    if device in ['gpu', 'cuda']:
        if torch.cuda.is_available():
            self.device = torch.device('cuda')
        else:
            # Previously the CPU fallback was immediately overwritten by an
            # unconditional assignment of the CUDA device.
            print("cuda not available, using cpu instead")
            self.device = torch.device('cpu')
    elif device in ['cpu', 'none']:
        self.device = torch.device('cpu')
    else:
        self.device = torch.device(
            'cuda' if torch.cuda.is_available() else 'cpu')
    print("Using {}...\n".format(self.device))

    # Copy the lists so the shared default objects are never aliased.
    self.thresholds = list(thresholds)
    self.nms_thresholds = list(nms_thresholds)
    self.min_face_size = min_face_size
    self.empty_float = torch.tensor([], dtype=torch.float, device=self.device)

    self.pnet = PNet().to(device=self.device).eval()
    self.rnet = RNet().to(device=self.device).eval()
    self.onet = ONet().to(device=self.device).eval()
def main():
    """Train the R-Net model, halving the learning rate when validation stalls.

    Reads the module-level ``w_emb`` / ``c_emb`` pickle paths and replaces
    those globals with the unpickled objects. Every
    ``learning_rate_change_freq`` iterations the validation loss is measured;
    if it did not improve on the best value seen so far the learning rate is
    halved. A checkpoint is written to ``./model`` every ``save_freq``
    iterations.
    """
    global w_emb, c_emb, initial_learning_rate
    # NOTE(review): pickle is only safe on trusted files.
    with open(w_emb, 'rb') as handle:
        w_emb = pickle.load(handle)
    with open(c_emb, 'rb') as handle:
        c_emb = pickle.load(handle)

    train, val = get_batch()
    handle = tf.placeholder(tf.string, shape=[])
    iterator = tf.data.Iterator.from_string_handle(handle, train.output_types, train.output_shapes)
    result = []
    current_best_loss = 20
    model = RNet(iterator, w_emb[1], c_emb[1])
    learning_rate = initial_learning_rate

    # Build the train/eval toggles once. Creating tf.assign ops inside the
    # loop adds new nodes to the graph on every validation step.
    enable_train = tf.assign(model.is_train, tf.constant(True, dtype=tf.bool))
    disable_train = tf.assign(model.is_train, tf.constant(False, dtype=tf.bool))

    print("start training...")
    print("save every " +str(save_freq)+" iterations")
    print("check loss every " +str(learning_rate_change_freq)+" iterations")
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        saver = tf.train.Saver(max_to_keep=500)
        train_handle = sess.run(train.make_one_shot_iterator().string_handle())
        val_handle = sess.run(val.make_one_shot_iterator().string_handle())
        sess.run(enable_train)
        sess.run(tf.assign(model.lr, tf.constant(learning_rate)))
        for x in range(1, iterations + 1):
            loss, _ = sess.run([model.loss, model.train_op],
                               feed_dict={handle: train_handle})
            if x % learning_rate_change_freq == 0:
                sess.run(disable_train)
                # Evaluate only. Running train_op here would update the
                # weights on validation data.
                val_loss = sess.run(model.loss, feed_dict={handle: val_handle})
                sess.run(enable_train)
                if val_loss < current_best_loss:
                    current_best_loss = val_loss
                else:
                    print("learning rate changed")
                    learning_rate *= 0.5
                    sess.run(tf.assign(model.lr, tf.constant(learning_rate)))
                result.append((val_loss, loss))
            if x % save_freq == 0:
                filename = os.path.join("./model", "model_{}.ckpt".format(x))
                saver.save(sess, filename)
def main(argv):
    """Run a restored R-Net over the test set and write the answers as CSV.

    Params:
        argv: sequence where ``argv[0]`` is passed to ``make_example`` and
              ``argv[1]`` is the output CSV path.

    Each output row holds the example uuid and the space-separated indices
    spanned by the predicted answer. At most ``len(test)`` rows are written,
    so the padding produced by the repeated last batch is dropped.
    """
    global rf, w_emb_d, c_emb_d, model_d
    # NOTE(review): pickle is only safe on trusted files.
    with open(w_emb_d, 'rb') as handle:
        w_emb = pickle.load(handle)
    with open(c_emb_d, 'rb') as handle:
        c_emb = pickle.load(handle)

    test = make_example(argv[0])
    model = RNet(tf.data.TFRecordDataset(rf).map(
        parser()).repeat().batch(batch_size).make_one_shot_iterator(),
                 w_emb[1],
                 c_emb[1],
                 trainable=False)

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        saver = tf.train.Saver()
        saver.restore(sess, model_d)
        sess.run(tf.assign(model.is_train, tf.constant(False, dtype=tf.bool)))
        final = []
        for _ in range(len(test) // batch_size + 1):
            ys = model.ys
            qa_id, _, y1, y2 = sess.run(
                [model.qa_id, model.loss, ys[0], ys[1]])
            qq = []
            for qid, p1, p2 in zip(qa_id.tolist(), y1.tolist(), y2.tolist()):
                example = test[str(qid)]
                qq.append((example["uuid"],
                           example["spans"][p1][0],
                           example["spans"][p2][1] - 1))
            final.append(qq)

    # The context manager closes the file even if a row fails to serialise.
    with open(argv[1], 'w') as f:
        w = csv.writer(f)
        w.writerow(['id', 'answer'])
        count = 0
        for l in final:
            for k in l:
                if count < len(test):
                    w.writerow([
                        k[0], " ".join(str(x) for x in range(k[1], k[2] + 1))
                    ])
                    print(k)
                    count += 1
def __init__(self):
    """Select CUDA when available, build the cascade nets and set the defaults.

    Stores the device name and matching float tensor type, the three networks
    in eval mode on that device, and the fixed scale / threshold /
    NMS-threshold lists.
    """
    if torch.cuda.is_available():
        self.device, self.tensor = 'cuda', torch.cuda.FloatTensor
    else:
        self.device, self.tensor = 'cpu', torch.FloatTensor

    target = self.device
    self._pnet = PNet().to(target).eval()
    self._rnet = RNet().to(target).eval()
    self._onet = ONet().to(target).eval()

    self.scales = [0.3, 0.15, 0.07, 0.035]
    self.thresholds = [0.7, 0.8, 0.9]
    self.nms_thresholds = [0.7, 0.7, 0.7]
def __init__(self, min_face=20, thresh=[0.6, 0.7, 0.7], scale=0.79, stride=2, cellsize=12):
    """Create the three cascade networks and load their saved weights.

    Params:
        min_face:  {int}   stored as ``self.min_face``
        thresh:    {list[float]} three thresholds, stored as a private copy
        scale:     {float} stored as ``self.scale``
        stride:    {int}   stored as ``self.stride``
        cellsize:  {int}   stored as ``self.cellsize``
    """
    self.min_face = min_face
    # Copy: the default list object is shared by every call, so storing it
    # directly would let one instance's edits leak into all the others.
    self.thresh = list(thresh)
    self.scale = scale
    self.stride = stride
    self.cellsize = cellsize

    self.pnet = PNet()
    self.rnet = RNet()
    self.onet = ONet()
    for net in (self.pnet, self.rnet, self.onet):
        self._load_state(net)
def test(cfg):
    """Evaluate a saved R-Net on the dev set and dump its answers to JSON.

    Params:
        cfg: mapping that provides 'dev_eval_file', 'dev_record_file',
             'batch_size', 'word2ind_file', 'args_filename' and
             'dump_filename'.

    Writes the predicted answers to ``logs/answers.json`` and logs the loss,
    F1 and exact-match metrics returned by ``evaluation``.
    """
    logging.info('Model is loading...')
    with open(cfg['dev_eval_file'], "r") as fh:
        dev_eval_file = json.load(fh)
    dev_dataset = SQuADDataset(cfg['dev_record_file'], -1, cfg['batch_size'],
                               cfg['word2ind_file'])

    # Close the args file deterministically instead of leaking the handle.
    # NOTE(review): pickle is only safe on trusted files.
    with open(cfg['args_filename'], 'rb') as fh:
        model_args = pickle.load(fh)
    model = RNet(**model_args)
    model.load_state_dict(torch.load(cfg['dump_filename']))
    model.to(device)

    metrics, answer_dict = evaluation(model, dev_dataset, dev_eval_file,
                                      len(dev_dataset))
    with open('logs/answers.json', 'w') as f:
        json.dump(answer_dict, f)
    logging.info("TEST loss %f F1 %f EM %f\n", metrics["loss"], metrics["f1"],
                 metrics["exact_match"])
def train(args):
    """Build the requested MTCNN sub-network and run it over the WIDER data.

    Params:
        args: namespace providing ``net`` ('pnet' | 'rnet' | 'onet'), ``cuda``
              (device index), ``lr``, ``momentum`` and ``epoch``.
    Raises:
        Exception: when ``args.net`` names an unknown network.

    NOTE(review): this loop only runs the forward pass and prints the output.
    ``loss_func`` and ``optimizer`` are created but never used, so no weights
    are updated yet.
    """
    if torch.cuda.is_available():
        device = torch.device("cuda")
        torch.cuda.set_device(args.cuda)
    else:
        device = torch.device("cpu")

    if args.net == "pnet":
        model = PNet(device)
    elif args.net == "rnet":
        model = RNet()
    elif args.net == "onet":
        model = ONet()
    else:
        raise Exception("Net Type Error!")

    loss_func = nn.CrossEntropyLoss()
    optimizer = optim.SGD(model.parameters(), args.lr, args.momentum)

    transformed_data = WIDER_Dataset(
        data_path, anno_filename,
        transforms.Compose([Resize((12, 12)),
                            Normalize(),
                            To_Tensor()]))
    trainloader = DataLoader(transformed_data,
                             batch_size=1,
                             shuffle=True,
                             collate_fn=transformed_data.collate_fn,
                             num_workers=4,
                             pin_memory=True)

    #model.to(device=device)
    for epoch in range(args.epoch):
        model.train()
        for i_batch, (images, boxes) in enumerate(trainloader):
            # NOTE(review): Tensor.type() and Tensor.to() return new tensors;
            # the three results below are discarded, so `images` and `boxes`
            # keep their original dtype and device. They are left as-is
            # because the model itself is not moved to `device` here (see the
            # commented-out line above) -- confirm the intended placement
            # before assigning them.
            images.type(torch.DoubleTensor)
            images.to(device=device)
            boxes[0].to(device=device, dtype=torch.float)
            output = model(images)
            # Was `ptint(...)`, which raised NameError on the first batch.
            print(output.cpu())
full_size = opt.full_size print('===> Loading datasets') train_set = get_training_set(opt.upscale_factor, opt.full_size) test_set = get_test_set(opt.upscale_factor, opt.full_size) training_data_loader = DataLoader(dataset=train_set, num_workers=opt.threads, batch_size=opt.batchSize, shuffle=True) testing_data_loader = DataLoader(dataset=test_set, num_workers=opt.threads, batch_size=opt.testBatchSize, shuffle=True) print('===> Building model') model = RNet(upscale_factor=opt.upscale_factor, full_size=opt.full_size) model.to(device) criterion = nn.MSELoss() #Three optimizers, one for each output optimizerLow = optim.Adam(model.parameters(), lr=opt.lr) optimizerInt1 = optim.Adam(model.parameters(), lr=opt.lr) optimizerInt2 = optim.Adam(model.parameters(), lr=opt.lr) def train(epoch): low_loss = 0 int1_loss = 0 int2_loss = 0 for iteration, batch in enumerate(training_data_loader, 1): inimg, int1, int2, target = batch[0].to(device), batch[1].to( device), batch[2].to(device), batch[3].to(device)
def train(model_params, launch_params):
    """Train (or fine-tune) R-Net on SQuAD, then save its weights and arguments.

    Params:
        model_params:  dict of ``RNet`` constructor arguments; the loaded
                       'word_mat' and 'char_mat' arrays are added to it
                       in place.
        launch_params: dict of run settings (file paths, step counts, logging
                       intervals, optimizer hyper-parameters).
                       'fine_tuning_steps' is set to 0 in place when not
                       fine-tuning.

    A ``RuntimeError`` raised by a single step is logged and that step is
    skipped; Ctrl-C stops the loop early. In both cases the model state and
    ``model_params`` are still written to 'dump_filename' / 'args_filename'.
    """
    with open(launch_params['word_emb_file'], "r") as fh:
        word_mat = np.array(json.load(fh), dtype=np.float32)
    with open(launch_params['char_emb_file'], "r") as fh:
        char_mat = np.array(json.load(fh), dtype=np.float32)
    # Loaded but not used below; kept so a missing file still fails early.
    with open(launch_params['train_eval_file'], "r") as fh:
        train_eval_file = json.load(fh)
    with open(launch_params['dev_eval_file'], "r") as fh:
        dev_eval_file = json.load(fh)

    writer = SummaryWriter(os.path.join(launch_params['log'], launch_params['prefix']))

    lr = launch_params['learning_rate']
    base_lr = 1.0
    warm_up = launch_params['lr_warm_up_num']
    model_params['word_mat'] = word_mat
    model_params['char_mat'] = char_mat

    logging.info('Load dataset and create model.')
    dev_dataset = SQuADDataset(launch_params['dev_record_file'], launch_params['test_num_batches'],
                               launch_params['batch_size'], launch_params['word2ind_file'])
    if launch_params['fine_tuning']:
        train_dataset = SQuADDataset(launch_params['train_record_file'], launch_params['fine_tuning_steps'],
                                     launch_params['batch_size'], launch_params['word2ind_file'])
        # NOTE(review): pickle is only safe on trusted files.
        with open(launch_params['args_filename'], 'rb') as fh:
            model_args = pickle.load(fh)
        model = RNet(**model_args)
        model.load_state_dict(torch.load(launch_params['dump_filename']))
        model.to(device)
    else:
        train_dataset = SQuADDataset(launch_params['train_record_file'], launch_params['num_steps'],
                                     launch_params['batch_size'], launch_params['word2ind_file'])
        model = RNet(**model_params).to(device)
        launch_params['fine_tuning_steps'] = 0

    params = filter(lambda param: param.requires_grad, model.parameters())
    optimizer = optim.Adam(params, lr=base_lr, betas=(launch_params['beta1'], launch_params['beta2']),
                           eps=1e-7, weight_decay=3e-7)
    # With base_lr == 1 the lambda's value is the effective learning rate:
    # logarithmic warm-up for `warm_up` steps, then the constant `lr`.
    cr = lr / log2(warm_up)
    scheduler = optim.lr_scheduler.LambdaLR(optimizer,
                                            lr_lambda=lambda ee: cr * log2(ee + 1) if ee < warm_up else lr)

    logging.info('Start training.')
    step_offset = launch_params['fine_tuning_steps']
    for step in range(launch_params['num_steps']):
        try:
            passage_w, passage_c, question_w, question_c, y1, y2, ids = train_dataset[step]
            passage_w, passage_c = passage_w.to(device), passage_c.to(device)
            question_w, question_c = question_w.to(device), question_c.to(device)
            y1, y2 = y1.to(device), y2.to(device)
            loss, p1, p2 = model.train_step([passage_w, passage_c, question_w, question_c],
                                            y1, y2, optimizer, scheduler)
            if step % launch_params['train_interval'] == 0:
                logging.info('Iteration %d; Loss: %f', step + step_offset, loss)
                writer.add_scalar('Loss', loss, step + step_offset)
            if step % launch_params['train_sample_interval'] == 0:
                # Decode the first sample of the batch; the end index is
                # searched only at or after the predicted start.
                start = torch.argmax(p1[0, :]).item()
                end = torch.argmax(p2[0, start:]).item() + start
                passage = train_dataset.decode(passage_w)
                question = train_dataset.decode(question_w)
                generated_answer = train_dataset.decode(passage_w[:, start:end + 1])
                real_answer = train_dataset.decode(passage_w[:, y1[0]:y2[0] + 1])
                logging.info('Train Sample:\n Passage: %s\nQuestion: %s\nOriginal answer: %s\nGenerated answer: %s',
                             passage, question, real_answer, generated_answer)
            if step % launch_params['test_interval'] == 0:
                metrics, _ = evaluation(model, dev_dataset, dev_eval_file, launch_params['test_num_batches'])
                logging.info("TEST loss %f F1 %f EM %f", metrics['loss'], metrics['f1'], metrics['exact_match'])
                writer.add_scalar('Test_loss', metrics['loss'], step)
                writer.add_scalar('Test_f1', metrics['f1'], step)
                writer.add_scalar('Test_em', metrics['exact_match'], step)
        except RuntimeError as e:
            # Best effort: log the failed step and carry on with the next one.
            logging.error(str(e))
        except KeyboardInterrupt:
            break

    # Flush pending summaries before persisting the model.
    writer.close()
    torch.save(model.cpu().state_dict(), launch_params['dump_filename'])
    with open(launch_params['args_filename'], 'wb') as fh:
        pickle.dump(model_params, fh)
    logging.info('Model has been saved.')
from model import detect_faces, show_bboxes, PNet, RNet, ONet
from PIL import Image
import numpy as np


def test(filename, save_name, model):
    """Detect faces in ``filename`` and save the annotated image to ``save_name``.

    ``model`` is handed to ``detect_faces`` unchanged; the script below passes
    a ``(pnet, rnet, onet)`` tuple.
    """
    source = Image.open(filename)
    boxes, points = detect_faces(source, model)
    annotated = show_bboxes(source, boxes, facial_landmarks=points)
    annotated.save(save_name)


if __name__ == "__main__":
    pnet, rnet, onet = PNet(), RNet(), ONet()
    model = (pnet, rnet, onet)
    # Further sample inputs, currently disabled:
    # test("test_1.jpg", "1.jpg", model)
    # test("test_2.jpg", "2.jpg", model)
    # test("test_3.jpg", "3.jpg", model)
    test("test.jpg", "1.jpg", model)
default='data/train_data.pkl', help='Train Set', type=str) parser.add_argument('--valid_data', default='data/valid_data.pkl', help='Validation Set', type=str) # parser.add_argument('model', help='Model to evaluate', type=str) args = parser.parse_args() print('Creating the model...', end='') word_vector_dim = args.word_vector_dim model = RNet(hdim=args.hdim, dropout_rate=args.dropout, N=None, M=None, word2vec_dim=word_vector_dim, char_level_embeddings=args.char_level_embeddings) print('Done!') print('Compiling Keras model...', end='') optimizer_config = { 'class_name': args.optimizer, 'config': { 'lr': args.lr } if args.lr else {} } model.compile(optimizer=optimizer_config, loss=args.loss, metrics=['accuracy']) print('Done!') print('Loading datasets...', end='')
@LastEditTime: 2019-11-06 15:37:05 @Update: ''' import os import torch from torch import nn from torch import optim from torch.optim import lr_scheduler from config import configer from dataset import MtcnnData from model import RNet from model import MtcnnLoss, LossFn from trainer import MtcnnTrainer net = RNet() # state = torch.load('ckptdir/RNet_0025.pkl', map_location='cpu')['net_state']; net.load_state_dict(state) params = net.parameters() trainset = MtcnnData(configer.datapath, 24, 'train', save_in_memory=False) validset = MtcnnData(configer.datapath, 24, 'valid', save_in_memory=False) testset = MtcnnData(configer.datapath, 24, 'test', save_in_memory=False) # criterion = MtcnnLoss(1.0, 0.5, 0.0) criterion = LossFn(1.0, 0.5, 1.0) optimizer = optim.Adam lr_scheduler = lr_scheduler.ExponentialLR trainer = MtcnnTrainer(configer, net, params, trainset, validset, testset, criterion, optimizer, lr_scheduler) trainer.train()
class MtcnnDetector(object):
    """Three-stage (PNet -> RNet -> ONet) cascaded face detector.

    Params:
        min_face:  {int}   sets the first pyramid scale (12 / min_face)
        thresh:    {list[float]} score thresholds for PNet, RNet and ONet
        scale:     {float} factor applied to the pyramid scale after each PNet pass
        stride:    {int}   maps a PNet output cell index back to input pixels
        cellsize:  {int}   side length of the window covered by one PNet output cell
        use_cuda:  {bool}  run the networks on the GPU when one is available
    """

    def __init__(self, min_face=20, thresh=[0.6, 0.7, 0.7], scale=0.79, stride=2, cellsize=12, use_cuda=True):
        self.min_face = min_face
        # Copy: the default list object is shared by every call, so storing
        # it directly would let one instance's edits leak into the others.
        self.thresh = list(thresh)
        self.scale = scale
        self.stride = stride
        self.cellsize = cellsize

        self.pnet = PNet()
        self.rnet = RNet()
        self.onet = ONet()
        self._load_state(self.pnet)
        self._load_state(self.rnet)
        self._load_state(self.onet)

        if cuda.is_available() and use_cuda:
            self.pnet.cuda()
            self.rnet.cuda()
            self.onet.cuda()

        self.pnet.eval()
        self.rnet.eval()
        self.onet.eval()

        self.use_cuda = use_cuda

    def _load_state(self, net):
        """Load ``net``'s weights from its checkpoint file, if that file exists.

        Params:
            net: network whose ``_get_name()`` selects the checkpoint file
        Notes:
            A missing checkpoint is skipped silently, leaving the network
            with its initial weights.
        """
        ckpt = '../mtcnn_py/ckptdir/{}.pkl'.format(net._get_name())
        if not os.path.exists(ckpt):
            return
        print("load state from {}".format(ckpt))
        # Load onto the CPU: the networks are still on the CPU at this point
        # and __init__ moves them to the GPU afterwards when requested, so
        # mapping the checkpoint to CUDA only allocated GPU memory needlessly
        # (including when use_cuda is False).
        ckpt = torch.load(ckpt, map_location='cpu')
        net.load_state_dict(ckpt['net_state'])

    def detect_image(self, image):
        """Detect faces in a single image.

        Params:
            image: {ndarray(H, W, C)}
        Returns:
            boxes_c:  refined boxes from the last stage, one row per box as
                      x1, y1, x2, y2, score (empty array when nothing is found)
            landmark: landmark coordinates from the last stage
                      (empty array when nothing is found)
        """
        boxes, boxes_c, landmark = self._detect_pnet(image)
        boxes, boxes_c, landmark = self._detect_rnet(image, boxes_c)
        boxes, boxes_c, landmark = self._detect_onet(image, boxes_c)
        return boxes_c, landmark

    def _detect_pnet(self, image):
        """Propose candidate boxes by running PNet over an image pyramid.

        Params:
            image: {ndarray(H, W, C)}
        Returns:
            boxes:    {ndarray(n_boxes, 5)} x1, y1, x2, y2, score
            boxes_c:  {ndarray(n_boxes, 5)} refined x1, y1, x2, y2, score
            landmark: None
        Notes:
            When no pyramid level yields a box, both arrays are empty 1-D arrays.
        """
        NETSIZE = 12

        def _resize_image(image, scale):
            """Resize ``image`` {ndarray(h, w, c)} by the factor ``scale`` {float}."""
            h, w, c = image.shape
            hn = int(h * scale)
            wn = int(w * scale)
            resized = cv2.resize(image, (wn, hn), interpolation=cv2.INTER_LINEAR)
            return resized

        def _generate_box(cls_map, reg_map, thresh, scale):
            """Turn PNet output maps into boxes in original-image coordinates.

            Params:
                cls_map: {ndarray(h, w)}
                reg_map: {ndarray(4, h, w)}
                thresh:  {float}
                scale:   {float}
            Returns:
                bboxes: {ndarray(n_boxes, 9)} x1, y1, x2, y2, score,
                        offsetx1, offsety1, offsetx2, offsety2
                        (an empty array when no cell exceeds ``thresh``)
            """
            idx = np.where(cls_map > thresh)
            if idx[0].size == 0:
                return np.array([])

            # idx[0] holds row (y) indices, idx[1] column (x) indices.
            x1 = np.round(self.stride * idx[1] / scale)
            y1 = np.round(self.stride * idx[0] / scale)
            x2 = np.round((self.stride * idx[1] + self.cellsize) / scale)
            y2 = np.round((self.stride * idx[0] + self.cellsize) / scale)

            score = cls_map[idx[0], idx[1]]
            reg = np.array([reg_map[i, idx[0], idx[1]] for i in range(4)])
            boxes = np.vstack([x1, y1, x2, y2, score, reg]).T
            return boxes

        # ======================= generate boxes ===========================
        cur_scale = NETSIZE / self.min_face
        cur_img = _resize_image(image, cur_scale)
        all_boxes = None
        while min(cur_img.shape[:-1]) >= NETSIZE:
            ## forward network
            X = ToTensor()(cur_img).unsqueeze(0)
            if cuda.is_available() and self.use_cuda:
                X = X.cuda()
            with torch.no_grad():
                y_pred = self.pnet(X)[0].cpu().detach().numpy()

            ## generate bbox: channel 0 is the face logit, channels 1..4 the offsets
            cls_map = sigmoid(y_pred[0, :, :])
            reg_map = y_pred[1:5, :, :]
            boxes = _generate_box(cls_map, reg_map, self.thresh[0], cur_scale)

            ## update scale (done before the `continue` so the loop always advances)
            cur_scale *= self.scale
            cur_img = _resize_image(image, cur_scale)

            if boxes.size == 0:
                continue

            ## save bbox
            if all_boxes is None:
                all_boxes = boxes
            else:
                all_boxes = np.concatenate([all_boxes, boxes], axis=0)
        # ==================================================================

        if all_boxes is None:
            return np.array([]), np.array([]), None

        ## nms across all pyramid levels
        all_boxes = all_boxes[self._nms(all_boxes[:, 0:5], 0.6, 'Union')]

        ## parse
        boxes = all_boxes[:, :4]                    # (n_boxes, 4)
        score = all_boxes[:, 4].reshape((-1, 1))    # (n_boxes, 1)
        offset = all_boxes[:, 5:]                   # (n_boxes, 4)

        ## refine bbox
        boxes_c = self._cal_box(boxes, offset)

        ## concat
        boxes = np.concatenate([boxes, score], axis=1)
        boxes_c = np.concatenate([boxes_c, score], axis=1)

        ## PNet results carry no landmarks
        landmark = None

        return boxes, boxes_c, landmark

    def _detect_rnet(self, image, bboxes):
        """Re-score and refine candidate boxes with RNet.

        Params:
            image:  {ndarray(H, W, C)}
            bboxes: {ndarray(n_boxes, 5)} x1, y1, x2, y2, score
        Returns:
            boxes:    {ndarray(n_boxes, 5)} squared x1, y1, x2, y2 with the RNet score
            boxes_c:  {ndarray(n_boxes, 5)} refined x1, y1, x2, y2, score
            landmark: landmark coordinates computed from output columns 5 onward
        Notes:
            Returns ``(empty array, empty array, None)`` when there is no
            input box or no box passes ``thresh[1]``.
        """
        NETSIZE = 24
        if bboxes.shape[0] == 0:
            return np.array([]), np.array([]), None

        bboxes = self._square(bboxes)
        patches = self._crop_patch(image, bboxes, NETSIZE)

        ## forward network
        X = torch.cat(list(map(lambda x: ToTensor()(x).unsqueeze(0), patches)), dim=0)
        if cuda.is_available() and self.use_cuda:
            X = X.cuda()
        with torch.no_grad():
            y_pred = self.rnet(X).cpu().detach().numpy()

        scores = sigmoid(y_pred[:, 0])      # (n_boxes,)
        offset = y_pred[:, 1: 5]            # (n_boxes, 4)
        landmark = y_pred[:, 5:]            # remaining columns: landmark offsets

        ## update score
        bboxes[:, -1] = scores

        ## filter
        idx = scores > self.thresh[1]
        bboxes = bboxes[idx]
        offset = offset[idx]
        landmark = landmark[idx]
        if bboxes.shape[0] == 0:
            return np.array([]), np.array([]), None

        ## nms
        idx = self._nms(bboxes, 0.5)
        bboxes = bboxes[idx]
        offset = offset[idx]
        landmark = landmark[idx]

        ## landmark
        landmark = self._cal_landmark(bboxes[:, :-1], landmark)

        bboxes_c = self._cal_box(bboxes[:, :-1], offset)
        bboxes_c = np.concatenate([bboxes_c, bboxes[:, -1].reshape((-1, 1))], axis=1)

        return bboxes, bboxes_c, landmark

    def _detect_onet(self, image, bboxes):
        """Produce the final boxes and landmarks with ONet.

        Params:
            image:  {ndarray(H, W, C)}
            bboxes: {ndarray(n_boxes, 5)} x1, y1, x2, y2, score
        Returns:
            boxes:    {ndarray(n_boxes, 5)} squared x1, y1, x2, y2 with the ONet score
            boxes_c:  {ndarray(n_boxes, 5)} refined x1, y1, x2, y2, score
            landmark: landmark coordinates computed from output columns 5 onward
        Notes:
            Returns three empty arrays when there is no input box or no box
            passes ``thresh[2]``.
        """
        NETSIZE = 48
        if bboxes.shape[0] == 0:
            return np.array([]), np.array([]), np.array([])

        bboxes = self._square(bboxes)
        patches = self._crop_patch(image, bboxes, NETSIZE)

        ## forward network
        X = torch.cat(list(map(lambda x: ToTensor()(x).unsqueeze(0), patches)), dim=0)
        if cuda.is_available() and self.use_cuda:
            X = X.cuda()
        with torch.no_grad():
            y_pred = self.onet(X).cpu().detach().numpy()

        scores = sigmoid(y_pred[:, 0])      # (n_boxes,)
        offset = y_pred[:, 1: 5]            # (n_boxes, 4)
        landmark = y_pred[:, 5:]            # remaining columns: landmark offsets

        ## update score
        bboxes[:, -1] = scores

        ## filter
        idx = scores > self.thresh[2]
        bboxes = bboxes[idx]
        offset = offset[idx]
        landmark = landmark[idx]
        if bboxes.shape[0] == 0:
            return np.array([]), np.array([]), np.array([])

        ## nms
        idx = self._nms(bboxes, 0.5, mode='Minimum')
        bboxes = bboxes[idx]
        offset = offset[idx]
        landmark = landmark[idx]

        ## landmark
        landmark = self._cal_landmark(bboxes[:, :-1], landmark)

        bboxes_c = self._cal_box(bboxes[:, :-1], offset)
        bboxes_c = np.concatenate([bboxes_c, bboxes[:, -1].reshape((-1, 1))], axis=1)

        return bboxes, bboxes_c, landmark

    @classmethod
    def _cal_box(cls, boxes, offset):
        """Refine boxes with the regressed offsets.

        Params:
            boxes:  {ndarray(n_boxes, 4)} unrefined boxes
            offset: {ndarray(n_boxes, 4)} boxes offset
        Returns:
            boxes_c: {ndarray(n_boxes, 4)} refined boxes
        Notes:
            offset = (gt - box) / box size
            => gt = box + offset * box size
            x offsets are scaled by the box width, y offsets by its height.
        """
        x1, y1, x2, y2 = np.hsplit(boxes, 4)    # (n_boxes, 1) each
        w = x2 - x1 + 1
        h = y2 - y1 + 1
        bsize = np.hstack([w, h] * 2)           # (n_boxes, 4): w, h, w, h
        bbase = np.hstack([x1, y1, x2, y2])     # (n_boxes, 4)
        boxes_c = bbase + offset * bsize
        return boxes_c

    @classmethod
    def _cal_landmark(cls, boxes, offset):
        """Convert landmark offsets into image coordinates.

        Params:
            boxes:  {ndarray(n_boxes, 4)} unrefined boxes
            offset: {ndarray(n_boxes, 10)} landmark offset, interleaved x, y
        Returns:
            landmark: {ndarray(n_boxes, 10)} landmark location
        Notes:
            gt_x = x1 + offset_x * box width
            gt_y = y1 + offset_y * box height
        """
        x1, y1, x2, y2 = np.hsplit(boxes, 4)    # (n_boxes, 1) each
        w = x2 - x1 + 1
        h = y2 - y1 + 1
        bsize = np.hstack([w, h] * 5)           # (n_boxes, 10)
        bbase = np.hstack([x1, y1] * 5)         # (n_boxes, 10)
        landmark = bbase + offset * bsize
        return landmark

    @classmethod
    def _nms(cls, dets, thresh, mode="Union"):
        """Greedy non-maximum suppression.

        Params:
            dets:   {ndarray(n_boxes, 5)} x1, y1, x2, y2, score
            thresh: {float} boxes whose overlap with a kept box is <= thresh survive
            mode:   {str} 'Union' (intersection over union) or
                    'Minimum' (intersection over the smaller area)
        Returns:
            idx: {list[int]} indexes to keep, highest score first
        Raises:
            ValueError: if ``mode`` is neither 'Union' nor 'Minimum'.
        """
        if mode not in ("Union", "Minimum"):
            # Previously an unknown mode left `ovr` unassigned and failed
            # with an UnboundLocalError inside the loop.
            raise ValueError(
                "mode must be 'Union' or 'Minimum', got {!r}".format(mode))

        x1 = dets[:, 0]
        y1 = dets[:, 1]
        x2 = dets[:, 2]
        y2 = dets[:, 3]
        scores = dets[:, 4]

        areas = (x2 - x1 + 1) * (y2 - y1 + 1)
        order = scores.argsort()[::-1]

        idx = []
        while order.size > 0:
            i = order[0]
            idx.append(i)
            rest = order[1:]

            xx1 = np.maximum(x1[i], x1[rest])
            yy1 = np.maximum(y1[i], y1[rest])
            xx2 = np.minimum(x2[i], x2[rest])
            yy2 = np.minimum(y2[i], y2[rest])

            w = np.maximum(0.0, xx2 - xx1 + 1)
            h = np.maximum(0.0, yy2 - yy1 + 1)
            inter = w * h

            if mode == "Union":
                ovr = inter / (areas[i] + areas[rest] - inter)
            else:
                ovr = inter / np.minimum(areas[i], areas[rest])

            # `inds` indexes into `rest`, hence the +1 to index into `order`.
            inds = np.where(ovr <= thresh)[0]
            order = order[inds + 1]

        return idx

    @classmethod
    def _square(cls, bbox):
        """Convert rectangular boxes to squares sharing the same centre.

        Params:
            bbox: {ndarray(n_boxes, 5)} x1, y1, x2, y2, score
        Returns:
            bbox_s: {ndarray(n_boxes, 5)} square boxes; side = max(width, height)
        """
        x1, y1, x2, y2, score = np.hsplit(bbox, 5)  # (n_boxes, 1) each
        w = x2 - x1 + 1
        h = y2 - y1 + 1
        maxsize = np.maximum(w, h)

        x1 = x1 + w / 2 - maxsize / 2
        y1 = y1 + h / 2 - maxsize / 2
        x2 = x1 + maxsize - 1
        y2 = y1 + maxsize - 1

        bbox_s = np.hstack([x1, y1, x2, y2, score])
        return bbox_s

    @classmethod
    def _crop_patch(cls, image, bbox_s, size):
        """Crop one patch per box from ``image`` and resize it to ``size`` x ``size``.

        Params:
            image:  {ndarray(H, W, C)}
            bbox_s: {ndarray(n_boxes, 5)} squared bbox
            size:   {int} output side length
        Returns:
            patches: {list[ndarray(size, size, 3)]}
        Notes:
            Parts of a box that fall outside the image stay zero in the patch.
        """

        def locate(bbox, imh, imw):
            """Clip boxes to the image and compute where each crop lands in its patch.

            Params:
                bbox:     {ndarray(n_boxes, 5)} x1, y1, x2, y2, score
                imh, imw: size of input image
            Returns:
                [x1, y1, x2, y2, xx1, yy1, xx2, yy2]: integer arrays of length
                n_boxes; the first four are source coordinates in the image,
                the last four destination coordinates in the patch.
            """
            x1, y1, x2, y2, score = np.hsplit(bbox, 5)
            x1, y1, x2, y2 = list(map(lambda x: x.astype('int').reshape(-1), [x1, y1, x2, y2]))
            w = x2 - x1 + 1
            h = y2 - y1 + 1

            ## destination boxes
            xx1 = np.zeros_like(x1)
            yy1 = np.zeros_like(y1)
            xx2 = w.copy() - 1
            yy2 = h.copy() - 1

            ## left side out of image
            i = x1 < 0
            xx1[i] = 0 + (0 - x1[i])
            x1[i] = 0
            ## top side out of image
            i = y1 < 0
            yy1[i] = 0 + (0 - y1[i])
            y1[i] = 0
            ## right side out of image
            i = x2 > imw - 1
            xx2[i] = (w[i] - 1) + (imw - 1 - x2[i])
            x2[i] = imw - 1
            ## bottom side out of image
            i = y2 > imh - 1
            yy2[i] = (h[i] - 1) + (imh - 1 - y2[i])
            y2[i] = imh - 1

            return [x1, y1, x2, y2, xx1, yy1, xx2, yy2]

        imh, imw, _ = image.shape

        x1, y1, x2, y2, score = np.hsplit(bbox_s, 5)
        pw = x2 - x1 + 1
        ph = y2 - y1 + 1
        pshape = np.hstack([ph, pw, 3 * np.ones(shape=(score.shape[0], 1))]).astype('int')   # (n_boxes, 3)

        n_boxes = bbox_s.shape[0]
        x1, y1, x2, y2, xx1, yy1, xx2, yy2 = locate(bbox_s, imh, imw)

        patches = []
        for i_boxes in range(n_boxes):
            patch = np.zeros(shape=pshape[i_boxes], dtype='uint8')
            # NOTE(review): x2/y2 are inclusive coordinates but are used here
            # as exclusive slice ends, so the last row and column of every
            # box are left zero. Kept unchanged because the networks may have
            # been trained on patches cropped this way -- confirm before fixing.
            patch[yy1[i_boxes]: yy2[i_boxes], xx1[i_boxes]: xx2[i_boxes]] = \
                image[y1[i_boxes]: y2[i_boxes], x1[i_boxes]: x2[i_boxes]]
            patch = cv2.resize(patch, (size, size))
            patches += [patch]

        return patches