def forward(self, inputs):
    """Run the detector on a batch.

    In training mode ``inputs`` is unpacked as ``(imgs, annotations)`` and the
    value of ``self.focalLoss`` is returned.  Otherwise ``inputs`` is the
    image batch itself and the result is post-processed for the first batch
    element only: scores at or below 0.05 are dropped and ``gpu_nms`` is run
    with a threshold of 0.5.

    Returns (inference): ``[scores, classes, boxes]`` for the kept entries,
    or a tuple of three empty tensors when nothing exceeds the score
    threshold.
    """
    if self.training:
        imgs, annotations = inputs
    else:
        imgs = inputs

    # Backbone; only the outputs of layer2..layer4 are handed to the FPN.
    stem = self.conv1(imgs)
    c1 = self.layer1(stem)
    c2 = self.layer2(c1)
    c3 = self.layer3(c2)
    c4 = self.layer4(c3)
    pyramid = self.fpn([c2, c3, c4])

    # The same heads are applied to every pyramid level and concatenated
    # along dim 1.
    reg_out = torch.cat([self.regression(level) for level in pyramid], dim=1)
    cls_out = torch.cat(
        [self.classification(level) for level in pyramid], dim=1)
    anchors = self.anchors(imgs)

    if self.training:
        return self.focalLoss(cls_out, reg_out, anchors, annotations)

    boxes = self.clipBoxes(self.regressBoxes(anchors, reg_out), imgs)

    # Best score over dim 2 for every entry; the mask is taken from batch
    # element 0 only.
    best = torch.max(cls_out, dim=2, keepdim=True)[0]
    keep = (best > 0.05)[0, :, 0]
    if keep.sum() == 0:
        return torch.zeros(0), torch.zeros(0), torch.zeros(0, 4)

    cls_out = cls_out[:, keep, :]
    boxes = boxes[:, keep, :]
    best = best[:, keep, :]

    dets = torch.cat([boxes, best], dim=2)[0, :, :]
    nms_idx, _ = gpu_nms(dets, 0.5)
    nms_scores, nms_class = cls_out[0, nms_idx, :].max(dim=1)
    return [nms_scores, nms_class, boxes[0, nms_idx, :]]
def nms(self, boxes, scores, classes, confidence, nms_threshold):
    """Run non-maximum suppression through the ``nms`` module.

    ``scores`` and ``classes`` (one entry per box) are first expanded into a
    dense ``(num_boxes, num_classes)`` score matrix, where ``num_classes`` is
    ``len(self.class_names)``.  At most 50 boxes are requested;
    ``confidence`` is forwarded as ``score_thresh`` and ``nms_threshold`` as
    ``iou_thresh``.

    Returns the ``(boxes, scores, classes)`` triple produced by the backend.
    """
    class_count = len(self.class_names)

    # Scatter every box's score into the column of its class.
    dense_scores = np.zeros((scores.shape[0], class_count))
    for row, class_id in enumerate(classes):
        dense_scores[row][class_id] = scores[row]

    backend_opts = dict(
        max_boxes=50, score_thresh=confidence, iou_thresh=nms_threshold)

    # The GPU path is switched off by this hard-coded flag; it is kept for
    # reference.
    use_gpu_nms = False
    if not use_gpu_nms:
        boxes, scores, classes = nms.cpu_nms(
            boxes, dense_scores, class_count, **backend_opts)
        return boxes, scores, classes

    boxes, scores, classes = nms.gpu_nms(
        boxes, dense_scores, class_count, **backend_opts)
    boxes, scores, classes = self.sess.run([boxes, scores, classes])
    return boxes, scores, classes
def _nms(dets):
    """Apply ``nms.gpu_nms`` to *dets*.

    ``thresh`` and ``device_id`` are not parameters; they are read from the
    surrounding scope at call time.
    """
    keep = nms.gpu_nms(dets, thresh, device_id)
    return keep
# Put the local OpenCV 2.4.13.2 build at the front of the import path; this
# has to happen before ``import cv2``.
cv2_path = '/media/disk1/yangfan/opencv-2.4.13.2/lib'
sys.path.insert(0, cv2_path)
import cv2
import mxnet as mx
print(mx.__version__)
from mtcnn_detector import MtcnnDetector
from time import time
from nms.gpu_nms import *
from config import GPU_ID

# One NMS call on an all-zero dummy batch; the result is not used below
# (presumably a warm-up of the GPU kernel -- TODO confirm).
boxes = np.zeros((10, 5), dtype='float32')
pick = gpu_nms(boxes, 0.7, GPU_ID)

# One score threshold per detector stage.
threshold = [0.5, 0.5, 0.6]
ctx = mx.gpu(GPU_ID)
print(ctx)
detector = MtcnnDetector(
    model_folder='model',
    ctx=ctx,
    num_worker=20,
    threshold=threshold,
    accurate_landmark=True,
    minsize=40,
)
detect_face = detector.detect_face

# Class index -> [low, high] age range.
age_dict = {
    0: [0, 2],
    1: [3, 7],
    2: [8, 13],
    3: [14, 18],
}
def detect_first_stage(img, index, threshold, ctx):
    """
        run PNet for first stage on one pyramid scale

    Parameters:
    ----------
        img: numpy array, bgr order
            input image, indexed as (height, width, channel)
        index: int
            selects the scale from ``real_scales`` and the matching
            pre-bound executor from ``real_executors``
        threshold: float number
            score threshold forwarded to ``generate_bbox``
        ctx: mxnet context
            its ``device_id`` selects the GPU used for NMS

    Returns:
    -------
        boxes: float32 numpy array of the boxes kept by NMS, or None when
            ``generate_bbox`` produced no box
    """
    scale = real_scales[index]
    height, width, _ = img.shape
    hs = int(height * scale)
    ws = int(width * scale)

    # cv2.resize takes the target size as (width, height).
    im_data = cv2.resize(img, (ws, hs))
    # adjust for the network input
    input_buf = adjust_input(im_data)

    executor = real_executors[index]
    executor.forward(is_train=False, data=input_buf)
    output = executor.outputs[0].asnumpy()

    # Channel 1 of the first output is used as the score map.  Without a
    # separate regression output the first output itself is passed in its
    # place.
    score_map = output[0][1, :, :]
    if has_reg:
        reg = executor.outputs[1].asnumpy()
    else:
        reg = output[0]
    boxes = generate_bbox(score_map, reg, scale, threshold)

    if boxes.size == 0:
        return None

    # nms
    boxes = boxes.astype('float32')
    # Use the context's numeric device id.  Parsing one character out of
    # str(ctx) ("gpu(10)"[4] == "1") picked the wrong device for ids >= 10.
    pick = gpu_nms(boxes[:, 0:5], 0.5, ctx.device_id)
    return boxes[pick]
def detect_face(self, img):
    """
        detect face over img

    Parameters:
    ----------
        img: numpy array, bgr order, indexed as (height, width, channel)
            input image; anything that is None or not 3-dimensional is
            rejected

    Returns:
    -------
        None when ``img`` is None or not 3-dimensional.

        When no box survives a stage: ``(None, None)`` if landmarks are
        enabled (module flag ``has_landmark``), otherwise ``None``.

        Otherwise ``bboxes`` alone, or ``(bboxes, points)`` if landmarks
        are enabled:

        bboxes: numpy array, one row per face; columns 0-4 are
            (x1, y1, x2, y2, score).  When ``self.accurate_landmark`` is
            set, one extra column per auxiliary-network output is appended
            (see the extended stage below) and at most
            ``self.fourth_stage_num`` rows are returned.
        points: numpy array, n x 10, interleaved
            (x1, y1, x2, y2, ..., x5, y5) landmarks in image coordinates
    """

    def _no_detection():
        # "Nothing found" in the same arity as a successful return.
        return (None, None) if has_landmark else None

    def _build_batches(boxes, capacity, sides, patch_dtype):
        # Crop every box (up to `capacity`) out of img, zero-filling the
        # part that falls outside the image, and resize the crop to each
        # requested side length.  Unused rows stay zero.
        [dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph] = self.pad(
            boxes, self.width, self.height)
        batches = [
            np.zeros((capacity, 3, side, side), dtype=np.float32)
            for side in sides
        ]
        for i in range(min(boxes.shape[0], capacity)):
            patch = np.zeros((tmph[i], tmpw[i], 3), dtype=patch_dtype)
            patch[dy[i]:edy[i] + 1, dx[i]:edx[i] + 1, :] = \
                img[y[i]:ey[i] + 1, x[i]:ex[i] + 1, :]
            for batch, side in zip(batches, sides):
                batch[i, :, :, :] = adjust_input(
                    cv2.resize(patch, (side, side)))
        return batches

    # check input
    if img is None:
        return None

    # only works for color image
    if len(img.shape) != 3:
        return None

    #############################################
    # first stage
    #############################################
    # Not read in this method; still reset on every call as before.
    self.index = []
    self.t = []

    total_boxes = []
    for i, _ in enumerate(self.scales):
        stage_boxes = detect_first_stage(img, i, self.threshold[0], self.ctx)
        if stage_boxes is not None:
            total_boxes.append(stage_boxes)

    if not total_boxes:
        return _no_detection()

    total_boxes = np.vstack(total_boxes)
    if total_boxes.size == 0:
        return _no_detection()

    # merge the detection from first stage
    # Convert rather than assign `.dtype`: assignment reinterprets the raw
    # bytes and would corrupt any array that is not already float32.
    total_boxes = total_boxes.astype(np.float32, copy=False)
    pick = gpu_nms(total_boxes[:, 0:5], 0.7, GPU_ID)
    total_boxes = total_boxes[pick]

    total_boxes = self.convert_to_square(total_boxes)
    total_boxes[:, 0:4] = np.round(total_boxes[:, 0:4])

    #############################################
    # second stage
    #############################################
    num_box = total_boxes.shape[0]
    print('first stage num: %d' % (num_box))

    # (3, 24, 24) is the input shape for RNet
    (input_buf,) = _build_batches(
        total_boxes, self.second_stage_num, (24,), np.uint8)

    self.executor2.forward(is_train=False, data=input_buf)
    output1 = self.executor2.outputs[0].asnumpy()
    output2 = self.executor2.outputs[1].asnumpy()

    # Only the first rows of the fixed-size batch hold real crops; drop the
    # zero-padded rows so they can never pass the threshold and index past
    # the end of total_boxes.
    output1 = output1[:min(num_box, self.second_stage_num)]

    # filter the total_boxes with threshold
    passed = np.where(output1[:, 1] > self.threshold[1])[0]
    total_boxes = total_boxes[passed]
    if total_boxes.size == 0:
        return _no_detection()

    total_boxes[:, 4] = output1[passed, 1]
    if has_reg:
        reg = output2[passed]

    # nms
    pick = gpu_nms(total_boxes, 0.7, GPU_ID)
    total_boxes = total_boxes[pick]
    if has_reg:
        total_boxes = self.calibrate_box(total_boxes, reg[pick])
    total_boxes = self.convert_to_square(total_boxes)
    total_boxes[:, 0:4] = np.round(total_boxes[:, 0:4])

    #############################################
    # third stage
    #############################################
    num_box = total_boxes.shape[0]

    # (3, 48, 48) is the input shape for ONet
    (input_buf,) = _build_batches(
        total_boxes, self.third_stage_num, (48,), np.float32)
    print('second stage :' + str(num_box))

    self.executor3.forward(is_train=False, data=input_buf)
    output1 = self.executor3.outputs[0].asnumpy()
    output2 = self.executor3.outputs[1].asnumpy()
    output3 = self.executor3.outputs[2].asnumpy()
    output3_1 = self.executor3.outputs[3].asnumpy()
    print(output3_1.shape)

    # Same padded-row guard as in the second stage.
    output1 = output1[:min(num_box, self.third_stage_num)]

    # filter the total_boxes with threshold
    passed = np.where(output1[:, 1] > self.threshold[2])[0]
    total_boxes = total_boxes[passed]
    if total_boxes.size == 0:
        return _no_detection()

    total_boxes[:, 4] = output1[passed, 1]

    # compute landmark points (from the boxes *before* calibration)
    if has_landmark:
        points = output3[passed]
        bbw = total_boxes[:, 2] - total_boxes[:, 0] + 1
        bbh = total_boxes[:, 3] - total_boxes[:, 1] + 1
        # Even columns are scaled by box width and offset by x1, odd
        # columns by box height and y1.
        points[:, 0:10:2] = points[:, 0:10:2] * bbw[:, None] \
            + total_boxes[:, 0:1]
        points[:, 1:10:2] = points[:, 1:10:2] * bbh[:, None] \
            + total_boxes[:, 1:2]

    # nms
    if has_reg:
        total_boxes = self.calibrate_box(total_boxes, output2[passed])
    pick = nms(total_boxes, 0.7, 'Min')
    total_boxes = total_boxes[pick]
    if has_landmark:
        points = points[pick]

    if not self.accurate_landmark:
        if has_landmark:
            return total_boxes, points
        return total_boxes

    #############################################
    # extended stage
    #############################################
    total_boxes_tmp = self.convert_to_square(total_boxes)
    total_boxes_tmp[:, 0:4] = np.round(total_boxes_tmp[:, 0:4])

    # One crop per box, resized for the 48-, 64- and 96-pixel networks.
    input_buf, input_buf3, input_buf4 = _build_batches(
        total_boxes_tmp, self.fourth_stage_num, (48, 64, 96), np.float32)
    num_box = min(len(total_boxes_tmp), self.fourth_stage_num)

    # Three outputs scaled by 90 (presumably normalised angles converted
    # to degrees -- TODO confirm against the model definition).
    self.executor4_0.forward(is_train=False, data=input_buf)
    output0_0 = self.executor4_0.outputs[0].asnumpy()
    output0_1 = self.executor4_0.outputs[1].asnumpy()
    output0_2 = self.executor4_0.outputs[2].asnumpy()
    output0_0 *= 90.
    output0_1 *= 90.
    output0_2 *= 90.

    self.executor4_1.forward(is_train=False, data=input_buf)
    self.executor4_3.forward(is_train=False, data=input_buf)
    out4_1 = self.executor4_1.outputs[0].asnumpy()
    out4_3 = self.executor4_3.outputs[0].asnumpy()

    self.executor4_2.forward(is_train=False, data=input_buf)
    out4_2 = self.executor4_2.outputs[0].asnumpy()
    self.executor4_5.forward(is_train=False, data=input_buf)
    out4_5 = self.executor4_5.outputs[0].asnumpy()

    # executor5 consumes the same 48x48 crops as the executor4_* networks,
    # so the batch is reused instead of being built twice.
    self.executor5.forward(is_train=False, data=input_buf)
    out5 = self.executor5.outputs[0].asnumpy()

    # Weighted sum over the first eight outputs of executor5 (weights are
    # presumably representative ages of eight age buckets -- TODO confirm).
    age_weights = (1.0, 5.0, 11, 16, 23, 28, 33, 40)
    age = np.zeros((num_box, 1), dtype=np.float32)
    for i, row in enumerate(out5[:num_box]):
        age[i] = sum(p * w for p, w in zip(row, age_weights))
    best_bin = np.argmax(out5, axis=1).reshape((-1, 1))
    best_score = np.max(out5, axis=1).reshape((-1, 1))

    self.executor_true.forward(is_train=False, data=input_buf3)
    out_true = self.executor_true.outputs[0].asnumpy()
    self.executor_clear.forward(is_train=False, data=input_buf4)
    out_clear = self.executor_clear.outputs[0].asnumpy()

    # Column order is part of the return contract; keep it stable.
    total_boxes = np.hstack([
        total_boxes_tmp[0:num_box],
        out4_1[0:num_box, 0:1],
        out4_3[0:num_box, 1:2],
        out4_2[0:num_box, 1:2],
        best_score[0:num_box],
        best_bin[0:num_box],
        age,
        out4_5[0:num_box, 1:2],
        output0_0[0:num_box],
        output0_1[0:num_box],
        output0_2[0:num_box],
        out_true[0:num_box, 1:2],
        out_clear[0:num_box, 1:2],
    ])

    # Without landmarks there is no `points` array; return the boxes alone,
    # matching the non-accurate path above.
    if has_landmark:
        return total_boxes, points[0:num_box]
    return total_boxes