def compute_rate(model, config, embeddings, images_placeholder,
                 phase_train_placeholder, embedding_size, sess):
    """Walk config.input_dir (one sub-directory per identity), classify each
    image, and return the recognition accuracy (1 - error rate)."""
    err = 0.0
    total = 0.0
    # NOTE: listdir order must match the label order used when the classifier
    # was trained, otherwise indices map to the wrong names
    HumanNames = os.listdir(config.input_dir)
    for path, dirnames, filenames in os.walk(config.input_dir):
        for filename in filenames:
            total += 1
            # preprocess exactly as at training time: resize and prewhiten
            img = cv2.imread(os.path.join(path, filename))
            img = img[:, :, 0:3]
            img = flip(img, False)
            img = cv2.resize(img, (config.lfw.image_size, config.lfw.image_size),
                             interpolation=cv2.INTER_CUBIC)
            img = prewhiten(img)
            img = img.reshape(-1, config.lfw.image_size, config.lfw.image_size, 3)
            # embed the face, then classify the embedding
            emb_array = np.zeros((1, embedding_size))
            feed_dict = {images_placeholder: img, phase_train_placeholder: False}
            emb_array[0, :] = sess.run(embeddings, feed_dict=feed_dict)
            best_class_indices = model.predict(emb_array)
            result_names = HumanNames[best_class_indices[0]]
            print(result_names, '---------', os.path.basename(path))
            # the directory name is the ground-truth identity
            if result_names != os.path.basename(path):
                err += 1
    return 1 - err / total
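# The helpers flip() and prewhiten() are used above but not defined in this
# file. A minimal sketch, modeled on the common FaceNet utilities; the
# project's own versions may differ.
import numpy as np

def flip(image, random_flip):
    # optionally mirror the image horizontally; a no-op when random_flip=False
    if random_flip and np.random.choice([True, False]):
        image = np.fliplr(image)
    return image

def prewhiten(x):
    # per-image standardization: zero mean, (clamped) unit variance
    mean = np.mean(x)
    std = np.std(x)
    std_adj = np.maximum(std, 1.0 / np.sqrt(x.size))
    return np.multiply(np.subtract(x, mean), 1 / std_adj)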
def tta_inference(self, img):
    """Test-time augmentation: average the plain prediction with predictions
    on flipped inputs, each un-flipped before averaging."""
    pred = self.do_inference(img)
    for flip_idx in self.tta_flips:
        # flip the input, predict, then flip the prediction back
        pred += flip(self.do_inference(flip(img, flip_idx)), flip_idx)
    pred /= len(self.tta_flips) + 1
    return pred
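# flip() and self.tta_flips are assumed above. A minimal sketch under the
# assumption that this follows the nnU-Net-style convention, where each entry
# of tta_flips lists spatial dims and flipping is an involution (flipping
# twice restores the original tensor):
import torch

def flip(data, dims):
    # mirror a tensor along the given spatial dimensions
    return torch.flip(data, dims=dims)

# e.g. for (N, C, H, W) inputs:
#   self.tta_flips = [(2,), (3,), (2, 3)]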
def __getitem__(self, index):
    # get global constants
    num_joints = conf.num_joints
    res_in = conf.res_in
    res_out = conf.res_out
    res_ratio = res_in / res_out

    # get raw data
    img = self.load_image(index)
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    pts, c, s = self.get_part_info(index)

    # data augmentation (scaling and rotation)
    s = s * (2 ** rnd(conf.scale))
    r = 0 if np.random.random() < 0.6 else rnd(conf.rotate)
    inp = crop(img, c, s, r, res_in) / 255.

    # initialize valid joints
    valid2d = np.zeros((num_joints), dtype=np.float32)

    # set output heatmap and 2d pose
    pose2d = np.zeros((num_joints, 2), dtype=np.float32)
    hmap = np.zeros((int(num_joints), int(res_out), int(res_out)), dtype=np.float32)
    for i in range(16):  # 16 annotated joints in the source (MPII-style) format
        # check whether there is a ground-truth annotation
        if conf.inds[i] != 7 and pts[i][0] > 1:
            pt = transform(pts[i], c, s, r, res_in)
            pt = pt.astype(np.float32)
            if (pt[0] >= 0) and (pt[0] <= res_in - 1) and (pt[1] >= 0) and (pt[1] <= res_in - 1):
                pose2d[conf.inds[i]] = pt
                valid2d[conf.inds[i]] = 1.0
                hmap[conf.inds[i]] = draw_gaussian(hmap[conf.inds[i]], pt / res_ratio + 0.5, conf.std)

    # data augmentation (flipping and jittering)
    if np.random.random() < 0.5:
        inp = flip(inp)
        hmap = shuffle_lr(flip(hmap))
        pose2d = shuffle_lr(pose2d)
        for i in range(num_joints):
            if pose2d[i][0] > 1:
                pose2d[i][0] = res_in - pose2d[i][0] - 1
    inp[0] = np.clip(inp[0] * (np.random.random() * .4 + .6), 0, 1)
    inp[1] = np.clip(inp[1] * (np.random.random() * .4 + .6), 0, 1)
    inp[2] = np.clip(inp[2] * (np.random.random() * .4 + .6), 0, 1)

    # 3d and camera information (dummy values for this 2d-only dataset)
    valid3d = 0.0
    bbox = np.array([0.0, 0.0, 255.0, 255.0], dtype=np.int32)
    pose3d = np.zeros((num_joints - 1, 3), dtype=np.float32)
    cam_f = np.array([1.0, 1.0], dtype=np.float32)
    cam_c = np.array([0.0, 0.0], dtype=np.float32)
    meta3d = np.zeros((num_joints, 3), dtype=np.float32)
    action = 0
    coords_root = np.zeros((3), dtype=np.float32)
    depth_root = 0.
    depth_root_canonical = 0.

    # set data
    data = {'inp': inp, 'bbox': bbox, 'hmap': hmap,
            'pose2d': pose2d, 'valid2d': valid2d,
            'pose3d': pose3d, 'valid3d': valid3d,
            'cam_f': cam_f, 'cam_c': cam_c, 'meta3d': meta3d,
            'action': action, 'coords_root': coords_root,
            'depth_root': depth_root,
            'depth_root_canonical': depth_root_canonical}

    # return input image, output heatmap and 2d pose
    return index, data
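# draw_gaussian() is assumed above. A plausible sketch, modeled on the
# stacked-hourglass reference implementation (the project's version may
# differ): splat a truncated 2D Gaussian of width sigma centered at pt onto
# the heatmap, taking the element-wise maximum with what is already there.
import numpy as np

def draw_gaussian(img, pt, sigma):
    # bounding box of the Gaussian patch on the heatmap
    ul = [int(pt[0] - 3 * sigma), int(pt[1] - 3 * sigma)]
    br = [int(pt[0] + 3 * sigma + 1), int(pt[1] + 3 * sigma + 1)]
    if ul[0] >= img.shape[1] or ul[1] >= img.shape[0] or br[0] < 0 or br[1] < 0:
        return img  # joint falls entirely outside the heatmap
    size = int(6 * sigma + 1)
    x = np.arange(0, size, 1, np.float32)
    y = x[:, np.newaxis]
    x0 = y0 = size // 2
    g = np.exp(-((x - x0) ** 2 + (y - y0) ** 2) / (2 * sigma ** 2))
    # clip the patch to the heatmap boundary
    g_x = max(0, -ul[0]), min(br[0], img.shape[1]) - ul[0]
    g_y = max(0, -ul[1]), min(br[1], img.shape[0]) - ul[1]
    img_x = max(0, ul[0]), min(br[0], img.shape[1])
    img_y = max(0, ul[1]), min(br[1], img.shape[0])
    img[img_y[0]:img_y[1], img_x[0]:img_x[1]] = np.maximum(
        img[img_y[0]:img_y[1], img_x[0]:img_x[1]],
        g[g_y[0]:g_y[1], g_x[0]:g_x[1]])
    return img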
def __getitem__(self, index):
    if self.split == 'train':
        index = np.random.randint(self.num_samples)

    # get global constants
    num_joints = conf.num_joints
    res_in = conf.res_in
    res_out = conf.res_out
    res_ratio = res_in / res_out

    # get image
    img = self.load_image(index)

    # get 2d/3d pose and bounding box
    pts, bbox, meta3d, cam_f, cam_c, action = self.get_part_info(index)
    cam_f = cam_f.astype(np.float32)
    cam_c = cam_c.astype(np.float32)
    action = action.item()

    # set 2d pose (map pixel coordinates into the bbox-normalized crop)
    pts = pts - bbox[0:2]
    pts = pts / bbox[2:4]
    pts = pts * float(res_in - 1)

    # set 3d pose (root-relative)
    pose3d = meta3d.copy()
    pose3d = pose3d - pose3d[conf.root]

    # data augmentation (small random translation)
    inp = np.zeros_like(img)
    if self.split == 'train':
        xr = np.random.randint(9) - 4
        yr = np.random.randint(9) - 4
        in_x1, in_x2 = max(0, -xr), min(256, 256 - xr)
        in_y1, in_y2 = max(0, -yr), min(256, 256 - yr)
        out_x1, out_x2 = max(0, xr), min(256, 256 + xr)
        out_y1, out_y2 = max(0, yr), min(256, 256 + yr)
        inp[out_y1:out_y2, out_x1:out_x2, :] = img[in_y1:in_y2, in_x1:in_x2, :]
        pts[:, 0] = pts[:, 0] + xr
        pts[:, 1] = pts[:, 1] + yr
    else:
        inp[:, :, :] = img[:, :, :]
    inp = cv2.cvtColor(inp, cv2.COLOR_BGR2RGB)
    inp = inp.transpose(2, 0, 1).astype(np.float32)

    # normalization
    inp = inp / 255.0

    # set valid joints
    valid2d = np.ones((num_joints), dtype=np.float32)
    valid3d = 1.0

    # set output heatmap and 2d pose
    pose2d = np.zeros((num_joints, 2), dtype=np.float32)
    hmap = np.zeros((int(num_joints), int(res_out), int(res_out)), dtype=np.float32)
    for i in range(num_joints):
        pt = pts[i].astype(np.float32)
        pose2d[i] = pt
        hmap[i] = draw_gaussian(hmap[i], pt / res_ratio + 0.5, conf.std)

    # data augmentation (flipping and color jittering)
    if self.split == 'train':
        if np.random.random() < 0.5:
            inp = flip(inp)
            hmap = flip(hmap)
            # swap left/right joint channels after mirroring
            hmap_flip = hmap.copy()
            for i in range(len(flip_index)):
                hmap_flip[i] = hmap[flip_index[i]].copy()
            hmap = hmap_flip.copy()
            pose2d_flip = pose2d.copy()
            for i in range(len(flip_index)):
                pose2d_flip[i] = pose2d[flip_index[i]].copy()
            pose2d = pose2d_flip.copy()
            pose2d[:, 0] = conf.res_in - pose2d[:, 0]
            pose3d_flip = pose3d.copy()
            for i in range(len(flip_index)):
                pose3d_flip[i] = pose3d[flip_index[i]].copy()
            pose3d = pose3d_flip.copy()
            pose3d[:, 0] *= -1
        inp[0] = np.clip(inp[0] * (np.random.random() * .4 + .6), 0, 1)
        inp[1] = np.clip(inp[1] * (np.random.random() * .4 + .6), 0, 1)
        inp[2] = np.clip(inp[2] * (np.random.random() * .4 + .6), 0, 1)

    # root coordinates
    coords_root = meta3d[conf.root].copy()
    depth_root = coords_root[2].copy()
    depth_root_canonical = coords_root[2].copy() / np.sqrt(np.prod(cam_f))

    # set 3d pose (drop the root joint, which is zero by construction)
    pose3d = np.delete(pose3d, (conf.root), axis=0)

    # set data
    data = {'inp': inp, 'bbox': bbox, 'hmap': hmap,
            'pose2d': pose2d, 'valid2d': valid2d,
            'pose3d': pose3d, 'valid3d': valid3d,
            'cam_f': cam_f, 'cam_c': cam_c, 'meta3d': meta3d,
            'action': action, 'coords_root': coords_root,
            'depth_root': depth_root,
            'depth_root_canonical': depth_root_canonical}

    # return
    return index, data
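# flip() and flip_index are assumed above. A minimal sketch under the
# assumption that inputs are channel-first numpy arrays: flip() mirrors the
# width (last) axis, and flip_index is the left/right joint permutation
# (its exact order depends on the project's joint numbering).
import numpy as np

def flip(x):
    # mirror the last (width) axis of a (C, H, W) image or (J, H, W) heatmap;
    # copy() avoids returning a negatively-strided view
    return np.flip(x, axis=x.ndim - 1).copy()

# hypothetical example for a 16-joint skeleton where 0..5 are the right/left
# legs and 10..15 the right/left arms:
# flip_index = [5, 4, 3, 2, 1, 0, 6, 7, 8, 9, 15, 14, 13, 12, 11, 10]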
def main(config):
    print('Creating networks and loading parameters')
    graph_detect = tf.Graph()
    with graph_detect.as_default():
        gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=config.gpu_memory_fraction)
        sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options,
                                                log_device_placement=False))
        with sess.as_default():
            # face detector and embedding network
            detector = MTCNN(config, sess)
            load_model(config.lfw.valid_model_path)
            images_placeholder = tf.get_default_graph().get_tensor_by_name("input:0")
            embeddings = tf.get_default_graph().get_tensor_by_name("embeddings:0")
            phase_train_placeholder = tf.get_default_graph().get_tensor_by_name("phase_train:0")
            embedding_size = embeddings.get_shape()[1]

            # load the trained classifier (embedding -> identity)
            classifier_filename = config.classifier_path
            classifier_filename_exp = os.path.expanduser(classifier_filename)
            print('load classifier file-> %s' % classifier_filename_exp)
            with open(classifier_filename_exp, 'rb') as infile:
                (model, class_names) = pickle.load(infile)

            # NOTE: listdir order must match the classifier's label order
            HumanNames = os.listdir(config.input_dir)
            video_capture = cv2.VideoCapture(0)
            prevTime = 0
            c = 0
            while True:
                ret, frame = video_capture.read()
                if not ret:
                    break  # stop if the camera fails to deliver a frame
                frame = cv2.resize(frame, (0, 0), fx=0.5, fy=0.5)  # resize frame (optional)
                curTime = time.time()  # for fps calculation
                timeF = config.mtcnn.frame_interval
                if c % timeF == 0:  # run detection only every timeF-th frame
                    if frame.ndim == 2:
                        frame = to_rgb(frame)
                    frame = frame[:, :, 0:3]
                    bounding_boxes, _ = detector.detect_face(frame)
                    nrof_faces = bounding_boxes.shape[0]
                    print('Detected_FaceNum: %d' % nrof_faces)
                    if nrof_faces > 0:
                        det = bounding_boxes[:, 0:4]
                        bb = np.zeros((nrof_faces, 4), dtype=np.int32)
                        for i in range(nrof_faces):
                            bb[i][0] = det[i][0]
                            bb[i][1] = det[i][1]
                            bb[i][2] = det[i][2]
                            bb[i][3] = det[i][3]
                            # skip boxes that touch or cross the frame boundary
                            if bb[i][0] <= 0 or bb[i][1] <= 0 or bb[i][2] >= len(frame[0]) or bb[i][3] >= len(frame):
                                print('face is out of frame range!')
                                continue
                            # per-face locals (the original per-index lists went
                            # out of sync with i whenever a face was skipped above)
                            cropped = frame[bb[i][1]:bb[i][3], bb[i][0]:bb[i][2], :]
                            cropped = flip(cropped, False)
                            # scipy.misc.imresize was removed in SciPy >= 1.3;
                            # cv2.resize is a drop-in alternative
                            scaled = misc.imresize(cropped, (182, 182), interp='bilinear')
                            scaled = cv2.resize(scaled, (160, 160), interpolation=cv2.INTER_CUBIC)
                            scaled = prewhiten(scaled)
                            scaled_reshape = scaled.reshape(-1, 160, 160, 3)
                            # embed and classify the face
                            emb_array = np.zeros((1, embedding_size))
                            feed_dict = {images_placeholder: scaled_reshape,
                                         phase_train_placeholder: False}
                            emb_array[0, :] = sess.run(embeddings, feed_dict=feed_dict)
                            predictions = model.predict_proba(emb_array)
                            best_class_indices = np.argmax(predictions, axis=1)
                            # best_class_probabilities = predictions[np.arange(len(best_class_indices)), best_class_indices]
                            cv2.rectangle(frame, (bb[i][0], bb[i][1]), (bb[i][2], bb[i][3]), (0, 255, 0), 2)
                            text_x = bb[i][0]
                            text_y = bb[i][3] + 20
                            print('result: ', best_class_indices[0])
                            result_names = HumanNames[best_class_indices[0]]
                            cv2.putText(frame, result_names, (text_x, text_y),
                                        cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (0, 0, 255),
                                        thickness=1, lineType=2)
                    else:
                        print('Unable to align')
                # overlay the fps counter
                sec = curTime - prevTime
                prevTime = curTime
                fps = 1 / sec
                string = 'FPS: %2.3f' % fps
                text_fps_x = len(frame[0]) - 150
                text_fps_y = 20
                cv2.putText(frame, string, (text_fps_x, text_fps_y),
                            cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (0, 255, 0),
                            thickness=1, lineType=2)
                c += 1
                cv2.imshow('Video', frame)
                if cv2.waitKey(1) & 0xFF == ord('q'):
                    break
            video_capture.release()
            cv2.destroyAllWindows()
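# Hypothetical entry point for running the demo above; the YAML + EasyDict
# config loading and the default file name are assumptions, not taken from
# the source.
if __name__ == '__main__':
    import argparse
    import yaml
    from easydict import EasyDict

    parser = argparse.ArgumentParser(description='Real-time face recognition demo')
    parser.add_argument('--config', default='config.yaml', help='path to YAML config')
    args = parser.parse_args()
    with open(args.config) as f:
        config = EasyDict(yaml.safe_load(f))
    main(config)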