Example #1
import os

import cv2
import numpy as np

# flip and prewhiten are FaceNet-style helpers assumed to be defined elsewhere
# in the project (a sketch of both follows this example).


def compute_rate(model, config, embeddings, images_placeholder, phase_train_placeholder, embedding_size, sess):
    err = 0.0
    total = 0.0
    HumanNames = os.listdir(config.input_dir)
    for (path, dirnames, filenames) in os.walk(config.input_dir):
        for filename in filenames:
            total += 1
            emb_array = np.zeros((1, embedding_size))
            img = cv2.imread(os.path.join(path, filename))
            img = img[:, :, 0:3]
            img = flip(img, False)
            img = cv2.resize(img, (config.lfw.image_size, config.lfw.image_size), interpolation=cv2.INTER_CUBIC)
            img = prewhiten(img)
            img = img.reshape(-1, config.lfw.image_size, config.lfw.image_size, 3)
            feed_dict = {images_placeholder: img, phase_train_placeholder: False}
            emb_array[0, :] = sess.run(embeddings, feed_dict=feed_dict)
            predictions = model.predict(emb_array)
            best_class_indices = predictions  # predict() already returns class indices

            for H_i in HumanNames:
                if HumanNames[best_class_indices[0]] == H_i:
                    result_names = HumanNames[best_class_indices[0]]
                    print(result_names, '---------', path.split('/')[-1])
                    if result_names != path.split('/')[-1]:
                        err += 1
    return 1 - err / total
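For reference, here is a minimal sketch of what the prewhiten and flip helpers used above typically look like in FaceNet-style codebases; this is an assumption about their behavior, not the project's verified implementation:

import numpy as np

def prewhiten(x):
    # Normalize the image to zero mean / unit variance, flooring the std
    # so a constant image does not divide by zero.
    mean = np.mean(x)
    std_adj = np.maximum(np.std(x), 1.0 / np.sqrt(x.size))
    return (x - mean) / std_adj

def flip(image, random_flip):
    # Mirror horizontally with probability 0.5 when random_flip is True;
    # with random_flip=False (as in the loop above) the image passes
    # through unchanged.
    if random_flip and np.random.choice([True, False]):
        image = np.fliplr(image)
    return image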
Example #2
    def tta_inference(self, img):
        # Test-time augmentation: average the plain prediction with
        # predictions on flipped inputs, flipping each one back first.
        pred = self.do_inference(img)
        for flip_idx in self.tta_flips:
            pred += flip(self.do_inference(flip(img, flip_idx)), flip_idx)
        pred /= len(self.tta_flips) + 1
        return pred
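In this example, flip takes an axis index rather than a boolean, so the same call can undo the geometric transform on the prediction. A minimal sketch under that assumption (tta_flips would then hold spatial axis indices; the shapes in the comment are illustrative):

import numpy as np

def flip(data, axis):
    # Reverse the array along one axis; applying the identical flip to the
    # prediction maps it back to the original orientation.
    return np.flip(data, axis=axis)

# Hypothetical usage: for inputs of shape (C, H, W), tta_flips = [1, 2]
# averages over the identity, a vertical flip and a horizontal flip.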
Example #3
    def __getitem__(self, index):
        # get global constants
        num_joints = conf.num_joints
        res_in = conf.res_in
        res_out = conf.res_out
        res_ratio = res_in / res_out

        # get raw data
        img = self.load_image(index)
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        pts, c, s = self.get_part_info(index)
        r = 0

        # data augmentation (scaling and rotation)
        s = s * (2 ** rnd(conf.scale))
        r = 0 if np.random.random() < 0.6 else rnd(conf.rotate)
        inp = crop(img, c, s, r, res_in) / 255.

        # initialize valid joints
        valid2d = np.zeros((num_joints), dtype=np.float32)

        # set output heatmap and 2d pose
        pose2d = np.zeros((num_joints, 2), dtype=np.float32)
        hmap = np.zeros((int(num_joints), int(res_out), int(res_out)), dtype=np.float32)
        for i in range(16):  # 16 joints in the source annotation format
            if conf.inds[i] != 7 and pts[i][0] > 1:  # only joints with a ground-truth annotation
                pt = transform(pts[i], c, s, r, res_in)
                pt = pt.astype(np.float32)
                if (pt[0] >= 0) and (pt[0] <= res_in-1) and (pt[1] >= 0) and (pt[1] <= res_in-1):
                    pose2d[conf.inds[i]] = pt
                    valid2d[conf.inds[i]] = 1.0
                    hmap[conf.inds[i]] = draw_gaussian(hmap[conf.inds[i]], pt/res_ratio+0.5, conf.std)
    
        # data augmentation (flipping and jittering)
        if np.random.random() < 0.5:
            inp = flip(inp)
            hmap = shuffle_lr(flip(hmap))
            pose2d = shuffle_lr(pose2d)
            for i in range(num_joints):
                if pose2d[i][0] > 1:
                    pose2d[i][0] = res_in - pose2d[i][0] - 1
        inp[0] = np.clip(inp[0] * (np.random.random() * .4 + .6), 0, 1)
        inp[1] = np.clip(inp[1] * (np.random.random() * .4 + .6), 0, 1)
        inp[2] = np.clip(inp[2] * (np.random.random() * .4 + .6), 0, 1)
        
        # dummy 3d and camera information (this dataset provides no 3d ground truth)
        valid3d = 0.0
        bbox = np.array([0.0, 0.0, 255.0, 255.0], dtype=np.int32)
        pose3d = np.zeros((num_joints-1, 3), dtype=np.float32)
        cam_f = np.array([1.0, 1.0], dtype=np.float32)
        cam_c = np.array([0.0, 0.0], dtype=np.float32)
        meta3d = np.zeros((num_joints, 3), dtype=np.float32)
        action = 0
        coords_root = np.zeros((3), dtype=np.float32)
        depth_root = 0.
        depth_root_canonical = 0.

        # set data
        data = {'inp': inp, 'bbox': bbox,
            'hmap': hmap,
            'pose2d': pose2d, 'valid2d': valid2d,
            'pose3d': pose3d, 'valid3d': valid3d,
            'cam_f': cam_f, 'cam_c': cam_c,
            'meta3d': meta3d, 'action': action,
            'coords_root': coords_root,
            'depth_root': depth_root,
            'depth_root_canonical': depth_root_canonical
        }

        # return input image, output heatmap and 2d pose
        return index, data
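draw_gaussian renders an unnormalized 2D Gaussian around each joint on its heatmap channel. Since the project's own version is not shown, here is a sketch assuming the common hourglass-style implementation:

import numpy as np

def draw_gaussian(hmap, pt, sigma):
    # Splat a Gaussian centered at pt = (x, y) onto the single-channel
    # heatmap, keeping the element-wise maximum with existing values.
    tmp_size = int(np.ceil(3 * sigma))
    ul = [int(pt[0]) - tmp_size, int(pt[1]) - tmp_size]          # upper-left corner
    br = [int(pt[0]) + tmp_size + 1, int(pt[1]) + tmp_size + 1]  # bottom-right corner
    if ul[0] >= hmap.shape[1] or ul[1] >= hmap.shape[0] or br[0] < 0 or br[1] < 0:
        return hmap  # the Gaussian lies entirely outside the heatmap
    size = 2 * tmp_size + 1
    x = np.arange(size, dtype=np.float32)
    y = x[:, np.newaxis]
    x0 = y0 = size // 2
    g = np.exp(-((x - x0) ** 2 + (y - y0) ** 2) / (2 * sigma ** 2))
    # intersect the Gaussian patch with the heatmap bounds
    g_x = max(0, -ul[0]), min(br[0], hmap.shape[1]) - ul[0]
    g_y = max(0, -ul[1]), min(br[1], hmap.shape[0]) - ul[1]
    h_x = max(0, ul[0]), min(br[0], hmap.shape[1])
    h_y = max(0, ul[1]), min(br[1], hmap.shape[0])
    hmap[h_y[0]:h_y[1], h_x[0]:h_x[1]] = np.maximum(
        hmap[h_y[0]:h_y[1], h_x[0]:h_x[1]], g[g_y[0]:g_y[1], g_x[0]:g_x[1]])
    return hmap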
Example #4
    def __getitem__(self, index):
        if self.split == 'train':
            index = np.random.randint(self.num_samples)

        # get global constants
        num_joints = conf.num_joints
        res_in = conf.res_in
        res_out = conf.res_out
        res_ratio = res_in / res_out

        # get image
        img = self.load_image(index)

        # get 2d/3d pose and bounding box
        pts, bbox, meta3d, cam_f, cam_c, action = self.get_part_info(index)
        cam_f = cam_f.astype(np.float32)
        cam_c = cam_c.astype(np.float32)
        action = action.item()

        # set 2d pose
        pts = pts - bbox[0:2]
        pts = pts / bbox[2:4]
        pts = pts * float(res_in - 1)

        # set 3d pose
        pose3d = meta3d.copy()
        pose3d = pose3d - pose3d[conf.root]

        # data augmentation (small random translation)
        inp = np.zeros_like(img)
        if self.split == 'train':
            # shift by up to ±4 px; the hard-coded 256 below assumes res_in == 256
            xr = np.random.randint(9) - 4
            yr = np.random.randint(9) - 4
            in_x1, in_x2 = max(0, -xr), min(256, 256 - xr)
            in_y1, in_y2 = max(0, -yr), min(256, 256 - yr)
            out_x1, out_x2 = max(0, xr), min(256, 256 + xr)
            out_y1, out_y2 = max(0, yr), min(256, 256 + yr)
            inp[out_y1:out_y2, out_x1:out_x2, :] = img[in_y1:in_y2,
                                                       in_x1:in_x2, :]
            pts[:, 0] = pts[:, 0] + xr
            pts[:, 1] = pts[:, 1] + yr
        else:
            inp[:, :, :] = img[:, :, :]

        inp = cv2.cvtColor(inp, cv2.COLOR_BGR2RGB)
        inp = inp.transpose(2, 0, 1).astype(np.float32)

        # normalization
        inp = inp / 255.0

        # set valid joints
        valid2d = np.ones((num_joints), dtype=np.float32)
        valid3d = 1.0

        # set output heatmap and 2d pose
        pose2d = np.zeros((num_joints, 2), dtype=np.float32)
        hmap = np.zeros((int(num_joints), int(res_out), int(res_out)),
                        dtype=np.float32)
        for i in range(num_joints):
            pt = pts[i].astype(np.float32)
            pose2d[i] = pt
            hmap[i] = draw_gaussian(hmap[i], pt / res_ratio + 0.5, conf.std)

        # data augmentation (flipping and color jittering)
        if self.split == 'train':
            if np.random.random() < 0.5:
                inp = flip(inp)
                # after mirroring, swap each left/right joint pair
                hmap = flip(hmap)
                hmap_flip = hmap.copy()
                for i in range(len(flip_index)):
                    hmap_flip[i] = hmap[flip_index[i]].copy()
                hmap = hmap_flip
                pose2d_flip = pose2d.copy()
                for i in range(len(flip_index)):
                    pose2d_flip[i] = pose2d[flip_index[i]].copy()
                pose2d = pose2d_flip
                pose2d[:, 0] = conf.res_in - 1 - pose2d[:, 0]  # mirror x in pixel coordinates
                pose3d_flip = pose3d.copy()
                for i in range(len(flip_index)):
                    pose3d_flip[i] = pose3d[flip_index[i]].copy()
                pose3d = pose3d_flip
                pose3d[:, 0] *= -1  # mirror x in metric coordinates
            inp[0] = np.clip(inp[0] * (np.random.random() * .4 + .6), 0, 1)
            inp[1] = np.clip(inp[1] * (np.random.random() * .4 + .6), 0, 1)
            inp[2] = np.clip(inp[2] * (np.random.random() * .4 + .6), 0, 1)

        # root coordinates
        coords_root = meta3d[conf.root].copy()
        depth_root = coords_root[2].copy()
        depth_root_canonical = coords_root[2].copy() / np.sqrt(np.prod(cam_f))

        # set 3d pose
        pose3d = np.delete(pose3d, (conf.root), axis=0)

        # set data
        data = {
            'inp': inp,
            'bbox': bbox,
            'hmap': hmap,
            'pose2d': pose2d,
            'valid2d': valid2d,
            'pose3d': pose3d,
            'valid3d': valid3d,
            'cam_f': cam_f,
            'cam_c': cam_c,
            'meta3d': meta3d,
            'action': action,
            'coords_root': coords_root,
            'depth_root': depth_root,
            'depth_root_canonical': depth_root_canonical
        }

        # return
        return index, data
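Both __getitem__ implementations return an (index, data) pair with the same keys, so a standard PyTorch DataLoader can batch every field of the dict automatically. A hypothetical usage sketch (the dataset class name and loader settings are illustrative, not from the source):

import torch.utils.data

# PoseDataset is a hypothetical stand-in for whichever class defines the
# __getitem__ above.
loader = torch.utils.data.DataLoader(
    PoseDataset(split='train'), batch_size=32, shuffle=True, num_workers=4)

for index, data in loader:
    inp = data['inp']        # (B, 3, res_in, res_in), float32 in [0, 1]
    hmap = data['hmap']      # (B, num_joints, res_out, res_out)
    pose3d = data['pose3d']  # (B, num_joints - 1, 3), root-relative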
Example #5
import os
import pickle
import time

import cv2
import numpy as np
import tensorflow as tf
from scipy import misc

# MTCNN, load_model, flip, prewhiten and to_rgb are project/FaceNet helpers
# assumed to be importable from the surrounding codebase.


def main(config):

    print('Creating networks and loading parameters')

    graph_detect = tf.Graph()
    with graph_detect.as_default():
        gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=config.gpu_memory_fraction)
        sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options, log_device_placement=False))
        with sess.as_default():
            detector = MTCNN(config,sess)
            
            load_model(config.lfw.valid_model_path)
            images_placeholder = tf.get_default_graph().get_tensor_by_name("input:0") 
            embeddings = tf.get_default_graph().get_tensor_by_name("embeddings:0")
            phase_train_placeholder = tf.get_default_graph().get_tensor_by_name("phase_train:0")
            embedding_size = embeddings.get_shape()[1]
            
            classifier_filename = config.classifier_path
            classifier_filename_exp = os.path.expanduser(classifier_filename)
            print('load classifier file-> %s' % classifier_filename_exp)
            with open(classifier_filename_exp, 'rb') as infile:
                (model, class_names) = pickle.load(infile)
                
            HumanNames = os.listdir(config.input_dir)
            
            video_capture = cv2.VideoCapture(0)
            prevTime = 0
            c = 0
            while True:
                ret, frame = video_capture.read()
                frame = cv2.resize(frame, (0, 0), fx=0.5, fy=0.5)  # downscale frame (optional)
                curTime = time.time()  # for the FPS calculation
                timeF = config.mtcnn.frame_interval
                if (c % timeF == 0):
                    if frame.ndim == 2:
                        frame = to_rgb(frame)
                    frame = frame[:, :, 0:3]
                    bounding_boxes, _ = detector.detect_face(frame)
                    nrof_faces = bounding_boxes.shape[0]
                    
                    print('Detected_FaceNum: %d' % nrof_faces)
                    
                    if nrof_faces > 0:
                        det = bounding_boxes[:, 0:4]
                        cropped = []
                        scaled = []
                        scaled_reshape = []
                        bb = np.zeros((nrof_faces,4), dtype=np.int32)
                        for i in range(nrof_faces):
                            emb_array = np.zeros((1, embedding_size))
                            
                            bb[i][0] = det[i][0]
                            bb[i][1] = det[i][1]
                            bb[i][2] = det[i][2]
                            bb[i][3] = det[i][3]
                            
                            # skip detections that fall partly outside the frame
                            if bb[i][0] <= 0 or bb[i][1] <= 0 or bb[i][2] >= len(frame[0]) or bb[i][3] >= len(frame):
                                print('face is out of range!')
                                continue
                            cropped.append(frame[bb[i][1]:bb[i][3], bb[i][0]:bb[i][2], :])
                            # index with -1: i can drift from the list position
                            # once a face has been skipped above
                            cropped[-1] = flip(cropped[-1], False)
                            scaled.append(misc.imresize(cropped[-1], (182, 182), interp='bilinear'))
                            scaled[-1] = cv2.resize(scaled[-1], (160, 160), interpolation=cv2.INTER_CUBIC)
                            scaled[-1] = prewhiten(scaled[-1])
                            scaled_reshape.append(scaled[-1].reshape(-1, 160, 160, 3))
                            feed_dict = {images_placeholder: scaled_reshape[-1], phase_train_placeholder: False}
                            predictions = model.predict_proba(emb_array)
                            best_class_indices = np.argmax(predictions, axis=1)
#                            best_class_probabilities = predictions[np.arange(len(best_class_indices)), best_class_indices]
#                            print("best_class_probabilities:", best_class_probabilities)
                            cv2.rectangle(frame, (bb[i][0], bb[i][1]), (bb[i][2], bb[i][3]), (0, 255, 0), 2) 
                            
                            text_x = bb[i][0]
                            text_y = bb[i][3] + 20     
                            print('result: ', best_class_indices[0])
                            for H_i in HumanNames:
                                if HumanNames[best_class_indices[0]] == H_i:
                                    result_names = HumanNames[best_class_indices[0]]
                                    cv2.putText(frame, result_names, (text_x, text_y), cv2.FONT_HERSHEY_COMPLEX_SMALL,
                                                1, (0, 0, 255), thickness=1, lineType=2)
                    else:
                        print('Unable to align')
                sec = curTime - prevTime
                prevTime = curTime
                fps = 1 / sec
                string = 'FPS: %2.3f' % fps
                text_fps_x = len(frame[0]) - 150
                text_fps_y = 20
                cv2.putText(frame, string, (text_fps_x, text_fps_y),cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (0, 255, 0), thickness=1, lineType=2)
                c += 1
                cv2.imshow('Video', frame)
                if cv2.waitKey(1) & 0xFF == ord('q'):
                    break
            video_capture.release()
            cv2.destroyAllWindows()
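Two notes on the helpers this example assumes: scipy.misc.imresize was removed in SciPy 1.3, so an older SciPy (or a Pillow-based replacement) is required, and to_rgb presumably expands a grayscale frame to three channels. A minimal sketch of the latter, under that assumption:

import numpy as np

def to_rgb(img):
    # Replicate a single-channel (H, W) frame into an (H, W, 3) image.
    h, w = img.shape
    ret = np.empty((h, w, 3), dtype=np.uint8)
    ret[:, :, 0] = ret[:, :, 1] = ret[:, :, 2] = img
    return ret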